diff options
211 files changed, 30758 insertions, 13037 deletions
@@ -12,3 +12,4 @@ /configure /sysdep/autoconf.h.in /sysdep/autoconf.h.in~ +/cscope.* diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c9863a1c..ff11dda0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -211,9 +211,6 @@ docker_ubuntu-16_04-amd64: # TODO We want to copy these BSDs to our own virtual machines, to make sure someone doesn't update them by accident. .freebsd-11-i386: &freebsd-11-i386_env - variables: - CPPFLAGS: "-I/usr/local/include" - LDFLAGS: "-L/usr/local/lib" tags: - freebsd - i386 @@ -223,9 +220,6 @@ docker_ubuntu-16_04-amd64: #- tags .freebsd-11-amd64: &freebsd-11-amd64_env - variables: - CPPFLAGS: "-I/usr/local/include" - LDFLAGS: "-L/usr/local/lib" tags: - freebsd - amd64 @@ -238,214 +232,78 @@ docker_ubuntu-16_04-amd64: stage: build script: - autoreconf - - ./configure --enable-ipv6=$IPV6 CPPFLAGS="$CPPFLAGS" LDFLAGS="$LDFLAGS" + - ./configure CPPFLAGS="$CPPFLAGS" LDFLAGS="$LDFLAGS" # Detect which make is available - MAKE=make - which gmake 2>/dev/null >/dev/null && MAKE=gmake - $MAKE - # Run tests if they are available (eg. don't fail if "check" isn't a valid make target) - - $MAKE check || [ "$?" 
= 2 ] + # Run tests if they are available + - $MAKE check build-debian-7-amd64: - variables: - IPV6: "no" <<: *debian-7-amd64_env <<: *build_job build-debian-8-amd64: - variables: - IPV6: "no" <<: *debian-8-amd64_env <<: *build_job build-debian-9-amd64: - variables: - IPV6: "no" <<: *debian-9-amd64_env <<: *build_job build-debian-testing-amd64: - variables: - IPV6: "no" - <<: *debian-testing-amd64_env - <<: *build_job - -build-debian-7-amd64-v6: - variables: - IPV6: "yes" - <<: *debian-7-amd64_env - <<: *build_job - -build-debian-8-amd64-v6: - variables: - IPV6: "yes" - <<: *debian-8-amd64_env - <<: *build_job - -build-debian-9-amd64-v6: - variables: - IPV6: "yes" - <<: *debian-9-amd64_env - <<: *build_job - -build-debian-testing-amd64-v6: - variables: - IPV6: "yes" <<: *debian-testing-amd64_env <<: *build_job build-fedora-25-amd64: - variables: - IPV6: "no" - <<: *fedora-25-amd64_env - <<: *build_job - -build-fedora-25-amd64-v6: - variables: - IPV6: "yes" <<: *fedora-25-amd64_env <<: *build_job build-fedora-26-amd64: - variables: - IPV6: "no" - <<: *fedora-26-amd64_env - <<: *build_job - -build-fedora-26-amd64-v6: - variables: - IPV6: "yes" <<: *fedora-26-amd64_env <<: *build_job build-centos-6-amd64: - variables: - IPV6: "no" - <<: *centos-6-amd64_env - <<: *build_job - -build-centos-6-amd64-v6: - variables: - IPV6: "yes" <<: *centos-6-amd64_env <<: *build_job build-centos-7-amd64: - variables: - IPV6: "no" - <<: *centos-7-amd64_env - <<: *build_job - -build-centos-7-amd64-v6: - variables: - IPV6: "yes" <<: *centos-7-amd64_env <<: *build_job build-opensuse-42_3-amd64: - variables: - IPV6: "no" - <<: *opensuse-42_3-amd64_env - <<: *build_job - -build-opensuse-42_3-amd64-v6: - variables: - IPV6: "yes" <<: *opensuse-42_3-amd64_env <<: *build_job build-ubuntu-14_04-amd64: - variables: - IPV6: "no" - <<: *ubuntu-14_04-amd64_env - <<: *build_job - -build-ubuntu-14_04-amd64-v6: - variables: - IPV6: "yes" <<: *ubuntu-14_04-amd64_env <<: *build_job 
build-ubuntu-16_04-amd64: - variables: - IPV6: "no" - <<: *ubuntu-16_04-amd64_env - <<: *build_job - -build-ubuntu-16_04-amd64-v6: - variables: - IPV6: "yes" <<: *ubuntu-16_04-amd64_env <<: *build_job build-debian-7-i386: - variables: - IPV6: "no" - <<: *debian-7-i386_env - <<: *build_job - -build-debian-7-i386-v6: - variables: - IPV6: "yes" <<: *debian-7-i386_env <<: *build_job build-debian-8-i386: - variables: - IPV6: "no" - <<: *debian-8-i386_env - <<: *build_job - -build-debian-8-i386-v6: - variables: - IPV6: "yes" <<: *debian-8-i386_env <<: *build_job build-debian-9-i386: - variables: - IPV6: "no" - <<: *debian-9-i386_env - <<: *build_job - -build-debian-9-i386-v6: - variables: - IPV6: "yes" <<: *debian-9-i386_env <<: *build_job build-debian-testing-i386: - variables: - IPV6: "no" - <<: *debian-testing-i386_env - <<: *build_job - -build-debian-testing-i386-v6: - variables: - IPV6: "yes" <<: *debian-testing-i386_env <<: *build_job build-freebsd-11-amd64: - variables: - IPV6: "no" - <<: *freebsd-11-amd64_env - <<: *build_job - -build-freebsd-11-amd64-v6: - variables: - IPV6: "yes" <<: *freebsd-11-amd64_env <<: *build_job build-freebsd-11-i386: - variables: - IPV6: "no" - <<: *freebsd-11-i386_env - <<: *build_job - -build-freebsd-i386-v6: - variables: - IPV6: "yes" <<: *freebsd-11-i386_env <<: *build_job @@ -1,3 +1,4 @@ +D doc/prog-head.sgml C doc C nest C conf @@ -5,3 +6,4 @@ C filter C proto C sysdep C lib +D doc/prog-foot.sgml diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 00000000..c8168bbe --- /dev/null +++ b/Makefile.in @@ -0,0 +1,204 @@ +# Makefile for the BIRD Internet Routing Daemon +# (c) 1999--2000 Martin Mares <mj@ucw.cz> +# (c) 2016 Jan Moskyto Matejka <mq@ucw.cz> + +# Disable built-in rules +MAKEFLAGS += -r + +# Variable definitions +CPPFLAGS=-I$(objdir) -I$(srcdir) @CPPFLAGS@ +CFLAGS=$(CPPFLAGS) @CFLAGS@ +LDFLAGS=@LDFLAGS@ +LIBS=@LIBS@ +DAEMON_LIBS=@DAEMON_LIBS@ +CLIENT_LIBS=@CLIENT_LIBS@ +CC=@CC@ +M4=@M4@ +BISON=@BISON@ 
+FLEX=@FLEX@ +RANLIB=@RANLIB@ +INSTALL=@INSTALL@ +INSTALL_PROGRAM=@INSTALL_PROGRAM@ +INSTALL_DATA=@INSTALL_DATA@ + +git-label:=$(strip $(shell git describe --always --dirty=-x 2>/dev/null)) +ifneq ($(git-label),) + CFLAGS += -DGIT_LABEL="$(git-label)" +endif + +client=$(addprefix $(exedir)/,@CLIENT@) +daemon=$(exedir)/bird +protocols=@protocols@ + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +bindir=@bindir@ +sbindir=@sbindir@ +sysconfdir=@sysconfdir@ +localstatedir=@localstatedir@ +docdir=@prefix@/doc + +srcdir := @srcdir@ +objdir := @objdir@ +exedir := @exedir@ + +ifeq ($(objdir),.) + objdir := $(realpath .) +endif + +ifeq ($(VERBOSE),) + E:=@ + Q:=@ +else + E:=@\# + Q:= +endif + +# Meta rules +docgoals := docs userdocs progdocs +testgoals := check test tests tests_run +cleangoals := clean distclean testsclean +.PHONY: all daemon cli $(docgoals) $(testgoals) $(cleangoals) tags cscope +all: daemon cli + +daemon: $(daemon) +cli: $(client) + +$(daemon): LIBS += $(DAEMON_LIBS) + +# Include directories +dirs := client conf doc filter lib nest test $(addprefix proto/,$(protocols)) @sysdep_dirs@ + +conf-y-targets := $(addprefix $(objdir)/conf/,cf-parse.y keywords.h commands.h) +cf-local = $(conf-y-targets): $(s)config.Y + +src-o-files = $(patsubst %.c,$(o)%.o,$(src)) +tests-target-files = $(patsubst %.c,$(o)%,$(tests_src)) + +all-daemon = $(daemon): $(obj) +all-client = $(client): $(obj) + +s = $(dir $(lastword $(MAKEFILE_LIST))) +ifeq ($(srcdir),.) 
+ o = $(objdir)/$(s) +else + o = $(patsubst $(srcdir)%,$(objdir)%,$(s)) +endif + +define clean_in = +clean:: + rm -f $(addprefix $(o),$(1)) +endef + +clean = $(eval $(call clean_in,$(1))) + +include $(addsuffix /Makefile,$(addprefix $(srcdir)/,$(dirs))) + +# Generic rules + +$(objdir)/%.o: $(srcdir)/%.c $(objdir)/.dir-stamp $(objdir)/sysdep/paths.h + $(E)echo CC -o $@ -c $< + $(Q)$(CC) $(CFLAGS) -MMD -MP -o $@ -c $< + +$(objdir)/%.o: $(objdir)/%.c $(objdir)/.dir-stamp $(objdir)/sysdep/paths.h + $(E)echo CC -o $@ -c $< + $(Q)$(CC) $(CFLAGS) -MMD -MP -o $@ -c $< + + +$(objdir)/%.S: $(srcdir)/%.c $(objdir)/.dir-stamp $(objdir)/sysdep/paths.h + $(E)echo CC -o $@ -S $< + $(Q)$(CC) $(CFLAGS) -MMD -MP -o $@ -S $< + +$(objdir)/%.S: $(objdir)/%.c $(objdir)/.dir-stamp $(objdir)/sysdep/paths.h + $(E)echo CC -o $@ -S $< + $(Q)$(CC) $(CFLAGS) -MMD -MP -o $@ -S $< + + + +$(objdir)/.dir-stamp: + $(E)echo MKDIR -p $(addprefix $(objdir)/,$(dirs) doc) + $(Q)mkdir -p $(addprefix $(objdir)/,$(dirs) doc) + $(Q)touch $@ + +$(client) $(daemon): + $(E)echo LD $(LDFLAGS) -o $@ $^ $(LIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $^ $(LIBS) + +$(objdir)/sysdep/paths.h: Makefile + echo >$@ "/* Generated by Makefile, don't edit manually! */" + echo >>$@ "#define PATH_CONFIG_FILE \"@CONFIG_FILE@\"" + echo >>$@ "#define PATH_CONTROL_SOCKET \"@CONTROL_SOCKET@\"" + if test -n "@iproutedir@" ; then echo >>$@ "#define PATH_IPROUTE_DIR \"@iproutedir@\"" ; fi + +# Unit tests rules + +tests_targets_ok = $(addsuffix .ok,$(tests_targets)) + +$(tests_targets): %: %.o $(tests_objs) + $(E)echo LD $(LDFLAGS) -o $@ $< "..." 
$(LIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $^ $(LIBS) + +# Hack to avoid problems with tests linking everything +$(tests_targets): LIBS += $(DAEMON_LIBS) + +$(tests_targets_ok): %.ok: % + $(Q)$* 2>/dev/null && touch $*.ok + +test: testsclean check +check: tests tests_run +tests: $(tests_targets) +tests_run: $(tests_targets_ok) + +# Finally include the computed dependencies + +ifneq ($(filter-out $(cleangoals),$(MAKECMDGOALS)),) +-include $(shell find $(objdir) -name "*.d") +endif + +ifeq ($(MAKECMDGOALS),) +-include $(shell find $(objdir) -name "*.d") +endif + +tags: + cd $(srcdir) ; etags -lc `find $(dirs) -name *.[chY]` + +cscope: + cd $(srcdir) ; find $(dirs) -name *.[chY] > cscope.files ; cscope -b + +# Install + +install: all + $(INSTALL) -d $(DESTDIR)/$(sbindir) $(DESTDIR)/$(sysconfdir) $(DESTDIR)/@runtimedir@ + $(INSTALL_PROGRAM) $(exedir)/bird $(DESTDIR)/$(sbindir)/bird + $(INSTALL_PROGRAM) $(exedir)/birdcl $(DESTDIR)/$(sbindir)/birdcl + if test -n "@CLIENT@" ; then \ + $(INSTALL_PROGRAM) $(exedir)/birdc $(DESTDIR)/$(sbindir)/birdc ; \ + fi + if ! 
test -f $(DESTDIR)/@CONFIG_FILE@ ; then \ + $(INSTALL_DATA) $(srcdir)/doc/bird.conf.example $(DESTDIR)/@CONFIG_FILE@ ; \ + else \ + echo "Not overwriting old bird.conf" ; \ + fi + +install-docs: + $(INSTALL) -d $(DESTDIR)/$(docdir) + $(INSTALL_DATA) $(objdir)/doc/{bird,prog}{,-*}.html $(DESTDIR)/$(docdir)/ + +# Cleanup +clean:: + rm -f $(objdir)/sysdep/paths.h + rm -f $(addprefix $(exedir)/,bird birdc birdcl) + find $(objdir) -name "*.[od]" -exec rm -f '{}' '+' + +testsclean: + rm -f $(tests_targets_ok) + +ifeq ($(objdir),obj) +distclean: clean + rm -rf $(objdir) + rm -f config.log config.status configure Makefile +else +distclean: clean + rm -rf * .dir-stamp + rm -f config.log config.status configure Makefile +endif @@ -1,3 +1,60 @@ +Version 2.0.1 (2018-01-16) + o Linux MPLS kernel support + o Better handling of channels inherited from templates + o Default EBGP Route Propagation Behavior without Policies (RFC 8212) + o Many bugfixes + + Notes: + + To satisfy requirements of RFC 8212, external BGP protocols now require + explicit configuration of import and export policies. 
+ + +Version 2.0.0 (2017-12-11) + o Integrated IPv4 + IPv6 design + o Support for MPLS next hops + o Support for VPNv4 and VPNv6 networks + o Microsecond timers infrastructure + o Basic VRF support + o Babel: Support for dual-stack IPv4/IPv6 + o Babel: Many improvements and bugfixes + o Major BGP protocol redesign + o Full support for Multiprotocol BGP + o BGP multicast support (SAFI 2) + o BGP flowspec support (RFC 5575) + o BGP with MPLS labels (RFC 3107) + o BGP MPLS/VPN support (RFC 4364) + o BGP 6PE - IPv6 NLRI over IPv4 MPLS (RFC 4798) + o BGP IPv4 NLRI with an IPv6 Next Hop (RFC 5549) + o BGP Confederations (RFC 5065) + o BGP Shutdown communication (RFC 8203) + o BGP: Allow exchanging LOCAL_PREF with eBGP peers + o BGP: Allow to specify interface for regular sessions + o OSPF: Support of address families in OSPFv3 + o OSPF: Enable ECMP and Link detection by default + o RAdv: Support for more specific routes (RFC 4191) + o RAdv: Proper handling of prefix retraction + o RIP: Enable ECMP and Link detection by default + o Redesign of RPKI handling + o New RPKI-Router protocol + o Static: Minor overhaul + o Static: Support for all new route types + o Kernel: Default Linux kernel metric changed to 32 + o Kernel: Fix IPv6 ECMP handling with Linux 4.11+ + o Update of show route command + o BIRD client persistent history + o New build system + o Unit tests + o ... + + Notes: + + Tables are now defined with appropriate net type keyword. Protocols and tables + are now connected by explicit channels, most related protocol options (table, + import, export, ...) are now channel options. See doc/bird.conf.example2 for + configuration examples. Some options were removed/replaced. + + Version 1.6.3 (2016-12-21) o Large BGP communities o BFD authentication (MD5, SHA1) @@ -6,7 +6,7 @@ (c) 1998--2008 Martin Mares <mj@ucw.cz> (c) 1998--2000 Pavel Machek <pavel@ucw.cz> (c) 1998--2008 Ondrej Filip <feela@network.cz> - (c) 2009--2016 CZ.NIC z.s.p.o. + (c) 2009--2017 CZ.NIC z.s.p.o. 
================================================================================ @@ -19,7 +19,7 @@ Public License. What do we support ================== - o Both IPv4 and IPv6 (use --enable-ipv6 when configuring) + o Both IPv4 and IPv6 o Multiple routing tables o Border Gateway Protocol (BGPv4) o Routing Information Protocol (RIPv2, RIPng) @@ -31,6 +31,41 @@ AC_DEFUN([BIRD_CHECK_PTHREADS], CFLAGS="$bird_tmp_cflags" ]) +AC_DEFUN([BIRD_CHECK_MPLS_KERNEL], +[ + AC_CACHE_CHECK( + [for Linux MPLS headers], + [bird_cv_mpls_kernel], + [ + AC_COMPILE_IFELSE( + [ + AC_LANG_PROGRAM( + [ + #include <linux/lwtunnel.h> + #include <linux/netlink.h> + #include <linux/rtnetlink.h> + #include <sys/socket.h> + void t(int arg); + ], + [ + t(AF_MPLS); + t(RTA_VIA); + t(RTA_NEWDST); + t(RTA_ENCAP_TYPE); + t(RTA_ENCAP); + struct rtvia rtvia; + t(LWTUNNEL_ENCAP_MPLS); + ] + ) + ], + [bird_cv_mpls_kernel=yes], + [bird_cv_mpls_kernel=no] + ) + ] + ) +]) + + AC_DEFUN([BIRD_CHECK_GCC_OPTION], [ bird_tmp_cflags="$CFLAGS" @@ -1,44 +1,58 @@ /* - * This is an example configuration file. + * This is a simple example configuration file with no aim for completeness. + * See documentation for full description. */ -# Yet another comment - +# Router ID in IPv4 format router id 62.168.0.1; -define xyzzy = (120+10); +# Load device information from kernel. +protocol device {} -protocol device { +# Generate direct routes for interfaces. Useful on BSD. 
+protocol direct { + ipv4; disabled; -# interface "eth*", "ppp*"; } protocol direct { + ipv6; + disabled; } +# Feed routes to kernel FIB protocol kernel { - disabled; -# learn; # Learn all routes from the kernel + ipv4 { export all; import all; }; + learn; # Learn all routes from the kernel # scan time 10; # Scan kernel tables every 10 seconds } -protocol static { -# disabled; +protocol kernel { + ipv6 { import all; }; + learn; +} - route fec0:2::/64 blackhole; - route fec0:3::/64 unreachable; - route fec0:4::/64 prohibit; +# Static route feed +protocol static { + ipv4 { export all; }; + route 10.0.0.0/24 via 55.55.55.44; + route 10.10.0.0/16 blackhole; + route 10.20.0.0/20 unreachable; + route 10.30.50.0/28 prohibit; +} -# route 0.0.0.0/0 via 195.113.31.113; -# route 62.168.0.0/25 unreachable; -# route 1.2.3.4/32 via 195.113.31.124; -# route 10.0.0.0/8 unreachable; -# route 10.1.1.0:255.255.255.0 via 62.168.0.3; -# route 10.1.2.0:255.255.255.0 via 62.168.0.3; -# route 10.1.3.0:255.255.255.0 via 62.168.0.4; -# route 10.2.0.0/24 via "arc0"; - export all; +protocol static { + ipv6 { export all; }; + route 2001:db8:1::/48 via 5555::6666; + route 2001:db8:2::/48 blackhole; + route 2001:db8:3::/48 prohibit; + route 2001:db8:4::/48 unreachable; } protocol rip { + ipv4; +} + +protocol rip ng { + ipv6; } diff --git a/client/Makefile b/client/Makefile index a1578766..fccb8346 100644 --- a/client/Makefile +++ b/client/Makefile @@ -1,11 +1,11 @@ -source=commands.c util.c client.c -root-rel=../ -dir-name=client +src := commands.c util.c client.c +obj := $(src-o-files) -clients := $(client) birdcl +$(all-client) -source-dep := $(source) $(addsuffix .c,$(clients)) +$(o)commands.o: $(objdir)/conf/commands.h -subdir: $(addsuffix .o,$(clients)) +$(exedir)/birdc: $(o)birdc.o +$(exedir)/birdc: LIBS += $(CLIENT_LIBS) -include ../Rules +$(exedir)/birdcl: $(o)birdcl.o diff --git a/client/birdc.c b/client/birdc.c index 8aa01c17..f1aea2fe 100644 --- a/client/birdc.c +++ b/client/birdc.c 
@@ -29,6 +29,9 @@ static int prompt_active; extern int _rl_vis_botlin; extern void _rl_move_vert(int); +#define HISTORY "/.birdc_history" +static char *history_file; + static void add_history_dedup(char *cmd) { @@ -138,8 +141,24 @@ input_help(int arg, int key UNUSED) } void +history_init(void) +{ + const char *homedir = getenv("HOME"); + if (!homedir) + homedir = "."; + history_file = malloc(strlen(homedir) + sizeof(HISTORY)); + if (!history_file) + die("couldn't alloc enough memory for history file name"); + + sprintf(history_file, "%s%s", homedir, HISTORY); + read_history(history_file); +} + +void input_init(void) { + if (interactive) + history_init(); rl_readline_name = "birdc"; rl_add_defun("bird-complete", input_complete, '\t'); rl_add_defun("bird-help", input_help, '?'); @@ -217,5 +236,7 @@ cleanup(void) return; input_hide(); + if (interactive) + write_history(history_file); rl_callback_handler_remove(); } diff --git a/client/commands.c b/client/commands.c index 0da7d835..fdf2652a 100644 --- a/client/commands.c +++ b/client/commands.c @@ -7,8 +7,8 @@ */ #include <stdio.h> -#include <ctype.h> #include <stdlib.h> +#include <ctype.h> #include "nest/bird.h" #include "lib/resource.h" diff --git a/conf/Makefile b/conf/Makefile index cd78c821..fb3dd052 100644 --- a/conf/Makefile +++ b/conf/Makefile @@ -1,33 +1,33 @@ -source=cf-parse.tab.c cf-lex.c conf.c -root-rel=../ +src := cf-parse.tab.c cf-lex.c conf.c +obj := $(src-o-files) -include ../Rules +$(all-daemon) -conf-src=$(srcdir)/conf -conf-fragments=$(conf-src)/confbase.Y @CONFS@ $(addsuffix /config.Y,$(static-dir-paths)) +tests_objs := $(tests_objs) $(src-o-files) ifdef DEBUG BISON_DEBUG=-t #FLEX_DEBUG=-d endif -cf-parse.tab.h: cf-parse.tab.c +$(conf-y-targets): $(s)confbase.Y $(s)flowspec.Y + $(M4) -P $| $^ >$@ -cf-parse.tab.c: cf-parse.y - $(BISON) -bcf-parse -dv -pcf_ $(BISON_DEBUG) cf-parse.y +$(o)cf-parse.y: | $(s)gen_parser.m4 +$(o)keywords.h: | $(s)gen_keywords.m4 +$(o)commands.h: | $(s)gen_commands.m4 
$(srcdir)/client/cmds.m4 -cf-parse.y: $(conf-fragments) $(conf-src)/gen_parser.m4 - $(M4) -P $(conf-src)/gen_parser.m4 $(conf-fragments) >cf-parse.y +$(o)cf-parse.tab.h: $(o)cf-parse.tab.c -keywords.h: $(conf-fragments) $(conf-src)/gen_keywords.m4 - $(M4) -P $(conf-src)/gen_keywords.m4 $(conf-fragments) >keywords.h +$(o)cf-parse.tab.c: $(o)cf-parse.y + $(BISON) $(BISON_DEBUG) -dv -pcf_ -b $(@:.tab.c=) $< -commands.h: $(conf-fragments) $(conf-src)/gen_commands.m4 $(srcdir)/client/cmds.m4 - $(M4) -P $(conf-src)/gen_commands.m4 $(srcdir)/client/cmds.m4 $(conf-fragments) | sort >commands.h +$(o)cf-lex.c: $(s)cf-lex.l + $(FLEX) $(FLEX_DEBUG) -s -B -8 -Pcf_ -o$@ $< -cf-lex.c: cf-lex.l - $(FLEX) $(FLEX_DEBUG) -s -B -8 -ocf-lex.c -Pcf_ cf-lex.l +$(o)cf-lex.o: $(o)cf-parse.tab.h $(o)keywords.h +$(o)cf-lex.o: CFLAGS+=-Wno-sign-compare -Wno-unused-function -depend: keywords.h commands.h cf-parse.tab.c cf-lex.c +$(addprefix $(o), cf-parse.y keywords.h commands.h cf-parse.tab.h cf-parse.tab.c cf-lex.c): $(objdir)/.dir-stamp -cf-lex.o: CFLAGS+=-Wno-sign-compare -Wno-unused-function +$(call clean,cf-parse.tab.h cf-parse.tab.c cf-parse.y keywords.h commands.h cf-lex.c cf-parse.output) diff --git a/conf/cf-lex.l b/conf/cf-lex.l index 66be3811..c3154b36 100644 --- a/conf/cf-lex.l +++ b/conf/cf-lex.l @@ -139,28 +139,103 @@ include ^{WHITE}*include{WHITE}*\".*\"{WHITE}*; cf_include(start, end-start); } +{DIGIT}+:{DIGIT}+ { + uint len1 UNUSED, len2; + u64 l; + char *e; + + errno = 0; + l = strtoul(yytext, &e, 10); + if (e && (*e != ':') || (errno == ERANGE) || (l >> 32)) + cf_error("ASN out of range"); + + if (l >> 16) + { + len1 = 32; + len2 = 16; + cf_lval.i64 = (2ULL << 48) | (((u64) l) << len2); + } + else + { + len1 = 16; + len2 = 32; + cf_lval.i64 = 0 | (((u64) l) << len2); + } + + errno = 0; + l = strtoul(e+1, &e, 10); + if (e && *e || (errno == ERANGE) || (l >> len2)) + cf_error("Number out of range"); + cf_lval.i64 |= l; + + return VPN_RD; +} + +[02]:{DIGIT}+:{DIGIT}+ { + uint 
len1, len2; + u64 l; + char *e; + + if (yytext[0] == '0') + { + cf_lval.i64 = 0; + len1 = 16; + len2 = 32; + } + else + { + cf_lval.i64 = 2ULL << 48; + len1 = 32; + len2 = 16; + } + + errno = 0; + l = strtoul(yytext+2, &e, 10); + if (e && (*e != ':') || (errno == ERANGE) || (l >> len1)) + cf_error("ASN out of range"); + cf_lval.i64 |= ((u64) l) << len2; + + errno = 0; + l = strtoul(e+1, &e, 10); + if (e && *e || (errno == ERANGE) || (l >> len2)) + cf_error("Number out of range"); + cf_lval.i64 |= l; + + return VPN_RD; +} + +{DIGIT}+\.{DIGIT}+\.{DIGIT}+\.{DIGIT}+:{DIGIT}+ { + unsigned long int l; + ip4_addr ip4; + char *e; + + cf_lval.i64 = 1ULL << 48; + + e = strchr(yytext, ':'); + *e++ = '\0'; + if (!ip4_pton(yytext, &ip4)) + cf_error("Invalid IPv4 address %s in Route Distinguisher", yytext); + cf_lval.i64 |= ((u64) ip4_to_u32(ip4)) << 16; + + errno = 0; + l = strtoul(e, &e, 10); + if (e && *e || (errno == ERANGE) || (l >> 16)) + cf_error("Number out of range"); + cf_lval.i64 |= l; + + return VPN_RD; +} + {DIGIT}+\.{DIGIT}+\.{DIGIT}+\.{DIGIT}+ { - ip4_addr a; - if (!ip4_pton(yytext, &a)) + if (!ip4_pton(yytext, &cf_lval.ip4)) cf_error("Invalid IPv4 address %s", yytext); - -#ifdef IPV6 - cf_lval.i32 = ip4_to_u32(a); - return RTRID; -#else - cf_lval.a = ipa_from_ip4(a); - return IPA; -#endif + return IP4; } ({XIGIT}*::|({XIGIT}*:){3,})({XIGIT}*|{DIGIT}+\.{DIGIT}+\.{DIGIT}+\.{DIGIT}+) { -#ifdef IPV6 - if (ipa_pton(yytext, &cf_lval.a)) - return IPA; - cf_error("Invalid IPv6 address %s", yytext); -#else - cf_error("This is an IPv4 router, therefore IPv6 addresses are not supported"); -#endif + if (!ip6_pton(yytext, &cf_lval.ip6)) + cf_error("Invalid IPv6 address %s", yytext); + return IP6; } 0x{XIGIT}+ { @@ -228,6 +303,7 @@ else: { ["][^"\n]*["] { yytext[yyleng-1] = 0; cf_lval.t = cfg_strdup(yytext+1); + yytext[yyleng-1] = '"'; return TEXT; } @@ -662,8 +738,6 @@ cf_symbol_class_name(struct symbol *sym) return "filter"; case SYM_TABLE: return "routing table"; - case 
SYM_ROA: - return "ROA table"; default: return "unknown type"; } diff --git a/conf/conf.c b/conf/conf.c index 7f4eb7e8..885e2e7e 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -56,6 +56,7 @@ #include "conf/conf.h" #include "filter/filter.h" + static jmp_buf conf_jmpbuf; struct config *config, *new_config; @@ -85,10 +86,10 @@ int undo_available; /* Undo was not requested from last reconfiguration */ * further use. Returns a pointer to the structure. */ struct config * -config_alloc(const byte *name) +config_alloc(const char *name) { pool *p = rp_new(&root_pool, "Config"); - linpool *l = lp_new(p, 4080); + linpool *l = lp_new_default(p); struct config *c = lp_allocz(l, sizeof(struct config)); /* Duplication of name string in local linear pool */ @@ -96,13 +97,14 @@ config_alloc(const byte *name) char *ndup = lp_allocu(l, nlen); memcpy(ndup, name, nlen); + init_list(&c->tests); c->mrtdump_file = -1; /* Hack, this should be sysdep-specific */ c->pool = p; c->mem = l; c->file_name = ndup; - c->load_time = now; - c->tf_route = c->tf_proto = (struct timeformat){"%T", "%F", 20*3600}; - c->tf_base = c->tf_log = (struct timeformat){"%F %T", NULL, 0}; + c->load_time = current_time(); + c->tf_route = c->tf_proto = TM_ISO_SHORT_MS; + c->tf_base = c->tf_log = TM_ISO_LONG_MS; c->gr_wait = DEFAULT_GR_WAIT; return c; @@ -135,15 +137,16 @@ config_parse(struct config *c) sysdep_preconfig(c); protos_preconfig(c); rt_preconfig(c); - roa_preconfig(c); cf_parse(); - protos_postconfig(c); + if (EMPTY_LIST(c->protos)) cf_error("No protocol is specified in the config file"); -#ifdef IPV6 + + /* if (!c->router_id) - cf_error("Router ID must be configured manually on IPv6 routers"); -#endif + cf_error("Router ID must be configured manually"); + */ + done = 1; cleanup: @@ -216,11 +219,6 @@ global_commit(struct config *new, struct config *old) if (!old) return 0; - if (!ipa_equal(old->listen_bgp_addr, new->listen_bgp_addr) || - (old->listen_bgp_port != new->listen_bgp_port) || - 
(old->listen_bgp_flags != new->listen_bgp_flags)) - log(L_WARN "Reconfiguration of BGP listening socket not implemented, please restart BIRD."); - if (!new->router_id) { new->router_id = old->router_id; @@ -266,7 +264,6 @@ config_do_commit(struct config *c, int type) force_restart |= global_commit(c, old_config); DBG("rt_commit\n"); rt_commit(c, old_config); - roa_commit(c, old_config); DBG("protos_commit\n"); protos_commit(c, old_config, force_restart, type); @@ -305,7 +302,7 @@ config_done(void *unused UNUSED) * config_commit - commit a configuration * @c: new configuration * @type: type of reconfiguration (RECONFIG_SOFT or RECONFIG_HARD) - * @timeout: timeout for undo (or 0 for no timeout) + * @timeout: timeout for undo (in seconds; or 0 for no timeout) * * When a configuration is parsed and prepared for use, the * config_commit() function starts the process of reconfiguration. @@ -329,7 +326,7 @@ config_done(void *unused UNUSED) * are accepted. */ int -config_commit(struct config *c, int type, int timeout) +config_commit(struct config *c, int type, uint timeout) { if (shutting_down) { @@ -338,8 +335,8 @@ config_commit(struct config *c, int type, int timeout) } undo_available = 1; - if (timeout > 0) - tm_start(config_timer, timeout); + if (timeout) + tm_start(config_timer, timeout S); else tm_stop(config_timer); @@ -450,7 +447,7 @@ config_undo(void) extern void cmd_reconfig_undo_notify(void); static void -config_timeout(struct timer *t UNUSED) +config_timeout(timer *t UNUSED) { log(L_INFO "Config timeout expired, starting undo"); cmd_reconfig_undo_notify(); @@ -504,7 +501,7 @@ order_shutdown(void) * error in the configuration. */ void -cf_error(char *msg, ...) +cf_error(const char *msg, ...) 
{ char buf[1024]; va_list args; diff --git a/conf/conf.h b/conf/conf.h index bf74b76b..f174d352 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -9,9 +9,11 @@ #ifndef _BIRD_CONF_H_ #define _BIRD_CONF_H_ +#include "sysdep/config.h" +#include "lib/ip.h" +#include "lib/hash.h" #include "lib/resource.h" #include "lib/timer.h" -#include "lib/hash.h" /* Configuration structure */ @@ -21,25 +23,22 @@ struct config { linpool *mem; /* Linear pool containing configuration data */ list protos; /* Configured protocol instances (struct proto_config) */ list tables; /* Configured routing tables (struct rtable_config) */ - list roa_tables; /* Configured ROA tables (struct roa_table_config) */ list logfiles; /* Configured log files (sysdep) */ + list tests; /* Configured unit tests (f_bt_test_suite) */ int mrtdump_file; /* Configured MRTDump file (sysdep, fd in unix) */ char *syslog_name; /* Name used for syslog (NULL -> no syslog) */ - struct rtable_config *master_rtc; /* Configuration of master routing table */ + struct rtable_config *def_tables[NET_MAX]; /* Default routing tables for each network */ struct iface_patt *router_id_from; /* Configured list of router ID iface patterns */ u32 router_id; /* Our Router ID */ - ip_addr listen_bgp_addr; /* Listening BGP socket should use this address */ - unsigned listen_bgp_port; /* Listening BGP socket should use this port (0 is default) */ - u32 listen_bgp_flags; /* Listening BGP socket should use these flags */ unsigned proto_default_debug; /* Default protocol debug mask */ unsigned proto_default_mrtdump; /* Default protocol mrtdump mask */ struct timeformat tf_route; /* Time format for 'show route' */ struct timeformat tf_proto; /* Time format for 'show protocol' */ struct timeformat tf_log; /* Time format for the logfile */ struct timeformat tf_base; /* Time format for other purposes */ - u32 gr_wait; /* Graceful restart wait timeout */ + u32 gr_wait; /* Graceful restart wait timeout (sec) */ int cli_debug; /* Tracing of CLI 
connections and commands */ int latency_debug; /* I/O loop tracks duration of each event */ @@ -55,22 +54,22 @@ struct config { struct config *fallback; /* Link to regular config for CLI parsing */ int obstacle_count; /* Number of items blocking freeing of this config */ int shutdown; /* This is a pseudo-config for daemon shutdown */ - bird_clock_t load_time; /* When we've got this configuration */ + btime load_time; /* When we've got this configuration */ }; /* Please don't use these variables in protocols. Use proto_config->global instead. */ extern struct config *config; /* Currently active configuration */ extern struct config *new_config; /* Configuration being parsed */ -struct config *config_alloc(const byte *name); +struct config *config_alloc(const char *name); int config_parse(struct config *); int cli_parse(struct config *); void config_free(struct config *); -int config_commit(struct config *, int type, int timeout); +int config_commit(struct config *, int type, uint timeout); int config_confirm(void); int config_undo(void); void config_init(void); -void cf_error(char *msg, ...) NORET; +void cf_error(const char *msg, ...) 
NORET; void config_add_obstacle(struct config *); void config_del_obstacle(struct config *); void order_shutdown(void); @@ -128,7 +127,6 @@ struct sym_scope { #define SYM_FUNCTION 3 #define SYM_FILTER 4 #define SYM_TABLE 5 -#define SYM_ROA 6 #define SYM_VARIABLE 0x100 /* 0x100-0x1ff are variable types */ #define SYM_CONSTANT 0x200 /* 0x200-0x2ff are variable types */ @@ -169,6 +167,7 @@ static inline int cf_symbol_is_constant(struct symbol *sym) /* Parser */ +extern char *cf_text; int cf_parse(void); /* Sysdep hooks */ diff --git a/conf/confbase.Y b/conf/confbase.Y index b8deed54..72f56f1e 100644 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@ -27,21 +27,27 @@ CF_HDR CF_DEFINES static void -check_u16(unsigned val) +check_u16(uint val) { if (val > 0xFFFF) - cf_error("Value %d out of range (0-65535)", val); + cf_error("Value %u out of range (0-65535)", val); } CF_DECLS %union { - int i; + uint i; u32 i32; + u64 i64; ip_addr a; + ip4_addr ip4; + ip6_addr ip6; + net_addr net; + net_addr *net_ptr; struct symbol *s; char *t; struct rtable_config *r; + struct channel_config *cc; struct f_inst *x; struct f_dynamic_attr fda; struct f_static_attr fsa; @@ -52,35 +58,37 @@ CF_DECLS struct f_path_mask *h; struct password_item *p; struct rt_show_data *ra; - struct roa_show_data *ro; struct sym_show_data *sd; struct lsadb_show_data *ld; struct iface *iface; - struct roa_table *rot; void *g; - bird_clock_t time; - struct prefix px; + btime time; + struct f_prefix px; struct proto_spec ps; + struct channel_limit cl; struct timeformat *tf; + mpls_label_stack *mls; } %token END CLI_MARKER INVALID_TOKEN ELSECOL DDOT %token GEQ LEQ NEQ AND OR %token PO PC %token <i> NUM ENUM -%token <i32> RTRID -%token <a> IPA +%token <ip4> IP4 +%token <ip6> IP6 +%token <i64> VPN_RD %token <s> SYM %token <t> TEXT %type <iface> ipa_scope -%type <i> expr bool pxlen -%type <i32> expr_us -%type <time> datetime +%type <i> expr bool pxlen4 +%type <time> expr_us time %type <a> ipa -%type <px> prefix 
prefix_or_ipa +%type <net> net_ip4_ net_ip6_ net_ip6 net_ip_ net_ip net_or_ipa +%type <net_ptr> net_ net_any net_vpn4_ net_vpn6_ net_vpn_ net_roa4_ net_roa6_ net_roa_ net_ip6_sadr_ net_mpls_ +%type <mls> label_stack_start label_stack + %type <t> text opttext -%type <t> text_or_none %nonassoc PREFIX_DUMMY %left AND OR @@ -90,7 +98,7 @@ CF_DECLS %left '!' %nonassoc '.' -CF_KEYWORDS(DEFINE, ON, OFF, YES, NO, S, MS, US, PORT) +CF_KEYWORDS(DEFINE, ON, OFF, YES, NO, S, MS, US, PORT, VPN, MPLS, FROM) CF_GRAMMAR @@ -130,13 +138,11 @@ expr: expr_us: - expr S { $$ = (u32) $1 * 1000000; } - | expr MS { $$ = (u32) $1 * 1000; } - | expr US { $$ = (u32) $1 * 1; } + expr S { $$ = $1 S_; } + | expr MS { $$ = $1 MS_; } + | expr US { $$ = $1 US_; } ; -/* expr_u16: expr { check_u16($1); $$ = $1; }; */ - /* Switches */ bool: @@ -148,13 +154,15 @@ bool: | /* Silence means agreement */ { $$ = 1; } ; -/* Addresses, prefixes and netmasks */ + +/* Addresses */ ipa: - IPA + IP4 { $$ = ipa_from_ip4($1); } + | IP6 { $$ = ipa_from_ip6($1); } | SYM { if ($1->class != (SYM_CONSTANT | T_IP)) cf_error("IP address expected"); - $$ = SYM_VAL($1).px.ip; + $$ = SYM_VAL($1).ip; } ; @@ -163,34 +171,172 @@ ipa_scope: | '%' SYM { $$ = if_get_by_name($2->name); } ; -prefix: - ipa pxlen { - if (!ip_is_prefix($1, $2)) cf_error("Invalid prefix"); - $$.addr = $1; $$.len = $2; + +/* Networks - internal */ + +pxlen4: + '/' NUM { + if ($2 > IP4_MAX_PREFIX_LENGTH) cf_error("Invalid prefix length %u", $2); + $$ = $2; + } + ; + +net_ip4_: IP4 pxlen4 +{ + net_fill_ip4(&($$), $1, $2); + + net_addr_ip4 *n = (void *) &($$); + if (!net_validate_ip4(n)) + cf_error("Invalid IPv4 prefix %I4/%d, maybe you wanted %I4/%d", + n->prefix, n->pxlen, ip4_and(n->prefix, ip4_mkmask(n->pxlen)), n->pxlen); +}; + +net_ip6_: IP6 '/' NUM +{ + if ($3 > IP6_MAX_PREFIX_LENGTH) + cf_error("Invalid prefix length %u", $3); + + net_fill_ip6(&($$), $1, $3); + + net_addr_ip6 *n = (void *) &($$); + if (!net_validate_ip6(n)) + cf_error("Invalid IPv6 
prefix %I6/%d, maybe you wanted %I6/%d", + n->prefix, n->pxlen, ip6_and(n->prefix, ip6_mkmask(n->pxlen)), n->pxlen); +}; + +net_ip6_sadr_: IP6 '/' NUM FROM IP6 '/' NUM +{ + if ($3 > IP6_MAX_PREFIX_LENGTH) + cf_error("Invalid prefix length %u", $3); + + if ($7 > IP6_MAX_PREFIX_LENGTH) + cf_error("Invalid prefix length %u", $7); + + $$ = cfg_alloc(sizeof(net_addr_ip6_sadr)); + net_fill_ip6_sadr($$, $1, $3, $5, $7); + + net_addr_ip6_sadr *n = (void *) $$; + if (!net_validate_ip6_sadr(n)) + cf_error("Invalid SADR IPv6 prefix %I6/%d from %I6/%d, maybe you wanted %I6/%d from %I6/%d", + n->dst_prefix, n->dst_pxlen, n->src_prefix, n->src_pxlen, + ip6_and(n->dst_prefix, ip6_mkmask(n->dst_pxlen)), n->dst_pxlen, + ip6_and(n->src_prefix, ip6_mkmask(n->src_pxlen)), n->src_pxlen); +}; + +net_vpn4_: VPN_RD net_ip4_ +{ + $$ = cfg_alloc(sizeof(net_addr_vpn4)); + net_fill_vpn4($$, net4_prefix(&$2), net4_pxlen(&$2), $1); +} + +net_vpn6_: VPN_RD net_ip6_ +{ + $$ = cfg_alloc(sizeof(net_addr_vpn6)); + net_fill_vpn6($$, net6_prefix(&$2), net6_pxlen(&$2), $1); +} + +net_roa4_: net_ip4_ MAX NUM AS NUM +{ + $$ = cfg_alloc(sizeof(net_addr_roa4)); + net_fill_roa4($$, net4_prefix(&$1), net4_pxlen(&$1), $3, $5); + if ($3 < net4_pxlen(&$1) || $3 > IP4_MAX_PREFIX_LENGTH) + cf_error("Invalid max prefix length %u", $3); +}; + +net_roa6_: net_ip6_ MAX NUM AS NUM +{ + $$ = cfg_alloc(sizeof(net_addr_roa6)); + net_fill_roa6($$, net6_prefix(&$1), net6_pxlen(&$1), $3, $5); + if ($3 < net6_pxlen(&$1) || $3 > IP6_MAX_PREFIX_LENGTH) + cf_error("Invalid max prefix length %u", $3); +}; + +net_mpls_: MPLS NUM +{ + $$ = cfg_alloc(sizeof(net_addr_roa6)); + net_fill_mpls($$, $2); +} + +net_ip_: net_ip4_ | net_ip6_ ; +net_vpn_: net_vpn4_ | net_vpn6_ ; +net_roa_: net_roa4_ | net_roa6_ ; + +net_: + net_ip_ { $$ = cfg_alloc($1.length); net_copy($$, &($1)); } + | net_vpn_ + | net_roa_ + | net_flow_ + | net_ip6_sadr_ + | net_mpls_ + ; + + +/* Networks - regular */ + +net_ip6: + net_ip6_ + | SYM { + if (($1->class != 
(SYM_CONSTANT | T_NET)) || (SYM_VAL($1).net->type != NET_IP6)) + cf_error("IPv6 network expected"); + $$ = * SYM_VAL($1).net; } ; -prefix_or_ipa: - prefix - | ipa { $$.addr = $1; $$.len = BITS_PER_IP_ADDRESS; } +net_ip: + net_ip_ + | SYM { + if (($1->class != (SYM_CONSTANT | T_NET)) || !net_is_ip(SYM_VAL($1).net)) + cf_error("IP network expected"); + $$ = * SYM_VAL($1).net; + } ; -pxlen: - '/' expr { - if ($2 < 0 || $2 > BITS_PER_IP_ADDRESS) cf_error("Invalid prefix length %d", $2); - $$ = $2; +net_any: + net_ + | SYM { + if ($1->class != (SYM_CONSTANT | T_NET)) + cf_error("Network expected"); + $$ = (net_addr *) SYM_VAL($1).net; /* Avoid const warning */ } - | ':' ipa { - $$ = ipa_masklen($2); - if ($$ < 0) cf_error("Invalid netmask %I", $2); + ; + +net_or_ipa: + net_ip4_ + | net_ip6_ + | IP4 { net_fill_ip4(&($$), $1, IP4_MAX_PREFIX_LENGTH); } + | IP6 { net_fill_ip6(&($$), $1, IP6_MAX_PREFIX_LENGTH); } + | SYM { + if ($1->class == (SYM_CONSTANT | T_IP)) + net_fill_ip_host(&($$), SYM_VAL($1).ip); + else if (($1->class == (SYM_CONSTANT | T_NET)) && net_is_ip(SYM_VAL($1).net)) + $$ = * SYM_VAL($1).net; + else + cf_error("IP address or network expected"); } ; -datetime: +label_stack_start: NUM +{ + $$ = cfg_allocz(sizeof(mpls_label_stack)); + $$->len = 1; + $$->stack[0] = $1; +}; + +label_stack: + label_stack_start + | label_stack '/' NUM { + if ($1->len >= MPLS_MAX_LABEL_STACK) + cf_error("Too many labels in stack"); + $1->stack[$1->len++] = $3; + $$ = $1; + } +; + +time: TEXT { - $$ = tm_parse_datetime($1); + $$ = tm_parse_time($1); if (!$$) - cf_error("Invalid date and time"); + cf_error("Invalid date/time"); } ; @@ -207,10 +353,6 @@ opttext: | /* empty */ { $$ = NULL; } ; -text_or_none: - TEXT { $$ = $1; } - | { $$ = NULL; } - ; CF_CODE diff --git a/conf/flowspec.Y b/conf/flowspec.Y new file mode 100644 index 00000000..4d259763 --- /dev/null +++ b/conf/flowspec.Y @@ -0,0 +1,209 @@ +/* + * BIRD -- Flow specification (RFC 5575) grammar + * + * (c) 2016 CZ.NIC 
z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +CF_HDR + +#define PARSER 1 + +#include "nest/bird.h" +#include "lib/flowspec.h" + + +CF_DEFINES + +struct flow_builder *this_flow; + + +CF_DECLS + +%type <i32> flow_num_op flow_srcdst flow_logic_op flow_num_type_ flow_frag_val flow_neg +%type <net_ptr> net_flow4_ net_flow6_ net_flow_ + +CF_KEYWORDS(FLOW4, FLOW6, DST, SRC, PROTO, NEXT, HEADER, DPORT, SPORT, ICMP, + TYPE, CODE, TCP, FLAGS, LENGTH, DSCP, DONT_FRAGMENT, IS_FRAGMENT, + FIRST_FRAGMENT, LAST_FRAGMENT, FRAGMENT, LABEL, OFFSET) + + +CF_GRAMMAR + +/* Network Flow Specification */ + +flow_num_op: + TRUE { $$ = FLOW_OP_TRUE; } + | '=' { $$ = FLOW_OP_EQ; } + | NEQ { $$ = FLOW_OP_NEQ; } + | '<' { $$ = FLOW_OP_LT; } + | LEQ { $$ = FLOW_OP_LEQ; } + | '>' { $$ = FLOW_OP_GT; } + | GEQ { $$ = FLOW_OP_GEQ; } + | FALSE { $$ = FLOW_OP_FALSE; } + ; + +flow_logic_op: + OR { $$ = FLOW_OP_OR; } + | AND { $$ = FLOW_OP_AND; } + ; + +flow_num_type_: + PROTO { $$ = FLOW_TYPE_IP_PROTOCOL; } + | NEXT HEADER { $$ = FLOW_TYPE_NEXT_HEADER; } + | PORT { $$ = FLOW_TYPE_PORT; } + | DPORT { $$ = FLOW_TYPE_DST_PORT; } + | SPORT { $$ = FLOW_TYPE_SRC_PORT; } + | ICMP TYPE { $$ = FLOW_TYPE_ICMP_TYPE; } + | ICMP CODE { $$ = FLOW_TYPE_ICMP_CODE; } + | LENGTH { $$ = FLOW_TYPE_PACKET_LENGTH; } + | DSCP { $$ = FLOW_TYPE_DSCP; } + ; + +flow_num_type: flow_num_type_{ flow_builder_set_type(this_flow, $1); }; +flow_flag_type: TCP FLAGS { flow_builder_set_type(this_flow, FLOW_TYPE_TCP_FLAGS); }; +flow_frag_type: FRAGMENT { flow_builder_set_type(this_flow, FLOW_TYPE_FRAGMENT); }; +flow_label_type: LABEL { flow_builder_set_type(this_flow, FLOW_TYPE_LABEL); }; + +flow_srcdst: + DST { $$ = FLOW_TYPE_DST_PREFIX; } + | SRC { $$ = FLOW_TYPE_SRC_PREFIX; } + ; + +flow_num_opts: + flow_num_op expr { + flow_check_cf_value_length(this_flow, $2); + flow_builder_add_op_val(this_flow, $1, $2); + } + | flow_num_opts flow_logic_op flow_num_op expr { + 
flow_check_cf_value_length(this_flow, $4); + flow_builder_add_op_val(this_flow, $2 | $3, $4); + } + | flow_num_opt_ext + | flow_num_opts OR flow_num_opt_ext + ; + +flow_num_opt_ext_expr: + expr { + flow_check_cf_value_length(this_flow, $1); + flow_builder_add_op_val(this_flow, FLOW_OP_EQ, $1); + } + | expr DDOT expr { + flow_check_cf_value_length(this_flow, $1); + flow_check_cf_value_length(this_flow, $3); + flow_builder_add_op_val(this_flow, FLOW_OP_GEQ, $1); + flow_builder_add_op_val(this_flow, FLOW_OP_AND | FLOW_OP_LEQ, $3); + } + ; + +flow_num_opt_ext: + flow_num_opt_ext_expr + | flow_num_opt_ext ',' flow_num_opt_ext_expr + ; + +flow_bmk_opts: + flow_neg expr '/' expr { + flow_check_cf_bmk_values(this_flow, $1, $2, $4); + flow_builder_add_val_mask(this_flow, $1, $2, $4); + } + | flow_bmk_opts flow_logic_op flow_neg expr '/' expr { + flow_check_cf_bmk_values(this_flow, $3, $4, $6); + flow_builder_add_val_mask(this_flow, $2 | $3, $4, $6); + } + | flow_bmk_opts ',' flow_neg expr '/' expr { + flow_check_cf_bmk_values(this_flow, $3, $4, $6); + flow_builder_add_val_mask(this_flow, 0x40 | $3, $4, $6); /* AND */ + } + ; + +flow_neg: + /* empty */ { $$ = 0x00; } + | '!' { $$ = 0x02; } + ; + +flow_frag_val: + DONT_FRAGMENT { $$ = 1; } + | IS_FRAGMENT { $$ = 2; } + | FIRST_FRAGMENT { $$ = 4; } + | LAST_FRAGMENT { $$ = 8; } + ; + +flow_frag_opts: + flow_neg flow_frag_val { + flow_builder_add_val_mask(this_flow, 0, ($1 ? 0 : $2), $2); + } + | flow_frag_opts flow_logic_op flow_neg flow_frag_val { + flow_builder_add_val_mask(this_flow, $2, ($3 ? 0 : $4), $4); + } + | flow_frag_opts ',' flow_neg flow_frag_val { + flow_builder_add_val_mask(this_flow, 0x40, ($3 ? 
0 : $4), $4); /* AND */ + } + ; + +flow4_item: + flow_srcdst net_ip { + flow_builder_set_type(this_flow, $1); + flow_builder4_add_pfx(this_flow, (net_addr_ip4 *) &($2)); + } + | flow_num_type flow_num_opts + | flow_flag_type flow_bmk_opts + | flow_frag_type flow_frag_opts + ; + +flow6_item: + flow_srcdst net_ip6 { + flow_builder_set_type(this_flow, $1); + flow_builder6_add_pfx(this_flow, (net_addr_ip6 *) &($2), 0); + } + | flow_srcdst net_ip6 OFFSET NUM { + if ($4 > $2.pxlen) + cf_error("Prefix offset is higher than prefix length"); + flow_builder_set_type(this_flow, $1); + flow_builder6_add_pfx(this_flow, (net_addr_ip6 *) &($2), $4); + } + | flow_num_type flow_num_opts + | flow_flag_type flow_bmk_opts + | flow_frag_type flow_frag_opts + | flow_label_type flow_bmk_opts + ; + +flow4_opts: + /* empty */ + | flow4_opts flow4_item ';' + ; + +flow6_opts: + /* empty */ + | flow6_opts flow6_item ';' + ; + +flow_builder_init: +{ + if (this_flow == NULL) + this_flow = flow_builder_init(&root_pool); + else + flow_builder_clear(this_flow); +}; + +flow_builder_set_ipv4: { this_flow->ipv6 = 0; }; +flow_builder_set_ipv6: { this_flow->ipv6 = 1; }; + +net_flow4_: FLOW4 '{' flow_builder_init flow_builder_set_ipv4 flow4_opts '}' +{ + $$ = (net_addr *) flow_builder4_finalize(this_flow, cfg_mem); + flow4_validate_cf((net_addr_flow4 *) $$); +}; + +net_flow6_: FLOW6 '{' flow_builder_init flow_builder_set_ipv6 flow6_opts '}' +{ + $$ = (net_addr *) flow_builder6_finalize(this_flow, cfg_mem); + flow6_validate_cf((net_addr_flow6 *) $$); +}; + +net_flow_: net_flow4_ | net_flow6_ ; + + +CF_CODE + +CF_END diff --git a/configure.ac b/configure.ac index d135dc04..f4de8f93 100644 --- a/configure.ac +++ b/configure.ac @@ -18,12 +18,6 @@ AC_ARG_ENABLE([debug], [enable_debug=no] ) -AC_ARG_ENABLE([ipv6], - [AS_HELP_STRING([--enable-ipv6], [enable building of IPv6 version @<:@no@:>@])], - [], - [enable_ipv6=no] -) - AC_ARG_ENABLE([memcheck], [AS_HELP_STRING([--enable-memcheck], [check memory 
allocations when debugging @<:@yes@:>@])], [], @@ -36,17 +30,24 @@ AC_ARG_ENABLE([pthreads], [enable_pthreads=try] ) +AC_ARG_ENABLE([libssh], + [AS_HELP_STRING([--enable-libssh], [enable LibSSH support together with RPKI @<:@try@:>@])], + [], + [enable_libssh=try] +) + +AC_ARG_ENABLE([mpls-kernel], + [AS_HELP_STRING([--enable-mpls-kernel], [enable MPLS support in kernel protocol @<:@try@:>@])], + [], + [enable_mpls_kernel=try] +) + AC_ARG_WITH([protocols], [AS_HELP_STRING([--with-protocols=LIST], [include specified routing protocols @<:@all@:>@])], [], [with_protocols="all"] ) -AC_ARG_WITH([suffix], - [AS_HELP_STRING([--with-suffix=STRING], [use specified suffix for BIRD files @<:@6 for IPv6@:>@])], - [given_suffix="yes"] -) - AC_ARG_WITH([sysconfig], [AS_HELP_STRING([--with-sysconfig=FILE], [use specified BIRD system configuration file])], [] @@ -71,48 +72,25 @@ AC_ARG_VAR([M4], [location of the M4 program]) if test "$srcdir" = . ; then # Building in current directory => create obj directory holding all objects objdir=obj - mkdir -p obj - srcdir_rel=.. - makefiles="Makefile:tools/Makefile-top.in obj/Makefile:tools/Makefile.in obj/Rules:tools/Rules.in" - exedir=.. else # Building in separate directory objdir=. - srcdir_rel=$srcdir - makefiles="Makefile:tools/Makefile.in Rules:tools/Rules.in" - exedir=. fi -case $srcdir_rel in - /*) srcdir_rel_mf=$srcdir_rel ;; - *) srcdir_rel_mf="\$(root-rel)$srcdir_rel" ;; -esac +exedir=. 
AC_SUBST([objdir]) AC_SUBST([exedir]) -AC_SUBST([srcdir_rel_mf]) +AC_SUBST([srcdir]) AC_SUBST([runtimedir]) -if test "$enable_ipv6" = yes ; then - ip=ipv6 - SUFFIX=6 - proto_radv=radv -else - ip=ipv4 - SUFFIX="" -fi - -if test "$given_suffix" = yes ; then - SUFFIX="$with_suffix" -fi -AC_SUBST([SUFFIX]) if test "$enable_debug" = yes ; then - CONFIG_FILE="bird$SUFFIX.conf" - CONTROL_SOCKET="bird$SUFFIX.ctl" + CONFIG_FILE="bird.conf" + CONTROL_SOCKET="bird.ctl" else - CONFIG_FILE="\$(sysconfdir)/bird$SUFFIX.conf" - CONTROL_SOCKET="$runtimedir/bird$SUFFIX.ctl" + CONFIG_FILE="\$(sysconfdir)/bird.conf" + CONTROL_SOCKET="$runtimedir/bird.ctl" fi AC_SUBST([CONFIG_FILE]) AC_SUBST([CONTROL_SOCKET]) @@ -130,6 +108,7 @@ if test "$ac_test_CFLAGS" != set ; then fi AC_PROG_CC +AC_PROG_CC_C99 if test -z "$GCC" ; then AC_MSG_ERROR([This program requires the GNU C Compiler.]) fi @@ -195,54 +174,35 @@ if test -n "$with_sysconfig" -a "$with_sysconfig" != no ; then elif test -f sysconfig.h ; then sysdesc=sysconfig else - case "$ip:$host_os" in - ipv6:linux*) - sysdesc=linux-v6 - default_iproutedir="/etc/iproute2" - ;; - ipv4:linux*) + case "$host_os" in + linux*) sysdesc=linux default_iproutedir="/etc/iproute2" ;; - ipv6:netbsd*) - sysdesc=bsd-v6 - CPPFLAGS="$CPPFLAGS -I/usr/pkg/include" - LDFLAGS="$LDFLAGS -L/usr/pkg/lib -R/usr/pkg/lib" - ;; - ipv4:netbsd*) + freebsd*) sysdesc=bsd - CPPFLAGS="$CPPFLAGS -I/usr/pkg/include" - LDFLAGS="$LDFLAGS -L/usr/pkg/lib -R/usr/pkg/lib" - ;; - ipv6:freebsd*) - sysdesc=bsd-v6 + CPPFLAGS="$CPPFLAGS -I/usr/local/include" + LDFLAGS="$LDFLAGS -L/usr/local/lib" ;; - ipv4:freebsd*) + kfreebsd*) sysdesc=bsd ;; - ipv6:dragonfly*) - sysdesc=bsd-v6 - ;; - ipv4:dragonfly*) + netbsd*) sysdesc=bsd + CPPFLAGS="$CPPFLAGS -I/usr/pkg/include" + LDFLAGS="$LDFLAGS -L/usr/pkg/lib -R/usr/pkg/lib" ;; - ipv6:kfreebsd*) - sysdesc=bsd-v6 - ;; - ipv4:kfreebsd*) + openbsd*) sysdesc=bsd ;; - ipv6:openbsd*) - sysdesc=bsd-v6 - ;; - ipv4:openbsd*) + dragonfly*) sysdesc=bsd ;; *) 
AC_MSG_ERROR([Cannot determine correct system configuration. Please use --with-sysconfig to set it manually.]) ;; - esac - sysdesc=$srcdir/sysdep/cf/$sysdesc.h + esac + sysdesc=$srcdir/sysdep/cf/$sysdesc.h fi AC_MSG_CHECKING([which OS configuration should we use]) AC_MSG_RESULT([$sysdesc]) @@ -253,7 +213,7 @@ sysname=`echo $sysdesc | sed 's/\.h$//'` AC_DEFINE_UNQUOTED([SYSCONF_INCLUDE], ["$sysdesc"], [Which sysdep header to include]) AC_MSG_CHECKING([system-dependent directories]) -sysdep_dirs="`sed <$sysdesc '/^Link: /!d;s/^Link: \(.*\)$/\1/' | tr '\012' ' '` lib" +sysdep_dirs="`sed <$sysdesc '/^Link: /!d;s/^Link: \(.*\)$/\1/' | tr '\012' ' '`" AC_MSG_RESULT([$sysdep_dirs]) AC_SUBST([sysdep_dirs]) @@ -266,10 +226,43 @@ fi AC_SUBST([iproutedir]) -all_protocols="$proto_bfd bgp ospf pipe $proto_radv rip static" -if test "$ip" = ipv6 ; then - all_protocols="$all_protocols babel" +DAEMON_LIBS= +AC_SUBST(DAEMON_LIBS) + +if test "$enable_libssh" != no ; then + AC_CHECK_HEADER([libssh/libssh.h], [true], [fail=yes], [ ]) + AC_CHECK_LIB([ssh], [ssh_connect], [true], [fail=yes]) + + if test "$fail" != yes ; then + AC_DEFINE([HAVE_LIBSSH], [1], [Define to 1 if you have the `ssh' library (-lssh).]) + DAEMON_LIBS="-lssh $DAEMON_LIBS" + proto_rpki=rpki + enable_libssh=yes + else + if test "$enable_libssh" = yes ; then + AC_MSG_ERROR([LibSSH not available.]) + else + enable_libssh=no + fi + fi fi + +if test "$enable_mpls_kernel" != no ; then + BIRD_CHECK_MPLS_KERNEL + + if test "$bird_cv_mpls_kernel" = yes ; then + AC_DEFINE([HAVE_MPLS_KERNEL], [1], [Define to 1 if kernel is MPLS capable]) + elif test "$enable_mpls_kernel" = yes ; then + AC_MSG_ERROR([Kernel MPLS support not found.]) + fi + + if test "$enable_mpls_kernel" = try ; then + enable_mpls_kernel="$bird_cv_mpls_kernel" + fi +fi + +all_protocols="$proto_bfd babel bgp ospf pipe radv rip $proto_rpki static" + all_protocols=`echo $all_protocols | sed 's/ /,/g'` if test "$with_protocols" = all ; then @@ -283,6 +276,7 @@ 
AH_TEMPLATE([CONFIG_OSPF], [OSPF protocol]) AH_TEMPLATE([CONFIG_PIPE], [Pipe protocol]) AH_TEMPLATE([CONFIG_RADV], [RAdv protocol]) AH_TEMPLATE([CONFIG_RIP], [RIP protocol]) +AH_TEMPLATE([CONFIG_RPKI], [RPKI protocol]) AH_TEMPLATE([CONFIG_STATIC], [Static protocol]) AC_MSG_CHECKING([protocols]) @@ -299,7 +293,7 @@ AC_MSG_RESULT([ok]) AC_SUBST([protocols]) case $sysdesc in - */linux*|*/linux-v6*) + */linux*) AC_CHECK_HEADER([linux/rtnetlink.h], [], [AC_MSG_ERROR([Appropriate version of Linux kernel headers not found.])], @@ -323,6 +317,19 @@ AC_C_BIGENDIAN( if test "$enable_debug" = yes ; then AC_DEFINE([DEBUGGING], [1], [Define to 1 if debugging is enabled]) + LDFLAGS="$LDFLAGS -rdynamic" + CFLAGS="$CFLAGS -O0 -ggdb -g3 -gdwarf-4" + + AC_CHECK_HEADER([execinfo.h], + [ + AC_DEFINE([HAVE_EXECINFO_H], [1], [Define to 1 if you have the <execinfo.h> header file.]) + AC_SEARCH_LIBS([backtrace], [execinfo], + [], + [AC_MSG_ERROR([Function backtrace not available.])] + ) + ] + ) + if test "$enable_memcheck" = yes ; then AC_CHECK_LIB([dmalloc], [dmalloc_debug]) if test $ac_cv_lib_dmalloc_dmalloc_debug != yes ; then @@ -331,10 +338,10 @@ if test "$enable_debug" = yes ; then fi fi -CLIENT= +CLIENT=birdcl CLIENT_LIBS= if test "$enable_client" = yes ; then - CLIENT=birdc + CLIENT="$CLIENT birdc" BASE_LIBS="$LIBS" LIBS="" @@ -381,20 +388,9 @@ AC_SUBST([CLIENT_LIBS]) mkdir -p $objdir/sysdep AC_CONFIG_HEADERS([$objdir/sysdep/autoconf.h:sysdep/autoconf.h.in]) -AC_CONFIG_COMMANDS([merge], - [ export CPP="$CPP"; $srcdir/tools/mergedirs $srcdir $srcdir_rel $objdir $sysdep_dirs ], - [ - srcdir=$srcdir - srcdir_rel=$srcdir_rel - objdir=$objdir - sysdep_dirs="$sysdep_dirs" - ] -) -AC_CONFIG_FILES([$makefiles]) +AC_CONFIG_FILES([Makefile:Makefile.in]) AC_OUTPUT -rm -f $objdir/sysdep/paths.h - AC_MSG_RESULT() AC_MSG_RESULT([BIRD was configured with the following options:]) AC_MSG_RESULT([ Source directory: $srcdir]) @@ -404,5 +400,7 @@ AC_MSG_RESULT([ System configuration: $sysdesc]) 
AC_MSG_RESULT([ Debugging: $enable_debug]) AC_MSG_RESULT([ POSIX threads: $enable_pthreads]) AC_MSG_RESULT([ Routing protocols: $protocols]) +AC_MSG_RESULT([ Kernel MPLS support: $enable_mpls_kernel]) AC_MSG_RESULT([ Client: $enable_client]) + rm -f $objdir/.*-stamp diff --git a/doc/LinuxDocTools.pm b/doc/LinuxDocTools.pm index 51d4a04c..39bb401d 100644 --- a/doc/LinuxDocTools.pm +++ b/doc/LinuxDocTools.pm @@ -372,6 +372,8 @@ sub process_file } } # + + local $ENV{PATH} = "$ENV{PATH}:/usr/lib/linuxdoc-tools"; my($precmd) = "|sgmlpre output=$global->{format} $global->{define}"; # diff --git a/doc/Makefile b/doc/Makefile index 70b73943..f36642be 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,50 +1,49 @@ -root-rel=../ -dir-name=doc - -ifneq ($(wildcard ../Rules),) -include ../Rules -else -srcdir=$(shell cd $(root-rel) ; pwd) -srcdir_abs=$(srcdir) -endif - # Force rebuilds -.PHONY: prog.sgml bird.sgml +.PHONY: progspell docs progdocs userdocs + +doc-srcdir := $(shell cd $(s) && pwd) +sgml2 := $(doc-srcdir)/sgml2 docs: progdocs userdocs -progdocs: prog.html prog.ps -userdocs: bird.html bird.pdf -prog.sgml: - $(srcdir)/tools/progdoc $(srcdir_abs) +doc-fmt = $(1): $(o)prog.$(1) $(o)bird.$(1) +$(call doc-fmt,html) +$(call doc-fmt,dvi) +$(call doc-fmt,ps) +$(call doc-fmt,pdf) -%.html: %.sgml - ./sgml2html $< +progdocs: $(o)prog.html $(o)prog.pdf +userdocs: $(o)bird.html $(o)bird.pdf +progspell: $(o)prog.spell -%.dvi: %.tex - latex $< - latex $< +$(o)prog.sgml: $(srcdir)/tools/progdoc $(objdir)/.dir-stamp + $(srcdir)/tools/progdoc $(srcdir) $@ -%.ps: %.dvi - dvips -D600 -ta4 -o $@ $< +$(o)%.sgml: $(s)%.sgml $(objdir)/.dir-stamp + cp $< $@ -%.pdf: %.tex - pdflatex $< - pdflatex $< +$(o)%.html: $(o)%.sgml + cd $(dir $@) && $(sgml2)html $(notdir $<) -%.tex: %.sgml - ./sgml2latex --output=tex $< +$(o)%.tex: $(o)%.sgml + cd $(dir $@) && $(sgml2)latex --output=tex $(notdir $<) + +$(o)%.dvi: $(o)%.tex + cd $(dir $@) && TEXINPUTS=$(TEXINPUTS):$(doc-srcdir)/tex latex $(notdir $<) 
+ cd $(dir $@) && TEXINPUTS=$(TEXINPUTS):$(doc-srcdir)/tex latex $(notdir $<) + +$(o)%.ps: $(o)%.dvi + dvips -D600 -ta4 -o $@ $< -%.txt: %.sgml - ./sgml2txt $< +$(o)%.pdf: $(o)%.tex + TEXINPUTS=$(TEXINPUTS):$(doc-srcdir)/tex pdflatex -output-directory=$(dir $@) $< + TEXINPUTS=$(TEXINPUTS):$(doc-srcdir)/tex pdflatex -output-directory=$(dir $@) $< -progspell: prog.sgml - sed -f prog-spell.sed <prog.sgml >prog.spell - ispell prog.spell +$(o)%.txt: $(o)%.sgml + cd $(dir $@) && $(sgml2)txt $(notdir $<) -clean: - rm -f *.tex *.dvi *.log *.txt *.aux *.toc *.spell - rm -f prog.sgml +$(o)prog.spell: $(o)prog.sgml $(s)prog-spell.sed + sed -f $(lastword $^) <$< >$@ + ispell $@ -distclean: clean - rm -f *.html *.ps +$(call clean,prog.spell $(addprefix *.,html dvi ps pdf sgml tex txt aux log toc)) diff --git a/doc/bird.conf.example b/doc/bird.conf.example index bbfe0020..62c65ce9 100644 --- a/doc/bird.conf.example +++ b/doc/bird.conf.example @@ -1,5 +1,6 @@ /* - * This is an example configuration file. + * This is an example configuration file + * (for version 1.x.x, obsolete) */ # Yes, even shell-like comments work... 
diff --git a/doc/bird.conf.example2 b/doc/bird.conf.example2 new file mode 100644 index 00000000..815651c7 --- /dev/null +++ b/doc/bird.conf.example2 @@ -0,0 +1,332 @@ +/* + * This is an example configuration file for an MP-BGP setup + */ + + +log "bird.log" all; +# debug protocols all; + +router id 192.168.1.1; + +ipv4 table master4; +ipv6 table master6; + +ipv4 table mcast4; +ipv6 table mcast6; + +ipv4 table mtab4; +ipv6 table mtab6; + +vpn4 table vpntab4; +vpn6 table vpntab6; + +vpn4 table vpn4mc; +vpn6 table vpn6mc; + +flow4 table flowtab4; +flow6 table flowtab6; + + +protocol device { +} + +protocol kernel kernel4 { + ipv4 { + export all; + }; +} + +protocol kernel kernel6 { + ipv6 { + export all; + }; +} + + +protocol static static4 { + ipv4; + + route 10.10.0.0/24 via 192.168.1.2; + route 10.10.1.0/24 via 192.168.1.2 { bgp_large_community.add((10,20,30)); bgp_large_community.add((10,(20*3),10)); }; +} + +protocol static static6 { + ipv6; + + route 2001:db8:10:10::/64 via 2001:db8:1:1::10; + route 2001:db8:10:11::/64 via 2001:db8:1:1::10; + + route 2001:db8:1:1::/64 via fe80::ec9b:67ff:fe60:fd5d % ve1; +} + +# VPNv4 routes with MPLS labels +protocol static statvpn4 { + vpn4; + + route 10:10 10.20.0.0/24 via 192.168.1.2 mpls 210; + route 10:10 10.20.1.0/24 via 192.168.1.2 mpls 210; + route 10:20 10.20.0.0/24 via 192.168.1.2 mpls 220; + route 10:20 10.20.1.0/24 via 192.168.1.2 mpls 220; +} + +protocol static statvpn6 { + vpn6; + + route 10:10 2001:db8:20:10::/64 via 2001:db8:1:1::10 mpls 200/210; + route 10:10 2001:db8:20:11::/64 via 2001:db8:1:1::10 mpls 200/210; + route 10:20 2001:db8:20:10::/64 via 2001:db8:1:1::10 mpls 200/220; + route 10:20 2001:db8:20:11::/64 via 2001:db8:1:1::10 mpls 200/220; +} + +# RFC 5575 flow specification +protocol static flowstat4 { + flow4; + + route flow4 { + dst 10.0.0.0/8; + proto = 23; + dport > 24 && < 30 || 40..50,60..70,80; + sport > 24 && < 30 || = 40 || 50,60..70,80; + icmp type 80; + icmp code 90; + tcp flags 0x03/0x0f; 
+ length 2048..65535; + dscp = 63; + fragment dont_fragment, is_fragment || !first_fragment; + }; + + route flow4 { + dst 11.0.0.0/8; + proto = 0x12; + sport > 0x5678 && < 0x9abc || 0xdef0 || 0x1234,0x5678,0x9abc..0xdef0; + dport = 50; + tcp flags 0x000/0xf00; + }; + + route flow4 { + dst 12.0.0.0/32; + tcp flags ! 0/0x999; + }; + + route flow4 { + dst 220.0.254.0/24; + tcp flags 0x99/0x999; + }; + + route flow4 { + dst 220.0.254.192/28; + tcp flags ! 0xfff/0xfff; + }; + + route flow4 { + dst 15.0.0.0/8; + tcp flags ! 0x999/0x999; + }; +} + +protocol static flowstat6 { + flow6; + + route flow6 { + dst fec0:1122:3344:5566::1/128; + src 0000:0000:0000:0001:1234:5678:9800:0000/101 offset 63; + next header = 23; + sport 24..30, 42 || 50,60,70..80; + dport = 50; + tcp flags 0x03/0x0f, !0/0xff || 0x33/0x33; + fragment !is_fragment || !first_fragment; + label 0xaaaa/0xaaaa && 0x33/0x33; + }; + + route flow6 { + dst fec0:1122:3344:5566::1/128; + src ::1:1234:5678:9800:0/101 offset 63; + next header = 23; + dport = 50; + sport > 24 && < 30 || = 40 || = 50 || = 60 || >= 70 && <= 80; + tcp flags 0x3/0x3 && 0x0/0xc; + }; +} + + +protocol pipe { + table master4; + peer table mcast4; + import none; + export where source = RTS_OSPF; +} + +protocol pipe { + table master6; + peer table mcast6; + import none; + export where source = RTS_OSPF; +} + +protocol ospf v2 ospf4 { + ipv4 { + import all; +# export where source = RTS_STATIC; + }; + + area 0 { + interface "ve0" { stub; }; + interface "ve1" { hello 5; type ptp; }; + interface "ve2" { hello 5; type bcast; ttl security; }; + interface "ve3" { hello 5; type bcast; ttl security; }; + }; +} + + +protocol ospf v3 ospf6 { + ipv6 { + import all; +# export where source = RTS_STATIC; + }; + + area 0 { + interface "ve0" { stub; }; + interface "ve1" { hello 5; type ptp; }; + interface "ve2" { hello 5; type bcast; }; + }; +} + +protocol bgp { + local 192.168.11.1 as 1000; + neighbor 192.168.11.2 as 2000; +# local 192.168.1.1 as 1000; +# 
neighbor 192.168.2.1 as 2000; +# multihop; +# rr client; +# strict bind; +# debug all; + + # regular IPv4 unicast (1/1) + ipv4 { + # connects to master4 table by default + import all; + export where source ~ [ RTS_STATIC, RTS_BGP ]; + }; + + # regular IPv6 unicast (2/1) + ipv6 { + # connects to master6 table by default + import all; + export where source ~ [ RTS_STATIC, RTS_BGP ]; +# next hop address 2001:db8:1:1::1; + }; + + # IPv4 multicast topology (1/2) + ipv4 multicast { + # explicit IPv4 table + table mcast4; + import all; + export all; + }; + + # IPv6 multicast topology (2/2) + ipv6 multicast { + # explicit IPv6 table + table mcast6; + import all; + export all; +# next hop address 2001:db8:1:1::1; + }; + + # IPv4 with MPLS labels (1/4) + ipv4 mpls { + # explicit IPv4 table + table mtab4; + import all; + export all; + }; + + # IPv6 with MPLS labels (2/4) + ipv6 mpls { + # explicit IPv6 table + table mtab6; + import all; + export all; + # allows IPv4 next hops (6PE) + # extended next hop; + }; + + # VPNv4 with MPLS labels (1/128) + vpn4 mpls { + # connects to vpntab4 table by default + import all; + export all; + }; + + # VPNv6 with MPLS labels (2/128) + vpn6 mpls { + # connects to vpntab6 table by default + import all; + export all; + }; + + # VPNv4 multicast topology (1/129) + vpn4 multicast { + table vpn4mc; + import all; + export all; + }; + + # VPNv6 multicast topology (2/129) + vpn6 multicast { + table vpn6mc; + import all; + export all; + }; + + # IPv4 Flowspec (1/133) + flow4 { + # connects to flowtab4 table by default + import all; + export all; + }; + + # IPv6 Flowspec (2/133) + flow6 { + # connects to flowtab6 table by default + import all; + export all; + }; +} + +protocol bgp { + local 192.168.1.1 as 1000; + neighbor 192.168.3.1 as 1000; + multihop; + rr client; + + ipv4 { + import all; + export where source ~ [ RTS_STATIC, RTS_BGP ]; + }; + + ipv6 { + import all; + export where source ~ [ RTS_STATIC, RTS_BGP ]; + next hop address 2001:db8:1:1::1; 
+ }; +} + +protocol bgp { + local 2001:db8:1:1::1 as 1000; + neighbor 2001:db8:4:1::1 as 1000; + multihop; + rr client; + + ipv4 { + import all; + export where source ~ [ RTS_STATIC, RTS_BGP ]; + next hop address 192.168.4.1; + }; + + ipv6 { + import all; + export where source ~ [ RTS_STATIC, RTS_BGP ]; + }; +} + diff --git a/doc/bird.sgml b/doc/bird.sgml index d9200f26..5ed816e8 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1,7 +1,7 @@ <!doctype birddoc system> <!-- - BIRD documentation + BIRD 2.0 documentation This documentation can have 4 forms: sgml (this is master copy), html, ASCII text and dvi/postscript (generated from sgml using sgmltools). You should always @@ -20,11 +20,12 @@ configuration - something in config which is not keyword. <book> -<title>BIRD User's Guide +<title>BIRD 2.0 User's Guide <author> Ondrej Filip <it/<feela@network.cz>/, Pavel Machek <it/<pavel@ucw.cz>/, Martin Mares <it/<mj@ucw.cz>/, +Jan Matejka <it/<mq@jmq.cz>/, Ondrej Zajicek <it/<santiago@crfreenet.org>/ </author> @@ -88,8 +89,9 @@ supports: <item>both IPv4 and IPv6 protocols <item>multiple routing tables <item>the Border Gateway Protocol (BGPv4) - <item>the Routing Information Protocol (RIPv2) + <item>the Routing Information Protocol (RIPv2, RIPng) <item>the Open Shortest Path First protocol (OSPFv2, OSPFv3) + <item>the Babel Routing Protocol <item>the Router Advertisements for IPv6 hosts <item>a virtual protocol for exchange of routes between different routing tables on a single host @@ -112,11 +114,10 @@ developed and tested under Linux 2.0 to 2.6, and then ported to FreeBSD, NetBSD and OpenBSD, porting to other systems (even non-UNIX ones) should be relatively easy due to its highly modular architecture. -<p>BIRD supports either IPv4 or IPv6 protocol, but have to be compiled separately -for each one. Therefore, a dualstack router would run two instances of BIRD (one -for IPv4 and one for IPv6), with completely separate setups (configuration -files, tools ...). 
- +<p>BIRD 1.x supported either IPv4 or IPv6 protocol, but had to be compiled separately +for each one. BIRD~2 supports both of them with a possibility of further extension. +BIRD~2 supports Linux at least 3.16, FreeBSD 10, NetBSD 7.0, and OpenBSD 5.8. +Anyway, it will probably work well also on older systems. <sect>Installing BIRD <label id="install"> @@ -133,8 +134,7 @@ and Perl, installing BIRD should be as easy as: </code> <p>You can use <tt>./configure --help</tt> to get a list of configure -options. The most important ones are: <tt/--enable-ipv6/ which enables building -of an IPv6 version of BIRD, <tt/--with-protocols=/ to produce a slightly smaller +options. The most important ones are: <tt/--with-protocols=/ to produce a slightly smaller BIRD executable by configuring out routing protocols you don't use, and <tt/--prefix=/ to install BIRD to a place different from <file>/usr/local</file>. @@ -217,47 +217,42 @@ is generally easy -- BIRD needs just the standard library, privileges to read the config file and create the control socket and the CAP_NET_* capabilities. -<chapt>About routing tables +<chapt>Architecture +<label id="architecture"> + +<sect>Routing tables <label id="routing-tables"> -<p>BIRD has one or more routing tables which may or may not be synchronized with -OS kernel and which may or may not be synchronized with each other (see the Pipe -protocol). Each routing table contains a list of known routes. Each route -consists of: +<p>The heart of BIRD is a routing table. BIRD has several independent routing tables; +each of them contains routes of exactly one <m/nettype/ (see below). There are two +default tables -- <cf/master4/ for IPv4 routes and <cf/master6/ for IPv6 routes. +Other tables must be explicitly configured. + +<p> +These routing tables are not kernel forwarding tables. No forwarding is done by +BIRD. 
If you want to forward packets using the routes in BIRD tables, you may +use the Kernel protocol (see below) to synchronize them with kernel FIBs. + +<p> +Every nettype defines a (kind of) primary key on routes. Every route source can +supply one route for every possible primary key; new route announcement replaces +the old route from the same source, keeping other routes intact. BIRD always +chooses the best route for each primary key among the known routes and keeps the +others as suboptimal. When the best route is retracted, BIRD re-runs the best +route selection algorithm to find the current best route. + +<p> +The global best route selection algorithm is (roughly) as follows: <itemize> - <item>network prefix this route is for (network address and prefix - length -- the number of bits forming the network part of the - address; also known as a netmask) - <item>preference of this route - <item>IP address of router which told us about this route - <item>IP address of router we should forward the packets to using this - route - <item>other attributes common to all routes - <item>dynamic attributes defined by protocols which may or may not be - present (typically protocol metrics) + <item>Preferences of the routes are compared. + <item>Source protocol instance preferences are compared. + <item>If source protocols are the same (e.g. BGP vs. BGP), the protocol's route selection algorithm is invoked. + <item>If source protocols are different (e.g. BGP vs. OSPF), result of the algorithm is undefined. </itemize> -Routing table maintains multiple entries for a network, but at most one entry -for one network and one protocol. The entry with the highest preference is used -for routing (we will call such an entry the <it/selected route/). If there are -more entries with the same preference and they are from the same protocol, the -protocol decides (typically according to metrics). If they aren't, an internal -ordering is used to break the tie. 
You can get the list of route attributes in -the Route attributes section. - -<p>Each protocol is connected to a routing table through two filters which can -accept, reject and modify the routes. An <it/export/ filter checks routes passed -from the routing table to the protocol, an <it/import/ filter checks routes in -the opposite direction. When the routing table gets a route from a protocol, it -recalculates the selected route and broadcasts it to all protocols connected to -the table. The protocols typically send the update to other routers in the -network. Note that although most protocols are interested in receiving just -selected routes, some protocols (e.g. the <cf/Pipe/ protocol) receive and -process all entries in routing tables (accepted by filters). - -<p><label id="dsc-table-sorted">Usually, a routing table just chooses a selected route -from a list of entries for one network. But if the <cf/sorted/ option is +<p><label id="dsc-table-sorted">Usually, a routing table just chooses a selected +route from a list of entries for one network. But if the <cf/sorted/ option is activated, these lists of entries are kept completely sorted (according to preference or some protocol-dependent metric). This is needed for some features of some protocols (e.g. <cf/secondary/ option of BGP protocol, which allows to @@ -268,6 +263,116 @@ selected route that cannot be described using comparison and ordering). Minor advantage is that routes are shown sorted in <cf/show route/, minor disadvantage is that it is slightly more computationally expensive. +<sect>Routes and network types +<label id="routes"> + +<p>BIRD works with several types of routes. Some of them are typical IP routes, +others are better described as forwarding rules. We call them all routes, +regardless of this difference. 
+ +<p>Every route consists of several attributes (read more about them in the +<ref id="route-attributes" name="Route attributes"> section); the common for all +routes are: + +<itemize> + <item>IP address of router which told us about this route + <item>Source protocol instance + <item>Route preference + <item>Optional attributes defined by protocols +</itemize> + +<p>Other attributes depend on nettypes. Some of them are part of the primary key, these are marked (PK). + +<sect1>IPv4 and IPv6 routes +<label id="ip-routes"> + +<p>The traditional routes. Configuration keywords are <cf/ipv4/ and <cf/ipv6/. + +<itemize> + <item>(PK) Route destination (IP prefix together with its length) + <item>Route next hops (see below) +</itemize> + +<sect1>VPN IPv4 and IPv6 routes +<label id="vpn-routes"> + +<p>Routes for IPv4 and IPv6 with VPN Route Distinguisher (<rfc id="4364">). +Configuration keywords are <cf/vpn4/ and <cf/vpn6/. + +<itemize> + <item>(PK) Route destination (IP prefix together with its length) + <item>(PK) Route distinguisher (according to <rfc id="4364">) + <item>Route next hops +</itemize> + +<sect1>Route Origin Authorization for IPv4 and IPv6 +<label id="roa-routes"> + +<p>These entries can be used to validate route origination of BGP routes. +A ROA entry specifies prefixes which could be originated by an AS number. +Their keywords are <cf/roa4/ and <cf/roa6/. + +<itemize> + <item>(PK) IP prefix together with its length + <item>(PK) Matching prefix maximal length + <item>(PK) AS number +</itemize> + +<sect1>Flowspec for IPv4 and IPv6 +<label id="flow-routes"> + +<p>Flowspec rules are a form of firewall and traffic flow control rules +distributed mostly via BGP. These rules may help the operators stop various +network attacks in the beginning before eating up the whole bandwidth. +Configuration keywords are <cf/flow4/ and <cf/flow6/. 
+ +<itemize> + <item>(PK) IP prefix together with its length + <item>(PK) Flow definition data + <item>Flow action (encoded internally as BGP communities according to <rfc id="5575">) +</itemize> + +<sect1>MPLS switching rules +<label id="mpls-routes"> + +<p>This nettype is currently a stub before implementing more support of <rfc id="3031">. +BIRD currently does not support any label distribution protocol nor any label assignment method. +Only the Kernel, Pipe and Static protocols can use MPLS tables. +Configuration keyword is <cf/mpls/. + +<itemize> + <item>(PK) MPLS label + <item>Route next hops +</itemize> + +<sect1>Route next hops +<label id="route-next-hop"> + +<p>This is not a nettype. The route next hop is a complex attribute common for many +nettypes as you can see before. Every next hop has its assigned device +(either assumed from its IP address or set explicitly). It may have also +an IP address and an MPLS stack (one or both independently). +Maximal MPLS stack depth is set (in compile time) to 8 labels. + +<p>Every route (when eligible to have a next hop) can have more than one next hop. +In that case, every next hop has also its weight. + +<sect>Protocols and channels +<label id="protocols-concept"> + +<p>BIRD protocol is an abstract class of producers and consumers of the routes. +Each protocol may run in multiple instances and bind on one side to route +tables via channels, on the other side to specified listen sockets (BGP), +interfaces (Babel, OSPF, RIP), APIs (Kernel, Direct), or nothing (Static, Pipe). + +<p>There are also two protocols that do not have any channels -- BFD and Device. +Both of them are kind of service for other protocols. + +<p>Each protocol is connected to a routing table through a channel. Some protocols +support only one channel (OSPF, RIP), some protocols support more channels (BGP, Direct). +Each channel has two filters which can accept, reject and modify the routes. 
+An <it/export/ filter is applied to routes passed from the routing table to the protocol, +an <it/import/ filter is applied to routes in the opposite direction. <sect>Graceful restart <label id="graceful-restart"> @@ -302,31 +407,33 @@ extensive way. a comment, whitespace characters are treated as a single space. If there's a variable number of options, they are grouped using the <cf/{ }/ brackets. Each option is terminated by a <cf/;/. Configuration is case sensitive. There are two -ways how to name symbols (like protocol names, filter names, constants etc.). You -can either use a simple string starting with a letter followed by any -combination of letters and numbers (e.g. "R123", "myfilter", "bgp5") or you can -enclose the name into apostrophes (<cf/'/) and than you can use any combination -of numbers, letters. hyphens, dots and colons (e.g. "'1:strange-name'", -"'-NAME-'", "'cool::name'"). +ways how to name symbols (like protocol names, filter names, constants etc.). +You can either use a simple string starting with a letter followed by any +combination of letters and numbers (e.g. <cf/R123/, <cf/myfilter/, <cf/bgp5/) or +you can enclose the name into apostrophes (<cf/'/) and then you can use any +combination of numbers, letters, hyphens, dots and colons (e.g. +<cf/'1:strange-name'/, <cf/'-NAME-'/, <cf/'cool::name'/). <p>Here is an example of a simple config file. It enables synchronization of -routing tables with OS kernel, scans for new network interfaces every 10 seconds -and runs RIP on all network interfaces found. +routing tables with OS kernel, learns network interfaces and runs RIP on all +network interfaces found.
<code> protocol kernel { + ipv4 { + export all; # Default is export none + }; persist; # Don't remove routes on BIRD shutdown - scan time 20; # Scan kernel routing table every 20 seconds - export all; # Default is export none } protocol device { - scan time 10; # Scan interfaces every 10 seconds } protocol rip { - export all; - import all; + ipv4 { + import all; + export all; + }; interface "*"; } </code> @@ -336,11 +443,19 @@ protocol rip { <label id="global-opts"> <p><descrip> - <tag><label id="opt-include">include "<m/filename/"</tag> - This statement causes inclusion of a new file. <m/Filename/ could also - be a wildcard, in that case matching files are included in alphabetic - order. The maximal depth is 8. Note that this statement could be used - anywhere in the config file, not just as a top-level option. + <tag><label id="opt-include">include "<m/filename/";</tag> + This statement causes inclusion of a new file. The <m/filename/ could + also be a wildcard, in that case matching files are included in + alphabetic order. The maximal depth is 8. Note that this statement can + be used anywhere in the config file, even inside other options, but + always on the beginning of line. In the following example, the first + semicolon belongs to the <cf/include/, the second to <cf/ipv6 table/. + If the <file/tablename.conf/ contains exactly one token (the name of the + table), this construction is correct: +<code> +ipv6 table +include "tablename.conf";; +</code> <tag><label id="opt-log">log "<m/filename/"|syslog [name <m/name/]|stderr all|{ <m/list of classes/ }</tag> Set logging of messages having the given class (either <cf/all/ or @@ -406,7 +521,7 @@ protocol rip { <cf><m/name2/</cf> You can run more than one instance of most protocols (like RIP or BGP). By default, no instances are configured. 
- <tag><label id="opt-template">template rip|bgp|<m/.../ [<m/name/ [from <m/name2/]] { <m>protocol options</m> }</tag> + <tag><label id="opt-template">template rip|ospf|bgp|<m/.../ [<m/name/ [from <m/name2/]] { <m>protocol options</m> }</tag> Define a protocol template instance called <m/name/ (or with a name like "bgp1" generated automatically if you don't specify any <m/name/). Protocol templates can be used to group common options when many @@ -423,25 +538,15 @@ protocol rip { <tag><label id="opt-router-id">router id <m/IPv4 address/</tag> Set BIRD's router ID. It's a world-wide unique identification of your - router, usually one of router's IPv4 addresses. Default: in IPv4 - version, the lowest IP address of a non-loopback interface. In IPv6 - version, this option is mandatory. + router, usually one of router's IPv4 addresses. Default: the lowest + IPv4 address of a non-loopback interface. <tag><label id="opt-router-id-from">router id from [-] [ "<m/mask/" ] [ <m/prefix/ ] [, <m/.../]</tag> - Set BIRD's router ID based on an IP address of an interface specified by - an interface pattern. The option is applicable for IPv4 version only. + Set BIRD's router ID based on an IPv4 address of an interface specified by + an interface pattern. See <ref id="proto-iface" name="interface"> section for detailed description of interface patterns with extended clauses. - <tag><label id="opt-listen-bgp">listen bgp [address <m/address/] [port <m/port/] [dual]</tag> - This option allows to specify address and port where BGP protocol should - listen. It is global option as listening socket is common to all BGP - instances. Default is to listen on all addresses (0.0.0.0) and port 179. - In IPv6 mode, option <cf/dual/ can be used to specify that BGP socket - should accept both IPv4 and IPv6 connections (but even in that case, - BIRD would accept IPv6 routes only). Such behavior was default in older - versions of BIRD. 
- <tag><label id="opt-graceful-restart">graceful restart wait <m/number/</tag> During graceful restart recovery, BIRD waits for convergence of routing protocols. This option allows to specify a timeout for the recovery to @@ -456,46 +561,35 @@ protocol rip { used for other commands and <cf/log/ is used in a log file. "<m/format1/" is a format string using <it/strftime(3)/ notation (see - <it/man strftime/ for details). <m/limit> and "<m/format2/" allow to - specify the second format string for times in past deeper than <m/limit/ - seconds. There are few shorthands: <cf/iso long/ is a ISO 8601 date/time - format (YYYY-MM-DD hh:mm:ss) that can be also specified using <cf/"%F %T"/. + <it/man strftime/ for details). It is extended to support sub-second + time part with variable precision (up to microseconds) using "%f" + conversion code (e.g., "%T.%3f" is hh:mm:ss.sss time). <m/limit/ and + "<m/format2/" allow to specify the second format string for times in + past deeper than <m/limit/ seconds. + + There are several shorthands: <cf/iso long/ is an ISO 8601 date/time + format (YYYY-MM-DD hh:mm:ss) that can be also specified using <cf/"%F + %T"/. Similarly, <cf/iso long ms/ and <cf/iso long us/ are ISO 8601 + date/time formats with millisecond or microsecond precision. <cf/iso short/ is a variant of ISO 8601 that uses just the time format (hh:mm:ss) for near times (up to 20 hours in the past) and the date - format (YYYY-MM-DD) for far times. This is a shorthand for - <cf/"%T" 72000 "%F"/. + format (YYYY-MM-DD) for far times. This is a shorthand for <cf/"%T" + 72000 "%F"/. And there are also <cf/iso short ms/ and <cf/iso short us/ + high-precision variants of that. - By default, BIRD uses the <cf/iso short/ format for <cf/route/ and - <cf/protocol/ times, and the <cf/iso long/ format for <cf/base/ and + By default, BIRD uses the <cf/iso short ms/ format for <cf/route/ and + <cf/protocol/ times, and the <cf/iso long ms/ format for <cf/base/ and <cf/log/ times.
- In pre-1.4.0 versions, BIRD used an short, ad-hoc format for <cf/route/ - and <cf/protocol/ times, and a <cf/iso long/ similar format (DD-MM-YYYY - hh:mm:ss) for <cf/base/ and <cf/log/. These timeformats could be set by - <cf/old short/ and <cf/old long/ compatibility shorthands. - - <tag><label id="opt-table">table <m/name/ [sorted]</tag> - Create a new routing table. The default routing table is created - implicitly, other routing tables have to be added by this command. - Option <cf/sorted/ can be used to enable sorting of routes, see - <ref id="dsc-table-sorted" name="sorted table"> description for details. - - <tag><label id="opt-roa-table">roa table <m/name/ [ { <m/roa table options .../ } ]</tag> - Create a new ROA (Route Origin Authorization) table. ROA tables can be - used to validate route origination of BGP routes. A ROA table contains - ROA entries, each consist of a network prefix, a max prefix length and - an AS number. A ROA entry specifies prefixes which could be originated - by that AS number. ROA tables could be filled with data from RPKI (<rfc - id="6480">) or from public databases like Whois. ROA tables are - examined by <cf/roa_check()/ operator in filters. - - Currently, there is just one option, <cf>roa <m/prefix/ max <m/num/ as - <m/num/</cf>, which can be used to populate the ROA table with static - ROA entries. The option may be used multiple times. Other entries can be - added dynamically by <cf/add roa/ command. + <tag><label id="opt-table"><m/nettype/ table <m/name/ [sorted]</tag> + Create a new routing table. The default routing tables <cf/master4/ and + <cf/master6/ are created implicitly, other routing tables have to be + added by this command. Option <cf/sorted/ can be used to enable sorting + of routes, see <ref id="dsc-table-sorted" name="sorted table"> + description for details. <tag><label id="opt-eval">eval <m/expr/</tag> - Evaluates given filter expression. It is used by us for testing of filters. 
+ Evaluates given filter expression. It is used by the developers for testing of filters. </descrip> @@ -513,10 +607,6 @@ disable it. An empty <m/switch/ is equivalent to <cf/on/ ("silence means agreement"). <descrip> - <tag><label id="proto-preference">preference <m/expr/</tag> - Sets the preference of routes generated by this protocol. Default: - protocol dependent. - <tag><label id="proto-disabled">disabled <m/switch/</tag> Disables the protocol. You can change the disable/enable status from the command line interface without needing to touch the configuration. @@ -547,58 +637,10 @@ agreement"). This option can be used to override global router id for a given protocol. Default: uses global router id. - <tag><label id="proto-import">import all | none | filter <m/name/ | filter { <m/filter commands/ } | where <m/filter expression/</tag> - Specify a filter to be used for filtering routes coming from the - protocol to the routing table. <cf/all/ is shorthand for <cf/where true/ - and <cf/none/ is shorthand for <cf/where false/. Default: <cf/all/. - - <tag><label id="proto-export">export <m/filter/</tag> - This is similar to the <cf>import</cf> keyword, except that it works in - the direction from the routing table to the protocol. Default: <cf/none/. - - <tag><label id="proto-import-keep-filtered">import keep filtered <m/switch/</tag> - Usually, if an import filter rejects a route, the route is forgotten. - When this option is active, these routes are kept in the routing table, - but they are hidden and not propagated to other protocols. But it is - possible to show them using <cf/show route filtered/. Note that this - option does not work for the pipe protocol. Default: off. - - <tag><label id="proto-import-limit">import limit [<m/number/ | off ] [action warn | block | restart | disable]</tag> - Specify an import route limit (a maximum number of routes imported from - the protocol) and optionally the action to be taken when the limit is - hit. 
Warn action just prints warning log message. Block action discards - new routes coming from the protocol. Restart and disable actions shut - the protocol down like appropriate commands. Disable is the default - action if an action is not explicitly specified. Note that limits are - reset during protocol reconfigure, reload or restart. Default: <cf/off/. - - <tag><label id="proto-receive-limit">receive limit [<m/number/ | off ] [action warn | block | restart | disable]</tag> - Specify an receive route limit (a maximum number of routes received from - the protocol and remembered). It works almost identically to <cf>import - limit</cf> option, the only difference is that if <cf/import keep - filtered/ option is active, filtered routes are counted towards the - limit and blocked routes are forgotten, as the main purpose of the - receive limit is to protect routing tables from overflow. Import limit, - on the contrary, counts accepted routes only and routes blocked by the - limit are handled like filtered routes. Default: <cf/off/. - - <tag><label id="proto-export-limit">export limit [ <m/number/ | off ] [action warn | block | restart | disable]</tag> - Specify an export route limit, works similarly to the <cf>import - limit</cf> option, but for the routes exported to the protocol. This - option is experimental, there are some problems in details of its - behavior -- the number of exported routes can temporarily exceed the - limit without triggering it during protocol reload, exported routes - counter ignores route blocking and block action also blocks route - updates of already accepted routes -- and these details will probably - change in the future. Default: <cf/off/. - <tag><label id="proto-description">description "<m/text/"</tag> This is an optional description of the protocol. It is displayed as a part of the output of 'show route all' command. - <tag><label id="proto-table">table <m/name/</tag> - Connect this protocol to a non-default routing table. 
- <tag><label id="proto-vrf">vrf "<m/text/"</tag> Associate the protocol with specific VRF. The protocol will be restricted to interfaces assigned to the VRF and will use sockets bound @@ -607,6 +649,12 @@ agreement"). <cf/table/ option. Note that the VRF support in BIRD and Linux kernel (4.11) is still in development and is currently problematic outside of multihop BGP. + + <tag><label id="proto-channel"><m/channel name/ [{<m/channel config/}]</tag> + Every channel must be explicitly stated. See the protocol-specific + configuration for the list of supported channel names. See the + <ref id="channel-opts" name="channel configuration section"> for channel + definition. </descrip> <p>There are several options that give sense only with certain protocols: @@ -635,8 +683,8 @@ agreement"). options, in that case for given interface the first matching interface option is used. - This option is allowed in Babel, BFD, Direct, OSPF, RAdv and RIP - protocols, but in OSPF protocol it is used in the <cf/area/ subsection. + This option is allowed in Babel, BFD, Device, Direct, OSPF, RAdv and RIP + protocols. In OSPF protocol it is used in the <cf/area/ subsection. Default: none. @@ -724,6 +772,123 @@ agreement"). </descrip> + +<sect>Channel options +<label id="channel-opts"> + +<p>Every channel belongs to a protocol and is configured inside its block. The +minimal channel config is empty, then it uses default values. The name of the +channel implies its nettype. Channel definitions can be inherited from protocol +templates. Multiple definitions of the same channel are forbidden, but channels +inherited from templates can be updated by new definitions. + +<descrip> + <tag><label id="proto-table">table <m/name/</tag> + Specify a table to which the channel is connected. Default: the first + table of given nettype. + + <tag><label id="proto-preference">preference <m/expr/</tag> + Sets the preference of routes generated by the protocol and imported + through this channel. 
Default: protocol dependent. + + <tag><label id="proto-import">import all | none | filter <m/name/ | filter { <m/filter commands/ } | where <m/boolean filter expression/</tag> + Specify a filter to be used for filtering routes coming from the + protocol to the routing table. <cf/all/ is for keeping all routes, + <cf/none/ is for dropping all routes. Default: <cf/all/ (except for + EBGP). + + <tag><label id="proto-export">export <m/filter/</tag> + This is similar to the <cf>import</cf> keyword, except that it works in + the direction from the routing table to the protocol. Default: <cf/none/ + (except for EBGP). + + <tag><label id="proto-import-keep-filtered">import keep filtered <m/switch/</tag> + Usually, if an import filter rejects a route, the route is forgotten. + When this option is active, these routes are kept in the routing table, + but they are hidden and not propagated to other protocols. But it is + possible to show them using <cf/show route filtered/. Note that this + option does not work for the pipe protocol. Default: off. + + <tag><label id="proto-import-limit">import limit [<m/number/ | off ] [action warn | block | restart | disable]</tag> + Specify an import route limit (a maximum number of routes imported from + the protocol) and optionally the action to be taken when the limit is + hit. Warn action just prints warning log message. Block action discards + new routes coming from the protocol. Restart and disable actions shut + the protocol down like appropriate commands. Disable is the default + action if an action is not explicitly specified. Note that limits are + reset during protocol reconfigure, reload or restart. Default: <cf/off/. + + <tag><label id="proto-receive-limit">receive limit [<m/number/ | off ] [action warn | block | restart | disable]</tag> + Specify a receive route limit (a maximum number of routes received from + the protocol and remembered).
It works almost identically to <cf>import + limit</cf> option, the only difference is that if <cf/import keep + filtered/ option is active, filtered routes are counted towards the + limit and blocked routes are forgotten, as the main purpose of the + receive limit is to protect routing tables from overflow. Import limit, + on the contrary, counts accepted routes only and routes blocked by the + limit are handled like filtered routes. Default: <cf/off/. + + <tag><label id="proto-export-limit">export limit [ <m/number/ | off ] [action warn | block | restart | disable]</tag> + Specify an export route limit, works similarly to the <cf>import + limit</cf> option, but for the routes exported to the protocol. This + option is experimental, there are some problems in details of its + behavior -- the number of exported routes can temporarily exceed the + limit without triggering it during protocol reload, exported routes + counter ignores route blocking and block action also blocks route + updates of already accepted routes -- and these details will probably + change in the future. Default: <cf/off/. +</descrip> + +<p>This is a trivial example of RIP configured for IPv6 on all interfaces: +<code> +protocol rip ng { + ipv6; + interface "*"; +} +</code> + +<p>This is a non-trivial example. +<code> +protocol rip ng { + ipv6 { + table mytable6; + import filter { ... }; + export filter { ... }; + import limit 50; + }; + interface "*"; +} +</code> + +<p>And this is even more complicated example using templates. +<code> +template bgp { + local 198.51.100.14 as 65000; + + ipv4 { + table mytable4; + import filter { ... }; + export none; + }; + ipv6 { + table mytable6; + import filter { ... }; + export none; + }; +} + +protocol bgp from { + neighbor 198.51.100.130 as 64496; + + # IPv4 channel is inherited as-is, while IPv6 + # channel is adjusted by export filter option + ipv6 { + export filter { ... 
}; + }; +} +</code> + + <chapt>Remote control <label id="remote-control"> @@ -764,6 +929,7 @@ This argument can be omitted if there exists only a single instance. and protocol status, possibly giving verbose information, if <cf/all/ is specified. + <!-- TODO: Move these protocol-specific remote control commands to the protocol sections --> <tag><label id="cli-show-ospf-iface">show ospf interface [<m/name/] ["<m/interface/"]</tag> Show detailed information about OSPF interfaces. @@ -802,9 +968,8 @@ This argument can be omitted if there exists only a single instance. Show the list of symbols defined in the configuration (names of protocols, routing tables etc.). - <tag><label id="cli-show-route">show route [[for] <m/prefix/|<m/IP/] [table <m/t/] [filter <m/f/|where <m/c/] [(export|preexport|noexport) <m/p/] [protocol <m/p/] [<m/options/]</tag> - Show contents of a routing table (by default of the main one or the - table attached to a respective protocol), that is routes, their metrics + <tag><label id="cli-show-route">show route [[for] <m/prefix/|<m/IP/] [table (<m/t/ | all)] [filter <m/f/|where <m/c/] [(export|preexport|noexport) <m/p/] [protocol <m/p/] [(stats|count)] [<m/options/]</tag> + Show contents of specified routing tables, that is routes, their metrics and (in case the <cf/all/ switch is given) all their attributes. <p>You can specify a <m/prefix/ if you want to print routes for a @@ -814,20 +979,31 @@ This argument can be omitted if there exists only a single instance. the selected one at the top, unless <cf/primary/ is given in which case only the selected route is shown. + <p>The <cf/show route/ command can process one or multiple routing + tables. The set of selected tables is determined on three levels: First, + tables can be explicitly selected by <cf/table/ switch, which could be + used multiple times, all tables are specified by <cf/table all/. 
Second, + tables can be implicitly selected by channels or protocols that are + arguments of several other switches (e.g., <cf/export/, <cf/protocol/). + Last, the set of default tables is used: <cf/master4/, <cf/master6/ and + each first table of any other network type. + <p>You can also ask for printing only routes processed and accepted by a given filter (<cf>filter <m/name/</cf> or <cf>filter { <m/filter/ } </cf> or matching a given condition (<cf>where <m/condition/</cf>). The <cf/export/, <cf/preexport/ and <cf/noexport/ switches ask for - printing of routes that are exported to the specified protocol. - With <cf/preexport/, the export filter of the protocol is skipped. - With <cf/noexport/, routes rejected by the export filter are printed - instead. Note that routes not exported to the protocol for other reasons + printing of routes that are exported to the specified protocol or + channel. With <cf/preexport/, the export filter of the channel is + skipped. With <cf/noexport/, routes rejected by the export filter are + printed instead. Note that routes not exported for other reasons (e.g. secondary routes or routes imported from that protocol) are not - printed even with <cf/noexport/. + printed even with <cf/noexport/. These switches also imply that + associated routing tables are selected instead of default ones. <p>You can also select just routes added by a specific protocol. - <cf>protocol <m/p/</cf>. + <cf>protocol <m/p/</cf>. This switch also implies that associated + routing tables are selected instead of default ones. <p>If BIRD is configured to keep filtered routes (see <cf/import keep filtered/ option), you can show them instead of routes by using @@ -837,27 +1013,6 @@ This argument can be omitted if there exists only a single instance. number of networks, number of routes before and after filtering). If you use <cf/count/ instead, only the statistics will be printed. 
- <tag><label id="cli-show-roa">show roa [<m/prefix/ | in <m/prefix/ | for <m/prefix/] [as <m/num/] [table <m/t/]</tag> - Show contents of a ROA table (by default of the first one). You can - specify a <m/prefix/ to print ROA entries for a specific network. If you - use <cf>for <m/prefix/</cf>, you'll get all entries relevant for route - validation of the network prefix; i.e., ROA entries whose prefixes cover - the network prefix. Or you can use <cf>in <m/prefix/</cf> to get ROA - entries covered by the network prefix. You could also use <cf/as/ option - to show just entries for given AS. - - <tag><label id="cli-add-roa">add roa <m/prefix/ max <m/num/ as <m/num/ [table <m/t/]</tag> - Add a new ROA entry to a ROA table. Such entry is called <it/dynamic/ - compared to <it/static/ entries specified in the config file. These - dynamic entries survive reconfiguration. - - <tag><label id="cli-delete-roa">delete roa <m/prefix/ max <m/num/ as <m/num/ [table <m/t/]</tag> - Delete the specified ROA entry from a ROA table. Only dynamic ROA - entries (i.e., the ones added by <cf/add roa/ command) can be deleted. - - <tag><label id="cli-flush-roa">flush roa [table <m/t/]</tag> - Remove all dynamic ROA entries from a ROA table. - <tag><label id="cli-configure">configure [soft] ["<m/config file/"] [timeout [<m/num/]]</tag> Reload configuration from a given file. BIRD will smoothly switch itself to the new configuration, protocols are reconfigured if possible, @@ -1062,22 +1217,50 @@ foot). a shell pattern (represented also as a string). <tag><label id="type-ip">ip</tag> - This type can hold a single IP address. Depending on the compile-time - configuration of BIRD you are using, it is either an IPv4 or IPv6 - address. IP addresses are written in the standard notation - (<cf/10.20.30.40/ or <cf/fec0:3:4::1/). You can apply special operator - <cf>.mask(<M>num</M>)</cf> on values of type ip. It masks out all but - first <cf><M>num</M></cf> bits from the IP address. 
So + This type can hold a single IP address. The IPv4 addresses are stored as + IPv4-Mapped IPv6 addresses so one data type for both of them is used. + Whether the address is IPv4 or not may be checked by <cf>.is_ip4</cf> + which returns <cf/bool/. IP addresses are written in the standard + notation (<cf/10.20.30.40/ or <cf/fec0:3:4::1/). You can apply special + operator <cf>.mask(<M>num</M>)</cf> on values of type ip. It masks out + all but first <cf><M>num</M></cf> bits from the IP address. So <cf/1.2.3.4.mask(8) = 1.0.0.0/ is true. <tag><label id="type-prefix">prefix</tag> - This type can hold a network prefix consisting of IP address and prefix - length. Prefix literals are written as <cf><m/ipaddress//<m/pxlen/</cf>, - or <cf><m>ipaddress</m>/<m>netmask</m></cf>. There are two special - operators on prefixes: <cf/.ip/ which extracts the IP address from the + This type can hold a network prefix consisting of IP address, prefix + length and several other values. This is the key in route tables. + + Prefixes may be of several types, which can be determined by the special + operator <cf/.type/. The type may be: + + <cf/NET_IP4/ and <cf/NET_IP6/ prefixes hold an IP prefix. The literals + are written as <cf><m/ipaddress//<m/pxlen/</cf>. There are two special + operators on these: <cf/.ip/ which extracts the IP address from the pair, and <cf/.len/, which separates prefix length from the pair. So <cf>1.2.0.0/16.len = 16</cf> is true. + <cf/NET_VPN4/ and <cf/NET_VPN6/ prefixes hold an IP prefix with VPN + Route Distinguisher (<rfc id="4364">). They support the same special + operators as IP prefixes, and also <cf/.rd/ which extracts the Route + Distinguisher. Their literals are written + as <cf><m/vpnrd/ <m/ipprefix/</cf> + + <cf/NET_ROA4/ and <cf/NET_ROA6/ prefixes hold an IP prefix range + together with an ASN. They support the same special operators as IP + prefixes, and also <cf/.maxlen/ which extracts maximal prefix length, + and <cf/.asn/ which extracts the ASN. 
+ + <cf/NET_FLOW4/ and <cf/NET_FLOW6/ hold an IP prefix together with a + flowspec rule. Filters currently don't support flowspec parsing. + + <cf/NET_MPLS/ holds a single MPLS label and its handling is currently + not implemented. + + <tag><label id="type-vpnrd">vpnrd</tag> + This is a route distinguisher according to <rfc id="4364">. There are + three kinds of RDs: <cf><m/asn/:<m/32bit int/</cf>, <cf><m/asn4/:<m/16bit int/</cf> + and <cf><m/IPv4 address/:<m/16bit int/</cf>. + <tag><label id="type-ec">ec</tag> This is a specialized type used to represent BGP extended community values. It is essentially a 64bit value, literals of this type are @@ -1174,6 +1357,10 @@ foot). <cf>192.168.0.0/16{16,24}</cf> and <cf>192.168.0.0/16 ge 24</cf> as <cf>192.168.0.0/16{24,32}</cf>. + It is possible to mix IPv4 and IPv6 prefixes/addresses in a prefix/ip set + but its behavior may change between versions without any warning; don't do + it unless you are more than sure what you are doing. (Really, don't do it.) + <tag><label id="type-enum">enum</tag> Enumeration types are fixed sets of possibilities. You can't define your own variables of such type, but some route attributes are of enumeration @@ -1196,7 +1383,7 @@ foot). <cf><m/P/.len</cf> returns the length of path <m/P/. - <cf><m/P/.empty</cf> resets path <m/P/ to empty path. + <cf><m/P/.empty</cf> makes the path <m/P/ empty. <cf>prepend(<m/P/,<m/A/)</cf> prepends ASN <m/A/ to path <m/P/ and returns the result. @@ -1225,8 +1412,6 @@ foot). expressions can also contain integer expressions enclosed in parentheses and integer variables, for example <tt>[= * 4 (1+2) a =]</tt>. You can also use ranges, for example <tt>[= * 3..5 2 100..200 * =]</tt>. - There is also old (deprecated) syntax that uses / .. / instead of [= .. =] - and ? instead of *. <tag><label id="type-clist">clist</tag> Clist is similar to a set, except that unlike other sets, it can be @@ -1236,7 +1421,7 @@ foot).
<cf><m/C/.len</cf> returns the length of clist <m/C/. - <cf><m/C/.empty</cf> resets clist <m/C/ to empty clist. + <cf><m/C/.empty</cf> makes the list <m/C/ empty. <cf>add(<m/C/,<m/P/)</cf> adds pair (or quad) <m/P/ to clist <m/C/ and returns the result. If item <m/P/ is already in clist <m/C/, it does @@ -1266,7 +1451,7 @@ foot). <cf/!˜/ membership operators) can be used to modify or test eclists, with ECs instead of pairs as arguments. - <tag/lclist/ + <tag><label id="type-lclist">lclist</tag> Lclist is a data type used for BGP large community lists. Like eclists, lclists are very similar to clists, but they are sets of LCs instead of pairs. The same operations (like <cf/add/, <cf/delete/ or <cf/˜/ @@ -1274,13 +1459,12 @@ foot). lclists, with LCs instead of pairs as arguments. </descrip> - <sect>Operators <label id="operators"> <p>The filter language supports common integer operators <cf>(+,-,*,/)</cf>, parentheses <cf/(a*(b+c))/, comparison <cf/(a=b, a!=b, a<b, a>=b)/. -Logical operations include unary not (<cf/!/), and (<cf/&&/) and or +Logical operations include unary not (<cf/!/), and (<cf/&&/), and or (<cf/||/). Special operators include (<cf/˜/, <cf/!˜/) for "is (not) element of a set" operation - it can be used on element and set of elements of the same type (returning true if element is @@ -1311,11 +1495,11 @@ prefix and an ASN as arguments. <p>Filters support two control structures: conditions and case switches. -<p>Syntax of a condition is: <cf>if <M>boolean expression</M> then <m/command1/; -else <m/command2/;</cf> and you can use <cf>{ <m/command_1/; <m/command_2/; +<p>Syntax of a condition is: <cf>if <M>boolean expression</M> then <m/commandT/; +else <m/commandF/;</cf> and you can use <cf>{ <m/command1/; <m/command2/; <M>...</M> }</cf> instead of either command. The <cf>else</cf> clause may be -omitted.
If the <cf><m>boolean expression</m></cf> is true, <m/commandT/ is +executed, otherwise <m/commandF/ is executed. <p>The <cf>case</cf> is similar to case from Pascal. Syntax is <cf>case <m/expr/ { else: | <m/num_or_prefix [ .. num_or_prefix]/: <m/statement/ ; [ @@ -1349,13 +1533,14 @@ if 1234 = i then printn "."; else { like it accesses variables. Attempts to access undefined attribute result in a runtime error; you can check if an attribute is defined by using the <cf>defined( <m>attribute</m> )</cf> operator. One notable exception to this -rule are attributes of clist type, where undefined value is regarded as empty -clist for most purposes. +rule are attributes of bgppath and *clist types, where undefined value is +regarded as empty bgppath/*clist for most purposes. <descrip> <tag><label id="rta-net"><m/prefix/ net</tag> - Network the route is talking about. Read-only. (See the chapter about - routing tables.) + The network prefix or anything else the route is talking about. The + primary key of the routing table. Read-only. (See the <ref id="routes" + name="chapter about routes">.) <tag><label id="rta-scope"><m/enum/ scope</tag> The scope of the route. Possible values: <cf/SCOPE_HOST/ for routes @@ -1386,11 +1571,6 @@ clist for most purposes. <cf/RTS_OSPF_IA/, <cf/RTS_OSPF_EXT1/, <cf/RTS_OSPF_EXT2/, <cf/RTS_BGP/, <cf/RTS_PIPE/, <cf/RTS_BABEL/. - <tag><label id="rta-cast"><m/enum/ cast</tag> - Route type (Currently <cf/RTC_UNICAST/ for normal routes, - <cf/RTC_BROADCAST/, <cf/RTC_MULTICAST/, <cf/RTC_ANYCAST/ will be used in - the future for broadcast, multicast and anycast routes). Read-only. - <tag><label id="rta-dest"><m/enum/ dest</tag> Type of destination the packets should be sent to (<cf/RTD_ROUTER/ for forwarding to a neighboring router, @@ -1422,7 +1602,7 @@ clist for most purposes. 1 metric. 
</descrip> -<p>There also exist some protocol-specific attributes which are described in the +<p>There also exist protocol-specific attributes which are described in the corresponding protocol sections. @@ -1467,13 +1647,12 @@ networks. Babel is conceptually very simple in its operation and "just works" in its default configuration, though some configuration is possible and in some cases desirable. -<p>While the Babel protocol is dual stack (i.e., can carry both IPv4 and IPv6 -routes over the same IPv6 transport), BIRD presently implements only the IPv6 -subset of the protocol. No Babel extensions are implemented, but the BIRD -implementation can coexist with implementations using the extensions (and will -just ignore extension messages). +<p>The Babel protocol is dual stack; i.e., it can carry both IPv4 and IPv6 +routes over the same IPv6 transport. For sending and receiving Babel packets, +only a link-local IPv6 address is needed. -<p>The Babel protocol implementation in BIRD is currently in alpha stage. +<p>BIRD does not implement any Babel extensions, but will coexist with +implementations using extensions (and will just ignore extension messages). <sect1>Configuration <label id="babel-config"> @@ -1483,45 +1662,64 @@ other protocols, but supports the following per-interface configuration options: <code> protocol babel [<name>] { + ipv4 { <channel config> }; + ipv6 { <channel config> }; interface <interface pattern> { type <wired|wireless>; rxcost <number>; - hello interval <number>; - update interval <number>; + limit <number>; + hello interval <time>; + update interval <time>; port <number>; tx class|dscp <number>; tx priority <number>; rx buffer <number>; tx length <number>; check link <switch>; + next hop ipv4 <address>; + next hop ipv6 <address>; }; } </code> <descrip> + <tag><label id="babel-channel">ipv4|ipv6 <m/channel config/</tag> + The supported channels are IPv4 and IPv6. 
+ <tag><label id="babel-type">type wired|wireless </tag> - This option specifies the interface type: Wired or wireless. Wired - interfaces are considered more reliable, and so the default hello - interval is higher, and a neighbour is considered unreachable after only - a small number of "hello" packets are lost. On wireless interfaces, - hello packets are sent more often, and the ETX link quality estimation - technique is used to compute the metrics of routes discovered over this - interface. This technique will gradually degrade the metric of routes - when packets are lost rather than the more binary up/down mechanism of - wired type links. Default: <cf/wired/. + This option specifies the interface type: Wired or wireless. On wired + interfaces a neighbor is considered unreachable after a small number of + Hello packets are lost, as described by <cf/limit/ option. On wireless + interfaces the ETX link quality estimation technique is used to compute + the metrics of routes discovered over this interface. This technique will + gradually degrade the metric of routes when packets are lost rather than + the more binary up/down mechanism of wired type links. Default: + <cf/wired/. <tag><label id="babel-rxcost">rxcost <m/num/</tag> - This specifies the RX cost of the interface. The route metrics will be - computed from this value with a mechanism determined by the interface - <cf/type/. Default: 96 for wired interfaces, 256 for wireless. - - <tag><label id="babel-hello">hello interval <m/num/</tag> - Interval at which periodic "hello" messages are sent on this interface, - in seconds. Default: 4 seconds. - - <tag><label id="babel-update">update interval <m/num/</tag> - Interval at which periodic (full) updates are sent. Default: 4 times the - hello interval. + This option specifies the nominal RX cost of the interface. The effective + neighbor costs for route metrics will be computed from this value with a + mechanism determined by the interface <cf/type/. 
Note that in contrast to + other routing protocols like RIP or OSPF, the <cf/rxcost/ specifies the + cost of RX instead of TX, so it affects primarily neighbors' route + selection and not local route selection. Default: 96 for wired interfaces, + 256 for wireless. + + <tag><label id="babel-limit">limit <m/num/</tag> + BIRD keeps track of received Hello messages from each neighbor to + establish neighbor reachability. For wired type interfaces, this option + specifies how many of the last 16 hellos have to be correctly received + for the neighbor to be assumed to be up. The option is ignored on wireless + type interfaces, where gradual cost degradation is used instead of a sharp + limit. Default: 12. + + <tag><label id="babel-hello">hello interval <m/time/ s|ms</tag> + Interval at which periodic Hello messages are sent on this interface, + with time units. Default: 4 seconds. + + <tag><label id="babel-update">update interval <m/time/ s|ms</tag> + Interval at which periodic (full) updates are sent, with time + units. Default: 4 times the hello interval. <tag><label id="babel-port">port <m/number/</tag> This option selects a UDP port to operate on. The default is to operate @@ -1551,6 +1749,16 @@ protocol babel [<name>] { routes received from them are withdrawn. It is possible that some hardware drivers or platforms do not implement this feature. Default: yes. + + <tag><label id="babel-next-hop-ipv4">next hop ipv4 <m/address/</tag> + Set the next hop address advertised for IPv4 routes advertised on this + interface. Default: the preferred IPv4 address of the interface. + + <tag><label id="babel-next-hop-ipv6">next hop ipv6 <m/address/</tag> + Set the next hop address advertised for IPv6 routes advertised on this + interface. If not set, the same link-local address that is used as the + source for Babel packets will be used. In normal operation, it should not + be necessary to set this option.
</descrip> <sect1>Attributes @@ -1579,10 +1787,22 @@ protocol babel { # configured on local interfaces, plus re-distribute all routes received # from other babel peers. - export where (source = RTS_DEVICE) || (source = RTS_BABEL); + ipv4 { + export where (source = RTS_DEVICE) || (source = RTS_BABEL); + }; + ipv6 { + export where (source = RTS_DEVICE) || (source = RTS_BABEL); + }; } </code> +<sect1>Known issues +<label id="babel-issues"> + +<p>When retracting a route, Babel generates an unreachable route for a little +while (according to RFC). The interaction of this behavior with other protocols +is not well tested and strange things may happen. + <sect>BFD <label id="bfd"> @@ -1611,10 +1831,7 @@ the BFD session went down). advanced features like the echo mode or authentication are not implemented), IP transport for BFD as defined in <rfc id="5881"> and <rfc id="5883"> and interaction with client protocols as defined in <rfc id="5882">. - -<p>Note that BFD implementation in BIRD is currently a new feature in -development, expect some rough edges and possible UI and configuration changes -in the future. Also note that we currently support at most one protocol instance. +We currently support at most one protocol instance. <p>BFD packets are sent with a dynamic source port number. Linux systems use by default a bit different dynamic port range than the IANA approved one @@ -1825,17 +2042,42 @@ table it wishes to export along with complete path information (a list of AS'es the packet will travel through if it uses the particular route) in order to avoid routing loops. 
-<p>BIRD supports all requirements of the BGP4 standard as defined in -<rfc id="4271"> It also supports the community attributes (<rfc id="1997">), -capability negotiation (<rfc id="5492">), MD5 password authentication (<rfc -id="2385">), extended communities (<rfc id="4360">), route reflectors (<rfc -id="4456">), graceful restart (<rfc id="4724">), multiprotocol extensions -(<rfc id="4760">), 4B AS numbers (<rfc id="4893">), and 4B AS numbers in -extended communities (<rfc id="5668">). - +<sect1>Supported standards +<label id="bgp-standards"> -For IPv6, it uses the standard multiprotocol extensions defined in -<rfc id="4760"> and applied to IPv6 according to <rfc id="2545">. +<p> +<itemize> +<item> <rfc id="4271"> - Border Gateway Protocol 4 (BGP) +<item> <rfc id="1997"> - BGP Communities Attribute +<item> <rfc id="2385"> - Protection of BGP Sessions via TCP MD5 Signature +<item> <rfc id="2545"> - Use of BGP Multiprotocol Extensions for IPv6 +<item> <rfc id="2918"> - Route Refresh Capability +<item> <rfc id="3107"> - Carrying Label Information in BGP +<item> <rfc id="4360"> - BGP Extended Communities Attribute +<item> <rfc id="4364"> - BGP/MPLS IPv4 Virtual Private Networks +<item> <rfc id="4456"> - BGP Route Reflection +<item> <rfc id="4486"> - Subcodes for BGP Cease Notification Message +<item> <rfc id="4659"> - BGP/MPLS IPv6 Virtual Private Networks +<item> <rfc id="4724"> - Graceful Restart Mechanism for BGP +<item> <rfc id="4760"> - Multiprotocol extensions for BGP +<item> <rfc id="4798"> - Connecting IPv6 Islands over IPv4 MPLS +<item> <rfc id="5065"> - AS confederations for BGP +<item> <rfc id="5082"> - Generalized TTL Security Mechanism +<item> <rfc id="5492"> - Capabilities Advertisement with BGP +<item> <rfc id="5549"> - Advertising IPv4 NLRI with an IPv6 Next Hop +<item> <rfc id="5575"> - Dissemination of Flow Specification Rules +<item> <rfc id="5668"> - 4-Octet AS Specific BGP Extended Community +<item> <rfc id="6286"> - AS-Wide Unique BGP Identifier 
+<item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error +<item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers +<item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP +<item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages +<item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP +<item> <rfc id="7947"> - Internet Exchange BGP Route Server +<item> <rfc id="8092"> - BGP Large Communities Attribute +<item> <rfc id="8203"> - BGP Administrative Shutdown Communication +<item> <rfc id="8212"> - Default EBGP Route Propagation Behavior without Policies +</itemize> <sect1>Route selection rules <label id="bgp-route-select-rules"> @@ -1868,8 +2110,8 @@ determine immediate next hops for routes and to know their internal distances to boundary routers for the purpose of BGP route selection. In BIRD, there is usually one routing table used for both IGP routes and BGP routes. -<sect1>Configuration -<label id="bgp-config"> +<sect1>Protocol configuration +<label id="bgp-proto-config"> <p>Each instance of the BGP corresponds to one neighboring router. This allows to set routing policy and all the other parameters differently for each neighbor @@ -1924,58 +2166,22 @@ using the following configuration parameters: source address for the BGP session. Default: the address of the local end of the interface our neighbor is connected to. - <tag><label id="bgp-next-hop-self">next hop self</tag> - Avoid calculation of the Next Hop attribute and always advertise our own - source address as a next hop. This needs to be used only occasionally to - circumvent misconfigurations of other routers. Default: disabled. - - <tag><label id="bgp-next-hop-keep">next hop keep</tag> - Forward the received Next Hop attribute even in situations where the - local address should be used instead, like when the route is sent to an - interface with a different subnet. Default: disabled. 
- - <tag><label id="bgp-missing-lladdr">missing lladdr self|drop|ignore</tag> - Next Hop attribute in BGP-IPv6 sometimes contains just the global IPv6 - address, but sometimes it has to contain both global and link-local IPv6 - addresses. This option specifies what to do if BIRD have to send both - addresses but does not know link-local address. This situation might - happen when routes from other protocols are exported to BGP, or when - improper updates are received from BGP peers. <cf/self/ means that BIRD - advertises its own local address instead. <cf/drop/ means that BIRD - skips that prefixes and logs error. <cf/ignore/ means that BIRD ignores - the problem and sends just the global address (and therefore forms - improper BGP update). Default: <cf/self/, unless BIRD is configured as a - route server (option <cf/rs client/), in that case default is <cf/ignore/, - because route servers usually do not forward packets themselves. - - <tag><label id="bgp-gateway">gateway direct|recursive</tag> - For received routes, their <cf/gw/ (immediate next hop) attribute is - computed from received <cf/bgp_next_hop/ attribute. This option - specifies how it is computed. Direct mode means that the IP address from - <cf/bgp_next_hop/ is used if it is directly reachable, otherwise the - neighbor IP address is used. Recursive mode means that the gateway is - computed by an IGP routing table lookup for the IP address from - <cf/bgp_next_hop/. Note that there is just one level of indirection in - recursive mode - the route obtained by the lookup must not be recursive - itself, to prevent mutually recursive routes. - - Recursive mode is the behavior specified by the BGP - standard. Direct mode is simpler, does not require any routes in a - routing table, and was used in older versions of BIRD, but does not - handle well nontrivial iBGP setups and multihop. Recursive mode is - incompatible with <ref id="dsc-table-sorted" name="sorted tables">. 
Default: - <cf/direct/ for direct sessions, <cf/recursive/ for multihop sessions. - - <tag><label id="bgp-igp-table">igp table <m/name/</tag> - Specifies a table that is used as an IGP routing table. Default: the - same as the table BGP is connected to. + <tag><label id="bgp-strict-bind">strict bind <m/switch/</tag> + Specify whether BGP listening socket should be bound to a specific local + address (the same as the <cf/source address/) and associated interface, + or to all addresses. Binding to a specific address could be useful in + cases like running multiple BIRD instances on a machine, each using its + own IP address. Note that listening sockets bound to a specific address and + to all addresses collide, therefore either all BGP protocols (of the + same address family and using the same local port) should have set + <cf/strict bind/, or none of them. Default: disabled. <tag><label id="bgp-check-link">check link <M>switch</M></tag> BGP could take hardware link state into consideration. If enabled, BIRD tracks the link state of the associated interface and when link disappears (e.g. an ethernet cable is unplugged), the BGP session is immediately shut down. Note that this option cannot be used with - multihop BGP. Default: disabled. + multihop BGP. Default: enabled for direct BGP, disabled otherwise. <tag><label id="bgp-bfd">bfd <M>switch</M></tag> BGP could use BFD protocol as an advisory mechanism for neighbor @@ -1996,7 +2202,7 @@ using the following configuration parameters: Note that full (ICMP protection, for example) <rfc id="5082"> support is provided by Linux only. Default: disabled. - <tag><label id="bgp-pass">password <m/string/</tag> + <tag><label id="bgp-password">password <m/string/</tag> Use this password for MD5 authentication of BGP sessions (<rfc id="2385">). When used on BSD systems, see also <cf/setkey/ option below. Default: no authentication. @@ -2017,6 +2223,21 @@ using the following configuration parameters: accepting incoming connections.
In passive mode, outgoing connections are not initiated. Default: off. + <tag><label id="bgp-confederation">confederation <m/number/</tag> + BGP confederations (<rfc id="5065">) are collections of autonomous + systems that act as one entity to external systems, represented by one + confederation identifier (instead of AS numbers). This option allows to + enable BGP confederation behavior and to specify the local confederation + identifier. When BGP confederations are used, all BGP speakers that are + members of the BGP confederation should have the same confederation + identifier configured. Default: 0 (no confederation). + + <tag><label id="bgp-confederation-member">confederation member <m/switch/</tag> + When BGP confederations are used, this option allows to specify whether + the BGP neighbor is a member of the same confederation as the local BGP + speaker. The option is unnecessary (and ignored) for IBGP sessions, as + the same AS number implies the same confederation. Default: no. + <tag><label id="bgp-rr-client">rr client</tag> Be a route reflector and treat the neighbor as a route reflection client. Default: disabled. @@ -2039,24 +2260,6 @@ using the following configuration parameters: example does not prepend its AS number to AS PATH attribute and keeps MED attribute). Default: disabled. - <tag><label id="bgp-secondary">secondary <m/switch/</tag> - Usually, if an export filter rejects a selected route, no other route is - propagated for that network. This option allows to try the next route in - order until one that is accepted is found or all routes for that network - are rejected. This can be used for route servers that need to propagate - different tables to each client but do not want to have these tables - explicitly (to conserve memory). This option requires that the connected - routing table is <ref id="dsc-table-sorted" name="sorted">. Default: off. 
- - <tag><label id="bgp-add-paths">add paths <m/switch/|rx|tx</tag> - Standard BGP can propagate only one path (route) per destination network - (usually the selected one). This option controls the add-path protocol - extension, which allows to advertise any number of paths to a - destination. Note that to be active, add-path has to be enabled on both - sides of the BGP session, but it could be enabled separately for RX and - TX direction. When active, all available routes accepted by the export - filter are advertised to the neighbor. Default: off. - <tag><label id="bgp-allow-local-pref">allow bgp_local_pref <m/switch/</tag> A standard BGP implementation do not send the Local Preference attribute to eBGP neighbors and ignore this attribute if received from eBGP @@ -2091,16 +2294,16 @@ using the following configuration parameters: <tag><label id="bgp-graceful-restart">graceful restart <m/switch/|aware</tag> When a BGP speaker restarts or crashes, neighbors will discard all received paths from the speaker, which disrupts packet forwarding even - when the forwarding plane of the speaker remains intact. <rfc - id="4724"> specifies an optional graceful restart mechanism to - alleviate this issue. This option controls the mechanism. It has three - states: Disabled, when no support is provided. Aware, when the graceful - restart support is announced and the support for restarting neighbors - is provided, but no local graceful restart is allowed (i.e. - receiving-only role). Enabled, when the full graceful restart - support is provided (i.e. both restarting and receiving role). Note - that proper support for local graceful restart requires also - configuration of other protocols. Default: aware. + when the forwarding plane of the speaker remains intact. <rfc id="4724"> + specifies an optional graceful restart mechanism to alleviate this + issue. This option controls the mechanism. It has three states: + Disabled, when no support is provided. 
Aware, when the graceful restart + support is announced and the support for restarting neighbors is + provided, but no local graceful restart is allowed (i.e. receiving-only + role). Enabled, when the full graceful restart support is provided + (i.e. both restarting and receiving role). Restarting role could be also + configured per-channel. Note that proper support for local graceful + restart requires also configuration of other protocols. Default: aware. <tag><label id="bgp-graceful-restart-time">graceful restart time <m/number/</tag> The restart time is announced in the BGP graceful restart capability @@ -2147,13 +2350,6 @@ using the following configuration parameters: This option is relevant to IPv4 mode with enabled capability advertisement only. Default: on. - <tag><label id="bgp-route-limit">route limit <m/number/</tag> - The maximal number of routes that may be imported from the protocol. If - the route limit is exceeded, the connection is closed with an error. - Limit is currently implemented as <cf>import limit <m/number/ action - restart</cf>. This option is obsolete and it is replaced by - <ref id="proto-import-limit" name="import limit option">. Default: no limit. - <tag><label id="bgp-disable-after-error">disable after error <m/switch/</tag> When an error is encountered (either locally or by the other side), disable the instance automatically and wait for an administrator to fix @@ -2222,7 +2418,7 @@ using the following configuration parameters: <tag><label id="bgp-igp-metric">igp metric <m/switch/</tag> Enable comparison of internal distances to boundary routers during best - route selection. Default: on. + route selection. Default: on. <tag><label id="bgp-prefer-older">prefer older <m/switch/</tag> Standard route selection algorithm breaks ties by comparing router IDs. @@ -2241,6 +2437,119 @@ using the following configuration parameters: versions of BIRD). 
</descrip> +<sect1>Channel configuration +<label id="bgp-channel-config"> + +<p>BGP supports several AFIs and SAFIs over one connection. Every AFI/SAFI +announced to the peer corresponds to one channel. The table of supported AFI/SAFIs +together with their appropriate channels follows. + +<table loc="h"> +<tabular ca="l|l|l|r|r"> + <bf/Channel name/ | <bf/Table nettype/ | <bf/IGP table allowed/ | <bf/AFI/ | <bf/SAFI/ +@<hline> + <cf/ipv4/ | <cf/ipv4/ | <cf/ipv4/ and <cf/ipv6/ | 1 | 1 +@ <cf/ipv6/ | <cf/ipv6/ | <cf/ipv4/ and <cf/ipv6/ | 2 | 1 +@ <cf/ipv4 multicast/ | <cf/ipv4/ | <cf/ipv4/ and <cf/ipv6/ | 1 | 2 +@ <cf/ipv6 multicast/ | <cf/ipv6/ | <cf/ipv4/ and <cf/ipv6/ | 2 | 2 +@ <cf/ipv4 mpls/ | <cf/ipv4/ | <cf/ipv4/ and <cf/ipv6/ | 1 | 4 +@ <cf/ipv6 mpls/ | <cf/ipv6/ | <cf/ipv4/ and <cf/ipv6/ | 2 | 4 +@ <cf/vpn4 mpls/ | <cf/vpn4/ | <cf/ipv4/ and <cf/ipv6/ | 1 | 128 +@ <cf/vpn6 mpls/ | <cf/vpn6/ | <cf/ipv4/ and <cf/ipv6/ | 2 | 128 +@ <cf/vpn4 multicast/ | <cf/vpn4/ | <cf/ipv4/ and <cf/ipv6/ | 1 | 129 +@ <cf/vpn6 multicast/ | <cf/vpn6/ | <cf/ipv4/ and <cf/ipv6/ | 2 | 129 +@ <cf/flow4/ | <cf/flow4/ | --- | 1 | 133 +@ <cf/flow6/ | <cf/flow6/ | --- | 2 | 133 +</tabular> +</table> + +<p>Due to <rfc id="8212">, external BGP protocol requires explicit configuration +of import and export policies (in contrast to other protocols, where default +policies of <cf/import all/ and <cf/export none/ are used in absence of explicit +configuration). Note that blanket policies like <cf/all/ or <cf/none/ can still +be used in explicit configuration. + +<p>BGP channels have additional config options (together with the common ones): + +<descrip> + <tag><label id="bgp-next-hop-keep">next hop keep</tag> + Forward the received Next Hop attribute even in situations where the + local address should be used instead, like when the route is sent to an + interface with a different subnet. Default: disabled. 
+ + <tag><label id="bgp-next-hop-self">next hop self</tag> + Avoid calculation of the Next Hop attribute and always advertise our own + source address as a next hop. This needs to be used only occasionally to + circumvent misconfigurations of other routers. Default: disabled. + + <tag><label id="bgp-next-hop-address">next hop address <m/ip/</tag> + Avoid calculation of the Next Hop attribute and always advertise this address + as a next hop. + + <tag><label id="bgp-missing-lladdr">missing lladdr self|drop|ignore</tag> + Next Hop attribute in BGP-IPv6 sometimes contains just the global IPv6 + address, but sometimes it has to contain both global and link-local IPv6 + addresses. This option specifies what to do if BIRD has to send both + addresses but does not know the link-local address. This situation might + happen when routes from other protocols are exported to BGP, or when + improper updates are received from BGP peers. <cf/self/ means that BIRD + advertises its own local address instead. <cf/drop/ means that BIRD + skips those prefixes and logs an error. <cf/ignore/ means that BIRD ignores + the problem and sends just the global address (and therefore forms + an improper BGP update). Default: <cf/self/, unless BIRD is configured as a + route server (option <cf/rs client/), in which case the default is <cf/ignore/, + because route servers usually do not forward packets themselves. + + <tag><label id="bgp-gateway">gateway direct|recursive</tag> + For received routes, their <cf/gw/ (immediate next hop) attribute is + computed from received <cf/bgp_next_hop/ attribute. This option + specifies how it is computed. Direct mode means that the IP address from + <cf/bgp_next_hop/ is used if it is directly reachable, otherwise the + neighbor IP address is used. Recursive mode means that the gateway is + computed by an IGP routing table lookup for the IP address from + <cf/bgp_next_hop/.
Note that there is just one level of indirection in + recursive mode - the route obtained by the lookup must not be recursive + itself, to prevent mutually recursive routes. + + Recursive mode is the behavior specified by the BGP + standard. Direct mode is simpler, does not require any routes in a + routing table, and was used in older versions of BIRD, but does not + handle well nontrivial iBGP setups and multihop. Recursive mode is + incompatible with <ref id="dsc-table-sorted" name="sorted tables">. Default: + <cf/direct/ for direct sessions, <cf/recursive/ for multihop sessions. + + <tag><label id="bgp-igp-table">igp table <m/name/</tag> + Specifies a table that is used as an IGP routing table. The type of this + table must be as allowed in the table above. This option is allowed once + for every allowed table type. Default: the same as the main table + the channel is connected to (if eligible). + + <tag><label id="bgp-secondary">secondary <m/switch/</tag> + Usually, if an export filter rejects a selected route, no other route is + propagated for that network. This option allows to try the next route in + order until one that is accepted is found or all routes for that network + are rejected. This can be used for route servers that need to propagate + different tables to each client but do not want to have these tables + explicitly (to conserve memory). This option requires that the connected + routing table is <ref id="dsc-table-sorted" name="sorted">. Default: off. + + <tag><label id="bgp-add-paths">add paths <m/switch/|rx|tx</tag> + Standard BGP can propagate only one path (route) per destination network + (usually the selected one). This option controls the add-path protocol + extension, which allows to advertise any number of paths to a + destination. Note that to be active, add-path has to be enabled on both + sides of the BGP session, but it could be enabled separately for RX and + TX direction. 
When active, all available routes accepted by the export + filter are advertised to the neighbor. Default: off. + + <tag><label id="bgp-graceful-restart-c">graceful restart <m/switch/</tag> + Although BGP graceful restart is configured mainly by protocol-wide + <ref id="bgp-graceful-restart" name="options">, it is possible to + configure restarting role per AFI/SAFI pair by this channel option. + The option is ignored if graceful restart is disabled by protocol-wide + option. Default: off in aware mode, on in full mode. +</descrip> + <sect1>Attributes <label id="bgp-attr"> @@ -2332,23 +2641,40 @@ some of them (marked with `<tt/O/') are optional. <p><code> protocol bgp { - local as 65000; # Use a private AS number + local 198.51.100.14 as 65000; # Use a private AS number neighbor 198.51.100.130 as 64496; # Our neighbor ... multihop; # ... which is connected indirectly - export filter { # We use non-trivial export rules - if source = RTS_STATIC then { # Export only static routes - # Assign our community - bgp_community.add((65000,64501)); - # Artificially increase path length - # by advertising local AS number twice - if bgp_path ~ [= 65000 =] then - bgp_path.prepend(65000); - accept; - } - reject; + ipv4 { + export filter { # We use non-trivial export rules + if source = RTS_STATIC then { # Export only static routes + # Assign our community + bgp_community.add((65000,64501)); + # Artificially increase path length + # by advertising local AS number twice + if bgp_path ~ [= 65000 =] then + bgp_path.prepend(65000); + accept; + } + reject; + }; + import all; + next hop self; # advertise this router as next hop + igp table myigptable4; # IGP table for routes with IPv4 nexthops + igp table myigptable6; # IGP table for routes with IPv6 nexthops + }; + ipv6 { + export filter mylargefilter; # We use a named filter + import all; + missing lladdr self; + igp table myigptable4; # IGP table for routes with IPv4 nexthops + igp table myigptable6; # IGP table for routes with IPv6 
nexthops + }; + ipv4 multicast { + import all; + export filter someotherfilter; + table mymulticasttable4; # Another IPv4 table, dedicated for multicast + igp table myigptable4; }; - import all; - source address 198.51.100.14; # Use a non-standard source address } </code> @@ -2358,7 +2684,7 @@ protocol bgp { <p>The Device protocol is not a real routing protocol. It doesn't generate any routes and it only serves as a module for getting information about network -interfaces from the kernel. +interfaces from the kernel. This protocol supports no channel. <p>Except for very unusual circumstances, you probably should include this protocol in the configuration since almost all other protocols require network @@ -2368,7 +2694,6 @@ interfaces to be defined for them to work with. <label id="device-config"> <p><descrip> - <tag><label id="device-scan-time">scan time <m/number/</tag> Time in seconds between two scans of the network interface list. On systems where we are notified about interface status changes @@ -2376,19 +2701,26 @@ interfaces to be defined for them to work with. list only in order to avoid confusion by lost notification messages, so the default time is set to a large value. - <tag><label id="device-primary">primary [ "<m/mask/" ] <m/prefix/</tag> - If a network interface has more than one network address, BIRD has to - choose one of them as a primary one. By default, BIRD chooses the - lexicographically smallest address as the primary one. - - This option allows to specify which network address should be chosen as - a primary one. Network addresses that match <m/prefix/ are preferred to - non-matching addresses. If more <cf/primary/ options are used, the first - one has the highest preference. If "<m/mask/" is specified, then such - <cf/primary/ option is relevant only to matching network interfaces. - - In all cases, an address marked by operating system as secondary cannot - be chosen as the primary one. 
+ <tag><label id="device-iface">interface <m/pattern/ [, <m/.../]</tag> + + By default, the Device protocol handles all interfaces without any + configuration. Interface definitions allow to specify optional + parameters for specific interfaces. See <ref id="proto-iface" + name="interface"> common option for detailed description. Currently only + one interface option is available: + + <tag><label id="device-preferred">preferred <m/ip/</tag> + If a network interface has more than one IP address, BIRD chooses one of + them as a preferred one. Preferred IP address is used as source address + for packets or announced next hop by routing protocols. Precisely, BIRD + chooses one preferred IPv4 address, one preferred IPv6 address and one + preferred link-local IPv6 address. By default, BIRD chooses the first + found IP address as the preferred one. + + This option allows to specify which IP address should be preferred. May + be used multiple times for different address classes (IPv4, IPv6, IPv6 + link-local). In all cases, an address marked by operating system as + secondary cannot be chosen as the primary one. </descrip> <p>As the Device protocol doesn't generate any routes, it cannot have @@ -2397,8 +2729,10 @@ any attributes. Example configuration looks like this: <p><code> protocol device { scan time 10; # Scan the interfaces often - primary "eth0" 192.168.1.1; - primary 192.168.0.0/16; + interface "eth0" { + preferred 192.168.1.1; + preferred 2001:db8:1:10::1; + }; } </code> @@ -2408,7 +2742,8 @@ protocol device { <p>The Direct protocol is a simple generator of device routes for all the directly connected networks according to the list of interfaces provided by the -kernel via the Device protocol. +kernel via the Device protocol. The Direct protocol supports both IPv4 and IPv6 +channels. <p>The question is whether it is a good idea to have such device routes in BIRD routing table. 
OS kernel usually handles device routes for directly connected @@ -2452,6 +2787,8 @@ on Linux systems BIRD cannot change non-BIRD route in the kernel routing table. <p><code> protocol direct { + ipv4; + ipv6; interface "-arc*", "*"; # Exclude the ARCnets } </code> @@ -2490,6 +2827,9 @@ kernel protocols to the same routing table and changing route destination (gateway) in an export filter of a kernel protocol does not work. Both limitations can be overcome using another routing table and the pipe protocol. +<p>The Kernel protocol supports both IPv4 and IPv6 channels; only one of them +can be configured in each protocol instance. + <sect1>Configuration <label id="krt-config"> @@ -2507,13 +2847,6 @@ limitations can be overcome using another routing table and the pipe protocol. routing daemons or by the system administrator. This is possible only on systems which support identification of route authorship. - <tag><label id="krt-device-routes">device routes <m/switch/</tag> - Enable export of device routes to the kernel routing table. By default, - such routes are rejected (with the exception of explicitly configured - device routes from the static protocol) regardless of the export filter - to protect device routes in kernel routing table (managed by OS itself) - from accidental overwriting or erasing. - <tag><label id="krt-kernel-table">kernel table <m/number/</tag> Select which kernel table should this particular instance of the Kernel protocol work with. Available only on systems supporting multiple @@ -2528,7 +2861,7 @@ limitations can be overcome using another routing table and the pipe protocol. routes from other sources (e.g. kernel device routes). Metric 0 has a special meaning of undefined metric, in which either OS default is used, or per-route metric can be set using <cf/krt_metric/ attribute. Default: - 0 (undefined). + 32. <tag><label id="krt-graceful-restart">graceful restart <m/switch/</tag> Participate in graceful restart recovery. 
If this option is enabled and @@ -2618,14 +2951,18 @@ protocol kernel { # Primary routing table learn; # Learn alien routes from the kernel persist; # Don't remove routes on bird shutdown scan time 10; # Scan kernel routing table every 10 seconds - import all; - export all; + ipv4 { + import all; + export all; + }; } protocol kernel { # Secondary routing table - table auxtable; kernel table 100; - export all; + ipv4 { + table auxtable; + export all; + }; } </code> @@ -2669,15 +3006,23 @@ each router detects all changes. <sect1>Configuration <label id="ospf-config"> -<p>In the main part of configuration, there can be multiple definitions of OSPF -areas, each with a different id. These definitions includes many other switches -and multiple definitions of interfaces. Definition of interface may contain many -switches and constant definitions and list of neighbors on nonbroadcast -networks. +<p>First, the desired OSPF version can be specified by using <cf/ospf v2/ or +<cf/ospf v3/ as a protocol type. By default, OSPFv2 is used. In the main part of +configuration, there can be multiple definitions of OSPF areas, each with a +different id. These definitions include many other switches and multiple +definitions of interfaces. Definition of interface may contain many switches and +constant definitions and list of neighbors on nonbroadcast networks. + +<p>OSPFv2 needs one IPv4 channel. OSPFv3 needs either one IPv6 channel, or one +IPv4 channel (<rfc id="5838">). Therefore, it is possible to use OSPFv3 for both +IPv4 and IPv6 routing, but it is necessary to have two protocol instances anyway. +If no channel is configured, appropriate channel is defined with default +parameters. <code> -protocol ospf <name> { +protocol ospf [v2|v3] <name> { rfc1583compat <switch>; + rfc5838 <switch>; instance id <num>; stub router <switch>; tick <num>; @@ -2778,15 +3123,23 @@ protocol ospf <name> { This option controls compatibility of routing table calculation with <rfc id="1583">. 
Default value is no. + <tag><label id="ospf-rfc5838">rfc5838 <m/switch/</tag> + Basic OSPFv3 is limited to IPv6 unicast routing. The <rfc id="5838"> + extension defines support for more address families (IPv4, IPv6, both + unicast and multicast). The extension is enabled by default, but can be + disabled if necessary, as it restricts the range of available instance + IDs. Default value is yes. + <tag><label id="ospf-instance-id">instance id <m/num/</tag> When multiple OSPF protocol instances are active on the same links, they should use different instance IDs to distinguish their packets. Although it could be done on per-interface basis, it is often preferred to set one instance ID to whole OSPF domain/topology (e.g., when multiple instances are used to represent separate logical topologies on the same - physical network). This option specifies the default instance ID for all - interfaces of the OSPF instance. Note that this option, if used, must - precede interface definitions. Default value is 0. + physical network). This option specifies the instance ID for all + interfaces of the OSPF instance, but can be overridden by + <cf/interface/ option. Default value is 0 unless OSPFv3-AF extended + address families are used, see <rfc id="5838"> for that case. <tag><label id="ospf-stub-router">stub router <M>switch</M></tag> This option configures the router to be a stub router, i.e., a router @@ -2807,8 +3160,8 @@ protocol ospf <name> { (equal-cost multipath) routes. Such routes are used when there are several directions to the destination, each with the same (computed) cost. This option also allows to specify a limit on maximum number of - nexthops in one route. By default, ECMP is disabled. If enabled, - default value of the limit is 16. + nexthops in one route. By default, ECMP is enabled if supported by + Kernel. Default value of the limit is 16. 
<tag><label id="ospf-merge-external">merge external <M>switch</M></tag> This option specifies whether OSPF should merge external routes from @@ -3047,7 +3400,7 @@ protocol ospf <name> { are immediately considered unreachable and only the address of the iface (instead of whole network prefix) is propagated. It is possible that some hardware drivers or platforms do not implement this feature. - Default value is no. + Default value is yes. <tag><label id="ospf-bfd">bfd <M>switch</M></tag> OSPF could use BFD protocol as an advisory mechanism for neighbor @@ -3134,14 +3487,14 @@ network. This attribute is read-only. Default is <cf/ospf_metric2 = 10000/ and <p><code> protocol ospf MyOSPF { - rfc1583compat yes; - tick 2; - export filter { - if source = RTS_BGP then { - ospf_metric1 = 100; - accept; - } - reject; + ipv4 { + export filter { + if source = RTS_BGP then { + ospf_metric1 = 100; + accept; + } + reject; + }; }; area 0.0.0.0 { interface "eth*" { @@ -3209,7 +3562,8 @@ routes to be passed from a table declared as primary (i.e., the one the pipe is connected to using the <cf/table/ configuration keyword) to the secondary one (declared using <cf/peer table/) and vice versa, depending on what's allowed by the filters. Export filters control export of routes from the primary table to -the secondary one, import filters control the opposite direction. +the secondary one, import filters control the opposite direction. Both tables +must be of the same nettype. <p>The Pipe protocol may work in the transparent mode mode or in the opaque mode. In the transparent mode, the Pipe protocol retransmits all routes from @@ -3239,13 +3593,14 @@ exporting a selected subset of one table to another one. <sect1>Configuration <label id="pipe-config"> +<p>Essentially, the Pipe protocol is just a channel connected to a table on both +sides. Therefore, the configuration block for <cf/protocol pipe/ shall directly +include standard channel config options; see the example below. 
+ <p><descrip> <tag><label id="pipe-peer-table">peer table <m/table/</tag> Defines secondary routing table to connect to. The primary one is selected by the <cf/table/ keyword. - - <tag><label id="pipe-mode">mode opaque|transparent</tag> - Specifies the mode for the pipe to work in. Default is transparent. </descrip> <sect1>Attributes @@ -3272,33 +3627,29 @@ Pipe protocol while decreasing their preferences and correcting their BGP paths to reflect the AS boundary crossing. <code> -table as1; # Define the tables -table as2; +ipv4 table as1; # Define the tables +ipv4 table as2; protocol kernel kern1 { # Synchronize them with the kernel - table as1; + ipv4 { table as1; export all; }; kernel table 1; } protocol kernel kern2 { - table as2; + ipv4 { table as2; export all; }; kernel table 2; } protocol bgp bgp1 { # The outside connections - table as1; + ipv4 { table as1; import all; export all; }; local as 1; neighbor 192.168.0.1 as 1001; - export all; - import all; } protocol bgp bgp2 { - table as2; + ipv4 { table as2; import all; export all; }; local as 2; neighbor 10.0.0.1 as 1002; - export all; - import all; } protocol pipe { # The Pipe @@ -3339,6 +3690,8 @@ addresses and choose a default route. BIRD implements router behavior as defined in <rfc id="4861">, router preferences and specific routes (<rfc id="4191">), and DNS extensions (<rfc id="6106">). +<p>The RAdv protocol supports just the IPv6 channel. + <sect1>Configuration <label id="radv-config"> @@ -3507,7 +3860,6 @@ definitions, prefix definitions and DNS definitions: option above. Default: no. </descrip> - <p>Prefix specific options <descrip> @@ -3559,7 +3911,6 @@ definitions, prefix definitions and DNS definitions: valid DNS servers. Default: 3 * <cf/max ra interval/. 
</descrip> - <p>DNSSL specific options: <descrip> @@ -3597,10 +3948,10 @@ definitions, prefix definitions and DNS definitions: <label id="radv-exam"> <p><code> -table radv_routes; # Manually configured routes go here +ipv6 table radv_routes; # Manually configured routes go here protocol static { - table radv_routes; + ipv6 { table radv_routes; }; route 2001:0DB8:4000::/48 unreachable; route 2001:0DB8:4010::/48 unreachable; @@ -3613,8 +3964,7 @@ protocol static { protocol radv { propagate routes yes; # Propagate the routes from the radv_routes table - table radv_routes; - export all; + ipv6 { table radv_routes; export all; }; interface "eth2" { max ra interval 5; # Fast failover with more routers @@ -3680,10 +4030,15 @@ pretty much obsolete. It is still usable on very small networks. <label id="rip-config"> <p>RIP configuration consists mainly of common protocol options and interface -definitions, most RIP options are interface specific. +definitions, most RIP options are interface specific. RIPng (RIP for IPv6) +protocol instance can be configured by using <cf/rip ng/ instead of just +<cf/rip/ as a protocol type. + +<p>RIP needs one IPv4 channel. RIPng needs one IPv6 channel. If no channel is +configured, appropriate channel is defined with default parameters. <code> -protocol rip [<name>] { +protocol rip [ng] [<name>] { infinity <number>; ecmp <switch> [limit <number>]; interface <interface pattern> { @@ -3732,8 +4087,8 @@ protocol rip [<name>] { (equal-cost multipath) routes. Such routes are used when there are several directions to the destination, each with the same (computed) cost. This option also allows to specify a limit on maximum number of - nexthops in one route. By default, ECMP is disabled. If enabled, - default value of the limit is 16. + nexthops in one route. By default, ECMP is enabled if supported by + Kernel. Default value of the limit is 16. 
<tag><label id="rip-iface">interface <m/pattern/ [, <m/.../] { <m/options/ }</tag> Interface definitions specify a set of interfaces on which the @@ -3879,7 +4234,7 @@ protocol rip [<name>] { unplugged), neighbors are immediately considered unreachable and all routes received from them are withdrawn. It is possible that some hardware drivers or platforms do not implement this feature. - Default: no. + Default: yes. </descrip> <sect1>Attributes @@ -3888,8 +4243,8 @@ protocol rip [<name>] { <p>RIP defines two route attributes: <descrip> - <tag><label id="rta-rip-metric">int rip_metric/</tag> - RIP metric of the route (ranging from 0 to <cf/infinity/). When routes + <tag>int <cf/rip_metric/</tag> + RIP metric of the route (ranging from 0 to <cf/infinity/). When routes from different RIP instances are available and all of them have the same preference, BIRD prefers the route with lowest <cf/rip_metric/. When a non-RIP route is exported to RIP, the default metric is 1. @@ -3906,8 +4261,10 @@ protocol rip [<name>] { <p><code> protocol rip { - import all; - export all; + ipv4 { + import all; + export all; + }; interface "eth*" { metric 2; port 1520; @@ -3921,6 +4278,188 @@ protocol rip { </code> +<sect>RPKI + +<sect1>Introduction + +<p>The Resource Public Key Infrastructure (RPKI) is a mechanism for origin +validation of BGP routes (RFC 6480). BIRD supports only so-called RPKI-based +origin validation. There is implemented RPKI to Router (RPKI-RTR) protocol (RFC +6810). It uses some of the RPKI data to allow a router to verify that the +autonomous system announcing an IP address prefix is in fact authorized to do +so. This is not crypto checked so can be violated. But it should prevent the +vast majority of accidental hijackings on the Internet today, e.g. the famous +Pakistani accidental announcement of YouTube's address space. + +<p>The RPKI-RTR protocol receives and maintains a set of ROAs from a cache +server (also called validator). 
You can validate routes (RFC 6483) using +function <cf/roa_check()/ in filter and set it as import filter at the BGP +protocol. BIRD should re-validate all of affected routes after RPKI update by +RFC 6811, but we don't support it yet! You can use a BIRD's client command +<cf>reload in <m/bgp_protocol_name/</cf> for manual call of revalidation of all +routes. + +<sect1>Supported transports +<p> +<itemize> + <item>Unprotected transport over TCP uses a port 323. The cache server + and BIRD router should be on the same trusted and controlled network + for security reasons. + <item>SSHv2 encrypted transport connection uses the normal SSH port + 22. +</itemize> + +<sect1>Configuration + +<p>We currently support just one cache server per protocol. However you can +define more RPKI protocols generally. + +<code> +protocol rpki [<name>] { + roa4 { table <tab>; }; + roa6 { table <tab>; }; + remote <ip> | "<domain>" [port <num>]; + port <num>; + refresh [keep] <num>; + retry [keep] <num>; + expire [keep] <num>; + transport tcp; + transport ssh { + bird private key "</path/to/id_rsa>"; + remote public key "</path/to/known_host>"; + user "<name>"; + }; +} +</code> + +<p>Also note that you have to specify the ROA channel. If you want to import +only IPv4 prefixes you have to specify only roa4 channel. Similarly with IPv6 +prefixes only. If you want to fetch both IPv4 and even IPv6 ROAs you have to +specify both channels. + +<sect2>RPKI protocol options +<p> +<descrip> + <tag>remote <m/ip/ | "<m/hostname/" [port <m/num/]</tag> Specifies + a destination address of the cache server. Can be specified by an IP + address or by full domain name string. Only one cache can be specified + per protocol. This option is required. + + <tag>port <m/num/</tag> Specifies the port number. The default port + number is 323 for transport without any encryption and 22 for transport + with SSH encryption. + + <tag>refresh [keep] <m/num/</tag> Time period in seconds. 
Tells how + long to wait before next attempting to poll the cache using a Serial + Query or a Reset Query packet. Must be lower than 86400 seconds (one + day). Too low value can cause a false positive detection of + network connection problems. A keyword <cf/keep/ suppresses updating + this value by a cache server. + Default: 3600 seconds + + <tag>retry [keep] <m/num/</tag> Time period in seconds between a failed + Serial/Reset Query and a next attempt. Maximum allowed value is 7200 + seconds (two hours). Too low value can cause a false positive + detection of network connection problems. A keyword <cf/keep/ + suppresses updating this value by a cache server. + Default: 600 seconds + + <tag>expire [keep] <m/num/</tag> Time period in seconds. Received + records are deleted if the client was unable to successfully refresh + data for this time period. Must be in range from 600 seconds (ten + minutes) to 172800 seconds (two days). A keyword <cf/keep/ + suppresses updating this value by a cache server. + Default: 7200 seconds + + <tag>transport tcp</tag> Unprotected transport over TCP. It's a default + transport. Should be used only on secure private networks. + Default: tcp + + <tag>transport ssh { <m/SSH transport options.../ }</tag> It enables a + SSHv2 transport encryption. Cannot be combined with a TCP transport. + Default: off +</descrip> + +<sect3>SSH transport options +<p> +<descrip> + <tag>bird private key "<m>/path/to/id_rsa</m>"</tag> + A path to the BIRD's private SSH key for authentication. + It can be a <cf><m>id_rsa</m></cf> file. + + <tag>remote public key "<m>/path/to/known_host</m>"</tag> + A path to the cache's public SSH key for verification identity + of the cache server. It could be a path to <cf><m>known_host</m></cf> file. + + <tag>user "<m/name/"</tag> + A SSH user name for authentication. This option is required. +</descrip> + +<sect1>Examples +<sect2>BGP origin validation +<p>Policy: Don't import <cf/ROA_INVALID/ routes. 
+<code> +roa4 table r4; +roa6 table r6; + +protocol rpki { + debug all; + + roa4 { table r4; }; + roa6 { table r6; }; + + # Please, do not use rpki-validator.realmv6.org in production + remote "rpki-validator.realmv6.org" port 8282; + + retry keep 5; + refresh keep 30; + expire 600; +} + +filter peer_in_v4 { + if (roa_check(r4, net, bgp_path.last) = ROA_INVALID) then + { + print "Ignore invalid ROA ", net, " for ASN ", bgp_path.last; + reject; + } + accept; +} + +protocol bgp { + debug all; + local as 65000; + neighbor 192.168.2.1 as 65001; + ipv4 { + import filter peer_in_v4; + export none; + }; +} +</code> + +<sect2>SSHv2 transport encryption +<p> +<code> +roa4 table r4; +roa6 table r6; + +protocol rpki { + debug all; + + roa4 { table r4; }; + roa6 { table r6; }; + + remote 127.0.0.1 port 2345; + transport ssh { + bird private key "/home/birdgeek/.ssh/id_rsa"; + remote public key "/home/birdgeek/.ssh/known_hosts"; + user "birdgeek"; + }; + + # Default interval values +} +</code> + + <sect>Static <label id="static"> @@ -3932,21 +4471,10 @@ return packets as undeliverable if they are in your IP block, you don't have any specific destination for them and you don't want to send them out through the default route to prevent routing loops). -<p>There are five types of static routes: `classical' routes telling to forward -packets to a neighboring router, multipath routes specifying several (possibly -weighted) neighboring routers, device routes specifying forwarding to hosts on a -directly connected network, recursive routes computing their nexthops by doing -route table lookups for a given IP, and special routes (sink, blackhole etc.) -which specify a special action to be done instead of forwarding the packet. 
- -<p>When the particular destination is not available (the interface is down or -the next hop of the route is not a neighbor at the moment), Static just -uninstalls the route from the table it is connected to and adds it again as soon -as the destination becomes adjacent again. - <p>There are three classes of definitions in Static protocol configuration -- global options, static route definitions, and per-route options. Usually, the definition of the protocol contains mainly a list of static routes. +Static routes have no specific attributes. <p>Global options: @@ -3965,32 +4493,169 @@ definition of the protocol contains mainly a list of static routes. <p>Route definitions (each may also contain a block of per-route options): -<descrip> - <tag><label id="static-route-via-ip">route <m/prefix/ via <m/ip/</tag> - Static route through a neighboring router. For link-local next hops, - interface can be specified as a part of the address (e.g., - <cf/via fe80::1234%eth0/). +<sect1>Regular routes; MPLS switching rules - <tag><label id="static-route-via-mpath">route <m/prefix/ multipath via <m/ip/ [weight <m/num/] [bfd <m/switch/] [via <m/.../]</tag> - Static multipath route. Contains several nexthops (gateways), possibly - with their weights. +<p>There exist several types of routes; keep in mind that <m/prefix/ syntax is +<ref id="type-prefix" name="dependent on network type">. - <tag><label id="static-route-via-iface">route <m/prefix/ via <m/"interface"/</tag> - Static device route through an interface to hosts on a directly - connected network. +<descrip> + <tag>route <m/prefix/ via <m/ip/|<m/"interface"/ [mpls <m/num/[/<m/num/[/<m/num/[...]]]]</tag> + Next hop routes may bear one or more <ref id="route-next-hop" name="next hops">. + Every next hop is preceded by <cf/via/ and configured as shown. - <tag><label id="static-route-recursive">route <m/prefix/ recursive <m/ip/</tag> - Static recursive route, its nexthop depends on a route table lookup for - given IP address. 
+ <tag>route <m/prefix/ recursive <m/ip/ [mpls <m/num/[/<m/num/[/<m/num/[...]]]]</tag> + Recursive nexthop resolves the given IP in the configured IGP table and + uses that route's next hop. The MPLS stacks are concatenated; on top is + the IGP's nexthop stack and on bottom is this route's stack. - <tag><label id="static-route-drop">route <m/prefix/ blackhole|unreachable|prohibit</tag> + <tag>route <m/prefix/ blackhole|unreachable|prohibit</tag> Special routes specifying to silently drop the packet, return it as unreachable or return it as administratively prohibited. First two targets are also known as <cf/drop/ and <cf/reject/. </descrip> -<p>Per-route options: +<p>When the particular destination is not available (the interface is down or +the next hop of the route is not a neighbor at the moment), Static just +uninstalls the route from the table it is connected to and adds it again as soon +as the destination becomes adjacent again. + +<sect1>Route Origin Authorization + +<p>The ROA config is just <cf>route <m/prefix/ max <m/int/ as <m/int/</cf> with no nexthop. + +<sect1>Flowspec +<label id="flowspec-network-type"> + +<p>The flow specification are rules for routers and firewalls for filtering +purpose. It is described by <rfc id="5575">. There are 3 types of arguments: +<m/inet4/ or <m/inet6/ prefixes, bitmasks matching expressions and numbers +matching expressions. + +Bitmasks matching is written using <m/value/<cf>/</cf><m/mask/ or +<cf/!/<m/value/<cf>/</cf><m/mask/ pairs. It means that <cf/(/<m/data/ <cf/&/ +<m/mask/<cf/)/ is or is not equal to <m/value/. + +Numbers matching is a matching sequence of numbers and ranges separated by +commas (<cf/,/) (e.g. <cf/10,20,30/). Ranges can be written using double dots +<cf/../ notation (e.g. <cf/80..90,120..124/). An alternative notation is a +sequence of one or more pairs of relational operators and values separated by +logical operators <cf/&&/ or <cf/||/. 
Allowed relational operators are <cf/=/, +<cf/!=/, <cf/</, <cf/<=/, <cf/>/, <cf/>=/, <cf/true/ and <cf/false/. + +<sect2>IPv4 Flowspec + +<p><descrip> + <tag><label id="flow-dst">dst <m/inet4/</tag> + Set a matching destination prefix (e.g. <cf>dst 192.168.0.0/16</cf>). + Only this option is mandatory in IPv4 Flowspec. + + <tag><label id="flow-src">src <m/inet4/</tag> + Set a matching source prefix (e.g. <cf>src 10.0.0.0/8</cf>). + + <tag><label id="flow-proto">proto <m/numbers-match/</tag> + Set a matching IP protocol numbers (e.g. <cf/proto 6/). + + <tag><label id="flow-port">port <m/numbers-match/</tag> + Set a matching source or destination TCP/UDP port numbers (e.g. + <cf>port 1..1023,1194,3306</cf>). + + <tag><label id="flow-dport">dport <m/numbers-match/</tag> + Set a matching destination port numbers (e.g. <cf>dport 49151</cf>). + + <tag><label id="flow-sport">sport <m/numbers-match/</tag> + Set a matching source port numbers (e.g. <cf>sport = 0</cf>). + + <tag><label id="flow-icmp-type">icmp type <m/numbers-match/</tag> + Set a matching type field number of an ICMP packet (e.g. <cf>icmp type + 3</cf>) + + <tag><label id="flow-icmp-code">icmp code <m/numbers-match/</tag> + Set a matching code field number of an ICMP packet (e.g. <cf>icmp code + 1</cf>) + + <tag><label id="flow-tcp-flags">tcp flags <m/bitmask-match/</tag> + Set a matching bitmask for TCP header flags (aka control bits) (e.g. + <cf>tcp flags 0x03/0x0f;</cf>). The maximum length of mask is 12 bits + (0xfff). + + <tag><label id="flow-length">length <m/numbers-match/</tag> + Set a matching packet length (e.g. <cf>length > 1500;</cf>) + + <tag><label id="flow-dscp">dscp <m/numbers-match/</tag> + Set a matching DiffServ Code Point number (e.g. <cf>dscp 8..15;</cf>). + + <tag><label id="flow-fragment">fragment <m/fragmentation-type/</tag> + Set a matching type of packet fragmentation. 
Allowed fragmentation + types are <cf/dont_fragment/, <cf/is_fragment/, <cf/first_fragment/, + <cf/last_fragment/ (e.g. <cf>fragment is_fragment && + !dont_fragment</cf>). +</descrip> + +<p><code> +protocol static { + flow4; + + route flow4 { + dst 10.0.0.0/8; + port > 24 && < 30 || 40..50,60..70,80 && >= 90; + tcp flags 0x03/0x0f; + length > 1024; + dscp = 63; + fragment dont_fragment, is_fragment || !first_fragment; + }; +} +</code> +<sect2>Differences for IPv6 Flowspec + +<p>Flowspec IPv6 are same as Flowspec IPv4 with a few exceptions. +<itemize> + <item>Prefixes <m/inet6/ can be specified not only with prefix length, + but with prefix <cf/offset/ <m/num/ too (e.g. + <cf>::1234:5678:9800:0000/101 offset 64</cf>). Offset means to don't + care of <m/num/ first bits. + <item>IPv6 Flowspec hasn't mandatory any flowspec component. + <item>In IPv6 packets, there is a matching the last next header value + for a matching IP protocol number (e.g. <cf>next header 6</cf>). + <item>It is not possible to set <cf>dont_fragment</cf> as a type of + packet fragmentation. +</itemize> + +<p><descrip> + <tag><label id="flow6-dst">dst <m/inet6/ [offset <m/num/]</tag> + Set a matching destination IPv6 prefix (e.g. <cf>dst + ::1c77:3769:27ad:a11a/128 offset 64</cf>). + + <tag><label id="flow6-src">src <m/inet6/ [offset <m/num/]</tag> + Set a matching source IPv6 prefix (e.g. <cf>src fe80::/64</cf>). + + <tag><label id="flow6-next-header">next header <m/numbers-match/</tag> + Set a matching IP protocol numbers (e.g. <cf>next header != 6</cf>). + + <tag><label id="flow6-label">label <m/bitmask-match/</tag> + Set a 20-bit bitmask for matching Flow Label field in IPv6 packets + (e.g. <cf>label 0x8e5/0x8e5</cf>). 
+</descrip> + +<p><code> +protocol static { + flow6 { table myflow6; }; + + route flow6 { + dst fec0:1122:3344:5566:7788:99aa:bbcc:ddee/128; + src 0000:0000:0000:0001:1234:5678:9800:0000/101 offset 63; + next header = 23; + sport > 24 && < 30 || = 40 || 50,60,70..80; + dport = 50; + tcp flags 0x03/0x0f, !0/0xff || 0x33/0x33; + fragment !is_fragment || !first_fragment; + label 0xaaaa/0xaaaa && 0x33/0x33; + }; +} +</code> + +<sect1>Per-route options +<p> <descrip> <tag><label id="static-route-bfd">bfd <m/switch/</tag> The Static protocol could use BFD protocol for next hop liveness @@ -4016,16 +4681,14 @@ definition of the protocol contains mainly a list of static routes. exported to the OSPF protocol. </descrip> -<p>Static routes have no specific attributes. - -<p>Example static config might look like this: +<sect1>Example static config <p><code> protocol static { - table testable; # Connect to a non-default routing table + ipv4 { table testable; }; # Connect to a non-default routing table check link; # Advertise routes only if link is up route 0.0.0.0/0 via 198.51.100.130; # Default route - route 10.0.0.0/8 multipath # Multipath route + route 10.0.0.0/8 # Multipath route via 198.51.100.10 weight 2 via 198.51.100.20 bfd # BFD-controlled next hop via 192.0.2.1; @@ -4056,7 +4719,6 @@ versions of BIRD: <itemize> <item>Opaque LSA's <item>Route aggregation and flap dampening -<item>Multipath routes <item>Multicast routing protocols <item>Ports to other systems </itemize> diff --git a/filter/Makefile b/filter/Makefile index 2de598da..6bada8ca 100644 --- a/filter/Makefile +++ b/filter/Makefile @@ -1,5 +1,8 @@ -source=f-util.c filter.c tree.c trie.c -root-rel=../ -dir-name=filter +src := filter.c f-util.c tree.c trie.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../Rules +tests_src := tree_test.c filter_test.c trie_test.c +tests_targets := $(tests_targets) $(tests-target-files) +tests_objs := $(tests_objs) $(src-o-files) diff --git a/filter/config.Y 
b/filter/config.Y index 1ef5a3a8..6b7bedaf 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -227,7 +227,6 @@ f_generate_ec(u16 kind, struct f_inst *tk, struct f_inst *tv) cf_error("Can't operate with key of non-integer/IPv4 type in EC constructor"); } -#ifndef IPV6 /* IP->Quad implicit conversion */ else if (tk->fi_code == FI_CONSTANT_INDIRECT) { c1 = 1; @@ -239,13 +238,12 @@ f_generate_ec(u16 kind, struct f_inst *tk, struct f_inst *tv) else if (val->type == T_QUAD) { ipv4_used = 1; key = val->val.i; } - else if (val->type == T_IP) { - ipv4_used = 1; key = ipa_to_u32(val->val.px.ip); + else if ((val->type == T_IP) && ipa_is_ip4(val->val.ip)) { + ipv4_used = 1; key = ipa_to_u32(val->val.ip); } else cf_error("Can't operate with key of non-integer/IPv4 type in EC constructor"); } -#endif if (tv->fi_code == FI_CONSTANT) { if (tv->aux != T_INT) @@ -317,25 +315,90 @@ f_generate_lc(struct f_inst *t1, struct f_inst *t2, struct f_inst *t3) return rv; } +/* + * Remove all new lines and doubled whitespaces + * and convert all tabulators to spaces + * and return a copy of string + */ +char * +assert_copy_expr(const char *start, size_t len) +{ + /* XXX: Allocates maybe a little more memory than we really finally need */ + char *str = cfg_alloc(len + 1); + + char *dst = str; + const char *src = start - 1; + const char *end = start + len; + while (++src < end) + { + if (*src == '\n') + continue; + + /* Skip doubled whitespaces */ + if (src != start) + { + const char *prev = src - 1; + if ((*src == ' ' || *src == '\t') && (*prev == ' ' || *prev == '\t')) + continue; + } + + if (*src == '\t') + *dst = ' '; + else + *dst = *src; + + dst++; + } + *dst = '\0'; + return str; +} + +/* + * assert_done - create f_instruction of bt_assert + * @expr: expression in bt_assert() + * @start: pointer to first char of test expression + * @end: pointer to the last char of test expression + */ +static struct f_inst * +assert_done(struct f_inst *expr, const char *start, const char *end) +{ + 
struct f_inst *i; + i = f_new_inst(FI_ASSERT); + i->a1.p = expr; + + if (end >= start) + { + i->a2.p = assert_copy_expr(start, end - start + 1); + } + else + { + /* this is a break of lexer buffer */ + i->a2.p = "???"; + } + + return i; +} CF_DECLS CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, ACCEPT, REJECT, ERROR, QUITBIRD, - INT, BOOL, IP, PREFIX, PAIR, QUAD, EC, LC, + INT, BOOL, IP, TYPE, PREFIX, RD, PAIR, QUAD, EC, LC, SET, STRING, BGPMASK, BGPPATH, CLIST, ECLIST, LCLIST, IF, THEN, ELSE, CASE, TRUE, FALSE, RT, RO, UNKNOWN, GENERIC, - FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, CAST, DEST, IFNAME, IFINDEX, + FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, DEST, IFNAME, IFINDEX, PREFERENCE, - LEN, + ROA_CHECK, ASN, + IS_V4, IS_V6, + LEN, MAXLEN, DEFINED, ADD, DELETE, CONTAINS, RESET, PREPEND, FIRST, LAST, LAST_NONAGGREGATED, MATCH, - ROA_CHECK, EMPTY, - FILTER, WHERE, EVAL) + FILTER, WHERE, EVAL, + BT_ASSERT, BT_TEST_SUITE, FORMAT) %nonassoc THEN %nonassoc ELSE @@ -348,9 +411,11 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, %type <i32> cnum %type <e> pair_item ec_item lc_item set_item switch_item set_items switch_items switch_body %type <trie> fprefix_set -%type <v> set_atom switch_atom fprefix fprefix_s fipa +%type <v> set_atom switch_atom fipa +%type <px> fprefix %type <s> decls declsn one_decl function_params -%type <h> bgp_path bgp_path_tail1 bgp_path_tail2 +%type <h> bgp_path bgp_path_tail +%type <t> get_cf_position CF_GRAMMAR @@ -370,11 +435,27 @@ filter_eval: EVAL term { f_eval_int($2); } ; +CF_ADDTO(conf, bt_test_suite) +bt_test_suite: + BT_TEST_SUITE '(' SYM ',' text ')' { + if (!($3->class & SYM_FUNCTION)) + cf_error("Function expected"); + + struct f_bt_test_suite *t = cfg_alloc(sizeof(struct f_bt_test_suite)); + t->fn = $3->def; + t->fn_name = $3->name; + t->dsc = $5; + + add_tail(&new_config->tests, &t->n); + } + ; + type: INT { $$ = T_INT; } | BOOL { $$ = T_BOOL; } | IP { $$ = T_IP; } - | PREFIX { $$ = T_PREFIX; } + | RD { $$ = T_RD; } + | 
PREFIX { $$ = T_NET; } | PAIR { $$ = T_PAIR; } | QUAD { $$ = T_QUAD; } | EC { $$ = T_EC; } @@ -396,7 +477,7 @@ type: $$ = T_SET; break; - case T_PREFIX: + case T_NET: $$ = T_PREFIX_SET; break; @@ -527,7 +608,8 @@ block: * Complex types, their bison value is struct f_val */ fipa: - IPA %prec PREFIX_DUMMY { $$.type = T_IP; $$.val.px.ip = $1; } + IP4 %prec PREFIX_DUMMY { $$.type = T_IP; $$.val.ip = ipa_from_ip4($1); } + | IP6 %prec PREFIX_DUMMY { $$.type = T_IP; $$.val.ip = ipa_from_ip6($1); } ; @@ -541,7 +623,6 @@ fipa: set_atom: NUM { $$.type = T_INT; $$.val.i = $1; } - | RTRID { $$.type = T_QUAD; $$.val.i = $1; } | fipa { $$ = $1; } | ENUM { $$.type = pair_a($1); $$.val.i = pair_b($1); } | '(' term ')' { @@ -558,7 +639,6 @@ set_atom: switch_atom: NUM { $$.type = T_INT; $$.val.i = $1; } | '(' term ')' { $$.type = T_INT; $$.val.i = f_eval_int($2); } - | RTRID { $$.type = T_QUAD; $$.val.i = $1; } | fipa { $$ = $1; } | ENUM { $$.type = pair_a($1); $$.val.i = pair_b($1); } ; @@ -631,26 +711,20 @@ switch_items: | switch_items ',' switch_item { $$ = f_merge_items($1, $3); } ; -fprefix_s: - IPA '/' NUM %prec '/' { - if (($3 < 0) || ($3 > MAX_PREFIX_LENGTH) || !ip_is_prefix($1, $3)) cf_error("Invalid network prefix: %I/%d.", $1, $3); - $$.type = T_PREFIX; $$.val.px.ip = $1; $$.val.px.len = $3; - } - ; - fprefix: - fprefix_s { $$ = $1; } - | fprefix_s '+' { $$ = $1; $$.val.px.len |= LEN_PLUS; } - | fprefix_s '-' { $$ = $1; $$.val.px.len |= LEN_MINUS; } - | fprefix_s '{' NUM ',' NUM '}' { - if (! 
((0 <= $3) && ($3 <= $5) && ($5 <= MAX_PREFIX_LENGTH))) cf_error("Invalid prefix pattern range: {%d, %d}.", $3, $5); - $$ = $1; $$.val.px.len |= LEN_RANGE | ($3 << 16) | ($5 << 8); + net_ip_ { $$.net = $1; $$.lo = $1.pxlen; $$.hi = $1.pxlen; } + | net_ip_ '+' { $$.net = $1; $$.lo = $1.pxlen; $$.hi = net_max_prefix_length[$1.type]; } + | net_ip_ '-' { $$.net = $1; $$.lo = 0; $$.hi = $1.pxlen; } + | net_ip_ '{' NUM ',' NUM '}' { + $$.net = $1; $$.lo = $3; $$.hi = $5; + if (($3 > $5) || ($5 > net_max_prefix_length[$1.type])) + cf_error("Invalid prefix pattern range: {%u, %u}", $3, $5); } ; fprefix_set: - fprefix { $$ = f_new_trie(cfg_mem, sizeof(struct f_trie_node)); trie_add_fprefix($$, &($1.val.px)); } - | fprefix_set ',' fprefix { $$ = $1; trie_add_fprefix($$, &($3.val.px)); } + fprefix { $$ = f_new_trie(cfg_mem, sizeof(struct f_trie_node)); trie_add_prefix($$, &($1.net), $1.lo, $1.hi); } + | fprefix_set ',' fprefix { $$ = $1; trie_add_prefix($$, &($3.net), $3.lo, $3.hi); } ; switch_body: /* EMPTY */ { $$ = NULL; } @@ -678,33 +752,26 @@ bgp_path_expr: ; bgp_path: - PO bgp_path_tail1 PC { $$ = $2; } - | '/' bgp_path_tail2 '/' { $$ = $2; } + PO bgp_path_tail PC { $$ = $2; } ; -bgp_path_tail1: - NUM bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN; $$->val = $1; } - | NUM DDOT NUM bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $4; $$->kind = PM_ASN_RANGE; $$->val = $1; $$->val2 = $3; } - | '*' bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASTERISK; } - | '?' 
bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_QUESTION; } - | bgp_path_expr bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN_EXPR; $$->val = (uintptr_t) $1; } +bgp_path_tail: + NUM bgp_path_tail { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN; $$->val = $1; } + | NUM DDOT NUM bgp_path_tail { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $4; $$->kind = PM_ASN_RANGE; $$->val = $1; $$->val2 = $3; } + | '*' bgp_path_tail { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASTERISK; } + | '?' bgp_path_tail { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_QUESTION; } + | bgp_path_expr bgp_path_tail { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN_EXPR; $$->val = (uintptr_t) $1; } | { $$ = NULL; } ; -bgp_path_tail2: - NUM bgp_path_tail2 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN; $$->val = $1; } - | '?' 
bgp_path_tail2 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASTERISK; } - | { $$ = NULL; } - ; - constant: NUM { $$ = f_new_inst(FI_CONSTANT); $$->aux = T_INT; $$->a2.i = $1; } | TRUE { $$ = f_new_inst(FI_CONSTANT); $$->aux = T_BOOL; $$->a2.i = 1; } | FALSE { $$ = f_new_inst(FI_CONSTANT); $$->aux = T_BOOL; $$->a2.i = 0; } | TEXT { $$ = f_new_inst(FI_CONSTANT); $$->aux = T_STRING; $$->a2.p = $1; } - | fipa { NEW_F_VAL; $$ = f_new_inst(FI_CONSTANT_INDIRECT); $$->a1.p = val; *val = $1; } - | fprefix_s {NEW_F_VAL; $$ = f_new_inst(FI_CONSTANT_INDIRECT); $$->a1.p = val; *val = $1; } - | RTRID { $$ = f_new_inst(FI_CONSTANT); $$->aux = T_QUAD; $$->a2.i = $1; } + | fipa { NEW_F_VAL; $$ = f_new_inst(FI_CONSTANT_INDIRECT); $$->a1.p = val; *val = $1; } + | VPN_RD { NEW_F_VAL; $$ = f_new_inst(FI_CONSTANT_INDIRECT); val->type = T_RD; val->val.ec = $1; $$->a1.p = val; } + | net_ { NEW_F_VAL; $$ = f_new_inst(FI_CONSTANT_INDIRECT); val->type = T_NET; val->val.net = $1; $$->a1.p = val; } | '[' set_items ']' { DBG( "We've got a set here..." 
); $$ = f_new_inst(FI_CONSTANT); $$->aux = T_SET; $$->a2.p = build_tree($2); DBG( "ook\n" ); } | '[' fprefix_set ']' { $$ = f_new_inst(FI_CONSTANT); $$->aux = T_PREFIX_SET; $$->a2.p = $2; } | ENUM { $$ = f_new_inst(FI_CONSTANT); $$->aux = $1 >> 16; $$->a2.i = $1 & 0xffff; } @@ -765,11 +832,10 @@ symbol: static_attr: FROM { $$ = f_new_static_attr(T_IP, SA_FROM, 1); } | GW { $$ = f_new_static_attr(T_IP, SA_GW, 1); } - | NET { $$ = f_new_static_attr(T_PREFIX, SA_NET, 0); } + | NET { $$ = f_new_static_attr(T_NET, SA_NET, 0); } | PROTO { $$ = f_new_static_attr(T_STRING, SA_PROTO, 0); } | SOURCE { $$ = f_new_static_attr(T_ENUM_RTS, SA_SOURCE, 0); } | SCOPE { $$ = f_new_static_attr(T_ENUM_SCOPE, SA_SCOPE, 1); } - | CAST { $$ = f_new_static_attr(T_ENUM_RTC, SA_CAST, 0); } | DEST { $$ = f_new_static_attr(T_ENUM_RTD, SA_DEST, 1); } | IFNAME { $$ = f_new_static_attr(T_STRING, SA_IFNAME, 0); } | IFINDEX { $$ = f_new_static_attr(T_INT, SA_IFINDEX, 0); } @@ -804,8 +870,13 @@ term: | rtadot dynamic_attr { $$ = f_new_inst_da(FI_EA_GET, $2); } + | term '.' IS_V4 { $$ = f_new_inst(FI_IS_V4); $$->a1.p = $1; } + | term '.' TYPE { $$ = f_new_inst(FI_TYPE); $$->a1.p = $1; } | term '.' IP { $$ = f_new_inst(FI_IP); $$->a1.p = $1; $$->aux = T_IP; } + | term '.' RD { $$ = f_new_inst(FI_ROUTE_DISTINGUISHER); $$->a1.p = $1; $$->aux = T_RD; } | term '.' LEN { $$ = f_new_inst(FI_LENGTH); $$->a1.p = $1; } + | term '.' MAXLEN { $$ = f_new_inst(FI_ROA_MAXLEN); $$->a1.p = $1; } + | term '.' ASN { $$ = f_new_inst(FI_ROA_ASN); $$->a1.p = $1; } | term '.' MASK '(' term ')' { $$ = f_new_inst(FI_IP_MASK); $$->a1.p = $1; $$->a2.p = $5; } | term '.' FIRST { $$ = f_new_inst(FI_AS_PATH_FIRST); $$->a1.p = $1; } | term '.' 
LAST { $$ = f_new_inst(FI_AS_PATH_LAST); $$->a1.p = $1; } @@ -828,8 +899,12 @@ term: | DELETE '(' term ',' term ')' { $$ = f_new_inst(FI_CLIST_ADD_DEL); $$->a1.p = $3; $$->a2.p = $5; $$->aux = 'd'; } | FILTER '(' term ',' term ')' { $$ = f_new_inst(FI_CLIST_ADD_DEL); $$->a1.p = $3; $$->a2.p = $5; $$->aux = 'f'; } - | ROA_CHECK '(' SYM ')' { $$ = f_generate_roa_check($3, NULL, NULL); } - | ROA_CHECK '(' SYM ',' term ',' term ')' { $$ = f_generate_roa_check($3, $5, $7); } + | ROA_CHECK '(' rtable ')' { $$ = f_generate_roa_check($3, NULL, NULL); } + | ROA_CHECK '(' rtable ',' term ',' term ')' { $$ = f_generate_roa_check($3, $5, $7); } + + | FORMAT '(' term ')' { $$ = f_new_inst(FI_FORMAT); $$->a1.p = $3; } + +/* | term '.' LEN { $$->code = P('P','l'); } */ /* function_call is inlined here */ | SYM '(' var_list ')' { @@ -948,12 +1023,18 @@ cmd: $$->a2.p = build_tree( $4 ); } - | rtadot dynamic_attr '.' EMPTY ';' { $$ = f_generate_empty($2); } | rtadot dynamic_attr '.' PREPEND '(' term ')' ';' { $$ = f_generate_complex( FI_PATH_PREPEND, 'x', $2, $6 ); } | rtadot dynamic_attr '.' ADD '(' term ')' ';' { $$ = f_generate_complex( FI_CLIST_ADD_DEL, 'a', $2, $6 ); } | rtadot dynamic_attr '.' DELETE '(' term ')' ';' { $$ = f_generate_complex( FI_CLIST_ADD_DEL, 'd', $2, $6 ); } | rtadot dynamic_attr '.' 
FILTER '(' term ')' ';' { $$ = f_generate_complex( FI_CLIST_ADD_DEL, 'f', $2, $6 ); } + | BT_ASSERT '(' get_cf_position term get_cf_position ')' ';' { $$ = assert_done($4, $3 + 1, $5 - 1); } ; +get_cf_position: +{ + $$ = cf_text; +}; + + CF_END diff --git a/filter/f-util.c b/filter/f-util.c index 42b08868..68aecd73 100644 --- a/filter/f-util.c +++ b/filter/f-util.c @@ -27,7 +27,7 @@ struct f_inst * f_new_inst_da(enum f_instruction_code fi_code, struct f_dynamic_attr da) { struct f_inst *ret = f_new_inst(fi_code); - ret->aux = da.type; + ret->aux = (da.f_type << 8) | da.type; ret->a2.i = da.ea_code; return ret; } @@ -60,9 +60,8 @@ f_generate_complex(int operation, int operation_aux, struct f_dynamic_attr da, s return set_dyn; } - struct f_inst * -f_generate_roa_check(struct symbol *sym, struct f_inst *prefix, struct f_inst *asn) +f_generate_roa_check(struct rtable_config *table, struct f_inst *prefix, struct f_inst *asn) { struct f_inst_roa_check *ret = cfg_allocz(sizeof(struct f_inst_roa_check)); ret->i.fi_code = FI_ROA_CHECK; @@ -71,9 +70,9 @@ f_generate_roa_check(struct symbol *sym, struct f_inst *prefix, struct f_inst *a ret->i.arg2 = asn; /* prefix == NULL <-> asn == NULL */ - if ((sym->class != SYM_ROA) || ! 
sym->def) - cf_error("%s is not a ROA table", sym->name); - ret->rtc = sym->def; + if (table->addr_type != NET_ROA4 && table->addr_type != NET_ROA6) + cf_error("%s is not a ROA table", table->name); + ret->rtc = table; return &ret->i; } diff --git a/filter/filter.c b/filter/filter.c index 023f7e2f..28603f27 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -39,6 +39,8 @@ #include "lib/socket.h" #include "lib/string.h" #include "lib/unaligned.h" +#include "lib/net.h" +#include "lib/ip.h" #include "nest/route.h" #include "nest/protocol.h" #include "nest/iface.h" @@ -48,6 +50,19 @@ #define CMP_ERROR 999 +void (*bt_assert_hook)(int result, struct f_inst *assert); + +static struct adata undef_adata; /* adata of length 0 used for undefined */ + +/* Special undef value for paths and clists */ +static inline int +undef_value(struct f_val v) +{ + return ((v.type == T_PATH) || (v.type == T_CLIST) || + (v.type == T_ECLIST) || (v.type == T_LCLIST)) && + (v.val.ad == &undef_adata); +} + static struct adata * adata_empty(struct linpool *pool, int l) { @@ -92,17 +107,8 @@ pm_format(struct f_path_mask *p, buffer *buf) buffer_puts(buf, "=]"); } -static inline int -uint_cmp(uint i1, uint i2) -{ - return (int)(i1 > i2) - (int)(i1 < i2); -} - -static inline int -u64_cmp(u64 i1, u64 i2) -{ - return (int)(i1 > i2) - (int)(i1 < i2); -} +static inline int val_is_ip4(const struct f_val v) +{ return (v.type == T_IP) && ipa_is_ip4(v.val.ip); } static inline int lcomm_cmp(lcomm v1, lcomm v2) @@ -128,21 +134,17 @@ lcomm_cmp(lcomm v1, lcomm v2) int val_compare(struct f_val v1, struct f_val v2) { - int rc; - if (v1.type != v2.type) { if (v1.type == T_VOID) /* Hack for else */ return -1; if (v2.type == T_VOID) return 1; -#ifndef IPV6 /* IP->Quad implicit conversion */ - if ((v1.type == T_QUAD) && (v2.type == T_IP)) - return uint_cmp(v1.val.i, ipa_to_u32(v2.val.px.ip)); - if ((v1.type == T_IP) && (v2.type == T_QUAD)) - return uint_cmp(ipa_to_u32(v1.val.px.ip), v2.val.i); -#endif + if ((v1.type 
== T_QUAD) && val_is_ip4(v2)) + return uint_cmp(v1.val.i, ipa_to_u32(v2.val.ip)); + if (val_is_ip4(v1) && (v2.type == T_QUAD)) + return uint_cmp(ipa_to_u32(v1.val.ip), v2.val.i); debug( "Types do not match in val_compare\n" ); return CMP_ERROR; @@ -158,15 +160,14 @@ val_compare(struct f_val v1, struct f_val v2) case T_QUAD: return uint_cmp(v1.val.i, v2.val.i); case T_EC: + case T_RD: return u64_cmp(v1.val.ec, v2.val.ec); case T_LC: return lcomm_cmp(v1.val.lc, v2.val.lc); case T_IP: - return ipa_compare(v1.val.px.ip, v2.val.px.ip); - case T_PREFIX: - if (rc = ipa_compare(v1.val.px.ip, v2.val.px.ip)) - return rc; - return uint_cmp(v1.val.px.len, v2.val.px.len); + return ipa_compare(v1.val.ip, v2.val.ip); + case T_NET: + return net_compare(v1.val.net, v2.val.net); case T_STRING: return strcmp(v1.val.s, v2.val.s); default: @@ -237,38 +238,26 @@ val_same(struct f_val v1, struct f_val v2) } } -void -fprefix_get_bounds(struct f_prefix *px, int *l, int *h) -{ - *l = *h = px->len & LEN_MASK; - - if (px->len & LEN_MINUS) - *l = 0; - - else if (px->len & LEN_PLUS) - *h = MAX_PREFIX_LENGTH; - - else if (px->len & LEN_RANGE) - { - *l = 0xff & (px->len >> 16); - *h = 0xff & (px->len >> 8); - } -} - static int clist_set_type(struct f_tree *set, struct f_val *v) { - switch (set->from.type) { + switch (set->from.type) + { case T_PAIR: v->type = T_PAIR; return 1; + case T_QUAD: -#ifndef IPV6 - case T_IP: -#endif v->type = T_QUAD; return 1; - break; + + case T_IP: + if (val_is_ip4(set->from) && val_is_ip4(set->to)) + { + v->type = T_QUAD; + return 1; + } + /* Fall through */ default: v->type = T_VOID; return 0; @@ -471,11 +460,9 @@ val_in_range(struct f_val v1, struct f_val v2) if (((v1.type == T_PAIR) || (v1.type == T_QUAD)) && (v2.type == T_CLIST)) return int_set_contains(v2.val.ad, v1.val.i); -#ifndef IPV6 /* IP->Quad implicit conversion */ - if ((v1.type == T_IP) && (v2.type == T_CLIST)) - return int_set_contains(v2.val.ad, ipa_to_u32(v1.val.px.ip)); -#endif + if (val_is_ip4(v1) 
&& (v2.type == T_CLIST)) + return int_set_contains(v2.val.ad, ipa_to_u32(v1.val.ip)); if ((v1.type == T_EC) && (v2.type == T_ECLIST)) return ec_set_contains(v2.val.ad, v1.val.ec); @@ -486,21 +473,21 @@ val_in_range(struct f_val v1, struct f_val v2) if ((v1.type == T_STRING) && (v2.type == T_STRING)) return patmatch(v2.val.s, v1.val.s); - if ((v1.type == T_IP) && (v2.type == T_PREFIX)) - return ipa_in_net(v1.val.px.ip, v2.val.px.ip, v2.val.px.len); + if ((v1.type == T_IP) && (v2.type == T_NET)) + return ipa_in_netX(v1.val.ip, v2.val.net); - if ((v1.type == T_PREFIX) && (v2.type == T_PREFIX)) - return net_in_net(v1.val.px.ip, v1.val.px.len, v2.val.px.ip, v2.val.px.len); + if ((v1.type == T_NET) && (v2.type == T_NET)) + return net_in_netX(v1.val.net, v2.val.net); - if ((v1.type == T_PREFIX) && (v2.type == T_PREFIX_SET)) - return trie_match_fprefix(v2.val.ti, &v1.val.px); + if ((v1.type == T_NET) && (v2.type == T_PREFIX_SET)) + return trie_match_net(v2.val.ti, v1.val.net); if (v2.type != T_SET) return CMP_ERROR; /* With integrated Quad<->IP implicit conversion */ if ((v1.type == v2.val.t->from.type) || - ((IP_VERSION == 4) && (v1.type == T_QUAD) && (v2.val.t->from.type == T_IP))) + ((v1.type == T_QUAD) && val_is_ip4(v2.val.t->from) && val_is_ip4(v2.val.t->to))) return !!find_tree(v2.val.t, v1); if (v1.type == T_CLIST) @@ -531,12 +518,13 @@ val_format(struct f_val v, buffer *buf) case T_BOOL: buffer_puts(buf, v.val.i ? 
"TRUE" : "FALSE"); return; case T_INT: buffer_print(buf, "%u", v.val.i); return; case T_STRING: buffer_print(buf, "%s", v.val.s); return; - case T_IP: buffer_print(buf, "%I", v.val.px.ip); return; - case T_PREFIX: buffer_print(buf, "%I/%d", v.val.px.ip, v.val.px.len); return; + case T_IP: buffer_print(buf, "%I", v.val.ip); return; + case T_NET: buffer_print(buf, "%N", v.val.net); return; case T_PAIR: buffer_print(buf, "(%u,%u)", v.val.i >> 16, v.val.i & 0xffff); return; case T_QUAD: buffer_print(buf, "%R", v.val.i); return; case T_EC: ec_format(buf2, v.val.ec); buffer_print(buf, "%s", buf2); return; case T_LC: lc_format(buf2, v.val.lc); buffer_print(buf, "%s", buf2); return; + case T_RD: rd_format(v.val.ec, buf2, 1024); buffer_print(buf, "%s", buf2); return; case T_PREFIX_SET: trie_format(v.val.ti, buf); return; case T_SET: tree_format(v.val.t, buf); return; case T_ENUM: buffer_print(buf, "(enum %x)%u", v.type, v.val.i); return; @@ -585,11 +573,19 @@ f_rta_cow(void) (*f_rte)->attrs = rta_do_cow((*f_rte)->attrs, f_pool); } +static char * +val_format_str(struct f_val v) { + buffer b; + LOG_BUFFER_INIT(b); + val_format(v, &b); + return lp_strdup(f_pool, b.start); +} + static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS; -#define runtime(x) do { \ +#define runtime(fmt, ...) 
do { \ if (!(f_flags & FF_SILENT)) \ - log_rl(&rl_runtime_err, L_ERR "filters, line %d: %s", what->lineno, x); \ + log_rl(&rl_runtime_err, L_ERR "filters, line %d: " fmt, what->lineno, ##__VA_ARGS__); \ res.type = T_RETURN; \ res.val.i = F_ERROR; \ return res; \ @@ -719,12 +715,10 @@ interpret(struct f_inst *what) else if (v1.type == T_QUAD) { ipv4_used = 1; key = v1.val.i; } -#ifndef IPV6 /* IP->Quad implicit conversion */ - else if (v1.type == T_IP) { - ipv4_used = 1; key = ipa_to_u32(v1.val.px.ip); + else if (val_is_ip4(v1)) { + ipv4_used = 1; key = ipa_to_u32(v1.val.ip); } -#endif else runtime("Can't operate with key of non-integer/IPv4 type in EC constructor"); @@ -824,7 +818,26 @@ interpret(struct f_inst *what) case FI_DEFINED: ONEARG; res.type = T_BOOL; - res.val.i = (v1.type != T_VOID); + res.val.i = (v1.type != T_VOID) && !undef_value(v1); + break; + case FI_TYPE: + ONEARG; + switch (v1.type) + { + case T_NET: + res.type = T_ENUM_NETTYPE; + res.val.i = v1.val.net->type; + break; + default: + runtime( "Can't determine type of this item" ); + } + break; + case FI_IS_V4: + ONEARG; + if (v1.type != T_IP) + runtime( "IP version check needs an IP address" ); + res.type = T_BOOL; + res.val.i = ipa_is_ip4(v1.val.ip); break; /* Set to indirect value, a1 = variable, a2 = value */ @@ -832,15 +845,15 @@ interpret(struct f_inst *what) ARG(v2, a2.p); sym = what->a1.p; vp = sym->def; - if ((sym->class != (SYM_VARIABLE | v2.type)) && (v2.type != T_VOID)) { -#ifndef IPV6 + if ((sym->class != (SYM_VARIABLE | v2.type)) && (v2.type != T_VOID)) + { /* IP->Quad implicit conversion */ - if ((sym->class == (SYM_VARIABLE | T_QUAD)) && (v2.type == T_IP)) { + if ((sym->class == (SYM_VARIABLE | T_QUAD)) && val_is_ip4(v2)) + { vp->type = T_QUAD; - vp->val.i = ipa_to_u32(v2.val.px.ip); + vp->val.i = ipa_to_u32(v2.val.ip); break; } -#endif runtime( "Assigning to variable of incompatible type" ); } *vp = v2; @@ -911,17 +924,15 @@ interpret(struct f_inst *what) switch (what->a2.i) { - 
case SA_FROM: res.val.px.ip = rta->from; break; - case SA_GW: res.val.px.ip = rta->gw; break; - case SA_NET: res.val.px.ip = (*f_rte)->net->n.prefix; - res.val.px.len = (*f_rte)->net->n.pxlen; break; + case SA_FROM: res.val.ip = rta->from; break; + case SA_GW: res.val.ip = rta->nh.gw; break; + case SA_NET: res.val.net = (*f_rte)->net->n.addr; break; case SA_PROTO: res.val.s = rta->src->proto->name; break; case SA_SOURCE: res.val.i = rta->source; break; case SA_SCOPE: res.val.i = rta->scope; break; - case SA_CAST: res.val.i = rta->cast; break; case SA_DEST: res.val.i = rta->dest; break; - case SA_IFNAME: res.val.s = rta->iface ? rta->iface->name : ""; break; - case SA_IFINDEX: res.val.i = rta->iface ? rta->iface->index : 0; break; + case SA_IFNAME: res.val.s = rta->nh.iface ? rta->nh.iface->name : ""; break; + case SA_IFINDEX: res.val.i = rta->nh.iface ? rta->nh.iface->index : 0; break; default: bug("Invalid static attribute access (%x)", res.type); @@ -941,20 +952,20 @@ interpret(struct f_inst *what) switch (what->a2.i) { case SA_FROM: - rta->from = v1.val.px.ip; + rta->from = v1.val.ip; break; case SA_GW: { - ip_addr ip = v1.val.px.ip; + ip_addr ip = v1.val.ip; neighbor *n = neigh_find(rta->src->proto, &ip, 0); if (!n || (n->scope == SCOPE_HOST)) runtime( "Invalid gw address" ); - rta->dest = RTD_ROUTER; - rta->gw = ip; - rta->iface = n->iface; - rta->nexthops = NULL; + rta->dest = RTD_UNICAST; + rta->nh.gw = ip; + rta->nh.iface = n->iface; + rta->nh.next = NULL; rta->hostentry = NULL; } break; @@ -969,9 +980,9 @@ interpret(struct f_inst *what) runtime( "Destination can be changed only to blackhole, unreachable or prohibit" ); rta->dest = i; - rta->gw = IPA_NONE; - rta->iface = NULL; - rta->nexthops = NULL; + rta->nh.gw = IPA_NONE; + rta->nh.iface = NULL; + rta->nh.next = NULL; rta->hostentry = NULL; break; @@ -985,6 +996,7 @@ interpret(struct f_inst *what) { eattr *e = NULL; u16 code = what->a2.i; + int f_type = what->aux >> 8; if (!(f_flags & FF_FORCE_TMPATTR)) 
e = ea_find((*f_rte)->attrs->eattrs, code); @@ -994,24 +1006,31 @@ interpret(struct f_inst *what) e = ea_find((*f_rte)->attrs->eattrs, code); if (!e) { - /* A special case: undefined int_set looks like empty int_set */ + /* A special case: undefined as_path looks like empty as_path */ + if ((what->aux & EAF_TYPE_MASK) == EAF_TYPE_AS_PATH) { + res.type = T_PATH; + res.val.ad = &undef_adata; + break; + } + + /* The same special case for int_set */ if ((what->aux & EAF_TYPE_MASK) == EAF_TYPE_INT_SET) { res.type = T_CLIST; - res.val.ad = adata_empty(f_pool, 0); + res.val.ad = &undef_adata; break; } /* The same special case for ec_set */ if ((what->aux & EAF_TYPE_MASK) == EAF_TYPE_EC_SET) { res.type = T_ECLIST; - res.val.ad = adata_empty(f_pool, 0); + res.val.ad = &undef_adata; break; } /* The same special case for lc_set */ if ((what->aux & EAF_TYPE_MASK) == EAF_TYPE_LC_SET) { res.type = T_LCLIST; - res.val.ad = adata_empty(f_pool, 0); + res.val.ad = &undef_adata; break; } @@ -1022,7 +1041,7 @@ interpret(struct f_inst *what) switch (what->aux & EAF_TYPE_MASK) { case EAF_TYPE_INT: - res.type = T_INT; + res.type = f_type; res.val.i = e->u.data; break; case EAF_TYPE_ROUTER_ID: @@ -1036,7 +1055,7 @@ interpret(struct f_inst *what) case EAF_TYPE_IP_ADDRESS: res.type = T_IP; struct adata * ad = e->u.ptr; - res.val.px.ip = * (ip_addr *) ad->data; + res.val.ip = * (ip_addr *) ad->data; break; case EAF_TYPE_AS_PATH: res.type = T_PATH; @@ -1072,30 +1091,28 @@ interpret(struct f_inst *what) { struct ea_list *l = lp_alloc(f_pool, sizeof(struct ea_list) + sizeof(eattr)); u16 code = what->a2.i; + int f_type = what->aux >> 8; l->next = NULL; l->flags = EALF_SORTED; l->count = 1; l->attrs[0].id = code; l->attrs[0].flags = 0; - l->attrs[0].type = what->aux | EAF_ORIGINATED; + l->attrs[0].type = (what->aux & 0xff) | EAF_ORIGINATED | EAF_FRESH; switch (what->aux & EAF_TYPE_MASK) { case EAF_TYPE_INT: - // Enums are also ints, so allow them in. 
- if (v1.type != T_INT && (v1.type < T_ENUM_LO || v1.type > T_ENUM_HI)) + if (v1.type != f_type) runtime( "Setting int attribute to non-int value" ); l->attrs[0].u.data = v1.val.i; break; case EAF_TYPE_ROUTER_ID: -#ifndef IPV6 /* IP->Quad implicit conversion */ - if (v1.type == T_IP) { - l->attrs[0].u.data = ipa_to_u32(v1.val.px.ip); + if (val_is_ip4(v1)) { + l->attrs[0].u.data = ipa_to_u32(v1.val.ip); break; } -#endif /* T_INT for backward compatibility */ if ((v1.type != T_QUAD) && (v1.type != T_INT)) runtime( "Setting quad attribute to non-quad value" ); @@ -1111,7 +1128,7 @@ interpret(struct f_inst *what) int len = sizeof(ip_addr); struct adata *ad = lp_alloc(f_pool, sizeof(struct adata) + len); ad->length = len; - (* (ip_addr *) ad->data) = v1.val.px.ip; + (* (ip_addr *) ad->data) = v1.val.ip; l->attrs[0].u.ptr = ad; break; case EAF_TYPE_AS_PATH: @@ -1191,7 +1208,7 @@ interpret(struct f_inst *what) ONEARG; res.type = T_INT; switch(v1.type) { - case T_PREFIX: res.val.i = v1.val.px.len; break; + case T_NET: res.val.i = net_pxlen(v1.val.net); break; case T_PATH: res.val.i = as_path_getlen(v1.val.ad); break; case T_CLIST: res.val.i = int_set_get_size(v1.val.ad); break; case T_ECLIST: res.val.i = ec_set_get_size(v1.val.ad); break; @@ -1199,16 +1216,41 @@ interpret(struct f_inst *what) default: runtime( "Prefix, path, clist or eclist expected" ); } break; + case FI_ROA_MAXLEN: /* Get ROA max prefix length */ + ONEARG; + if (v1.type != T_NET || !net_is_roa(v1.val.net)) + runtime( "ROA expected" ); + + res.type = T_INT; + res.val.i = (v1.val.net->type == NET_ROA4) ? + ((net_addr_roa4 *) v1.val.net)->max_pxlen : + ((net_addr_roa6 *) v1.val.net)->max_pxlen; + break; + case FI_ROA_ASN: /* Get ROA ASN */ + ONEARG; + if (v1.type != T_NET || !net_is_roa(v1.val.net)) + runtime( "ROA expected" ); + + res.type = T_INT; + res.val.i = (v1.val.net->type == NET_ROA4) ? 
+ ((net_addr_roa4 *) v1.val.net)->asn : + ((net_addr_roa6 *) v1.val.net)->asn; + break; case FI_IP: /* Convert prefix to ... */ ONEARG; - if (v1.type != T_PREFIX) + if (v1.type != T_NET) runtime( "Prefix expected" ); - res.type = what->aux; - switch(res.type) { - /* case T_INT: res.val.i = v1.val.px.len; break; Not needed any more */ - case T_IP: res.val.px.ip = v1.val.px.ip; break; - default: bug( "Unknown prefix to conversion" ); - } + res.type = T_IP; + res.val.ip = net_prefix(v1.val.net); + break; + case FI_ROUTE_DISTINGUISHER: + ONEARG; + if (v1.type != T_NET) + runtime( "Prefix expected" ); + if (!net_is_vpn(v1.val.net)) + runtime( "VPN address expected" ); + res.type = T_RD; + res.val.ec = net_rd(v1.val.net); break; case FI_AS_PATH_FIRST: /* Get first ASN from AS PATH */ ONEARG; @@ -1279,11 +1321,11 @@ interpret(struct f_inst *what) runtime( "Integer expected"); if (v1.type != T_IP) runtime( "You can mask only IP addresses" ); - { - ip_addr mask = ipa_mkmask(v2.val.i); - res.type = T_IP; - res.val.px.ip = ipa_and(mask, v1.val.px.ip); - } + + res.type = T_IP; + res.val.ip = ipa_is_ip4(v1.val.ip) ? 
+ ipa_from_ip4(ip4_and(ipa_to_ip4(v1.val.ip), ip4_mkmask(v2.val.i))) : + ipa_from_ip6(ip6_and(ipa_to_ip6(v1.val.ip), ip6_mkmask(v2.val.i))); break; case FI_EMPTY: /* Create empty attribute */ @@ -1339,11 +1381,9 @@ interpret(struct f_inst *what) if ((v2.type == T_PAIR) || (v2.type == T_QUAD)) n = v2.val.i; -#ifndef IPV6 /* IP->Quad implicit conversion */ - else if (v2.type == T_IP) - n = ipa_to_u32(v2.val.px.ip); -#endif + else if (val_is_ip4(v2)) + n = ipa_to_u32(v2.val.ip); else if ((v2.type == T_SET) && clist_set_type(v2.val.t, &dummy)) arg_set = 1; else if (v2.type == T_CLIST) @@ -1473,7 +1513,7 @@ interpret(struct f_inst *what) if (what->arg1) { TWOARGS; - if ((v1.type != T_PREFIX) || (v2.type != T_INT)) + if ((v1.type != T_NET) || (v2.type != T_INT)) runtime("Invalid argument to roa_check()"); as = v2.val.i; @@ -1481,8 +1521,7 @@ interpret(struct f_inst *what) else { ACCESS_RTE; - v1.val.px.ip = (*f_rte)->net->n.prefix; - v1.val.px.len = (*f_rte)->net->n.pxlen; + v1.val.net = (*f_rte)->net->n.addr; /* We ignore temporary attributes, probably not a problem here */ /* 0x02 is a value of BA_AS_PATH, we don't want to include BGP headers */ @@ -1494,12 +1533,39 @@ interpret(struct f_inst *what) as_path_get_last(e->u.ptr, &as); } - struct roa_table_config *rtc = ((struct f_inst_roa_check *) what)->rtc; - if (!rtc->table) + struct rtable *table = ((struct f_inst_roa_check *) what)->rtc->table; + if (!table) runtime("Missing ROA table"); + if (table->addr_type != NET_ROA4 && table->addr_type != NET_ROA6) + runtime("Table type must be either ROA4 or ROA6"); + res.type = T_ENUM_ROA; - res.val.i = roa_check(rtc->table, v1.val.px.ip, v1.val.px.len, as); + + if (table->addr_type != (v1.val.net->type == NET_IP4 ? 
NET_ROA4 : NET_ROA6)) + res.val.i = ROA_UNKNOWN; /* Prefix and table type mismatch */ + else + res.val.i = net_roa_check(table, v1.val.net, as); + + break; + + case FI_FORMAT: /* Format */ + ONEARG; + + res.type = T_STRING; + res.val.s = val_format_str(v1); + break; + + case FI_ASSERT: /* Birdtest Assert */ + ONEARG; + + if (v1.type != T_BOOL) + runtime("Should be boolean value"); + + res.type = v1.type; + res.val = v1.val; + + CALL(bt_assert_hook, res.val.i, what); break; default: @@ -1554,6 +1620,7 @@ i_same(struct f_inst *f1, struct f_inst *f2) case FI_NOT_MATCH: case FI_MATCH: TWOARGS; break; case FI_DEFINED: ONEARG; break; + case FI_TYPE: ONEARG; break; case FI_LC_CONSTRUCT: TWOARGS; @@ -1619,6 +1686,7 @@ i_same(struct f_inst *f1, struct f_inst *f2) case FI_RETURN: ONEARG; break; case FI_IP: ONEARG; break; + case FI_ROUTE_DISTINGUISHER: ONEARG; break; case FI_CALL: /* Call rewriting trickery to avoid exponential behaviour */ ONEARG; if (!i_same(f1->a2.p, f2->a2.p)) @@ -1635,7 +1703,7 @@ i_same(struct f_inst *f1, struct f_inst *f2) case FI_AS_PATH_LAST_NAG: ONEARG; break; case FI_ROA_CHECK: TWOARGS; - /* Does not really make sense - ROA check resuls may change anyway */ + /* Does not really make sense - ROA check results may change anyway */ if (strcmp(((struct f_inst_roa_check *) f1)->rtc->name, ((struct f_inst_roa_check *) f2)->rtc->name)) return 0; diff --git a/filter/filter.h b/filter/filter.h index 1d0f389e..47014785 100644 --- a/filter/filter.h +++ b/filter/filter.h @@ -35,6 +35,8 @@ F(FI_MATCH, 0, '~') \ F(FI_NOT_MATCH, '!', '~') \ F(FI_DEFINED, 'd', 'e') \ + F(FI_TYPE, 0, 'T') \ + F(FI_IS_V4, 'I', 'i') \ F(FI_SET, 0, 's') \ F(FI_CONSTANT, 0, 'c') \ F(FI_VARIABLE, 0, 'V') \ @@ -50,7 +52,10 @@ F(FI_PREF_GET, 0, 'P') \ F(FI_PREF_SET, 'P', 'S') \ F(FI_LENGTH, 0, 'L') \ + F(FI_ROA_MAXLEN, 'R', 'M') \ + F(FI_ROA_ASN, 'R', 'A') \ F(FI_IP, 'c', 'p') \ + F(FI_ROUTE_DISTINGUISHER, 'R', 'D') \ F(FI_AS_PATH_FIRST, 'a', 'f') \ F(FI_AS_PATH_LAST, 'a', 'l') \ 
F(FI_AS_PATH_LAST_NAG, 'a', 'L') \ @@ -62,7 +67,9 @@ F(FI_EMPTY, 0, 'E') \ F(FI_PATH_PREPEND, 'A', 'p') \ F(FI_CLIST_ADD_DEL, 'C', 'a') \ - F(FI_ROA_CHECK, 'R', 'C') + F(FI_ROA_CHECK, 'R', 'C') \ + F(FI_FORMAT, 0, 'F') \ + F(FI_ASSERT, 'a', 's') enum f_instruction_code { #define F(c,a,b) \ @@ -74,15 +81,15 @@ FI__LIST struct f_inst { /* Instruction */ struct f_inst *next; /* Structure is 16 bytes, anyway */ enum f_instruction_code fi_code; - u16 aux; + u16 aux; /* Extension to instruction code, T_*, EA_*, EAF_* */ union { - int i; + uint i; void *p; - } a1; + } a1; /* The first argument */ union { - int i; + uint i; void *p; - } a2; + } a2; /* The second argument */ int lineno; }; @@ -92,7 +99,7 @@ struct f_inst { /* Instruction */ /* Not enough fields in f_inst for three args used by roa_check() */ struct f_inst_roa_check { struct f_inst i; - struct roa_table_config *rtc; + struct rtable_config *rtc; }; struct f_inst3 { @@ -107,23 +114,18 @@ struct f_inst3 { struct f_prefix { - ip_addr ip; - int len; -#define LEN_MASK 0xff -#define LEN_PLUS 0x1000000 -#define LEN_MINUS 0x2000000 -#define LEN_RANGE 0x4000000 - /* If range then prefix must be in range (len >> 16 & 0xff, len >> 8 & 0xff) */ + net_addr net; + u8 lo, hi; }; struct f_val { - int type; + int type; /* T_* */ union { uint i; u64 ec; lcomm lc; - /* ip_addr ip; Folded into prefix */ - struct f_prefix px; + ip_addr ip; + const net_addr *net; char *s; struct f_tree *t; struct f_trie *ti; @@ -158,7 +160,7 @@ static inline struct f_static_attr f_new_static_attr(int f_type, int code, int r { return (struct f_static_attr) { .f_type = f_type, .sa_code = code, .readonly = readonly }; } struct f_tree *f_new_tree(void); struct f_inst *f_generate_complex(int operation, int operation_aux, struct f_dynamic_attr da, struct f_inst *argument); -struct f_inst *f_generate_roa_check(struct symbol *sym, struct f_inst *prefix, struct f_inst *asn); +struct f_inst *f_generate_roa_check(struct rtable_config *table, struct f_inst 
*prefix, struct f_inst *asn); struct f_tree *build_tree(struct f_tree *); @@ -167,28 +169,11 @@ int same_tree(struct f_tree *t1, struct f_tree *t2); void tree_format(struct f_tree *t, buffer *buf); struct f_trie *f_new_trie(linpool *lp, uint node_size); -void *trie_add_prefix(struct f_trie *t, ip_addr px, int plen, int l, int h); -int trie_match_prefix(struct f_trie *t, ip_addr px, int plen); +void *trie_add_prefix(struct f_trie *t, const net_addr *n, uint l, uint h); +int trie_match_net(struct f_trie *t, const net_addr *n); int trie_same(struct f_trie *t1, struct f_trie *t2); void trie_format(struct f_trie *t, buffer *buf); -void fprefix_get_bounds(struct f_prefix *px, int *l, int *h); - -static inline void -trie_add_fprefix(struct f_trie *t, struct f_prefix *px) -{ - int l, h; - fprefix_get_bounds(px, &l, &h); - trie_add_prefix(t, px->ip, px->len & LEN_MASK, l, h); -} - -static inline int -trie_match_fprefix(struct f_trie *t, struct f_prefix *px) -{ - return trie_match_prefix(t, px->ip, px->len & LEN_MASK); -} - - struct ea_list; struct rte; @@ -218,6 +203,7 @@ void val_format(struct f_val v, buffer *buf); #define FILTER_ACCEPT NULL #define FILTER_REJECT ((void *) 1) +#define FILTER_UNDEF ((void *) 2) /* Used in BGP */ /* Type numbers must be in 0..0xff range */ #define T_MASK 0xff @@ -242,7 +228,9 @@ void val_format(struct f_val v, buffer *buf); #define T_ENUM_RTC 0x33 #define T_ENUM_RTD 0x34 #define T_ENUM_ROA 0x35 -#define T_ENUM_RA_PREFERENCE 0x36 +#define T_ENUM_NETTYPE 0x36 +#define T_ENUM_RA_PREFERENCE 0x37 + /* new enums go here */ #define T_ENUM_EMPTY 0x3f /* Special hack for atomic_aggr */ @@ -250,7 +238,7 @@ void val_format(struct f_val v, buffer *buf); /* Bigger ones */ #define T_IP 0x20 -#define T_PREFIX 0x21 +#define T_NET 0x21 #define T_STRING 0x22 #define T_PATH_MASK 0x23 /* mask for BGP path */ #define T_PATH 0x24 /* BGP path */ @@ -259,6 +247,7 @@ void val_format(struct f_val v, buffer *buf); #define T_ECLIST 0x27 /* Extended community list */ 
#define T_LC 0x28 /* Large community value, lcomm */ #define T_LCLIST 0x29 /* Large community list */ +#define T_RD 0x2a /* Route distinguisher for VPN addresses */ #define T_RETURN 0x40 #define T_SET 0x80 @@ -271,10 +260,9 @@ void val_format(struct f_val v, buffer *buf); #define SA_PROTO 4 #define SA_SOURCE 5 #define SA_SCOPE 6 -#define SA_CAST 7 -#define SA_DEST 8 -#define SA_IFNAME 9 -#define SA_IFINDEX 10 +#define SA_DEST 7 +#define SA_IFNAME 8 +#define SA_IFINDEX 9 struct f_tree { @@ -286,7 +274,7 @@ struct f_tree { struct f_trie_node { ip_addr addr, mask, accept; - int plen; + uint plen; struct f_trie_node *c[2]; }; @@ -303,4 +291,15 @@ struct f_trie #define FF_FORCE_TMPATTR 1 /* Force all attributes to be temporary */ #define FF_SILENT 2 /* Silent filter execution */ +/* Bird Tests */ +struct f_bt_test_suite { + node n; /* Node in config->tests */ + struct f_inst *fn; /* Root of function */ + const char *fn_name; /* Name of test */ + const char *dsc; /* Description */ +}; + +/* Hook for call bt_assert() function in configuration */ +extern void (*bt_assert_hook)(int result, struct f_inst *assert); + #endif diff --git a/filter/filter_test.c b/filter/filter_test.c new file mode 100644 index 00000000..be7fd521 --- /dev/null +++ b/filter/filter_test.c @@ -0,0 +1,87 @@ +/* + * Filters: Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <string.h> +#include <stdlib.h> + +#include "test/birdtest.h" +#include "test/bt-utils.h" + +#include "filter/filter.h" +#include "conf/conf.h" + +#define BT_CONFIG_FILE "filter/test.conf" + + +static struct config * +parse_config_file(const void *filename_void) +{ + bt_bird_init(); + + size_t fn_size = strlen((const char *) filename_void) + 1; + char *filename = alloca(fn_size); + strncpy(filename, filename_void, fn_size); + + struct config *c = bt_config_file_parse(filename); + bt_bird_cleanup(); + + return c; +} + +static int +run_function(const void *parsed_fn_def) +{ + /* XXX: const -> non-const */ + struct f_inst *f = (struct f_inst *) parsed_fn_def; + + linpool *tmp = lp_new_default(&root_pool); + struct f_val res = f_eval(f, tmp); + rfree(tmp); + + if (res.type == T_RETURN && res.val.i >= F_REJECT) + return 0; + + return 1; +} + +static void +bt_assert_filter(int result, struct f_inst *assert) +{ + int bt_suit_case_result = 1; + if (!result) + { + bt_result = 0; + bt_suite_result = 0; + bt_suit_case_result = 0; + } + + bt_log_suite_case_result(bt_suit_case_result, "Assertion at line %d (%s)", assert->lineno, (char *) assert->a2.p); +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + struct config *c = parse_config_file(BT_CONFIG_FILE); + + if (c) + { + bt_assert_hook = bt_assert_filter; + + struct f_bt_test_suite *t; + WALK_LIST(t, c->tests) + bt_test_suite_base(run_function, t->fn_name, t->fn, BT_FORKING, BT_TIMEOUT, "%s", t->dsc); + } + + return bt_exit_value(); +} diff --git a/filter/test.conf b/filter/test.conf index 4dc67c49..989dab14 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -1,127 +1,630 @@ - /* - * This is an example configuration file. + * This is unit testing configuration file for testing filters + * * FIXME: add all examples from docs here. 
*/ -# Yet another comment - router id 62.168.0.1; -define xyzzy = (120+10); -define '1a-a1' = (20+10); +/* We have to setup any protocol */ +protocol static { ipv4; } + + + + +/* + * Common definitions and functions + * -------------------------------- + */ + define one = 1; define ten = 10; -define p23 = (2, 3); -define ip1222 = 1.2.2.2; - function onef(int a) { return 1; } + + +/* + * Testing boolean expressions + * --------------------------- + */ + +function t_bool() +bool b; +{ + b = true; + bt_assert(b); + bt_assert(!!b); + + bt_assert(format(true) = "TRUE"); + bt_assert(format(false) = "FALSE"); + + if ( b = true ) then + bt_assert(b); + else + bt_assert(false); + + bt_assert(true && true); + bt_assert(true || false); + bt_assert(! false && ! false && true); + bt_assert(1 < 2 && 1 != 3); + bt_assert(true && true && ! false); + bt_assert(true || 1+"a"); + bt_assert(!(false && 1+"a")); + bt_assert(!(true && false)); +} + +bt_test_suite(t_bool, "Testing boolean expressions"); + + + + +/* + * Testing integers + * ---------------- + */ + +define four = 4; +define xyzzy = (120+10); +define '1a-a1' = (xyzzy-100); + +function t_int() +int i; +{ + bt_assert(xyzzy = 130); + bt_assert('1a-a1' = 30); + + i = four; + i = 12*100 + 60/2 + i; + i = (i + 0); + bt_assert(i = 1234); + + bt_assert(format(i) = "1234"); + + i = 4200000000; + bt_assert(i = 4200000000); + bt_assert(i > 4100000000); + bt_assert(!(i > 4250000000)); + + bt_assert(1 = 1); + bt_assert(!(1 != 1)); + + bt_assert(1 != 2); + bt_assert(1 <= 2); + + bt_assert(1 != "a"); + bt_assert(1 != (0,1)); + + bt_assert(!(i = 4)); + bt_assert(1 <= 1); + bt_assert(!(1234 < 1234)); +} + +bt_test_suite(t_int, "Testing integers"); + + + + +/* + * Testing sets of integers + * ------------------------ + */ + +define is1 = [ one, (2+1), (6-one), 8, 11, 15, 17, 19]; +define is2 = [(17+2), 17, 15, 11, 8, 5, 3, 2]; +define is3 = [5, 17, 2, 11, 8, 15, 3, 19]; + +function t_int_set() +int set is; +{ + bt_assert(1 ~ [1,2,3]); + 
bt_assert(5 ~ [1..20]); + bt_assert(2 ~ [ 1, 2, 3 ]); + bt_assert(5 ~ [ 4 .. 7 ]); + bt_assert(1 !~ [ 2, 3, 4 ]); + + is = [ 2, 3, 4, 7..11 ]; + bt_assert(10 ~ is); + bt_assert(5 !~ is); + + bt_assert(1 ~ is1); + bt_assert(3 ~ is1); + bt_assert(5 ~ is1); + bt_assert((one+2) ~ is1); + bt_assert(2 ~ is2); + bt_assert(2 ~ is3); + bt_assert(4 !~ is1); + bt_assert(4 !~ is2); + bt_assert(4 !~ is3); + bt_assert(10 !~ is1); + bt_assert(10 !~ is2); + bt_assert(10 !~ is3); + bt_assert(15 ~ is1); + bt_assert(15 ~ is2); + bt_assert(15 ~ is3); + bt_assert(18 !~ is1); + bt_assert(18 !~ is2); + bt_assert(18 !~ is3); + bt_assert(19 ~ is1); + bt_assert(19 ~ is2); + bt_assert(19 ~ is3); + bt_assert(20 !~ is1); + bt_assert(20 !~ is2); + bt_assert(20 !~ is3); + + bt_assert([1,2] != [1,3]); + bt_assert([1,4..10,20] = [1,4..10,20]); + + bt_assert(format([ 1, 2, 1, 1, 1, 3, 4, 1, 1, 1, 5 ]) = "[1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5]"); +} + +bt_test_suite(t_int_set, "Testing sets of integers"); + + + + +/* + * Testing string matching + * ----------------------- + */ + +function t_string() +string st; +{ + st = "Hello"; + bt_assert(format(st) = "Hello"); + bt_assert(st ~ "Hell*"); + bt_assert(st ~ "?ello"); + bt_assert(st ~ "Hello"); + bt_assert(st ~ "Hell?"); + bt_assert(st !~ "ell*"); +} + +bt_test_suite(t_string, "Testing string matching"); + + + + +/* + * Testing pairs + * ------------- + */ + function 'mkpair-a'(int a) { return (1, a); } -function mktrip(int a) +function t_pair() +pair pp; { - return (a, 2*a, 3*a); + pp = (1, 2); + bt_assert(format(pp) = "(1,2)"); + bt_assert((1,2) = pp); + bt_assert((1,1+1) = pp); + bt_assert('mkpair-a'(2) = pp); + bt_assert((1,2) = (1,1+1)); + bt_assert(((1,2) < (2,2))); + bt_assert(!((1,1) > (1,1))); } -function mkpath(int a; int b) +bt_test_suite(t_pair, "Testing pairs"); + + + + +/* + * Testing sets of pairs + * --------------------- + */ + +function t_pair_set() +pair pp; +pair set ps; { - return [= a b 3 2 1 =]; + pp = (1, 2); + ps = [(1,(one+one)), 
(3,4)..(4,8), (5,*), (6,3..6)]; + bt_assert(format(ps) = "[(1,2), (3,4)..(4,8), (5,0)..(5,65535), (6,3)..(6,6)]"); + bt_assert(pp ~ ps); + bt_assert((3,5) ~ ps); + bt_assert((4,1) ~ ps); + bt_assert((5,4) ~ ps); + bt_assert((5,65535) ~ ps); + bt_assert((6,4) ~ ps); + bt_assert((3, 10000) ~ ps); + bt_assert((3,3) !~ ps); + bt_assert((4,9) !~ ps); + bt_assert((4,65535) !~ ps); + bt_assert((6,2) !~ ps); + bt_assert((6,6+one) !~ ps); + bt_assert(((one+6),2) !~ ps); + bt_assert((1,1) !~ ps); + + ps = [(20..150, 200..300), (50100..50200, 1000..50000), (*, 5+5)]; + bt_assert((100,200) ~ ps); + bt_assert((150,300) ~ ps); + bt_assert((50180,1200) ~ ps); + bt_assert((50110,49000) ~ ps); + bt_assert((0,10) ~ ps); + bt_assert((64000,10) ~ ps); + bt_assert((20,199) !~ ps); + bt_assert((151,250) !~ ps); + bt_assert((50050,2000) !~ ps); + bt_assert((50150,50050) !~ ps); + bt_assert((10,9) !~ ps); + bt_assert((65535,11) !~ ps); } -function callme(int arg1; int arg2) -int local1; -int local2; -int i; +bt_test_suite(t_pair_set, "Testing sets of pairs"); + + + + +/* + * Testing quads + * ------------- + */ + +function t_quad() +quad qq; { - printn "Function callme called arguments ", arg1, " and ", arg2, ": " ; - i = arg2; + qq = 1.2.3.4; + bt_assert(format(qq) = "1.2.3.4"); + bt_assert(qq = 1.2.3.4); + bt_assert(qq != 4.3.2.1); +} - case arg1 { - 11, 1, 111: printn "jedna, "; printn "jedna"; - (one+onef(2)): printn "dva, "; printn "jeste jednou dva"; - (2+one) .. 
5: if arg2 < 3 then printn "tri az pet"; - else: printn "neco jineho"; - } - print; +bt_test_suite(t_quad, "Testing quads"); + + + + +/* + * Testing sets of quads + * --------------------- + */ + +function t_quad_set() +quad qq; +{ + qq = 1.2.3.4; + bt_assert(qq ~ [1.2.3.4, 5.6.7.8]); + bt_assert(qq !~ [1.2.1.1, 1.2.3.5]); } -function fifteen() +bt_test_suite(t_quad_set, "Testing sets of quads"); + + + + +/* + * Testing ip address + * ------------------ + */ + +define onetwo = 1.2.3.4; + +function t_ip() +ip p; { - print "fifteen called"; - return 15; + p = 127.1.2.3; + bt_assert(p.is_v4); + bt_assert(p.mask(8) = 127.0.0.0); + bt_assert(1.2.3.4 = 1.2.3.4); + bt_assert(1.2.3.4 = onetwo); + bt_assert(format(p) = "127.1.2.3"); + + p = ::fffe:6:c0c:936d:88c7:35d3; + bt_assert(!p.is_v4); + bt_assert(format(p) = "::fffe:6:c0c:936d:88c7:35d3"); + + p = 1234:5678::; + bt_assert(!p.is_v4); + bt_assert(p.mask(24) = 1234:5600::); } -roa table rl +bt_test_suite(t_ip, "Testing ip address"); + + + + +/* + * Testing sets of ip address + * -------------------------- + */ + +define ip1222 = 1.2.2.2; + +function t_ip_set() +ip set ips; { - roa 10.110.0.0/16 max 16 as 1000; - roa 10.120.0.0/16 max 24 as 1000; - roa 10.130.0.0/16 max 24 as 2000; - roa 10.130.128.0/18 max 24 as 3000; + ips = [ 1.1.1.0 .. 
1.1.1.255, ip1222]; + bt_assert(format(ips) = "[1.1.1.0..1.1.1.255, 1.2.2.2]"); + bt_assert(1.1.1.0 ~ ips); + bt_assert(1.1.1.100 ~ ips); + bt_assert(1.2.2.2 ~ ips); + bt_assert(1.1.0.255 !~ ips); + bt_assert(1.1.2.0 !~ ips); + bt_assert(1.2.2.3 !~ ips); + bt_assert(192.168.1.1 !~ ips); + + bt_assert(1.2.3.4 !~ [ 1.2.3.3, 1.2.3.5 ]); + bt_assert(1.2.3.4 ~ [ 1.2.3.3..1.2.3.5 ]); } -function test_roa() +bt_test_suite(t_ip_set, "Testing sets of ip address"); + + + + +/* + * Testing enums + * ------------- + */ + +function t_enum() { - # cannot be tested in __startup(), sorry - print "Testing ROA"; - print "Should be true: ", roa_check(rl, 10.10.0.0/16, 1000) = ROA_UNKNOWN, - " ", roa_check(rl, 10.0.0.0/8, 1000) = ROA_UNKNOWN, - " ", roa_check(rl, 10.110.0.0/16, 1000) = ROA_VALID, - " ", roa_check(rl, 10.110.0.0/16, 2000) = ROA_INVALID, - " ", roa_check(rl, 10.110.32.0/20, 1000) = ROA_INVALID, - " ", roa_check(rl, 10.120.32.0/20, 1000) = ROA_VALID; - print "Should be true: ", roa_check(rl, 10.120.32.0/20, 2000) = ROA_INVALID, - " ", roa_check(rl, 10.120.32.32/28, 1000) = ROA_INVALID, - " ", roa_check(rl, 10.130.130.0/24, 1000) = ROA_INVALID, - " ", roa_check(rl, 10.130.130.0/24, 2000) = ROA_VALID, - " ", roa_check(rl, 10.130.30.0/24, 3000) = ROA_INVALID, - " ", roa_check(rl, 10.130.130.0/24, 3000) = ROA_VALID; -} - -function path_test() + bt_assert(format(RTS_DUMMY) = "(enum 30)0"); + bt_assert(format(RTS_STATIC) = "(enum 30)1"); + bt_assert(RTS_STATIC ~ [RTS_STATIC, RTS_DEVICE]); + bt_assert(RTS_BGP !~ [RTS_STATIC, RTS_DEVICE]); +} + +bt_test_suite(t_enum, "Testing enums"); + + + + +/* + * Testing prefixes + * ---------------- + */ + +define netdoc = 2001:db8::/32; + +function t_prefix() +prefix px; +{ + px = 1.2.0.0/18; + bt_assert(format(px) = "1.2.0.0/18"); + bt_assert(192.168.0.0/16 ~ 192.168.0.0/16); + bt_assert(192.168.0.0/17 ~ 192.168.0.0/16); + bt_assert(192.168.254.0/24 ~ 192.168.0.0/16); + bt_assert(netdoc ~ 2001::/16); + bt_assert(192.168.0.0/15 !~ 
192.168.0.0/16); + bt_assert(192.160.0.0/17 !~ 192.168.0.0/16); + bt_assert(px !~ netdoc); + + bt_assert(1.2.3.4 ~ 1.0.0.0/8); + bt_assert(1.0.0.0/8 ~ 1.0.0.0/8); +} + +bt_test_suite(t_prefix, "Testing prefixes"); + + + + +/* + * Testing prefix sets + * ------------------- + */ + +define net10 = 10.0.0.0/8; +define pxs2 = [ 10.0.0.0/16{8,12}, 20.0.0.0/16{24,28} ]; + +function test_pxset(prefix set pxs) +{ + bt_assert(net10 ~ pxs); + bt_assert(10.0.0.0/10 ~ pxs); + bt_assert(10.0.0.0/12 ~ pxs); + bt_assert(20.0.0.0/24 ~ pxs); + bt_assert(20.0.40.0/24 ~ pxs); + bt_assert(20.0.0.0/26 ~ pxs); + bt_assert(20.0.100.0/26 ~ pxs); + bt_assert(20.0.0.0/28 ~ pxs); + bt_assert(20.0.255.0/28 ~ pxs); + + bt_assert(10.0.0.0/7 !~ pxs); + bt_assert(10.0.0.0/13 !~ pxs); + bt_assert(10.0.0.0/16 !~ pxs); + bt_assert(20.0.0.0/16 !~ pxs); + bt_assert(20.0.0.0/23 !~ pxs); + bt_assert(20.0.0.0/29 !~ pxs); + bt_assert(11.0.0.0/10 !~ pxs); + bt_assert(20.1.0.0/26 !~ pxs); + + bt_assert(1.0.0.0/8 ~ [ 1.0.0.0/8+ ]); + bt_assert(1.0.0.0/9 !~ [ 1.0.0.0/8- ]); + bt_assert(1.2.0.0/17 !~ [ 1.0.0.0/8{ 15 , 16 } ]); + + bt_assert([ 10.0.0.0/8{ 15 , 17 } ] = [ 10.0.0.0/8{ 15 , 17 } ]); +} + +function t_prefix_set() +prefix set pxs; +{ + pxs = [ 1.2.0.0/16, 1.4.0.0/16+, 44.66.88.64/30{24,28}, 12.34.56.0/24{8,16} ]; + bt_assert(format(pxs) = "[1.2.0.0/112{::0.1.0.0}, 1.4.0.0/112{::0.1.255.255}, 12.34.0.0/112{::1.255.0.0}, 44.66.88.64/124{::1f0}]"); + bt_assert(1.2.0.0/16 ~ pxs); + bt_assert(1.4.0.0/16 ~ pxs); + bt_assert(1.4.0.0/18 ~ pxs); + bt_assert(1.4.0.0/32 ~ pxs); + bt_assert(1.1.0.0/16 !~ pxs); + bt_assert(1.3.0.0/16 !~ pxs); + bt_assert(1.2.0.0/15 !~ pxs); + bt_assert(1.2.0.0/17 !~ pxs); + bt_assert(1.2.0.0/32 !~ pxs); + bt_assert(1.4.0.0/15 !~ pxs); + + test_pxset(pxs2); + test_pxset([ 10.0.0.0/16{8,12}, 20.0.0.0/16{24,28} ]); + + bt_assert(1.2.0.0/16 ~ [ 1.0.0.0/8{ 15 , 17 } ]); + bt_assert([ 10.0.0.0/8{ 15 , 17 } ] != [ 11.0.0.0/8{ 15 , 17 } ]); +} + +bt_test_suite(t_prefix_set, "Testing 
prefix sets"); + + + + +/* + * Testing Prefix IPv6 + * ------------------- + */ + +function t_prefix6() +prefix px; +{ + px = 1020::/18; + bt_assert(format(px) = "1020::/18"); + bt_assert(1020:3040:5060:: ~ 1020:3040:5000::/40); + bt_assert(1020:3040::/32 ~ 1020:3040::/32); + bt_assert(1020:3040::/33 ~ 1020:3040::/32); + bt_assert(1020:3040:5060::/48 ~ 1020:3040::/32); + bt_assert(1020:3040::/31 !~ 1020:3040::/32); + bt_assert(1020:3041::/33 !~ 1020:3040::/32); +} + +bt_test_suite(t_prefix6, "Testing prefix IPv6"); + + + + +/* + * Testing prefix IPv6 sets + * ------------------------ + */ + +function t_prefix6_set() +prefix set pxs; +{ + bt_assert(1180::/16 ~ [ 1100::/8{15, 17} ]); + bt_assert(12::34 = 12::34); + bt_assert(12::34 ~ [ 12::33..12::35 ]); + bt_assert(1020::34 ~ 1000::/8); + bt_assert(1000::/8 ~ 1000::/8); + bt_assert(1000::/8 ~ [ 1000::/8+ ]); + bt_assert(12::34 !~ [ 12::33, 12::35 ]); + bt_assert(1000::/9 !~ [ 1000::/8- ]); + bt_assert(1000::/17 !~ [ 1000::/8{15, 16} ]); + + pxs = [ 1102::/16, 1104::/16+]; + bt_assert(1102::/16 ~ pxs); + bt_assert(1104::/16 ~ pxs); + bt_assert(1104::/18 ~ pxs); + bt_assert(1104::/32 ~ pxs); + bt_assert(1101::/16 !~ pxs); + bt_assert(1103::/16 !~ pxs); + bt_assert(1102::/15 !~ pxs); + bt_assert(1102::/17 !~ pxs); + bt_assert(1102::/32 !~ pxs); + bt_assert(1104::/15 !~ pxs); + + pxs = ([ 1000::/16{8,12}, 2000::/16{24,28} ]); + bt_assert(format(pxs) = "[1000::/12{1f0::}, 2000::/16{0:1f0::}]"); + bt_assert(1000::/8 ~ pxs); + bt_assert(1000::/10 ~ pxs); + bt_assert(1000::/12 ~ pxs); + bt_assert(2000::/24 ~ pxs); + bt_assert(2000:4000::/24 ~ pxs); + bt_assert(2000::/26 ~ pxs); + bt_assert(2000:8000::/26 ~ pxs); + bt_assert(2000::/28 ~ pxs); + bt_assert(2000:FFF0::/28 ~ pxs); + bt_assert(1000::/7 !~ pxs); + bt_assert(1000::/13 !~ pxs); + bt_assert(1000::/16 !~ pxs); + bt_assert(2000::/16 !~ pxs); + bt_assert(2000::/23 !~ pxs); + bt_assert(2000::/29 !~ pxs); + bt_assert(1100::/10 !~ pxs); + bt_assert(2010::/26 !~ pxs); +} + 
+bt_test_suite(t_prefix6_set, "Testing prefix IPv6 sets"); + + + + +function t_flowspec() +prefix p; +{ + p = flow4 { dst 10.0.0.0/8; }; + bt_assert(p !~ [ 10.0.0.0/8 ] ); + + bt_assert(format(flow4 { dst 10.0.0.0/8; proto = 23; }) = "flow4 { dst 10.0.0.0/8; proto 23; }"); + bt_assert(format(flow6 { dst ::1/128; src ::2/127; }) = "flow6 { dst ::1/128; src ::2/127; }"); + bt_assert(format(flow6 { next header false 42; }) = "flow6 { next header false 42; }"); + bt_assert(format(flow6 { port 80; }) = "flow6 { port 80; }"); + bt_assert(format(flow6 { dport > 24 && < 30 || 40..50,60..70,80 && >= 90; }) = "flow6 { dport > 24 && < 30 || 40..50,60..70,80 && >= 90; }"); + bt_assert(format(flow6 { sport 0..0x400; }) = "flow6 { sport 0..1024; }"); + bt_assert(format(flow6 { icmp type 80; }) = "flow6 { icmp type 80; }"); + bt_assert(format(flow6 { icmp code 90; }) = "flow6 { icmp code 90; }"); + bt_assert(format(flow6 { tcp flags 0x03/0x0f; }) = "flow6 { tcp flags 0x3/0x3,0x0/0xc; }"); + bt_assert(format(flow6 { length 0..65535; }) = "flow6 { length 0..65535; }"); + bt_assert(format(flow6 { dscp = 63; }) = "flow6 { dscp 63; }"); + bt_assert(format(flow6 { fragment is_fragment || !first_fragment; }) = "flow6 { fragment is_fragment || !first_fragment; }"); + bt_assert(format(flow6 { }) = "flow6 { }"); +} + +bt_test_suite(t_flowspec, "Testing flowspec routes"); + + + + +/* + * Testing Paths + * ------------- + */ + +function mkpath(int a; int b) +{ + return [= a b 3 2 1 =]; +} + +function t_path() bgpmask pm1; -bgpmask pm2; bgppath p2; -clist l; -clist l2; -eclist el; -eclist el2; -lclist ll; -lclist ll2; { - print "Entering path test..."; - pm1 = / 4 3 2 1 /; - pm2 = [= 3..6 3 2 1..2 =]; - print "Testing path masks: ", pm1, " ", pm2; + pm1 = [= 4 3 2 1 =]; + + bt_assert(format(pm1) = "[= 4 3 2 1 =]"); + + bt_assert(+empty+ = +empty+); + bt_assert(10 !~ +empty+); + p2 = prepend( + empty +, 1 ); p2 = prepend( p2, 2 ); p2 = prepend( p2, 3 ); p2 = prepend( p2, 4 ); - print "Testing 
path: (4 3 2 1) = ", p2; - print "Should be true: ", p2 ~ pm1, " ", p2 ~ pm2, " ", 3 ~ p2, " ", p2 ~ [2, 10..20], " ", p2 ~ [4, 10..20]; - print "4 = ", p2.len; - p2 = prepend( p2, 5 ); - print "Testing path: (5 4 3 2 1) = ", p2; - print "Should be false: ", p2 ~ pm1, " ", p2 ~ pm2, " ", 10 ~ p2, " ", p2 ~ [8, ten..(2*ten)], " ", p2 ~ [= 1..4 4 3 2 1 =], " ", p2 ~ [= 5 4 4..100 2 1 =]; - print "Should be true: ", p2 ~ / ? 4 3 2 1 /, " ", p2, " ", / ? 4 3 2 1 /; - print "Should be true: ", p2 ~ [= * 4 3 * 1 =], " ", p2, " ", [= * 4 3 * 1 =]; - print "Should be true: ", p2 ~ [= 5..6 4..10 1..3 1..3 1..65536 =]; - print "Should be true: ", p2 ~ [= (3+2) (2*2) 3 2 1 =], " ", p2 ~ mkpath(5, 4); - print "Should be true: ", p2.len = 5, " ", p2.first = 5, " ", p2.last = 1; - print "Should be true: ", pm1 = [= 4 3 2 1 =], " ", pm1 != [= 4 3 1 2 =], " ", - pm2 = [= 3..6 3 2 1..2 =], " ", pm2 != [= 3..6 3 2 1..3 =], " ", - [= 1 2 (1+2) =] = [= 1 2 (1+2) =], " ", [= 1 2 (1+2) =] != [= 1 2 (2+1) =]; - print "5 = ", p2.len; - print "Delete 3: ", delete(p2, 3); - print "Filter 1-3: ", filter(p2, [1..3]); + + bt_assert(format(p2) = "(path 4 3 2 1)"); + bt_assert(p2.len = 4); + bt_assert(p2 ~ pm1); + bt_assert(3 ~ p2); + bt_assert(p2 ~ [2, 10..20]); + bt_assert(p2 ~ [4, 10..20]); + + p2 = prepend(p2, 5); + bt_assert(p2 !~ pm1); + bt_assert(10 !~ p2); + bt_assert(p2 !~ [8, ten..(2*ten)]); + bt_assert(p2 ~ [= * 4 3 * 1 =]); + bt_assert(p2 ~ [= (3+2) (2*2) 3 2 1 =]); + bt_assert(p2 ~ mkpath(5, 4)); + + bt_assert(p2.len = 5); + bt_assert(p2.first = 5); + bt_assert(p2.last = 1); + + bt_assert(p2.len = 5); + bt_assert(delete(p2, 3) = prepend(prepend(prepend(prepend(+empty+, 1), 2), 4), 5)); + bt_assert(filter(p2, [1..3]) = prepend(prepend(prepend(+empty+, 1), 2), 3)); pm1 = [= 1 2 * 3 4 5 =]; p2 = prepend( + empty +, 5 ); @@ -130,63 +633,176 @@ lclist ll2; p2 = prepend( p2, 3 ); p2 = prepend( p2, 2 ); p2 = prepend( p2, 1 ); - print "Should be true: ", p2 ~ pm1, " ", p2, " ", pm1; - print 
"Delete 3: ", delete(p2, 3); - print "Delete 4-5: ", delete(p2, [4..5]); + bt_assert(p2 ~ pm1); + bt_assert(delete(p2, 3) = prepend(prepend(prepend(prepend(+empty+, 5), 4), 2), 1)); + bt_assert(delete(p2, [4..5]) = prepend(prepend(prepend(prepend(+empty+, 3), 3), 2), 1)); +} + +bt_test_suite(t_path, "Testing paths"); + + + + +/* + * Testing Community List + * ---------------------- + */ + +define p23 = (2, 3); + +function t_clist() +clist l; +clist l2; +clist r; +{ l = - empty -; - print "Should be false in this special case: ", l ~ [(*,*)]; + bt_assert(l !~ [(*,*)]); + bt_assert((l ~ [(*,*)]) != (l !~ [(*,*)])); + + bt_assert(-empty- = -empty-); + l = add( l, (one,2) ); - print "Should be always true: ", l ~ [(*,*)]; + bt_assert(l ~ [(*,*)]); l = add( l, (2,one+2) ); - print "Community list (1,2) (2,3) ", l; - print "Should be true: ", (2,3) ~ l, " ", l ~ [(1,*)], " ", l ~ [p23]," ", l ~ [(2,2..3)], " ", l ~ [(1,1..2)], " ", l ~ [(1,1)..(1,2)]; - l = add( l, (2,5) ); - l = add( l, (5,one) ); - l = add( l, (6,one) ); - l = add( l, (one,one) ); - l = delete( l, [(5,1),(6,one),(one,1)] ); - l = delete( l, [(5,one),(6,one)] ); - l = filter( l, [(1,*)] ); - print "Community list (1,2) ", l; - print "Should be false: ", (2,3) ~ l, " ", l ~ [(2,*)], " ", l ~ [(one,3..6)]; - print "Should be always true: ", l ~ [(*,*)]; - l = add( l, (3,one) ); - l = add( l, (one+one+one,one+one) ); - l = add( l, (3,3) ); - l = add( l, (3,4) ); - l = add( l, (3,5) ); - l2 = filter( l, [(3,*)] ); - l = delete( l, [(3,2..4)] ); - print "Community list (1,2) (3,1) (3,5) ", l, " len: ", l.len; - l = add( l, (3,2) ); - l = add( l, (4,5) ); - print "Community list (1,2) (3,1) (3,5) (3,2) (4,5) ", l, " len: ", l.len; - print "Should be true: ", l ~ [(*,2)], " ", l ~ [(*,5)], " ", l ~ [(*, one)]; - print "Should be false: ", l ~ [(*,3)], " ", l ~ [(*,(one+6))], " ", l ~ [(*, (one+one+one))]; - l = delete( l, [(*,(one+onef(3)))] ); - l = delete( l, [(*,(4+one))] ); - print "Community list (3,1) ", 
l; - l = delete( l, [(*,(onef(5)))] ); - print "Community list empty ", l; - l2 = add( l2, (3,6) ); - l = filter( l2, [(3,1..4)] ); - l2 = filter( l2, [(3,3..6)] ); - print "clist A (1..4): ", l; - print "clist B (3..6): ", l2; - print "clist A union B: ", add( l2, l ); - print "clist A isect B: ", filter( l, l2 ); - print "clist A \ B: ", delete( l, l2 ); + bt_assert(format(l) = "(clist (1,2) (2,3))"); + + bt_assert((2,3) ~ l); + bt_assert(l ~ [(1,*)]); + bt_assert(l ~ [p23]); + bt_assert(l ~ [(2,2..3)]); + bt_assert(l ~ [(1,1..2)]); + bt_assert(l ~ [(1,1)..(1,2)]); + + l = add(l, (2,5)); + l = add(l, (5,one)); + l = add(l, (6,one)); + l = add(l, (one,one)); + l = delete(l, [(5,1),(6,one),(one,1)]); + l = delete(l, [(5,one),(6,one)]); + l = filter(l, [(1,*)]); + bt_assert(l = add(-empty-, (1,2))); + + bt_assert((2,3) !~ l); + bt_assert(l !~ [(2,*)]); + bt_assert(l !~ [(one,3..6)]); + bt_assert(l ~ [(*,*)]); + + l = add(l, (3,one)); + l = add(l, (one+one+one,one+one)); + l = add(l, (3,3)); + l = add(l, (3,4)); + l = add(l, (3,5)); + l2 = filter(l, [(3,*)]); + l = delete(l, [(3,2..4)]); + bt_assert(l = add(add(add(-empty-, (1,2)), (3,1)), (3,5))); + bt_assert(l.len = 3); + + l = add(l, (3,2)); + l = add(l, (4,5)); + bt_assert(l = add(add(add(add(add(-empty-, (1,2)), (3,1)), (3,5)), (3,2)), (4,5))); + + bt_assert(l.len = 5); + bt_assert(l ~ [(*,2)]); + bt_assert(l ~ [(*,5)]); + bt_assert(l ~ [(*, one)]); + bt_assert(l !~ [(*,3)]); + bt_assert(l !~ [(*,(one+6))]); + bt_assert(l !~ [(*, (one+one+one))]); + + l = delete(l, [(*,(one+onef(3)))]); + l = delete(l, [(*,(4+one))]); + bt_assert(l = add(-empty-, (3,1))); + l = delete(l, [(*,(onef(5)))]); + bt_assert(l = -empty-); + + l2 = add(l2, (3,6)); + l = filter(l2, [(3,1..4)]); + l2 = filter(l2, [(3,3..6)]); + + # clist A (10,20,30) + bt_assert(l = add(add(add(add(-empty-, (3,1)), (3,2)), (3,3)), (3,4))); + bt_assert(format(l) = "(clist (3,1) (3,2) (3,3) (3,4))"); + + # clist B (30,40,50) + bt_assert(l2 = 
add(add(add(add(-empty-, (3,3)), (3,4)), (3,5)), (3,6))); + bt_assert(format(l2) = "(clist (3,3) (3,4) (3,5) (3,6))"); + + # clist A union B + r = add(l, l2); + bt_assert(r = add(add(add(add(add(add(-empty-, (3,1)), (3,2)), (3,3)), (3,4)), (3,5)), (3,6))); + bt_assert(format(r) = "(clist (3,1) (3,2) (3,3) (3,4) (3,5) (3,6))"); + + # clist A isect B + r = filter(l, l2); + bt_assert(r = add(add(-empty-, (3,3)), (3,4))); + bt_assert(format(r) = "(clist (3,3) (3,4))"); + + # clist A \ B + r = delete(l, l2); + bt_assert(r = add(add(-empty-, (3,1)), (3,2))); + bt_assert(format(r) = "(clist (3,1) (3,2))"); + + # clist in c set + r = filter(l, [(3,1), (*,2)]); + bt_assert(r = add(add(-empty-, (3,1)), (3,2))); + bt_assert(format(r) = "(clist (3,1) (3,2))"); +} + +bt_test_suite(t_clist, "Testing lists of communities"); + + + + +/* + * Testing Extended Communities + * ---------------------------- + */ + +function t_ec() +ec cc; +{ + cc = (rt, 12345, 200000); + bt_assert(format(cc) = "(rt, 12345, 200000)"); + + bt_assert(cc = (rt, 12345, 200000)); + bt_assert(cc < (rt, 12345, 200010)); + bt_assert(cc != (rt, 12346, 200000)); + bt_assert(cc != (ro, 12345, 200000)); + bt_assert(!(cc > (rt, 12345, 200010))); + + bt_assert(format((ro, 100000, 20000)) = "(ro, 100000, 20000)"); +} + +bt_test_suite(t_ec, "Testing extended communities"); + + + + +/* + * Testing Extended Community List + * ------------------------------- + */ + +function t_eclist() +eclist el; +eclist el2; +eclist r; +{ el = -- empty --; el = add(el, (rt, 10, 20)); el = add(el, (ro, 10.20.30.40, 100)); el = add(el, (ro, 11.21.31.41.mask(16), 200)); - print "EC list (rt, 10, 20) (ro, 10.20.30.40, 100) (ro, 11.21.0.0, 200):"; - print el; - print "EC len: ", el.len; + + bt_assert(--empty-- = --empty--); + bt_assert(((rt, 10, 20)) !~ --empty--); + + bt_assert(format(el) = "(eclist (rt, 10, 20) (ro, 10.20.30.40, 100) (ro, 11.21.0.0, 200))"); + bt_assert(el.len = 3); el = delete(el, (rt, 10, 20)); el = delete(el, (rt, 10, 
30)); + bt_assert(el = add(add(--empty--, (ro, 10.20.30.40, 100)), (ro, 11.21.0.0, 200))); el = add(el, (unknown 2, ten, 1)); el = add(el, (unknown 5, ten, 1)); el = add(el, (rt, ten, one+one)); @@ -195,31 +811,117 @@ lclist ll2; el = add(el, (rt, 10, 5)); el = add(el, (generic, 0x2000a, 3*ten)); el = delete(el, [(rt, 10, 2..ten)]); - print "EC list (ro, 10.20.30.40, 100) (ro, 11.21.0.0, 200) (rt, 10, 1) (unknown 0x5, 10, 1) (rt, 10, 30):"; - print el; + bt_assert(el = add(add(add(add(add(--empty--, (ro, 10.20.30.40, 100)), (ro, 11.21.0.0, 200)), (rt, 10, 1)), (unknown 5, 10, 1)), (rt, 10, 30))); + el = filter(el, [(rt, 10, *)]); - print "EC list (rt, 10, 1) (rt, 10, 30): ", el; - print "Testing EC list, true: ", (rt, 10, 1) ~ el, " ", el ~ [(rt, 10, ten..40)]; - print "Testing EC list, false: ", (rt, 10, 20) ~ el, " ", (ro, 10.20.30.40, 100) ~ el, " ", el ~ [(rt, 10, 35..40)], " ", el ~ [(ro, 10, *)]; + bt_assert(el = add(add(--empty--, (rt, 10, 1)), (rt, 10, 30))); + bt_assert((rt, 10, 1) ~ el); + bt_assert(el ~ [(rt, 10, ten..40)]); + bt_assert((rt, 10, 20) !~ el); + bt_assert((ro, 10.20.30.40, 100) !~ el); + bt_assert(el !~ [(rt, 10, 35..40)]); + bt_assert(el !~ [(ro, 10, *)]); + el = add(el, (rt, 10, 40)); el2 = filter(el, [(rt, 10, 20..40)] ); el2 = add(el2, (rt, 10, 50)); - print "eclist A (1,30,40): ", el; - print "eclist B (30,40,50): ", el2; - print "eclist A union B: ", add( el2, el ); - print "eclist A isect B: ", filter( el, el2 ); - print "eclist A \ B: ", delete( el, el2 ); + + # eclist A (1,30,40) + bt_assert(el = add(add(add(--empty--, (rt, 10, 1)), (rt, 10, 30)), (rt, 10, 40))); + bt_assert(format(el) = "(eclist (rt, 10, 1) (rt, 10, 30) (rt, 10, 40))"); + + # eclist B (30,40,50) + bt_assert(el2 = add(add(add(--empty--, (rt, 10, 30)), (rt, 10, 40)), (rt, 10, 50))); + bt_assert(format(el2) = "(eclist (rt, 10, 30) (rt, 10, 40) (rt, 10, 50))"); + + # eclist A union B + r = add(el2, el); + bt_assert(r = add(add(add(add(--empty--, (rt, 10, 30)), (rt, 
10, 40)), (rt, 10, 50)), (rt, 10, 1))); + bt_assert(format(r) = "(eclist (rt, 10, 30) (rt, 10, 40) (rt, 10, 50) (rt, 10, 1))"); + + # eclist A isect B + r = filter(el, el2); + bt_assert(r = add(add(--empty--, (rt, 10, 30)), (rt, 10, 40))); + bt_assert(format(r) = "(eclist (rt, 10, 30) (rt, 10, 40))"); + + # eclist A \ B + r = delete(el, el2); + bt_assert(r = add(--empty--, (rt, 10, 1))); + bt_assert(format(r) = "(eclist (rt, 10, 1))"); + + # eclist in ec set + r = filter(el, [(rt, 10, 1), (rt, 10, 25..30), (ro, 10, 40)]); + bt_assert(r = add(add(--empty--, (rt, 10, 1)), (rt, 10, 30))); + bt_assert(format(r) = "(eclist (rt, 10, 1) (rt, 10, 30))"); +} + +bt_test_suite(t_eclist, "Testing lists of extended communities"); + + + + +/* + * Testing sets of Extended Communities + * ------------------------------------ + */ + +define ecs2 = [(rt, ten, (one+onef(0))*10), (ro, 100000, 100..200), (rt, 12345, *)]; + +function t_ec_set() +ec set ecs; +{ + ecs = [(rt, ten, (one+onef(0))*10), (ro, 100000, 100..200), (rt, 12345, *)]; + bt_assert(format(ecs) = "[(rt, 10, 20), (rt, 12345, 0)..(rt, 12345, 4294967295), (ro, 100000, 100)..(ro, 100000, 200)]"); + bt_assert(format(ecs2) = "[(rt, 10, 20), (rt, 12345, 0)..(rt, 12345, 4294967295), (ro, 100000, 100)..(ro, 100000, 200)]"); + + bt_assert((rt, 10, 20) ~ ecs); + bt_assert((ro, 100000, 100) ~ ecs); + bt_assert((ro, 100000, 128) ~ ecs); + bt_assert((ro, 100000, 200) ~ ecs); + bt_assert((rt, 12345, 0) ~ ecs); + bt_assert((rt, 12345, 200000) ~ ecs); + bt_assert((rt, 12345, 4000000) ~ ecs); + bt_assert((ro, 10, 20) !~ ecs); + bt_assert((rt, 10, 21) !~ ecs); + bt_assert((ro, 100000, 99) !~ ecs); + bt_assert((ro, 12345, 10) !~ ecs); + bt_assert((rt, 12346, 0) !~ ecs); + bt_assert((ro, 0.1.134.160, 150) !~ ecs); +} + +bt_test_suite(t_ec_set, "Testing sets of extended communities"); + + + + +/* + * Testing Large Communities + * ------------------------- + */ + +function mktrip(int a) +{ + return (a, 2*a, 3*a); +} + +function t_lclist() 
+lclist ll; +lclist ll2; +lclist r; +{ + bt_assert(---empty--- = ---empty---); + bt_assert((10, 20, 30) !~ ---empty---); ll = --- empty ---; ll = add(ll, (ten, 20, 30)); ll = add(ll, (1000, 2000, 3000)); ll = add(ll, mktrip(100000)); - print "LC list (10, 20, 30) (1000, 2000, 3000) (100000, 200000, 300000):"; - print ll; - print "LC len: ", el.len; - print "Should be true: ", mktrip(1000) ~ ll, " ", ll ~ [(5,10,15), (10,20,30)], " ", ll ~ [(10,15..25,*)], " ", ll ~ [(ten, *, *)]; - print "Should be false: ", mktrip(100) ~ ll, " ", ll ~ [(5,10,15), (10,21,30)], " ", ll ~ [(10,21..25,*)], " ", ll ~ [(11, *, *)]; - print "LC filtered: ", filter(ll, [(5..15, *, *), (100000, 500..500000, *)]); + bt_assert(format(ll) = "(lclist (10, 20, 30) (1000, 2000, 3000) (100000, 200000, 300000))"); + bt_assert(ll.len = 3); + bt_assert(ll = add(add(add(---empty---, (10, 20, 30)), (1000, 2000, 3000)), (100000, 200000, 300000))); + + bt_assert(mktrip(1000) ~ ll); + bt_assert(mktrip(100) !~ ll); ll = --- empty ---; ll = add(ll, (10, 10, 10)); @@ -231,240 +933,375 @@ lclist ll2; ll2 = add(ll2, (30, 30, 30)); ll2 = add(ll2, (40, 40, 40)); - print "lclist A (10,20,30): ", ll; - print "lclist B (20,30,40): ", ll2; - print "lclist A union B: ", add(ll, ll2); - print "lclist A isect B: ", filter(ll, ll2); - print "lclist A \ B: ", delete(ll, ll2); + # lclist A (10, 20, 30) + bt_assert(format(ll) = "(lclist (10, 10, 10) (20, 20, 20) (30, 30, 30))"); -# test_roa(); -} + # lclist B (20, 30, 40) + bt_assert(format(ll2) = "(lclist (20, 20, 20) (30, 30, 30) (40, 40, 40))"); -function bla() -{ - print "fifteen called"; - return 15; -} + # lclist A union B + r = add(ll, ll2); + bt_assert(r = add(add(add(add(---empty---, (10,10,10)), (20,20,20)), (30,30,30)), (40,40,40))); + bt_assert(format(r) = "(lclist (10, 10, 10) (20, 20, 20) (30, 30, 30) (40, 40, 40))"); -define four=4; -define onetwo=1.2.3.4; + # lclist A isect B + r = filter(ll, ll2); + bt_assert(r = add(add(---empty---, (20, 20, 20)), (30, 
30, 30))); + bt_assert(format(r) = "(lclist (20, 20, 20) (30, 30, 30))"); -function __test1() -{ - if source ~ [ RTS_BGP, RTS_STATIC ] then { -# ospf_metric1 = 65535; -# ospf_metric2 = 1000; - ospf_tag = 0x12345678; - accept; - } reject; -} + # lclist A \ B + r = delete(ll, ll2); + bt_assert(r = add(---empty---, (10, 10, 10))); + bt_assert(format(r) = "(lclist (10, 10, 10))"); -function __test2() -{ - if source ~ [ RTS_BGP, RTS_STATIC ] then { -# ospf_metric1 = 65535; -# ospf_metric2 = 1000; - ospf_tag = 0x12345678; - accept; - } reject; + # lclist in lc set + r = filter(ll, [(5..15, *, *), (20, 15..25, *)]); + bt_assert(r = add(add(---empty---, (10, 10, 10)), (20, 20, 20))); + bt_assert(format(r) = "(lclist (10, 10, 10) (20, 20, 20))"); } -function test_pxset(prefix set pxs) +bt_test_suite(t_lclist, "Testing lists of large communities"); + + + + +/* + * Testing sets of Large Communities + * --------------------------------- + */ + +function t_lclist_set() +lclist ll; +lc set lls; { - print pxs; - print " must be true: ", 10.0.0.0/8 ~ pxs, ",", 10.0.0.0/10 ~ pxs, ",", 10.0.0.0/12 ~ pxs, ",", - 20.0.0.0/24 ~ pxs, ",", 20.0.40.0/24 ~ pxs, ",", 20.0.0.0/26 ~ pxs, ",", - 20.0.100.0/26 ~ pxs, ",", 20.0.0.0/28 ~ pxs, ",", 20.0.255.0/28 ~ pxs; - print " must be false: ", 10.0.0.0/7 ~ pxs, ",", 10.0.0.0/13 ~ pxs, ",", 10.0.0.0/16 ~ pxs, ",", - 20.0.0.0/16 ~ pxs, ",", 20.0.0.0/23 ~ pxs, ",", 20.0.0.0/29 ~ pxs, ",", - 11.0.0.0/10 ~ pxs, ",", 20.1.0.0/26 ~ pxs; + ll = --- empty ---; + ll = add(ll, (10, 20, 30)); + ll = add(ll, (1000, 2000, 3000)); + ll = add(ll, mktrip(100000)); + + bt_assert(ll ~ [(5,10,15), (10,20,30)]); + bt_assert(ll ~ [(10,15..25,*)]); + bt_assert(ll ~ [(ten, *, *)]); + + bt_assert(ll !~ [(5,10,15), (10,21,30)]); + bt_assert(ll !~ [(10,21..25,*)]); + bt_assert(ll !~ [(11, *, *)]); + + lls = [(10, 10, 10), (20, 20, 15..25), (30, 30, *), (40, 35..45, *), (50, *, *), (55..65, *, *)]; + bt_assert(format(lls) = "[(10, 10, 10), (20, 20, 15)..(20, 20, 25), (30, 
30, 0)..(30, 30, 4294967295), (40, 35, 0)..(40, 45, 4294967295), (50, 0, 0)..(50, 4294967295, 4294967295), (55, 0, 0)..(65, 4294967295, 4294967295)]"); + bt_assert((10, 10, 10) ~ lls); + bt_assert((20, 20, 25) ~ lls); + bt_assert((20, 20, 26) !~ lls); + bt_assert((30, 30, 0) ~ lls); + bt_assert((40, 35, 40) ~ lls); + bt_assert((40, 34, 40) !~ lls); + bt_assert((50, 0, 0) ~ lls); + bt_assert((60, 60, 60) ~ lls); + bt_assert((70, 60, 60) !~ lls); } +bt_test_suite(t_lclist_set, "Testing sets of large communities"); + + + + +/* + * Testing defined() function + * -------------------------- + */ + function test_undef(int a) int b; { - if a = 3 - then b = 4; - print "Defined: ", a, " ", b, " ", defined(b); + if a = 3 then { + b = 4; + bt_assert(defined(b)); + } + else { + bt_assert(!defined(b)); + } } -define is1 = [ one, (2+1), (6-one), 8, 11, 15, 17, 19]; -define is2 = [(17+2), 17, 15, 11, 8, 5, 3, 2]; -define is3 = [5, 17, 2, 11, 8, 15, 3, 19]; +function t_define() +int i; +{ + test_undef(2); + test_undef(3); + test_undef(2); -define pxs2 = [ 10.0.0.0/16{8,12}, 20.0.0.0/16{24,28} ]; + bt_assert(defined(1)); + bt_assert(defined(1.2.3.4)); +} -define ecs2 = [(rt, ten, (one+onef(0))*10), (ro, 100000, 100..200), (rt, 12345, *)]; +bt_test_suite(t_define, "Testing defined() function"); -function __startup() + + +/* + * Testing calling functions + * ------------------------- + */ + +function callme(int arg1; int arg2) int i; -bool b; -prefix px; -ip p; -pair pp; -quad qq; -ec cc; -int set is; -pair set ps; -ec set ecs; -ip set ips; -prefix set pxs; -string st; { - print "1a-a1 = 30: ", '1a-a1'; - print "Testing filter language:"; - i = four; - i = 12*100 + 60/2 + i; - i = ( i + 0 ); - print " arithmetics: 1234 = ", i; - printn " if statements "; - print "what happens here?"; - printn "."; - if (i = 4) then { print "*** FAIL: if 0"; quitbird; } else printn "."; -# if !(i = 3) then { print "*** FAIL: if 0"; quitbird; } else printn "."; - if 1234 = i then printn "."; else { 
print "*** FAIL: if 1 else"; } -# if 1 <= 1 then printn "."; else { print "*** FAIL: test 3"; } - if 1234 < 1234 then { print "*** FAIL: test 4"; quitbird; } else print "ok"; - is = [ 2, 3, 4, 7..11 ]; + case arg1 { + 1, 42: return 42; + else: return arg1 * arg2; + } - print "must be true: ", 1 = 1, " ", 1 != (0,1), " ", 1 != "a", " ", +empty+ = +empty+, " ", -empty- = -empty-, " ", --empty-- = --empty-- , - " ", [1,4..10,20] = [1,4..10,20] , " ", [ 10.0.0.0/8{ 15 , 17 } ] = [ 10.0.0.0/8{ 15 , 17 } ]; - print "must be false: ", 1 != 1, " ", 1 = (0,1), " ", 1 = "a", " ", +empty+ = -empty-, " ", -empty- = --empty--, " ", --empty-- = +empty+ , - " ", [1,2] = [1,3], " ", [ 10.0.0.0/8{ 15 , 17 } ] = [ 11.0.0.0/8{ 15 , 17 } ]; + return 0; +} - print " must be true: ", 1.2.0.0/16 ~ [ 1.0.0.0/8{ 15 , 17 } ]; - print " data types; must be true: ", 1.2.3.4 = 1.2.3.4, ",", 1 ~ [1,2,3], ",", 5 ~ [1..20], ",", 10 ~ is, ",", 2 ~ [ 1, 2, 3 ], ",", 5 ~ [ 4 .. 7 ], ",", 1.2.3.4 ~ [ 1.2.3.3..1.2.3.5 ], ",", 1.2.3.4 ~ 1.0.0.0/8, ",", 1.0.0.0/8 ~ 1.0.0.0/8, ",", 1.0.0.0/8 ~ [ 1.0.0.0/8+ ]; - print " must be true: ", true && true, ",", true || false, ",", ! false && ! false && true, ",", 1 < 2 && 1 != 3, ",", true && true && ! 
false, ",", true || 1+"a", ",", !(false && 1+"a"); +function fifteen() +{ + return 15; +} - print " must be true: ", defined(1), ",", defined(1.2.3.4), ",", 1 != 2, ",", 1 <= 2; - print " data types: must be false: ", 1 ~ [ 2, 3, 4 ], ",", 5 ~ is, ",", 1.2.3.4 ~ [ 1.2.3.3, 1.2.3.5 ], ",", (1,2) > (2,2), ",", (1,1) > (1,1), ",", 1.0.0.0/9 ~ [ 1.0.0.0/8- ], ",", 1.2.0.0/17 ~ [ 1.0.0.0/8{ 15 , 16 } ], ",", true && false; +function t_call_function() +{ + bt_assert(fifteen() = 15); + bt_assert(callme(1, 2) = 42); + bt_assert(callme(42, 2) = 42); - print " must be true: ", 1 ~ is1, " ", 3 ~ is1, " ", 5 ~ is1; - print " must be true: ", (one+2) ~ is1, " ", 2 ~ is2, " ", 2 ~ is3; - print " must be false: ", 4 ~ is1, " ", 4 ~ is2, " ", 4 ~ is3; - print " must be false: ", 10 ~ is1, " ", 10 ~ is2, " ", 10 ~ is3; - print " must be true: ", 15 ~ is1, " ", 15 ~ is2, " ", 15 ~ is3; - print " must be false: ", 18 ~ is1, " ", 18 ~ is2, " ", 18 ~ is3; - print " must be true: ", 19 ~ is1, " ", 19 ~ is2, " ", 19 ~ is3; - print " must be false: ", 20 ~ is1, " ", 20 ~ is2, " ", 20 ~ is3; + bt_assert(callme(2, 2) = 4); + bt_assert(callme(3, 2) = 6); + bt_assert(callme(4, 4) = 16); + bt_assert(callme(7, 2) = 14); +} - px = 1.2.0.0/18; - print "Testing prefixes: 1.2.0.0/18 = ", px; - print " must be true: ", 192.168.0.0/16 ~ 192.168.0.0/16, " ", 192.168.0.0/17 ~ 192.168.0.0/16, " ", 192.168.254.0/24 ~ 192.168.0.0/16; - print " must be false: ", 192.168.0.0/15 ~ 192.168.0.0/16, " ", 192.160.0.0/17 ~ 192.168.0.0/16; +bt_test_suite(t_call_function, "Testing calling functions"); - p = 127.1.2.3; - print "Testing mask : 127.0.0.0 = ", p.mask(8); - - pp = (1, 2); - print "Testing pairs: (1,2) = ", (1,2), " = ", pp, " = ", (1,1+1), " = ", 'mkpair-a'(2); - print " must be true: ", (1,2) = (1,1+1); - print "Testing enums: ", RTS_DUMMY, " ", RTS_STATIC, " ", - ", true: ", RTS_STATIC ~ [RTS_STATIC, RTS_DEVICE], - ", false: ", RTS_BGP ~ [RTS_STATIC, RTS_DEVICE]; - ps = [(1,(one+one)), (3,4)..(4,8), 
(5,*), (6,3..6)]; - print "Pair set: ", ps; - print "Testing pair set, true: ", pp ~ ps, " ", (3,5) ~ ps, " ", (4,1) ~ ps, " ", (5,4) ~ ps, " ", (5,65535) ~ ps, " ", (6,4) ~ ps, " ", (3, 10000) ~ ps; - print "Testing pair set, false: ", (3,3) ~ ps, " ", (4,9) ~ ps, " ", (4,65535) ~ ps, " ", (6,2) ~ ps, " ", (6,6+one) ~ ps, " ", ((one+6),2) ~ ps, " ", (1,1) ~ ps; - ps = [(20..150, 200..300), (50100..50200, 1000..50000), (*, 5+5)]; - print "Pair set: .. too long .."; - print "Testing pair set, true: ", (100,200) ~ ps, " ", (150,300) ~ ps, " ", (50180,1200) ~ ps, " ", (50110,49000) ~ ps, " ", (0,10) ~ ps, " ", (64000,10) ~ ps; - print "Testing pair set, false: ", (20,199) ~ ps, " ", (151,250) ~ ps, " ", (50050,2000) ~ ps, " ", (50150,50050) ~ ps, " ", (10,9) ~ ps, " ", (65535,11) ~ ps ; - qq = 1.2.3.4; - print "Testinq quad: 1.2.3.4 = ", qq, - ", true: ", qq = 1.2.3.4, " ", qq ~ [1.2.3.4, 5.6.7.8], - ", false: ", qq = 4.3.2.1, " ", qq ~ [1.2.1.1, 1.2.3.5]; +/* + * Test including another config file + * ---------------------------------- + */ - cc = (rt, 12345, 200000); - print "Testing EC: (rt, 12345, 200000) = ", cc; - print "Testing EC: (ro, 100000, 20000) = ", (ro, 100000, 20000); - print "Testing EC: (rt, 10.20.30.40, 20000) = ", (rt, 10.20.30.40, 20000); - print " true: ", cc = (rt, 12345, 200000), " ", cc < (rt, 12345, 200010), - ", false: ", cc = (rt, 12346, 200000), " ", cc = (ro, 12345, 200000), " ", cc > (rt, 12345, 200010); +function t_include() +int i; +{ + i = 1; + include "test.conf.inc"; + bt_assert(i = 42); +} - ecs = [(rt, ten, (one+onef(0))*10), (ro, 100000, 100..200), (rt, 12345, *)]; - print "EC set: ", ecs; - print "EC set: ", ecs2; - print "Testing EC set, true: ", (rt, 10, 20) ~ ecs, " ", (ro, 100000, 100) ~ ecs, " ", (ro, 100000, 200) ~ ecs, - " ", (rt, 12345, 0) ~ ecs, " ", cc ~ ecs, " ", (rt, 12345, 4000000) ~ ecs; - print "Testing EC set, false: ", (ro, 10, 20) ~ ecs, " ", (rt, 10, 21) ~ ecs, " ", (ro, 100000, 99) ~ ecs, - " ", (ro, 12345, 
10) ~ ecs, " ", (rt, 12346, 0) ~ ecs, " ", (ro, 0.1.134.160, 150) ~ ecs; +bt_test_suite(t_include, "Testing including another config file"); - st = "Hello"; - print "Testing string: ", st, " true: ", st ~ "Hell*", " false: ", st ~ "ell*"; - - b = true; - print "Testing bool: ", b, ", ", !b; - if ( b = true ) then print "Testing bool comparison b = true: ", b; - else { print "*** FAIL: TRUE test failed" ; quitbird; } - - ips = [ 1.1.1.0 .. 1.1.1.255, ip1222]; - print "Testing IP sets: "; - print ips; - print " must be true: ", 1.1.1.0 ~ ips, ",", 1.1.1.100 ~ ips, ",", 1.2.2.2 ~ ips; - print " must be false: ", 1.1.0.255 ~ ips, ",", 1.1.2.0 ~ ips, ",", 1.2.2.3 ~ ips, ",", 192.168.1.1 ~ ips; - - pxs = [ 1.2.0.0/16, 1.4.0.0/16+]; - print "Testing prefix sets: "; - print pxs; - print " must be true: ", 1.2.0.0/16 ~ pxs, ",", 1.4.0.0/16 ~ pxs, ",", 1.4.0.0/18 ~ pxs, ",", 1.4.0.0/32 ~ pxs; - print " must be false: ", 1.1.0.0/16 ~ pxs, ",", 1.3.0.0/16 ~ pxs, ",", 1.2.0.0/15 ~ pxs, ",", 1.2.0.0/17 ~ pxs, ",", - 1.2.0.0/32 ~ pxs, ",", 1.4.0.0/15 ~ pxs; - test_pxset(pxs2); - test_pxset([ 10.0.0.0/16{8,12}, 20.0.0.0/16{24,28} ]); - print "What will this do? 
", [ 1, 2, 1, 1, 1, 3, 4, 1, 1, 1, 5 ]; - print "Testing functions..."; - callme ( 1, 2 ); - callme ( 2, 2 ); - callme ( 2, 2 ); - callme ( 3, 2 ); - callme ( 4, 4 ); - callme ( 7, 2 ); +/* + * Test if-else statement + * ---------------------- + */ + +function t_if_else() +int i; +{ + if true then + bt_assert(true); - i = fifteen(); - print "Testing function calls: 15 = ", i; + if false then + bt_assert(false); + else if true then + bt_assert(true); + else + bt_assert(false); +} - path_test(); +bt_test_suite(t_if_else, "Testing if-else statement"); - print "1.2.3.4 = ", onetwo; - i = 4200000000; - print "4200000000 = ", i, " true: ", i = 4200000000, " ", i > 4100000000, " false: ", i > 4250000000; - test_undef(2); - test_undef(3); - test_undef(2); - print "Testing include"; - include "test.conf.inc"; +/* + * Unused functions -- testing only parsing + * ---------------------------------------- + */ + +function __test1() +{ + if source ~ [ RTS_BGP, RTS_STATIC ] then { +# ospf_metric1 = 65535; +# ospf_metric2 = 1000; + ospf_tag = 0x12345678; + accept; + } + reject; +} - print "done"; - quitbird; -# print "*** FAIL: this is unreachable"; +function __test2() +{ + if source ~ [ RTS_BGP, RTS_STATIC ] then { +# ospf_metric1 = 65535; +# ospf_metric2 = 1000; + ospf_tag = 0x12345678; + accept; + } + reject; } -filter testf -int j; -{ +filter testf +int j; +{ print "Heya, filtering route to ", net.ip, " prefixlen ", net.len, " source ", source; print "This route was from ", from; j = 7; j = 17; if rip_metric > 15 then { - reject "RIP Metric is more than infinity"; + reject "RIP Metric is more than infinity"; } rip_metric = 14; unset(rip_metric); - + accept "ok I take that"; } -eval __startup(); +filter roa_filter +{ + if net ~ [ 10.0.0.0/8{16,24}, 2000::/3{16,96} ] then { + accept; + } + reject; +} + +roa4 table r4; +roa6 table r6; + +protocol static +{ + roa4 { table r4; import filter roa_filter; }; + route 10.110.0.0/16 max 16 as 1000; + route 10.120.0.0/16 max 24 as 1000; + 
route 10.130.0.0/16 max 24 as 2000; + route 10.130.128.0/18 max 24 as 3000; +} + +protocol static +{ + roa6 { table r6; import filter roa_filter; }; + route 2001:0db8:85a3:8a2e::/64 max 96 as 1000; +} + +function test_roa_check() +prefix pfx; +{ + # cannot be tested in __startup(), sorry + bt_assert(roa_check(r4, 10.10.0.0/16, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r4, 10.0.0.0/8, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r4, 10.110.0.0/16, 1000) = ROA_VALID); + bt_assert(roa_check(r4, 10.110.0.0/16, 2000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.110.32.0/20, 1000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.120.32.0/20, 1000) = ROA_VALID); + bt_assert(roa_check(r4, 10.120.32.0/20, 2000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.120.32.32/28, 1000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.130.130.0/24, 1000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.130.130.0/24, 2000) = ROA_VALID); + bt_assert(roa_check(r4, 10.130.30.0/24, 3000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.130.130.0/24, 3000) = ROA_VALID); + + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_VALID); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e::/64, 1000) = ROA_VALID); + bt_assert(roa_check(r6, 2001:0db8:85a3::/48, 1000) = ROA_UNKNOWN); + + bt_assert(roa_check(r4, 10.10.0.0/16, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r4, 10.0.0.0/8, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r4, 10.110.0.0/16, 1000) = ROA_VALID); + bt_assert(roa_check(r4, 10.110.0.0/16, 2000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.110.32.0/20, 1000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.120.32.0/20, 1000) = ROA_VALID); + + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_VALID); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e::/64, 1000) = ROA_VALID); + bt_assert(roa_check(r6, 2001:0db8:85a3::/48, 
1000) = ROA_UNKNOWN); + + bt_assert(roa_check(r4, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID); + + bt_assert(roa_check(r4, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_VALID); + bt_assert(roa_check(r4, 2001:0db8:85a3::/48, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r6, 2001:0db8:85a3::/48, 1000) = ROA_UNKNOWN); + + bt_assert(10.130.130.0/24 ~ 0.0.0.0/0); + bt_assert(2001:0db8:85a3:8a2e::/64 ~ ::/0); + bt_assert(10.130.130.0/24 !~ ::/0); + bt_assert(2001:0db8:85a3:8a2e::/64 !~ 0.0.0.0/0); + + pfx = 12.13.0.0/16 max 24 as 1234; + bt_assert(pfx.len = 16); + bt_assert(pfx.maxlen = 24); + bt_assert(pfx.asn = 1234); + + pfx = 1000::/8 max 32 as 1234; + bt_assert(pfx.len = 8); + bt_assert(pfx.maxlen = 32); + bt_assert(pfx.asn = 1234); +} + +bt_test_suite(test_roa_check, "Testing ROA"); + +/* + * Testing Mixed Net Types + * ----------------------- + */ + +function t_mixed_prefix() +prefix set pxs; +prefix set pxt; +{ + pxs = [ 98.45.0.0/16, 128.128.0.0/12+, 2200::/42-, ::ffff:d000:0/100{98,102}]; + bt_assert(format(pxs) = "[::/0, ::/2{c000::}, 98.45.0.0/112{::0.1.0.0}, 128.128.0.0/108{::0.31.255.255}, 208.0.0.0/100{::124.0.0.0}, 2200::/42{ffff:ffff:ffc0::}]"); + bt_assert(::fe00:0:0/88 !~ pxs); + bt_assert(::fffe:0:0/95 !~ pxs); + bt_assert(::ffff:d800:0/101 ~ pxs); + bt_assert(216.0.0.0/5 ~ pxs); + bt_assert(212.0.0.0/6 ~ pxs); + bt_assert(212.0.0.0/7 !~ pxs); + bt_assert(::ffff:8080:8080/121 ~ pxs); + bt_assert(::/0 ~ pxs); + bt_assert(0.0.0.0/0 !~ pxs); + bt_assert(128.135.64.17/32 ~ pxs); + +# pxt = [ 0:1:2 10.1.10.0/24, 0:5:10000 10.1.10.0/24 ]; +# print pxt; + + bt_assert(format(NET_IP4) = "(enum 36)1"); ## if (net.type = NET_IP4) ... 
+ bt_assert(format(NET_VPN6) = "(enum 36)4"); + bt_assert(format(0:1:2) = "1:2"); +} + +bt_test_suite(t_mixed_prefix, "Testing mixed net types"); + + +filter vpn_filter +{ + bt_assert(format(net) = "0:1:2 10.1.10.0/24"); + bt_assert(net.type = NET_VPN4); + bt_assert(net.type != NET_IP4); + bt_assert(net.type != NET_IP6); + bt_assert(net.rd = 0:1:2); + + case (net.type) { + NET_IP4: print "IPV4"; + NET_IP6: print "IPV6"; + } + + accept; +} + +vpn4 table v4; +vpn4 table v6; + +protocol static +{ + vpn4 { table v4; import filter vpn_filter; }; + route 0:1:2 10.1.10.0/24 unreachable; +} diff --git a/filter/test.conf.inc b/filter/test.conf.inc index 109a49c5..8ede2d18 100644 --- a/filter/test.conf.inc +++ b/filter/test.conf.inc @@ -1,5 +1,3 @@ -print "Entering include"; -print "Should be 2: ", 1+1; -print "Leaving include"; - +bt_assert(1+1 = 2); +i = 42; diff --git a/filter/test.conf2 b/filter/test.conf2 index 60bdd965..48515020 100644 --- a/filter/test.conf2 +++ b/filter/test.conf2 @@ -18,6 +18,7 @@ protocol direct { protocol kernel { disabled; + ipv4; # Must be specified at least one channel # learn; # Learn all routes from the kernel # scan time 10; # Scan kernel tables every 10 seconds } @@ -25,51 +26,57 @@ protocol kernel { protocol static { # disabled; - import filter { print "ahoj"; - print source; - if source = RTS_STATIC then { - print "It is from static"; - } - print from; - from = 1.2.3.4; - print from; - print scope; - scope = SCOPE_HOST; - print scope; - if !(scope ~ [ SCOPE_HOST, SCOPE_SITE ]) then { - print "Failed in test"; - quitbird; - } - - preference = 15; - print preference; - preference = 29; - print preference; - rip_metric = 1; - print rip_metric; - rip_metric = rip_metric + 5; - print rip_metric; - bgp_community = - empty - ; - print "nazdar"; - bgp_community = add(bgp_community, (1,2)); - print "cau"; - bgp_community = add(bgp_community, (2,3)); - bgp_community.add((4,5)); - print "community = ", bgp_community; - bgp_community.delete((2,3)); - 
print "community = ", bgp_community; - bgp_community.empty; - print "community = ", bgp_community; - print "done"; - }; + ipv4 { + export all; + + import filter { + print "ahoj"; + print source; + if source = RTS_STATIC then { + print "It is from static"; + } + print from; + from = 1.2.3.4; + print from; + print scope; + scope = SCOPE_HOST; + print scope; + if !(scope ~ [ SCOPE_HOST, SCOPE_SITE ]) then { + print "Failed in test"; + quitbird; + } + + preference = 15; + print preference; + preference = 29; + print preference; + rip_metric = 1; + print rip_metric; + rip_metric = rip_metric + 5; + print rip_metric; + bgp_community = -empty-; + print "hi"; + bgp_community = add(bgp_community, (1,2)); + print "hello"; + bgp_community = add(bgp_community, (2,3)); + bgp_community.add((4,5)); + print "community = ", bgp_community; + bgp_community.delete((2,3)); + print "community = ", bgp_community; + bgp_community.empty; + print "community = ", bgp_community; + print "done"; + + accept; + }; + }; route 0.0.0.0/0 via 195.113.31.113; route 62.168.0.0/25 reject; route 1.2.3.4/32 via 195.113.31.124; -# route 10.0.0.0/8 reject; -# route 10.1.1.0:255.255.255.0 via 62.168.0.3; -# route 10.1.2.0:255.255.255.0 via 62.168.0.3; -# route 10.1.3.0:255.255.255.0 via 62.168.0.4; -# route 10.2.0.0/24 via "arc0"; - export all; + route 10.0.0.0/8 reject; + route 10.1.1.0/24 via 62.168.0.3; + route 10.1.2.0/24 via 62.168.0.3; + route 10.1.3.0/24 via 62.168.0.4; + route 10.2.0.0/24 via "arc0"; } diff --git a/filter/test6.conf b/filter/test6.conf deleted file mode 100644 index f25ffc47..00000000 --- a/filter/test6.conf +++ /dev/null @@ -1,182 +0,0 @@ -/* - * This is an example configuration file. - * FIXME: add all examples from docs here. 
- */ - -# Yet another comment - -router id 62.168.0.1; - -define xyzzy = (120+10); - -function callme(int arg1; int arg2) -int local1; -int local2; -int i; -{ - printn "Function callme called arguments ", arg1, " and ", arg2, ":" ; - i = arg2; - - case arg1 { - 2: print "dva"; print "jeste jednou dva"; - 3 .. 5: print "tri az pet"; - else: print "neco jineho"; - } -} - -function fifteen() -{ - print "fifteen called"; - return 15; -} - -function paths() -bgpmask pm1; -bgpmask pm2; -bgppath p2; -clist l; -{ - pm1 = / 4 3 2 1 /; - pm2 = [= 4 3 2 1 =]; - print "Testing path masks: ", pm1, " ", pm2; - p2 = prepend( + empty +, 1 ); - p2 = prepend( p2, 2 ); - p2 = prepend( p2, 3 ); - p2 = prepend( p2, 4 ); - print "Testing paths: ", p2; - print "Should be true: ", p2 ~ pm1, " ", p2 ~ pm2; - print "4 = ", p2.len; - p2 = prepend( p2, 5 ); - print "Should be false: ", p2 ~ pm1, " ", p2 ~ pm2; - print "Should be true: ", p2 ~ / ? 4 3 2 1 /, " ", p2, " ", / ? 4 3 2 1 /; - print "Should be true: ", p2 ~ [= * 4 3 * 1 =], " ", p2, " ", [= * 4 3 * 1 =]; - print "5 = ", p2.len; - - pm1 = [= 1 2 * 3 4 5 =]; - p2 = prepend( + empty +, 5 ); - p2 = prepend( p2, 4 ); - p2 = prepend( p2, 3 ); - p2 = prepend( p2, 3 ); - p2 = prepend( p2, 2 ); - p2 = prepend( p2, 1 ); - print "Should be true: ", p2 ~ pm1, " ", p2, " ", pm1; - - l = - empty -; - l = add( l, (1,2) ); - l = add( l, (2,3) ); - print "Community list (1,2) (2,3) ", l; - print "Should be true: ", (2,3) ~ l; - l = delete( l, (2,3) ); - print "Community list (1,2) ", l; - print "Should be false: ", (2,3) ~ l; -} - -function bla() -{ - print "fifteen called"; - return 15; -} - -define four=4; - -function test_pxset(prefix set pxs) -{ - print " must be true: ", 1000::/8 ~ pxs, ",", 1000::/10 ~ pxs, ",", 1000::/12 ~ pxs, ",", - 2000::/24 ~ pxs, ",", 2000:4000::/24 ~ pxs, ",", 2000::/26 ~ pxs, ",", - 2000:8000::/26 ~ pxs, ",", 2000::/28 ~ pxs, ",", 2000:FFF0::/28 ~ pxs; - print " must be false: ", 1000::/7 ~ pxs, ",", 1000::/13 ~ pxs, 
",", 1000::/16 ~ pxs, ",", - 2000::/16 ~ pxs, ",", 2000::/23 ~ pxs, ",", 2000::/29 ~ pxs, ",", - 1100::/10 ~ pxs, ",", 2010::/26 ~ pxs; -} - -function __startup() -int i; -bool b; -prefix px; -ip p; -pair pp; -int set is; -prefix set pxs; -string s; -{ - print "Testing filter language:"; - i = four; - i = 12*100 + 60/2 + i; - i = ( i + 0 ); - print " arithmetics: 1234 = ", i; - printn " if statements "; - print "what happens here?"; - printn "."; - if (i = 4) then { print "*** FAIL: if 0"; quitbird; } else printn "."; -# if !(i = 3) then { print "*** FAIL: if 0"; quitbird; } else printn "."; - if 1234 = i then printn "."; else { print "*** FAIL: if 1 else"; } -# if 1 <= 1 then printn "."; else { print "*** FAIL: test 3"; } - if 1234 < 1234 then { print "*** FAIL: test 4"; quitbird; } else print "ok"; - is = [ 2, 3, 4, 7..11 ]; - print " must be true: ", 1180::/16 ~ [ 1100::/8{ 15 , 17 } ]; - print " data types; must be true: ", 12::34 = 12::34, ",", 1 ~ [1,2,3], ",", 5 ~ [1..20], ",", 10 ~ is, ",", 2 ~ [ 1, 2, 3 ], ",", 5 ~ [ 4 .. 
7 ], ",", 12::34 ~ [ 12::33..12::35 ], ",", 1020::34 ~ 1000::/8, ",", 1000::/8 ~ 1000::/8, ",", 1000::/8 ~ [ 1000::/8+ ]; - print " must be true: ", true && true, ",", true || false; - -# print " must be true: ", defined(1), ",", defined(1.2.3.4), ",", 1 != 2, ",", 1 <= 2; - print " data types: must be false: ", 1 ~ [ 2, 3, 4 ], ",", 5 ~ is, ",", 12::34 ~ [ 12::33, 12::35 ], ",", (1,2) > (2,2), ",", (1,1) > (1,1), ",", 1000::/9 ~ [ 1000::/8- ], ",", 1000::/17 ~ [ 1000::/8{ 15 , 16 } ], ",", true && false; - - px = 1020::/18; - print "Testing prefixes: 1020::/18 = ", px; - p = 1234:5678::; - print "Testing mask : 1200:: = ", p.mask(8); - - pp = (1, 2); - print "Testing pairs: (1,2) = ", (1,2), " = ", pp; - print "Testing enums: ", RTS_DUMMY, " ", RTS_STATIC; - - s = "Hello"; - print "Testing string: ", s, " true: ", s ~ "Hell*", " false: ", s ~ "ell*"; - - b = true; - print "Testing bool: ", b, ", ", !b; - - pxs = [ 1102::/16, 1104::/16+]; - print "Testing prefix sets: "; - print pxs; - print " must be true: ", 1102::/16 ~ pxs, ",", 1104::/16 ~ pxs, ",", 1104::/18 ~ pxs, ",", 1104::/32 ~ pxs; - print " must be false: ", 1101::/16 ~ pxs, ",", 1103::/16 ~ pxs, ",", 1102::/15 ~ pxs, ",", 1102::/17 ~ pxs, ",", - 1102::/32 ~ pxs, ",", 1104::/15 ~ pxs; - - test_pxset([ 1000::/16{8,12}, 2000::/16{24,28} ]); - print "What will this do? 
", [ 1, 2, 1, 1, 1, 3, 4, 1, 1, 1, 5 ]; - - print "Testing functions..."; -# callme ( 1, 2 ); - callme ( 2, 2 ); - callme ( 2, 2 ); - callme ( 3, 2 ); - callme ( 4, 2 ); - callme ( 7, 2 ); - - i = fifteen(); - print "Testing function calls: 15 = ", i; - - paths(); - - print "done"; - quitbird; -# print "*** FAIL: this is unreachable"; -} - -filter testf -int j; -{ - print "Heya, filtering route to ", net.ip, " prefixlen ", net.len, " source ", source; - print "This route was from ", from; - j = 7; - j = 17; - if rip_metric > 15 then { - reject "RIP Metric is more than infinity"; - } - rip_metric = 14; - unset(rip_metric); - - accept "ok I take that"; -} - -eval __startup();
\ No newline at end of file diff --git a/filter/tree_test.c b/filter/tree_test.c new file mode 100644 index 00000000..5b22a9fe --- /dev/null +++ b/filter/tree_test.c @@ -0,0 +1,304 @@ +/* + * Filters: Utility Functions Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "test/bt-utils.h" + +#include "filter/filter.h" +#include "conf/conf.h" + +#define MAX_TREE_HEIGHT 13 + +static void +start_conf_env(void) +{ + bt_bird_init(); + + pool *p = rp_new(&root_pool, "helper_pool"); + linpool *l = lp_new_default(p); + cfg_mem = l; +} + +static struct f_tree * +new_tree(uint id) +{ + struct f_tree *tree = f_new_tree(); + tree->from.type = tree->to.type = T_INT; + tree->from.val.i = tree->to.val.i = id; + + return tree; +} + +/* + * Show subtree in infix notation + */ +static void +show_subtree(struct f_tree *node) +{ + if (!node) + return; + + show_subtree(node->left); + + if (node->from.val.i == node->to.val.i) + bt_debug("%u ", node->from.val.i); + else + bt_debug("%u..%u ", node->from.val.i, node->to.val.i); + + show_subtree(node->right); +} + +static void +show_tree2(struct f_tree *root_node, const char *tree_name) +{ + bt_debug("%s: \n", tree_name); + bt_debug("[ "); + show_subtree(root_node); + bt_debug("]\n\n"); +} + +#define show_tree(tree) show_tree2(tree, #tree); + +static uint +get_nodes_count_full_bin_tree(uint height) +{ + return (bt_naive_pow(2, height+1) - 1); +} + +static struct f_tree * +get_balanced_full_subtree(uint height, uint idx) +{ + struct f_tree *node = new_tree(idx); + if (height > 0) + { + uint nodes_in_subtree = get_nodes_count_full_bin_tree(--height); + node->left = get_balanced_full_subtree(height, idx - nodes_in_subtree/2 - 1); + node->right = get_balanced_full_subtree(height, idx + nodes_in_subtree/2 + 1); + } + return node; +} + +static struct f_tree * +get_balanced_full_tree(uint height) +{ + return get_balanced_full_subtree(height, 
get_nodes_count_full_bin_tree(height)/2); +} + +static struct f_tree * +get_degenerated_left_tree(uint nodes_count) +{ + struct f_tree *old = NULL; + struct f_tree *new = NULL; + uint i; + + for (i = 0; i < nodes_count; i++) + { + old = new; + new = new_tree(nodes_count-1-i); + new->left = old; + } + + return new; +} + +static struct f_tree * +get_random_degenerated_left_tree(uint nodes_count) +{ + struct f_tree *tree = get_degenerated_left_tree(nodes_count); + + size_t avaible_indexes_size = nodes_count * sizeof(byte); + byte *avaible_indexes = malloc(avaible_indexes_size); + memset(avaible_indexes, 0, avaible_indexes_size); + + struct f_tree *n; + for (n = tree; n; n = n->left) + { + uint selected_idx; + do + { + selected_idx = bt_random() % nodes_count; + } while(avaible_indexes[selected_idx] != 0); + + avaible_indexes[selected_idx] = 1; + n->from.type = n->to.type = T_INT; + n->from.val.i = n->to.val.i = selected_idx; + } + + free(avaible_indexes); + return tree; +} + +static struct f_tree * +get_balanced_tree_with_ranged_values(uint nodes_count) +{ + struct f_tree *tree = get_degenerated_left_tree(nodes_count); + + uint idx = 0; + struct f_tree *n; + for (n = tree; n; n = n->left) + { + n->from.type = n->to.type = T_INT; + n->from.val.i = idx; + idx += (uint)bt_random() / nodes_count; /* (... 
/ nodes_count) preventing overflow an uint idx */ + n->to.val.i = idx++; + } + + return build_tree(tree); +} + + +static int +t_balancing(void) +{ + start_conf_env(); + + uint height; + for (height = 1; height < MAX_TREE_HEIGHT; height++) + { + uint nodes_count = get_nodes_count_full_bin_tree(height); + + struct f_tree *simple_degenerated_tree = get_degenerated_left_tree(nodes_count); + show_tree(simple_degenerated_tree); + + struct f_tree *expected_balanced_tree = get_balanced_full_tree(height); + show_tree(expected_balanced_tree); + + struct f_tree *balanced_tree_from_simple = build_tree(simple_degenerated_tree); + show_tree(balanced_tree_from_simple); + + bt_assert(same_tree(balanced_tree_from_simple, expected_balanced_tree)); + } + + return 1; +} + + +static int +t_balancing_random(void) +{ + start_conf_env(); + + uint height; + for (height = 1; height < MAX_TREE_HEIGHT; height++) + { + uint nodes_count = get_nodes_count_full_bin_tree(height); + + struct f_tree *expected_balanced_tree = get_balanced_full_tree(height); + + uint i; + for(i = 0; i < 10; i++) + { + struct f_tree *random_degenerated_tree = get_random_degenerated_left_tree(nodes_count); + show_tree(random_degenerated_tree); + + struct f_tree *balanced_tree_from_random = build_tree(random_degenerated_tree); + + show_tree(expected_balanced_tree); + show_tree(balanced_tree_from_random); + + bt_assert(same_tree(balanced_tree_from_random, expected_balanced_tree)); + } + } + + return 1; +} + +static int +t_find(void) +{ + start_conf_env(); + + uint height; + for (height = 1; height < MAX_TREE_HEIGHT; height++) + { + uint nodes_count = get_nodes_count_full_bin_tree(height); + + struct f_tree *tree = get_balanced_full_tree(height); + show_tree(tree); + + struct f_val looking_up_value = { + .type = T_INT + }; + for(looking_up_value.val.i = 0; looking_up_value.val.i < nodes_count; looking_up_value.val.i++) + { + struct f_tree *found_tree = find_tree(tree, looking_up_value); + 
bt_assert((val_compare(looking_up_value, found_tree->from) == 0) && (val_compare(looking_up_value, found_tree->to) == 0)); + } + } + + return 1; +} + +static uint +get_max_value_in_unbalanced_tree(struct f_tree *node, uint max) +{ + if (!node) + return max; + + if (node->to.val.i > max) + max = node->to.val.i; + + uint max_left = get_max_value_in_unbalanced_tree(node->left, max); + if (max_left > max) + max = max_left; + + uint max_right = get_max_value_in_unbalanced_tree(node->right, max); + if (max_right > max) + max = max_right; + + return max; +} + +static int +t_find_ranges(void) +{ + start_conf_env(); + + uint height; + for (height = 1; height < MAX_TREE_HEIGHT; height++) + { + uint nodes_count = get_nodes_count_full_bin_tree(height); + + struct f_tree *tree = get_balanced_tree_with_ranged_values(nodes_count); + uint max_value = get_max_value_in_unbalanced_tree(tree, 0); + + show_tree(tree); + + bt_debug("max_value: %u \n", max_value); + + struct f_val needle = { + .type = T_INT + }; + uint *i = &needle.val.i; + + for(*i = 0; *i <= max_value; *i += (uint)bt_random()/nodes_count) + { + struct f_tree *found_tree = find_tree(tree, needle); + bt_debug("searching: %u \n", *i); + bt_assert( + (val_compare(needle, found_tree->from) == 0) || (val_compare(needle, found_tree->to) == 0) || + ((val_compare(needle, found_tree->from) == 1) && (val_compare(needle, found_tree->to) == -1)) + ); + } + } + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_balancing, "Balancing strong unbalanced trees"); + bt_test_suite(t_balancing_random, "Balancing random unbalanced trees"); + bt_test_suite(t_find, "Finding values in trees"); + bt_test_suite(t_find_ranges, "Finding values in trees with random ranged values"); + + return bt_exit_value(); +} diff --git a/filter/trie.c b/filter/trie.c index 565ae82f..adcfcdf3 100644 --- a/filter/trie.c +++ b/filter/trie.c @@ -74,6 +74,19 @@ #include "conf/conf.h" #include "filter/filter.h" + +/* + * 
In the trie code, the prefix length is internally treated as for the whole + * ip_addr, regardless whether it contains an IPv4 or IPv6 address. Therefore, + * remaining definitions make sense. + */ + +#define ipa_mkmask(x) ip6_mkmask(x) +#define ipa_masklen(x) ip6_masklen(&x) +#define ipa_pxlen(x,y) ip6_pxlen(x,y) +#define ipa_getbit(x,n) ip6_getbit(x,n) + + /** * f_new_trie - allocates and returns a new empty trie * @lp: linear pool to allocate items from @@ -109,12 +122,11 @@ attach_node(struct f_trie_node *parent, struct f_trie_node *child) /** * trie_add_prefix * @t: trie to add to - * @px: prefix address - * @plen: prefix length + * @net: IP network prefix * @l: prefix lower bound * @h: prefix upper bound * - * Adds prefix (prefix pattern) @px/@plen to trie @t. @l and @h are lower + * Adds prefix (prefix pattern) @n to trie @t. @l and @h are lower * and upper bounds on accepted prefix lengths, both inclusive. * 0 <= l, h <= 32 (128 for IPv6). * @@ -124,8 +136,19 @@ attach_node(struct f_trie_node *parent, struct f_trie_node *child) */ void * -trie_add_prefix(struct f_trie *t, ip_addr px, int plen, int l, int h) +trie_add_prefix(struct f_trie *t, const net_addr *net, uint l, uint h) { + ip_addr px = net_prefix(net); + uint plen = net_pxlen(net); + + if (net->type == NET_IP4) + { + const uint delta = IP6_MAX_PREFIX_LENGTH - IP4_MAX_PREFIX_LENGTH; + plen += delta; + l += delta; + h += delta; + } + if (l == 0) t->zero = 1; else @@ -140,7 +163,7 @@ trie_add_prefix(struct f_trie *t, ip_addr px, int plen, int l, int h) struct f_trie_node *o = NULL; struct f_trie_node *n = t->root; - while(n) + while (n) { ip_addr cmask = ipa_and(n->mask, pmask); @@ -196,18 +219,8 @@ trie_add_prefix(struct f_trie *t, ip_addr px, int plen, int l, int h) return a; } -/** - * trie_match_prefix - * @t: trie - * @px: prefix address - * @plen: prefix length - * - * Tries to find a matching prefix pattern in the trie such that - * prefix @px/@plen matches that prefix pattern. 
Returns 1 if there - * is such prefix pattern in the trie. - */ -int -trie_match_prefix(struct f_trie *t, ip_addr px, int plen) +static int +trie_match_prefix(struct f_trie *t, ip_addr px, uint plen) { ip_addr pmask = ipa_mkmask(plen); ip_addr paddr = ipa_and(px, pmask); @@ -241,6 +254,30 @@ trie_match_prefix(struct f_trie *t, ip_addr px, int plen) return 0; } +/** + * trie_match_net + * @t: trie + * @n: net address + * + * Tries to find a matching net in the trie such that + * prefix @n matches that prefix pattern. Returns 1 if there + * is such prefix pattern in the trie. + */ +int +trie_match_net(struct f_trie *t, const net_addr *n) +{ + uint add = 0; + + switch (n->type) { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + add = IP6_MAX_PREFIX_LENGTH - IP4_MAX_PREFIX_LENGTH; + } + + return trie_match_prefix(t, net_prefix(n), net_pxlen(n) + add); +} + static int trie_node_same(struct f_trie_node *t1, struct f_trie_node *t2) { diff --git a/filter/trie_test.c b/filter/trie_test.c new file mode 100644 index 00000000..7529a5c5 --- /dev/null +++ b/filter/trie_test.c @@ -0,0 +1,185 @@ +/* + * Filters: Utility Functions Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#include "test/birdtest.h" +#include "test/bt-utils.h" + +#include "filter/filter.h" +#include "conf/conf.h" + +#define TESTS_NUM 10 +#define PREFIXES_NUM 10 +#define PREFIX_TESTS_NUM 10000 + +#define BIG_BUFFER_SIZE 10000 + +/* Wrapping structure for storing f_prefixes structures in list */ +struct f_prefix_node { + node n; + struct f_prefix prefix; +}; + +static u32 +xrandom(u32 max) +{ + return (bt_random() % max); +} + +static int +is_prefix_included(list *prefixes, struct f_prefix *needle) +{ + struct f_prefix_node *n; + WALK_LIST(n, *prefixes) + { + ip6_addr cmask = ip6_mkmask(MIN(n->prefix.net.pxlen, needle->net.pxlen)); + + ip6_addr ip = net6_prefix(&n->prefix.net); + ip6_addr needle_ip = net6_prefix(&needle->net); + + if ((ipa_compare(ipa_and(ip, cmask), ipa_and(needle_ip, cmask)) == 0) && + (n->prefix.lo <= needle->net.pxlen) && (needle->net.pxlen <= n->prefix.hi)) + { + bt_debug("FOUND\t" PRIip6 "/%d %d-%d\n", ARGip6(net6_prefix(&n->prefix.net)), n->prefix.net.pxlen, n->prefix.lo, n->prefix.hi); + return 1; /* OK */ + } + } + return 0; /* FAIL */ +} + +static struct f_prefix +get_random_ip6_prefix(void) +{ + struct f_prefix p; + u8 pxlen = xrandom(120)+8; + ip6_addr ip6 = ip6_build(bt_random(),bt_random(),bt_random(),bt_random()); + net_addr_ip6 net6 = NET_ADDR_IP6(ip6, pxlen); + + p.net = *((net_addr*) &net6); + + if (bt_random() % 2) + { + p.lo = 0; + p.hi = p.net.pxlen; + } + else + { + p.lo = p.net.pxlen; + p.hi = net_max_prefix_length[p.net.type]; + } + + return p; +} + +static void +generate_random_ipv6_prefixes(list *prefixes) +{ + int i; + for (i = 0; i < PREFIXES_NUM; i++) + { + struct f_prefix f = get_random_ip6_prefix(); + + struct f_prefix_node *px = calloc(1, sizeof(struct f_prefix_node)); + px->prefix = f; + + bt_debug("ADD\t" PRIip6 "/%d %d-%d\n", ARGip6(net6_prefix(&px->prefix.net)), px->prefix.net.pxlen, px->prefix.lo, px->prefix.hi); + add_tail(prefixes, &px->n); + } +} + +static int +t_match_net(void) +{ + bt_bird_init(); + 
bt_config_parse(BT_CONFIG_SIMPLE); + + uint round; + for (round = 0; round < TESTS_NUM; round++) + { + list prefixes; /* of structs f_extended_prefix */ + init_list(&prefixes); + struct f_trie *trie = f_new_trie(config->mem, sizeof(struct f_trie_node)); + + generate_random_ipv6_prefixes(&prefixes); + struct f_prefix_node *n; + WALK_LIST(n, prefixes) + { + trie_add_prefix(trie, &n->prefix.net, n->prefix.lo, n->prefix.hi); + } + + int i; + for (i = 0; i < PREFIX_TESTS_NUM; i++) + { + struct f_prefix f = get_random_ip6_prefix(); + bt_debug("TEST\t" PRIip6 "/%d\n", ARGip6(net6_prefix(&f.net)), f.net.pxlen); + + int should_be = is_prefix_included(&prefixes, &f); + int is_there = trie_match_net(trie, &f.net); + bt_assert_msg(should_be == is_there, "Prefix " PRIip6 "/%d %s", ARGip6(net6_prefix(&f.net)), f.net.pxlen, (should_be ? "should be found in trie" : "should not be found in trie")); + } + + struct f_prefix_node *nxt; + WALK_LIST_DELSAFE(n, nxt, prefixes) + { + free(n); + } + } + + bt_bird_cleanup(); + return 1; +} + +static int +t_trie_same(void) +{ + bt_bird_init(); + bt_config_parse(BT_CONFIG_SIMPLE); + + int round; + for (round = 0; round < TESTS_NUM*4; round++) + { + struct f_trie * trie1 = f_new_trie(config->mem, sizeof(struct f_trie_node)); + struct f_trie * trie2 = f_new_trie(config->mem, sizeof(struct f_trie_node)); + + list prefixes; /* a list of f_extended_prefix structures */ + init_list(&prefixes); + int i; + for (i = 0; i < 100; i++) + generate_random_ipv6_prefixes(&prefixes); + + struct f_prefix_node *n; + WALK_LIST(n, prefixes) + { + trie_add_prefix(trie1, &n->prefix.net, n->prefix.lo, n->prefix.hi); + } + WALK_LIST_BACKWARDS(n, prefixes) + { + trie_add_prefix(trie2, &n->prefix.net, n->prefix.lo, n->prefix.hi); + } + + bt_assert(trie_same(trie1, trie2)); + + struct f_prefix_node *nxt; + WALK_LIST_DELSAFE(n, nxt, prefixes) + { + free(n); + } + } + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + 
bt_test_suite(t_match_net, "Testing random prefix matching"); + bt_test_suite(t_trie_same, "A trie filled forward should be same with a trie filled backward."); + + return bt_exit_value(); +} @@ -3,6 +3,7 @@ S ip.c S lists.c S checksum.c bitops.c patmatch.c printf.c xmalloc.c tbf.c S mac.c +S flowspec.c D resource.sgml S resource.c S mempool.c diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 00000000..01f3114d --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,7 @@ +src := bitops.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c tbf.c timer.c xmalloc.c +obj := $(src-o-files) +$(all-daemon) + +tests_src := heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c checksum_test.c lists_test.c mac_test.c ip_test.c hash_test.c printf_test.c +tests_targets := $(tests_targets) $(tests-target-files) +tests_objs := $(tests_objs) $(src-o-files) diff --git a/lib/Modules b/lib/Modules deleted file mode 100644 index 0e30b488..00000000 --- a/lib/Modules +++ /dev/null @@ -1,35 +0,0 @@ -sha256.c -sha256.h -sha512.c -sha512.h -sha1.c -sha1.h -birdlib.h -bitops.c -bitops.h -ip.h -ip.c -lists.c -lists.h -mac.c -mac.h -md5.c -md5.h -mempool.c -resource.c -resource.h -slab.c -socket.h -tbf.c -unaligned.h -xmalloc.c -printf.c -string.h -patmatch.c -slists.c -slists.h -event.c -event.h -checksum.c -checksum.h -alloca.h diff --git a/lib/alloca.h b/lib/alloca.h index f0d61bb4..e5557cdb 100644 --- a/lib/alloca.h +++ b/lib/alloca.h @@ -15,4 +15,6 @@ #include <stdlib.h> #endif +#define allocz(len) ({ void *_x = alloca(len); memset(_x, 0, len); _x; }) + #endif diff --git a/lib/birdlib.h b/lib/birdlib.h index aaa7a0a3..428b3209 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -9,8 +9,7 @@ #ifndef _BIRD_BIRDLIB_H_ #define _BIRD_BIRDLIB_H_ -#include "timer.h" -#include "alloca.h" +#include "lib/alloca.h" /* Ugly structure 
offset handling macros */ @@ -41,6 +40,12 @@ struct align_probe { char x; long int y; }; #define CALL(fn, args...) ({ if (fn) fn(args); }) #define ADVANCE(w, r, l) ({ r -= l; w += l; }) +static inline int uint_cmp(uint i1, uint i2) +{ return (int)(i1 > i2) - (int)(i1 < i2); } + +static inline int u64_cmp(u64 i1, u64 i2) +{ return (int)(i1 > i2) - (int)(i1 < i2); } + /* Bitfield macros */ @@ -55,34 +60,20 @@ struct align_probe { char x; long int y; }; #define NULL ((void *) 0) #endif -#ifndef IPV6 -#define IP_VERSION 4 -#else -#define IP_VERSION 6 -#endif - - /* Macros for gcc attributes */ #define NORET __attribute__((noreturn)) #define UNUSED __attribute__((unused)) #define PACKED __attribute__((packed)) -#ifdef IPV6 -#define UNUSED4 -#define UNUSED6 UNUSED -#else -#define UNUSED4 UNUSED -#define UNUSED6 -#endif - /* Microsecond time */ typedef s64 btime; +//typedef s64 bird_clock_t; -#define S_ *1000000 -#define MS_ *1000 -#define US_ *1 +#define S_ * (btime) 1000000 +#define MS_ * (btime) 1000 +#define US_ * (btime) 1 #define TO_S /1000000 #define TO_MS /1000 #define TO_US /1 @@ -91,39 +82,26 @@ typedef s64 btime; #define S S_ #define MS MS_ #define US US_ +#define NS /1000 #endif +#define TIME_INFINITY ((s64) 0x7fffffffffffffff) + /* Rate limiting */ struct tbf { - bird_clock_t timestamp; /* Last update */ - u16 count; /* Available tokens */ + btime timestamp; /* Last update */ + u64 count; /* Available micro-tokens */ u16 burst; /* Max number of tokens */ - u16 rate; /* Rate of replenishment */ - u16 mark; /* Whether last op was limited */ + u16 rate; /* Rate of replenishment (tokens / sec) */ + u32 drop; /* Number of failed request since last successful */ }; /* Default TBF values for rate limiting log messages */ #define TBF_DEFAULT_LOG_LIMITS { .rate = 1, .burst = 5 } -void tbf_update(struct tbf *f); - -static inline int -tbf_limit(struct tbf *f) -{ - tbf_update(f); - - if (!f->count) - { - f->mark = 1; - return 1; - } - - f->count--; - f->mark = 0; - 
return 0; -} +int tbf_limit(struct tbf *f); /* Logging and dying */ @@ -163,7 +141,7 @@ void bug(const char *msg, ...) NORET; #define L_FATAL "\010" /* Fatal errors */ #define L_BUG "\011" /* BIRD bugs */ -void debug(const char *msg, ...); /* Printf to debug output */ +void debug(const char *msg, ...); /* Printf to debug output */ /* Debugging */ @@ -174,9 +152,9 @@ void debug(const char *msg, ...); /* Printf to debug output */ #endif #ifdef DEBUGGING -#define ASSERT(x) do { if (!(x)) bug("Assertion `%s' failed at %s:%d", #x, __FILE__, __LINE__); } while(0) +#define ASSERT(x) do { if (!(x)) bug("Assertion '%s' failed at %s:%d", #x, __FILE__, __LINE__); } while(0) #else -#define ASSERT(x) do { } while(0) +#define ASSERT(x) do { if (!(x)) log(L_BUG "Assertion '%s' failed at %s:%d", #x, __FILE__, __LINE__); } while(0) #endif /* Pseudorandom numbers */ diff --git a/lib/bitops.c b/lib/bitops.c index 81586e87..efb8710e 100644 --- a/lib/bitops.c +++ b/lib/bitops.c @@ -28,15 +28,15 @@ u32_mkmask(uint n) * * This function checks whether the given integer @x represents * a valid bit mask (binary representation contains first ones, then - * zeroes) and returns the number of ones or -1 if the mask is invalid. + * zeroes) and returns the number of ones or 255 if the mask is invalid. 
*/ -int +uint u32_masklen(u32 x) { int l = 0; u32 n = ~x; - if (n & (n+1)) return -1; + if (n & (n+1)) return 255; if (x & 0x0000ffff) { x &= 0x0000ffff; l += 16; } if (x & 0x00ff00ff) { x &= 0x00ff00ff; l += 8; } if (x & 0x0f0f0f0f) { x &= 0x0f0f0f0f; l += 4; } diff --git a/lib/bitops.h b/lib/bitops.h index ce13732a..af648c26 100644 --- a/lib/bitops.h +++ b/lib/bitops.h @@ -9,6 +9,8 @@ #ifndef _BIRD_BITOPTS_H_ #define _BIRD_BITOPTS_H_ +#include "sysdep/config.h" + /* * Bit mask operations: * @@ -19,7 +21,7 @@ */ u32 u32_mkmask(uint n); -int u32_masklen(u32 x); +uint u32_masklen(u32 x); u32 u32_log2(u32 v); diff --git a/lib/bitops_test.c b/lib/bitops_test.c new file mode 100644 index 00000000..f816b9d1 --- /dev/null +++ b/lib/bitops_test.c @@ -0,0 +1,123 @@ +/* + * BIRD Library -- Generic Bit Operations Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "test/bt-utils.h" /* naive_pow() */ + +#include "lib/bitops.h" + +#define MAX_NUM 1000 +#define CHECK_BIT(var,pos) ((var) & (u32)(1<<(pos))) + +static int +t_mkmask(void) +{ + int i; + u32 compute, expect; + + bt_assert(u32_mkmask(0) == 0x00000000); + for (i = 1; i <= 32; i++) + { + compute = u32_mkmask(i); + expect = (u32) (0xffffffff << (32-i)); + bt_assert_msg(compute == expect, "u32_mkmask(%d) = 0x%08X, expected 0x%08X", i, compute, expect); + } + + return 1; +} + +static int +u32_masklen_expected(u32 mask) +{ + int j, expect = 0; + + int valid = 0; + for (j = 0; j <= 32; j++) + if (mask == (j ? 
(0xffffffff << (32-j)) : 0)) /* Shifting 32-bit value by 32 bits is undefined behavior */ + valid = 1; + + if (!valid && mask != 0) + expect = 255; + else + for (j = 0; j <= 31; j++) + if (CHECK_BIT(mask, (31-j))) + expect = j+1; + else + break; + return expect; +} + +static void +check_mask(u32 mask) +{ + int expected, masklen; + + expected = u32_masklen_expected(mask); + masklen = u32_masklen(mask); + int ok = (expected == masklen); + bt_debug("u32_masklen(Ox%08x) = %d, expected %d %s\n", mask, masklen, expected, ok ? "OK" : "FAIL!"); + bt_assert(ok); +} + +static int +t_masklen(void) +{ + u32 i; + + check_mask(0x82828282); + check_mask(0x00000000); + + for (i = 0; i <= 32; i++) + check_mask(((u32) (i ? (0xffffffff << (32-i)) : 0)) & 0xffffffff); /* Shifting 32-bit value by 32 bits is undefined behavior */ + + for (i = 0; i <= MAX_NUM; i++) + check_mask(bt_random()); + + return 1; +} + +static void +check_log2(u32 n) +{ + u32 log = u32_log2(n); + u32 low = bt_naive_pow(2, log); + u32 high = bt_naive_pow(2, log+1); + + bt_assert_msg(n >= low && n < high, + "u32_log2(%u) = %u, %u should be in the range <%u, %u)", + n, log, n, low, high); +} + +static int +t_log2(void) +{ + u32 i; + + for (i = 0; i < 31; i++) + bt_assert(u32_log2(bt_naive_pow(2, i+1)) == i+1); + + for (i = 1; i < MAX_NUM; i++) + check_log2(i); + + for (i = 1; i < MAX_NUM; i++) + check_log2(((u32) bt_random()) % 0x0fffffff); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_mkmask, "u32_mkmask()"); + bt_test_suite(t_masklen, "u32_masklen()"); + bt_test_suite(t_log2, "u32_log2()"); + + return bt_exit_value(); +} diff --git a/lib/buffer.h b/lib/buffer.h index 2a53f211..cd9bab86 100644 --- a/lib/buffer.h +++ b/lib/buffer.h @@ -13,10 +13,14 @@ #include "lib/resource.h" #include "sysdep/config.h" -#define BUFFER(type) struct { type *data; uint used, size; } - +#define BUFFER_(type) struct { type *data; uint used, size; } +#define BUFFER_TYPE(v) typeof(* 
(v).data) #define BUFFER_SIZE(v) ((v).size * sizeof(* (v).data)) +#ifndef PARSER +#define BUFFER(type) BUFFER_(type) +#endif + #define BUFFER_INIT(v,pool,isize) \ ({ \ (v).used = 0; \ @@ -46,4 +50,14 @@ #define BUFFER_FLUSH(v) ({ (v).used = 0; }) +#define BUFFER_WALK(v,n) \ + for (BUFFER_TYPE(v) *_n = (v).data, n; _n < ((v).data + (v).used) && (n = *_n, 1); _n++) + +#define BUFFER_SHALLOW_COPY(dst, src) \ + ({ \ + (dst).used = (src).used; \ + (dst).size = (src).size; \ + (dst).data = (src).data; \ + }) + #endif /* _BIRD_BUFFER_H_ */ diff --git a/lib/buffer_test.c b/lib/buffer_test.c new file mode 100644 index 00000000..5b7de330 --- /dev/null +++ b/lib/buffer_test.c @@ -0,0 +1,167 @@ +/* + * BIRD Library -- Buffer Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <stdlib.h> + +#include "test/birdtest.h" + +#include "lib/buffer.h" + +#define MAX_NUM 33 + +typedef BUFFER(int) buffer_int; +static int expected[MAX_NUM]; +static buffer_int buf; +static struct pool *buffer_pool; + +static void +show_buf(buffer_int *b) +{ + uint i; + bt_debug(".used = %d, .size = %d\n", b->used, b->size); + + for (i = 0; i < b->used; i++) + bt_debug(" .data[%3u] = %-16d expected %-16d %s\n", i, b->data[i], expected[i], (b->data[i] == expected[i] ? 
"OK" : "FAIL!")); +} + +static void +fill_expected_array(void) +{ + int i; + + for (i = 0; i < MAX_NUM; i++) + expected[i] = bt_random(); +} + +static void +init_buffer(void) +{ + resource_init(); + buffer_pool = &root_pool; + BUFFER_INIT(buf, buffer_pool, MAX_NUM); +} + +static int +is_buffer_as_expected(buffer_int *b) +{ + show_buf(b); + + int i; + for (i = 0; i < MAX_NUM; i++) + bt_assert(b->data[i] == expected[i]); + return 1; +} + +static int +t_buffer_push(void) +{ + int i; + + init_buffer(); + fill_expected_array(); + + for (i = 0; i < MAX_NUM; i++) + BUFFER_PUSH(buf) = expected[i]; + is_buffer_as_expected(&buf); + + return 1; +} + +static int +t_buffer_pop(void) +{ + int i; + + init_buffer(); + fill_expected_array(); + + /* POP a half of elements */ + for (i = 0; i < MAX_NUM; i++) + BUFFER_PUSH(buf) = expected[i]; + for (i = MAX_NUM-1; i >= MAX_NUM/2; i--) + BUFFER_POP(buf); + for (i = MAX_NUM/2; i < MAX_NUM; i++) + BUFFER_PUSH(buf) = expected[i] = bt_random(); + is_buffer_as_expected(&buf); + + /* POP all of elements */ + for (i = MAX_NUM-1; i >= 0; i--) + BUFFER_POP(buf); + bt_assert(buf.used == 0); + for (i = 0; i < MAX_NUM; i++) + BUFFER_PUSH(buf) = expected[i]; + is_buffer_as_expected(&buf); + + return 1; +} + +static int +t_buffer_resize(void) +{ + int i; + + init_buffer(); + BUFFER_INIT(buf, buffer_pool, 0); + fill_expected_array(); + + for (i = 0; i < MAX_NUM; i++) + BUFFER_PUSH(buf) = expected[i]; + is_buffer_as_expected(&buf); + bt_assert(buf.size >= MAX_NUM); + + return 1; +} + +static int +t_buffer_flush(void) +{ + int i; + + init_buffer(); + fill_expected_array(); + for (i = 0; i < MAX_NUM; i++) + BUFFER_PUSH(buf) = expected[i]; + + BUFFER_FLUSH(buf); + bt_assert(buf.used == 0); + + return 1; +} + +static int +t_buffer_walk(void) +{ + int i; + + init_buffer(); + fill_expected_array(); + for (i = 0; i < MAX_NUM; i++) + BUFFER_PUSH(buf) = expected[i]; + + i = 0; + BUFFER_WALK(buf, v) + bt_assert(v == expected[i++]); + + bt_assert(i == MAX_NUM); + 
+ return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_buffer_push, "Pushing new elements"); + bt_test_suite(t_buffer_pop, "Fill whole buffer (PUSH), a half of elements POP and PUSH new elements"); + bt_test_suite(t_buffer_resize, "Init a small buffer and try overfill"); + bt_test_suite(t_buffer_flush, "Fill and flush all elements"); + bt_test_suite(t_buffer_walk, "Fill and walk through buffer"); + + return bt_exit_value(); +} diff --git a/lib/checksum_test.c b/lib/checksum_test.c new file mode 100644 index 00000000..7e5658eb --- /dev/null +++ b/lib/checksum_test.c @@ -0,0 +1,94 @@ +/* + * BIRD Library -- IP One-Complement Checksum Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <stdio.h> + +#include "test/birdtest.h" + +#include "lib/checksum.h" + +#define MAX_NUM 10000 + +static u16 +ipsum_calculate_expected(u32 *a) +{ + int i; + u32 sum = 0; + + for(i = 0; i < MAX_NUM; i++) + { + sum += a[i] & 0xffff; + bt_debug("low) \t0x%08X \n", sum); + + sum += a[i] >> 16; + bt_debug("high) \t0x%08X \n", sum); + + u16 carry = sum >> 16; + sum = (sum & 0xffff) + carry; + bt_debug("carry) \t0x%08X \n\n", sum); + } + bt_debug("sum) \t0x%08X \n", sum); + + sum = sum ^ 0xffff; + bt_debug("~sum) \t0x%08X \n", sum); + + return sum; +} + +static int +t_calculate(void) +{ + u32 a[MAX_NUM]; + int i; + + for (i = 0; i < MAX_NUM; i++) + a[i] = bt_random(); + + u16 sum_calculated = ipsum_calculate(a, sizeof(a), NULL); + u16 sum_calculated_2 = ipsum_calculate(&a[0], sizeof(u32)*(MAX_NUM/2), &a[MAX_NUM/2], sizeof(u32)*(MAX_NUM - MAX_NUM/2), NULL); + bt_assert(sum_calculated == sum_calculated_2); + + u16 sum_expected = ipsum_calculate_expected(a); + + bt_debug("sum_calculated: %08X \n", sum_calculated); + bt_debug("sum_expected: %08X \n", sum_expected); + + bt_assert(sum_calculated == sum_expected); + + return 1; +} + +static int +t_verify(void) +{ + u32 
a[MAX_NUM+1]; + int i; + + for (i = 0; i < MAX_NUM; i++) + a[i] = bt_random(); + + u16 sum = ipsum_calculate_expected(a); + + a[MAX_NUM] = sum; + + bt_assert(ipsum_verify(a, sizeof(a), NULL)); + + return 1; +} + + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_calculate, "Checksum of pseudo-random data"); + bt_test_suite(t_verify, "Verification of pseudo-random data."); + + return bt_exit_value(); +} diff --git a/lib/event_test.c b/lib/event_test.c new file mode 100644 index 00000000..e1215bba --- /dev/null +++ b/lib/event_test.c @@ -0,0 +1,89 @@ +/* + * BIRD Library -- Event Processing Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + + +#include "test/birdtest.h" + +#include "lib/net.h" +#include "lib/event.h" +#include "conf/conf.h" +#include "nest/locks.h" +#include "sysdep/unix/unix.h" +#include "nest/iface.h" +#include "nest/route.h" + +#define MAX_NUM 4 + +int event_check_points[MAX_NUM]; + +#define event_hook_body(num) \ + do { \ + bt_debug("Event Hook " #num "\n"); \ + event_check_points[num] = 1; \ + bt_assert_msg(event_check_points[num-1], "Events should be run in right order"); \ + } while (0) + +static void event_hook_1(void *data UNUSED) { event_hook_body(1); } +static void event_hook_2(void *data UNUSED) { event_hook_body(2); } +static void event_hook_3(void *data UNUSED) { event_hook_body(3); } + +#define schedule_event(num) \ + do { \ + struct event *event_##num = ev_new(&root_pool); \ + event_##num->hook = event_hook_##num; \ + ev_schedule(event_##num); \ + } while (0) + +static void +init_event_check_points(void) +{ + int i; + event_check_points[0] = 1; + for (i = 1; i < MAX_NUM; i++) + event_check_points[i] = 0; +} + +static int +t_ev_run_list(void) +{ + int i; + + resource_init(); + olock_init(); + timer_init(); + io_init(); + rt_init(); + if_init(); +// roa_init(); + config_init(); + config = config_alloc(""); + + init_event_check_points(); 
+ + schedule_event(1); + schedule_event(2); + schedule_event(3); + + ev_run_list(&global_event_list); + + for (i = 1; i < MAX_NUM; i++) + bt_assert(event_check_points[i]); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_ev_run_list, "Schedule and run 3 events in right order."); + + return bt_exit_value(); +} + diff --git a/lib/fletcher16_test.c b/lib/fletcher16_test.c new file mode 100644 index 00000000..1020e6ec --- /dev/null +++ b/lib/fletcher16_test.c @@ -0,0 +1,169 @@ +/* + * BIRD Library -- Fletcher-16 Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "lib/fletcher16.h" + +static u16 +straightforward_fletcher16_compute(const char *data) +{ + int count = strlen(data); + + u16 sum1 = 0; + u16 sum2 = 0; + int index; + + for (index = 0; index < count; ++index) + { + sum1 = (sum1 + data[index]) % 255; + sum2 = (sum2 + sum1) % 255; + } + + return (sum2 << 8) | sum1; +} + +static u16 +straightforward_fletcher16_checksum(const char *data) +{ + u16 csum; + u8 c0,c1,f0,f1; + + csum = straightforward_fletcher16_compute(data); + f0 = csum & 0xff; + f1 = (csum >> 8) & 0xff; + c0 = 0xff - ((f0 + f1) % 0xff); + c1 = 0xff - ((f0 + c0) % 0xff); + + return (c1 << 8) | c0; +} + +static int +test_fletcher16(void *out_, const void *in_, const void *expected_out_) +{ + u16 *out = out_; + const char *in = in_; + const u16 *expected_out = expected_out_; + + struct fletcher16_context ctxt; + + fletcher16_init(&ctxt); + fletcher16_update(&ctxt, in, strlen(in)); + put_u16(out, fletcher16_compute(&ctxt)); + + return *out == *expected_out; +} + +static int +test_fletcher16_checksum(void *out_, const void *in_, const void *expected_out_) +{ + u16 *out = out_; + const char *in = in_; + const u16 *expected_out = expected_out_; + + struct fletcher16_context ctxt; + int len = strlen(in); + + fletcher16_init(&ctxt); + 
fletcher16_update(&ctxt, in, len); + put_u16(out, fletcher16_final(&ctxt, len, len)); + + return *out == *expected_out; +} + +static int +t_fletcher16_compute(void) +{ + struct bt_pair test_vectors[] = { + { + .in = "\001\002", + .out = & (const u16) { 0x0403 }, + }, + { + .in = "", + .out = & ((const u16) { straightforward_fletcher16_compute("") }), + }, + { + .in = "a", + .out = & ((const u16) { straightforward_fletcher16_compute("a") }), + }, + { + .in = "abcd", + .out = & ((const u16) { straightforward_fletcher16_compute("abcd") }), + }, + { + .in = "message digest", + .out = & ((const u16) { straightforward_fletcher16_compute("message digest") }), + }, + { + .in = "abcdefghijklmnopqrstuvwxyz", + .out = & ((const u16) { straightforward_fletcher16_compute("abcdefghijklmnopqrstuvwxyz") }), + }, + { + .in = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", + .out = & ((const u16) { straightforward_fletcher16_compute("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") }), + }, + { + .in = "12345678901234567890123456789012345678901234567890123456789012345678901234567890", + .out = & ((const u16) { straightforward_fletcher16_compute("12345678901234567890123456789012345678901234567890123456789012345678901234567890") }), + }, + }; + + return bt_assert_batch(test_vectors, test_fletcher16, bt_fmt_str, bt_fmt_unsigned); +} + +static int +t_fletcher16_checksum(void) +{ + struct bt_pair test_vectors[] = { + { + .in = "\001\002", + .out = & ((const u16) { straightforward_fletcher16_checksum("\001\002") }), + }, + { + .in = "", + .out = & ((const u16) { straightforward_fletcher16_checksum("") }), + }, + { + .in = "a", + .out = & ((const u16) { straightforward_fletcher16_checksum("a") }), + }, + { + .in = "abcd", + .out = & ((const u16) { straightforward_fletcher16_checksum("abcd") }), + }, + { + .in = "message digest", + .out = & ((const u16) { straightforward_fletcher16_checksum("message digest") }), + }, + { + .in = 
"abcdefghijklmnopqrstuvwxyz", + .out = & ((const u16) { straightforward_fletcher16_checksum("abcdefghijklmnopqrstuvwxyz") }), + }, + { + .in = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", + .out = & ((const u16) { straightforward_fletcher16_checksum("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") }), + }, + { + .in = "12345678901234567890123456789012345678901234567890123456789012345678901234567890", + .out = & ((const u16) { straightforward_fletcher16_checksum("12345678901234567890123456789012345678901234567890123456789012345678901234567890") }), + }, + }; + + return bt_assert_batch(test_vectors, test_fletcher16_checksum, bt_fmt_str, bt_fmt_unsigned); +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_fletcher16_compute, "Fletcher-16 Compute Tests"); + bt_test_suite(t_fletcher16_checksum, "Fletcher-16 Checksum Tests"); + + return bt_exit_value(); +} diff --git a/lib/flowspec.c b/lib/flowspec.c new file mode 100644 index 00000000..87ce0206 --- /dev/null +++ b/lib/flowspec.c @@ -0,0 +1,1176 @@ +/* + * BIRD Library -- Flow specification (RFC 5575) + * + * (c) 2016 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/** + * DOC: Flow specification (flowspec) + * + * Flowspec are rules (RFC 5575) for firewalls disseminated using BGP protocol. + * The |flowspec.c| is a library for handling flowspec binary streams and + * flowspec data structures. You will find there functions for validation + * incoming flowspec binary streams, iterators for jumping over components, + * functions for handling a length and functions for formatting flowspec data + * structure into user-friendly text representation. + * + * In this library, you will find also flowspec builder. In |confbase.Y|, there + * are grammar's rules for parsing and building new flowspec data structure + * from BIRD's configuration files and from BIRD's command line interface. 
+ * Finalize function will assemble final &net_addr_flow4 or &net_addr_flow6 + * data structure. + * + * The data structures &net_addr_flow4 and &net_addr_flow6 are defined in + * |net.h| file. The attribute length is size of whole data structure plus + * binary stream representation of flowspec including a compressed encoded + * length of flowspec. + * + * Sometimes in code, it is used expression flowspec type, it should mean + * flowspec component type. + */ + +#include "nest/bird.h" +#include "lib/flowspec.h" +#include "conf/conf.h" + + +static const char* flow4_type_str[] = { + [FLOW_TYPE_DST_PREFIX] = "dst", + [FLOW_TYPE_SRC_PREFIX] = "src", + [FLOW_TYPE_IP_PROTOCOL] = "proto", + [FLOW_TYPE_PORT] = "port", + [FLOW_TYPE_DST_PORT] = "dport", + [FLOW_TYPE_SRC_PORT] = "sport", + [FLOW_TYPE_ICMP_TYPE] = "icmp type", + [FLOW_TYPE_ICMP_CODE] = "icmp code", + [FLOW_TYPE_TCP_FLAGS] = "tcp flags", + [FLOW_TYPE_PACKET_LENGTH] = "length", + [FLOW_TYPE_DSCP] = "dscp", + [FLOW_TYPE_FRAGMENT] = "fragment" +}; + +static const char* flow6_type_str[] = { + [FLOW_TYPE_DST_PREFIX] = "dst", + [FLOW_TYPE_SRC_PREFIX] = "src", + [FLOW_TYPE_NEXT_HEADER] = "next header", + [FLOW_TYPE_PORT] = "port", + [FLOW_TYPE_DST_PORT] = "dport", + [FLOW_TYPE_SRC_PORT] = "sport", + [FLOW_TYPE_ICMP_TYPE] = "icmp type", + [FLOW_TYPE_ICMP_CODE] = "icmp code", + [FLOW_TYPE_TCP_FLAGS] = "tcp flags", + [FLOW_TYPE_PACKET_LENGTH] = "length", + [FLOW_TYPE_DSCP] = "dscp", + [FLOW_TYPE_FRAGMENT] = "fragment", + [FLOW_TYPE_LABEL] = "label" +}; + +/** + * flow_type_str - get stringified flowspec name of component + * @type: flowspec component type + * @ipv6: IPv4/IPv6 decide flag, use zero for IPv4 and one for IPv6 + * + * This function returns flowspec name of component @type in string. + */ +const char * +flow_type_str(enum flow_type type, int ipv6) +{ + return ipv6 ? 
flow6_type_str[type] : flow4_type_str[type]; +} + +/* + * Length + */ + +/** + * flow_write_length - write compressed length value + * @data: destination buffer to write + * @len: the value of the length (0 to 0xfff) for writing + * + * This function writes appropriate as (1- or 2-bytes) the value of @len into + * buffer @data. The function returns number of written bytes, thus 1 or 2 bytes. + */ +uint +flow_write_length(byte *data, u16 len) +{ + if (len >= 0xf0) + { + put_u16(data, len | 0xf000); + return 2; + } + + *data = len; + return 1; +} + +inline static uint +get_value_length(const byte *op) +{ + return (1 << ((*op & 0x30) >> 4)); +} + + + +/* + * Flowspec iterators + */ + +static inline u8 num_op(const byte *op) { return (*op & 0x07); } +static inline int isset_and(const byte *op) { return ((*op & 0x40) == 0x40); } +static inline int isset_end(const byte *op) { return ((*op & 0x80) == 0x80); } + +static const byte * +flow_first_part(const byte *data) +{ + if (!data || flow_read_length(data) == 0) + return NULL; + + /* It is allowed to encode the value of length less then 240 into 2-bytes too */ + if ((data[0] & 0xf0) == 0xf0) + return data + 2; + + return data + 1; +} + +/** + * flow4_first_part - get position of the first flowspec component + * @f: flowspec data structure &net_addr_flow4 + * + * This function return a position to the beginning of the first flowspec + * component in IPv4 flowspec @f. + */ +inline const byte * +flow4_first_part(const net_addr_flow4 *f) +{ + return f ? flow_first_part(f->data) : NULL; +} + +/** + * flow6_first_part - get position of the first flowspec component + * @f: flowspec data structure &net_addr_flow6 + * + * This function return a position to the beginning of the first flowspec + * component in IPv6 flowspec @f. + */ +inline const byte * +flow6_first_part(const net_addr_flow6 *f) +{ + return f ? 
flow_first_part(f->data) : NULL; +} + +static const byte * +flow_next_part(const byte *pos, const byte *end, int ipv6) +{ + switch (*pos++) + { + case FLOW_TYPE_DST_PREFIX: + case FLOW_TYPE_SRC_PREFIX: + { + uint pxlen = *pos++; + uint bytes = BYTES(pxlen); + if (ipv6) + { + uint offset = *pos++ / 8; + pos += bytes - offset; + } + else + { + pos += bytes; + } + break; + } + + case FLOW_TYPE_IP_PROTOCOL: /* == FLOW_TYPE_NEXT_HEADER */ + case FLOW_TYPE_PORT: + case FLOW_TYPE_DST_PORT: + case FLOW_TYPE_SRC_PORT: + case FLOW_TYPE_ICMP_TYPE: + case FLOW_TYPE_ICMP_CODE: + case FLOW_TYPE_TCP_FLAGS: + case FLOW_TYPE_PACKET_LENGTH: + case FLOW_TYPE_DSCP: + case FLOW_TYPE_FRAGMENT: + case FLOW_TYPE_LABEL: + { + /* Is this the end of list operator-value pair? */ + uint last = 0; + + while (!last) + { + last = isset_end(pos); + + /* Value length of operator */ + uint len = get_value_length(pos); + pos += 1+len; + } + break; + } + default: + return NULL; + } + + return (pos < end) ? pos : NULL; +} + +/** + * flow4_next_part - an iterator over flowspec components in flowspec binary stream + * @pos: the beginning of a previous or the first component in flowspec binary + * stream + * @end: the last valid byte in scanned flowspec binary stream + * + * This function returns a position to the beginning of the next component + * (to a component type byte) in flowspec binary stream or %NULL for the end. + */ +inline const byte * +flow4_next_part(const byte *pos, const byte *end) +{ + return flow_next_part(pos, end, 0); +} + +/** + * flow6_next_part - an iterator over flowspec components in flowspec binary stream + * @pos: the beginning of a previous or the first component in flowspec binary + * stream + * @end: the last valid byte in scanned flowspec binary stream + * + * This function returns a position to the beginning of the next component + * (to a component type byte) in flowspec binary stream or %NULL for the end. 
+ */ +inline const byte * +flow6_next_part(const byte *pos, const byte *end) +{ + return flow_next_part(pos, end, 1); +} + + +/* + * Flowspec validation + */ + +static const char* flow_validated_state_str_[] = { + [FLOW_ST_UNKNOWN_COMPONENT] = "Unknown component", + [FLOW_ST_VALID] = "Valid", + [FLOW_ST_NOT_COMPLETE] = "Not complete", + [FLOW_ST_EXCEED_MAX_PREFIX_LENGTH] = "Exceed maximal prefix length", + [FLOW_ST_EXCEED_MAX_PREFIX_OFFSET] = "Exceed maximal prefix offset", + [FLOW_ST_EXCEED_MAX_VALUE_LENGTH] = "Exceed maximal value length", + [FLOW_ST_BAD_TYPE_ORDER] = "Bad component order", + [FLOW_ST_AND_BIT_SHOULD_BE_UNSET] = "The AND-bit should be unset", + [FLOW_ST_ZERO_BIT_SHOULD_BE_UNSED] = "The Zero-bit should be unset", + [FLOW_ST_DEST_PREFIX_REQUIRED] = "Destination prefix is missing", + [FLOW_ST_INVALID_TCP_FLAGS] = "TCP flags exceeding 0xfff", + [FLOW_ST_CANNOT_USE_DONT_FRAGMENT] = "Cannot use Don't fragment flag in IPv6 flow" +}; + +/** + * flow_validated_state_str - return a textual description of validation process + * @code: validation result + * + * This function return well described validation state in string. 
+ */ +const char * +flow_validated_state_str(enum flow_validated_state code) +{ + return flow_validated_state_str_[code]; +} + +static const u8 flow4_max_value_length[] = { + [FLOW_TYPE_DST_PREFIX] = 0, + [FLOW_TYPE_SRC_PREFIX] = 0, + [FLOW_TYPE_IP_PROTOCOL] = 1, + [FLOW_TYPE_PORT] = 2, + [FLOW_TYPE_DST_PORT] = 2, + [FLOW_TYPE_SRC_PORT] = 2, + [FLOW_TYPE_ICMP_TYPE] = 1, + [FLOW_TYPE_ICMP_CODE] = 1, + [FLOW_TYPE_TCP_FLAGS] = 2, + [FLOW_TYPE_PACKET_LENGTH] = 2, + [FLOW_TYPE_DSCP] = 1, + [FLOW_TYPE_FRAGMENT] = 1 /* XXX */ +}; + +static const u8 flow6_max_value_length[] = { + [FLOW_TYPE_DST_PREFIX] = 0, + [FLOW_TYPE_SRC_PREFIX] = 0, + [FLOW_TYPE_NEXT_HEADER] = 1, + [FLOW_TYPE_PORT] = 2, + [FLOW_TYPE_DST_PORT] = 2, + [FLOW_TYPE_SRC_PORT] = 2, + [FLOW_TYPE_ICMP_TYPE] = 1, + [FLOW_TYPE_ICMP_CODE] = 1, + [FLOW_TYPE_TCP_FLAGS] = 2, + [FLOW_TYPE_PACKET_LENGTH] = 2, + [FLOW_TYPE_DSCP] = 1, + [FLOW_TYPE_FRAGMENT] = 1, /* XXX */ + [FLOW_TYPE_LABEL] = 4 +}; + +static u8 +flow_max_value_length(enum flow_type type, int ipv6) +{ + return ipv6 ? flow6_max_value_length[type] : flow4_max_value_length[type]; +} + +/** + * flow_check_cf_bmk_values - check value/bitmask part of flowspec component + * @fb: flow builder instance + * @neg: negation operand + * @val: value from value/mask pair + * @mask: bitmap mask from value/mask pair + * + * This function checks value/bitmask pair. If some problem will appear, the + * function calls cf_error() function with a textual description of reason + * to failing of validation. 
+ */ +void +flow_check_cf_bmk_values(struct flow_builder *fb, u8 neg, u32 val, u32 mask) +{ + flow_check_cf_value_length(fb, val); + flow_check_cf_value_length(fb, mask); + + if (neg && !(val == 0 || val == mask)) + cf_error("For negation, value must be zero or bitmask"); + + if ((fb->this_type == FLOW_TYPE_TCP_FLAGS) && (mask & 0xf000)) + cf_error("Invalid mask 0x%x, must not exceed 0xfff", mask); + + if ((fb->this_type == FLOW_TYPE_FRAGMENT) && fb->ipv6 && (mask & 0x01)) + cf_error("Invalid mask 0x%x, bit 0 must be 0", mask); + + if (val & ~mask) + cf_error("Value 0x%x outside bitmask 0x%x", val, mask); +} + +/** + * flow_check_cf_value_length - check value by flowspec component type + * @fb: flow builder instance + * @val: value + * + * This function checks if the value is in range of component's type support. + * If some problem will appear, the function calls cf_error() function with + * a textual description of reason to failing of validation. + */ +void +flow_check_cf_value_length(struct flow_builder *fb, u32 val) +{ + enum flow_type t = fb->this_type; + u8 max = flow_max_value_length(t, fb->ipv6); + + if (t == FLOW_TYPE_DSCP && val > 0x3f) + cf_error("%s value %u out of range (0-63)", flow_type_str(t, fb->ipv6), val); + + if (max == 1 && (val > 0xff)) + cf_error("%s value %u out of range (0-255)", flow_type_str(t, fb->ipv6), val); + + if (max == 2 && (val > 0xffff)) + cf_error("%s value %u out of range (0-65535)", flow_type_str(t, fb->ipv6), val); +} + +static enum flow_validated_state +flow_validate(const byte *nlri, uint len, int ipv6) +{ + enum flow_type type = 0; + const byte *pos = nlri; + const byte *end = nlri + len; + int met_dst_pfx = 0; + + while (pos < end) + { + /* Check increasing type ordering */ + if (*pos <= type) + return FLOW_ST_BAD_TYPE_ORDER; + type = *pos++; + + switch (type) + { + case FLOW_TYPE_DST_PREFIX: + met_dst_pfx = 1; + /* Fall through */ + case FLOW_TYPE_SRC_PREFIX: + { + uint pxlen = *pos++; + if (pxlen > (ipv6 ? 
IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH)) + return FLOW_ST_EXCEED_MAX_PREFIX_LENGTH; + + uint bytes = BYTES(pxlen); + if (ipv6) + { + uint pxoffset = *pos++; + if (pxoffset > IP6_MAX_PREFIX_LENGTH || pxoffset > pxlen) + return FLOW_ST_EXCEED_MAX_PREFIX_OFFSET; + bytes -= pxoffset / 8; + } + pos += bytes; + + break; + } + + case FLOW_TYPE_LABEL: + if (!ipv6) + return FLOW_ST_UNKNOWN_COMPONENT; + /* fall through */ + case FLOW_TYPE_IP_PROTOCOL: /* == FLOW_TYPE_NEXT_HEADER */ + case FLOW_TYPE_PORT: + case FLOW_TYPE_DST_PORT: + case FLOW_TYPE_SRC_PORT: + case FLOW_TYPE_ICMP_TYPE: + case FLOW_TYPE_ICMP_CODE: + case FLOW_TYPE_TCP_FLAGS: + case FLOW_TYPE_PACKET_LENGTH: + case FLOW_TYPE_DSCP: + case FLOW_TYPE_FRAGMENT: + { + uint last = 0; + uint first = 1; + + while (!last) + { + /* + * 0 1 2 3 4 5 6 7 + * +---+---+---+---+---+---+---+---+ + * | e | a | len | 0 |lt |gt |eq | + * +---+---+---+---+---+---+---+---+ + * + * Numeric operator + */ + + last = isset_end(pos); + + /* The AND bit should in the first operator byte of a sequence */ + if (first && isset_and(pos)) + return FLOW_ST_AND_BIT_SHOULD_BE_UNSET; + + /* This bit should be zero */ + if (*pos & 0x08) + return FLOW_ST_ZERO_BIT_SHOULD_BE_UNSED; + + if (type == FLOW_TYPE_TCP_FLAGS || type == FLOW_TYPE_FRAGMENT) + { + /* + * 0 1 2 3 4 5 6 7 + * +---+---+---+---+---+---+---+---+ + * | e | a | len | 0 | 0 |not| m | + * +---+---+---+---+---+---+---+---+ + * + * Bitmask operand + */ + if (*pos & 0x04) + return FLOW_ST_ZERO_BIT_SHOULD_BE_UNSED; + } + + /* Value length of operator */ + uint len = get_value_length(pos); + if (len > flow_max_value_length(type, ipv6)) + return FLOW_ST_EXCEED_MAX_VALUE_LENGTH; + + /* TCP Flags component must not check highest nibble (just 12 valid bits) */ + if ((type == FLOW_TYPE_TCP_FLAGS) && (len == 2) && (pos[1] & 0xf0)) + return FLOW_ST_INVALID_TCP_FLAGS; + + /* Bit-7 must be 0 [draft-ietf-idr-flow-spec-v6] */ + if ((type == FLOW_TYPE_FRAGMENT) && ipv6 && (pos[1] & 0x01)) + return 
FLOW_ST_CANNOT_USE_DONT_FRAGMENT; + /* XXX: Could be a fragment component encoded in 2-bytes? */ + + pos += 1+len; + + if (pos > end && !last) + return FLOW_ST_NOT_COMPLETE; + + if (pos > (end+1)) + return FLOW_ST_NOT_COMPLETE; + + first = 0; + } + break; + } + default: + return FLOW_ST_UNKNOWN_COMPONENT; + } + } + + if (pos != end) + return FLOW_ST_NOT_COMPLETE; + + if (!ipv6 && !met_dst_pfx) + return FLOW_ST_DEST_PREFIX_REQUIRED; + + return FLOW_ST_VALID; +} + +/** + * flow4_validate - check untrustworthy IPv4 flowspec data stream + * @nlri: flowspec data stream without compressed encoded length value + * @len: length of @nlri + * + * This function checks meaningfulness of binary flowspec. It should return + * %FLOW_ST_VALID or %FLOW_ST_UNKNOWN_COMPONENT. If some problem appears, it + * returns some other %FLOW_ST_xxx state. + */ +inline enum flow_validated_state +flow4_validate(const byte *nlri, uint len) +{ + return flow_validate(nlri, len, 0); +} + +/** + * flow6_validate - check untrustworthy IPv6 flowspec data stream + * @nlri: flowspec binary stream without encoded length value + * @len: length of @nlri + * + * This function checks meaningfulness of binary flowspec. It should return + * %FLOW_ST_VALID or %FLOW_ST_UNKNOWN_COMPONENT. If some problem appears, it + * returns some other %FLOW_ST_xxx state. + */ +inline enum flow_validated_state +flow6_validate(const byte *nlri, uint len) +{ + return flow_validate(nlri, len, 1); +} + +/** + * flow4_validate_cf - validate flowspec data structure &net_addr_flow4 in parsing time + * @f: flowspec data structure &net_addr_flow4 + * + * Check if @f is valid flowspec data structure. Can call cf_error() function + * with a textual description of reason to failing of validation. 
+ */ +void +flow4_validate_cf(net_addr_flow4 *f) +{ + enum flow_validated_state r = flow4_validate(flow4_first_part(f), flow_read_length(f->data)); + + if (r != FLOW_ST_VALID) + cf_error("Invalid flow route: %s", flow_validated_state_str(r)); +} + +/** + * flow6_validate_cf - validate flowspec data structure &net_addr_flow6 in parsing time + * @f: flowspec data structure &net_addr_flow6 + * + * Check if @f is valid flowspec data structure. Can call cf_error() function + * with a textual description of reason to failing of validation. + */ +void +flow6_validate_cf(net_addr_flow6 *f) +{ + enum flow_validated_state r = flow6_validate(flow6_first_part(f), flow_read_length(f->data)); + + if (r != FLOW_ST_VALID) + cf_error("Invalid flow route: %s", flow_validated_state_str(r)); +} + + +/* + * Flowspec Builder + */ + +/** + * flow_builder_init - constructor for flowspec builder instance + * @pool: memory pool + * + * This function prepares flowspec builder instance using memory pool @pool. + */ +struct flow_builder * +flow_builder_init(pool *pool) +{ + struct flow_builder *fb = mb_allocz(pool, sizeof(struct flow_builder)); + BUFFER_INIT(fb->data, pool, 4); + return fb; +} + +static int +is_stackable_type(enum flow_type type) +{ + switch (type) + { + case FLOW_TYPE_IP_PROTOCOL: + case FLOW_TYPE_PORT: + case FLOW_TYPE_DST_PORT: + case FLOW_TYPE_SRC_PORT: + case FLOW_TYPE_ICMP_TYPE: + case FLOW_TYPE_ICMP_CODE: + case FLOW_TYPE_TCP_FLAGS: + case FLOW_TYPE_PACKET_LENGTH: + case FLOW_TYPE_DSCP: + case FLOW_TYPE_FRAGMENT: + case FLOW_TYPE_LABEL: + return 1; + + default: + /* The unknown components are not stack-able in default */ + return 0; + } +} + +static int +builder_add_prepare(struct flow_builder *fb) +{ + if (fb->parts[fb->this_type].length) + { + if (fb->last_type != fb->this_type) + return 0; + + if (!is_stackable_type(fb->this_type)) + return 0; + } + else + { + fb->parts[fb->this_type].offset = fb->data.used; + } + + return 1; +} + +static void 
+builder_add_finish(struct flow_builder *fb) +{ + fb->parts[fb->this_type].length = fb->data.used - fb->parts[fb->this_type].offset; + flow_builder_set_type(fb, fb->this_type); +} + +static void +push_pfx_to_buffer(struct flow_builder *fb, u8 pxlen_bytes, byte *ip) +{ + for (int i = 0; i < pxlen_bytes; i++) + BUFFER_PUSH(fb->data) = *ip++; +} + +/** + * flow_builder4_add_pfx - add IPv4 prefix + * @fb: flowspec builder instance + * @n4: net address of type IPv4 + * + * This function add IPv4 prefix into flowspec builder instance. + */ +int +flow_builder4_add_pfx(struct flow_builder *fb, const net_addr_ip4 *n4) +{ + if (!builder_add_prepare(fb)) + return 0; + + ip4_addr ip4 = ip4_hton(n4->prefix); + + BUFFER_PUSH(fb->data) = fb->this_type; + BUFFER_PUSH(fb->data) = n4->pxlen; + push_pfx_to_buffer(fb, BYTES(n4->pxlen), (byte *) &ip4); + + builder_add_finish(fb); + return 1; +} + +/** + * flow_builder6_add_pfx - add IPv6 prefix + * @fb: flowspec builder instance + * @n6: net address of type IPv4 + * @pxoffset: prefix offset for @n6 + * + * This function add IPv4 prefix into flowspec builder instance. This function + * should return 1 for successful adding, otherwise returns %0. + */ +int +flow_builder6_add_pfx(struct flow_builder *fb, const net_addr_ip6 *n6, u32 pxoffset) +{ + if (!builder_add_prepare(fb)) + return 0; + + ip6_addr ip6 = ip6_hton(n6->prefix); + + BUFFER_PUSH(fb->data) = fb->this_type; + BUFFER_PUSH(fb->data) = n6->pxlen; + BUFFER_PUSH(fb->data) = pxoffset; + push_pfx_to_buffer(fb, BYTES(n6->pxlen) - (pxoffset / 8), ((byte *) &ip6) + (pxoffset / 8)); + + builder_add_finish(fb); + return 1; +} + +/** + * flow_builder_add_op_val - add operator/value pair + * @fb: flowspec builder instance + * @op: operator + * @value: value + * + * This function add operator/value pair as a part of a flowspec component. It + * is required to set appropriate flowspec component type using function + * flow_builder_set_type(). 
This function should return 1 for successful + * adding, otherwise returns 0. + */ +int +flow_builder_add_op_val(struct flow_builder *fb, byte op, u32 value) +{ + if (!builder_add_prepare(fb)) + return 0; + + if (fb->this_type == fb->last_type) + { + /* Remove the end-bit from last operand-value pair of the component */ + fb->data.data[fb->last_op_offset] &= 0x7f; + } + else + { + BUFFER_PUSH(fb->data) = fb->this_type; + } + + fb->last_op_offset = fb->data.used; + + /* Set the end-bit for operand-value pair of the component */ + op |= 0x80; + + if (value & 0xff00) + { + BUFFER_PUSH(fb->data) = op | 0x10; + put_u16(BUFFER_INC(fb->data, 2), value); + } + else + { + BUFFER_PUSH(fb->data) = op; + BUFFER_PUSH(fb->data) = (u8) value; + } + + builder_add_finish(fb); + return 1; +} + +/** + * flow_builder_add_val_mask - add value/bitmask pair + * @fb: flowspec builder instance + * @op: operator + * @value: value + * @mask: bitmask + * + * It is required to set appropriate flowspec component type using function + * flow_builder_set_type(). This function should return 1 for successful adding, + * otherwise returns 0. + */ +int +flow_builder_add_val_mask(struct flow_builder *fb, byte op, u32 value, u32 mask) +{ + u32 a = value & mask; + u32 b = ~value & mask; + + if (a) + { + flow_builder_add_op_val(fb, op ^ 0x01, a); + op |= FLOW_OP_AND; + } + + if (b) + flow_builder_add_op_val(fb, op ^ 0x02, b); + + return 1; +} + + +/** + * flow_builder_set_type - set type of next flowspec component + * @fb: flowspec builder instance + * @type: flowspec component type + * + * This function sets type of next flowspec component. It is necessary to call + * this function before each changing of adding flowspec component. 
+ */ +void +flow_builder_set_type(struct flow_builder *fb, enum flow_type type) +{ + fb->last_type = fb->this_type; + fb->this_type = type; +} + +static ip4_addr +flow_read_ip4(const byte *px, uint pxlen) +{ + ip4_addr ip = IP4_NONE; + memcpy(&ip, px, BYTES(pxlen)); + return ip4_ntoh(ip); +} + +static ip6_addr +flow_read_ip6(const byte *px, uint pxlen, uint pxoffset) +{ + uint floor_offset = BYTES(pxoffset - (pxoffset % 8)); + uint ceil_len = BYTES(pxlen); + ip6_addr ip = IP6_NONE; + + memcpy(((byte *) &ip) + floor_offset, px, ceil_len - floor_offset); + + return ip6_ntoh(ip); +} + +static void +builder_write_parts(struct flow_builder *fb, byte *buf) +{ + for (int i = 1; i < FLOW_TYPE_MAX; i++) + { + if (fb->parts[i].length) + { + memcpy(buf, fb->data.data + fb->parts[i].offset, fb->parts[i].length); + buf += fb->parts[i].length; + } + } +} + +/** + * flow_builder4_finalize - assemble final flowspec data structure &net_addr_flow4 + * @fb: flowspec builder instance + * @lpool: linear memory pool + * + * This function returns final flowspec data structure &net_addr_flow4 allocated + * onto @lpool linear memory pool. + */ +net_addr_flow4 * +flow_builder4_finalize(struct flow_builder *fb, linpool *lpool) +{ + uint data_len = fb->data.used + (fb->data.used < 0xf0 ? 
1 : 2); + net_addr_flow4 *f = lp_alloc(lpool, sizeof(struct net_addr_flow4) + data_len); + + ip4_addr prefix = IP4_NONE; + uint pxlen = 0; + + if (fb->parts[FLOW_TYPE_DST_PREFIX].length) + { + byte *p = fb->data.data + fb->parts[FLOW_TYPE_DST_PREFIX].offset + 1; + pxlen = *p++; + prefix = flow_read_ip4(p, pxlen); + } + *f = NET_ADDR_FLOW4(prefix, pxlen, data_len); + + builder_write_parts(fb, f->data + flow_write_length(f->data, fb->data.used)); + + return f; +} + +/** + * flow_builder6_finalize - assemble final flowspec data structure &net_addr_flow6 + * @fb: flowspec builder instance + * @lpool: linear memory pool for allocation of + * + * This function returns final flowspec data structure &net_addr_flow6 allocated + * onto @lpool linear memory pool. + */ +net_addr_flow6 * +flow_builder6_finalize(struct flow_builder *fb, linpool *lpool) +{ + uint data_len = fb->data.used + (fb->data.used < 0xf0 ? 1 : 2); + net_addr_flow6 *n = lp_alloc(lpool, sizeof(net_addr_flow6) + data_len); + + ip6_addr prefix = IP6_NONE; + uint pxlen = 0; + + if (fb->parts[FLOW_TYPE_DST_PREFIX].length) + { + byte *p = fb->data.data + fb->parts[FLOW_TYPE_DST_PREFIX].offset + 1; + pxlen = *p++; + uint pxoffset = *p++; + prefix = flow_read_ip6(p, pxlen, pxoffset); + } + *n = NET_ADDR_FLOW6(prefix, pxlen, data_len); + + builder_write_parts(fb, n->data + flow_write_length(n->data, fb->data.used)); + + return n; +} + +/** + * flow_builder_clear - flush flowspec builder instance for another flowspec creation + * @fb: flowspec builder instance + * + * This function flushes all data from builder but it maintains pre-allocated + * buffer space. 
+ */ +void +flow_builder_clear(struct flow_builder *fb) +{ + BUFFER(byte) data; + BUFFER_FLUSH(fb->data); + + BUFFER_SHALLOW_COPY(data, fb->data); + memset(fb, 0, sizeof(struct flow_builder)); + BUFFER_SHALLOW_COPY(fb->data, data); +} + + +/* + * Net Formatting + */ + +/* Flowspec operators for [op, value]+ pairs */ + +static const char * +num_op_str(const byte *op) +{ + switch (*op & 0x07) + { + case FLOW_OP_TRUE: return "true"; + case FLOW_OP_EQ: return "="; + case FLOW_OP_GT: return ">"; + case FLOW_OP_GEQ: return ">="; + case FLOW_OP_LT: return "<"; + case FLOW_OP_LEQ: return "<="; + case FLOW_OP_NEQ: return "!="; + case FLOW_OP_FALSE: return "false"; + } + + return NULL; +} + +static uint +get_value(const byte *val, u8 len) +{ + switch (len) + { + case 1: return *val; + case 2: return get_u16(val); + case 4: return get_u32(val); + // No component may have length 8 + // case 8: return get_u64(val); + } + + return 0; +} + +static const char * +fragment_val_str(u8 val) +{ + switch (val) + { + case 1: return "dont_fragment"; + case 2: return "is_fragment"; + case 4: return "first_fragment"; + case 8: return "last_fragment"; + } + return "???"; +} + +static void +net_format_flow_ip(buffer *b, const byte *part, int ipv6) +{ + uint pxlen = *(part+1); + if (ipv6) + { + uint pxoffset = *(part+2); + if (pxoffset) + buffer_print(b, "%I6/%u offset %u; ", flow_read_ip6(part+3,pxlen,pxoffset), pxlen, pxoffset); + else + buffer_print(b, "%I6/%u; ", flow_read_ip6(part+3,pxlen,0), pxlen); + } + else + { + buffer_print(b, "%I4/%u; ", flow_read_ip4(part+2,pxlen), pxlen); + } +} + +static void +net_format_flow_num(buffer *b, const byte *part) +{ + const byte *last_op = NULL; + const byte *op = part+1; + uint val; + uint len; + uint first = 1; + + while (1) + { + if (!first) + { + /* XXX: I don't like this so complicated if-tree */ + if (!isset_and(op) && + ((num_op( op) == FLOW_OP_EQ) || (num_op( op) == FLOW_OP_GEQ)) && + ((num_op(last_op) == FLOW_OP_EQ) || (num_op(last_op) == 
FLOW_OP_LEQ))) + { + b->pos--; /* Remove last char (it is a space) */ + buffer_puts(b, ","); + } + else + { + buffer_puts(b, isset_and(op) ? "&& " : "|| "); + } + } + first = 0; + + len = get_value_length(op); + val = get_value(op+1, len); + + if (!isset_end(op) && !isset_and(op) && isset_and(op+1+len) && + (num_op(op) == FLOW_OP_GEQ) && (num_op(op+1+len) == FLOW_OP_LEQ)) + { + /* Display interval */ + buffer_print(b, "%u..", val); + op += 1 + len; + len = get_value_length(op); + val = get_value(op+1, len); + buffer_print(b, "%u", val); + } + else if (num_op(op) == FLOW_OP_EQ) + { + buffer_print(b, "%u", val); + } + else + { + buffer_print(b, "%s %u", num_op_str(op), val); + } + + if (isset_end(op)) + { + buffer_puts(b, "; "); + break; + } + else + { + buffer_puts(b, " "); + } + + last_op = op; + op += 1 + len; + } +} + +static void +net_format_flow_bitmask(buffer *b, const byte *part) +{ + const byte *op = part+1; + uint val; + uint len; + uint first = 1; + + while (1) + { + if (!first) + { + if (isset_and(op)) + { + b->pos--; /* Remove last char (it is a space) */ + buffer_puts(b, ","); + } + else + { + buffer_puts(b, "|| "); + } + } + first = 0; + + len = get_value_length(op); + val = get_value(op+1, len); + + /* + * Not Match Show + * ------------------ + * 0 0 !0/B + * 0 1 B/B + * 1 0 0/B + * 1 1 !B/B + */ + + if ((*op & 0x3) == 0x3 || (*op & 0x3) == 0) + buffer_puts(b, "!"); + + if (*part == FLOW_TYPE_FRAGMENT && (val == 1 || val == 2 || val == 4 || val == 8)) + buffer_print(b, "%s%s", ((*op & 0x1) ? "" : "!"), fragment_val_str(val)); + else + buffer_print(b, "0x%x/0x%x", ((*op & 0x1) ? 
val : 0), val); + + if (isset_end(op)) + { + buffer_puts(b, "; "); + break; + } + else + { + buffer_puts(b, " "); + } + + op += 1 + len; + } +} + +static uint +net_format_flow(char *buf, uint blen, const byte *data, uint dlen, int ipv6) +{ + buffer b = { + .start = buf, + .pos = buf, + .end = buf + blen, + }; + + const byte *part = flow_first_part(data); + *buf = 0; + + if (ipv6) + buffer_puts(&b, "flow6 { "); + else + buffer_puts(&b, "flow4 { "); + + while (part) + { + buffer_print(&b, "%s ", flow_type_str(*part, ipv6)); + + switch (*part) + { + case FLOW_TYPE_DST_PREFIX: + case FLOW_TYPE_SRC_PREFIX: + net_format_flow_ip(&b, part, ipv6); + break; + case FLOW_TYPE_IP_PROTOCOL: /* == FLOW_TYPE_NEXT_HEADER */ + case FLOW_TYPE_PORT: + case FLOW_TYPE_DST_PORT: + case FLOW_TYPE_SRC_PORT: + case FLOW_TYPE_ICMP_TYPE: + case FLOW_TYPE_ICMP_CODE: + case FLOW_TYPE_PACKET_LENGTH: + case FLOW_TYPE_DSCP: + net_format_flow_num(&b, part); + break; + case FLOW_TYPE_TCP_FLAGS: + case FLOW_TYPE_FRAGMENT: + case FLOW_TYPE_LABEL: + net_format_flow_bitmask(&b, part); + break; + } + + part = flow_next_part(part, data+dlen, ipv6); + } + + buffer_puts(&b, "}"); + + if (b.pos == b.end) + { + b.pos = b.start + MIN(blen - 6, strlen(b.start)); + buffer_puts(&b, " ...}"); + } + + return b.pos - b.start; +} + +/** + * flow4_net_format - stringify flowspec data structure &net_addr_flow4 + * @buf: pre-allocated buffer for writing a stringify net address flowspec + * @blen: free allocated space in @buf + * @f: flowspec data structure &net_addr_flow4 for stringify + * + * This function writes stringified @f into @buf. The function returns number + * of written chars. If final string is too large, the string will ends the with + * ' ...}' sequence and zero-terminator. 
+ */ +uint +flow4_net_format(char *buf, uint blen, const net_addr_flow4 *f) +{ + return net_format_flow(buf, blen, f->data, f->length - sizeof(net_addr_flow4), 0); +} + +/** + * flow6_net_format - stringify flowspec data structure &net_addr_flow6 + * @buf: pre-allocated buffer for writing a stringify net address flowspec + * @blen: free allocated space in @buf + * @f: flowspec data structure &net_addr_flow4 for stringify + * + * This function writes stringified @f into @buf. The function returns number + * of written chars. If final string is too large, the string will ends the with + * ' ...}' sequence and zero-terminator. + */ +uint +flow6_net_format(char *buf, uint blen, const net_addr_flow6 *f) +{ + return net_format_flow(buf, blen, f->data, f->length - sizeof(net_addr_flow6), 1); +} diff --git a/lib/flowspec.h b/lib/flowspec.h new file mode 100644 index 00000000..fa90c70d --- /dev/null +++ b/lib/flowspec.h @@ -0,0 +1,152 @@ +/* + * BIRD Library -- Flow specification (RFC 5575) + * + * (c) 2016 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#ifndef _BIRD_FLOWSPEC_H_ +#define _BIRD_FLOWSPEC_H_ + +#include "nest/bird.h" +#include "lib/buffer.h" +#include "lib/net.h" + + +/* Flow component operators */ +#define FLOW_OP_TRUE 0x00 /* 0b000 */ +#define FLOW_OP_EQ 0x01 /* 0b001 */ +#define FLOW_OP_GT 0x02 /* 0b010 */ +#define FLOW_OP_GEQ 0x03 /* 0b011 */ +#define FLOW_OP_LT 0x04 /* 0b100 */ +#define FLOW_OP_LEQ 0x05 /* 0b101 */ +#define FLOW_OP_NEQ 0x06 /* 0b110 */ +#define FLOW_OP_FALSE 0x07 /* 0b111 */ + +#define FLOW_OP_OR 0x00 +#define FLOW_OP_AND 0x40 + + +/* Types of components in flowspec */ +enum flow_type { + FLOW_TYPE_DST_PREFIX = 1, + FLOW_TYPE_SRC_PREFIX = 2, + FLOW_TYPE_IP_PROTOCOL = 3, + FLOW_TYPE_NEXT_HEADER = 3, /* IPv6 */ + FLOW_TYPE_PORT = 4, + FLOW_TYPE_DST_PORT = 5, + FLOW_TYPE_SRC_PORT = 6, + FLOW_TYPE_ICMP_TYPE = 7, + FLOW_TYPE_ICMP_CODE = 8, + FLOW_TYPE_TCP_FLAGS = 9, + FLOW_TYPE_PACKET_LENGTH = 10, + FLOW_TYPE_DSCP = 11, /* DiffServ Code Point */ + FLOW_TYPE_FRAGMENT = 12, + FLOW_TYPE_LABEL = 13, /* IPv6 */ + FLOW_TYPE_MAX +}; + +const char *flow_type_str(enum flow_type type, int ipv6); + + +/* + * Length + */ + +uint flow_write_length(byte *data, u16 len); + +static inline u16 flow_hdr_length(const byte *data) +{ return ((*data & 0xf0) == 0xf0) ? 2 : 1; } + +static inline u16 flow_read_length(const byte *data) +{ return ((*data & 0xf0) == 0xf0) ? 
get_u16(data) & 0x0fff : *data; } + +static inline u16 flow4_get_length(const net_addr_flow4 *f) +{ return f->length - sizeof(net_addr_flow4); } + +static inline u16 flow6_get_length(const net_addr_flow6 *f) +{ return f->length - sizeof(net_addr_flow6); } + +static inline void flow4_set_length(net_addr_flow4 *f, u16 len) +{ f->length = sizeof(net_addr_flow4) + flow_write_length(f->data, len) + len; } + +static inline void flow6_set_length(net_addr_flow6 *f, u16 len) +{ f->length = sizeof(net_addr_flow6) + flow_write_length(f->data, len) + len; } + + +/* + * Iterators + */ + +const byte *flow4_first_part(const net_addr_flow4 *f); +const byte *flow6_first_part(const net_addr_flow6 *f); +const byte *flow4_next_part(const byte *pos, const byte *end); +const byte *flow6_next_part(const byte *pos, const byte *end); + + +/* + * Flowspec Builder + */ + +/* A data structure for keep a state of flow builder */ +struct flow_builder { + BUFFER_(byte) data; + enum flow_type this_type; + enum flow_type last_type; + u16 last_op_offset; /* Position of last operator in data.data */ + int ipv6; + struct { + u16 offset; /* Beginning of a component */ + u16 length; /* Length of a component */ + } parts[FLOW_TYPE_MAX]; /* Indexing all components */ +}; + +struct flow_builder *flow_builder_init(pool *pool); +void flow_builder_clear(struct flow_builder *fb); +void flow_builder_set_type(struct flow_builder *fb, enum flow_type p); +int flow_builder4_add_pfx(struct flow_builder *fb, const net_addr_ip4 *n4); +int flow_builder6_add_pfx(struct flow_builder *fb, const net_addr_ip6 *n6, u32 offset); +int flow_builder_add_op_val(struct flow_builder *fb, byte op, u32 value); +int flow_builder_add_val_mask(struct flow_builder *fb, byte op, u32 value, u32 mask); +net_addr_flow4 *flow_builder4_finalize(struct flow_builder *fb, linpool *lpool); +net_addr_flow6 *flow_builder6_finalize(struct flow_builder *fb, linpool *lpool); + + +/* + * Validation + */ + +/* Results of validation Flow specification */ 
+enum flow_validated_state { + FLOW_ST_UNKNOWN_COMPONENT, + FLOW_ST_VALID, + FLOW_ST_NOT_COMPLETE, + FLOW_ST_EXCEED_MAX_PREFIX_LENGTH, + FLOW_ST_EXCEED_MAX_PREFIX_OFFSET, + FLOW_ST_EXCEED_MAX_VALUE_LENGTH, + FLOW_ST_BAD_TYPE_ORDER, + FLOW_ST_AND_BIT_SHOULD_BE_UNSET, + FLOW_ST_ZERO_BIT_SHOULD_BE_UNSED, + FLOW_ST_DEST_PREFIX_REQUIRED, + FLOW_ST_INVALID_TCP_FLAGS, + FLOW_ST_CANNOT_USE_DONT_FRAGMENT +}; + +const char *flow_validated_state_str(enum flow_validated_state code); +enum flow_validated_state flow4_validate(const byte *nlri, uint len); +enum flow_validated_state flow6_validate(const byte *nlri, uint len); +void flow_check_cf_value_length(struct flow_builder *fb, u32 expr); +void flow_check_cf_bmk_values(struct flow_builder *fb, u8 neg, u32 val, u32 mask); +void flow4_validate_cf(net_addr_flow4 *f); +void flow6_validate_cf(net_addr_flow6 *f); + + +/* + * Net Formatting + */ + +uint flow4_net_format(char *buf, uint blen, const net_addr_flow4 *f); +uint flow6_net_format(char *buf, uint blen, const net_addr_flow6 *f); + +#endif /* _BIRD_FLOWSPEC_H_ */ diff --git a/lib/flowspec_test.c b/lib/flowspec_test.c new file mode 100644 index 00000000..dd71dc7b --- /dev/null +++ b/lib/flowspec_test.c @@ -0,0 +1,639 @@ +/* + * BIRD Library -- Flow specification (RFC 5575) Tests + * + * (c) 2016 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#include "test/birdtest.h" +#include "lib/flowspec.h" + +#define NET_ADDR_FLOW4_(what,prefix,pxlen,data_) \ + do \ + { \ + what = alloca(sizeof(net_addr_flow4) + 128); \ + *what = NET_ADDR_FLOW4(prefix, pxlen, sizeof(data_)); \ + memcpy(what->data, &(data_), sizeof(data_)); \ + } while(0) + +#define NET_ADDR_FLOW6_(what,prefix,pxlen,data_) \ + do \ + { \ + what = alloca(sizeof(net_addr_flow6) + 128); \ + *what = NET_ADDR_FLOW6(prefix, pxlen, sizeof(data_)); \ + memcpy(what->data, &(data_), sizeof(data_)); \ + } while(0) + +static int +t_read_length(void) +{ + byte data[] = { 0xcc, 0xcc, 0xcc }; + + for (uint expect = 0; expect < 0xf0; expect++) + { + *data = expect; + uint get = flow_read_length(data); + bt_assert_msg(get == expect, "Testing get length 0x%02x (get 0x%02x)", expect, get); + } + + for (uint expect = 0; expect <= 0xfff; expect++) + { + put_u16(data, expect | 0xf000); + uint get = flow_read_length(data); + bt_assert_msg(get == expect, "Testing get length 0x%03x (get 0x%03x)", expect, get); + } + + return 1; +} + +static int +t_write_length(void) +{ + byte data[] = { 0xcc, 0xcc, 0xcc }; + + for (uint expect = 0; expect <= 0xfff; expect++) + { + uint offset = flow_write_length(data, expect); + + uint set = (expect < 0xf0) ? *data : (get_u16(data) & 0x0fff); + bt_assert_msg(set == expect, "Testing set length 0x%03x (set 0x%03x)", expect, set); + bt_assert(offset == (expect < 0xf0 ? 
1 : 2)); + } + + return 1; +} + +static int +t_first_part(void) +{ + net_addr_flow4 *f; + NET_ADDR_FLOW4_(f, ip4_build(10,0,0,1), 24, ((byte[]) { 0x00, 0x00, 0xab })); + + const byte *under240 = &f->data[1]; + const byte *above240 = &f->data[2]; + + /* Case 0x00 0x00 */ + bt_assert(flow4_first_part(f) == NULL); + + /* Case 0x01 0x00 */ + f->data[0] = 0x01; + bt_assert(flow4_first_part(f) == under240); + + /* Case 0xef 0x00 */ + f->data[0] = 0xef; + bt_assert(flow4_first_part(f) == under240); + + /* Case 0xf0 0x00 */ + f->data[0] = 0xf0; + bt_assert(flow4_first_part(f) == NULL); + + /* Case 0xf0 0x01 */ + f->data[1] = 0x01; + bt_assert(flow4_first_part(f) == above240); + + /* Case 0xff 0xff */ + f->data[0] = 0xff; + f->data[1] = 0xff; + bt_assert(flow4_first_part(f) == above240); + + return 1; +} + +static int +t_iterators4(void) +{ + net_addr_flow4 *f; + NET_ADDR_FLOW4_(f, ip4_build(5,6,7,0), 24, ((byte[]) { + 25, /* Length */ + FLOW_TYPE_DST_PREFIX, 24, 5, 6, 7, + FLOW_TYPE_SRC_PREFIX, 32, 10, 11, 12, 13, + FLOW_TYPE_IP_PROTOCOL, 0x81, 0x06, + FLOW_TYPE_PORT, 0x03, 0x89, 0x45, 0x8b, 0x91, 0x1f, 0x90, + FLOW_TYPE_TCP_FLAGS, 0x80, 0x55, + })); + + const byte *start = f->data; + const byte *p1_dst_pfx = &f->data[1]; + const byte *p2_src_pfx = &f->data[6]; + const byte *p3_ip_proto = &f->data[12]; + const byte *p4_port = &f->data[15]; + const byte *p5_tcp_flags = &f->data[23]; + const byte *end = &f->data[25]; + + bt_assert(flow_read_length(f->data) == (end-start)); + bt_assert(flow4_first_part(f) == p1_dst_pfx); + + bt_assert(flow4_next_part(p1_dst_pfx, end) == p2_src_pfx); + bt_assert(flow4_next_part(p2_src_pfx, end) == p3_ip_proto); + bt_assert(flow4_next_part(p3_ip_proto, end) == p4_port); + bt_assert(flow4_next_part(p4_port, end) == p5_tcp_flags); + bt_assert(flow4_next_part(p5_tcp_flags, end) == NULL); + + return 1; +} + +static int +t_iterators6(void) +{ + net_addr_flow6 *f; + NET_ADDR_FLOW6_(f, ip6_build(0,0,0x12345678,0x9a000000), 64, ((byte[]) { + 26, /* 
Length */ + FLOW_TYPE_DST_PREFIX, 0x68, 0x40, 0x12, 0x34, 0x56, 0x78, 0x9a, + FLOW_TYPE_SRC_PREFIX, 0x08, 0x0, 0xc0, + FLOW_TYPE_NEXT_HEADER, 0x81, 0x06, + FLOW_TYPE_PORT, 0x03, 0x89, 0x45, 0x8b, 0x91, 0x1f, 0x90, + FLOW_TYPE_LABEL, 0x80, 0x55, + })); + + const byte *start = f->data; + const byte *p1_dst_pfx = &f->data[1]; + const byte *p2_src_pfx = &f->data[9]; + const byte *p3_next_header = &f->data[13]; + const byte *p4_port = &f->data[16]; + const byte *p5_label = &f->data[24]; + const byte *end = &f->data[26]; + + bt_assert(flow_read_length(f->data) == (end-start)); + bt_assert(flow6_first_part(f) == p1_dst_pfx); + + bt_assert(flow6_next_part(p1_dst_pfx, end) == p2_src_pfx); + bt_assert(flow6_next_part(p2_src_pfx, end) == p3_next_header); + bt_assert(flow6_next_part(p3_next_header, end) == p4_port); + bt_assert(flow6_next_part(p4_port, end) == p5_label); + bt_assert(flow6_next_part(p5_label, end) == NULL); + + return 1; +} + +static int +t_validation4(void) +{ + enum flow_validated_state res; + + byte nlri1[] = { + FLOW_TYPE_DST_PREFIX, 24, 5, 6, 7, + FLOW_TYPE_SRC_PREFIX, 32, 10, 11, 12, 13, + FLOW_TYPE_IP_PROTOCOL, 0x81, 0x06, + FLOW_TYPE_PORT, 0x03, 0x89, 0x45, 0x8b, 0x91, 0x1f, 0x90, + FLOW_TYPE_TCP_FLAGS, 0x80, 0x55, + }; + + /* Isn't included destination prefix */ + res = flow4_validate(nlri1, 0); + bt_assert(res == FLOW_ST_DEST_PREFIX_REQUIRED); + res = flow4_validate(&nlri1[5], sizeof(nlri1)-5); + bt_assert(res == FLOW_ST_DEST_PREFIX_REQUIRED); + + /* Valid / Not Complete testing */ + uint valid_sizes[] = {5, 11, 14, 22, 25, 0}; + uint valid_idx = 0; + for (uint size = 1; size <= sizeof(nlri1); size++) + { + res = flow4_validate(nlri1, size); + bt_debug("size %u, result: %s\n", size, flow_validated_state_str(res)); + if (size == valid_sizes[valid_idx]) + { + valid_idx++; + bt_assert(res == FLOW_ST_VALID); + } + else + { + bt_assert(res == FLOW_ST_NOT_COMPLETE); + } + } + + /* Misc err tests */ + + struct tset { + enum flow_validated_state expect; + 
char *description; + u16 size; + byte *nlri; + }; + +#define TS(type, msg, data) ((struct tset) {type, msg, sizeof(data), (data)}) + struct tset tset[] = { + TS( + FLOW_ST_EXCEED_MAX_PREFIX_LENGTH, + "33-length IPv4 prefix", + ((byte []) { + FLOW_TYPE_DST_PREFIX, 33, 5, 6, 7, 8, 9 + }) + ), + TS( + FLOW_ST_BAD_TYPE_ORDER, + "Bad flowspec component type order", + ((byte []) { + FLOW_TYPE_SRC_PREFIX, 32, 10, 11, 12, 13, + FLOW_TYPE_DST_PREFIX, 24, 5, 6, 7, + }) + ), + TS( + FLOW_ST_BAD_TYPE_ORDER, + "Doubled destination prefix component", + ((byte []) { + FLOW_TYPE_DST_PREFIX, 24, 5, 6, 7, + FLOW_TYPE_DST_PREFIX, 24, 5, 6, 7, + }) + ), + TS( + FLOW_ST_AND_BIT_SHOULD_BE_UNSET, + "The first numeric operator has set the AND bit", + ((byte []) { + FLOW_TYPE_PORT, 0x43, 0x89, 0x45, 0x8b, 0x91, 0x1f, 0x90, + }) + ), + TS( + FLOW_ST_ZERO_BIT_SHOULD_BE_UNSED, + "Set zero bit in operand to one", + ((byte []) { + FLOW_TYPE_IP_PROTOCOL, 0x89, 0x06, + }) + ), + TS( + FLOW_ST_UNKNOWN_COMPONENT, + "Unknown component of type number 13", + ((byte []) { + FLOW_TYPE_DST_PREFIX, 24, 5, 6, 7, + FLOW_TYPE_TCP_FLAGS, 0x80, 0x55, + 13 /*something new*/, 0x80, 0x55, + }) + ), + }; +#undef TS + + for (uint tcase = 0; tcase < ARRAY_SIZE(tset); tcase++) + { + res = flow4_validate(tset[tcase].nlri, tset[tcase].size); + bt_assert_msg(res == tset[tcase].expect, "Assertion (%s == %s) %s", flow_validated_state_str(res), flow_validated_state_str(tset[tcase].expect), tset[tcase].description); + } + + return 1; +} + +static int +t_validation6(void) +{ + enum flow_validated_state res; + + byte nlri1[] = { + FLOW_TYPE_DST_PREFIX, 103, 61, 0x01, 0x12, 0x34, 0x56, 0x78, 0x98, + FLOW_TYPE_SRC_PREFIX, 8, 0, 0xc0, + FLOW_TYPE_NEXT_HEADER, 0x81, 0x06, + FLOW_TYPE_PORT, 0x03, 0x89, 0x45, 0x8b, 0x91, 0x1f, 0x90, + FLOW_TYPE_LABEL, 0x80, 0x55, + }; + + /* Isn't included destination prefix */ + res = flow6_validate(nlri1, 0); + bt_assert(res == FLOW_ST_VALID); + + /* Valid / Not Complete testing */ + uint 
valid_sizes[] = {0, 9, 13, 16, 24, 27, 0}; + uint valid_idx = 0; + for (uint size = 0; size <= sizeof(nlri1); size++) + { + res = flow6_validate(nlri1, size); + bt_debug("size %u, result: %s\n", size, flow_validated_state_str(res)); + if (size == valid_sizes[valid_idx]) + { + valid_idx++; + bt_assert(res == FLOW_ST_VALID); + } + else + { + bt_assert(res == FLOW_ST_NOT_COMPLETE); + } + } + + /* Misc err tests */ + + struct tset { + enum flow_validated_state expect; + char *description; + u16 size; + byte *nlri; + }; + +#define TS(type, msg, data) ((struct tset) {type, msg, sizeof(data), (data)}) + struct tset tset[] = { + TS( + FLOW_ST_EXCEED_MAX_PREFIX_LENGTH, + "129-length IPv6 prefix", + ((byte []) { + FLOW_TYPE_DST_PREFIX, 129, 64, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x12 + }) + ), + TS( + FLOW_ST_EXCEED_MAX_PREFIX_OFFSET, + "Prefix offset is higher than prefix length", + ((byte []) { + FLOW_TYPE_DST_PREFIX, 48, 64, 0x40, 0x12, 0x34 + }) + ), + TS( + FLOW_ST_BAD_TYPE_ORDER, + "Bad flowspec component type order", + ((byte []) { + FLOW_TYPE_NEXT_HEADER, 0x81, 0x06, + FLOW_TYPE_SRC_PREFIX, 8, 0, 0xc0, + }) + ), + TS( + FLOW_ST_BAD_TYPE_ORDER, + "Doubled destination prefix component", + ((byte []) { + FLOW_TYPE_DST_PREFIX, 103, 61, 0x01, 0x12, 0x34, 0x56, 0x78, 0x98, + FLOW_TYPE_DST_PREFIX, 103, 61, 0x01, 0x12, 0x34, 0x56, 0x78, 0x98, + }) + ), + TS( + FLOW_ST_AND_BIT_SHOULD_BE_UNSET, + "The first numeric operator has set the AND bit", + ((byte []) { + FLOW_TYPE_PORT, 0x43, 0x89, 0x45, 0x8b, 0x91, 0x1f, 0x90 + }) + ), + TS( + FLOW_ST_ZERO_BIT_SHOULD_BE_UNSED, + "Set zero bit in operand to one", + ((byte []) { + FLOW_TYPE_NEXT_HEADER, 0x89, 0x06 + }) + ), + TS( + FLOW_ST_VALID, + "Component of type number 13 (Label) is well-known in IPv6", + ((byte []) { + FLOW_TYPE_LABEL, 0x80, 0x55 + }) + ), + TS( + FLOW_ST_UNKNOWN_COMPONENT, + "Unknown component of type number 14", + ((byte []) { + FLOW_TYPE_LABEL, 0x80, 0x55, + 14 /*something new*/, 0x80, 0x55, + }) + 
) + }; +#undef TS + + for (uint tcase = 0; tcase < ARRAY_SIZE(tset); tcase++) + { + res = flow6_validate(tset[tcase].nlri, tset[tcase].size); + bt_assert_msg(res == tset[tcase].expect, "Assertion (%s == %s) %s", flow_validated_state_str(res), flow_validated_state_str(tset[tcase].expect), tset[tcase].description); + } + + return 1; +} + + + +/* + * Builder tests + */ + +static int +t_builder4(void) +{ + resource_init(); + + struct flow_builder *fb = flow_builder_init(&root_pool); + linpool *lp = lp_new_default(&root_pool); + + /* Expectation */ + + static byte nlri[] = { + 25, + FLOW_TYPE_DST_PREFIX, 24, 5, 6, 7, + FLOW_TYPE_SRC_PREFIX, 32, 10, 11, 12, 13, + FLOW_TYPE_IP_PROTOCOL, 0x80, 0x06, + FLOW_TYPE_PORT, 0x03, 0x89, 0x45, 0x8b, 0x91, 0x1f, 0x90, + FLOW_TYPE_TCP_FLAGS, 0x80, 0x55 + }; + + net_addr_flow4 *expect; + NET_ADDR_FLOW4_(expect, ip4_build(5, 6, 7, 0), 24, nlri); + + /* Normal order */ + + net_addr_ip4 n1; + net_fill_ip4((net_addr *) &n1, ip4_build(5,6,7,0), 24); + flow_builder_set_type(fb, FLOW_TYPE_DST_PREFIX); + flow_builder4_add_pfx(fb, &n1); + + net_addr_ip4 n2; + net_fill_ip4((net_addr *) &n2, ip4_build(10,11,12,13), 32); + flow_builder_set_type(fb, FLOW_TYPE_SRC_PREFIX); + flow_builder4_add_pfx(fb, &n2); + + flow_builder_set_type(fb, FLOW_TYPE_IP_PROTOCOL); + flow_builder_add_op_val(fb, 0, 0x06); + + flow_builder_set_type(fb, FLOW_TYPE_PORT); + flow_builder_add_op_val(fb, 0x03, 0x89); + flow_builder_add_op_val(fb, 0x45, 0x8b); + flow_builder_add_op_val(fb, 0x01, 0x1f90); + + /* Try put a component twice time */ + flow_builder_set_type(fb, FLOW_TYPE_IP_PROTOCOL); + flow_builder_add_op_val(fb, 0, 0x06); + + flow_builder_set_type(fb, FLOW_TYPE_TCP_FLAGS); + flow_builder_add_op_val(fb, 0, 0x55); + + net_addr_flow4 *res = flow_builder4_finalize(fb, lp); + + bt_assert(memcmp(res, expect, expect->length) == 0); + + /* Reverse order */ + + flow_builder_clear(fb); + + flow_builder_set_type(fb, FLOW_TYPE_TCP_FLAGS); + flow_builder_add_op_val(fb, 0, 0x55); 
+ + flow_builder_set_type(fb, FLOW_TYPE_PORT); + flow_builder_add_op_val(fb, 0x03, 0x89); + flow_builder_add_op_val(fb, 0x45, 0x8b); + flow_builder_add_op_val(fb, 0x01, 0x1f90); + + flow_builder_set_type(fb, FLOW_TYPE_IP_PROTOCOL); + flow_builder_add_op_val(fb, 0, 0x06); + + net_fill_ip4((net_addr *) &n2, ip4_build(10,11,12,13), 32); + flow_builder_set_type(fb, FLOW_TYPE_SRC_PREFIX); + flow_builder4_add_pfx(fb, &n2); + + net_fill_ip4((net_addr *) &n1, ip4_build(5,6,7,0), 24); + flow_builder_set_type(fb, FLOW_TYPE_DST_PREFIX); + flow_builder4_add_pfx(fb, &n1); + + /* Finalize again so the assertion checks the reverse-order build, not the stale normal-order result */ + res = flow_builder4_finalize(fb, lp); + bt_assert(memcmp(res, expect, expect->length) == 0); + + return 1; +} + +static int +t_builder6(void) +{ + net_addr_ip6 ip; + + resource_init(); + linpool *lp = lp_new_default(&root_pool); + struct flow_builder *fb = flow_builder_init(&root_pool); + fb->ipv6 = 1; + + /* Expectation */ + + byte nlri[] = { + 27, + FLOW_TYPE_DST_PREFIX, 103, 61, 0x01, 0x12, 0x34, 0x56, 0x78, 0x98, + FLOW_TYPE_SRC_PREFIX, 8, 0, 0xc0, + FLOW_TYPE_NEXT_HEADER, 0x80, 0x06, + FLOW_TYPE_PORT, 0x03, 0x89, 0x45, 0x8b, 0x91, 0x1f, 0x90, + FLOW_TYPE_LABEL, 0x80, 0x55, + }; + + net_addr_flow6 *expect; + NET_ADDR_FLOW6_(expect, ip6_build(0, 1, 0x12345678, 0x98000000), 103, nlri); + + /* Normal order */ + + net_fill_ip6((net_addr *) &ip, ip6_build(0, 1, 0x12345678, 0x98000000), 103); + flow_builder_set_type(fb, FLOW_TYPE_DST_PREFIX); + flow_builder6_add_pfx(fb, &ip, 61); + + /* Try put a component twice time */ + net_fill_ip6((net_addr *) &ip, ip6_build(0, 1, 0x12345678, 0x98000000), 103); + flow_builder_set_type(fb, FLOW_TYPE_DST_PREFIX); + bt_assert(flow_builder6_add_pfx(fb, &ip, 61) == 0); + + net_fill_ip6((net_addr *) &ip, ip6_build(0xc0000000,0,0,0), 8); + flow_builder_set_type(fb, FLOW_TYPE_SRC_PREFIX); + flow_builder6_add_pfx(fb, &ip, 0); + + flow_builder_set_type(fb, FLOW_TYPE_NEXT_HEADER); + flow_builder_add_op_val(fb, 0, 0x06); + + flow_builder_set_type(fb, FLOW_TYPE_PORT); + flow_builder_add_op_val(fb, 0x03, 0x89); + 
flow_builder_add_op_val(fb, 0x45, 0x8b); + flow_builder_add_op_val(fb, 0x01, 0x1f90); + + flow_builder_set_type(fb, FLOW_TYPE_LABEL); + flow_builder_add_op_val(fb, 0, 0x55); + + net_addr_flow6 *res = flow_builder6_finalize(fb, lp); + bt_assert(memcmp(res, expect, expect->length) == 0); + + /* Reverse order */ + + flow_builder_clear(fb); + fb->ipv6 = 1; + + flow_builder_set_type(fb, FLOW_TYPE_LABEL); + flow_builder_add_op_val(fb, 0, 0x55); + + flow_builder_set_type(fb, FLOW_TYPE_PORT); + flow_builder_add_op_val(fb, 0x03, 0x89); + flow_builder_add_op_val(fb, 0x45, 0x8b); + flow_builder_add_op_val(fb, 0x01, 0x1f90); + + flow_builder_set_type(fb, FLOW_TYPE_NEXT_HEADER); + flow_builder_add_op_val(fb, 0, 0x06); + + net_fill_ip6((net_addr *) &ip, ip6_build(0xc0000000,0,0,0), 8); + flow_builder_set_type(fb, FLOW_TYPE_SRC_PREFIX); + flow_builder6_add_pfx(fb, &ip, 0); + + net_fill_ip6((net_addr *) &ip, ip6_build(0, 1, 0x12345678, 0x98000000), 103); + flow_builder_set_type(fb, FLOW_TYPE_DST_PREFIX); + flow_builder6_add_pfx(fb, &ip, 61); + + res = flow_builder6_finalize(fb, lp); + bt_assert(memcmp(res, expect, expect->length) == 0); + + return 1; +} + +static int +t_formatting4(void) +{ + char b[1024]; + + byte nlri[] = { + 0, + FLOW_TYPE_DST_PREFIX, 0x08, 10, + FLOW_TYPE_IP_PROTOCOL, 0x81, 23, + FLOW_TYPE_DST_PORT, 0x02, 24, 0x44, 30, 0x03, 40, 0x45, 50, 0x03, 60, 0x45, 70, 0x01, 80, 0xc3, 90, + FLOW_TYPE_SRC_PORT, 0x02, 24, 0x44, 0x1e, 0x01, 0x28, 0x01, 0x32, 0x03, 0x3c, 0x45, 0x46, 0x81, 0x50, + FLOW_TYPE_ICMP_TYPE, 0x81, 0x50, + FLOW_TYPE_ICMP_CODE, 0x81, 0x5a, + FLOW_TYPE_TCP_FLAGS, 0x01, 0x03, 0xc2, 0x0c, + FLOW_TYPE_PACKET_LENGTH, 0x03, 0, 0xd5, 0xff, 0xff, + FLOW_TYPE_DSCP, 0x81, 63, + FLOW_TYPE_FRAGMENT, 0x01, 0x01, 0x82, 0x02 + }; + *nlri = (u8) sizeof(nlri); + + net_addr_flow4 *input; + NET_ADDR_FLOW4_(input, ip4_build(5, 6, 7, 0), 24, nlri); + + const char *expect = "flow4 { dst 10.0.0.0/8; proto 23; dport > 24 && < 30 || 40..50,60..70,80 && >= 90; sport > 24 && < 
30 || 40,50,60..70,80; icmp type 80; icmp code 90; tcp flags 0x3/0x3,0x0/0xc; length 0..65535; dscp 63; fragment dont_fragment || !is_fragment; }"; + + bt_assert(flow4_net_format(b, sizeof(b), input) == strlen(expect)); + bt_debug(" expect: '%s',\n output: '%s'\n", expect, b); + bt_assert(strcmp(b, expect) == 0); + + return 1; +} + +static int +t_formatting6(void) +{ + char b[1024]; + + byte nlri[] = { + 0, + FLOW_TYPE_DST_PREFIX, 103, 61, 0x01, 0x12, 0x34, 0x56, 0x78, 0x98, + FLOW_TYPE_SRC_PREFIX, 8, 0, 0xc0, + FLOW_TYPE_NEXT_HEADER, 0x81, 0x06, + FLOW_TYPE_PORT, 0x03, 20, 0x45, 40, 0x91, 0x01, 0x11, + FLOW_TYPE_LABEL, 0xa0, 0x12, 0x34, 0x56, 0x78, + }; + *nlri = (u8) sizeof(nlri); + + net_addr_flow6 *input; + NET_ADDR_FLOW6_(input, ip6_build(0, 1, 0x12345678, 0x98000000), 103, nlri); + + const char *expect = "flow6 { dst ::1:1234:5678:9800:0/103 offset 61; src c000::/8; next header 6; port 20..40,273; label !0x0/0x12345678; }"; + + bt_assert(flow6_net_format(b, sizeof(b), input) == strlen(expect)); + bt_debug(" expect: '%s',\n output: '%s'\n", expect, b); + bt_assert(strcmp(b, expect) == 0); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_read_length, "Testing get NLRI length"); + bt_test_suite(t_write_length, "Testing set NLRI length"); + bt_test_suite(t_first_part, "Searching first part in net_addr_flow"); + bt_test_suite(t_iterators4, "Testing iterators (IPv4)"); + bt_test_suite(t_iterators6, "Testing iterators (IPv6)"); + bt_test_suite(t_validation4, "Testing validation (IPv4)"); + bt_test_suite(t_validation6, "Testing validation (IPv6)"); + bt_test_suite(t_builder4, "Inserting components into existing Flow Specification (IPv4)"); + bt_test_suite(t_builder6, "Inserting components into existing Flow Specification (IPv6)"); + bt_test_suite(t_formatting4, "Formatting Flow Specification (IPv4) into text representation"); + bt_test_suite(t_formatting6, "Formatting Flow Specification (IPv6) into text 
representation"); + + return bt_exit_value(); +} @@ -194,4 +194,40 @@ #define HASH_WALK_FILTER_END } while (0) + +static inline void +mem_hash_init(u64 *h) +{ + *h = 0x001047d54778bcafULL; +} + +static inline void +mem_hash_mix(u64 *h, void *p, uint s) +{ + const u64 multiplier = 0xb38bc09a61202731ULL; + const char *pp = p; + uint i; + + for (i=0; i<s/4; i++) + *h = *h * multiplier + ((const u32 *)pp)[i]; + + for (i=s & ~0x3; i<s; i++) + *h = *h * multiplier + pp[i]; +} + +static inline uint +mem_hash_value(u64 *h) +{ + return ((*h >> 32) ^ (*h & 0xffffffff)); +} + +static inline uint +mem_hash(void *p, uint s) +{ + u64 h; /* automatic, not static: the state is re-initialized on every call, so sharing it only made the helper non-reentrant */ + mem_hash_init(&h); + mem_hash_mix(&h, p, s); + return mem_hash_value(&h); +} + #endif diff --git a/lib/hash_test.c b/lib/hash_test.c new file mode 100644 index 00000000..59beb7c0 --- /dev/null +++ b/lib/hash_test.c @@ -0,0 +1,305 @@ +/* + * BIRD Library -- Hash Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#undef LOCAL_DEBUG + +#include "test/birdtest.h" + +#include "lib/hash.h" + +struct test_node { + struct test_node *next; /* Hash chain */ + u32 key; +}; + +#define TEST_KEY(n) n->key +#define TEST_NEXT(n) n->next +#define TEST_EQ(n1,n2) n1 == n2 +#define TEST_FN(n) (n) ^ u32_hash((n)) +#define TEST_ORDER 13 +#define TEST_PARAMS /TEST_ORDER, *2, 2, 2, TEST_ORDER, 20 +#define TEST_REHASH test_rehash + +HASH_DEFINE_REHASH_FN(TEST, struct test_node); + +HASH(struct test_node) hash; +struct pool *my_pool; + +#define MAX_NUM (1 << TEST_ORDER) + +struct test_node nodes[MAX_NUM]; + +static void +print_rate_of_fulfilment(void) +{ + int i; + int num_stacked_items = 0; + + for (i = 0; i < MAX_NUM; i++) + if (!hash.data[i]) + num_stacked_items++; + + double percent_stacked_items = ((double)num_stacked_items/(double)MAX_NUM)*100.; + bt_debug("%d (%.2f %%) chained of %d hashes \n", num_stacked_items, percent_stacked_items, MAX_NUM); +} + +#ifdef LOCAL_DEBUG +static void +dump_nodes(void) +{ + int i; + for (i = 0; i < MAX_NUM; i++) + bt_debug("nodes[%3d] is at address %14p has .key %3d, .next %14p \n", i, &nodes[i], nodes[i].key, nodes[i].next); +} +#endif + +static void +init_hash_(uint order) +{ + resource_init(); + my_pool = rp_new(&root_pool, "Test pool"); + + HASH_INIT(hash, my_pool, order); + + int i; + for (i = 0; i < MAX_NUM; i++) + { + nodes[i].key = i; + nodes[i].next = NULL; + } + + bt_debug("MAX_NUM %d \n", MAX_NUM); +} + +static void +init_hash(void) +{ + init_hash_(TEST_ORDER); +} + +static void +validate_filled_hash(void) +{ + int i; + struct test_node *node; + for (i = 0; i < MAX_NUM; i++) + { + node = HASH_FIND(hash, TEST, nodes[i].key); + /* Check for NULL first: a missing node must fail the assertion, not crash the test */ + bt_assert_msg(((node != NULL) && (node->key == nodes[i].key)), "Hash should be filled, to find (%p) the node[%d] (%p) with .key = %u, .next %p", node, i, &nodes[i], nodes[i].key, nodes[i].next); + } + + print_rate_of_fulfilment(); +} + +static void +validate_empty_hash(void) +{ + int i; + struct test_node *node; + for (i = 0; i < MAX_NUM; 
i++) + { + node = HASH_FIND(hash, TEST, nodes[i].key); + bt_assert_msg(node == NULL, "Hash should be empty, to find (%p) the node[%d] (%p) with .key %u, .next %p", node, i, &nodes[i], nodes[i].key, nodes[i].next); + } +} + +static void +fill_hash(void) +{ + int i; + struct test_node *node; + + for (i = 0; i < MAX_NUM; i++) + { + nodes[i].key = i; + node = &nodes[i]; + HASH_INSERT(hash, TEST, node); + } +} + +static int +t_insert_find(void) +{ + init_hash(); + fill_hash(); + validate_filled_hash(); + + return 1; +} + +static int +t_insert_find_random(void) +{ + init_hash(); + + int i; + struct test_node *node; + for (i = 0; i < MAX_NUM; i++) + { + nodes[i].key = bt_random(); + node = &nodes[i]; + HASH_INSERT(hash, TEST, node); + } + + validate_filled_hash(); + + return 1; +} + +static int +t_insert2_find(void) +{ + init_hash_(1); + + int i; + struct test_node *node; + for (i = 0; i < MAX_NUM; i++) + { + nodes[i].key = i; + node = &nodes[i]; + HASH_INSERT2(hash, TEST, my_pool, node); + } + bt_assert_msg(hash.order != 1, "The hash should auto-resize from order 2^1. 
The order of the hash is 2^%u.", hash.order); + + validate_filled_hash(); + + return 1; +} + +static int +t_walk(void) +{ + init_hash(); + fill_hash(); + + uint i; + uint check[MAX_NUM]; + for (i = 0; i < MAX_NUM; i++) + check[i] = 0; + + HASH_WALK(hash, next, n) + { + check[n->key]++; + } + HASH_WALK_END; + + for (i = 0; i < MAX_NUM; i++) + bt_assert(check[i] == 1); + + return 1; +} + +static int +t_walk_delsafe_delete(void) +{ + init_hash(); + fill_hash(); + + HASH_WALK_DELSAFE(hash, next, n) + { + HASH_DELETE(hash, TEST, n->key); + } + HASH_WALK_DELSAFE_END; + + validate_empty_hash(); + + return 1; +} + +static int +t_walk_delsafe_remove(void) +{ + init_hash(); + fill_hash(); + + HASH_WALK_DELSAFE(hash, next, n) + { + HASH_REMOVE(hash, TEST, n); + } + HASH_WALK_DELSAFE_END; + + validate_empty_hash(); + + return 1; +} + +static int +t_walk_delsafe_delete2(void) +{ + init_hash(); + fill_hash(); + + HASH_WALK_DELSAFE(hash, next, n) + { + HASH_DELETE2(hash, TEST, my_pool, n->key); + } + HASH_WALK_DELSAFE_END; + + validate_empty_hash(); + + return 1; +} + +static int +t_walk_delsafe_remove2(void) +{ + init_hash(); + fill_hash(); + + HASH_WALK_DELSAFE(hash, next, n) + { + HASH_REMOVE2(hash, TEST, my_pool, n); + } + HASH_WALK_DELSAFE_END; + + validate_empty_hash(); + + return 1; +} + +static int +t_walk_filter(void) +{ + init_hash(); + fill_hash(); + + uint i; + uint check[MAX_NUM]; + for (i = 0; i < MAX_NUM; i++) + check[i] = 0; + + HASH_WALK_FILTER(hash, next, n, m) + { + bt_assert(n == *m); + check[n->key]++; + } + HASH_WALK_FILTER_END; + + for (i = 0; i < MAX_NUM; i++) + bt_assert(check[i] == 1); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_insert_find, "HASH_INSERT and HASH_FIND"); + bt_test_suite(t_insert_find_random, "HASH_INSERT pseudo-random keys and HASH_FIND"); + bt_test_suite(t_insert2_find, "HASH_INSERT2 and HASH_FIND. 
HASH_INSERT2 is HASH_INSERT and a smart auto-resize function"); + bt_test_suite(t_walk, "HASH_WALK"); + bt_test_suite(t_walk_delsafe_delete, "HASH_WALK_DELSAFE and HASH_DELETE"); + bt_test_suite(t_walk_delsafe_delete2, "HASH_WALK_DELSAFE and HASH_DELETE2. HASH_DELETE2 is HASH_DELETE and smart auto-resize function"); + bt_test_suite(t_walk_delsafe_remove, "HASH_WALK_DELSAFE and HASH_REMOVE"); + bt_test_suite(t_walk_delsafe_remove2, "HASH_WALK_DELSAFE and HASH_REMOVE2. HASH_REMOVE2 is HASH_REMOVE and smart auto-resize function"); + bt_test_suite(t_walk_filter, "HASH_WALK_FILTER"); + + return bt_exit_value(); +} diff --git a/lib/heap_test.c b/lib/heap_test.c new file mode 100644 index 00000000..c04a0450 --- /dev/null +++ b/lib/heap_test.c @@ -0,0 +1,186 @@ +/* + * BIRD Library -- Universal Heap Macros Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "sysdep/config.h" +#include "lib/heap.h" + +#define MAX_NUM 1000 +#define SPECIAL_KEY -3213 + +#define MY_CMP(x, y) ((x) < (y)) + +#define MY_HEAP_SWAP(heap,a,b,t) \ + do { \ + bt_debug("swap(%u %u) ", a, b); \ + HEAP_SWAP(heap,a,b,t); \ + } while(0) + +static int heap[MAX_NUM+1]; +static uint num; + +/* + * A valid heap must follow these rules: + * - `num >= 0` + * - `heap[i] >= heap[i / 2]` for each `i` in `[2, num]` + */ +static int +is_heap_valid(int heap[], uint num) +{ + uint i; + + if (num > MAX_NUM) + return 0; + + for (i = 2; i <= num; i++) + if (heap[i] < heap[i / 2]) + return 0; + + return 1; +} + +static void +show_heap(void) +{ + uint i; + bt_debug("\n"); + bt_debug("numbers %u; ", num); + for (i = 0; i <= num; i++) + bt_debug("%d ", heap[i]); + bt_debug(is_heap_valid(heap, num) ? 
"OK" : "NON-VALID HEAP!"); + bt_debug("\n"); +} + +static void +init_heap(void) +{ + uint i; + num = 0; + heap[0] = SPECIAL_KEY; /* heap[0] should be unused */ + for (i = 1; i <= MAX_NUM; i++) + heap[i] = 0; +} + +static int +t_heap_insert(void) +{ + uint i; + + init_heap(); + + for (i = MAX_NUM; i >= 1; i--) + { + bt_debug("ins %u at pos %u ", i, MAX_NUM - i); + heap[MAX_NUM - i + 1] = i; + HEAP_INSERT(heap, ++num, int, MY_CMP, MY_HEAP_SWAP); + show_heap(); + bt_assert(is_heap_valid(heap, num)); + } + + return 1; +} + +static int +t_heap_increase_decrease(void) +{ + uint i; + + t_heap_insert(); + + for (i = 1; i <= MAX_NUM; i++) + { + if ((int)i > heap[i]) + { + bt_debug("inc %u ", i); + heap[i] = i; + HEAP_INCREASE(heap, num, int, MY_CMP, MY_HEAP_SWAP, i); + } + else if ((int)i < heap[i]) + { + bt_debug("dec %u ", i); + heap[i] = i; + /* The key got smaller, so it has to be repaired with HEAP_DECREASE, not HEAP_INCREASE */ + HEAP_DECREASE(heap, num, int, MY_CMP, MY_HEAP_SWAP, i); + } + show_heap(); + bt_assert(is_heap_valid(heap, num)); + } + + return 1; +} + +static int +t_heap_delete(void) +{ + uint i; + + t_heap_insert(); + + for (i = 1; i <= num; i++) + { + bt_debug("del at pos %u ", i); + HEAP_DELETE(heap, num, int, MY_CMP, MY_HEAP_SWAP, i); + show_heap(); + bt_assert(is_heap_valid(heap, num)); + } + + return 1; +} + +static int +t_heap_0(void) +{ + init_heap(); + t_heap_insert(); + t_heap_increase_decrease(); + t_heap_delete(); + + return heap[0] == SPECIAL_KEY; +} + +static int +t_heap_insert_random(void) +{ + int i, j; + int expected[MAX_NUM+1]; + + init_heap(); + + for (i = 1; i <= MAX_NUM; i++) + { + heap[i] = expected[i] = bt_random(); + HEAP_INSERT(heap, ++num, int, MY_CMP, MY_HEAP_SWAP); + show_heap(); + bt_assert(is_heap_valid(heap, num)); + } + + for (i = 1; i <= MAX_NUM; i++) + for (j = 1; j <= MAX_NUM; j++) + if(expected[i] == heap[j]) + break; + else if (j == MAX_NUM) + { + show_heap(); + bt_abort_msg("Did not find a number %d in heap.", expected[i]); + } + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + 
+ bt_test_suite(t_heap_insert, "Inserting a descending sequence of numbers (the worst case)"); + bt_test_suite(t_heap_insert_random, "Inserting pseudo-random numbers"); + bt_test_suite(t_heap_increase_decrease, "Increasing/Decreasing"); + bt_test_suite(t_heap_delete, "Deleting"); + bt_test_suite(t_heap_0, "Is a heap[0] really unused?"); + + return bt_exit_value(); +} diff --git a/lib/idm.c b/lib/idm.c new file mode 100644 index 00000000..66e311c6 --- /dev/null +++ b/lib/idm.c @@ -0,0 +1,76 @@ +/* + * BIRD Library -- ID Map + * + * (c) 2013--2015 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2013--2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <stdlib.h> + +#include "nest/bird.h" +#include "lib/idm.h" +#include "lib/resource.h" +#include "lib/string.h" + + +void +idm_init(struct idm *m, pool *p, uint size) +{ + m->pos = 0; + m->used = 1; + m->size = size; + m->data = mb_allocz(p, m->size * sizeof(u32)); + + /* ID 0 is reserved */ + m->data[0] = 1; +} + +static inline int u32_cto(uint x) { return ffs(~x) - 1; } + +u32 +idm_alloc(struct idm *m) +{ + uint i, j; + + for (i = m->pos; i < m->size; i++) + if (m->data[i] != 0xffffffff) + goto found; + + /* If we are at least 7/8 full, expand */ + if (m->used > (m->size * 28)) + { + m->size *= 2; + m->data = mb_realloc(m->data, m->size * sizeof(u32)); + memset(m->data + i, 0, (m->size - i) * sizeof(u32)); + goto found; + } + + for (i = 0; i < m->pos; i++) + if (m->data[i] != 0xffffffff) + goto found; + + ASSERT(0); + +found: + ASSERT(i < 0x8000000); + + m->pos = i; + j = u32_cto(m->data[i]); + + /* Unsigned one: j may be 31 and shifting a signed 1 into the sign bit is undefined */ + m->data[i] |= (1U << j); + m->used++; + return 32 * i + j; +} + +void +idm_free(struct idm *m, u32 id) +{ + uint i = id / 32; + uint j = id % 32; + + ASSERT((i < m->size) && (m->data[i] & (1U << j))); + m->data[i] &= ~(1U << j); + m->used--; +} diff --git a/lib/idm.h b/lib/idm.h new file mode 100644 index 00000000..e3380cce --- /dev/null +++ b/lib/idm.h @@ -0,0 +1,25 @@ 
+/* + * BIRD Library -- ID Map + * + * (c) 2013--2015 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2013--2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_IDM_H_ +#define _BIRD_IDM_H_ + +struct idm +{ + u32 *data; + u32 pos; + u32 used; + u32 size; +}; + +void idm_init(struct idm *m, pool *p, uint size); +u32 idm_alloc(struct idm *m); +void idm_free(struct idm *m, u32 id); + +#endif @@ -58,7 +58,7 @@ ip6_mkmask(uint n) return a; } -int +uint ip6_masklen(ip6_addr *a) { int i, j, n; @@ -67,12 +67,12 @@ ip6_masklen(ip6_addr *a) if (a->addr[i] != ~0U) { j = u32_masklen(a->addr[i]); - if (j < 0) + if (j == 255) return j; n += j; while (++i < 4) if (a->addr[i]) - return -1; + return 255; break; } @@ -306,7 +306,7 @@ ip6_pton(const char *a, ip6_addr *o) if (*a == ':' && a[1]) a++; - else if (*a == '.' && (i == 6 || i < 6 && hfil >= 0)) + else if (*a == '.' && (i == 6 || (i < 6 && hfil >= 0))) { /* Embedded IPv4 address */ ip4_addr x; if (!ip4_pton(start, &x)) @@ -9,7 +9,7 @@ #ifndef _BIRD_IP_H_ #define _BIRD_IP_H_ -#include "lib/endian.h" +#include "sysdep/unix/endian.h" #include "lib/string.h" #include "lib/bitops.h" #include "lib/unaligned.h" @@ -31,6 +31,13 @@ #define IP4_NONE _MI4(0) #define IP6_NONE _MI6(0,0,0,0) +#define IP4_MAX_PREFIX_LENGTH 32 +#define IP6_MAX_PREFIX_LENGTH 128 + +#define IP4_MAX_TEXT_LENGTH 15 /* "255.255.255.255" */ +#define IP6_MAX_TEXT_LENGTH 39 /* "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff" */ +#define IPA_MAX_TEXT_LENGTH 39 + #define IP4_MIN_MTU 576 #define IP6_MIN_MTU 1280 @@ -41,19 +48,6 @@ #define UDP_HEADER_LENGTH 8 -#ifdef IPV6 -#define MAX_PREFIX_LENGTH 128 -#define BITS_PER_IP_ADDRESS 128 -#define STD_ADDRESS_P_LENGTH 39 -#define SIZE_OF_IP_HEADER 40 -#else -#define MAX_PREFIX_LENGTH 32 -#define BITS_PER_IP_ADDRESS 32 -#define STD_ADDRESS_P_LENGTH 15 -#define SIZE_OF_IP_HEADER 24 -#endif - - #ifdef DEBUGGING typedef struct ip4_addr { @@ -84,8 +78,6 @@ typedef struct 
ip6_addr { #define _I3(a) ((a).addr[3]) -#ifdef IPV6 - /* Structure ip_addr may contain both IPv4 and IPv6 addresses */ typedef ip6_addr ip_addr; #define IPA_NONE IP6_NONE @@ -99,24 +91,10 @@ typedef ip6_addr ip_addr; #define ipa_to_u32(x) ip4_to_u32(ipa_to_ip4(x)) #define ipa_is_ip4(a) ip6_is_v4mapped(a) +#define ipa_is_ip6(a) (! ip6_is_v4mapped(a)) -#else - -/* Provisionary ip_addr definition same as ip4_addr */ -typedef ip4_addr ip_addr; -#define IPA_NONE IP4_NONE - -#define ipa_from_ip4(x) x -#define ipa_from_ip6(x) IPA_NONE -#define ipa_from_u32(x) ipa_from_ip4(ip4_from_u32(x)) - -#define ipa_to_ip4(x) x -#define ipa_to_ip6(x) IP6_NONE -#define ipa_to_u32(x) ip4_to_u32(ipa_to_ip4(x)) - -#define ipa_is_ip4(a) 1 - -#endif +#define IPA_NONE4 ipa_from_ip4(IP4_NONE) +#define IPA_NONE6 ipa_from_ip6(IP6_NONE) /* @@ -181,7 +159,6 @@ static inline ip6_addr ip6_not(ip6_addr a) { return _MI6(~_I0(a), ~_I1(a), ~_I2(a), ~_I3(a)); } -#ifdef IPV6 #define ipa_equal(x,y) ip6_equal(x,y) #define ipa_zero(x) ip6_zero(x) #define ipa_nonzero(x) ip6_nonzero(x) @@ -189,19 +166,8 @@ static inline ip6_addr ip6_not(ip6_addr a) #define ipa_or(x,y) ip6_or(x,y) #define ipa_xor(x,y) ip6_xor(x,y) #define ipa_not(x) ip6_not(x) -#else -#define ipa_equal(x,y) ip4_equal(x,y) -#define ipa_zero(x) ip4_zero(x) -#define ipa_nonzero(x) ip4_nonzero(x) -#define ipa_and(x,y) ip4_and(x,y) -#define ipa_or(x,y) ip4_or(x,y) -#define ipa_xor(x,y) ip4_xor(x,y) -#define ipa_not(x) ip4_not(x) -#endif - -#ifdef IPV6 /* * A zero address is either a token for invalid/unused, or the prefix of default * routes. 
These functions should be used in the second case, where both IPv4 @@ -214,26 +180,12 @@ static inline int ipa_zero2(ip_addr a) static inline int ipa_nonzero2(ip_addr a) { return _I0(a) || _I1(a) || ((_I2(a) != 0) && (_I2(a) != 0xffff)) || _I3(a); } -#else -#define ipa_zero2(x) ip4_zero(x) -#define ipa_nonzero2(x) ip4_nonzero(x) -#endif - /* * Hash and compare functions */ -static inline uint ip4_hash(ip4_addr a) -{ - /* Returns a 16-bit value */ - u32 x = _I(a); - x ^= x >> 16; - x ^= x << 10; - return x & 0xffff; -} - -static inline u32 ip4_hash32(ip4_addr a) +static inline u32 ip4_hash(ip4_addr a) { /* Returns a 32-bit value, although low-order bits are not mixed */ u32 x = _I(a); @@ -242,14 +194,7 @@ static inline u32 ip4_hash32(ip4_addr a) return x; } -static inline uint ip6_hash(ip6_addr a) -{ - /* Returns a 16-bit hash key */ - u32 x = _I0(a) ^ _I1(a) ^ _I2(a) ^ _I3(a); - return (x ^ (x >> 16) ^ (x >> 8)) & 0xffff; -} - -static inline u32 ip6_hash32(ip6_addr a) +static inline u32 ip6_hash(ip6_addr a) { /* Returns a 32-bit hash key, although low-order bits are not mixed */ u32 x = _I0(a) ^ _I1(a) ^ _I2(a) ^ _I3(a); @@ -261,16 +206,8 @@ static inline int ip4_compare(ip4_addr a, ip4_addr b) int ip6_compare(ip6_addr a, ip6_addr b); - -#ifdef IPV6 #define ipa_hash(x) ip6_hash(x) -#define ipa_hash32(x) ip6_hash32(x) #define ipa_compare(x,y) ip6_compare(x,y) -#else -#define ipa_hash(x) ip4_hash(x) -#define ipa_hash32(x) ip4_hash32(x) -#define ipa_compare(x,y) ip4_compare(x,y) -#endif /* @@ -301,14 +238,10 @@ static inline int ip6_is_link_local(ip6_addr a) static inline int ip6_is_v4mapped(ip6_addr a) { return _I0(a) == 0 && _I1(a) == 0 && _I2(a) == 0xffff; } -#ifdef IPV6 #define ipa_classify(x) ip6_classify(&(x)) #define ipa_is_link_local(x) ip6_is_link_local(x) -#else -#define ipa_classify(x) ip4_classify(x) -#define ipa_is_link_local(x) 0 -#endif +/* XXXX remove */ static inline int ipa_classify_net(ip_addr a) { return ipa_zero2(a) ? 
(IADDR_HOST | SCOPE_UNIVERSE) : ipa_classify(a); } @@ -320,11 +253,11 @@ static inline int ipa_classify_net(ip_addr a) static inline ip4_addr ip4_mkmask(uint n) { return _MI4(u32_mkmask(n)); } -static inline int ip4_masklen(ip4_addr a) +static inline uint ip4_masklen(ip4_addr a) { return u32_masklen(_I(a)); } ip6_addr ip6_mkmask(uint n); -int ip6_masklen(ip6_addr *a); +uint ip6_masklen(ip6_addr *a); /* ipX_pxlen() requires that x != y */ static inline uint ip4_pxlen(ip4_addr a, ip4_addr b) @@ -346,6 +279,18 @@ static inline u32 ip4_getbit(ip4_addr a, uint pos) static inline u32 ip6_getbit(ip6_addr a, uint pos) { return a.addr[pos / 32] & (0x80000000 >> (pos % 32)); } +static inline u32 ip4_setbit(ip4_addr *a, uint pos) +{ return _I(*a) |= (0x80000000 >> pos); } + +static inline u32 ip6_setbit(ip6_addr *a, uint pos) +{ return a->addr[pos / 32] |= (0x80000000 >> (pos % 32)); } + +static inline u32 ip4_clrbit(ip4_addr *a, uint pos) +{ return _I(*a) &= ~(0x80000000 >> pos); } + +static inline u32 ip6_clrbit(ip6_addr *a, uint pos) +{ return a->addr[pos / 32] &= ~(0x80000000 >> (pos % 32)); } + static inline ip4_addr ip4_opposite_m1(ip4_addr a) { return _MI4(_I(a) ^ 1); } @@ -360,21 +305,8 @@ static inline ip6_addr ip6_opposite_m2(ip6_addr a) ip4_addr ip4_class_mask(ip4_addr ad); -#ifdef IPV6 -#define ipa_mkmask(x) ip6_mkmask(x) -#define ipa_masklen(x) ip6_masklen(&x) -#define ipa_pxlen(x,y) ip6_pxlen(x,y) -#define ipa_getbit(x,n) ip6_getbit(x,n) #define ipa_opposite_m1(x) ip6_opposite_m1(x) #define ipa_opposite_m2(x) ip6_opposite_m2(x) -#else -#define ipa_mkmask(x) ip4_mkmask(x) -#define ipa_masklen(x) ip4_masklen(x) -#define ipa_pxlen(x,y) ip4_pxlen(x,y) -#define ipa_getbit(x,n) ip4_getbit(x,n) -#define ipa_opposite_m1(x) ip4_opposite_m1(x) -#define ipa_opposite_m2(x) ip4_opposite_m2(x) -#endif /* @@ -393,14 +325,33 @@ static inline ip6_addr ip6_hton(ip6_addr a) static inline ip6_addr ip6_ntoh(ip6_addr a) { return _MI6(ntohl(_I0(a)), ntohl(_I1(a)), ntohl(_I2(a)), 
ntohl(_I3(a))); } -#ifdef IPV6 -#define ipa_hton(x) x = ip6_hton(x) -#define ipa_ntoh(x) x = ip6_ntoh(x) -#else -#define ipa_hton(x) x = ip4_hton(x) -#define ipa_ntoh(x) x = ip4_ntoh(x) -#endif +#define MPLS_MAX_LABEL_STACK 8 +typedef struct mpls_label_stack { + uint len; + u32 stack[MPLS_MAX_LABEL_STACK]; +} mpls_label_stack; + +static inline int +mpls_get(const char *buf, int buflen, u32 *stack) +{ + for (int i=0; (i<MPLS_MAX_LABEL_STACK) && (i*4+3 < buflen); i++) + { + u32 s = get_u32(buf + i*4); + stack[i] = s >> 12; + if (s & 0x100) + return i+1; + } + return -1; +} + +static inline int +mpls_put(char *buf, int len, u32 *stack) +{ + for (int i=0; i<len; i++) + put_u32(buf + i*4, stack[i] << 12 | (i+1 == len ? 0x100 : 0)); + return len*4; +} /* * Unaligned data access (in network order) @@ -431,15 +382,6 @@ static inline void * put_ip6(void *buf, ip6_addr a) return buf+16; } -// XXXX these functions must be redesigned or removed -#ifdef IPV6 -#define get_ipa(x) get_ip6(x) -#define put_ipa(x,y) put_ip6(x,y) -#else -#define get_ipa(x) get_ip4(x) -#define put_ipa(x,y) put_ip4(x,y) -#endif - /* * Binary/text form conversions @@ -457,34 +399,11 @@ static inline char * ip6_ntox(ip6_addr a, char *b) int ip4_pton(const char *a, ip4_addr *o); int ip6_pton(const char *a, ip6_addr *o); -// XXXX these functions must be redesigned or removed -#ifdef IPV6 -#define ipa_ntop(x,y) ip6_ntop(x,y) -#define ipa_ntox(x,y) ip6_ntox(x,y) -#define ipa_pton(x,y) ip6_pton(x,y) -#else -#define ipa_ntop(x,y) ip4_ntop(x,y) -#define ipa_ntox(x,y) ip4_ntox(x,y) -#define ipa_pton(x,y) ip4_pton(x,y) -#endif - /* * Miscellaneous */ -// XXXX review this - -#define ip_is_prefix(a,l) (!ipa_nonzero(ipa_and(a, ipa_not(ipa_mkmask(l))))) -#define ipa_in_net(x,n,p) (ipa_zero(ipa_and(ipa_xor((n),(x)),ipa_mkmask(p)))) -#define net_in_net(n1,l1,n2,l2) (((l1) >= (l2)) && (ipa_zero(ipa_and(ipa_xor((n1),(n2)),ipa_mkmask(l2))))) - char *ip_scope_text(uint); -struct prefix { - ip_addr addr; - uint len; -}; - - 
#endif diff --git a/lib/ip_test.c b/lib/ip_test.c new file mode 100644 index 00000000..fd70c957 --- /dev/null +++ b/lib/ip_test.c @@ -0,0 +1,161 @@ +/* + * BIRD Library -- IP address functions Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" + +#include "lib/ip.h" + +#define IP4_MAX_LEN 16 + +static int +test_ipa_pton(void *out_, const void *in_, const void *expected_out_) +{ + ip_addr *out = out_; + const char *in = in_; + const ip_addr *expected_out = expected_out_; + + if (ipa_is_ip4(*expected_out)) + { + ip4_addr ip4; + bt_assert(ip4_pton(in, &ip4)); + *out = ipa_from_ip4(ip4); + } + else + { + bt_assert(ip6_pton(in, out)); + /* ip_addr == ip6_addr */ + } + + return ipa_equal(*out, *expected_out); +} + +static int +t_ip4_pton(void) +{ + struct bt_pair test_vectors[] = { + { + .in = "192.168.1.128", + .out = & ipa_build4(192, 168, 1, 128), + }, + { + .in = "255.255.255.255", + .out = & ipa_build4(255, 255, 255, 255), + }, + { + .in = "0.0.0.0", + .out = & ipa_build4(0, 0, 0, 0), + }, + }; + + return bt_assert_batch(test_vectors, test_ipa_pton, bt_fmt_str, bt_fmt_ipa); +} + +static int +t_ip6_pton(void) +{ + struct bt_pair test_vectors[] = { + { + .in = "2001:0db8:0000:0000:0000:0000:1428:57ab", + .out = & ipa_build6(0x20010DB8, 0x00000000, 0x00000000, 0x142857AB), + }, + { + .in = "2001:0db8:0000:0000:0000::1428:57ab", + .out = & ipa_build6(0x20010DB8, 0x00000000, 0x00000000, 0x142857AB), + }, + { + .in = "2001:0db8::1428:57ab", + .out = & ipa_build6(0x20010DB8, 0x00000000, 0x00000000, 0x142857AB), + }, + { + .in = "2001:db8::1428:57ab", + .out = & ipa_build6(0x20010DB8, 0x00000000, 0x00000000, 0x142857AB), + }, + { + .in = "::1", + .out = & ipa_build6(0x00000000, 0x00000000, 0x00000000, 0x00000001), + }, + { + .in = "::", + .out = & ipa_build6(0x00000000, 0x00000000, 0x00000000, 0x00000000), + }, + { + .in = "2605:2700:0:3::4713:93e3", + .out = & 
ipa_build6(0x26052700, 0x00000003, 0x00000000, 0x471393E3), + }, + }; + + return bt_assert_batch(test_vectors, test_ipa_pton, bt_fmt_str, bt_fmt_ipa); +} + +static int +test_ipa_ntop(void *out_, const void *in_, const void *expected_out_) +{ + char *out = out_; + const ip_addr *in = in_; + const char *expected_out = expected_out_; + + if (ipa_is_ip4(*in)) + ip4_ntop(ipa_to_ip4(*in), out); + else + ip6_ntop(ipa_to_ip6(*in), out); + + int result = strncmp(out, expected_out, ipa_is_ip4(*in) ? IP4_MAX_TEXT_LENGTH : IP6_MAX_TEXT_LENGTH) == 0; + return result; +} + +static int +t_ip4_ntop(void) +{ + struct bt_pair test_vectors[] = { + { + .in = & ipa_build4(192, 168, 1, 128), + .out = "192.168.1.128", + }, + { + .in = & ipa_build4(255, 255, 255, 255), + .out = "255.255.255.255", + }, + { + .in = & ipa_build4(0, 0, 0, 1), + .out = "0.0.0.1", + }, + }; + + return bt_assert_batch(test_vectors, test_ipa_ntop, bt_fmt_ipa, bt_fmt_str); +} + +static int +t_ip6_ntop(void) +{ + struct bt_pair test_vectors[] = { + { + .in = & ipa_build6(0x20010DB8, 0x00000000, 0x00000000, 0x142857AB), + .out = "2001:db8::1428:57ab", + }, + { + .in = & ipa_build6(0x26052700, 0x00000003, 0x00000000, 0x471393E3), + .out = "2605:2700:0:3::4713:93e3", + }, + }; + + return bt_assert_batch(test_vectors, test_ipa_ntop, bt_fmt_ipa, bt_fmt_str); +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_ip4_pton, "Converting IPv4 string to ip4_addr struct"); + bt_test_suite(t_ip6_pton, "Converting IPv6 string to ip6_addr struct"); + bt_test_suite(t_ip4_ntop, "Converting ip4_addr struct to IPv4 string"); + bt_test_suite(t_ip6_ntop, "Converting ip6_addr struct to IPv6 string"); + + return bt_exit_value(); +} + diff --git a/lib/lists.c b/lib/lists.c index 12ef3cc6..4a48d3b7 100644 --- a/lib/lists.c +++ b/lib/lists.c @@ -158,3 +158,15 @@ add_tail_list(list *to, list *l) q->next = &to->tail_node; to->tail = q; } + +LIST_INLINE uint +list_length(list *l) +{ + uint len = 0; + node *n; + 
+ WALK_LIST(n, *l) + len++; + + return len; +} diff --git a/lib/lists.h b/lib/lists.h index 51856b05..066eafbb 100644 --- a/lib/lists.h +++ b/lib/lists.h @@ -52,7 +52,10 @@ typedef union list { /* In fact two overlayed nodes */ #define WALK_LIST2(n,nn,list,pos) \ for(nn=(list).head; NODE_VALID(nn) && (n=SKIP_BACK(typeof(*n),pos,nn)); nn=nn->next) #define WALK_LIST_DELSAFE(n,nxt,list) \ - for(n=HEAD(list); nxt=NODE_NEXT(n); n=(void *) nxt) + for(n=HEAD(list); nxt=NODE_NEXT(n); n=(void *) nxt) +#define WALK_LIST2_DELSAFE(n,nn,nxt,list,pos) \ + for(nn=HEAD(list); (nxt=nn->next) && (n=SKIP_BACK(typeof(*n),pos,nn)); nn=nxt) + /* WALK_LIST_FIRST supposes that called code removes each processed node */ #define WALK_LIST_FIRST(n,list) \ while(n=HEAD(list), (NODE (n))->next) @@ -77,6 +80,7 @@ void rem_node(node *); void add_tail_list(list *, list *); void init_list(list *); void insert_node(node *, node *); +uint list_length(list *); #endif #endif diff --git a/lib/lists_test.c b/lib/lists_test.c new file mode 100644 index 00000000..f26a88e2 --- /dev/null +++ b/lib/lists_test.c @@ -0,0 +1,287 @@ +/* + * BIRD Library -- Linked Lists Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#include "test/birdtest.h" +#include "lib/lists.h" + +#define MAX_NUM 1000 + +static node nodes[MAX_NUM]; +static list l; + +static void +show_list(void) +{ + bt_debug("\n"); + bt_debug("list.null is at %p and point to %p\n", &l.null, l.null); + bt_debug("list.head is at %p and point to %p\n", &l.head, l.head); + bt_debug("list.tail is at %p and point to %p\n", &l.tail, l.tail); + + int i; + for (i = 0; i < MAX_NUM; i++) + { + bt_debug("n[%3i] is at %p\n", i, &nodes[i]); + bt_debug(" prev is at %p and point to %p\n", &(nodes[i].prev), nodes[i].prev); + bt_debug(" next is at %p and point to %p\n", &(nodes[i].next), nodes[i].next); + } +} + +static int +is_filled_list_well_linked(void) +{ + int i; + bt_assert(l.head == &nodes[0]); + bt_assert(l.tail == &nodes[MAX_NUM-1]); + bt_assert((void *) nodes[0].prev == (void *) &l.head); + bt_assert((void *) nodes[MAX_NUM-1].next == (void *) &l.null); + + for (i = 0; i < MAX_NUM; i++) + { + if (i < (MAX_NUM-1)) + bt_assert(nodes[i].next == &nodes[i+1]); + + if (i > 0) + bt_assert(nodes[i].prev == &nodes[i-1]); + } + + return 1; +} + +static int +is_empty_list_well_unlinked(void) +{ + int i; + + bt_assert(l.head == NODE &l.null); + bt_assert(l.tail == NODE &l.head); + bt_assert(EMPTY_LIST(l)); + + for (i = 0; i < MAX_NUM; i++) + { + bt_assert(nodes[i].next == NULL); + bt_assert(nodes[i].prev == NULL); + } + + return 1; +} + +static void +init_list__(list *l, struct node nodes[]) +{ + init_list(l); + + int i; + for (i = 0; i < MAX_NUM; i++) + { + nodes[i].next = NULL; + nodes[i].prev = NULL; + } +} + +static void +init_list_(void) +{ + init_list__(&l, (node *) nodes); +} + +static int +t_add_tail(void) +{ + int i; + + init_list_(); + for (i = 0; i < MAX_NUM; i++) + { + add_tail(&l, &nodes[i]); + bt_debug("."); + bt_assert(l.tail == &nodes[i]); + bt_assert(l.head == &nodes[0]); + bt_assert((void *) nodes[i].next == (void *) &l.null); + if (i > 0) + { + bt_assert(nodes[i-1].next == &nodes[i]); + bt_assert(nodes[i].prev == 
&nodes[i-1]); + } + } + show_list(); + bt_assert(is_filled_list_well_linked()); + + return 1; +} + +static int +t_add_head(void) +{ + int i; + + init_list_(); + for (i = MAX_NUM-1; i >= 0; i--) + { + add_head(&l, &nodes[i]); + bt_debug("."); + bt_assert(l.head == &nodes[i]); + bt_assert(l.tail == &nodes[MAX_NUM-1]); + if (i < MAX_NUM-1) + { + bt_assert(nodes[i+1].prev == &nodes[i]); + bt_assert(nodes[i].next == &nodes[i+1]); + } + } + show_list(); + bt_assert(is_filled_list_well_linked()); + + return 1; +} + +static void +insert_node_(node *n, node *after) +{ + insert_node(n, after); + bt_debug("."); +} + +static int +t_insert_node(void) +{ + int i; + + init_list_(); + + // add first node + insert_node_(&nodes[0], NODE &l.head); + + // add the remaining even-indexed nodes (3rd, 5th, ...) + for (i = 2; i < MAX_NUM; i+=2) + insert_node_(&nodes[i], &nodes[i-2]); + + // add the odd-indexed nodes (2nd, 4th, ...) in between + for (i = 1; i < MAX_NUM; i+=2) + insert_node_(&nodes[i], &nodes[i-1]); + + bt_debug("\n"); + bt_assert(is_filled_list_well_linked()); + + return 1; +} + +static void +fill_list2(list *l, node nodes[]) +{ + int i; + for (i = 0; i < MAX_NUM; i++) + add_tail(l, &nodes[i]); +} + +static void +fill_list(void) +{ + fill_list2(&l, (node *) nodes); +} + +static int +t_remove_node(void) +{ + int i; + + init_list_(); + + /* Fill & Remove & Check */ + fill_list(); + for (i = 0; i < MAX_NUM; i++) + rem_node(&nodes[i]); + bt_assert(is_empty_list_well_unlinked()); + + /* Fill, remove every other node, check, remove the remaining nodes, check */ + fill_list(); + for (i = 0; i < MAX_NUM; i+=2) + rem_node(&nodes[i]); + + int tail_node_index = (MAX_NUM % 2) ?
MAX_NUM - 2 : MAX_NUM - 1; + bt_assert(l.head == &nodes[1]); + bt_assert(l.tail == &nodes[tail_node_index]); + bt_assert(nodes[tail_node_index].next == NODE &l.null); + + for (i = 1; i < MAX_NUM; i+=2) + { + if (i > 1) + bt_assert(nodes[i].prev == &nodes[i-2]); + if (i < tail_node_index) + bt_assert(nodes[i].next == &nodes[i+2]); + } + + for (i = 1; i < MAX_NUM; i+=2) + rem_node(&nodes[i]); + bt_assert(is_empty_list_well_unlinked()); + + return 1; +} + +static int +t_replace_node(void) +{ + node head, inside, tail; + + init_list_(); + fill_list(); + + replace_node(&nodes[0], &head); + bt_assert(l.head == &head); + bt_assert(head.prev == NODE &l.head); + bt_assert(head.next == &nodes[1]); + bt_assert(nodes[1].prev == &head); + + replace_node(&nodes[MAX_NUM/2], &inside); + bt_assert(nodes[MAX_NUM/2-1].next == &inside); + bt_assert(nodes[MAX_NUM/2+1].prev == &inside); + bt_assert(inside.prev == &nodes[MAX_NUM/2-1]); + bt_assert(inside.next == &nodes[MAX_NUM/2+1]); + + replace_node(&nodes[MAX_NUM-1], &tail); + bt_assert(l.tail == &tail); + bt_assert(tail.prev == &nodes[MAX_NUM-2]); + bt_assert(tail.next == NODE &l.null); + bt_assert(nodes[MAX_NUM-2].next == &tail); + + return 1; +} + +static int +t_add_tail_list(void) +{ + node nodes2[MAX_NUM]; + list l2; + + init_list__(&l, (node *) nodes); + fill_list2(&l, (node *) nodes); + + init_list__(&l2, (node *) nodes2); + fill_list2(&l2, (node *) nodes2); + + add_tail_list(&l, &l2); + + bt_assert(nodes[MAX_NUM-1].next == &nodes2[0]); + bt_assert(nodes2[0].prev == &nodes[MAX_NUM-1]); + bt_assert(l.tail == &nodes2[MAX_NUM-1]); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_add_tail, "Adding nodes to tail of list"); + bt_test_suite(t_add_head, "Adding nodes to head of list"); + bt_test_suite(t_insert_node, "Inserting nodes to list"); + bt_test_suite(t_remove_node, "Removing nodes from list"); + bt_test_suite(t_replace_node, "Replacing nodes in list"); + 
bt_test_suite(t_add_tail_list, "At the tail of a list adding the another list"); + + return bt_exit_value(); +} diff --git a/lib/mac_test.c b/lib/mac_test.c new file mode 100644 index 00000000..806fe3e4 --- /dev/null +++ b/lib/mac_test.c @@ -0,0 +1,1159 @@ +/* + * BIRD Library -- SHA and HMAC-SHA functions tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "test/bt-utils.h" + +#include "lib/mac.h" + + +#define define_test_hash_fn(name,id) \ +static int \ +test_##name(void *out_, const void *in_, const void *expected_out_) \ +{ \ + char *out = out_; \ + const char *in = in_; \ + const char *expected_out = expected_out_; \ + \ + struct mac_context ctx; \ + mac_init(&ctx, id, NULL, 0); \ + mac_update(&ctx, in, strlen(in)); \ + byte *out_bin = mac_final(&ctx); \ + \ + uint len = mac_type_length(id); \ + bt_bytes_to_hex(out, out_bin, len); \ + \ + return strncmp(out, expected_out, 2*len+1) == 0; \ +} + +define_test_hash_fn(md5, ALG_MD5) +define_test_hash_fn(sha1, ALG_SHA1) +define_test_hash_fn(sha224, ALG_SHA224) +define_test_hash_fn(sha256, ALG_SHA256) +define_test_hash_fn(sha384, ALG_SHA384) +define_test_hash_fn(sha512, ALG_SHA512) + + +static int +t_md5(void) +{ + struct bt_pair test_vectors[] = { + { + .in = "", + .out = "d41d8cd98f00b204e9800998ecf8427e", + }, + { + .in = "a", + .out = "0cc175b9c0f1b6a831c399e269772661", + }, + { + .in = "abc", + .out = "900150983cd24fb0d6963f7d28e17f72", + }, + { + .in = "message digest", + .out = "f96b697d7cb7938d525a2f31aaf161d0", + }, + { + .in = "abcdefghijklmnopqrstuvwxyz", + .out = "c3fcd3d76192e4007dfb496cca67e13b", + }, + { + .in = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", + .out = "d174ab98d277d9f5a5611c2c9f419d9f", + }, + { + .in = "12345678901234567890123456789012345678901234567890123456789012345678901234567890", + .out = "57edf4a22be3c955ac49da2e2107b67a", + }, + }; + + return 
bt_assert_batch(test_vectors, test_md5, bt_fmt_str, bt_fmt_str); +} + + +/* + * Testing SHAxxx functions + */ + + +static int +t_sha1(void) +{ + struct bt_pair test_vectors[] = { + { + .in = "", + .out = "da39a3ee5e6b4b0d3255bfef95601890afd80709", + }, + { + .in = "a", + .out = "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", + }, + { + .in = "abc", + .out = "a9993e364706816aba3e25717850c26c9cd0d89d", + }, + { + .in = "message digest", + .out = "c12252ceda8be8994d5fa0290a47231c1d16aae3", + }, + { + .in = "abcdefghijklmnopqrstuvwxyz", + .out = "32d10c7b8cf96570ca04ce37f2a19d84240d3a89", + }, + { + .in = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", + .out = "761c457bf73b14d27e9e9265c46f4b4dda11f940", + }, + { + .in = "12345678901234567890123456789012345678901234567890123456789012345678901234567890", + .out = "50abf5706a150990a08b2c5ea40fa0e585554732", + }, + { + .in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + .out = "6a64fcc1fb970f7339ce886601775d2efea5cd4b", + }, + }; + + return bt_assert_batch(test_vectors, test_sha1, bt_fmt_str, bt_fmt_str); +} + +static int +t_sha224(void) +{ + struct bt_pair test_vectors[] = { + { + .in = "", + .out = "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f", + }, + { + .in = "a", + .out = "abd37534c7d9a2efb9465de931cd7055ffdb8879563ae98078d6d6d5", + }, + { + .in = "abc", + .out = "23097d223405d8228642a477bda255b32aadbce4bda0b3f7e36c9da7", + }, + { + .in = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + .out = "75388b16512776cc5dba5da1fd890150b0c6455cb4f58b1952522525", + }, + { + .in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + .out = "cca7dd1a332a17775d8b0429bdb45055c2d4368ebaab0c7cf385586e", + }, + }; + + return bt_assert_batch(test_vectors, test_sha224, 
bt_fmt_str, bt_fmt_str); +} + +static int +t_sha256(void) +{ + struct bt_pair test_vectors[] = { + { + .in = "", + .out = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + }, + { + .in = "a", + .out = "ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb", + }, + { + .in = "abc", + .out = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad", + }, + { + .in = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + .out = "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1", + }, + { + .in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + .out = "bf18b43b61652b5d73f41ebf3d72e5e43aebf5076f497dde31ea3de9de4998ef", + }, + }; + + return bt_assert_batch(test_vectors, test_sha256, bt_fmt_str, bt_fmt_str); +} + +static int +t_sha384(void) +{ + struct bt_pair test_vectors[] = { + { + .in = "", + .out = "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b", + }, + { + .in = "a", + .out = "54a59b9f22b0b80880d8427e548b7c23abd873486e1f035dce9cd697e85175033caa88e6d57bc35efae0b5afd3145f31", + }, + { + .in = "abc", + .out = "cb00753f45a35e8bb5a03d699ac65007272c32ab0eded1631a8b605a43ff5bed8086072ba1e7cc2358baeca134c825a7", + }, + { + .in = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + .out = "3391fdddfc8dc7393707a65b1b4709397cf8b1d162af05abfe8f450de5f36bc6b0455a8520bc4e6f5fe95b1fe3c8452b", + }, + { + .in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + .out = "6452928a62ca915a60f2d16ea22cc832d8ecb35443d78a3ff6986e7def9174a1dc16ce2ff65d3ed1666db98357f3c05e", + }, + }; + + return bt_assert_batch(test_vectors, test_sha384, bt_fmt_str, bt_fmt_str); +} + +static int +t_sha512(void) +{ + struct bt_pair test_vectors[] = { + { + .in 
= "", + .out = "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", + }, + { + .in = "a", + .out = "1f40fc92da241694750979ee6cf582f2d5d7d28e18335de05abc54d0560e0f5302860c652bf08d560252aa5e74210546f369fbbbce8c12cfc7957b2652fe9a75", + }, + { + .in = "abc", + .out = "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f", + }, + { + .in = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + .out = "204a8fc6dda82f0a0ced7beb8e08a41657c16ef468b228a8279be331a703c33596fd15c13b1b07f9aa1d3bea57789ca031ad85c7a71dd70354ec631238ca3445", + }, + { + .in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + .out = "415509a1c345371acb3e27a88b3835e3b6dfebcbbab5134850596f4db64d7bb22ac42c3cd179446a80c92b8be955460eb536eac01389a7e1fdf09d1dca83922f", + }, + }; + + return bt_assert_batch(test_vectors, test_sha512, bt_fmt_str, bt_fmt_str); +} + + +/* + * Testing SHAxxx HMAC functions + */ + +#define HMAC_BUFFER_SIZE 160 +struct hmac_data_in { + byte key[HMAC_BUFFER_SIZE]; + uint key_len; + byte data[HMAC_BUFFER_SIZE]; + uint data_len; +}; + +static void +hmac_in_fmt(char *buf, size_t size, const void *data_) +{ + uint i; + const struct hmac_data_in *data = data_; + + snprintf(buf, size, "data: '"); + for (i = 0; i < data->data_len; i++) + snprintf(buf+strlen(buf), size-strlen(buf), bt_is_char(data->data[i]) ? "%c" : " 0x%02x", data->data[i]); + + snprintf(buf+strlen(buf), size-strlen(buf), "', key: '"); + for (i = 0; i < data->key_len; i++) + snprintf(buf+strlen(buf), size-strlen(buf), bt_is_char(data->key[i]) ? 
"%c" : " 0x%02x", data->key[i]); + snprintf(buf+strlen(buf), size-strlen(buf), "'"); +} + +#define define_test_hmac_fn(name,id) \ +static int \ +test_##name##_hmac(void *out_, const void *in_, const void *expected_out_) \ +{ \ + char *out = out_; \ + const struct hmac_data_in *in = in_; \ + const char *expected_out = expected_out_; \ + \ + struct mac_context ctx; \ + mac_init(&ctx, id, in->key, in->key_len); \ + mac_update(&ctx, in->data, in->data_len); \ + byte *out_bin = mac_final(&ctx); \ + \ + uint len = mac_type_length(id); \ + bt_bytes_to_hex(out, out_bin, len); \ + \ + return strncmp(out, expected_out, 2*len+1) == 0; \ +} + +define_test_hmac_fn(md5, ALG_HMAC_MD5) +define_test_hmac_fn(sha1, ALG_HMAC_SHA1) +define_test_hmac_fn(sha224, ALG_HMAC_SHA224) +define_test_hmac_fn(sha256, ALG_HMAC_SHA256) +define_test_hmac_fn(sha384, ALG_HMAC_SHA384) +define_test_hmac_fn(sha512, ALG_HMAC_SHA512) + + +static int +t_md5_hmac(void) +{ + struct bt_pair test_vectors[] = { + { + .in = & (struct hmac_data_in) { + .key = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + }, + .key_len = 16, + .data = "Hi There", + .data_len = 8, + }, + .out = "9294727a3638bb1c13f48ef8158bfc9d", + }, + { + .in = & (struct hmac_data_in) { + .key = "Jefe", + .key_len = 4, + .data = "what do ya want for nothing?", + .data_len = 28, + }, + .out = "750c783e6ab0b503eaa86e310a5db738", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 16, + .data = { + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + }, + .data_len = 50, + }, + .out = "56be34521d144c88dbb8c733f0e8b3f6", + }, 
+ { + .in = & (struct hmac_data_in) { + .key = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + }, + .key_len = 25, + .data = { + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + }, + .data_len = 50, + }, + .out = "697eaf0aca3a3aea3a75164746ffaa79", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + }, + .key_len = 16, + .data = "Test With Truncation", + .data_len = 20, + }, + .out = "56461ef2342edc00f9bab995690efd4c", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 80, + .data = "Test Using Larger Than Block-Size Key - Hash Key First", + .data_len = 54, + }, + .out = "6b1ab7fe4bd7bf8f0b62e6ce61b9d0cd", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 80, + .data = "Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data", + .data_len = 73, + }, + .out = "6f630fad67cda0ee1fb1f562db3aa53e", + }, + }; + + return bt_assert_batch(test_vectors, test_md5_hmac, hmac_in_fmt, bt_fmt_str); +} + +static int +t_sha1_hmac(void) +{ + struct bt_pair test_vectors[] = { + { + .in = & (struct hmac_data_in) { + .key = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + }, + .key_len = 20, + .data = "Hi There", + .data_len = 8, + }, + .out = "b617318655057264e28bc0b6fb378c8ef146be00", + }, + { + .in = & (struct hmac_data_in) { + .key = "Jefe", + .key_len = 4, + .data = "what do ya want for nothing?", + .data_len = 28, + }, + .out = "effcdf6ae5eb2fa2d27416d5f184df9c259a7c79", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 20, + .data = { + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + }, + .data_len = 50, + }, + .out = "125d7342b9ac11cd91a39af48aa17b4f63f175d3", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + }, + .key_len = 25, + .data = { + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 
0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + }, + .data_len = 50, + }, + .out = "4c9007f4026250c6bc8414f9bf50c86c2d7235da", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + }, + .key_len = 20, + .data = "Test With Truncation", + .data_len = 20, + }, + .out = "4c1a03424b55e07fe7f27be1d58bb9324a9a5a04", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 80, + .data = "Test Using Larger Than Block-Size Key - Hash Key First", + .data_len = 54, + }, + .out = "aa4ae5e15272d00e95705637ce8a3b55ed402112", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 80, + .data = "Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data", + .data_len = 73, + }, + .out = "e8e99d0f45237d786d6bbaa7965c7808bbff1a91", + }, + { 
+ .in = & (struct hmac_data_in) { + .key = { + 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, + 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, + 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, + 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, + 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, + 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, + 0x61, 0x61, 0x61, 0x61, + }, + .key_len = 64, + .data = "Test Using key 64 bytes sized", + .data_len = 29, + }, + .out = "a55d4fb80962a6b3d2e720705314bee417d68cf6", + }, + }; + + return bt_assert_batch(test_vectors, test_sha1_hmac, hmac_in_fmt, bt_fmt_str); +} + +static int +t_sha224_hmac(void) +{ + struct bt_pair test_vectors[] = { + { + .in = & (struct hmac_data_in) { + .key = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + }, + .key_len = 20, + .data = "Hi There", + .data_len = 8, + }, + .out = "896fb1128abbdf196832107cd49df33f47b4b1169912ba4f53684b22", + }, + { + .in = & (struct hmac_data_in) { + .key = "Jefe", + .key_len = 4, + .data = "what do ya want for nothing?", + .data_len = 28, + }, + .out = "a30e01098bc6dbbf45690f3a7e9e6d0f8bbea2a39e6148008fd05e44", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 20, + .data = { + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + }, + .data_len = 50, + }, + .out = "7fb3cb3588c6c1f6ffa9694d7d6ad2649365b0c1f65d69d1ec8333ea", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 
0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + }, + .key_len = 25, + .data = { + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + }, + .data_len = 50, + }, + .out = "6c11506874013cac6a2abc1bb382627cec6a90d86efc012de7afec5a", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + }, + .key_len = 20, + .data = "Test With Truncation", + .data_len = 20, + }, + .out = "0e2aea68a90c8d37c988bcdb9fca6fa8099cd857c7ec4a1815cac54c", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, + }, + .key_len = 131, + .data = "Test Using Larger Than Block-Size Key - Hash Key First", + .data_len = 54, + }, + .out = "95e9a0db962095adaebe9b2d6f0dbce2d499f112f2d2b7273fa6870e", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, + }, + .key_len = 131, + .data = "This is a test using a larger than block-size key and a larger than block-size data. The key needs to be hashed before being used by the HMAC algorithm.", + .data_len = 152, + }, + .out = "3a854166ac5d9f023f54d517d0b39dbd946770db9c2b95c9f6f565d1", + }, + }; + + return bt_assert_batch(test_vectors, test_sha224_hmac, hmac_in_fmt, bt_fmt_str); +} + +static int +t_sha256_hmac(void) +{ + struct bt_pair test_vectors[] = { + { + .in = & (struct hmac_data_in) { + .key = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + }, + .key_len = 20, + .data = "Hi There", + .data_len = 8, + }, + .out = "b0344c61d8db38535ca8afceaf0bf12b881dc200c9833da726e9376c2e32cff7", + }, + { + .in = & (struct hmac_data_in) { + .key = "Jefe", + .key_len = 4, + .data = "what do ya want for nothing?", + .data_len = 28, + }, + .out = "5bdcc146bf60754e6a042426089575c75a003f089d2739839dec58b964ec3843", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 20, + .data = { + 
0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + }, + .data_len = 50, + }, + .out = "773ea91e36800e46854db8ebd09181a72959098b3ef8c122d9635514ced565fe", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + }, + .key_len = 25, + .data = { + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + }, + .data_len = 50, + }, + .out = "82558a389a443c0ea4cc819899f2083a85f0faa3e578f8077a2e3ff46729665b", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + }, + .key_len = 20, + .data = "Test With Truncation", + .data_len = 20, + }, + .out = "a3b6167473100ee06e0c796c2955552bfa6f7c0a6a8aef8b93f860aab0cd20c5", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 
0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, + }, + .key_len = 131, + .data = "Test Using Larger Than Block-Size Key - Hash Key First", + .data_len = 54, + }, + .out = "60e431591ee0b67f0d8a26aacbf5b77f8e0bc6213728c5140546040f0ee37f54", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, + }, + .key_len = 131, + .data = "This is a test using a larger than block-size key and a larger than block-size data. 
The key needs to be hashed before being used by the HMAC algorithm.", + .data_len = 152, + }, + .out = "9b09ffa71b942fcb27635fbcd5b0e944bfdc63644f0713938a7f51535c3a35e2", + }, + }; + + return bt_assert_batch(test_vectors, test_sha256_hmac, hmac_in_fmt, bt_fmt_str); +} + +static int +t_sha384_hmac(void) +{ + struct bt_pair test_vectors[] = { + { + .in = & (struct hmac_data_in) { + .key = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + }, + .key_len = 20, + .data = "Hi There", + .data_len = 8, + }, + .out = "afd03944d84895626b0825f4ab46907f15f9dadbe4101ec682aa034c7cebc59cfaea9ea9076ede7f4af152e8b2fa9cb6", + }, + { + .in = & (struct hmac_data_in) { + .key = "Jefe", + .key_len = 4, + .data = "what do ya want for nothing?", + .data_len = 28, + }, + .out = "af45d2e376484031617f78d2b58a6b1b9c7ef464f5a01b47e42ec3736322445e8e2240ca5e69e2c78b3239ecfab21649", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 20, + .data = { + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + }, + .data_len = 50, + }, + .out = "88062608d3e6ad8a0aa2ace014c8a86f0aa635d947ac9febe83ef4e55966144b2a5ab39dc13814b94e3ab6e101a34f27", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + }, + .key_len = 25, + .data = { + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 
0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + }, + .data_len = 50, + }, + .out = "3e8a69b7783c25851933ab6290af6ca77a9981480850009cc5577c6e1f573b4e6801dd23c4a7d679ccf8a386c674cffb", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + }, + .key_len = 20, + .data = "Test With Truncation", + .data_len = 20, + }, + .out = "3abf34c3503b2a23a46efc619baef897f4c8e42c934ce55ccbae9740fcbc1af4ca62269e2a37cd88ba926341efe4aeea", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, + }, + .key_len = 131, + .data = "Test Using Larger Than Block-Size Key - Hash Key First", + .data_len = 54, + }, + .out = "4ece084485813e9088d2c63a041bc5b44f9ef1012a2b588f3cd11f05033ac4c60c2ef6ab4030fe8296248df163f44952", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, + }, + .key_len = 131, + .data = "This is a test using a larger than block-size key and a larger than block-size data. The key needs to be hashed before being used by the HMAC algorithm.", + .data_len = 152, + }, + .out = "6617178e941f020d351e2f254e8fd32c602420feb0b8fb9adccebb82461e99c5a678cc31e799176d3860e6110c46523e", + }, + }; + + return bt_assert_batch(test_vectors, test_sha384_hmac, hmac_in_fmt, bt_fmt_str); +} + +static int +t_sha512_hmac(void) +{ + struct bt_pair test_vectors[] = { + { + .in = & (struct hmac_data_in) { + .key = { + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + }, + .key_len = 20, + .data = "Hi There", + .data_len = 8, + }, + .out = "87aa7cdea5ef619d4ff0b4241a1d6cb02379f4e2ce4ec2787ad0b30545e17cdedaa833b7d6b8a702038b274eaea3f4e4be9d914eeb61f1702e696c203a126854", + }, + { + .in = & (struct hmac_data_in) { + .key = "Jefe", + .key_len = 4, + .data = "what do ya want for nothing?", + .data_len = 28, + }, + .out = "164b7a7bfcf819e2e395fbe73b56e0a387bd64222e831fd610270cd7ea2505549758bf75c05a994a6d034f65f8f0e6fdcaeab1a34d4a6b4b636e070a38bce737", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + }, + .key_len = 20, + .data = { + 0xdd, 
0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, + }, + .data_len = 50, + }, + .out = "fa73b0089d56a284efb0f0756c890be9b1b5dbdd8ee81a3655f83e33b2279d39bf3e848279a722c806b485a47e67c807b946a337bee8942674278859e13292fb", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, + }, + .key_len = 25, + .data = { + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, + }, + .data_len = 50, + }, + .out = "b0ba465637458c6990e5a8c5f61d4af7e576d97ff94b872de76f8050361ee3dba91ca5c11aa25eb4d679275cc5788063a5f19741120c4f2de2adebeb10a298dd", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + }, + .key_len = 20, + .data = "Test With Truncation", + .data_len = 20, + }, + .out = "415fad6271580a531d4179bc891d87a650188707922a4fbb36663a1eb16da008711c5b50ddd0fc235084eb9d3364a1454fb2ef67cd1d29fe6773068ea266e96b", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 
0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, + }, + .key_len = 131, + .data = "Test Using Larger Than Block-Size Key - Hash Key First", + .data_len = 54, + }, + .out = "80b24263c7c1a3ebb71493c1dd7be8b49b46d1f41b4aeec1121b013783f8f3526b56d037e05f2598bd0fd2215d6a1e5295e64f73f63f0aec8b915a985d786598", + }, + { + .in = & (struct hmac_data_in) { + .key = { + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, + }, + .key_len = 131, + .data = "This is a test using a larger than block-size key and a larger than block-size data. 
The key needs to be hashed before being used by the HMAC algorithm.", + .data_len = 152, + }, + .out = "e37b6a775dc87dbaa4dfa9f96e5e3ffddebd71f8867289865df5a32d20cdc944b6022cac3c4982b10d5eeb55c3e4de15134676fb6de0446065c97440fa8c6a58", + }, + }; + + return bt_assert_batch(test_vectors, test_sha512_hmac, hmac_in_fmt, bt_fmt_str); +} + + +/* + * Testing SHAxxx concating independence + */ + +#include "lib/sha256.h" +#include "lib/sha512.h" + +static int +t_sha256_concating(void) +{ + char hash_a[SHA256_HEX_SIZE]; + char hash_b[SHA256_HEX_SIZE]; + + char *str_a = "a" "bb" "ccc" "dddd" "eeeee" "ffffff"; + char *str_b1 = "a" ; + char *str_b2 = "bb" ; + char *str_b3 = "ccc" ; + char *str_b4 = "dddd" ; + char *str_b5 = "eeeee" ; + char *str_b6 = "ffffff"; + + struct hash_context ctx_a; + sha256_init(&ctx_a); + sha256_update(&ctx_a, str_a, strlen(str_a)); + byte *hash_a_ = sha256_final(&ctx_a); + bt_bytes_to_hex(hash_a, hash_a_, SHA256_SIZE); + + struct hash_context ctx_b; + sha256_init(&ctx_b); + sha256_update(&ctx_b, str_b1, strlen(str_b1)); + sha256_update(&ctx_b, str_b2, strlen(str_b2)); + sha256_update(&ctx_b, str_b3, strlen(str_b3)); + sha256_update(&ctx_b, str_b4, strlen(str_b4)); + sha256_update(&ctx_b, str_b5, strlen(str_b5)); + sha256_update(&ctx_b, str_b6, strlen(str_b6)); + byte *hash_b_ = sha256_final(&ctx_b); + bt_bytes_to_hex(hash_b, hash_b_, SHA256_SIZE); + + int are_hash_a_b_equal = (strncmp(hash_a, hash_b, sizeof(hash_a)) == 0); + bt_assert_msg(are_hash_a_b_equal, "Hashes A: %s, B: %s should be same", hash_a, hash_b); + + return 1; +} + + +static int +t_sha512_concating(void) +{ + char hash_a[SHA512_HEX_SIZE]; + char hash_b[SHA512_HEX_SIZE]; + + char *str_a = "a" "bb" "ccc" "dddd" "eeeee" "ffffff"; + char *str_b1 = "a" ; + char *str_b2 = "bb" ; + char *str_b3 = "ccc" ; + char *str_b4 = "dddd" ; + char *str_b5 = "eeeee" ; + char *str_b6 = "ffffff"; + + struct hash_context ctx_a; + sha512_init(&ctx_a); + sha512_update(&ctx_a, str_a, strlen(str_a)); + byte 
*hash_a_ = sha512_final(&ctx_a); + bt_bytes_to_hex(hash_a, hash_a_, SHA512_SIZE); + + struct hash_context ctx_b; + sha512_init(&ctx_b); + sha512_update(&ctx_b, str_b1, strlen(str_b1)); + sha512_update(&ctx_b, str_b2, strlen(str_b2)); + sha512_update(&ctx_b, str_b3, strlen(str_b3)); + sha512_update(&ctx_b, str_b4, strlen(str_b4)); + sha512_update(&ctx_b, str_b5, strlen(str_b5)); + sha512_update(&ctx_b, str_b6, strlen(str_b6)); + byte *hash_b_ = sha512_final(&ctx_b); + bt_bytes_to_hex(hash_b, hash_b_, SHA512_SIZE); + + int are_hash_a_b_equal = (strncmp(hash_a, hash_b, sizeof(hash_a)) == 0); + bt_assert_msg(are_hash_a_b_equal, "Hashes A: %s, B: %s should be same", hash_a, hash_b); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_md5, "Testing MD5 by RFC 1321"); + bt_test_suite(t_sha1, "Testing SHA-1"); + bt_test_suite(t_sha224, "Testing SHA-224"); + bt_test_suite(t_sha256, "Testing SHA-256"); + bt_test_suite(t_sha384, "Testing SHA-384"); + bt_test_suite(t_sha512, "Testing SHA-512"); + + bt_test_suite(t_md5_hmac, "Testing HMAC-MD5 by RFC 2202"); + bt_test_suite(t_sha1_hmac, "Testing HMAC-SHA-1 by RFC 2202"); + bt_test_suite(t_sha224_hmac, "Testing HMAC-SHA-224 by RFC 4231"); + bt_test_suite(t_sha256_hmac, "Testing HMAC-SHA-256 by RFC 4231"); + bt_test_suite(t_sha384_hmac, "Testing HMAC-SHA-384 by RFC 4231"); + bt_test_suite(t_sha512_hmac, "Testing HMAC-SHA-512 by RFC 4231"); + + bt_test_suite(t_sha256_concating, "Testing concatenation input string to hash using sha256_update"); + bt_test_suite(t_sha512_concating, "Testing concatenation input string to hash using sha512_update"); + + return bt_exit_value(); +} diff --git a/lib/mempool.c b/lib/mempool.c index a8281041..758882ce 100644 --- a/lib/mempool.c +++ b/lib/mempool.c @@ -11,7 +11,7 @@ * * Linear memory pools are collections of memory blocks which * support very fast allocation of new blocks, but are able to free only - * the whole collection at once. 
+ * the whole collection at once (or in stack order). * * Example: Each configuration is described by a complex system of structures, * linked lists and function trees which are all allocated from a single linear @@ -32,10 +32,12 @@ struct lp_chunk { byte data[0]; }; +const int lp_chunk_size = sizeof(struct lp_chunk); + struct linpool { resource r; byte *ptr, *end; - struct lp_chunk *first, *current, **plast; /* Normal (reusable) chunks */ + struct lp_chunk *first, *current; /* Normal (reusable) chunks */ struct lp_chunk *first_large; /* Large chunks */ uint chunk_size, threshold, total, total_large; }; @@ -67,7 +69,6 @@ linpool *lp_new(pool *p, uint blk) { linpool *m = ralloc(p, &lp_class); - m->plast = &m->first; m->chunk_size = blk; m->threshold = 3*blk/4; return m; @@ -112,22 +113,25 @@ lp_alloc(linpool *m, uint size) } else { - if (m->current) + if (m->current && m->current->next) { /* Still have free chunks from previous incarnation (before lp_flush()) */ - c = m->current; - m->current = c->next; + c = m->current->next; } else { /* Need to allocate a new chunk */ c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size); m->total += m->chunk_size; - *m->plast = c; - m->plast = &c->next; c->next = NULL; c->size = m->chunk_size; + + if (m->current) + m->current->next = c; + else + m->first = c; } + m->current = c; m->ptr = c->data + size; m->end = c->data + m->chunk_size; } @@ -188,9 +192,11 @@ lp_flush(linpool *m) { struct lp_chunk *c; - /* Relink all normal chunks to free list and free all large chunks */ - m->ptr = m->end = NULL; - m->current = m->first; + /* Move ptr to the first chunk and free all large chunks */ + m->current = c = m->first; + m->ptr = c ? c->data : NULL; + m->end = c ? 
c->data + m->chunk_size : NULL; + while (c = m->first_large) { m->first_large = c->next; @@ -199,6 +205,50 @@ lp_flush(linpool *m) m->total_large = 0; } +/** + * lp_save - save the state of a linear memory pool + * @m: linear memory pool + * @p: state buffer + * + * This function saves the state of a linear memory pool. Saved state can be + * used later to restore the pool (to free memory allocated since). + */ +void +lp_save(linpool *m, lp_state *p) +{ + p->current = m->current; + p->large = m->first_large; + p->ptr = m->ptr; +} + +/** + * lp_restore - restore the state of a linear memory pool + * @m: linear memory pool + * @p: saved state + * + * This function restores the state of a linear memory pool, freeing all memory + * allocated since the state was saved. Note that the function cannot un-free + * the memory, therefore the function also invalidates other states that were + * saved between (on the same pool). + */ +void +lp_restore(linpool *m, lp_state *p) +{ + struct lp_chunk *c; + + /* Move ptr to the saved pos and free all newer large chunks */ + m->current = c = p->current; + m->ptr = p->ptr; + m->end = c ? 
c->data + m->chunk_size : NULL; + + while ((c = m->first_large) && (c != p->large)) + { + m->first_large = c->next; + m->total_large -= c->size; + xfree(c); + } +} + static void lp_free(resource *r) { diff --git a/lib/net.c b/lib/net.c new file mode 100644 index 00000000..976ddbcc --- /dev/null +++ b/lib/net.c @@ -0,0 +1,331 @@ + +#include "nest/bird.h" +#include "lib/ip.h" +#include "lib/net.h" +#include "lib/flowspec.h" + + +const char * const net_label[] = { + [NET_IP4] = "ipv4", + [NET_IP6] = "ipv6", + [NET_VPN4] = "vpn4", + [NET_VPN6] = "vpn6", + [NET_ROA4] = "roa4", + [NET_ROA6] = "roa6", + [NET_FLOW4] = "flow4", + [NET_FLOW6] = "flow6", + [NET_IP6_SADR]= "ipv6-sadr", + [NET_MPLS] = "mpls", +}; + +const u16 net_addr_length[] = { + [NET_IP4] = sizeof(net_addr_ip4), + [NET_IP6] = sizeof(net_addr_ip6), + [NET_VPN4] = sizeof(net_addr_vpn4), + [NET_VPN6] = sizeof(net_addr_vpn6), + [NET_ROA4] = sizeof(net_addr_roa4), + [NET_ROA6] = sizeof(net_addr_roa6), + [NET_FLOW4] = 0, + [NET_FLOW6] = 0, + [NET_IP6_SADR]= sizeof(net_addr_ip6_sadr), + [NET_MPLS] = sizeof(net_addr_mpls), +}; + +const u8 net_max_prefix_length[] = { + [NET_IP4] = IP4_MAX_PREFIX_LENGTH, + [NET_IP6] = IP6_MAX_PREFIX_LENGTH, + [NET_VPN4] = IP4_MAX_PREFIX_LENGTH, + [NET_VPN6] = IP6_MAX_PREFIX_LENGTH, + [NET_ROA4] = IP4_MAX_PREFIX_LENGTH, + [NET_ROA6] = IP6_MAX_PREFIX_LENGTH, + [NET_FLOW4] = IP4_MAX_PREFIX_LENGTH, + [NET_FLOW6] = IP6_MAX_PREFIX_LENGTH, + [NET_IP6_SADR]= IP6_MAX_PREFIX_LENGTH, + [NET_MPLS] = 0, +}; + +const u16 net_max_text_length[] = { + [NET_IP4] = 18, /* "255.255.255.255/32" */ + [NET_IP6] = 43, /* "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128" */ + [NET_VPN4] = 40, /* "4294967296:4294967296 255.255.255.255/32" */ + [NET_VPN6] = 65, /* "4294967296:4294967296 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128" */ + [NET_ROA4] = 34, /* "255.255.255.255/32-32 AS4294967295" */ + [NET_ROA6] = 60, /* "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128-128 AS4294967295" */ + [NET_FLOW4] = 0, /* "flow4 { 
... }" */ + [NET_FLOW6] = 0, /* "flow6 { ... }" */ + [NET_IP6_SADR]= 92, /* "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128 from ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128" */ + [NET_MPLS] = 7, /* "1048575" */ +}; + + +int +rd_format(const u64 rd, char *buf, int buflen) +{ + switch (rd >> 48) + { + case 0: return bsnprintf(buf, buflen, "%u:%u", (u32) (rd >> 32), (u32) rd); + case 1: return bsnprintf(buf, buflen, "%I4:%u", ip4_from_u32(rd >> 16), (u32) (rd & 0xffff)); + case 2: if (((u32) (rd >> 16)) >> 16) + return bsnprintf(buf, buflen, "%u:%u", (u32) (rd >> 16), (u32) (rd & 0xffff)); + else + return bsnprintf(buf, buflen, "2:%u:%u", (u32) (rd >> 16), (u32) (rd & 0xffff)); + default: return bsnprintf(buf, buflen, "X:%08x:%08x", (u32) (rd >> 32), (u32) rd); + } +} + +int +net_format(const net_addr *N, char *buf, int buflen) +{ + net_addr_union *n = (void *) N; + buf[0] = 0; + + switch (n->n.type) + { + case NET_IP4: + return bsnprintf(buf, buflen, "%I4/%d", n->ip4.prefix, n->ip4.pxlen); + case NET_IP6: + return bsnprintf(buf, buflen, "%I6/%d", n->ip6.prefix, n->ip6.pxlen); + case NET_VPN4: + { + int c = rd_format(n->vpn4.rd, buf, buflen); + ADVANCE(buf, buflen, c); + return bsnprintf(buf, buflen, " %I4/%d", n->vpn4.prefix, n->vpn4.pxlen); + } + case NET_VPN6: + { + /* XXX: RD format is specified for VPN4; not found any for VPN6, reusing the same as for VPN4 */ + int c = rd_format(n->vpn6.rd, buf, buflen); + ADVANCE(buf, buflen, c); + return bsnprintf(buf, buflen, " %I6/%d", n->vpn6.prefix, n->vpn6.pxlen); + } + case NET_ROA4: + return bsnprintf(buf, buflen, "%I4/%u-%u AS%u", n->roa4.prefix, n->roa4.pxlen, n->roa4.max_pxlen, n->roa4.asn); + case NET_ROA6: + return bsnprintf(buf, buflen, "%I6/%u-%u AS%u", n->roa6.prefix, n->roa6.pxlen, n->roa6.max_pxlen, n->roa6.asn); + case NET_FLOW4: + return flow4_net_format(buf, buflen, &n->flow4); + case NET_FLOW6: + return flow6_net_format(buf, buflen, &n->flow6); + case NET_IP6_SADR: + return bsnprintf(buf, buflen, "%I6/%d from 
%I6/%d", n->ip6_sadr.dst_prefix, n->ip6_sadr.dst_pxlen, n->ip6_sadr.src_prefix, n->ip6_sadr.src_pxlen); + case NET_MPLS: + return bsnprintf(buf, buflen, "%u", n->mpls.label); + } + + bug("unknown network type"); +} + +ip_addr +net_pxmask(const net_addr *a) +{ + switch (a->type) + { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + case NET_FLOW4: + return ipa_from_ip4(ip4_mkmask(net4_pxlen(a))); + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + case NET_FLOW6: + case NET_IP6_SADR: + return ipa_from_ip6(ip6_mkmask(net6_pxlen(a))); + + case NET_MPLS: + default: + return IPA_NONE; + } +} + +int +net_compare(const net_addr *a, const net_addr *b) +{ + if (a->type != b->type) + return uint_cmp(a->type, b->type); + + switch (a->type) + { + case NET_IP4: + return net_compare_ip4((const net_addr_ip4 *) a, (const net_addr_ip4 *) b); + case NET_IP6: + return net_compare_ip6((const net_addr_ip6 *) a, (const net_addr_ip6 *) b); + case NET_VPN4: + return net_compare_vpn4((const net_addr_vpn4 *) a, (const net_addr_vpn4 *) b); + case NET_VPN6: + return net_compare_vpn6((const net_addr_vpn6 *) a, (const net_addr_vpn6 *) b); + case NET_ROA4: + return net_compare_roa4((const net_addr_roa4 *) a, (const net_addr_roa4 *) b); + case NET_ROA6: + return net_compare_roa6((const net_addr_roa6 *) a, (const net_addr_roa6 *) b); + case NET_FLOW4: + return net_compare_flow4((const net_addr_flow4 *) a, (const net_addr_flow4 *) b); + case NET_FLOW6: + return net_compare_flow6((const net_addr_flow6 *) a, (const net_addr_flow6 *) b); + case NET_IP6_SADR: + return net_compare_ip6_sadr((const net_addr_ip6_sadr *) a, (const net_addr_ip6_sadr *) b); + case NET_MPLS: + return net_compare_mpls((const net_addr_mpls *) a, (const net_addr_mpls *) b); + } + return 0; +} + +#define NET_HASH(a,t) net_hash_##t((const net_addr_##t *) a) + +u32 +net_hash(const net_addr *n) +{ + switch (n->type) + { + case NET_IP4: return NET_HASH(n, ip4); + case NET_IP6: return NET_HASH(n, ip6); + case NET_VPN4: return 
NET_HASH(n, vpn4); + case NET_VPN6: return NET_HASH(n, vpn6); + case NET_ROA4: return NET_HASH(n, roa4); + case NET_ROA6: return NET_HASH(n, roa6); + case NET_FLOW4: return NET_HASH(n, flow4); + case NET_FLOW6: return NET_HASH(n, flow6); + case NET_IP6_SADR: return NET_HASH(n, ip6_sadr); + case NET_MPLS: return NET_HASH(n, mpls); + default: bug("invalid type"); + } +} + + +#define NET_VALIDATE(a,t) net_validate_##t((const net_addr_##t *) a) + +int +net_validate(const net_addr *n) +{ + switch (n->type) + { + case NET_IP4: return NET_VALIDATE(n, ip4); + case NET_IP6: return NET_VALIDATE(n, ip6); + case NET_VPN4: return NET_VALIDATE(n, vpn4); + case NET_VPN6: return NET_VALIDATE(n, vpn6); + case NET_ROA4: return NET_VALIDATE(n, roa4); + case NET_ROA6: return NET_VALIDATE(n, roa6); + case NET_FLOW4: return NET_VALIDATE(n, flow4); + case NET_FLOW6: return NET_VALIDATE(n, flow6); + case NET_IP6_SADR: return NET_VALIDATE(n, ip6_sadr); + case NET_MPLS: return NET_VALIDATE(n, mpls); + default: return 0; + } +} + +void +net_normalize(net_addr *N) +{ + net_addr_union *n = (void *) N; + + switch (n->n.type) + { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + case NET_FLOW4: + return net_normalize_ip4(&n->ip4); + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + case NET_FLOW6: + return net_normalize_ip6(&n->ip6); + + case NET_IP6_SADR: + return net_normalize_ip6_sadr(&n->ip6_sadr); + + case NET_MPLS: + return; + } +} + +int +net_classify(const net_addr *N) +{ + net_addr_union *n = (void *) N; + + switch (n->n.type) + { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + case NET_FLOW4: + return ip4_zero(n->ip4.prefix) ? (IADDR_HOST | SCOPE_UNIVERSE) : ip4_classify(n->ip4.prefix); + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + case NET_FLOW6: + return ip6_zero(n->ip6.prefix) ? (IADDR_HOST | SCOPE_UNIVERSE) : ip6_classify(&n->ip6.prefix); + + case NET_IP6_SADR: + return ip6_zero(n->ip6_sadr.dst_prefix) ? 
(IADDR_HOST | SCOPE_UNIVERSE) : ip6_classify(&n->ip6_sadr.dst_prefix); + + case NET_MPLS: + return IADDR_HOST | SCOPE_UNIVERSE; + } + + return IADDR_INVALID; +} + +int +ipa_in_netX(const ip_addr a, const net_addr *n) +{ + switch (n->type) + { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + case NET_FLOW4: + if (!ipa_is_ip4(a)) return 0; + return ip4_zero(ip4_and(ip4_xor(ipa_to_ip4(a), net4_prefix(n)), + ip4_mkmask(net4_pxlen(n)))); + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + case NET_FLOW6: + if (ipa_is_ip4(a)) return 0; + return ip6_zero(ip6_and(ip6_xor(ipa_to_ip6(a), net6_prefix(n)), + ip6_mkmask(net6_pxlen(n)))); + + case NET_IP6_SADR: + if (ipa_is_ip4(a)) return 0; + return ip6_zero(ip6_and(ip6_xor(ipa_to_ip6(a), net6_prefix(n)), + ip6_mkmask(net6_pxlen(n)))); + + case NET_MPLS: + default: + return 0; + } +} + +int +net_in_netX(const net_addr *a, const net_addr *n) +{ + if (a->type != n->type) + return 0; + + return (net_pxlen(n) <= net_pxlen(a)) && ipa_in_netX(net_prefix(a), n); +} + +#define CHECK_NET(T,S) \ + ({ if (sizeof(T) != S) die("sizeof %s is %d/%d", #T, (int) sizeof(T), S); }) + +void +net_init(void) +{ + CHECK_NET(net_addr, 24); + CHECK_NET(net_addr_ip4, 8); + CHECK_NET(net_addr_ip6, 20); + CHECK_NET(net_addr_vpn4, 16); + CHECK_NET(net_addr_vpn6, 32); + CHECK_NET(net_addr_roa4, 16); + CHECK_NET(net_addr_roa6, 28); + CHECK_NET(net_addr_flow4, 8); + CHECK_NET(net_addr_flow6, 20); + CHECK_NET(net_addr_ip6_sadr, 40); + CHECK_NET(net_addr_mpls, 8); +} diff --git a/lib/net.h b/lib/net.h new file mode 100644 index 00000000..ad4000fd --- /dev/null +++ b/lib/net.h @@ -0,0 +1,622 @@ +/* + * BIRD Internet Routing Daemon -- Network addresses + * + * (c) 2015 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#ifndef _BIRD_NET_H_ +#define _BIRD_NET_H_ + +#include "lib/ip.h" + + +#define NET_IP4 1 +#define NET_IP6 2 +#define NET_VPN4 3 +#define NET_VPN6 4 +#define NET_ROA4 5 +#define NET_ROA6 6 +#define NET_FLOW4 7 +#define NET_FLOW6 8 +#define NET_IP6_SADR 9 +#define NET_MPLS 10 +#define NET_MAX 11 + +#define NB_IP4 (1 << NET_IP4) +#define NB_IP6 (1 << NET_IP6) +#define NB_VPN4 (1 << NET_VPN4) +#define NB_VPN6 (1 << NET_VPN6) +#define NB_ROA4 (1 << NET_ROA4) +#define NB_ROA6 (1 << NET_ROA6) +#define NB_FLOW4 (1 << NET_FLOW4) +#define NB_FLOW6 (1 << NET_FLOW6) +#define NB_IP6_SADR (1 << NET_IP6_SADR) +#define NB_MPLS (1 << NET_MPLS) + +#define NB_IP (NB_IP4 | NB_IP6) +#define NB_VPN (NB_VPN4 | NB_VPN6) +#define NB_FLOW (NB_FLOW4 | NB_FLOW6) +#define NB_DEST (NB_IP | NB_IP6_SADR | NB_VPN | NB_MPLS) +#define NB_ANY 0xffffffff + + +typedef struct net_addr { + u8 type; + u8 pxlen; + u16 length; + u8 data[20]; + u64 align[0]; +} net_addr; + +typedef struct net_addr_ip4 { + u8 type; + u8 pxlen; + u16 length; + ip4_addr prefix; +} net_addr_ip4; + +typedef struct net_addr_ip6 { + u8 type; + u8 pxlen; + u16 length; + ip6_addr prefix; +} net_addr_ip6; + +typedef struct net_addr_vpn4 { + u8 type; + u8 pxlen; + u16 length; + ip4_addr prefix; + u64 rd; +} net_addr_vpn4; + +typedef struct net_addr_vpn6 { + u8 type; + u8 pxlen; + u16 length; + ip6_addr prefix; + u32 padding; + u64 rd; +} net_addr_vpn6; + +typedef struct net_addr_roa4 { + u8 type; + u8 pxlen; + u16 length; + ip4_addr prefix; + u32 max_pxlen; + u32 asn; +} net_addr_roa4; + +typedef struct net_addr_roa6 { + u8 type; + u8 pxlen; + u16 length; + ip6_addr prefix; + u32 max_pxlen; + u32 asn; +} net_addr_roa6; + +typedef struct net_addr_flow4 { + u8 type; + u8 pxlen; + u16 length; + ip4_addr prefix; + byte data[0]; +} net_addr_flow4; + +typedef struct net_addr_flow6 { + u8 type; + u8 pxlen; + u16 length; + ip6_addr prefix; + byte data[0]; +} net_addr_flow6; + +typedef struct net_addr_mpls { + u8 type; + u8 pxlen; + u16 
length; + u32 label; +} net_addr_mpls; + +typedef struct net_addr_ip6_sadr { + u8 type; + u8 dst_pxlen; + u16 length; + ip6_addr dst_prefix; + s32 src_pxlen; /* s32 to avoid padding */ + ip6_addr src_prefix; +} net_addr_ip6_sadr; + +typedef union net_addr_union { + net_addr n; + net_addr_ip4 ip4; + net_addr_ip6 ip6; + net_addr_vpn4 vpn4; + net_addr_vpn6 vpn6; + net_addr_roa4 roa4; + net_addr_roa6 roa6; + net_addr_flow4 flow4; + net_addr_flow6 flow6; + net_addr_ip6_sadr ip6_sadr; + net_addr_mpls mpls; +} net_addr_union; + + +extern const char * const net_label[]; +extern const u16 net_addr_length[]; +extern const u8 net_max_prefix_length[]; +extern const u16 net_max_text_length[]; + +#define NET_MAX_TEXT_LENGTH 256 + + +#define NET_ADDR_IP4(prefix,pxlen) \ + ((net_addr_ip4) { NET_IP4, pxlen, sizeof(net_addr_ip4), prefix }) + +#define NET_ADDR_IP6(prefix,pxlen) \ + ((net_addr_ip6) { NET_IP6, pxlen, sizeof(net_addr_ip6), prefix }) + +#define NET_ADDR_VPN4(prefix,pxlen,rd) \ + ((net_addr_vpn4) { NET_VPN4, pxlen, sizeof(net_addr_vpn4), prefix, rd }) + +#define NET_ADDR_VPN6(prefix,pxlen,rd) \ + ((net_addr_vpn6) { NET_VPN6, pxlen, sizeof(net_addr_vpn6), prefix, 0, rd }) + +#define NET_ADDR_ROA4(prefix,pxlen,max_pxlen,asn) \ + ((net_addr_roa4) { NET_ROA4, pxlen, sizeof(net_addr_roa4), prefix, max_pxlen, asn }) + +#define NET_ADDR_ROA6(prefix,pxlen,max_pxlen,asn) \ + ((net_addr_roa6) { NET_ROA6, pxlen, sizeof(net_addr_roa6), prefix, max_pxlen, asn }) + +#define NET_ADDR_FLOW4(prefix,pxlen,dlen) \ + ((net_addr_flow4) { NET_FLOW4, pxlen, sizeof(net_addr_ip4) + dlen, prefix }) + +#define NET_ADDR_FLOW6(prefix,pxlen,dlen) \ + ((net_addr_flow6) { NET_FLOW6, pxlen, sizeof(net_addr_ip6) + dlen, prefix }) + +#define NET_ADDR_IP6_SADR(dst_prefix,dst_pxlen,src_prefix,src_pxlen) \ + ((net_addr_ip6_sadr) { NET_IP6_SADR, dst_pxlen, sizeof(net_addr_ip6_sadr), dst_prefix, src_pxlen, src_prefix }) + +#define NET_ADDR_MPLS(label) \ + ((net_addr_mpls) { NET_MPLS, 20, sizeof(net_addr_mpls), 
label }) + + +static inline void net_fill_ip4(net_addr *a, ip4_addr prefix, uint pxlen) +{ *(net_addr_ip4 *)a = NET_ADDR_IP4(prefix, pxlen); } + +static inline void net_fill_ip6(net_addr *a, ip6_addr prefix, uint pxlen) +{ *(net_addr_ip6 *)a = NET_ADDR_IP6(prefix, pxlen); } + +static inline void net_fill_vpn4(net_addr *a, ip4_addr prefix, uint pxlen, u64 rd) +{ *(net_addr_vpn4 *)a = NET_ADDR_VPN4(prefix, pxlen, rd); } + +static inline void net_fill_vpn6(net_addr *a, ip6_addr prefix, uint pxlen, u64 rd) +{ *(net_addr_vpn6 *)a = NET_ADDR_VPN6(prefix, pxlen, rd); } + +static inline void net_fill_roa4(net_addr *a, ip4_addr prefix, uint pxlen, uint max_pxlen, u32 asn) +{ *(net_addr_roa4 *)a = NET_ADDR_ROA4(prefix, pxlen, max_pxlen, asn); } + +static inline void net_fill_roa6(net_addr *a, ip6_addr prefix, uint pxlen, uint max_pxlen, u32 asn) +{ *(net_addr_roa6 *)a = NET_ADDR_ROA6(prefix, pxlen, max_pxlen, asn); } + +static inline void net_fill_ip6_sadr(net_addr *a, ip6_addr dst_prefix, uint dst_pxlen, ip6_addr src_prefix, uint src_pxlen) +{ *(net_addr_ip6_sadr *)a = NET_ADDR_IP6_SADR(dst_prefix, dst_pxlen, src_prefix, src_pxlen); } + +static inline void net_fill_mpls(net_addr *a, u32 label) +{ *(net_addr_mpls *)a = NET_ADDR_MPLS(label); } + +static inline void net_fill_ipa(net_addr *a, ip_addr prefix, uint pxlen) +{ + if (ipa_is_ip4(prefix)) + net_fill_ip4(a, ipa_to_ip4(prefix), pxlen); + else + net_fill_ip6(a, ipa_to_ip6(prefix), pxlen); +} + +static inline void net_fill_ip_host(net_addr *a, ip_addr prefix) +{ + if (ipa_is_ip4(prefix)) + net_fill_ip4(a, ipa_to_ip4(prefix), IP4_MAX_PREFIX_LENGTH); + else + net_fill_ip6(a, ipa_to_ip6(prefix), IP6_MAX_PREFIX_LENGTH); +} + +static inline void net_fill_flow4(net_addr *a, ip4_addr prefix, uint pxlen, byte *data, uint dlen) +{ + net_addr_flow4 *f = (void *) a; + *f = NET_ADDR_FLOW4(prefix, pxlen, dlen); + memcpy(f->data, data, dlen); +} + +static inline void net_fill_flow6(net_addr *a, ip6_addr prefix, uint pxlen, byte *data, 
uint dlen) +{ + net_addr_flow6 *f = (void *) a; + *f = NET_ADDR_FLOW6(prefix, pxlen, dlen); + memcpy(f->data, data, dlen); +} + +/* Make NET_IP6_SADR from NET_IP6, assuming there is enough space */ +static inline void net_make_ip6_sadr(net_addr *a) +{ + net_addr_ip6_sadr *n = (void *) a; + n->type = NET_IP6_SADR; + n->length = sizeof(net_addr_ip6_sadr); + n->src_pxlen = 0; + n->src_prefix = IP6_NONE; +} + +static inline int net_val_match(u8 type, u32 mask) +{ return !!((1 << type) & mask); } + +static inline int net_type_match(const net_addr *a, u32 mask) +{ return net_val_match(a->type, mask); } + +static inline int net_is_ip(const net_addr *a) +{ return (a->type == NET_IP4) || (a->type == NET_IP6); } + +static inline int net_is_vpn(const net_addr *a) +{ return (a->type == NET_VPN4) || (a->type == NET_VPN6); } + +static inline int net_is_roa(const net_addr *a) +{ return (a->type == NET_ROA4) || (a->type == NET_ROA6); } + +static inline int net_is_flow(const net_addr *a) +{ return (a->type == NET_FLOW4) || (a->type == NET_FLOW6); } + + +static inline ip4_addr net4_prefix(const net_addr *a) +{ return ((net_addr_ip4 *) a)->prefix; } + +static inline ip6_addr net6_prefix(const net_addr *a) +{ return ((net_addr_ip6 *) a)->prefix; } + +static inline ip_addr net_prefix(const net_addr *a) +{ + switch (a->type) + { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + case NET_FLOW4: + return ipa_from_ip4(net4_prefix(a)); + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + case NET_FLOW6: + case NET_IP6_SADR: + return ipa_from_ip6(net6_prefix(a)); + + case NET_MPLS: + default: + return IPA_NONE; + } +} + +static inline u32 net_mpls(const net_addr *a) +{ + if (a->type == NET_MPLS) + return ((net_addr_mpls *) a)->label; + + bug("Can't call net_mpls on non-mpls net_addr"); +} + +static inline uint net4_pxlen(const net_addr *a) +{ return a->pxlen; } + +static inline uint net6_pxlen(const net_addr *a) +{ return a->pxlen; } + +static inline uint net_pxlen(const net_addr *a) +{ 
return a->pxlen; } + +ip_addr net_pxmask(const net_addr *a); + +static inline u64 net_rd(const net_addr *a) +{ + switch (a->type) + { + case NET_VPN4: + return ((net_addr_vpn4 *)a)->rd; + case NET_VPN6: + return ((net_addr_vpn6 *)a)->rd; + } + return 0; +} + + +static inline int net_equal(const net_addr *a, const net_addr *b) +{ return (a->length == b->length) && !memcmp(a, b, a->length); } + +static inline int net_equal_ip4(const net_addr_ip4 *a, const net_addr_ip4 *b) +{ return !memcmp(a, b, sizeof(net_addr_ip4)); } + +static inline int net_equal_ip6(const net_addr_ip6 *a, const net_addr_ip6 *b) +{ return !memcmp(a, b, sizeof(net_addr_ip6)); } + +static inline int net_equal_vpn4(const net_addr_vpn4 *a, const net_addr_vpn4 *b) +{ return !memcmp(a, b, sizeof(net_addr_vpn4)); } + +static inline int net_equal_vpn6(const net_addr_vpn6 *a, const net_addr_vpn6 *b) +{ return !memcmp(a, b, sizeof(net_addr_vpn6)); } + +static inline int net_equal_roa4(const net_addr_roa4 *a, const net_addr_roa4 *b) +{ return !memcmp(a, b, sizeof(net_addr_roa4)); } + +static inline int net_equal_roa6(const net_addr_roa6 *a, const net_addr_roa6 *b) +{ return !memcmp(a, b, sizeof(net_addr_roa6)); } + +static inline int net_equal_flow4(const net_addr_flow4 *a, const net_addr_flow4 *b) +{ return net_equal((const net_addr *) a, (const net_addr *) b); } + +static inline int net_equal_flow6(const net_addr_flow6 *a, const net_addr_flow6 *b) +{ return net_equal((const net_addr *) a, (const net_addr *) b); } + +static inline int net_equal_ip6_sadr(const net_addr_ip6_sadr *a, const net_addr_ip6_sadr *b) +{ return !memcmp(a, b, sizeof(net_addr_ip6_sadr)); } + +static inline int net_equal_mpls(const net_addr_mpls *a, const net_addr_mpls *b) +{ return !memcmp(a, b, sizeof(net_addr_mpls)); } + + +static inline int net_equal_prefix_roa4(const net_addr_roa4 *a, const net_addr_roa4 *b) +{ return ip4_equal(a->prefix, b->prefix) && (a->pxlen == b->pxlen); } + +static inline int net_equal_prefix_roa6(const 
net_addr_roa6 *a, const net_addr_roa6 *b) +{ return ip6_equal(a->prefix, b->prefix) && (a->pxlen == b->pxlen); } + +static inline int net_equal_dst_ip6_sadr(const net_addr_ip6_sadr *a, const net_addr_ip6_sadr *b) +{ return ip6_equal(a->dst_prefix, b->dst_prefix) && (a->dst_pxlen == b->dst_pxlen); } + +static inline int net_equal_src_ip6_sadr(const net_addr_ip6_sadr *a, const net_addr_ip6_sadr *b) +{ return ip6_equal(a->src_prefix, b->src_prefix) && (a->src_pxlen == b->src_pxlen); } + + +static inline int net_zero_ip4(const net_addr_ip4 *a) +{ return !a->pxlen && ip4_zero(a->prefix); } + +static inline int net_zero_ip6(const net_addr_ip6 *a) +{ return !a->pxlen && ip6_zero(a->prefix); } + +static inline int net_zero_vpn4(const net_addr_vpn4 *a) +{ return !a->pxlen && ip4_zero(a->prefix) && !a->rd; } + +static inline int net_zero_vpn6(const net_addr_vpn6 *a) +{ return !a->pxlen && ip6_zero(a->prefix) && !a->rd; } + +static inline int net_zero_roa4(const net_addr_roa4 *a) +{ return !a->pxlen && ip4_zero(a->prefix) && !a->max_pxlen && !a->asn; } + +static inline int net_zero_roa6(const net_addr_roa6 *a) +{ return !a->pxlen && ip6_zero(a->prefix) && !a->max_pxlen && !a->asn; } + +static inline int net_zero_flow4(const net_addr_flow4 *a) +{ return !a->pxlen && ip4_zero(a->prefix) && (a->length == sizeof(net_addr_flow4)); } + +static inline int net_zero_flow6(const net_addr_flow6 *a) +{ return !a->pxlen && ip6_zero(a->prefix) && (a->length == sizeof(net_addr_flow6)); } + +static inline int net_zero_mpls(const net_addr_mpls *a) +{ return !a->label; } + + +static inline int net_compare_ip4(const net_addr_ip4 *a, const net_addr_ip4 *b) +{ return ip4_compare(a->prefix, b->prefix) ?: uint_cmp(a->pxlen, b->pxlen); } + +static inline int net_compare_ip6(const net_addr_ip6 *a, const net_addr_ip6 *b) +{ return ip6_compare(a->prefix, b->prefix) ?: uint_cmp(a->pxlen, b->pxlen); } + +static inline int net_compare_vpn4(const net_addr_vpn4 *a, const net_addr_vpn4 *b) +{ return 
u64_cmp(a->rd, b->rd) ?: ip4_compare(a->prefix, b->prefix) ?: uint_cmp(a->pxlen, b->pxlen); } + +static inline int net_compare_vpn6(const net_addr_vpn6 *a, const net_addr_vpn6 *b) +{ return u64_cmp(a->rd, b->rd) ?: ip6_compare(a->prefix, b->prefix) ?: uint_cmp(a->pxlen, b->pxlen); } + +static inline int net_compare_roa4(const net_addr_roa4 *a, const net_addr_roa4 *b) +{ return ip4_compare(a->prefix, b->prefix) ?: uint_cmp(a->pxlen, b->pxlen) ?: uint_cmp(a->max_pxlen, b->max_pxlen) ?: uint_cmp(a->asn, b->asn); } + +static inline int net_compare_roa6(const net_addr_roa6 *a, const net_addr_roa6 *b) +{ return ip6_compare(a->prefix, b->prefix) ?: uint_cmp(a->pxlen, b->pxlen) ?: uint_cmp(a->max_pxlen, b->max_pxlen) ?: uint_cmp(a->asn, b->asn); } + +static inline int net_compare_flow4(const net_addr_flow4 *a, const net_addr_flow4 *b) +{ return ip4_compare(a->prefix, b->prefix) ?: uint_cmp(a->pxlen, b->pxlen) ?: uint_cmp(a->length, b->length) ?: memcmp(a->data, b->data, a->length - sizeof(net_addr_flow4)); } + +static inline int net_compare_flow6(const net_addr_flow6 *a, const net_addr_flow6 *b) +{ return ip6_compare(a->prefix, b->prefix) ?: uint_cmp(a->pxlen, b->pxlen) ?: uint_cmp(a->length, b->length) ?: memcmp(a->data, b->data, a->length - sizeof(net_addr_flow6)); } + +static inline int net_compare_ip6_sadr(const net_addr_ip6_sadr *a, const net_addr_ip6_sadr *b) +{ + return + ip6_compare(a->dst_prefix, b->dst_prefix) ?: uint_cmp(a->dst_pxlen, b->dst_pxlen) ?: + ip6_compare(a->src_prefix, b->src_prefix) ?: uint_cmp(a->src_pxlen, b->src_pxlen); +} + +static inline int net_compare_mpls(const net_addr_mpls *a, const net_addr_mpls *b) +{ return uint_cmp(a->label, b->label); } + +int net_compare(const net_addr *a, const net_addr *b); + + +static inline void net_copy(net_addr *dst, const net_addr *src) +{ memcpy(dst, src, src->length); } + +static inline void net_copy_ip4(net_addr_ip4 *dst, const net_addr_ip4 *src) +{ memcpy(dst, src, sizeof(net_addr_ip4)); } + +static inline 
void net_copy_ip6(net_addr_ip6 *dst, const net_addr_ip6 *src) +{ memcpy(dst, src, sizeof(net_addr_ip6)); } + +static inline void net_copy_vpn4(net_addr_vpn4 *dst, const net_addr_vpn4 *src) +{ memcpy(dst, src, sizeof(net_addr_vpn4)); } + +static inline void net_copy_vpn6(net_addr_vpn6 *dst, const net_addr_vpn6 *src) +{ memcpy(dst, src, sizeof(net_addr_vpn6)); } + +static inline void net_copy_roa4(net_addr_roa4 *dst, const net_addr_roa4 *src) +{ memcpy(dst, src, sizeof(net_addr_roa4)); } + +static inline void net_copy_roa6(net_addr_roa6 *dst, const net_addr_roa6 *src) +{ memcpy(dst, src, sizeof(net_addr_roa6)); } + +static inline void net_copy_flow4(net_addr_flow4 *dst, const net_addr_flow4 *src) +{ memcpy(dst, src, src->length); } + +static inline void net_copy_flow6(net_addr_flow6 *dst, const net_addr_flow6 *src) +{ memcpy(dst, src, src->length); } + +static inline void net_copy_ip6_sadr(net_addr_ip6_sadr *dst, const net_addr_ip6_sadr *src) +{ memcpy(dst, src, sizeof(net_addr_ip6_sadr)); } + +static inline void net_copy_mpls(net_addr_mpls *dst, const net_addr_mpls *src) +{ memcpy(dst, src, sizeof(net_addr_mpls)); } + + +/* XXXX */ +static inline u32 u64_hash(u64 a) +{ return u32_hash(a); } + +static inline u32 net_hash_ip4(const net_addr_ip4 *n) +{ return ip4_hash(n->prefix) ^ ((u32) n->pxlen << 26); } + +static inline u32 net_hash_ip6(const net_addr_ip6 *n) +{ return ip6_hash(n->prefix) ^ ((u32) n->pxlen << 26); } + +static inline u32 net_hash_vpn4(const net_addr_vpn4 *n) +{ return ip4_hash(n->prefix) ^ ((u32) n->pxlen << 26) ^ u64_hash(n->rd); } + +static inline u32 net_hash_vpn6(const net_addr_vpn6 *n) +{ return ip6_hash(n->prefix) ^ ((u32) n->pxlen << 26) ^ u64_hash(n->rd); } + +static inline u32 net_hash_roa4(const net_addr_roa4 *n) +{ return ip4_hash(n->prefix) ^ ((u32) n->pxlen << 26); } + +static inline u32 net_hash_roa6(const net_addr_roa6 *n) +{ return ip6_hash(n->prefix) ^ ((u32) n->pxlen << 26); } + +static inline u32 net_hash_flow4(const net_addr_flow4 
*n) +{ return ip4_hash(n->prefix) ^ ((u32) n->pxlen << 26); } + +static inline u32 net_hash_flow6(const net_addr_flow6 *n) +{ return ip6_hash(n->prefix) ^ ((u32) n->pxlen << 26); } + +static inline u32 net_hash_ip6_sadr(const net_addr_ip6_sadr *n) +{ return net_hash_ip6((net_addr_ip6 *) n); } + +static inline u32 net_hash_mpls(const net_addr_mpls *n) +{ return n->label; } + +u32 net_hash(const net_addr *a); + + +static inline int net_validate_px4(const ip4_addr prefix, uint pxlen) +{ + return (pxlen <= IP4_MAX_PREFIX_LENGTH) && + ip4_zero(ip4_and(prefix, ip4_not(ip4_mkmask(pxlen)))); +} + +static inline int net_validate_px6(const ip6_addr prefix, uint pxlen) +{ + return (pxlen <= IP6_MAX_PREFIX_LENGTH) && + ip6_zero(ip6_and(prefix, ip6_not(ip6_mkmask(pxlen)))); +} + +static inline int net_validate_ip4(const net_addr_ip4 *n) +{ return net_validate_px4(n->prefix, n->pxlen); } + +static inline int net_validate_ip6(const net_addr_ip6 *n) +{ return net_validate_px6(n->prefix, n->pxlen); } + +static inline int net_validate_vpn4(const net_addr_vpn4 *n) +{ return net_validate_px4(n->prefix, n->pxlen); } + +static inline int net_validate_vpn6(const net_addr_vpn6 *n) +{ return net_validate_px6(n->prefix, n->pxlen); } + +static inline int net_validate_roa4(const net_addr_roa4 *n) +{ + return net_validate_px4(n->prefix, n->pxlen) && + (n->pxlen <= n->max_pxlen) && (n->max_pxlen <= IP4_MAX_PREFIX_LENGTH); +} + +static inline int net_validate_roa6(const net_addr_roa6 *n) +{ + return net_validate_px6(n->prefix, n->pxlen) && + (n->pxlen <= n->max_pxlen) && (n->max_pxlen <= IP6_MAX_PREFIX_LENGTH); +} + +// FIXME: Better check, call flow_validate? 
+static inline int net_validate_flow4(const net_addr_flow4 *n) +{ return net_validate_px4(n->prefix, n->pxlen); } + +static inline int net_validate_flow6(const net_addr_flow6 *n) +{ return net_validate_px6(n->prefix, n->pxlen); } + +static inline int net_validate_mpls(const net_addr_mpls *n) +{ return n->label < (1 << 20); } + +static inline int net_validate_ip6_sadr(const net_addr_ip6_sadr *n) +{ return net_validate_px6(n->dst_prefix, n->dst_pxlen) && net_validate_px6(n->src_prefix, n->src_pxlen); } + +int net_validate(const net_addr *N); + + +static inline void net_normalize_ip4(net_addr_ip4 *n) +{ n->prefix = ip4_and(n->prefix, ip4_mkmask(n->pxlen)); } + +static inline void net_normalize_ip6(net_addr_ip6 *n) +{ n->prefix = ip6_and(n->prefix, ip6_mkmask(n->pxlen)); } + +static inline void net_normalize_vpn4(net_addr_vpn4 *n) +{ net_normalize_ip4((net_addr_ip4 *) n); } + +static inline void net_normalize_vpn6(net_addr_vpn6 *n) +{ net_normalize_ip6((net_addr_ip6 *) n); } + +static inline void net_normalize_ip6_sadr(net_addr_ip6_sadr *n) +{ + n->dst_prefix = ip6_and(n->dst_prefix, ip6_mkmask(n->dst_pxlen)); + n->src_prefix = ip6_and(n->src_prefix, ip6_mkmask(n->src_pxlen)); +} + +void net_normalize(net_addr *N); + + +int net_classify(const net_addr *N); +int net_format(const net_addr *N, char *buf, int buflen); +int rd_format(const u64 rd, char *buf, int buflen); + +static inline int ipa_in_px4(ip4_addr a, ip4_addr prefix, uint pxlen) +{ return ip4_zero(ip4_and(ip4_xor(a, prefix), ip4_mkmask(pxlen))); } + +static inline int ipa_in_px6(ip6_addr a, ip6_addr prefix, uint pxlen) +{ return ip6_zero(ip6_and(ip6_xor(a, prefix), ip6_mkmask(pxlen))); } + +static inline int ipa_in_net_ip4(ip4_addr a, const net_addr_ip4 *n) +{ return ipa_in_px4(a, n->prefix, n->pxlen); } + +static inline int ipa_in_net_ip6(ip6_addr a, const net_addr_ip6 *n) +{ return ipa_in_px6(a, n->prefix, n->pxlen); } + +static inline int net_in_net_ip4(const net_addr_ip4 *a, const net_addr_ip4 *b) +{ 
return (a->pxlen >= b->pxlen) && ipa_in_px4(a->prefix, b->prefix, b->pxlen); } + +static inline int net_in_net_ip6(const net_addr_ip6 *a, const net_addr_ip6 *b) +{ return (a->pxlen >= b->pxlen) && ipa_in_px6(a->prefix, b->prefix, b->pxlen); } + +static inline int net_in_net_dst_ip6_sadr(const net_addr_ip6_sadr *a, const net_addr_ip6_sadr *b) +{ return (a->dst_pxlen >= b->dst_pxlen) && ipa_in_px6(a->dst_prefix, b->dst_prefix, b->dst_pxlen); } + +static inline int net_in_net_src_ip6_sadr(const net_addr_ip6_sadr *a, const net_addr_ip6_sadr *b) +{ return (a->src_pxlen >= b->src_pxlen) && ipa_in_px6(a->src_prefix, b->src_prefix, b->src_pxlen); } + +int ipa_in_netX(const ip_addr A, const net_addr *N); +int net_in_netX(const net_addr *A, const net_addr *N); + +void net_init(void); + +#endif diff --git a/lib/patmatch_test.c b/lib/patmatch_test.c new file mode 100644 index 00000000..d65f316f --- /dev/null +++ b/lib/patmatch_test.c @@ -0,0 +1,149 @@ +/* + * BIRD Library -- Pattern Matching Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" + +#include "nest/bird.h" +#include "lib/string.h" + +#define MATCH (int) { 1 } +#define NOMATCH (int) { 0 } + +struct match_pair { + byte *pattern; + byte *data; +}; + +static int +test_matching(void *out_, const void *in_, const void *expected_out_) +{ + int *out = out_; + const struct match_pair *in = in_; + const int *expected_out = expected_out_; + + *out = patmatch(in->pattern, in->data); + + return *out == *expected_out; +} + +static void +fmt_match_pair(char *buf, size_t size, const void *data) +{ + const struct match_pair *mp = data; + snprintf(buf, size, "pattern: '%s', subject: '%s'", mp->pattern, mp->data); +} + +static void +fmt_match_result(char *buf, size_t size, const void *data) +{ + const int *result = data; + snprintf(buf, size, *result ? 
"match" : "no-match"); +} + +static int +t_matching(void) +{ + struct bt_pair test_vectors[] = { + { + .in = & (struct match_pair) { + .pattern = "", + .data = "", + }, + .out = & MATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "*", + .data = "", + }, + .out = & MATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "\\*", + .data = "*", + }, + .out = & MATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "\\*", + .data = "a", + }, + .out = & NOMATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "?", + .data = "", + }, + .out = & NOMATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "abcdefghijklmnopqrstuvwxyz", + .data = "abcdefghijklmnopqrstuvwxyz", + }, + .out = & MATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "??????????????????????????", + .data = "abcdefghijklmnopqrstuvwxyz", + }, + .out = & MATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "*abcdefghijklmnopqrstuvwxyz*", + .data = "abcdefghijklmnopqrstuvwxyz", + }, + .out = & MATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "ab?defg*jklmnop*stu*wxy*z", + .data = "abcdefghijklmnopqrstuvwxyz", + }, + .out = & MATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "abcdefghijklmnopqrstuvwxyz", + .data = "abcdefghijklmnopqrtuvwxyz", + }, + .out = & NOMATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "abcdefghijklmnopqr?uvwxyz", + .data = "abcdefghijklmnopqrstuvwxyz", + }, + .out = & NOMATCH, + }, + { + .in = & (struct match_pair) { + .pattern = "aa*aaaaa?aaaaaaaaaaaaaaaaaaa", + .data = "aaaaaaaaaaaaaaaaaaaaaaaaaa", + }, + .out = & NOMATCH, + }, + }; + + return bt_assert_batch(test_vectors, test_matching, fmt_match_pair, fmt_match_result); +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_matching, "Pattern matching"); + + return bt_exit_value(); +} diff --git a/lib/printf.c b/lib/printf.c index 4fd75c9b..533a1300 100644 --- a/lib/printf.c +++ b/lib/printf.c @@ -118,16 +118,17 @@ 
static char * number(char * str, long num, int base, int size, int precision, * @fmt: format string * @args: a list of arguments to be formatted * - * This functions acts like ordinary sprintf() except that it checks - * available space to avoid buffer overflows and it allows some more - * format specifiers: |%I| for formatting of IP addresses (any non-zero - * width is automatically replaced by standard IP address width which - * depends on whether we use IPv4 or IPv6; |%#I| gives hexadecimal format), - * |%R| for Router / Network ID (u32 value printed as IPv4 address) - * |%lR| for 64bit Router / Network ID (u64 value printed as eight :-separated octets) - * and |%m| resp. |%M| for error messages (uses strerror() to translate @errno code to - * message text). On the other hand, it doesn't support floating - * point numbers. + * This function acts like ordinary sprintf() except that it checks available + * space to avoid buffer overflows and it allows some more format specifiers: + * |%I| for formatting of IP addresses (width of 1 is automatically replaced by + * standard IP address width which depends on whether we use IPv4 or IPv6; |%I4| + * or |%I6| can be used for explicit ip4_addr / ip6_addr arguments), |%N| for + * generic network addresses (net_addr *), |%R| for Router / Network ID (u32 + * value printed as IPv4 address), |%lR| for 64bit Router / Network ID (u64 + * value printed as eight :-separated octets), |%t| for time values (btime) with + * specified subsecond precision, and |%m| resp. |%M| for error messages (uses + * strerror() to translate @errno code to message text). On the other hand, it + * doesn't support floating point numbers. * * Result: number of characters of the output string or -1 if * the buffer space was insufficient. 
@@ -139,9 +140,11 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) int i, base; u32 x; u64 X; + btime t; + s64 t1, t2; char *str, *start; const char *s; - char ipbuf[MAX(STD_ADDRESS_P_LENGTH,ROUTER_ID_64_LENGTH)+1]; + char ipbuf[NET_MAX_TEXT_LENGTH+1]; struct iface *iface; int flags; /* flags to number() */ @@ -158,7 +161,7 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) *str++ = *fmt; continue; } - + /* process flags */ flags = 0; repeat: @@ -170,7 +173,7 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) case '#': flags |= SPECIAL; goto repeat; case '0': flags |= ZEROPAD; goto repeat; } - + /* get field width */ field_width = -1; if (is_digit(*fmt)) @@ -188,7 +191,7 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) /* get the precision */ precision = -1; if (*fmt == '.') { - ++fmt; + ++fmt; if (is_digit(*fmt)) precision = skip_atoi(&fmt); else if (*fmt == '*') { @@ -238,6 +241,14 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) case 'M': s = strerror(va_arg(args, int)); goto str; + case 'N': { + net_addr *n = va_arg(args, net_addr *); + if (field_width == 1) + field_width = net_max_text_length[n->type]; + net_format(n, ipbuf, sizeof(ipbuf)); + s = ipbuf; + goto str; + } case 's': s = va_arg(args, char *); if (!s) @@ -271,7 +282,6 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) return -1; continue; - case 'n': if (qualifier == 'l') { long * ip = va_arg(args, long *); @@ -284,14 +294,35 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) /* IP address */ case 'I': - if (flags & SPECIAL) - ipa_ntox(va_arg(args, ip_addr), ipbuf); - else { - ipa_ntop(va_arg(args, ip_addr), ipbuf); - if (field_width == 1) - field_width = STD_ADDRESS_P_LENGTH; + if (fmt[1] == '4') { + /* Explicit IPv4 address */ + ip4_addr a = va_arg(args, ip4_addr); + ip4_ntop(a, ipbuf); + i = IP4_MAX_TEXT_LENGTH; + fmt++; + } else if (fmt[1] == '6') { + /* 
Explicit IPv6 address */ + ip6_addr a = va_arg(args, ip6_addr); + ip6_ntop(a, ipbuf); + i = IP6_MAX_TEXT_LENGTH; + fmt++; + } else { + /* Just IP address */ + ip_addr a = va_arg(args, ip_addr); + + if (ipa_is_ip4(a)) { + ip4_ntop(ipa_to_ip4(a), ipbuf); + i = IP4_MAX_TEXT_LENGTH; + } else { + ip6_ntop(ipa_to_ip6(a), ipbuf); + i = IP6_MAX_TEXT_LENGTH; + } } + s = ipbuf; + if (field_width == 1) + field_width = i; + goto str; /* Interface scope after link-local IP address */ @@ -311,7 +342,7 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) /* Router/Network ID - essentially IPv4 address in u32 value */ case 'R': - if(qualifier == 'l') { + if (qualifier == 'l') { X = va_arg(args, u64); bsprintf(ipbuf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", ((X >> 56) & 0xff), @@ -326,15 +357,55 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) else { x = va_arg(args, u32); - bsprintf(ipbuf, "%d.%d.%d.%d", - ((x >> 24) & 0xff), - ((x >> 16) & 0xff), - ((x >> 8) & 0xff), - (x & 0xff)); + ip4_ntop(ip4_from_u32(x), ipbuf); } s = ipbuf; goto str; + case 't': + t = va_arg(args, btime); + t1 = t TO_S; + t2 = t - t1 S; + + if (precision < 0) + precision = 3; + + if (precision > 6) + precision = 6; + + /* Compute field_width for second part */ + if ((precision > 0) && (field_width > 0)) + field_width -= (1 + precision); + + if (field_width < 0) + field_width = 0; + + /* Print seconds */ + flags |= SIGN; + str = number(str, t1, 10, field_width, 0, flags, size); + if (!str) + return -1; + + if (precision > 0) + { + size -= (str-start); + start = str; + + if ((1 + precision) > size) + return -1; + + /* Convert microseconds to requested precision */ + for (i = precision; i < 6; i++) + t2 /= 10; + + /* Print sub-seconds */ + *str++ = '.'; + str = number(str, t2, 10, precision, 0, ZEROPAD, size - 1); + if (!str) + return -1; + } + goto done; + /* integer number formats - set up the flags and "break" */ case 'o': base = 8; @@ -376,6 +447,7 @@ int 
bvsnprintf(char *buf, int size, const char *fmt, va_list args) str = number(str, num, base, field_width, precision, flags, size); if (!str) return -1; + done: ; } if (!size) return -1; @@ -442,6 +514,10 @@ int buffer_vprint(buffer *buf, const char *fmt, va_list args) { int i = bvsnprintf((char *) buf->pos, buf->end - buf->pos, fmt, args); + + if ((i < 0) && (buf->pos < buf->end)) + *buf->pos = 0; + buf->pos = (i >= 0) ? (buf->pos + i) : buf->end; return i; } @@ -453,9 +529,12 @@ buffer_print(buffer *buf, const char *fmt, ...) int i; va_start(args, fmt); - i=bvsnprintf((char *) buf->pos, buf->end - buf->pos, fmt, args); + i = bvsnprintf((char *) buf->pos, buf->end - buf->pos, fmt, args); va_end(args); + if ((i < 0) && (buf->pos < buf->end)) + *buf->pos = 0; + buf->pos = (i >= 0) ? (buf->pos + i) : buf->end; return i; } @@ -464,13 +543,13 @@ void buffer_puts(buffer *buf, const char *str) { byte *bp = buf->pos; - byte *be = buf->end; + byte *be = buf->end - 1; while (bp < be && *str) *bp++ = *str++; - if (bp < be) + if (bp <= be) *bp = 0; - buf->pos = bp; + buf->pos = (bp < be) ? bp : buf->end; } diff --git a/lib/printf_test.c b/lib/printf_test.c new file mode 100644 index 00000000..a2683d93 --- /dev/null +++ b/lib/printf_test.c @@ -0,0 +1,79 @@ +/* + * BIRD Library -- String Functions Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" + +#include "lib/string.h" + +#define BSPRINTF(nw, res, buf, fmt, ...) \ + do { \ + int n = bsprintf(buf, fmt, ##__VA_ARGS__); \ + bt_assert_msg(n == nw, "fmt=\"%s\" returns n=%d, want %d", fmt, n, nw); \ + bt_assert_msg(buf[n] == 0, "fmt=\"%s\" buf[%d] should be \'\\0\', found 0x%02x", fmt, n, buf[n]); \ + bt_assert_msg(memcmp(buf, res, nw) == 0, "fmt=\"%s\" writes \"%*s\", want \"%*s\"", fmt, (n < nw ? 
n : nw), buf, nw, res); \ + } while (0) + +static int +t_simple(void) +{ + char buf[256]; + memset(buf, 0xa5, 256); + + BSPRINTF(0, "", buf, "", NULL); + BSPRINTF(1, "%", buf, "%%", NULL); + BSPRINTF(2, "%%", buf, "%%%%", NULL); + + BSPRINTF(1, "\x00", buf, "%c", 0); + BSPRINTF(1, "@", buf, "@", 64); + BSPRINTF(1, "\xff", buf, "%c", 0xff); + + errno = 5; + BSPRINTF(18, "Input/output error", buf, "%m"); + errno = 0; + + BSPRINTF(18, "Input/output error", buf, "%M", 5); + + BSPRINTF(11, "TeSt%StRiNg", buf, "%s", "TeSt%StRiNg"); + + if (sizeof(void *) == 4) + BSPRINTF(8, "1a15600d", buf, "%p", (void *) 0x1a15600d); + else + BSPRINTF(16, "00000fee1a15600d", buf, "%p", (void *) 0xfee1a15600d); + + long ln = 0; + BSPRINTF(10, "TeStStRiNg", buf, "TeStS%lntRiNg", &ln); + bt_assert_msg(ln == 5, "fmt=\"TeStS%%lntRiNg\", &ln makes ln=%ld, want 5", ln); + + BSPRINTF(2, "%d", buf, "%%d", 1); + BSPRINTF(1, "1", buf, "%d", 1); + BSPRINTF(2, "+1", buf, "%+d", 1); + BSPRINTF(2, " 1", buf, "% d", 1); + BSPRINTF(2, "-1", buf, "%d", -1); + BSPRINTF(11, "-2147483648", buf, "%d", -2147483648); + + BSPRINTF(7, "123.456", buf, "%t", (btime) 123456789); + BSPRINTF(7, "123.456", buf, "%2t", (btime) 123456789); + BSPRINTF(8, " 123.456", buf, "%8t", (btime) 123456789); + BSPRINTF(4, " 123", buf, "%4.0t", (btime) 123456789); + BSPRINTF(8, "123.4567", buf, "%8.4t", (btime) 123456789); + BSPRINTF(9, "0123.4567", buf, "%09.4t", (btime) 123456789); + BSPRINTF(12, " 123.456789", buf, "%12.10t", (btime) 123456789); + BSPRINTF(8, " 123.004", buf, "%8t", (btime) 123004 MS); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_simple, "printf without varargs"); + + return bt_exit_value(); +} diff --git a/lib/resource.c b/lib/resource.c index 68718dfb..ab8c800f 100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -31,7 +31,7 @@ struct pool { resource r; list inside; - char *name; + const char *name; }; static void pool_dump(resource *); @@ -61,7 +61,7 @@ 
static int indent; * parent pool. */ pool * -rp_new(pool *p, char *name) +rp_new(pool *p, const char *name) { pool *z = ralloc(p, &pool_class); z->name = name; diff --git a/lib/resource.h b/lib/resource.h index 1a62d389..d9d4bb8f 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -37,7 +37,7 @@ struct resclass { typedef struct pool pool; void resource_init(void); -pool *rp_new(pool *, char *); /* Create new pool */ +pool *rp_new(pool *, const char *); /* Create new pool */ void rfree(void *); /* Free single resource */ void rdump(void *); /* Dump to debug output */ size_t rmemsize(void *res); /* Return size of memory used by the resource */ @@ -59,11 +59,23 @@ void mb_free(void *); typedef struct linpool linpool; +typedef struct lp_state { + void *current, *large; + byte *ptr; +} lp_state; + linpool *lp_new(pool *, unsigned blk); void *lp_alloc(linpool *, unsigned size); /* Aligned */ void *lp_allocu(linpool *, unsigned size); /* Unaligned */ void *lp_allocz(linpool *, unsigned size); /* With clear */ void lp_flush(linpool *); /* Free everything, but leave linpool */ +void lp_save(linpool *m, lp_state *p); /* Save state */ +void lp_restore(linpool *m, lp_state *p); /* Restore state */ + +extern const int lp_chunk_size; +#define LP_GAS 1024 +#define LP_GOOD_SIZE(x) (((x + LP_GAS - 1) & (~(LP_GAS - 1))) - lp_chunk_size) +#define lp_new_default(p) lp_new(p, LP_GOOD_SIZE(LP_GAS*4)) /* Slabs */ diff --git a/lib/slist_test.c b/lib/slist_test.c new file mode 100644 index 00000000..069e361c --- /dev/null +++ b/lib/slist_test.c @@ -0,0 +1,384 @@ +/* + * BIRD Library -- Safe Linked Lists Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#include "test/birdtest.h" + +#include "lib/slists.h" + +#define MAX_NUM 1000 + +static snode nodes[MAX_NUM]; +static slist lst; + +static void +show_list(void) +{ + bt_debug("\n"); + bt_debug("list.null is at %p and point to %p \n", &lst.null, lst.null); + bt_debug("list.head is at %p and point to %p \n", &lst.head, lst.head); + bt_debug("list.tail is at %p and point to %p \n", &lst.tail, lst.tail); + bt_debug("list.tail_readers is at %p and point to %p \n", &lst.tail_readers, lst.tail_readers); + + int i; + for (i = 0; i < MAX_NUM; i++) + bt_debug("n[%3i] is at %p, .prev (%p) points to %p, .next (%p) points to %p, .readers (%p) points to %p \n", + i, &nodes[i], &(nodes[i].prev), nodes[i].prev, &(nodes[i].next), nodes[i].next, &(nodes[i].readers), nodes[i].readers); +} + +static int +is_filled_list_well_linked(void) +{ + int i; + bt_assert(lst.head == &nodes[0]); + bt_assert(lst.tail == &nodes[MAX_NUM-1]); + bt_assert((void *) nodes[0].prev == (void *) &lst.head); + bt_assert((void *) nodes[MAX_NUM-1].next == (void *) &lst.null); + + for (i = 0; i < MAX_NUM; i++) + { + if (i < (MAX_NUM-1)) + bt_assert(nodes[i].next == &nodes[i+1]); + + if (i > 0) + bt_assert(nodes[i].prev == &nodes[i-1]); + } + + return 1; +} + +static int +is_empty_list_well_unlinked(void) +{ + bt_assert(lst.head == SNODE &lst.null); + bt_assert(lst.tail == SNODE &lst.head); + + bt_assert(EMPTY_SLIST(lst)); + + return 1; +} + +static void +init_list__(slist *l, struct snode nodes[]) +{ + s_init_list(l); + + int i; + for (i = 0; i < MAX_NUM; i++) + { + nodes[i].next = NULL; + nodes[i].prev = NULL; + } +} + +static void +init_list_(void) +{ + init_list__(&lst, nodes); +} + +static int +t_add_tail(void) +{ + int i; + + init_list_(); + for (i = 0; i < MAX_NUM; i++) + { + s_add_tail(&lst, &nodes[i]); + bt_debug("."); + bt_assert(lst.tail == &nodes[i]); + bt_assert(lst.head == &nodes[0]); + bt_assert((void *) nodes[i].next == (void *) &lst.null); + if (i > 0) + { + bt_assert(nodes[i-1].next == 
&nodes[i]); + bt_assert(nodes[i].prev == &nodes[i-1]); + } + } + + bt_assert(is_filled_list_well_linked()); + + return 1; +} + +static int +t_add_head(void) +{ + int i; + + init_list_(); + for (i = MAX_NUM-1; i >= 0; i--) + { + s_add_head(&lst, &nodes[i]); + bt_debug("."); + bt_assert(lst.head == &nodes[i]); + bt_assert(lst.tail == &nodes[MAX_NUM-1]); + if (i < MAX_NUM-1) + { + bt_assert(nodes[i+1].prev == &nodes[i]); + bt_assert(nodes[i].next == &nodes[i+1]); + } + } + + bt_assert(is_filled_list_well_linked()); + + return 1; +} + +static void +insert_node_(snode *n, snode *after) +{ + s_insert_node(n, after); + bt_debug("."); +} + +static int +t_insert_node(void) +{ + int i; + + init_list_(); + + // add first node + insert_node_(&nodes[0], SNODE &lst.head); + + // add odd nodes + for (i = 2; i < MAX_NUM; i+=2) + insert_node_(&nodes[i], &nodes[i-2]); + + // add even nodes + for (i = 1; i < MAX_NUM; i+=2) + insert_node_(&nodes[i], &nodes[i-1]); + + bt_debug("\n"); + bt_assert(is_filled_list_well_linked()); + + return 1; +} + +static void +fill_list2(slist *l, snode nodes[]) +{ + int i; + for (i = 0; i < MAX_NUM; i++) + s_add_tail(l, &nodes[i]); +} + +static void +fill_list(void) +{ + fill_list2(&lst, SNODE nodes); +} + + +static int +t_remove_node(void) +{ + int i; + + init_list_(); + + /* Fill & Remove & Check */ + fill_list(); + for (i = 0; i < MAX_NUM; i++) + s_rem_node(&nodes[i]); + bt_assert(is_empty_list_well_unlinked()); + + /* Fill & Remove the half of nodes & Check & Remove the rest nodes & Check */ + fill_list(); + for (i = 0; i < MAX_NUM; i+=2) + s_rem_node(&nodes[i]); + + int tail_node_index = (MAX_NUM % 2) ? 
MAX_NUM - 2 : MAX_NUM - 1; + bt_assert(lst.head == &nodes[1]); + bt_assert(lst.tail == &nodes[tail_node_index]); + bt_assert(nodes[tail_node_index].next == SNODE &lst.null); + + for (i = 1; i < MAX_NUM; i+=2) + { + if (i > 1) + bt_assert(nodes[i].prev == &nodes[i-2]); + if (i < tail_node_index) + bt_assert(nodes[i].next == &nodes[i+2]); + } + + for (i = 1; i < MAX_NUM; i+=2) + s_rem_node(&nodes[i]); + bt_assert(is_empty_list_well_unlinked()); + + return 1; +} + +static int +t_add_tail_list(void) +{ + snode nodes2[MAX_NUM]; + slist l2; + + init_list__(&lst, SNODE &nodes); + fill_list2(&lst, SNODE &nodes); + + init_list__(&l2, SNODE &nodes2); + fill_list2(&l2, SNODE &nodes2); + + s_add_tail_list(&lst, &l2); + + bt_assert(nodes[MAX_NUM-1].next == &nodes2[0]); + bt_assert(nodes2[0].prev == &nodes[MAX_NUM-1]); + bt_assert(lst.tail == &nodes2[MAX_NUM-1]); + + return 1; +} + +void +dump(const char *str, slist *a) +{ + snode *x; + + bt_debug("%s \n", str); + for (x = SHEAD(*a); x; x = x->next) + { + siterator *i, *j; + bt_debug("%p", x); + j = (siterator *) x; + for (i = x->readers; i; i = i->next) + { + if (i->prev != j) + bt_debug(" ???"); + j = i; + bt_debug(" [%p:%p]", i, i->node); + } + bt_debug("\n"); + } + bt_debug("---\n"); +} + +static int +t_iterator_walk(void) +{ + snode *node; + siterator iter; + + init_list_(); + fill_list(); + + int k; + int i = 0; + + show_list(); + + s_init(&iter, &lst); + WALK_SLIST(node, lst) + { + s_get(&iter); + s_put(&iter, node); + bt_debug("node->readers: %p, iter: %p, nodes[%d].readers: %p, node: %p, nodes[i]: %p, node->next: %p \n", + node->readers, &iter, i, nodes[i].readers, node, &(nodes[i]), node->next); + bt_assert(node->readers == &iter); + bt_assert(node->readers == nodes[i].readers); + bt_assert(node == &(nodes[i])); + for (k = 0; k < MAX_NUM; k++) + if (k != i) + bt_assert(nodes[k].readers == NULL); + + dump("",&lst); + i++; + } + + return 1; +} + +static int +t_original(void) +{ + slist a, b; + snode *x, *y; + siterator 
i, j; + + s_init_list(&a); + s_init_list(&b); + x = xmalloc(sizeof(*x)); + s_add_tail(&a, x); + x = xmalloc(sizeof(*x)); + s_add_tail(&a, x); + x = xmalloc(sizeof(*x)); + s_add_tail(&a, x); + dump("1", &a); + + s_init(&i, &a); + s_init(&j, &a); + dump("2", &a); + + x = s_get(&i); + bt_debug("Got %p\n", x); + dump("3", &a); + + s_put(&i, x->next); + dump("4", &a); + + y = s_get(&j); + while (y) + { + s_put(&j, y); + dump("5*", &a); + y = s_get(&j)->next; + } + + dump("5 done", &a); + + s_rem_node(a.head->next); + dump("6 (deletion)", &a); + + s_put(&i, s_get(&i)->next); + dump("6 (relink)", &a); + + x = xmalloc(sizeof(*x)); + s_add_tail(&b, x); + dump("7 (second list)", &b); + + s_add_tail_list(&b, &a); + dump("8 (after merge)", &b); + + return 1; +} + +static int +t_safe_del_walk(void) +{ + init_list_(); + fill_list(); + + show_list(); + + snode *node, *node_next; + WALK_SLIST_DELSAFE(node,node_next, lst) + { + bt_debug("Will remove node %p \n", node); + s_rem_node(SNODE node); + } + bt_assert(is_empty_list_well_unlinked()); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_add_tail, "Adding nodes to tail of list"); + bt_test_suite(t_add_head, "Adding nodes to head of list"); + bt_test_suite(t_insert_node, "Inserting nodes to list"); + bt_test_suite(t_remove_node, "Removing nodes from list"); + bt_test_suite(t_add_tail_list, "At the tail of a list adding the another list"); + bt_test_suite(t_iterator_walk, "Iterator walk"); + bt_test_suite(t_safe_del_walk, "WALK_SLIST_DELSAFE and s_rem_node all nodes"); + bt_test_suite(t_original, "The original BIRD test suit for SLIST"); + + return bt_exit_value(); +} diff --git a/lib/slists.c b/lib/slists.c index 6e0df39e..00f3c84f 100644 --- a/lib/slists.c +++ b/lib/slists.c @@ -150,85 +150,3 @@ s_add_tail_list(slist *to, slist *l) to->tail = q; s_merge((snode *) &l->null, (snode *) &to->null); } - -#ifdef TEST - -#include "lib/resource.h" -#include <stdio.h> - -void dump(char 
*c, slist *a) -{ - snode *x; - - puts(c); - for(x=SHEAD(*a); x; x=x->next) - { - siterator *i, *j; - printf("%p", x); - j = (siterator *) x; - for(i=x->readers; i; i=i->next) - { - if (i->prev != j) - printf(" ???"); - j = i; - printf(" [%p:%p]", i, i->node); - } - putchar('\n'); - } - puts("---"); -} - -int main(void) -{ - slist a, b; - snode *x, *y; - siterator i, j; - - s_init_list(&a); - s_init_list(&b); - x = xmalloc(sizeof(*x)); - s_add_tail(&a, x); - x = xmalloc(sizeof(*x)); - s_add_tail(&a, x); - x = xmalloc(sizeof(*x)); - s_add_tail(&a, x); - dump("1", &a); - - s_init(&i, &a); - s_init(&j, &a); - dump("2", &a); - - x = s_get(&i); - printf("Got %p\n", x); - dump("3", &a); - - s_put(&i, x->next); - dump("4", &a); - - y = s_get(&j); - while (y) - { - s_put(&j, y); - dump("5*", &a); - y = s_get(&j)->next; - } - - dump("5 done", &a); - - s_rem_node(a.head->next); - dump("6 (deletion)", &a); - - s_put(&i, s_get(&i)->next); - dump("6 (relink)", &a); - - x = xmalloc(sizeof(*x)); - s_add_tail(&b, x); - dump("7 (second list)", &b); - - s_add_tail_list(&b, &a); - dump("8 (after merge)", &b); - - return 0; -} - -#endif diff --git a/lib/socket.h b/lib/socket.h index 0769489b..e53ec5ba 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -10,16 +10,40 @@ #define _BIRD_SOCKET_H_ #include <errno.h> -// #include <sys/socket.h> #include "lib/resource.h" +#ifdef HAVE_LIBSSH +#define LIBSSH_LEGACY_0_4 +#include <libssh/libssh.h> +#endif + +#ifdef HAVE_LIBSSH +struct ssh_sock { + const char *username; /* (Required) SSH user name */ + const char *server_hostkey_path; /* (Optional) Filepath to the SSH public key of remote side, can be knownhost file */ + const char *client_privkey_path; /* (Optional) Filepath to the SSH private key of BIRD */ + const char *subsystem; /* (Optional) Name of SSH subsystem */ + ssh_session session; /* Internal */ + ssh_channel channel; /* Internal */ + int state; /* Internal */ +#define SK_SSH_CONNECT 0 /* Start state */ +#define SK_SSH_SERVER_KNOWN 1 /*
Internal */ +#define SK_SSH_USERAUTH 2 /* Internal */ +#define SK_SSH_CHANNEL 3 /* Internal */ +#define SK_SSH_SESSION 4 /* Internal */ +#define SK_SSH_SUBSYSTEM 5 /* Internal */ +#define SK_SSH_ESTABLISHED 6 /* Final state */ +}; +#endif typedef struct birdsock { resource r; pool *pool; /* Pool where incoming connections should be allocated (for SK_xxx_PASSIVE) */ int type; /* Socket type */ + int subtype; /* Socket subtype */ void *data; /* User data */ ip_addr saddr, daddr; /* IPA_NONE = unspecified */ + const char *host; /* Alternative to daddr, NULL = unspecified */ uint sport, dport; /* 0 = unspecified (for IP: protocol type) */ int tos; /* TOS / traffic class, -1 = default */ int priority; /* Local socket priority, -1 = default */ @@ -46,14 +70,15 @@ typedef struct birdsock { uint lifindex; /* local interface that received the datagram */ /* laddr and lifindex are valid only if SKF_LADDR_RX flag is set to request it */ - int af; /* Address family (AF_INET, AF_INET6 or 0 for non-IP) of fd */ + int af; /* System-dependent address family (e.g.
AF_INET) */ int fd; /* System-dependent data */ int index; /* Index in poll buffer */ int rcv_ttl; /* TTL of last received datagram */ node n; void *rbuf_alloc, *tbuf_alloc; char *password; /* Password for MD5 authentication */ - char *err; /* Error message */ + const char *err; /* Error message */ + struct ssh_sock *ssh; /* Used in SK_SSH */ } sock; sock *sock_new(pool *); /* Allocate new socket */ @@ -69,19 +94,12 @@ void sk_set_tbsize(sock *s, uint val); /* Resize TX buffer, keeping content */ void sk_set_tbuf(sock *s, void *tbuf); /* Switch TX buffer, NULL-> return to internal */ void sk_dump_all(void); +int sk_is_ipv4(sock *s); /* True if socket is IPv4 */ +int sk_is_ipv6(sock *s); /* True if socket is IPv6 */ + static inline int sk_send_buffer_empty(sock *sk) { return sk->tbuf == sk->tpos; } - -#ifdef IPV6 -#define sk_is_ipv4(X) 0 -#define sk_is_ipv6(X) 1 -#else -#define sk_is_ipv4(X) 1 -#define sk_is_ipv6(X) 0 -#endif - - int sk_setup_multicast(sock *s); /* Prepare UDP or IP socket for multicasting */ int sk_join_group(sock *s, ip_addr maddr); /* Join multicast group on sk iface */ int sk_leave_group(sock *s, ip_addr maddr); /* Leave multicast group on sk iface */ @@ -100,7 +118,6 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou /* Socket flags */ -#define SKF_V4ONLY 0x01 /* Use IPv4 for IP sockets */ #define SKF_V6ONLY 0x02 /* Use IPV6_V6ONLY socket option */ #define SKF_LADDR_RX 0x04 /* Report local address for RX packets */ #define SKF_TTL_RX 0x08 /* Report TTL / Hop Limit for RX packets */ @@ -124,26 +141,38 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou #define SK_MAGIC 7 /* Internal use by sysdep code */ #define SK_UNIX_PASSIVE 8 #define SK_UNIX 9 +#define SK_SSH_ACTIVE 10 /* - - * * - ? 
- DA = host */ +#define SK_SSH 11 + +/* + * Socket subtypes + */ + +#define SK_IPV4 1 +#define SK_IPV6 2 /* - * For SK_UDP or SK_IP sockets setting DA/DP allows to use sk_send(), - * otherwise sk_send_to() must be used. + * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either + * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr). But + * these specifications must be consistent. + * + * For SK_UDP or SK_IP sockets setting DA/DP allows to use sk_send(), otherwise + * sk_send_to() must be used. * - * For SK_IP sockets setting DP specifies protocol number, which is used - * for both receiving and sending. + * For SK_IP sockets setting DP specifies protocol number, which is used for + * both receiving and sending. * - * For multicast on SK_UDP or SK_IP sockets set IF and TTL, - * call sk_setup_multicast() to enable multicast on that socket, - * and then use sk_join_group() and sk_leave_group() to manage - * a set of received multicast groups. + * For multicast on SK_UDP or SK_IP sockets set IF and TTL, call + * sk_setup_multicast() to enable multicast on that socket, and then use + * sk_join_group() and sk_leave_group() to manage a set of received multicast + * groups. * - * For datagram (SK_UDP, SK_IP) sockets, there are two ways to handle - * source address. The socket could be bound to it using bind() - * syscall, but that also forbids the reception of multicast packets, - * or the address could be set on per-packet basis using platform - * dependent options (but these are not available in some corner - * cases). The first way is used when SKF_BIND is specified, the - * second way is used otherwise. + * For datagram (SK_UDP, SK_IP) sockets, there are two ways to handle source + * address. 
The socket could be bound to it using bind() syscall, but that also + * forbids the reception of multicast packets, or the address could be set on + * per-packet basis using platform dependent options (but these are not + * available in some corner cases). The first way is used when SKF_BIND is + * specified, the second way is used otherwise. */ #endif diff --git a/lib/string.h b/lib/string.h index 75cb88dd..0d34f9c5 100644 --- a/lib/string.h +++ b/lib/string.h @@ -13,6 +13,8 @@ #include <string.h> #include <strings.h> +#include "lib/resource.h" + int bsprintf(char *str, const char *fmt, ...); int bvsprintf(char *str, const char *fmt, va_list args); int bsnprintf(char *str, int size, const char *fmt, ...); @@ -39,6 +41,15 @@ xstrdup(const char *c) return z; } +static inline char * +lp_strdup(linpool *lp, const char *c) +{ + size_t l = strlen(c) + 1; + char *z = lp_allocu(lp, l); + memcpy(z, c, l); + return z; +} + static inline void memset32(void *D, u32 val, uint n) { @@ -8,22 +8,30 @@ */ #include "nest/bird.h" +#include "lib/timer.h" -void -tbf_update(struct tbf *f) +int +tbf_limit(struct tbf *f) { - bird_clock_t delta = now - f->timestamp; + btime delta = current_time() - f->timestamp; - if (delta == 0) - return; - - f->timestamp = now; + if (delta > 0) + { + u64 next = f->count + delta * f->rate; + u64 burst = (u64) f->burst << 20; + f->count = MIN(next, burst); + f->timestamp += delta; + } - if ((0 < delta) && (delta < f->burst)) + if (f->count < 1000000) { - u32 next = f->count + delta * f->rate; - f->count = MIN(next, f->burst); + f->drop++; + return 1; } else - f->count = f->burst; + { + f->count -= 1000000; + f->drop = 0; + return 0; + } } diff --git a/lib/timer.c b/lib/timer.c new file mode 100644 index 00000000..ed731d26 --- /dev/null +++ b/lib/timer.c @@ -0,0 +1,378 @@ +/* + * BIRD -- Timers + * + * (c) 2013--2017 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2013--2017 CZ.NIC z.s.p.o. 
+ * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/** + * DOC: Timers + * + * Timers are resources which represent a wish of a module to call a function at + * the specified time. The timer code does not guarantee exact timing, only that + * a timer function will not be called before the requested time. + * + * In BIRD, time is represented by values of the &btime type which is signed + * 64-bit integer interpreted as a relative number of microseconds since some + * fixed time point in past. The current time can be obtained by current_time() + * function with reasonable accuracy and is monotonic. There is also a current + * 'wall-clock' real time obtainable by current_real_time() reported by OS. + * + * Each timer is described by a &timer structure containing a pointer to the + * handler function (@hook), data private to this function (@data), time the + * function should be called at (@expires, 0 for inactive timers), for the other + * fields see |timer.h|. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#include "nest/bird.h" + +#include "lib/heap.h" +#include "lib/resource.h" +#include "lib/timer.h" + + +struct timeloop main_timeloop; + + +#ifdef USE_PTHREADS + +#include <pthread.h> + +/* Data accessed and modified from proto/bfd/io.c */ +pthread_key_t current_time_key; + +static inline struct timeloop * +timeloop_current(void) +{ + return pthread_getspecific(current_time_key); +} + +static inline void +timeloop_init_current(void) +{ + pthread_key_create(&current_time_key, NULL); + pthread_setspecific(current_time_key, &main_timeloop); +} + +void wakeup_kick_current(void); + +#else + +/* Just use main timeloop */ +static inline struct timeloop * timeloop_current(void) { return &main_timeloop; } +static inline void timeloop_init_current(void) { } + +#endif + +btime +current_time(void) +{ + return timeloop_current()->last_time; +} + +btime +current_real_time(void) +{ + struct timeloop *loop = timeloop_current(); + + if (!loop->real_time) + times_update_real_time(loop); + + return loop->real_time; +} + + +#define TIMER_LESS(a,b) ((a)->expires < (b)->expires) +#define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \ + heap[a]->index = (a), heap[b]->index = (b)) + + +static void +tm_free(resource *r) +{ + timer *t = (void *) r; + + tm_stop(t); +} + +static void +tm_dump(resource *r) +{ + timer *t = (void *) r; + + debug("(code %p, data %p, ", t->hook, t->data); + if (t->randomize) + debug("rand %d, ", t->randomize); + if (t->recurrent) + debug("recur %d, ", t->recurrent); + if (t->expires) + debug("expires in %d ms)\n", (t->expires - current_time()) TO_MS); + else + debug("inactive)\n"); +} + + +static struct resclass tm_class = { + "Timer", + sizeof(timer), + tm_free, + tm_dump, + NULL, + NULL +}; + +timer * +tm_new(pool *p) +{ + timer *t = ralloc(p, &tm_class); + t->index = -1; + return t; +} + +void +tm_set(timer *t, btime when) +{ + struct timeloop *loop = timeloop_current(); +
uint tc = timers_count(loop); + + if (!t->expires) + { + t->index = ++tc; + t->expires = when; + BUFFER_PUSH(loop->timers) = t; + HEAP_INSERT(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP); + } + else if (t->expires < when) + { + t->expires = when; + HEAP_INCREASE(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); + } + else if (t->expires > when) + { + t->expires = when; + HEAP_DECREASE(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); + } + +#ifdef CONFIG_BFD + /* Hack to notify BFD loops */ + if ((loop != &main_timeloop) && (t->index == 1)) + wakeup_kick_current(); +#endif +} + +void +tm_start(timer *t, btime after) +{ + tm_set(t, current_time() + MAX(after, 0)); +} + +void +tm_stop(timer *t) +{ + if (!t->expires) + return; + + struct timeloop *loop = timeloop_current(); + uint tc = timers_count(loop); + + HEAP_DELETE(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); + BUFFER_POP(loop->timers); + + t->index = -1; + t->expires = 0; +} + +void +timers_init(struct timeloop *loop, pool *p) +{ + times_init(loop); + + BUFFER_INIT(loop->timers, p, 4); + BUFFER_PUSH(loop->timers) = NULL; +} + +void io_log_event(void *hook, void *data); + +void +timers_fire(struct timeloop *loop) +{ + btime base_time; + timer *t; + + times_update(loop); + base_time = loop->last_time; + + while (t = timers_first(loop)) + { + if (t->expires > base_time) + return; + + if (t->recurrent) + { + btime when = t->expires + t->recurrent; + + if (when <= loop->last_time) + when = loop->last_time + t->recurrent; + + if (t->randomize) + when += random() % (t->randomize + 1); + + tm_set(t, when); + } + else + tm_stop(t); + + /* This is ugly hack, we want to log just timers executed from the main I/O loop */ + if (loop == &main_timeloop) + io_log_event(t->hook, t->data); + + t->hook(t); + } +} + +void +timer_init(void) +{ + timers_init(&main_timeloop, &root_pool); + timeloop_init_current(); +} + + +/** + * tm_parse_time - parse a date and time 
+ * @x: time string + * + * tm_parse_time() takes a textual representation of a date and time + * (yyyy-mm-dd[ hh:mm:ss[.sss]]) and converts it to the corresponding value of + * type &btime. + */ +btime +tm_parse_time(char *x) +{ + struct tm tm; + int usec, n1, n2, n3, r; + + r = sscanf(x, "%d-%d-%d%n %d:%d:%d%n.%d%n", + &tm.tm_year, &tm.tm_mon, &tm.tm_mday, &n1, + &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n2, + &usec, &n3); + + if ((r == 3) && !x[n1]) + tm.tm_hour = tm.tm_min = tm.tm_sec = usec = 0; + else if ((r == 6) && !x[n2]) + usec = 0; + else if ((r == 7) && !x[n3]) + { + /* Convert subsecond digits to proper precision */ + int digits = n3 - n2 - 1; + if ((usec < 0) || (usec > 999999) || (digits < 1) || (digits > 6)) + return 0; + + while (digits++ < 6) + usec *= 10; + } + else + return 0; + + tm.tm_mon--; + tm.tm_year -= 1900; + s64 ts = mktime(&tm); + if ((ts == (s64) (time_t) -1) || (ts < 0) || (ts > ((s64) 1 << 40))) + return 0; + + return ts S + usec; +} + +/** + * tm_format_time - convert date and time to textual representation + * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE + * @fmt: specification of resulting textual representation of the time + * @t: time + * + * This function formats the given relative time value @t to a textual + * date/time representation (dd-mm-yyyy hh:mm:ss) in real time. + */ +void +tm_format_time(char *x, struct timeformat *fmt, btime t) +{ + btime dt = current_time() - t; + btime rt = current_real_time() - dt; + int v1 = !fmt->limit || (dt < fmt->limit); + + tm_format_real_time(x, v1 ? fmt->fmt1 : fmt->fmt2, rt); +} + +/* Replace %f in format string with usec scaled to requested precision */ +static int +strfusec(char *buf, int size, const char *fmt, uint usec) +{ + char *str = buf; + int parity = 0; + + while (*fmt) + { + if (!size) + return 0; + + if ((fmt[0] == '%') && (!parity) && + ((fmt[1] == 'f') || (fmt[1] >= '1') && (fmt[1] <= '6') && (fmt[2] == 'f'))) + { + int digits = (fmt[1] == 'f') ? 
6 : (fmt[1] - '0'); + uint d = digits, u = usec; + + /* Convert microseconds to requested precision */ + while (d++ < 6) + u /= 10; + + int num = bsnprintf(str, size, "%0*u", digits, u); + if (num < 0) + return 0; + + fmt += (fmt[1] == 'f') ? 2 : 3; + ADVANCE(str, size, num); + } + else + { + /* Handle '%%' expression */ + parity = (*fmt == '%') ? !parity : 0; + *str++ = *fmt++; + size--; + } + } + + if (!size) + return 0; + + *str = 0; + return str - buf; +} + +void +tm_format_real_time(char *x, const char *fmt, btime t) +{ + s64 t1 = t TO_S; + s64 t2 = t - t1 S; + + time_t ts = t1; + struct tm tm; + if (!localtime_r(&ts, &tm)) + goto err; + + byte tbuf[TM_DATETIME_BUFFER_SIZE]; + if (!strfusec(tbuf, TM_DATETIME_BUFFER_SIZE, fmt, t2)) + goto err; + + if (!strftime(x, TM_DATETIME_BUFFER_SIZE, tbuf, &tm)) + goto err; + + return; + +err: + strcpy(x, "<error>"); +} diff --git a/lib/timer.h b/lib/timer.h new file mode 100644 index 00000000..ed8f0d02 --- /dev/null +++ b/lib/timer.h @@ -0,0 +1,127 @@ +/* + * BIRD -- Timers + * + * (c) 2013--2017 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2013--2017 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_TIMER_H_ +#define _BIRD_TIMER_H_ + +#include "nest/bird.h" +#include "lib/buffer.h" +#include "lib/resource.h" + + +typedef struct timer +{ + resource r; + void (*hook)(struct timer *); + void *data; + + btime expires; /* 0=inactive */ + uint randomize; /* Amount of randomization */ + uint recurrent; /* Timer recurrence */ + + int index; +} timer; + +struct timeloop +{ + BUFFER_(timer *) timers; + btime last_time; + btime real_time; +}; + +static inline uint timers_count(struct timeloop *loop) +{ return loop->timers.used - 1; } + +static inline timer *timers_first(struct timeloop *loop) +{ return (loop->timers.used > 1) ? 
loop->timers.data[1] : NULL; } + +extern struct timeloop main_timeloop; + +btime current_time(void); +btime current_real_time(void); + +//#define now (current_time() TO_S) +//#define now_real (current_real_time() TO_S) +extern btime boot_time; + +timer *tm_new(pool *p); +void tm_set(timer *t, btime when); +void tm_start(timer *t, btime after); +void tm_stop(timer *t); + +static inline int +tm_active(timer *t) +{ + return t->expires != 0; +} + +static inline btime +tm_remains(timer *t) +{ + btime now_ = current_time(); + return (t->expires > now_) ? (t->expires - now_) : 0; +} + +static inline timer * +tm_new_init(pool *p, void (*hook)(struct timer *), void *data, uint rec, uint rand) +{ + timer *t = tm_new(p); + t->hook = hook; + t->data = data; + t->recurrent = rec; + t->randomize = rand; + return t; +} + +static inline void +tm_set_max(timer *t, btime when) +{ + if (when > t->expires) + tm_set(t, when); +} + +static inline void +tm_start_max(timer *t, btime after) +{ + btime rem = tm_remains(t); + tm_start(t, MAX_(rem, after)); +} + +/* In sysdep code */ +void times_init(struct timeloop *loop); +void times_update(struct timeloop *loop); +void times_update_real_time(struct timeloop *loop); + +/* For I/O loop */ +void timers_init(struct timeloop *loop, pool *p); +void timers_fire(struct timeloop *loop); + +void timer_init(void); + + +struct timeformat { + char *fmt1, *fmt2; + btime limit; +}; + +#define TM_ISO_SHORT_S (struct timeformat){"%T", "%F", (s64) (20*3600) S_} +#define TM_ISO_SHORT_MS (struct timeformat){"%T.%3f", "%F", (s64) (20*3600) S_} +#define TM_ISO_SHORT_US (struct timeformat){"%T.%6f", "%F", (s64) (20*3600) S_} + +#define TM_ISO_LONG_S (struct timeformat){"%F %T", NULL, 0} +#define TM_ISO_LONG_MS (struct timeformat){"%F %T.%3f", NULL, 0} +#define TM_ISO_LONG_US (struct timeformat){"%F %T.%6f", NULL, 0} + +#define TM_DATETIME_BUFFER_SIZE 32 /* Buffer size required by tm_format_time() */ + +btime tm_parse_time(char *x); +void tm_format_time(char *x, 
struct timeformat *fmt, btime t); +void tm_format_real_time(char *x, const char *fmt, btime t); + +#endif diff --git a/lib/unaligned.h b/lib/unaligned.h index dc777fbf..0da1fdb4 100644 --- a/lib/unaligned.h +++ b/lib/unaligned.h @@ -17,6 +17,7 @@ * if possible. */ +#include "sysdep/unix/endian.h" #include "lib/string.h" static inline u16 @@ -28,6 +29,13 @@ get_u16(const void *p) } static inline u32 +get_u24(const void *P) +{ + const byte *p = P; + return (p[0] << 16) + (p[1] << 8) + p[2]; +} + +static inline u32 get_u32(const void *p) { u32 x; @@ -52,6 +60,13 @@ put_u16(void *p, u16 x) } static inline void +put_u24(void *p, u32 x) +{ + x = htonl(x); + memcpy(p, ((char *) &x) + 1, 3); +} + +static inline void put_u32(void *p, u32 x) { x = htonl(x); @@ -68,4 +83,22 @@ put_u64(void *p, u64 x) memcpy(p+4, &xl, 4); } +static inline void +get_u32s(const void *p, u32 *x, int n) +{ + int i; + memcpy(x, p, 4*n); + for (i = 0; i < n; i++) + x[i] = ntohl(x[i]); +} + +static inline void +put_u32s(void *p, const u32 *x, int n) +{ + int i; + for (i = 0; i < n; i++) + put_u32((byte *) p + 4*i, x[i]); +} + + #endif diff --git a/misc/bird.spec b/misc/bird.spec index bfbfc484..7b645655 100644 --- a/misc/bird.spec +++ b/misc/bird.spec @@ -1,6 +1,6 @@ Summary: BIRD Internet Routing Daemon Name: bird -Version: 1.6.3 +Version: 2.0.1 Release: 1 Copyright: GPL Group: Networking/Daemons diff --git a/nest/Makefile b/nest/Makefile index e6928668..884d3950 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -1,6 +1,8 @@ -source=rt-table.c rt-fib.c rt-attr.c rt-roa.c proto.c iface.c rt-dev.c password.c cli.c locks.c cmds.c neighbor.c \ - a-path.c a-set.c -root-rel=../ -dir-name=nest +src := a-path.c a-set.c cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../Rules +tests_src := a-set_test.c a-path_test.c +tests_targets := $(tests_targets) $(tests-target-files) +tests_objs := 
$(tests_objs) $(src-o-files) diff --git a/nest/a-path.c b/nest/a-path.c index b453f702..c0d16c30 100644 --- a/nest/a-path.c +++ b/nest/a-path.c @@ -20,263 +20,528 @@ #define put_as put_u32 #define get_as get_u32 -#define BS 4 +#define BS 4 /* Default block size of ASN (autonomous system number) */ -struct adata * -as_path_prepend(struct linpool *pool, struct adata *olda, u32 as) +#define BAD(DSC, VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; }) + +int +as_path_valid(byte *data, uint len, int bs, int confed, char *err, uint elen) { - struct adata *newa; + byte *pos = data; + char *err_dsc = NULL; + uint err_val = 0; + + while (len) + { + if (len < 2) + BAD("segment framing error", 0); + + /* Process one AS path segment */ + uint type = pos[0]; + uint slen = 2 + bs * pos[1]; + + if (len < slen) + BAD("segment framing error", len); - if (olda->length && olda->data[0] == AS_PATH_SEQUENCE && olda->data[1] < 255) - /* Starting with sequence => just prepend the AS number */ + switch (type) { - int nl = olda->length + BS; - newa = lp_alloc(pool, sizeof(struct adata) + nl); - newa->length = nl; - newa->data[0] = AS_PATH_SEQUENCE; - newa->data[1] = olda->data[1] + 1; - memcpy(newa->data + BS + 2, olda->data + 2, olda->length - 2); + case AS_PATH_SET: + case AS_PATH_SEQUENCE: + break; + + case AS_PATH_CONFED_SEQUENCE: + case AS_PATH_CONFED_SET: + if (!confed) + BAD("AS_CONFED* segment", type); + break; + + default: + BAD("unknown segment", type); } - else /* Create new path segment */ + + if (pos[1] == 0) + BAD("zero-length segment", type); + + pos += slen; + len -= slen; + } + + return 1; + +bad: + if (err) + if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0) + err[0] = 0; + + return 0; +} + +int +as_path_16to32(byte *dst, byte *src, uint len) +{ + byte *dst0 = dst; + byte *end = src + len; + uint i, n; + + while (src < end) + { + n = src[1]; + *dst++ = *src++; + *dst++ = *src++; + + for (i = 0; i < n; i++) { - int nl = olda->length + BS + 
2; - newa = lp_alloc(pool, sizeof(struct adata) + nl); - newa->length = nl; - newa->data[0] = AS_PATH_SEQUENCE; - newa->data[1] = 1; - memcpy(newa->data + BS + 2, olda->data, olda->length); + put_u32(dst, get_u16(src)); + src += 2; + dst += 4; } - put_as(newa->data + 2, as); - return newa; + } + + return dst - dst0; } int -as_path_convert_to_old(struct adata *path, byte *dst, int *new_used) +as_path_32to16(byte *dst, byte *src, uint len) { - byte *src = path->data; - byte *src_end = src + path->length; - byte *dst_start = dst; - u32 as; - int i, n; - *new_used = 0; + byte *dst0 = dst; + byte *end = src + len; + uint i, n; + + while (src < end) + { + n = src[1]; + *dst++ = *src++; + *dst++ = *src++; - while (src < src_end) + for (i = 0; i < n; i++) { - n = src[1]; - *dst++ = *src++; - *dst++ = *src++; + put_u16(dst, get_u32(src)); + src += 4; + dst += 2; + } + } - for(i=0; i<n; i++) - { - as = get_u32(src); - if (as > 0xFFFF) - { - as = AS_TRANS; - *new_used = 1; - } - put_u16(dst, as); - src += 4; - dst += 2; - } + return dst - dst0; +} + +int +as_path_contains_as4(const struct adata *path) +{ + const byte *pos = path->data; + const byte *end = pos + path->length; + uint i, n; + + while (pos < end) + { + n = pos[1]; + pos += 2; + + for (i = 0; i < n; i++) + { + if (get_as(pos) > 0xFFFF) + return 1; + + pos += BS; } + } - return dst - dst_start; + return 0; } int -as_path_convert_to_new(struct adata *path, byte *dst, int req_as) +as_path_contains_confed(const struct adata *path) { - byte *src = path->data; - byte *src_end = src + path->length; - byte *dst_start = dst; - u32 as; - int i, t, n; + const byte *pos = path->data; + const byte *end = pos + path->length; + + while (pos < end) + { + uint type = pos[0]; + uint slen = 2 + BS * pos[1]; + + if ((type == AS_PATH_CONFED_SEQUENCE) || + (type == AS_PATH_CONFED_SET)) + return 1; + + pos += slen; + } + return 0; +} + +struct adata * +as_path_strip_confed(struct linpool *pool, const struct adata *path) +{ + struct 
adata *res = lp_alloc_adata(pool, path->length); + const byte *src = path->data; + const byte *end = src + path->length; + byte *dst = res->data; + + while (src < end) + { + uint type = src[0]; + uint slen = 2 + BS * src[1]; - while ((src < src_end) && (req_as > 0)) + /* Copy regular segments */ + if ((type == AS_PATH_SET) || (type == AS_PATH_SEQUENCE)) { - t = *src++; - n = *src++; + memcpy(dst, src, slen); + dst += slen; + } - if (t == AS_PATH_SEQUENCE) - { - if (n > req_as) - n = req_as; + src += slen; + } - req_as -= n; - } - else // t == AS_PATH_SET - req_as--; + /* Fix the result length */ + res->length = dst - res->data; + + return res; +} - *dst++ = t; - *dst++ = n; +struct adata * +as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as) +{ + struct adata *np; + const byte *pos = op->data; + uint len = op->length; - for(i=0; i<n; i++) - { - as = get_u16(src); - put_u32(dst, as); - src += 2; - dst += 4; - } + if (len && (pos[0] == seq) && (pos[1] < 255)) + { + /* Starting with matching segment => just prepend the AS number */ + np = lp_alloc_adata(pool, len + BS); + np->data[0] = seq; + np->data[1] = pos[1] + 1; + put_as(np->data + 2, as); + + uint dlen = BS * pos[1]; + memcpy(np->data + 2 + BS, pos + 2, dlen); + ADVANCE(pos, len, 2 + dlen); + } + else + { + /* Create a new path segment */ + np = lp_alloc_adata(pool, len + 2 + BS); + np->data[0] = seq; + np->data[1] = 1; + put_as(np->data + 2, as); + } + + if (len) + { + byte *dst = np->data + 2 + BS * np->data[1]; + + memcpy(dst, pos, len); + } + + return np; +} + + +struct adata * +as_path_to_old(struct linpool *pool, const struct adata *path) +{ + struct adata *res = lp_alloc_adata(pool, path->length); + byte *pos = res->data; + byte *end = pos + res->length; + uint i, n; + u32 as; + + /* Copy the whole path */ + memcpy(res->data, path->data, path->length); + + /* Replace 32-bit AS numbers with AS_TRANS */ + while (pos < end) + { + n = pos[1]; + pos += 2; + + for (i = 0; i < n; 
i++) + { + as = get_as(pos); + if (as > 0xFFFF) + put_as(pos, AS_TRANS); + + pos += BS; } + } - return dst - dst_start; + return res; } +/* + * Cut the path to the length @num, measured to the usual path metric. Note that + * AS_CONFED_* segments have zero length and must be added if they are on edge. + * In contrast to other as_path_* functions, @path is modified in place. + */ void -as_path_format(struct adata *path, byte *buf, uint size) +as_path_cut(struct adata *path, uint num) { - byte *p = path->data; - byte *e = p + path->length; - byte *end = buf + size - 16; - int sp = 1; - int l, isset; + byte *pos = path->data; + byte *end = pos + path->length; - while (p < e) + while (pos < end) + { + uint t = pos[0]; + uint l = pos[1]; + uint n = 0; + + switch (t) { - if (buf > end) - { - strcpy(buf, " ..."); - return; - } - isset = (*p++ == AS_PATH_SET); - l = *p++; - if (isset) - { - if (!sp) - *buf++ = ' '; - *buf++ = '{'; - sp = 0; - } - while (l-- && buf <= end) - { - if (!sp) - *buf++ = ' '; - buf += bsprintf(buf, "%u", get_as(p)); - p += BS; - sp = 0; - } - if (isset) - { - *buf++ = ' '; - *buf++ = '}'; - sp = 0; - } + case AS_PATH_SET: n = 1; break; + case AS_PATH_SEQUENCE: n = l; break; + case AS_PATH_CONFED_SEQUENCE: n = 0; break; + case AS_PATH_CONFED_SET: n = 0; break; + default: bug("as_path_cut: Invalid path segment"); } - *buf = 0; + + /* Cannot add whole segment, so try partial one and finish */ + if (num < n) + { + if (num) + { + pos[1] = num; + pos += 2 + BS * num; + } + + break; + } + + num -= n; + pos += 2 + BS * l; + } + + path->length = pos - path->data; } -int -as_path_getlen(struct adata *path) +/* + * Merge (concatenate) paths @p1 and @p2 and return the result. + * In contrast to other as_path_* functions, @p1 and @p2 may be reused. 
+ */ +struct adata * +as_path_merge(struct linpool *pool, struct adata *p1, struct adata *p2) { - return as_path_getlen_int(path, BS); + if (p1->length == 0) + return p2; + + if (p2->length == 0) + return p1; + + struct adata *res = lp_alloc_adata(pool, p1->length + p2->length); + memcpy(res->data, p1->data, p1->length); + memcpy(res->data + p1->length, p2->data, p2->length); + + return res; +} + +void +as_path_format(const struct adata *path, byte *bb, uint size) +{ + buffer buf = { .start = bb, .pos = bb, .end = bb + size }, *b = &buf; + const byte *pos = path->data; + const byte *end = pos + path->length; + const char *ops, *cls; + + b->pos[0] = 0; + + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; + + switch (type) + { + case AS_PATH_SET: ops = "{"; cls = "}"; break; + case AS_PATH_SEQUENCE: ops = NULL; cls = NULL; break; + case AS_PATH_CONFED_SEQUENCE: ops = "("; cls = ")"; break; + case AS_PATH_CONFED_SET: ops = "({"; cls = "})"; break; + default: bug("Invalid path segment"); + } + + if (ops) + buffer_puts(b, ops); + + while (len--) + { + buffer_print(b, len ? 
"%u " : "%u", get_as(pos)); + pos += BS; + } + + if (cls) + buffer_puts(b, cls); + + if (pos < end) + buffer_puts(b, " "); + } + + /* Handle overflow */ + if (b->pos == b->end) + strcpy(b->end - 12, "..."); } int -as_path_getlen_int(struct adata *path, int bs) +as_path_getlen(const struct adata *path) { - int res = 0; - u8 *p = path->data; - u8 *q = p+path->length; - int len; + const byte *pos = path->data; + const byte *end = pos + path->length; + uint res = 0; - while (p<q) + while (pos < end) + { + uint t = pos[0]; + uint l = pos[1]; + uint n = 0; + + switch (t) { - switch (*p++) - { - case AS_PATH_SET: len = *p++; res++; p += bs * len; break; - case AS_PATH_SEQUENCE: len = *p++; res += len; p += bs * len; break; - default: bug("as_path_getlen: Invalid path segment"); - } + case AS_PATH_SET: n = 1; break; + case AS_PATH_SEQUENCE: n = l; break; + case AS_PATH_CONFED_SEQUENCE: n = 0; break; + case AS_PATH_CONFED_SET: n = 0; break; + default: bug("as_path_getlen: Invalid path segment"); } + + res += n; + pos += 2 + BS * l; + } + return res; } int -as_path_get_last(struct adata *path, u32 *orig_as) +as_path_get_last(const struct adata *path, u32 *orig_as) { + const byte *pos = path->data; + const byte *end = pos + path->length; int found = 0; - u32 res = 0; - u8 *p = path->data; - u8 *q = p+path->length; - int len; + u32 val = 0; - while (p<q) + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; + + if (!len) + continue; + + switch (type) { - switch (*p++) - { - case AS_PATH_SET: - if (len = *p++) - { - found = 0; - p += BS * len; - } - break; - case AS_PATH_SEQUENCE: - if (len = *p++) - { - found = 1; - res = get_as(p + BS * (len - 1)); - p += BS * len; - } - break; - default: bug("Invalid path segment"); - } + case AS_PATH_SET: + case AS_PATH_CONFED_SET: + found = 0; + break; + + case AS_PATH_SEQUENCE: + case AS_PATH_CONFED_SEQUENCE: + val = get_as(pos + BS * (len - 1)); + found = 1; + break; + + default: + bug("Invalid path segment"); } 
+ pos += BS * len; + } + if (found) - *orig_as = res; + *orig_as = val; return found; } u32 -as_path_get_last_nonaggregated(struct adata *path) +as_path_get_last_nonaggregated(const struct adata *path) { - u8 *p = path->data; - u8 *q = p+path->length; - u32 res = 0; - int len; + const byte *pos = path->data; + const byte *end = pos + path->length; + u32 val = 0; - while (p<q) + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; + + if (!len) + continue; + + switch (type) { - switch (*p++) - { - case AS_PATH_SET: - return res; + case AS_PATH_SET: + case AS_PATH_CONFED_SET: + return val; - case AS_PATH_SEQUENCE: - if (len = *p++) - res = get_as(p + BS * (len - 1)); - p += BS * len; - break; + case AS_PATH_SEQUENCE: + case AS_PATH_CONFED_SEQUENCE: + val = get_as(pos + BS * (len - 1)); + break; - default: bug("Invalid path segment"); - } + default: + bug("Invalid path segment"); } - return res; -} + pos += BS * len; + } + return val; +} int -as_path_get_first(struct adata *path, u32 *last_as) +as_path_get_first(const struct adata *path, u32 *last_as) { - u8 *p = path->data; + const u8 *p = path->data; if ((path->length == 0) || (p[0] != AS_PATH_SEQUENCE) || (p[1] == 0)) return 0; - else + + *last_as = get_as(p+2); + return 1; +} + +int +as_path_get_first_regular(const struct adata *path, u32 *last_as) +{ + const byte *pos = path->data; + const byte *end = pos + path->length; + + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; + + switch (type) { - *last_as = get_as(p+2); + case AS_PATH_SET: + return 0; + + case AS_PATH_SEQUENCE: + if (len == 0) + return 0; + + *last_as = get_as(pos); return 1; + + case AS_PATH_CONFED_SEQUENCE: + case AS_PATH_CONFED_SET: + break; + + default: + bug("Invalid path segment"); } + + pos += BS * len; + } + + return 0; } int -as_path_contains(struct adata *path, u32 as, int min) +as_path_contains(const struct adata *path, u32 as, int min) { - u8 *p = path->data; - u8 *q = p+path->length; 
+ const u8 *p = path->data; + const u8 *q = p+path->length; int num = 0; int i, n; @@ -296,10 +561,10 @@ as_path_contains(struct adata *path, u32 as, int min) } int -as_path_match_set(struct adata *path, struct f_tree *set) +as_path_match_set(const struct adata *path, struct f_tree *set) { - u8 *p = path->data; - u8 *q = p+path->length; + const u8 *p = path->data; + const u8 *q = p+path->length; int i, n; while (p<q) @@ -325,8 +590,8 @@ as_path_filter(struct linpool *pool, struct adata *path, struct f_tree *set, u32 return NULL; int len = path->length; - u8 *p = path->data; - u8 *q = path->data + len; + const u8 *p = path->data; + const u8 *q = path->data + len; u8 *d, *d2; int i, bt, sn, dn; u8 buf[len]; @@ -388,51 +653,57 @@ struct pm_pos u8 mark; union { - char *sp; + const char *sp; u32 asn; } val; }; static int -parse_path(struct adata *path, struct pm_pos *pos) +parse_path(const struct adata *path, struct pm_pos *pp) { - u8 *p = path->data; - u8 *q = p + path->length; - struct pm_pos *opos = pos; - int i, len; + const byte *pos = path->data; + const byte *end = pos + path->length; + struct pm_pos *op = pp; + uint i; + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; - while (p < q) - switch (*p++) + switch (type) + { + case AS_PATH_SET: + case AS_PATH_CONFED_SET: + pp->set = 1; + pp->mark = 0; + pp->val.sp = pos - 1; + pp++; + + pos += BS * len; + break; + + case AS_PATH_SEQUENCE: + case AS_PATH_CONFED_SEQUENCE: + for (i = 0; i < len; i++) { - case AS_PATH_SET: - pos->set = 1; - pos->mark = 0; - pos->val.sp = p; - len = *p; - p += 1 + BS * len; - pos++; - break; - - case AS_PATH_SEQUENCE: - len = *p++; - for (i = 0; i < len; i++) - { - pos->set = 0; - pos->mark = 0; - pos->val.asn = get_as(p); - p += BS; - pos++; - } - break; - - default: - bug("as_path_match: Invalid path component"); + pp->set = 0; + pp->mark = 0; + pp->val.asn = get_as(pos); + pp++; + + pos += BS; } - - return pos - opos; -} + break; + + default: + bug("Invalid 
path segment"); + } + } + return pp - op; +} static int pm_match(struct pm_pos *pos, u32 asn, u32 asn2) @@ -441,7 +712,7 @@ pm_match(struct pm_pos *pos, u32 asn, u32 asn2) if (! pos->set) return ((pos->val.asn >= asn) && (pos->val.asn <= asn2)); - u8 *p = pos->val.sp; + const u8 *p = pos->val.sp; int len = *p++; int i; @@ -463,7 +734,7 @@ pm_mark(struct pm_pos *pos, int i, int plen, int *nl, int *nh) if (pos[i].set) pos[i].mark = 1; - + for (j = i + 1; (j < plen) && pos[j].set && (! pos[j].mark); j++) pos[j].mark = 1; pos[j].mark = 1; @@ -478,7 +749,7 @@ pm_mark(struct pm_pos *pos, int i, int plen, int *nl, int *nh) } /* AS path matching is nontrivial. Because AS path can - * contain sets, it is not a plain wildcard matching. A set + * contain sets, it is not a plain wildcard matching. A set * in an AS path is interpreted as it might represent any * sequence of AS numbers from that set (possibly with * repetitions). So it is also a kind of a pattern, @@ -499,9 +770,8 @@ pm_mark(struct pm_pos *pos, int i, int plen, int *nl, int *nh) * (auxiliary position after last real position in AS path) * is marked. */ - int -as_path_match(struct adata *path, struct f_path_mask *mask) +as_path_match(const struct adata *path, struct f_path_mask *mask) { struct pm_pos pos[2048 + 1]; int plen = parse_path(path, pos); @@ -517,7 +787,7 @@ as_path_match(struct adata *path, struct f_path_mask *mask) l = h = 0; pos[0].mark = 1; - + while (mask) { /* We remove this mark to not step after pos[plen] */ @@ -540,7 +810,7 @@ as_path_match(struct adata *path, struct f_path_mask *mask) case PM_ASN_RANGE: val = mask->val; val2 = mask->val2; - goto step; + goto step; case PM_QUESTION: step: nh = nl = -1; diff --git a/nest/a-path_test.c b/nest/a-path_test.c new file mode 100644 index 00000000..a71b48ba --- /dev/null +++ b/nest/a-path_test.c @@ -0,0 +1,220 @@ +/* + * BIRD -- Path Operations Tests + * + * (c) 2015 CZ.NIC z.s.p.o. 
+ * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "test/bt-utils.h" + +#include "nest/route.h" +#include "nest/attrs.h" +#include "lib/resource.h" + +#define TESTS_NUM 30 +#define AS_PATH_LENGTH 1000 + +#if AS_PATH_LENGTH > AS_PATH_MAXLEN +#warning "AS_PATH_LENGTH should be <= AS_PATH_MAXLEN" +#endif + +static int +t_as_path_match(void) +{ + resource_init(); + + int round; + for (round = 0; round < TESTS_NUM; round++) + { + struct adata empty_as_path = {}; + struct adata *as_path = &empty_as_path; + u32 first_prepended, last_prepended; + first_prepended = last_prepended = 0; + struct linpool *lp = lp_new_default(&root_pool); + + struct f_path_mask mask[AS_PATH_LENGTH] = {}; + int i; + for (i = 0; i < AS_PATH_LENGTH; i++) + { + u32 val = bt_random(); + as_path = as_path_prepend(lp, as_path, val); + bt_debug("Prepending ASN: %10u \n", val); + + if (i == 0) + first_prepended = val; + if (i == AS_PATH_LENGTH-1) + last_prepended = val; + + mask[i].kind = PM_ASN; + mask[i].val = val; + if (i) + mask[i].next = &mask[i-1]; + } + + bt_assert_msg(as_path_match(as_path, &mask[AS_PATH_LENGTH-1]), "Mask should match with AS path"); + + u32 asn; + + bt_assert(as_path_get_first(as_path, &asn)); + bt_assert_msg(asn == last_prepended, "as_path_get_first() should return the last prepended ASN"); + + bt_assert(as_path_get_last(as_path, &asn)); + bt_assert_msg(asn == first_prepended, "as_path_get_last() should return the first prepended ASN"); + + rfree(lp); + } + + return 1; +} + +static int +t_path_format(void) +{ + resource_init(); + + struct adata empty_as_path = {}; + struct adata *as_path = &empty_as_path; + struct linpool *lp = lp_new_default(&root_pool); + + uint i; + for (i = 4294967285; i <= 4294967294; i++) + { + as_path = as_path_prepend(lp, as_path, i); + bt_debug("Prepending ASN: %10u \n", i); + } + +#define BUFFER_SIZE 120 + byte buf[BUFFER_SIZE] = {}; + + as_path_format(&empty_as_path, buf, 
BUFFER_SIZE); + bt_assert_msg(strcmp(buf, "") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); + + as_path_format(as_path, buf, BUFFER_SIZE); + bt_assert_msg(strcmp(buf, "4294967294 4294967293 4294967292 4294967291 4294967290 4294967289 4294967288 4294967287 4294967286 4294967285") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); + +#define SMALL_BUFFER_SIZE 25 + byte buf2[SMALL_BUFFER_SIZE] = {}; + as_path_format(as_path, buf2, SMALL_BUFFER_SIZE); + bt_assert_msg(strcmp(buf2, "4294967294 42...") == 0, "Small Buffer(%zu): '%s'", strlen(buf2), buf2); + + rfree(lp); + + return 1; +} + +static int +count_asn_in_array(const u32 *array, u32 asn) +{ + int counts_of_contains = 0; + int u; + for (u = 0; u < AS_PATH_LENGTH; u++) + if (array[u] == asn) + counts_of_contains++; + return counts_of_contains; +} + +static int +t_path_include(void) +{ + resource_init(); + + struct adata empty_as_path = {}; + struct adata *as_path = &empty_as_path; + struct linpool *lp = lp_new_default(&root_pool); + + u32 as_nums[AS_PATH_LENGTH] = {}; + int i; + for (i = 0; i < AS_PATH_LENGTH; i++) + { + u32 val = bt_random(); + as_nums[i] = val; + as_path = as_path_prepend(lp, as_path, val); + } + + for (i = 0; i < AS_PATH_LENGTH; i++) + { + int counts_of_contains = count_asn_in_array(as_nums, as_nums[i]); + bt_assert_msg(as_path_contains(as_path, as_nums[i], counts_of_contains), "AS Path should contains %d-times number %d", counts_of_contains, as_nums[i]); + + bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 0) != NULL); + bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 1) != NULL); + } + + for (i = 0; i < 10000; i++) + { + u32 test_val = bt_random(); + int counts_of_contains = count_asn_in_array(as_nums, test_val); + int result = as_path_contains(as_path, test_val, (counts_of_contains == 0 ? 
1 : counts_of_contains)); + + if (counts_of_contains) + bt_assert_msg(result, "As path should contain %d-times the number %u", counts_of_contains, test_val); + else + bt_assert_msg(result == 0, "As path should not contain the number %u", test_val); + } + + rfree(lp); + + return 1; +} + +#if 0 +static int +t_as_path_converting(void) +{ + resource_init(); + + struct adata empty_as_path = {}; + struct adata *as_path = &empty_as_path; + struct linpool *lp = lp_new_default(&root_pool); +#define AS_PATH_LENGTH_FOR_CONVERTING_TEST 10 + + int i; + for (i = 0; i < AS_PATH_LENGTH_FOR_CONVERTING_TEST; i++) + as_path = as_path_prepend(lp, as_path, i); + + bt_debug("data length: %u \n", as_path->length); + + byte buffer[100] = {}; + int used_size = as_path_convert_to_new(as_path, buffer, AS_PATH_LENGTH_FOR_CONVERTING_TEST-1); + bt_debug("as_path_convert_to_new: len %d \n%s\n", used_size, buffer); + for (i = 0; i < used_size; i++) + { + bt_debug("\\03%d", buffer[i]); + } + bt_debug("\n"); + bt_assert(memcmp(buffer, + "\032\039\030\030\030\030\030\030\030\039\030\030\030\030\030\030\030\038\030\030\030\030\030\030" + "\030\037\030\030\030\030\030\030\030\036\030\030\030\030", + 38)); + + bzero(buffer, sizeof(buffer)); + int new_used; + used_size = as_path_convert_to_old(as_path, buffer, &new_used); + bt_debug("as_path_convert_to_old: len %d, new_used: %d \n", used_size, new_used); + for (i = 0; i < used_size; i++) + { + bt_debug("\\03%d", buffer[i]); + } + bt_debug("\n"); + bt_assert(memcmp(buffer, + "\032\0310\030\039\030\038\030\037\030\036\030\035\030\034\030\033\030\032\030\031\030\030", + 22)); + + return 1; +} +#endif + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities."); + bt_test_suite(t_path_format, "Testing formating as path into byte buffer"); + bt_test_suite(t_path_include, "Testing including a AS number in AS path"); + // bt_test_suite(t_as_path_converting, "Testing 
as_path_convert_to_*() output constancy"); + + return bt_exit_value(); +} diff --git a/nest/a-set.c b/nest/a-set.c index a6c07f45..048e522d 100644 --- a/nest/a-set.c +++ b/nest/a-set.c @@ -7,6 +7,8 @@ * Can be freely distributed and used under the terms of the GNU GPL. */ +#include <stdlib.h> + #include "nest/bird.h" #include "nest/route.h" #include "nest/attrs.h" @@ -474,3 +476,92 @@ lc_set_union(struct linpool *pool, struct adata *l1, struct adata *l2) memcpy(res->data + l1->length, tmp, len); return res; } + + +struct adata * +ec_set_del_nontrans(struct linpool *pool, struct adata *set) +{ + adata *res = lp_alloc_adata(pool, set->length); + u32 *src = int_set_get_data(set); + u32 *dst = int_set_get_data(res); + int len = int_set_get_size(set); + int i; + + /* Remove non-transitive communities (EC_TBIT set) */ + for (i = 0; i < len; i += 2) + { + if (src[i] & EC_TBIT) + continue; + + *dst++ = src[i]; + *dst++ = src[i+1]; + } + + res->length = ((byte *) dst) - res->data; + + return res; +} + +static int +int_set_cmp(const void *X, const void *Y) +{ + const u32 *x = X, *y = Y; + return (*x < *y) ? -1 : (*x > *y) ? 1 : 0; +} + +struct adata * +int_set_sort(struct linpool *pool, struct adata *src) +{ + struct adata *dst = lp_alloc_adata(pool, src->length); + memcpy(dst->data, src->data, src->length); + qsort(dst->data, dst->length / 4, 4, int_set_cmp); + return dst; +} + + +static int +ec_set_cmp(const void *X, const void *Y) +{ + u64 x = ec_get(X, 0); + u64 y = ec_get(Y, 0); + return (x < y) ? -1 : (x > y) ? 
1 : 0; +} + +struct adata * +ec_set_sort(struct linpool *pool, struct adata *src) +{ + struct adata *dst = lp_alloc_adata(pool, src->length); + memcpy(dst->data, src->data, src->length); + qsort(dst->data, dst->length / 8, 8, ec_set_cmp); + return dst; +} + +void +ec_set_sort_x(struct adata *set) +{ + /* Sort in place */ + qsort(set->data, set->length / 8, 8, ec_set_cmp); +} + + +static int +lc_set_cmp(const void *X, const void *Y) +{ + const u32 *x = X, *y = Y; + if (x[0] != y[0]) + return (x[0] > y[0]) ? 1 : -1; + if (x[1] != y[1]) + return (x[1] > y[1]) ? 1 : -1; + if (x[2] != y[2]) + return (x[2] > y[2]) ? 1 : -1; + return 0; +} + +struct adata * +lc_set_sort(struct linpool *pool, struct adata *src) +{ + struct adata *dst = lp_alloc_adata(pool, src->length); + memcpy(dst->data, src->data, src->length); + qsort(dst->data, dst->length / LCOMM_LENGTH, LCOMM_LENGTH, lc_set_cmp); + return dst; +} diff --git a/nest/a-set_test.c b/nest/a-set_test.c new file mode 100644 index 00000000..a5081f9f --- /dev/null +++ b/nest/a-set_test.c @@ -0,0 +1,260 @@ +/* + * BIRD -- Set/Community-list Operations Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#include "test/birdtest.h" +#include "test/bt-utils.h" + +#include "lib/net.h" +#include "nest/route.h" +#include "nest/attrs.h" +#include "lib/resource.h" + +#define SET_SIZE 10 +static struct adata *set_sequence; /* <0; SET_SIZE) */ +static struct adata *set_sequence_same; /* <0; SET_SIZE) */ +static struct adata *set_sequence_higher; /* <SET_SIZE; 2*SET_SIZE) */ +static struct adata *set_random; + +#define BUFFER_SIZE 1000 +static byte buf[BUFFER_SIZE] = {}; + +#define SET_SIZE_FOR_FORMAT_OUTPUT 10 + +struct linpool *lp; + +enum set_type +{ + SET_TYPE_INT, + SET_TYPE_EC +}; + +static void +generate_set_sequence(enum set_type type) +{ + struct adata empty_as_path = {}; + set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; + lp = lp_new_default(&root_pool); + + int i; + for (i = 0; i < SET_SIZE; i++) + { + if (type == SET_TYPE_INT) + { + set_sequence = int_set_add(lp, set_sequence, i); + set_sequence_same = int_set_add(lp, set_sequence_same, i); + set_sequence_higher = int_set_add(lp, set_sequence_higher, i + SET_SIZE); + set_random = int_set_add(lp, set_random, bt_random()); + } + else if (type == SET_TYPE_EC) + { + set_sequence = ec_set_add(lp, set_sequence, i); + set_sequence_same = ec_set_add(lp, set_sequence_same, i); + set_sequence_higher = ec_set_add(lp, set_sequence_higher, i + SET_SIZE); + set_random = ec_set_add(lp, set_random, (bt_random() << 32 | bt_random())); + } + else + bt_abort_msg("This should be unreachable"); + } +} + +/* + * SET INT TESTS + */ + +static int +t_set_int_contains(void) +{ + int i; + + resource_init(); + generate_set_sequence(SET_TYPE_INT); + + bt_assert(int_set_get_size(set_sequence) == SET_SIZE); + + for (i = 0; i < SET_SIZE; i++) + bt_assert(int_set_contains(set_sequence, i)); + bt_assert(int_set_contains(set_sequence, -1) == 0); + bt_assert(int_set_contains(set_sequence, SET_SIZE) == 0); + + int *data = int_set_get_data(set_sequence); + for (i = 0; i < SET_SIZE; i++) + 
bt_assert_msg(data[i] == i, "(data[i] = %d) == i = %d)", data[i], i); + + rfree(lp); + return 1; +} + +static int +t_set_int_union(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_INT); + + struct adata *set_union; + set_union = int_set_union(lp, set_sequence, set_sequence_same); + bt_assert(int_set_get_size(set_union) == SET_SIZE); + bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); + + set_union = int_set_union(lp, set_sequence, set_sequence_higher); + bt_assert_msg(int_set_get_size(set_union) == SET_SIZE*2, "int_set_get_size(set_union) %d, SET_SIZE*2 %d", int_set_get_size(set_union), SET_SIZE*2); + bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); + + rfree(lp); + return 1; +} + +static int +t_set_int_format(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_INT); + + set_sequence->length = 4 * SET_SIZE_FOR_FORMAT_OUTPUT; /* dirty */ + bt_assert(int_set_format(set_sequence, 0, 0, buf, BUFFER_SIZE) == 0); + bt_assert(strcmp(buf, "0.0.0.0 0.0.0.1 0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); + + bzero(buf, BUFFER_SIZE); + bt_assert(int_set_format(set_sequence, 0, 2, buf, BUFFER_SIZE) == 0); + bt_assert(strcmp(buf, "0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); + + bzero(buf, BUFFER_SIZE); + bt_assert(int_set_format(set_sequence, 1, 0, buf, BUFFER_SIZE) == 0); + bt_assert(strcmp(buf, "(0,0) (0,1) (0,2) (0,3) (0,4) (0,5) (0,6) (0,7) (0,8) (0,9)") == 0); + + rfree(lp); + return 1; +} + +static int +t_set_int_delete(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_INT); + + struct adata *deleting_sequence = set_sequence; + u32 i; + for (i = 0; i < SET_SIZE; i++) + { + deleting_sequence = int_set_del(lp, deleting_sequence, i); + bt_assert_msg(int_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), + "int_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", + int_set_get_size(deleting_sequence), + SET_SIZE-1-i); + } + + 
bt_assert(int_set_get_size(set_sequence) == SET_SIZE); + + return 1; +} + +/* + * SET EC TESTS + */ + +static int +t_set_ec_contains(void) +{ + u32 i; + + resource_init(); + generate_set_sequence(SET_TYPE_EC); + + bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); + + for (i = 0; i < SET_SIZE; i++) + bt_assert(ec_set_contains(set_sequence, i)); + bt_assert(ec_set_contains(set_sequence, -1) == 0); + bt_assert(ec_set_contains(set_sequence, SET_SIZE) == 0); + +// int *data = ec_set_get_data(set_sequence); +// for (i = 0; i < SET_SIZE; i++) +// bt_assert_msg(data[i] == (SET_SIZE-1-i), "(data[i] = %d) == ((SET_SIZE-1-i) = %d)", data[i], SET_SIZE-1-i); + + rfree(lp); + return 1; +} + +static int +t_set_ec_union(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_EC); + + struct adata *set_union; + set_union = ec_set_union(lp, set_sequence, set_sequence_same); + bt_assert(ec_set_get_size(set_union) == SET_SIZE); + bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); + + set_union = ec_set_union(lp, set_sequence, set_sequence_higher); + bt_assert_msg(ec_set_get_size(set_union) == SET_SIZE*2, "ec_set_get_size(set_union) %d, SET_SIZE*2 %d", ec_set_get_size(set_union), SET_SIZE*2); + bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); + + rfree(lp); + return 1; +} + +static int +t_set_ec_format(void) +{ + resource_init(); + + struct adata empty_as_path = {}; + set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; + lp = lp_new_default(&root_pool); + + u64 i = 0; + set_sequence = ec_set_add(lp, set_sequence, i); + for (i = 1; i < SET_SIZE_FOR_FORMAT_OUTPUT; i++) + set_sequence = ec_set_add(lp, set_sequence, i + ((i%2) ? 
((u64)EC_RO << 48) : ((u64)EC_RT << 48))); + + bt_assert(ec_set_format(set_sequence, 0, buf, BUFFER_SIZE) == 0); + bt_assert_msg(strcmp(buf, "(unknown 0x0, 0, 0) (ro, 0, 1) (rt, 0, 2) (ro, 0, 3) (rt, 0, 4) (ro, 0, 5) (rt, 0, 6) (ro, 0, 7) (rt, 0, 8) (ro, 0, 9)") == 0, + "ec_set_format() returns '%s'", buf); + + rfree(lp); + return 1; +} + +static int +t_set_ec_delete(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_EC); + + struct adata *deleting_sequence = set_sequence; + u32 i; + for (i = 0; i < SET_SIZE; i++) + { + deleting_sequence = ec_set_del(lp, deleting_sequence, i); + bt_assert_msg(ec_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), + "ec_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", + ec_set_get_size(deleting_sequence), SET_SIZE-1-i); + } + + bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_set_int_contains, "Testing sets of integers: contains, get_data"); + bt_test_suite(t_set_int_format, "Testing sets of integers: format"); + bt_test_suite(t_set_int_union, "Testing sets of integers: union"); + bt_test_suite(t_set_int_delete, "Testing sets of integers: delete"); + + bt_test_suite(t_set_ec_contains, "Testing sets of Extended Community values: contains, get_data"); + bt_test_suite(t_set_ec_format, "Testing sets of Extended Community values: format"); + bt_test_suite(t_set_ec_union, "Testing sets of Extended Community values: union"); + bt_test_suite(t_set_ec_delete, "Testing sets of Extended Community values: delete"); + + return bt_exit_value(); +} diff --git a/nest/attrs.h b/nest/attrs.h index a34e64d3..102f378a 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -10,6 +10,9 @@ #define _BIRD_ATTRS_H_ #include <stdint.h> +#include "lib/unaligned.h" +#include "nest/route.h" + /* a-path.c */ @@ -27,19 +30,30 @@ struct f_tree; -struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, u32 as); -int 
as_path_convert_to_old(struct adata *path, byte *dst, int *new_used); -int as_path_convert_to_new(struct adata *path, byte *dst, int req_as); -void as_path_format(struct adata *path, byte *buf, uint size); -int as_path_getlen(struct adata *path); -int as_path_getlen_int(struct adata *path, int bs); -int as_path_get_first(struct adata *path, u32 *orig_as); -int as_path_get_last(struct adata *path, u32 *last_as); -u32 as_path_get_last_nonaggregated(struct adata *path); -int as_path_contains(struct adata *path, u32 as, int min); -int as_path_match_set(struct adata *path, struct f_tree *set); +int as_path_valid(byte *data, uint len, int bs, int confed, char *err, uint elen); +int as_path_16to32(byte *dst, byte *src, uint len); +int as_path_32to16(byte *dst, byte *src, uint len); +int as_path_contains_as4(const struct adata *path); +int as_path_contains_confed(const struct adata *path); +struct adata *as_path_strip_confed(struct linpool *pool, const struct adata *op); +struct adata *as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as); +struct adata *as_path_to_old(struct linpool *pool, const struct adata *path); +void as_path_cut(struct adata *path, uint num); +struct adata *as_path_merge(struct linpool *pool, struct adata *p1, struct adata *p2); +void as_path_format(const struct adata *path, byte *buf, uint size); +int as_path_getlen(const struct adata *path); +int as_path_getlen_int(const struct adata *path, int bs); +int as_path_get_first(const struct adata *path, u32 *orig_as); +int as_path_get_first_regular(const struct adata *path, u32 *last_as); +int as_path_get_last(const struct adata *path, u32 *last_as); +u32 as_path_get_last_nonaggregated(const struct adata *path); +int as_path_contains(const struct adata *path, u32 as, int min); +int as_path_match_set(const struct adata *path, struct f_tree *set); struct adata *as_path_filter(struct linpool *pool, struct adata *path, struct f_tree *set, u32 key, int pos); +static inline struct 
adata *as_path_prepend(struct linpool *pool, const struct adata *path, u32 as) +{ return as_path_prepend2(pool, path, AS_PATH_SEQUENCE, as); } + #define PM_ASN 0 #define PM_QUESTION 1 @@ -54,7 +68,42 @@ struct f_path_mask { uintptr_t val2; }; -int as_path_match(struct adata *path, struct f_path_mask *mask); +int as_path_match(const struct adata *path, struct f_path_mask *mask); + + +/* Counterparts to appropriate as_path_* functions */ + +static inline int +aggregator_16to32(byte *dst, byte *src) +{ + put_u32(dst, get_u16(src)); + memcpy(dst+4, src+2, 4); + return 8; +} + +static inline int +aggregator_32to16(byte *dst, byte *src) +{ + put_u16(dst, get_u32(src)); + memcpy(dst+2, src+4, 4); + return 6; +} + +static inline int +aggregator_contains_as4(struct adata *a) +{ + return get_u32(a->data) > 0xFFFF; +} + +static inline struct adata * +aggregator_to_old(struct linpool *pool, struct adata *a) +{ + struct adata *d = lp_alloc_adata(pool, 8); + put_u32(d->data, 0xFFFF); + memcpy(d->data + 4, a->data + 4, 4); + return d; +} + /* a-set.c */ @@ -143,5 +192,11 @@ struct adata *int_set_union(struct linpool *pool, struct adata *l1, struct adata struct adata *ec_set_union(struct linpool *pool, struct adata *l1, struct adata *l2); struct adata *lc_set_union(struct linpool *pool, struct adata *l1, struct adata *l2); +struct adata *ec_set_del_nontrans(struct linpool *pool, struct adata *set); +struct adata *int_set_sort(struct linpool *pool, struct adata *src); +struct adata *ec_set_sort(struct linpool *pool, struct adata *src); +struct adata *lc_set_sort(struct linpool *pool, struct adata *src); + +void ec_set_sort_x(struct adata *set); /* Sort in place */ #endif @@ -46,7 +46,7 @@ static inline void cf_check_bfd(int use UNUSED) { } #else -static inline struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, void (*hook)(struct bfd_request *), void *data) { return NULL; } +static inline struct bfd_request * bfd_request_session(pool 
*p UNUSED, ip_addr addr UNUSED, ip_addr local UNUSED, struct iface *iface UNUSED, void (*hook)(struct bfd_request *) UNUSED, void *data UNUSED) { return NULL; } static inline void cf_check_bfd(int use) { if (use) cf_error("BFD not available"); } diff --git a/nest/bird.h b/nest/bird.h index 3c7d749b..55712abe 100644 --- a/nest/bird.h +++ b/nest/bird.h @@ -12,5 +12,6 @@ #include "sysdep/config.h" #include "lib/birdlib.h" #include "lib/ip.h" +#include "lib/net.h" #endif @@ -60,7 +60,7 @@ * the new one. When the consumer processes everything in the buffer * queue, it calls cli_written(), tha frees all buffers (except the * first one) and schedules cli.event . - * + * */ #include "nest/bird.h" @@ -136,7 +136,7 @@ cli_printf(cli *c, int code, char *msg, ...) } else if (cd == CLI_ASYNC_CODE) { - size = 1; buf[0] = '+'; + size = 1; buf[0] = '+'; errcode = cd; } else @@ -316,7 +316,8 @@ cli_new(void *priv) c->event->hook = cli_event; c->event->data = c; c->cont = cli_hello; - c->parser_pool = lp_new(c->pool, 4096); + c->parser_pool = lp_new_default(c->pool); + c->show_pool = lp_new_default(c->pool); c->rx_buf = mb_alloc(c->pool, CLI_RX_BUF_SIZE); ev_schedule(c->event); return c; @@ -38,6 +38,7 @@ typedef struct cli { int last_reply; int restricted; /* CLI is restricted to read-only commands */ struct linpool *parser_pool; /* Pool used during parsing */ + struct linpool *show_pool; /* Pool used during route show */ byte *ring_buf; /* Ring buffer for asynchronous messages */ byte *ring_end, *ring_read, *ring_write; /* Pointers to the ring buffer */ uint ring_overflow; /* Counter of ring overflows */ diff --git a/nest/cmds.c b/nest/cmds.c index 0bc9b9d1..ca601ef2 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -25,12 +25,12 @@ cmd_show_status(void) byte tim[TM_DATETIME_BUFFER_SIZE]; cli_msg(-1000, "BIRD " BIRD_VERSION); - tm_format_datetime(tim, &config->tf_base, now); + tm_format_time(tim, &config->tf_base, current_time()); cli_msg(-1011, "Router ID is %R", config->router_id); 
cli_msg(-1011, "Current server time is %s", tim); - tm_format_datetime(tim, &config->tf_base, boot_time); + tm_format_time(tim, &config->tf_base, boot_time); cli_msg(-1011, "Last reboot on %s", tim); - tm_format_datetime(tim, &config->tf_base, config->load_time); + tm_format_time(tim, &config->tf_base, config->load_time); cli_msg(-1011, "Last reconfiguration on %s", tim); graceful_restart_show_status(); @@ -82,8 +82,6 @@ print_size(char *dsc, size_t val) extern pool *rt_table_pool; extern pool *rta_pool; -extern pool *roa_pool; -extern pool *proto_pool; void cmd_show_memory(void) @@ -91,7 +89,6 @@ cmd_show_memory(void) cli_msg(-1018, "BIRD memory usage"); print_size("Routing tables:", rmemsize(rt_table_pool)); print_size("Route attributes:", rmemsize(rta_pool)); - print_size("ROA tables:", rmemsize(roa_pool)); print_size("Protocols:", rmemsize(proto_pool)); print_size("Total:", rmemsize(&root_pool)); cli_msg(0, ""); diff --git a/nest/config.Y b/nest/config.Y index 358c7745..ab09a10c 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -18,9 +18,10 @@ CF_HDR CF_DEFINES static struct proto_config *this_proto; +static struct channel_config *this_channel; static struct iface_patt *this_ipatt; static struct iface_patt_node *this_ipn; -static struct roa_table_config *this_roa_table; +/* static struct roa_table_config *this_roa_table; */ static list *this_p_list; static struct password_item *this_p_item; static int password_id; @@ -31,7 +32,7 @@ iface_patt_check(void) struct iface_patt_node *pn; WALK_LIST(pn, this_ipatt->ipn_list) - if (!pn->pattern || pn->pxlen) + if (!pn->pattern || pn->prefix.type) cf_error("Interface name/mask expected, not IP prefix"); } @@ -50,25 +51,38 @@ get_passwords(void) return rv; } +static void +proto_postconfig(void) +{ + CALL(this_proto->protocol->postconfig, this_proto); + this_channel = NULL; + this_proto = NULL; +} + + #define DIRECT_CFG ((struct rt_dev_config *) this_proto) CF_DECLS CF_KEYWORDS(ROUTER, ID, PROTOCOL, TEMPLATE, PREFERENCE, 
DISABLED, DEBUG, ALL, OFF, DIRECT) CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, TABLE, STATES, ROUTES, FILTERS) +CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED) CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512) -CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, NOEXPORT, GENERATE, ROA) -CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED) +CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, COMMANDS, PREEXPORT, NOEXPORT, GENERATE) +CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION, SORTED) CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) +CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) +/* For r_args_channel */ +CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) + CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED) -CF_ENUM(T_ENUM_RTC, RTC_, UNICAST, BROADCAST, MULTICAST, ANYCAST) -CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT, MULTIPATH) +CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, BLACKHOLE, UNREACHABLE, PROHIBIT) CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type <i32> idval @@ -76,12 +90,14 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type <r> rtable %type <s> optsym %type <ra> r_args -%type <ro> roa_args -%type <rot> roa_table_arg %type <sd> sym_args -%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag 
export_mode roa_mode limit_action tab_sorted tos password_algorithm +%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type table_sorted tos password_algorithm %type <ps> proto_patt proto_patt2 -%type <g> limit_spec +%type <cc> channel_start proto_channel +%type <cl> limit_spec +%type <net> r_args_for_val +%type <net_ptr> r_args_for +%type <t> r_args_channel CF_GRAMMAR @@ -97,86 +113,60 @@ rtrid: idval: NUM { $$ = $1; } | '(' term ')' { $$ = f_eval_int($2); } - | RTRID - | IPA { -#ifndef IPV6 - $$ = ipa_to_u32($1); -#else - cf_error("Router IDs must be entered as hexadecimal numbers or IPv4 addresses in IPv6 version"); -#endif - } + | IP4 { $$ = ip4_to_u32($1); } | SYM { if ($1->class == (SYM_CONSTANT | T_INT) || $1->class == (SYM_CONSTANT | T_QUAD)) $$ = SYM_VAL($1).i; -#ifndef IPV6 - else if ($1->class == (SYM_CONSTANT | T_IP)) - $$ = ipa_to_u32(SYM_VAL($1).px.ip); -#endif + else if (($1->class == (SYM_CONSTANT | T_IP)) && ipa_is_ip4(SYM_VAL($1).ip)) + $$ = ipa_to_u32(SYM_VAL($1).ip); else cf_error("Number or IPv4 address constant expected"); } ; +CF_ADDTO(conf, gr_opts) -CF_ADDTO(conf, listen) +gr_opts: GRACEFUL RESTART WAIT expr ';' { new_config->gr_wait = $4; } ; -listen: LISTEN BGP listen_opts ';' ; -listen_opts: - /* Nothing */ - | listen_opts listen_opt - ; +/* Network types (for tables, channels) */ -listen_opt: - ADDRESS ipa { new_config->listen_bgp_addr = $2; } - | PORT expr { new_config->listen_bgp_port = $2; } - | V6ONLY { new_config->listen_bgp_flags = 0; } - | DUAL { new_config->listen_bgp_flags = 1; } +net_type: + IPV4 { $$ = NET_IP4; } + | IPV6 { $$ = NET_IP6; } + | IPV6 SADR { $$ = NET_IP6_SADR; } + | VPN4 { $$ = NET_VPN4; } + | VPN6 { $$ = NET_VPN6; } + | ROA4 { $$ = NET_ROA4; } + | ROA6 { $$ = NET_ROA6; } + | FLOW4{ $$ = NET_FLOW4; } + | FLOW6{ $$ = NET_FLOW6; } + | MPLS { $$ = NET_MPLS; } ; - -CF_ADDTO(conf, gr_opts) - -gr_opts: GRACEFUL RESTART WAIT 
expr ';' { new_config->gr_wait = $4; } ; +CF_ENUM(T_ENUM_NETTYPE, NET_, IP4, IP6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, IP6_SADR) /* Creation of routing tables */ -tab_sorted: - { $$ = 0; } +CF_ADDTO(conf, table) + +table_sorted: + { $$ = 0; } | SORTED { $$ = 1; } ; -CF_ADDTO(conf, newtab) - -newtab: TABLE SYM tab_sorted { +table: net_type TABLE SYM table_sorted { struct rtable_config *cf; - cf = rt_new_table($2); - cf->sorted = $3; - } - ; - -CF_ADDTO(conf, roa_table) - -roa_table_start: ROA TABLE SYM { - this_roa_table = roa_new_table_config($3); -}; - -roa_table_opts: - /* empty */ - | roa_table_opts ROA prefix MAX NUM AS NUM ';' { - roa_add_item_config(this_roa_table, $3.addr, $3.len, $5, $7); + cf = rt_new_table($3, $1); + cf->sorted = $4; } ; -roa_table: - roa_table_start - | roa_table_start '{' roa_table_opts '}' - ; /* Definition of protocols */ -CF_ADDTO(conf, proto) +CF_ADDTO(conf, proto { proto_postconfig(); }) proto_start: PROTOCOL { $$ = SYM_PROTO; } @@ -214,23 +204,61 @@ proto_name: proto_item: /* EMPTY */ - | PREFERENCE expr { - if ($2 < 0 || $2 > 0xFFFF) cf_error("Invalid preference"); - this_proto->preference = $2; - } | DISABLED bool { this_proto->disabled = $2; } | DEBUG debug_mask { this_proto->debug = $2; } | MRTDUMP mrtdump_mask { this_proto->mrtdump = $2; } - | IMPORT imexport { this_proto->in_filter = $2; } - | EXPORT imexport { this_proto->out_filter = $2; } - | RECEIVE LIMIT limit_spec { this_proto->rx_limit = $3; } - | IMPORT LIMIT limit_spec { this_proto->in_limit = $3; } - | EXPORT LIMIT limit_spec { this_proto->out_limit = $3; } - | IMPORT KEEP FILTERED bool { this_proto->in_keep_filtered = $4; } - | VRF text { this_proto->vrf = if_get_by_name($2); } - | TABLE rtable { this_proto->table = $2; } | ROUTER ID idval { this_proto->router_id = $3; } | DESCRIPTION text { this_proto->dsc = $2; } + | VRF text { this_proto->vrf = if_get_by_name($2); } + ; + + +channel_start: net_type +{ + $$ = this_channel = channel_config_get(NULL, 
net_label[$1], $1, this_proto); +}; + +channel_item: + TABLE rtable { + if (this_channel->net_type && ($2->addr_type != this_channel->net_type)) + cf_error("Incompatible table type"); + this_channel->table = $2; + } + | IMPORT imexport { this_channel->in_filter = $2; } + | EXPORT imexport { this_channel->out_filter = $2; } + | RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; } + | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } + | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } + | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } + | IMPORT KEEP FILTERED bool { this_channel->in_keep_filtered = $4; } + ; + +channel_opts: + /* empty */ + | channel_opts channel_item ';' + ; + +channel_opt_list: + /* empty */ + | '{' channel_opts '}' + ; + +channel_end: +{ + if (!this_channel->table) + cf_error("Routing table not specified"); + + this_channel = NULL; +}; + +proto_channel: channel_start channel_opt_list channel_end; + + +rtable: + SYM { + if ($1->class != SYM_TABLE) cf_error("Table expected"); + $$ = $1->def; + } ; imexport: @@ -249,21 +277,10 @@ limit_action: ; limit_spec: - expr limit_action { - struct proto_limit *l = cfg_allocz(sizeof(struct proto_limit)); - l->limit = $1; - l->action = $2; - $$ = l; - } - | OFF { $$ = NULL; } + expr limit_action { $$ = (struct channel_limit){ .limit = $1, $$.action = $2 }; } + | OFF { $$ = (struct channel_limit){}; } ; -rtable: - SYM { - if ($1->class != SYM_TABLE) cf_error("Table name expected"); - $$ = $1->def; - } - ; CF_ADDTO(conf, debug_default) @@ -274,6 +291,31 @@ debug_default: /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ +CF_ADDTO(conf, timeformat_base) + +timeformat_which: + ROUTE { $$ = &new_config->tf_route; } + | PROTOCOL { $$ = &new_config->tf_proto; } + | BASE { $$ = &new_config->tf_base; } + | LOG { $$ = &new_config->tf_log; } + ; + +timeformat_spec: + timeformat_which TEXT { *$1 = (struct timeformat){$2, NULL, 0}; } + | timeformat_which TEXT expr TEXT { *$1 = 
(struct timeformat){$2, $4, (s64) $3 S_}; } + | timeformat_which ISO SHORT { *$1 = TM_ISO_SHORT_S; } + | timeformat_which ISO SHORT MS { *$1 = TM_ISO_SHORT_MS; } + | timeformat_which ISO SHORT US { *$1 = TM_ISO_SHORT_US; } + | timeformat_which ISO LONG { *$1 = TM_ISO_LONG_S; } + | timeformat_which ISO LONG MS { *$1 = TM_ISO_LONG_MS; } + | timeformat_which ISO LONG US { *$1 = TM_ISO_LONG_US; } + ; + +timeformat_base: + TIMEFORMAT timeformat_spec ';' + ; + + /* Interface patterns */ iface_patt_node_init: @@ -285,9 +327,8 @@ iface_patt_node_init: ; iface_patt_node_body: - TEXT { this_ipn->pattern = $1; this_ipn->prefix = IPA_NONE; this_ipn->pxlen = 0; } - | prefix_or_ipa { this_ipn->pattern = NULL; this_ipn->prefix = $1.addr; this_ipn->pxlen = $1.len; } - | TEXT prefix_or_ipa { this_ipn->pattern = $1; this_ipn->prefix = $2.addr; this_ipn->pxlen = $2.len; } + TEXT { this_ipn->pattern = $1; /* this_ipn->prefix stays zero */ } + | opttext net_or_ipa { this_ipn->pattern = $1; this_ipn->prefix = $2; } ; iface_negate: @@ -320,8 +361,8 @@ iface_patt: ; tos: - CLASS expr { $$ = $2 & 0xfc; if (($2 < 0) || ($2 > 255)) cf_error("TX class must be in range 0-255"); } - | DSCP expr { $$ = ($2 & 0x3f) << 2; if (($2 < 0) || ($2 > 63)) cf_error("TX DSCP must be in range 0-63"); } + CLASS expr { $$ = $2 & 0xfc; if ($2 > 255) cf_error("TX class must be in range 0-255"); } + | DSCP expr { $$ = ($2 & 0x3f) << 2; if ($2 > 63) cf_error("TX DSCP must be in range 0-63"); } ; /* Direct device route protocol */ @@ -337,6 +378,7 @@ dev_proto_start: proto_start DIRECT { dev_proto: dev_proto_start proto_name '{' | dev_proto proto_item ';' + | dev_proto proto_channel ';' | dev_proto dev_iface_patt ';' | dev_proto CHECK LINK bool ';' { DIRECT_CFG->check_link = $4; } ; @@ -413,9 +455,9 @@ password_item: password_item_begin: PASSWORD text { if (!this_p_list) { - this_p_list = cfg_alloc(sizeof(list)); - init_list(this_p_list); - password_id = 1; + this_p_list = cfg_alloc(sizeof(list)); + 
init_list(this_p_list); + password_id = 1; } this_p_item = cfg_alloc(sizeof (struct password_item)); this_p_item->password = $2; @@ -432,12 +474,12 @@ password_item_begin: password_item_params: /* empty */ { } - | GENERATE FROM datetime ';' password_item_params { this_p_item->genfrom = $3; } - | GENERATE TO datetime ';' password_item_params { this_p_item->gento = $3; } - | ACCEPT FROM datetime ';' password_item_params { this_p_item->accfrom = $3; } - | ACCEPT TO datetime ';' password_item_params { this_p_item->accto = $3; } - | FROM datetime ';' password_item_params { this_p_item->genfrom = this_p_item->accfrom = $2; } - | TO datetime ';' password_item_params { this_p_item->gento = this_p_item->accto = $2; } + | GENERATE FROM time ';' password_item_params { this_p_item->genfrom = $3; } + | GENERATE TO time ';' password_item_params { this_p_item->gento = $3; } + | ACCEPT FROM time ';' password_item_params { this_p_item->accfrom = $3; } + | ACCEPT TO time ';' password_item_params { this_p_item->accto = $3; } + | FROM time ';' password_item_params { this_p_item->genfrom = this_p_item->accfrom = $2; } + | TO time ';' password_item_params { this_p_item->gento = this_p_item->accto = $2; } | ID expr ';' password_item_params { this_p_item->id = $2; if ($2 <= 0) cf_error("Password ID has to be greated than zero."); } | ALGORITHM password_algorithm ';' password_item_params { this_p_item->alg = $2; } ; @@ -488,26 +530,32 @@ CF_CLI(SHOW ROUTE, r_args, [[[<prefix>|for <prefix>|for <ip>] [table <t>] [filte r_args: /* empty */ { $$ = cfg_allocz(sizeof(struct rt_show_data)); - $$->pxlen = 256; + init_list(&($$->tables)); $$->filter = FILTER_ACCEPT; } - | r_args prefix { + | r_args net_any { $$ = $1; - if ($$->pxlen != 256) cf_error("Only one prefix expected"); - $$->prefix = $2.addr; - $$->pxlen = $2.len; + if ($$->addr) cf_error("Only one prefix expected"); + $$->addr = $2; } - | r_args FOR prefix_or_ipa { + | r_args FOR r_args_for { $$ = $1; - if ($$->pxlen != 256) 
cf_error("Only one prefix expected"); - $$->prefix = $3.addr; - $$->pxlen = $3.len; + if ($$->addr) cf_error("Only one prefix expected"); $$->show_for = 1; + $$->addr = $3; } | r_args TABLE SYM { $$ = $1; if ($3->class != SYM_TABLE) cf_error("%s is not a table", $3->name); - $$->table = ((struct rtable_config *)$3->def)->table; + rt_show_add_table($$, ((struct rtable_config *)$3->def)->table); + $$->tables_defined_by = RSD_TDB_DIRECT; + } + | r_args TABLE ALL { + struct rtable_config *t; + $$ = $1; + WALK_LIST(t, config->tables) + rt_show_add_table($$, t->table); + $$->tables_defined_by = RSD_TDB_ALL; } | r_args FILTER filter { $$ = $1; @@ -534,11 +582,23 @@ r_args: | r_args export_mode SYM { struct proto_config *c = (struct proto_config *) $3->def; $$ = $1; - if ($$->export_mode) cf_error("Protocol specified twice"); + if ($$->export_mode) cf_error("Export specified twice"); if ($3->class != SYM_PROTO || !c->proto) cf_error("%s is not a protocol", $3->name); $$->export_mode = $2; $$->export_protocol = c->proto; $$->running_on_config = c->proto->cf->global; + $$->tables_defined_by = RSD_TDB_INDIRECT; + } + | r_args export_mode SYM '.' 
r_args_channel { + struct proto_config *c = (struct proto_config *) $3->def; + $$ = $1; + if ($$->export_mode) cf_error("Export specified twice"); + if ($3->class != SYM_PROTO || !c->proto) cf_error("%s is not a protocol", $3->name); + $$->export_mode = $2; + $$->export_channel = proto_find_channel_by_name(c->proto, $5); + if (!$$->export_channel) cf_error("Export channel not found"); + $$->running_on_config = c->proto->cf->global; + $$->tables_defined_by = RSD_TDB_INDIRECT; } | r_args PROTOCOL SYM { struct proto_config *c = (struct proto_config *) $3->def; @@ -547,6 +607,7 @@ r_args: if ($3->class != SYM_PROTO || !c->proto) cf_error("%s is not a protocol", $3->name); $$->show_protocol = c->proto; $$->running_on_config = c->proto->cf->global; + $$->tables_defined_by = RSD_TDB_INDIRECT; } | r_args STATS { $$ = $1; @@ -558,52 +619,77 @@ r_args: } ; +r_args_for: + r_args_for_val { + $$ = cfg_alloc($1.length); + net_copy($$, &$1); + } + | net_vpn4_ + | net_vpn6_ + | net_ip6_sadr_ + | VPN_RD IP4 { + $$ = cfg_alloc(sizeof(net_addr_vpn4)); + net_fill_vpn4($$, $2, IP4_MAX_PREFIX_LENGTH, $1); + } + | VPN_RD IP6 { + $$ = cfg_alloc(sizeof(net_addr_vpn6)); + net_fill_vpn6($$, $2, IP6_MAX_PREFIX_LENGTH, $1); + } + | IP6 FROM IP6 { + $$ = cfg_alloc(sizeof(net_addr_ip6_sadr)); + net_fill_ip6_sadr($$, $1, IP6_MAX_PREFIX_LENGTH, $3, IP6_MAX_PREFIX_LENGTH); + } + | SYM { + if ($1->class == (SYM_CONSTANT | T_IP)) + { + $$ = cfg_alloc(ipa_is_ip4(SYM_VAL($1).ip) ? 
sizeof(net_addr_ip4) : sizeof(net_addr_ip6)); + net_fill_ip_host($$, SYM_VAL($1).ip); + } + else if (($1->class == (SYM_CONSTANT | T_NET)) && net_type_match(SYM_VAL($1).net, NB_IP | NB_VPN)) + $$ = (net_addr *) SYM_VAL($1).net; /* Avoid const warning */ + else + cf_error("IP address or network expected"); + } + ; + +r_args_for_val: + net_ip4_ + | net_ip6_ + | IP4 { net_fill_ip4(&($$), $1, IP4_MAX_PREFIX_LENGTH); } + | IP6 { net_fill_ip6(&($$), $1, IP6_MAX_PREFIX_LENGTH); } + export_mode: PREEXPORT { $$ = RSEM_PREEXPORT; } | EXPORT { $$ = RSEM_EXPORT; } | NOEXPORT { $$ = RSEM_NOEXPORT; } ; - -CF_CLI_HELP(SHOW ROA, ..., [[Show ROA table]]) -CF_CLI(SHOW ROA, roa_args, [<prefix> | in <prefix> | for <prefix>] [as <num>] [table <t>], [[Show ROA table]]) -{ roa_show($3); } ; - -roa_args: - /* empty */ { - $$ = cfg_allocz(sizeof(struct roa_show_data)); - $$->mode = ROA_SHOW_ALL; - $$->table = roa_table_default; - if (roa_table_default == NULL) - cf_error("No ROA table defined"); - } - | roa_args roa_mode prefix { - $$ = $1; - if ($$->mode != ROA_SHOW_ALL) cf_error("Only one prefix expected"); - $$->prefix = $3.addr; - $$->pxlen = $3.len; - $$->mode = $2; - } - | roa_args AS NUM { - $$ = $1; - $$->asn = $3; - } - | roa_args TABLE SYM { - $$ = $1; - if ($3->class != SYM_ROA) cf_error("%s is not a ROA table", $3->name); - $$->table = ((struct roa_table_config *)$3->def)->table; - } - ; - -roa_mode: - { $$ = ROA_SHOW_PX; } - | IN { $$ = ROA_SHOW_IN; } - | FOR { $$ = ROA_SHOW_FOR; } +/* This is ugly hack */ +r_args_channel: + IPV4 { $$ = "ipv4"; } + | IPV4_MC { $$ = "ipv4-mc"; } + | IPV4_MPLS { $$ = "ipv4-mpls"; } + | IPV6 { $$ = "ipv6"; } + | IPV6_MC { $$ = "ipv6-mc"; } + | IPV6_MPLS { $$ = "ipv6-mpls"; } + | IPV6_SADR { $$ = "ipv6-sadr"; } + | VPN4 { $$ = "vpn4"; } + | VPN4_MC { $$ = "vpn4-mc"; } + | VPN4_MPLS { $$ = "vpn4-mpls"; } + | VPN6 { $$ = "vpn6"; } + | VPN6_MC { $$ = "vpn6-mc"; } + | VPN6_MPLS { $$ = "vpn6-mpls"; } + | ROA4 { $$ = "roa4"; } + | ROA6 { $$ = "roa6"; } 
+ | FLOW4 { $$ = "flow4"; } + | FLOW6 { $$ = "flow6"; } + | MPLS { $$ = "mpls"; } + | PRI { $$ = "pri"; } + | SEC { $$ = "sec"; } ; - CF_CLI_HELP(SHOW SYMBOLS, ..., [[Show all known symbolic names]]) -CF_CLI(SHOW SYMBOLS, sym_args, [table|filter|function|protocol|template|roa|<symbol>], [[Show all known symbolic names]]) +CF_CLI(SHOW SYMBOLS, sym_args, [table|filter|function|protocol|template|<symbol>], [[Show all known symbolic names]]) { cmd_show_symbols($3); } ; sym_args: @@ -615,46 +701,10 @@ sym_args: | sym_args FILTER { $$ = $1; $$->type = SYM_FILTER; } | sym_args PROTOCOL { $$ = $1; $$->type = SYM_PROTO; } | sym_args TEMPLATE { $$ = $1; $$->type = SYM_TEMPLATE; } - | sym_args ROA { $$ = $1; $$->type = SYM_ROA; } | sym_args SYM { $$ = $1; $$->sym = $2; } ; -roa_table_arg: - /* empty */ { - if (roa_table_default == NULL) - cf_error("No ROA table defined"); - $$ = roa_table_default; - } - | TABLE SYM { - if ($2->class != SYM_ROA) - cf_error("%s is not a ROA table", $2->name); - $$ = ((struct roa_table_config *)$2->def)->table; - } - ; - -CF_CLI_HELP(ADD, roa ..., [[Add ROA record]]) -CF_CLI(ADD ROA, prefix MAX NUM AS NUM roa_table_arg, <prefix> max <num> as <num> [table <name>], [[Add ROA record]]) -{ - if (! cli_access_restricted()) - { roa_add_item($8, $3.addr, $3.len, $5, $7, ROA_SRC_DYNAMIC); cli_msg(0, ""); } -}; - -CF_CLI_HELP(DELETE, roa ..., [[Delete ROA record]]) -CF_CLI(DELETE ROA, prefix MAX NUM AS NUM roa_table_arg, <prefix> max <num> as <num> [table <name>], [[Delete ROA record]]) -{ - if (! cli_access_restricted()) - { roa_delete_item($8, $3.addr, $3.len, $5, $7, ROA_SRC_DYNAMIC); cli_msg(0, ""); } -}; - -CF_CLI_HELP(FLUSH, roa [table <name>], [[Removes all dynamic ROA records]]) -CF_CLI(FLUSH ROA, roa_table_arg, [table <name>], [[Removes all dynamic ROA records]]) -{ - if (! 
cli_access_restricted()) - { roa_flush($3, ROA_SRC_DYNAMIC); cli_msg(0, ""); } -}; - - CF_CLI_HELP(DUMP, ..., [[Dump debugging information]]) CF_CLI(DUMP RESOURCES,,, [[Dump all allocated resource]]) { rdump(&root_pool); cli_msg(0, ""); } ; @@ -696,11 +746,11 @@ echo_size: } ; -CF_CLI(DISABLE, proto_patt text_or_none, (<protocol> | \"<pattern>\" | all) [message], [[Disable protocol]]) +CF_CLI(DISABLE, proto_patt opttext, (<protocol> | \"<pattern>\" | all) [message], [[Disable protocol]]) { proto_apply_cmd($2, proto_cmd_disable, 1, (uintptr_t) $3); } ; -CF_CLI(ENABLE, proto_patt text_or_none, (<protocol> | \"<pattern>\" | all) [message], [[Enable protocol]]) +CF_CLI(ENABLE, proto_patt opttext, (<protocol> | \"<pattern>\" | all) [message], [[Enable protocol]]) { proto_apply_cmd($2, proto_cmd_enable, 1, (uintptr_t) $3); } ; -CF_CLI(RESTART, proto_patt text_or_none, (<protocol> | \"<pattern>\" | all) [message], [[Restart protocol]]) +CF_CLI(RESTART, proto_patt opttext, (<protocol> | \"<pattern>\" | all) [message], [[Restart protocol]]) { proto_apply_cmd($2, proto_cmd_restart, 1, (uintptr_t) $3); } ; CF_CLI(RELOAD, proto_patt, <protocol> | \"<pattern>\" | all, [[Reload protocol]]) { proto_apply_cmd($2, proto_cmd_reload, 1, CMD_RELOAD); } ; diff --git a/nest/iface.c b/nest/iface.c index 3dd45065..a633f748 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -32,11 +32,14 @@ #include "lib/resource.h" #include "lib/string.h" #include "conf/conf.h" +#include "sysdep/unix/krt.h" static pool *if_pool; list iface_list; +static void if_recalc_preferred(struct iface *i); + /** * ifa_dump - dump interface address * @a: interface address descriptor @@ -46,10 +49,11 @@ list iface_list; void ifa_dump(struct ifa *a) { - debug("\t%I, net %I/%-2d bc %I -> %I%s%s%s\n", a->ip, a->prefix, a->pxlen, a->brd, a->opposite, - (a->flags & IF_UP) ? "" : " DOWN", - (a->flags & IA_PRIMARY) ? "" : " SEC", - (a->flags & IA_PEER) ? 
"PEER" : ""); + debug("\t%I, net %N bc %I -> %I%s%s%s%s\n", a->ip, &a->prefix, a->brd, a->opposite, + (a->flags & IA_PRIMARY) ? " PRIMARY" : "", + (a->flags & IA_SECONDARY) ? " SEC" : "", + (a->flags & IA_HOST) ? " HOST" : "", + (a->flags & IA_PEER) ? " PEER" : ""); } /** @@ -89,7 +93,8 @@ if_dump(struct iface *i) WALK_LIST(a, i->addrs) { ifa_dump(a); - ASSERT((a != i->addr) == !(a->flags & IA_PRIMARY)); + ASSERT(!!(a->flags & IA_PRIMARY) == + ((a == i->addr4) || (a == i->addr6) || (a == i->llv6))); } } @@ -140,12 +145,13 @@ if_copy(struct iface *to, struct iface *from) static inline void ifa_send_notify(struct proto *p, unsigned c, struct ifa *a) { - if (p->ifa_notify && (!p->vrf || p->vrf == a->iface->master)) + if (p->ifa_notify && + (p->proto_state != PS_DOWN) && + (!p->vrf || p->vrf == a->iface->master)) { if (p->debug & D_IFACES) - log(L_TRACE "%s <%s address %I/%d on interface %s %s", - p->name, (a->flags & IA_PRIMARY) ? " primary" : "", - a->prefix, a->pxlen, a->iface->name, + log(L_TRACE "%s < address %N on interface %s %s", + p->name, &a->prefix, a->iface->name, (c & IF_CHANGE_UP) ? "added" : "removed"); p->ifa_notify(p, c, a); } @@ -158,7 +164,7 @@ ifa_notify_change_(unsigned c, struct ifa *a) DBG("IFA change notification (%x) for %s:%I\n", c, a->iface->name, a->ip); - WALK_LIST(p, active_proto_list) + WALK_LIST(p, proto_list) ifa_send_notify(p, c, a); } @@ -177,7 +183,9 @@ ifa_notify_change(unsigned c, struct ifa *a) static inline void if_send_notify(struct proto *p, unsigned c, struct iface *i) { - if (p->if_notify && (!p->vrf || p->vrf == i->master)) + if (p->if_notify && + (p->proto_state != PS_DOWN) && + (!p->vrf || p->vrf == i->master)) { if (p->debug & D_IFACES) log(L_TRACE "%s < interface %s %s", p->name, i->name, @@ -185,6 +193,7 @@ if_send_notify(struct proto *p, unsigned c, struct iface *i) (c & IF_CHANGE_DOWN) ? "goes down" : (c & IF_CHANGE_MTU) ? "changes MTU" : (c & IF_CHANGE_LINK) ? "changes link" : + (c & IF_CHANGE_PREFERRED) ? 
"changes preferred address" : (c & IF_CHANGE_CREATE) ? "created" : "sends unknown event"); p->if_notify(p, c, i); @@ -213,20 +222,14 @@ if_notify_change(unsigned c, struct iface *i) if (c & IF_CHANGE_DOWN) WALK_LIST(a, i->addrs) - { - a->flags = (i->flags & ~IA_FLAGS) | (a->flags & IA_FLAGS); - ifa_notify_change_(IF_CHANGE_DOWN, a); - } + ifa_notify_change_(IF_CHANGE_DOWN, a); - WALK_LIST(p, active_proto_list) + WALK_LIST(p, proto_list) if_send_notify(p, c, i); if (c & IF_CHANGE_UP) WALK_LIST(a, i->addrs) - { - a->flags = (i->flags & ~IA_FLAGS) | (a->flags & IA_FLAGS); - ifa_notify_change_(IF_CHANGE_UP, a); - } + ifa_notify_change_(IF_CHANGE_UP, a); if (c & IF_CHANGE_UP) neigh_if_up(i); @@ -235,25 +238,25 @@ if_notify_change(unsigned c, struct iface *i) neigh_if_link(i); } -static unsigned -if_recalc_flags(struct iface *i, unsigned flags) +static uint +if_recalc_flags(struct iface *i UNUSED, uint flags) { - if ((flags & (IF_SHUTDOWN | IF_TMP_DOWN)) || - !(flags & IF_ADMIN_UP) || - !i->addr || - (i->master_index && !i->master)) - flags &= ~IF_UP; - else + if ((flags & IF_ADMIN_UP) && + !(flags & (IF_SHUTDOWN | IF_TMP_DOWN)) && + !(i->master_index && !i->master)) flags |= IF_UP; + else + flags &= ~IF_UP; + return flags; } static void -if_change_flags(struct iface *i, unsigned flags) +if_change_flags(struct iface *i, uint flags) { - unsigned of = i->flags; - + uint of = i->flags; i->flags = if_recalc_flags(i, flags); + if ((i->flags ^ of) & IF_UP) if_notify_change((i->flags & IF_UP) ? 
IF_CHANGE_UP : IF_CHANGE_DOWN, i); } @@ -301,7 +304,6 @@ if_update(struct iface *new) WALK_LIST(i, iface_list) if (!strcmp(new->name, i->name)) { - new->addr = i->addr; new->flags = if_recalc_flags(new, new->flags); c = if_what_changed(i, new); if (c & IF_CHANGE_TOO_MUCH) /* Changed a lot, convert it to down/up */ @@ -309,10 +311,13 @@ if_update(struct iface *new) DBG("Interface %s changed too much -- forcing down/up transition\n", i->name); if_change_flags(i, i->flags | IF_TMP_DOWN); rem_node(&i->n); - new->addr = i->addr; + new->addr4 = i->addr4; + new->addr6 = i->addr6; + new->llv6 = i->llv6; + new->sysdep = i->sysdep; memcpy(&new->addrs, &i->addrs, sizeof(i->addrs)); memcpy(i, new, sizeof(*i)); - i->flags &= ~IF_UP; /* IF_TMP_DOWN will be added later */ + i->flags &= ~IF_UP; /* IF_TMP_DOWN will be added later */ goto newif; } @@ -343,13 +348,16 @@ if_start_update(void) { i->flags &= ~IF_UPDATED; WALK_LIST(a, i->addrs) - a->flags &= ~IF_UPDATED; + a->flags &= ~IA_UPDATED; } } void if_end_partial_update(struct iface *i) { + if (i->flags & IF_NEEDS_RECALC) + if_recalc_preferred(i); + if (i->flags & IF_TMP_DOWN) if_change_flags(i, i->flags & ~IF_TMP_DOWN); } @@ -367,7 +375,7 @@ if_end_update(void) else { WALK_LIST_DELSAFE(a, b, i->addrs) - if (!(a->flags & IF_UPDATED)) + if (!(a->flags & IA_UPDATED)) ifa_delete(a); if_end_partial_update(i); } @@ -464,47 +472,105 @@ if_get_by_name(char *name) return i; } -struct ifa *kif_choose_primary(struct iface *i); +static inline void +if_set_preferred(struct ifa **pos, struct ifa *new) +{ + if (*pos) + (*pos)->flags &= ~IA_PRIMARY; + if (new) + new->flags |= IA_PRIMARY; + + *pos = new; +} -static int -ifa_recalc_primary(struct iface *i) +static void +if_recalc_preferred(struct iface *i) { - struct ifa *a = kif_choose_primary(i); + /* + * Preferred address selection priority: + * 1) Address configured in Device protocol + * 2) Sysdep IPv4 address (BSD) + * 3) Old preferred address + * 4) First address in list + */ - if (a == 
i->addr) - return 0; + struct kif_iface_config *ic = kif_get_iface_config(i); + struct ifa *a4 = i->addr4, *a6 = i->addr6, *ll = i->llv6; + ip_addr pref_v4 = ic->pref_v4; + uint change = 0; + + if (kif_update_sysdep_addr(i)) + change |= IF_CHANGE_SYSDEP; - if (i->addr) - i->addr->flags &= ~IA_PRIMARY; + /* BSD sysdep address */ + if (ipa_zero(pref_v4) && ip4_nonzero(i->sysdep)) + pref_v4 = ipa_from_ip4(i->sysdep); - if (a) + struct ifa *a; + WALK_LIST(a, i->addrs) { - a->flags |= IA_PRIMARY; - rem_node(&a->n); - add_head(&i->addrs, &a->n); + /* Secondary address is never selected */ + if (a->flags & IA_SECONDARY) + continue; + + if (ipa_is_ip4(a->ip)) { + if (!a4 || ipa_equal(a->ip, pref_v4)) + a4 = a; + } else if (!ipa_is_link_local(a->ip)) { + if (!a6 || ipa_equal(a->ip, ic->pref_v6)) + a6 = a; + } else { + if (!ll || ipa_equal(a->ip, ic->pref_ll)) + ll = a; + } } - i->addr = a; - return 1; + if (a4 != i->addr4) + { + if_set_preferred(&i->addr4, a4); + change |= IF_CHANGE_ADDR4; + } + + if (a6 != i->addr6) + { + if_set_preferred(&i->addr6, a6); + change |= IF_CHANGE_ADDR6; + } + + if (ll != i->llv6) + { + if_set_preferred(&i->llv6, ll); + change |= IF_CHANGE_LLV6; + } + + i->flags &= ~IF_NEEDS_RECALC; + + /* + * FIXME: There should be proper notification instead of iface restart: + * if_notify_change(change, i) + */ + if (change) + if_change_flags(i, i->flags | IF_TMP_DOWN); } void -ifa_recalc_all_primary_addresses(void) +if_recalc_all_preferred_addresses(void) { struct iface *i; WALK_LIST(i, iface_list) - { - if (ifa_recalc_primary(i)) - if_change_flags(i, i->flags | IF_TMP_DOWN); - } + { + if_recalc_preferred(i); + + if (i->flags & IF_TMP_DOWN) + if_change_flags(i, i->flags & ~IF_TMP_DOWN); + } } static inline int ifa_same(struct ifa *a, struct ifa *b) { - return ipa_equal(a->ip, b->ip) && ipa_equal(a->prefix, b->prefix) && - a->pxlen == b->pxlen; + return ipa_equal(a->ip, b->ip) && net_equal(&a->prefix, &b->prefix); } @@ -530,25 +596,23 @@ ifa_update(struct 
ifa *a) b->scope == a->scope && !((b->flags ^ a->flags) & IA_PEER)) { - b->flags |= IF_UPDATED; + b->flags |= IA_UPDATED; return b; } ifa_delete(b); break; } -#ifndef IPV6 - if ((i->flags & IF_BROADCAST) && !ipa_nonzero(a->brd)) - log(L_ERR "Missing broadcast address for interface %s", i->name); -#endif + if ((a->prefix.type == NET_IP4) && (i->flags & IF_BROADCAST) && ipa_zero(a->brd)) + log(L_WARN "Missing broadcast address for interface %s", i->name); b = mb_alloc(if_pool, sizeof(struct ifa)); memcpy(b, a, sizeof(struct ifa)); add_tail(&i->addrs, &b->n); - b->flags = (i->flags & ~IA_FLAGS) | (a->flags & IA_FLAGS); - if (ifa_recalc_primary(i)) - if_change_flags(i, i->flags | IF_TMP_DOWN); - if (b->flags & IF_UP) + b->flags |= IA_UPDATED; + + i->flags |= IF_NEEDS_RECALC; + if (i->flags & IF_UP) ifa_notify_change(IF_CHANGE_CREATE | IF_CHANGE_UP, b); return b; } @@ -571,25 +635,32 @@ ifa_delete(struct ifa *a) if (ifa_same(b, a)) { rem_node(&b->n); - if (b->flags & IF_UP) - { - b->flags &= ~IF_UP; - ifa_notify_change(IF_CHANGE_DOWN, b); - } + if (b->flags & IA_PRIMARY) { - if_change_flags(i, i->flags | IF_TMP_DOWN); - ifa_recalc_primary(i); + /* + * We unlink deleted preferred address and mark for recalculation. + * FIXME: This could break if we make iface scan non-atomic, as + * protocols still could use the freed address until they get + * if_notify from preferred route recalculation. 
+ */ + if (b == i->addr4) i->addr4 = NULL; + if (b == i->addr6) i->addr6 = NULL; + if (b == i->llv6) i->llv6 = NULL; + i->flags |= IF_NEEDS_RECALC; } + + if (i->flags & IF_UP) + ifa_notify_change(IF_CHANGE_DOWN, b); + mb_free(b); return; } } u32 -if_choose_router_id(struct iface_patt *mask UNUSED6, u32 old_id UNUSED6) +if_choose_router_id(struct iface_patt *mask, u32 old_id) { -#ifndef IPV6 struct iface *i; struct ifa *a, *b; @@ -602,6 +673,9 @@ if_choose_router_id(struct iface_patt *mask UNUSED6, u32 old_id UNUSED6) WALK_LIST(a, i->addrs) { + if (a->prefix.type != NET_IP4) + continue; + if (a->flags & IA_SECONDARY) continue; @@ -626,10 +700,6 @@ if_choose_router_id(struct iface_patt *mask UNUSED6, u32 old_id UNUSED6) log(L_INFO "Chosen router ID %R according to interface %s", id, b->iface->name); return id; - -#else - return 0; -#endif } /** @@ -672,17 +742,17 @@ iface_patt_match(struct iface_patt *ifp, struct iface *i, struct ifa *a) continue; } - if (p->pxlen == 0) + if (p->prefix.pxlen == 0) return pos; if (!a) continue; - if (ipa_in_net(a->ip, p->prefix, p->pxlen)) + if (ipa_in_netX(a->ip, &p->prefix)) return pos; if ((a->flags & IA_PEER) && - ipa_in_net(a->opposite, p->prefix, p->pxlen)) + ipa_in_netX(a->opposite, &p->prefix)) return pos; continue; @@ -716,8 +786,7 @@ iface_plists_equal(struct iface_patt *pa, struct iface_patt *pb) (!x->pattern && y->pattern) || /* This nasty lines where written by me... :-( Feela */ (!y->pattern && x->pattern) || ((x->pattern != y->pattern) && strcmp(x->pattern, y->pattern)) || - !ipa_equal(x->prefix, y->prefix) || - (x->pxlen != y->pxlen)) + !net_equal(&x->prefix, &y->prefix)) return 0; x = (void *) x->n.next; y = (void *) y->n.next; @@ -750,16 +819,17 @@ iface_patts_equal(list *a, list *b, int (*comp)(struct iface_patt *, struct ifac static void if_show_addr(struct ifa *a) { - byte opp[STD_ADDRESS_P_LENGTH + 16]; + byte *flg, opp[IPA_MAX_TEXT_LENGTH + 16]; + + flg = (a->flags & IA_PRIMARY) ? 
"Preferred, " : (a->flags & IA_SECONDARY) ? "Secondary, " : ""; if (ipa_nonzero(a->opposite)) - bsprintf(opp, ", opposite %I", a->opposite); + bsprintf(opp, "opposite %I, ", a->opposite); else opp[0] = 0; - cli_msg(-1003, "\t%I/%d (%s%s, scope %s)", - a->ip, a->pxlen, - (a->flags & IA_PRIMARY) ? "Primary" : (a->flags & IA_SECONDARY) ? "Secondary" : "Unselected", - opp, ip_scope_text(a->scope)); + + cli_msg(-1003, "\t%I/%d (%s%sscope %s)", + a->ip, a->prefix.pxlen, flg, opp, ip_scope_text(a->scope)); } void @@ -780,7 +850,7 @@ if_show(void) else if (i->master_index) bsprintf(mbuf, " master=#%u", i->master_index); - cli_msg(-1001, "%s %s (index=%d%s)", i->name, (i->flags & IF_UP) ? "up" : "DOWN", i->index, mbuf); + cli_msg(-1001, "%s %s (index=%d%s)", i->name, (i->flags & IF_UP) ? "up" : "down", i->index, mbuf); if (!(i->flags & IF_MULTIACCESS)) type = "PtP"; else @@ -794,10 +864,13 @@ if_show(void) (i->flags & IF_LOOPBACK) ? " Loopback" : "", (i->flags & IF_IGNORE) ? " Ignored" : "", i->mtu); - if (i->addr) - if_show_addr(i->addr); + WALK_LIST(a, i->addrs) - if (a != i->addr) + if (a->prefix.type == NET_IP4) + if_show_addr(a); + + WALK_LIST(a, i->addrs) + if (a->prefix.type == NET_IP6) if_show_addr(a); } cli_msg(0, ""); @@ -807,16 +880,25 @@ void if_show_summary(void) { struct iface *i; - byte addr[STD_ADDRESS_P_LENGTH + 16]; - cli_msg(-2005, "interface state address"); + cli_msg(-2005, "%-10s %-6s %-18s %s", "Interface", "State", "IPv4 address", "IPv6 address"); WALK_LIST(i, iface_list) { - if (i->addr) - bsprintf(addr, "%I/%d", i->addr->ip, i->addr->pxlen); + byte a4[IPA_MAX_TEXT_LENGTH + 17]; + byte a6[IPA_MAX_TEXT_LENGTH + 17]; + + if (i->addr4) + bsprintf(a4, "%I/%d", i->addr4->ip, i->addr4->prefix.pxlen); else - addr[0] = 0; - cli_msg(-1005, "%-9s %-5s %s", i->name, (i->flags & IF_UP) ? 
"up" : "DOWN", addr); + a4[0] = 0; + + if (i->addr6) + bsprintf(a6, "%I/%d", i->addr6->ip, i->addr6->prefix.pxlen); + else + a6[0] = 0; + + cli_msg(-1005, "%-10s %-6s %-18s %s", + i->name, (i->flags & IF_UP) ? "up" : "down", a4, a6); } cli_msg(0, ""); } diff --git a/nest/iface.h b/nest/iface.h index b8e69838..ab3f8f35 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -10,6 +10,7 @@ #define _BIRD_IFACE_H_ #include "lib/lists.h" +#include "lib/ip.h" extern list iface_list; @@ -19,9 +20,8 @@ struct pool; struct ifa { /* Interface address */ node n; struct iface *iface; /* Interface this address belongs to */ + net_addr prefix; /* Network prefix */ ip_addr ip; /* IP address of this host */ - ip_addr prefix; /* Network prefix */ - unsigned pxlen; /* Prefix length */ ip_addr brd; /* Broadcast address */ ip_addr opposite; /* Opposite end of a point-to-point link */ unsigned scope; /* Interface address scope */ @@ -35,13 +35,16 @@ struct iface { unsigned mtu; unsigned index; /* OS-dependent interface index */ unsigned master_index; /* Interface index of master iface */ - list addrs; /* Addresses assigned to this interface */ - struct ifa *addr; /* Primary address */ struct iface *master; /* Master iface (e.g. 
for VRF) */ + list addrs; /* Addresses assigned to this interface */ + struct ifa *addr4; /* Primary address for IPv4 */ + struct ifa *addr6; /* Primary address for IPv6 */ + struct ifa *llv6; /* Primary link-local address for IPv6 */ + ip4_addr sysdep; /* Arbitrary IPv4 address for internal sysdep use */ list neighbors; /* All neighbors on this interface */ }; -#define IF_UP 1 /* IF_ADMIN_UP and IP address known */ +#define IF_UP 1 /* Currently just IF_ADMIN_UP */ #define IF_MULTIACCESS 2 #define IF_BROADCAST 4 #define IF_MULTICAST 8 @@ -72,7 +75,10 @@ struct iface { #define IF_JUST_CREATED 0x10000000 /* Send creation event as soon as possible */ #define IF_TMP_DOWN 0x20000000 /* Temporary shutdown due to interface reconfiguration */ -#define IF_UPDATED 0x40000000 /* Touched in last scan */ +#define IF_UPDATED 0x40000000 /* Iface touched in last scan */ +#define IF_NEEDS_RECALC 0x80000000 /* Preferred address recalculation is needed */ + +#define IA_UPDATED IF_UPDATED /* Address touched in last scan */ /* Interface change events */ @@ -81,8 +87,14 @@ struct iface { #define IF_CHANGE_MTU 4 #define IF_CHANGE_CREATE 8 /* Seen this interface for the first time */ #define IF_CHANGE_LINK 0x10 +#define IF_CHANGE_ADDR4 0x100 /* Change of iface->addr4 */ +#define IF_CHANGE_ADDR6 0x200 /* ... 
*/ +#define IF_CHANGE_LLV6 0x400 +#define IF_CHANGE_SYSDEP 0x800 #define IF_CHANGE_TOO_MUCH 0x40000000 /* Used internally */ +#define IF_CHANGE_PREFERRED (IF_CHANGE_ADDR4 | IF_CHANGE_ADDR6 | IF_CHANGE_LLV6) + void if_init(void); void if_dump(struct iface *); void if_dump_all(void); @@ -101,7 +113,7 @@ void if_feed_baby(struct proto *); struct iface *if_find_by_index(unsigned); struct iface *if_find_by_name(char *); struct iface *if_get_by_name(char *); -void ifa_recalc_all_primary_addresses(void); +void if_recalc_all_preferred_addresses(void); /* The Neighbor Cache */ @@ -120,12 +132,15 @@ typedef struct neighbor { SCOPE_HOST when it's our own address */ } neighbor; -#define NEF_STICKY 1 -#define NEF_ONLINK 2 -#define NEF_BIND 4 /* Used internally for neighbors bound to an iface */ +#define NEF_STICKY 1 +#define NEF_ONLINK 2 +#define NEF_BIND 4 /* Used internally for neighbors bound to an iface */ +#define NEF_IFACE 8 /* Neighbors bound to iface */ + neighbor *neigh_find(struct proto *, ip_addr *, unsigned flags); neighbor *neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags); +neighbor *neigh_find_iface(struct proto *p, struct iface *ifa); static inline int neigh_connected_to(struct proto *p, ip_addr *a, struct iface *i) { @@ -150,8 +165,7 @@ struct iface_patt_node { node n; int positive; byte *pattern; - ip_addr prefix; - int pxlen; + net_addr prefix; }; struct iface_patt { diff --git a/nest/mrtdump.h b/nest/mrtdump.h index 73932553..28b3bdfd 100644 --- a/nest/mrtdump.h +++ b/nest/mrtdump.h @@ -28,4 +28,3 @@ void mrt_dump_message(struct proto *p, u16 type, u16 subtype, byte *buf, u32 len); #endif - diff --git a/nest/neighbor.c b/nest/neighbor.c index f8159d35..4f93e29e 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -46,14 +46,15 @@ #include "lib/resource.h" #define NEIGH_HASH_SIZE 256 +#define NEIGH_HASH_OFFSET 24 static slab *neigh_slab; -static list sticky_neigh_list, neigh_hash_table[NEIGH_HASH_SIZE]; +static list 
sticky_neigh_list, iface_neigh_list, neigh_hash_table[NEIGH_HASH_SIZE]; static inline uint neigh_hash(struct proto *p, ip_addr *a) { - return (p->hash_key ^ ipa_hash(*a)) & (NEIGH_HASH_SIZE-1); + return (p->hash_key ^ ipa_hash(*a)) >> NEIGH_HASH_OFFSET; } static int @@ -80,17 +81,17 @@ if_connected(ip_addr *a, struct iface *i, struct ifa **ap) } else { - if (ipa_in_net(*a, b->prefix, b->pxlen)) + if (ipa_in_netX(*a, &b->prefix)) { -#ifndef IPV6 - if ((b->pxlen < (BITS_PER_IP_ADDRESS - 1)) && - (ipa_equal(*a, b->prefix) || /* Network address */ + /* Do not allow IPv4 network and broadcast addresses */ + if (ipa_is_ip4(*a) && + (net_pxlen(&b->prefix) < (IP4_MAX_PREFIX_LENGTH - 1)) && + (ipa_equal(*a, net_prefix(&b->prefix)) || /* Network address */ ipa_equal(*a, b->brd))) /* Broadcast */ { *ap = NULL; return -1; } -#endif return b->scope; } @@ -155,7 +156,7 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) WALK_LIST(i, iface_list) if ((!p->vrf || p->vrf == i->master) && ((scope = if_connected(a, i, &addr)) >= 0)) - { + { ifa = i; break; } @@ -167,6 +168,8 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) return NULL; n = sl_alloc(neigh_slab); + memset(n, 0, sizeof(neighbor)); + n->addr = *a; if (scope >= 0) { @@ -188,6 +191,35 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) return n; } +neighbor * +neigh_find_iface(struct proto *p, struct iface *ifa) +{ + neighbor *n; + node *nn; + + /* We keep neighbors with NEF_IFACE foremost in ifa->neighbors list */ + WALK_LIST2(n, nn, ifa->neighbors, if_n) + { + if (! (n->flags & NEF_IFACE)) + break; + + if (n->proto == p) + return n; + } + + n = sl_alloc(neigh_slab); + memset(n, 0, sizeof(neighbor)); + + add_tail(&iface_neigh_list, &n->n); + add_head(&ifa->neighbors, &n->if_n); + n->iface = ifa; + n->proto = p; + n->flags = NEF_IFACE; + n->scope = (ifa->flags & IF_UP) ? 
SCOPE_HOST : -1; + + return n; +} + /** * neigh_dump - dump specified neighbor entry. * @n: the entry to dump @@ -206,6 +238,8 @@ neigh_dump(neighbor *n) debug("%s %p %08x scope %s", n->proto->name, n->data, n->aux, ip_scope_text(n->scope)); if (n->flags & NEF_STICKY) debug(" STICKY"); + if (n->flags & NEF_IFACE) + debug(" IFACE"); debug("\n"); } @@ -224,6 +258,8 @@ neigh_dump_all(void) debug("Known neighbors:\n"); WALK_LIST(n, sticky_neigh_list) neigh_dump(n); + WALK_LIST(n, iface_neigh_list) + neigh_dump(n); for(i=0; i<NEIGH_HASH_SIZE; i++) WALK_LIST(n, neigh_hash_table[i]) neigh_dump(n); @@ -233,14 +269,19 @@ neigh_dump_all(void) static void neigh_up(neighbor *n, struct iface *i, int scope, struct ifa *a) { + DBG("Waking up sticky neighbor %I\n", n->addr); n->iface = i; n->ifa = a; n->scope = scope; - add_tail(&i->neighbors, &n->if_n); - rem_node(&n->n); - add_tail(&neigh_hash_table[neigh_hash(n->proto, &n->addr)], &n->n); - DBG("Waking up sticky neighbor %I\n", n->addr); - if (n->proto->neigh_notify && n->proto->core_state != FS_FLUSHING) + + if (! (n->flags & NEF_IFACE)) + { + add_tail(&i->neighbors, &n->if_n); + rem_node(&n->n); + add_tail(&neigh_hash_table[neigh_hash(n->proto, &n->addr)], &n->n); + } + + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) n->proto->neigh_notify(n); } @@ -248,14 +289,20 @@ static void neigh_down(neighbor *n) { DBG("Flushing neighbor %I on %s\n", n->addr, n->iface->name); - rem_node(&n->if_n); - if (! (n->flags & NEF_BIND)) + if (! (n->flags & (NEF_BIND | NEF_IFACE))) n->iface = NULL; n->ifa = NULL; n->scope = -1; - if (n->proto->neigh_notify && n->proto->core_state != FS_FLUSHING) + + if (! 
(n->flags & NEF_IFACE)) + { + rem_node(&n->if_n); + rem_node(&n->n); + } + + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) n->proto->neigh_notify(n); - rem_node(&n->n); + if (n->flags & NEF_STICKY) { add_tail(&sticky_neigh_list, &n->n); @@ -273,7 +320,8 @@ neigh_down(neighbor *n) return; } } - else + + if (! (n->flags & (NEF_STICKY | NEF_IFACE))) sl_free(neigh_slab, n); } @@ -291,10 +339,17 @@ void neigh_if_up(struct iface *i) { struct ifa *a; - neighbor *n, *next; + neighbor *n; + node *x, *y; int scope; - WALK_LIST_DELSAFE(n, next, sticky_neigh_list) + /* Wake up all iface neighbors */ + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if ((n->scope < 0) && (n->flags & NEF_IFACE)) + neigh_up(n, i, SCOPE_HOST, NULL); + + /* Wake up appropriate sticky neighbors */ + WALK_LIST_DELSAFE(n, x, sticky_neigh_list) if ((!n->iface || n->iface == i) && ((scope = if_connected(&n->addr, i, &a)) >= 0)) neigh_up(n, i, scope, a); @@ -312,10 +367,11 @@ neigh_if_up(struct iface *i) void neigh_if_down(struct iface *i) { + neighbor *n; node *x, *y; - WALK_LIST_DELSAFE(x, y, i->neighbors) - neigh_down(SKIP_BACK(neighbor, if_n, x)); + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + neigh_down(n); } /** @@ -329,14 +385,12 @@ neigh_if_down(struct iface *i) void neigh_if_link(struct iface *i) { + neighbor *n; node *x, *y; - WALK_LIST_DELSAFE(x, y, i->neighbors) - { - neighbor *n = SKIP_BACK(neighbor, if_n, x); - if (n->proto->neigh_notify && n->proto->core_state != FS_FLUSHING) - n->proto->neigh_notify(n); - } + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) + n->proto->neigh_notify(n); } /** @@ -353,19 +407,21 @@ void neigh_ifa_update(struct ifa *a) { struct iface *i = a->iface; + struct ifa *aa; node *x, *y; - + neighbor *n; + int scope; + /* Remove all neighbors whose scope has changed */ - WALK_LIST_DELSAFE(x, y, i->neighbors) - { - struct ifa *aa; - neighbor *n = SKIP_BACK(neighbor, if_n, x); 
- if (if_connected(&n->addr, i, &aa) != n->scope) - neigh_down(n); - } + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if (n->ifa && (if_connected(&n->addr, i, &aa) != n->scope)) + neigh_down(n); /* Wake up all sticky neighbors that are reachable now */ - neigh_if_up(i); + WALK_LIST_DELSAFE(n, x, sticky_neigh_list) + if ((!n->iface || n->iface == i) && + ((scope = if_connected(&n->addr, i, &aa)) >= 0)) + neigh_up(n, i, scope, aa); } static inline void @@ -374,7 +430,7 @@ neigh_prune_one(neighbor *n) if (n->proto->proto_state != PS_DOWN) return; rem_node(&n->n); - if (n->scope >= 0) + if (n->if_n.next) rem_node(&n->if_n); sl_free(neigh_slab, n); } @@ -399,6 +455,8 @@ neigh_prune(void) neigh_prune_one(n); WALK_LIST_DELSAFE(n, m, sticky_neigh_list) neigh_prune_one(n); + WALK_LIST_DELSAFE(n, m, iface_neigh_list) + neigh_prune_one(n); } /** @@ -411,10 +469,11 @@ neigh_prune(void) void neigh_init(pool *if_pool) { - int i; - neigh_slab = sl_new(if_pool, sizeof(neighbor)); + init_list(&sticky_neigh_list); - for(i=0; i<NEIGH_HASH_SIZE; i++) + init_list(&iface_neigh_list); + + for(int i = 0; i < NEIGH_HASH_SIZE; i++) init_list(&neigh_hash_table[i]); } diff --git a/nest/password.c b/nest/password.c index e4813741..6f87af21 100644 --- a/nest/password.c +++ b/nest/password.c @@ -10,6 +10,7 @@ #include "nest/bird.h" #include "nest/password.h" #include "lib/string.h" +#include "lib/timer.h" #include "lib/mac.h" struct password_item *last_password_item = NULL; @@ -19,12 +20,13 @@ password_find(list *l, int first_fit) { struct password_item *pi; struct password_item *pf = NULL; + btime now_ = current_real_time(); if (l) { WALK_LIST(pi, *l) { - if ((pi->genfrom < now_real) && (pi->gento > now_real)) + if ((pi->genfrom < now_) && (pi->gento > now_)) { if (first_fit) return pi; @@ -41,12 +43,13 @@ struct password_item * password_find_by_id(list *l, uint id) { struct password_item *pi; + btime now_ = current_real_time(); if (!l) return NULL; WALK_LIST(pi, *l) - if ((pi->id == id) 
&& (pi->accfrom <= now_real) && (now_real < pi->accto)) + if ((pi->id == id) && (pi->accfrom <= now_) && (now_ < pi->accto)) return pi; return NULL; @@ -56,12 +59,13 @@ struct password_item * password_find_by_value(list *l, char *pass, uint size) { struct password_item *pi; + btime now_ = current_real_time(); if (!l) return NULL; WALK_LIST(pi, *l) - if (password_verify(pi, pass, size) && (pi->accfrom <= now_real) && (now_real < pi->accto)) + if (password_verify(pi, pass, size) && (pi->accfrom <= now_) && (now_ < pi->accto)) return pi; return NULL; diff --git a/nest/password.h b/nest/password.h index f21483c4..c4017848 100644 --- a/nest/password.h +++ b/nest/password.h @@ -10,15 +10,13 @@ #ifndef PASSWORD_H #define PASSWORD_H -#include "lib/timer.h" - struct password_item { node n; char *password; /* Key data, null terminated */ uint length; /* Key length, without null */ uint id; /* Key ID */ uint alg; /* MAC algorithm */ - bird_clock_t accfrom, accto, genfrom, gento; + btime accfrom, accto, genfrom, gento; }; extern struct password_item *last_password_item; diff --git a/nest/proto-hooks.c b/nest/proto-hooks.c index 7395b45e..71cddd64 100644 --- a/nest/proto-hooks.c +++ b/nest/proto-hooks.c @@ -190,7 +190,7 @@ void ifa_notify(struct proto *p, unsigned flags, struct ifa *a) /** * rt_notify - notify instance about routing table change * @p: protocol instance - * @table: a routing table + * @channel: notifying channel * @net: a network entry * @new: new route for the network * @old: old route for the network @@ -281,7 +281,7 @@ int import_control(struct proto *p, rte **e, ea_list **attrs, struct linpool *po /** * rte_recalculate - prepare routes for comparison - * @table: a routing table + * @table: a routing table * @net: a network entry * @new: new route for the network * @old: old route for the network diff --git a/nest/proto.c b/nest/proto.c index 552d53ae..15d6f4de 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -13,6 +13,7 @@ #include "lib/resource.h" #include 
"lib/lists.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/string.h" #include "conf/conf.h" #include "nest/route.h" @@ -21,19 +22,12 @@ #include "filter/filter.h" pool *proto_pool; +list proto_list; static list protocol_list; -static list proto_list; #define PD(pr, msg, args...) do { if (pr->debug & D_STATES) { log(L_TRACE "%s: " msg, pr->name , ## args); } } while(0) -list active_proto_list; -static list inactive_proto_list; -static list initial_proto_list; -static list flush_proto_list; -static struct proto *initial_device_proto; - -static event *proto_flush_event; static timer *proto_shutdown_timer; static timer *gr_wait_timer; @@ -46,199 +40,680 @@ static int graceful_restart_state; static u32 graceful_restart_locks; static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; -static char *c_states[] = { "HUNGRY", "???", "HAPPY", "FLUSHING" }; +static char *c_states[] = { "DOWN", "START", "UP", "FLUSHING" }; + +extern struct protocol proto_unix_iface; -static void proto_flush_loop(void *); -static void proto_shutdown_loop(struct timer *); +static void proto_shutdown_loop(timer *); static void proto_rethink_goal(struct proto *p); -static void proto_want_export_up(struct proto *p); -static void proto_fell_down(struct proto *p); static char *proto_state_name(struct proto *p); +static void channel_verify_limits(struct channel *c); +static inline void channel_reset_limit(struct channel_limit *l); -static void -proto_relink(struct proto *p) -{ - list *l = NULL; - switch (p->core_state) - { - case FS_HUNGRY: - l = &inactive_proto_list; - break; - case FS_HAPPY: - l = &active_proto_list; - break; - case FS_FLUSHING: - l = &flush_proto_list; - break; - default: - ASSERT(0); - } +static inline int proto_is_done(struct proto *p) +{ return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } - rem_node(&p->n); - add_tail(l, &p->n); -} +static inline int channel_is_active(struct channel *c) +{ return (c->channel_state == CS_START) || 
(c->channel_state == CS_UP); } static void proto_log_state_change(struct proto *p) { if (p->debug & D_STATES) + { + char *name = proto_state_name(p); + if (name != p->last_state_name_announced) { - char *name = proto_state_name(p); - if (name != p->last_state_name_announced) - { - p->last_state_name_announced = name; - PD(p, "State changed to %s", proto_state_name(p)); - } + p->last_state_name_announced = name; + PD(p, "State changed to %s", proto_state_name(p)); } + } else p->last_state_name_announced = NULL; } +struct channel_config * +proto_cf_find_channel(struct proto_config *pc, uint net_type) +{ + struct channel_config *cc; + + WALK_LIST(cc, pc->channels) + if (cc->net_type == net_type) + return cc; + + return NULL; +} + /** - * proto_new - create a new protocol instance - * @c: protocol configuration - * @size: size of protocol data structure (each protocol instance is represented by - * a structure starting with generic part [struct &proto] and continued - * with data specific to the protocol) + * proto_find_channel_by_table - find channel connected to a routing table + * @p: protocol instance + * @t: routing table * - * When a new configuration has been read in, the core code starts - * initializing all the protocol instances configured by calling their - * init() hooks with the corresponding instance configuration. The initialization - * code of the protocol is expected to create a new instance according to the - * configuration by calling this function and then modifying the default settings - * to values wanted by the protocol. 
+ * Returns pointer to channel or NULL */ -void * -proto_new(struct proto_config *c, unsigned size) +struct channel * +proto_find_channel_by_table(struct proto *p, struct rtable *t) { - struct protocol *pr = c->protocol; - struct proto *p = mb_allocz(proto_pool, size); - - p->cf = c; - p->debug = c->debug; - p->mrtdump = c->mrtdump; - p->name = c->name; - p->preference = c->preference; - p->disabled = c->disabled; - p->proto = pr; - p->table = c->table->table; - p->hash_key = random_u32(); - c->proto = p; - return p; + struct channel *c; + + WALK_LIST(c, p->channels) + if (c->table == t) + return c; + + return NULL; } -static void -proto_init_instance(struct proto *p) +/** + * proto_find_channel_by_name - find channel by its name + * @p: protocol instance + * @n: channel name + * + * Returns pointer to channel or NULL + */ +struct channel * +proto_find_channel_by_name(struct proto *p, const char *n) { - /* Here we cannot use p->cf->name since it won't survive reconfiguration */ - p->pool = rp_new(proto_pool, p->proto->name); - p->attn = ev_new(p->pool); - p->attn->data = p; + struct channel *c; - if (graceful_restart_state == GRS_INIT) - p->gr_recovery = 1; + WALK_LIST(c, p->channels) + if (!strcmp(c->name, n)) + return c; - if (! p->proto->multitable) - rt_lock_table(p->table); + return NULL; } -extern pool *rt_table_pool; /** - * proto_add_announce_hook - connect protocol to a routing table + * proto_add_channel - connect protocol to a routing table * @p: protocol instance - * @t: routing table to connect to - * @stats: per-table protocol statistics + * @cf: channel configuration * - * This function creates a connection between the protocol instance @p and the - * routing table @t, making the protocol hear all changes in the table. 
+ * This function creates a channel between the protocol instance @p and the + * routing table specified in the configuration @cf, making the protocol hear + * all changes in the table and allowing the protocol to update routes in the + * table. * - * The announce hook is linked in the protocol ahook list. Announce hooks are - * allocated from the routing table resource pool and when protocol accepts - * routes also in the table ahook list. The are linked to the table ahook list - * and unlinked from it depending on export_state (in proto_want_export_up() and - * proto_want_export_down()) and they are automatically freed after the protocol - * is flushed (in proto_fell_down()). - * - * Unless you want to listen to multiple routing tables (as the Pipe protocol - * does), you needn't to worry about this function since the connection to the - * protocol's primary routing table is initialized automatically by the core - * code. + * The channel is linked in the protocol channel list and when active also in + * the table channel list. Channels are allocated from the global resource pool + * (@proto_pool) and they are automatically freed when the protocol is removed. 
*/ -struct announce_hook * -proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *stats) + +struct channel * +proto_add_channel(struct proto *p, struct channel_config *cf) +{ + struct channel *c = mb_allocz(proto_pool, cf->channel->channel_size); + + c->name = cf->name; + c->channel = cf->channel; + c->proto = p; + c->table = cf->table->table; + + c->in_filter = cf->in_filter; + c->out_filter = cf->out_filter; + c->rx_limit = cf->rx_limit; + c->in_limit = cf->in_limit; + c->out_limit = cf->out_limit; + + c->net_type = cf->net_type; + c->ra_mode = cf->ra_mode; + c->preference = cf->preference; + c->merge_limit = cf->merge_limit; + c->in_keep_filtered = cf->in_keep_filtered; + + c->channel_state = CS_DOWN; + c->export_state = ES_DOWN; + c->last_state_change = current_time(); + c->reloadable = 1; + + CALL(c->channel->init, c, cf); + + add_tail(&p->channels, &c->n); + + PD(p, "Channel %s connected to table %s", c->name, c->table->name); + + return c; +} + +void +proto_remove_channel(struct proto *p, struct channel *c) +{ + ASSERT(c->channel_state == CS_DOWN); + + PD(p, "Channel %s removed", c->name); + + rem_node(&c->n); + mb_free(c); +} + + +static void +proto_start_channels(struct proto *p) +{ + struct channel *c; + WALK_LIST(c, p->channels) + if (!c->disabled) + channel_set_state(c, CS_UP); +} + +static void +proto_pause_channels(struct proto *p) +{ + struct channel *c; + WALK_LIST(c, p->channels) + if (!c->disabled && channel_is_active(c)) + channel_set_state(c, CS_START); +} + +static void +proto_stop_channels(struct proto *p) +{ + struct channel *c; + WALK_LIST(c, p->channels) + if (!c->disabled && channel_is_active(c)) + channel_set_state(c, CS_FLUSHING); +} + +static void +proto_remove_channels(struct proto *p) +{ + struct channel *c; + WALK_LIST_FIRST(c, p->channels) + proto_remove_channel(p, c); +} + +static void +channel_schedule_feed(struct channel *c, int initial) +{ + // DBG("%s: Scheduling meal\n", p->name); + 
ASSERT(c->channel_state == CS_UP); + + c->export_state = ES_FEEDING; + c->refeeding = !initial; + + ev_schedule(c->feed_event); +} + +static void +channel_feed_loop(void *ptr) +{ + struct channel *c = ptr; + + if (c->export_state != ES_FEEDING) + return; + + if (!c->feed_active) + if (c->proto->feed_begin) + c->proto->feed_begin(c, !c->refeeding); + + // DBG("Feeding protocol %s continued\n", p->name); + if (!rt_feed_channel(c)) + { + ev_schedule(c->feed_event); + return; + } + + // DBG("Feeding protocol %s finished\n", p->name); + c->export_state = ES_READY; + // proto_log_state_change(p); + + if (c->proto->feed_end) + c->proto->feed_end(c); +} + + +static void +channel_start_export(struct channel *c) +{ + ASSERT(c->channel_state == CS_UP); + ASSERT(c->export_state == ES_DOWN); + + channel_schedule_feed(c, 1); /* Sets ES_FEEDING */ +} + +static void +channel_stop_export(struct channel *c) +{ + /* Need to abort feeding */ + if (c->export_state == ES_FEEDING) + rt_feed_channel_abort(c); + + c->export_state = ES_DOWN; + c->stats.exp_routes = 0; +} + +static void +channel_do_start(struct channel *c) +{ + rt_lock_table(c->table); + add_tail(&c->table->channels, &c->table_node); + c->proto->active_channels++; + + c->feed_event = ev_new(c->proto->pool); + c->feed_event->data = c; + c->feed_event->hook = channel_feed_loop; + + channel_reset_limit(&c->rx_limit); + channel_reset_limit(&c->in_limit); + channel_reset_limit(&c->out_limit); + + CALL(c->channel->start, c); +} + +static void +channel_do_flush(struct channel *c) +{ + rt_schedule_prune(c->table); + + c->gr_wait = 0; + if (c->gr_lock) + channel_graceful_restart_unlock(c); + + CALL(c->channel->shutdown, c); +} + +static void +channel_do_down(struct channel *c) { - struct announce_hook *h; + rem_node(&c->table_node); + rt_unlock_table(c->table); + c->proto->active_channels--; - DBG("Connecting protocol %s to table %s\n", p->name, t->name); - PD(p, "Connected to table %s", t->name); + if ((c->stats.imp_routes + 
c->stats.filt_routes) != 0) + log(L_ERR "%s: Channel %s is down but still has some routes", c->proto->name, c->name); - h = mb_allocz(rt_table_pool, sizeof(struct announce_hook)); - h->table = t; - h->proto = p; - h->stats = stats; + memset(&c->stats, 0, sizeof(struct proto_stats)); - h->next = p->ahooks; - p->ahooks = h; + CALL(c->channel->cleanup, c); - if (p->rt_notify && (p->export_state != ES_DOWN)) - add_tail(&t->hooks, &h->n); - return h; + /* Schedule protocol shutddown */ + if (proto_is_done(c->proto)) + ev_schedule(c->proto->event); +} + +void +channel_set_state(struct channel *c, uint state) +{ + uint cs = c->channel_state; + uint es = c->export_state; + + DBG("%s reporting channel %s state transition %s -> %s\n", c->proto->name, c->name, c_states[cs], c_states[state]); + if (state == cs) + return; + + c->channel_state = state; + c->last_state_change = current_time(); + + switch (state) + { + case CS_START: + ASSERT(cs == CS_DOWN || cs == CS_UP); + + if (cs == CS_DOWN) + channel_do_start(c); + + if (es != ES_DOWN) + channel_stop_export(c); + + break; + + case CS_UP: + ASSERT(cs == CS_DOWN || cs == CS_START); + + if (cs == CS_DOWN) + channel_do_start(c); + + if (!c->gr_wait && c->proto->rt_notify) + channel_start_export(c); + + break; + + case CS_FLUSHING: + ASSERT(cs == CS_START || cs == CS_UP); + + if (es != ES_DOWN) + channel_stop_export(c); + + channel_do_flush(c); + break; + + case CS_DOWN: + ASSERT(cs == CS_FLUSHING); + + channel_do_down(c); + break; + + default: + ASSERT(0); + } + // XXXX proto_log_state_change(c); } /** - * proto_find_announce_hook - find announce hooks - * @p: protocol instance - * @t: routing table + * channel_request_feeding - request feeding routes to the channel + * @c: given channel * - * Returns pointer to announce hook or NULL + * Sometimes it is needed to send again all routes to the channel. This is + * called feeding and can be requested by this function. 
This would cause + * channel export state transition to ES_FEEDING (during feeding) and when + * completed, it will switch back to ES_READY. This function can be called + * even when feeding is already running, in that case it is restarted. */ -struct announce_hook * -proto_find_announce_hook(struct proto *p, struct rtable *t) +void +channel_request_feeding(struct channel *c) { - struct announce_hook *a; + ASSERT(c->channel_state == CS_UP); - for (a = p->ahooks; a; a = a->next) - if (a->table == t) - return a; + /* Do nothing if we are still waiting for feeding */ + if (c->export_state == ES_DOWN) + return; - return NULL; + /* If we are already feeding, we want to restart it */ + if (c->export_state == ES_FEEDING) + { + /* Unless feeding is in initial state */ + if (!c->feed_active) + return; + + rt_feed_channel_abort(c); + } + + channel_reset_limit(&c->out_limit); + + /* Hack: reset exp_routes during refeed, and do not decrease it later */ + c->stats.exp_routes = 0; + + channel_schedule_feed(c, 0); /* Sets ES_FEEDING */ + // proto_log_state_change(c); +} + +static inline int +channel_reloadable(struct channel *c) +{ + return c->proto->reload_routes && c->reloadable; } static void -proto_link_ahooks(struct proto *p) +channel_request_reload(struct channel *c) { - struct announce_hook *h; + ASSERT(c->channel_state == CS_UP); + ASSERT(channel_reloadable(c)); + + c->proto->reload_routes(c); - if (p->rt_notify) - for(h=p->ahooks; h; h=h->next) - add_tail(&h->table->hooks, &h->n); + /* + * Should this be done before reload_routes() hook? + * Perhaps, but routes are updated asynchronously. 
+ */ + channel_reset_limit(&c->rx_limit); + channel_reset_limit(&c->in_limit); } -static void -proto_unlink_ahooks(struct proto *p) +const struct channel_class channel_basic = { + .channel_size = sizeof(struct channel), + .config_size = sizeof(struct channel_config) +}; + +void * +channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto) +{ + struct channel_config *cf = NULL; + struct rtable_config *tab = NULL; + + if (net_type) + { + if (!net_val_match(net_type, proto->protocol->channel_mask)) + cf_error("Unsupported channel type"); + + if (proto->net_type && (net_type != proto->net_type)) + cf_error("Different channel type"); + + tab = new_config->def_tables[net_type]; + } + + if (!cc) + cc = &channel_basic; + + cf = cfg_allocz(cc->config_size); + cf->name = name; + cf->channel = cc; + cf->parent = proto; + cf->table = tab; + cf->out_filter = FILTER_REJECT; + + cf->net_type = net_type; + cf->ra_mode = RA_OPTIMAL; + cf->preference = proto->protocol->preference; + + add_tail(&proto->channels, &cf->n); + + return cf; +} + +void * +channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto) +{ + struct channel_config *cf; + + /* We are using name as token, so no strcmp() */ + WALK_LIST(cf, proto->channels) + if (cf->name == name) + { + /* Allow to redefine channel only if inherited from template */ + if (cf->parent == proto) + cf_error("Multiple %s channels", name); + + cf->parent = proto; + return cf; + } + + return channel_config_new(cc, name, net_type, proto); +} + +struct channel_config * +channel_copy_config(struct channel_config *src, struct proto_config *proto) { - struct announce_hook *h; + struct channel_config *dst = cfg_alloc(src->channel->config_size); - if (p->rt_notify) - for(h=p->ahooks; h; h=h->next) - rem_node(&h->n); + memcpy(dst, src, src->channel->config_size); + add_tail(&proto->channels, &dst->n); + CALL(src->channel->copy_config, dst, src); 
+ + return dst; } + +static int reconfigure_type; /* Hack to propagate type info to channel_reconfigure() */ + +int +channel_reconfigure(struct channel *c, struct channel_config *cf) +{ + /* FIXME: better handle these changes, also handle in_keep_filtered */ + if ((c->table != cf->table->table) || (cf->ra_mode && (c->ra_mode != cf->ra_mode))) + return 0; + + /* Note that filter_same() requires arguments in (new, old) order */ + int import_changed = !filter_same(cf->in_filter, c->in_filter); + int export_changed = !filter_same(cf->out_filter, c->out_filter); + + if (c->preference != cf->preference) + import_changed = 1; + + if (c->merge_limit != cf->merge_limit) + export_changed = 1; + + /* Reconfigure channel fields */ + c->in_filter = cf->in_filter; + c->out_filter = cf->out_filter; + c->rx_limit = cf->rx_limit; + c->in_limit = cf->in_limit; + c->out_limit = cf->out_limit; + + // c->ra_mode = cf->ra_mode; + c->merge_limit = cf->merge_limit; + c->preference = cf->preference; + c->in_keep_filtered = cf->in_keep_filtered; + + channel_verify_limits(c); + + /* Execute channel-specific reconfigure hook */ + if (c->channel->reconfigure && !c->channel->reconfigure(c, cf)) + return 0; + + /* If the channel is not open, it has no routes and we cannot reload it anyways */ + if (c->channel_state != CS_UP) + return 1; + + if (reconfigure_type == RECONFIG_SOFT) + { + if (import_changed) + log(L_INFO "Channel %s.%s changed import", c->proto->name, c->name); + + if (export_changed) + log(L_INFO "Channel %s.%s changed export", c->proto->name, c->name); + + return 1; + } + + /* Route reload may be not supported */ + if (import_changed && !channel_reloadable(c)) + return 0; + + if (import_changed || export_changed) + log(L_INFO "Reloading channel %s.%s", c->proto->name, c->name); + + if (import_changed) + channel_request_reload(c); + + if (export_changed) + channel_request_feeding(c); + + return 1; +} + + +int +proto_configure_channel(struct proto *p, struct channel **pc, struct 
channel_config *cf) +{ + struct channel *c = *pc; + + if (!c && cf) + { + *pc = proto_add_channel(p, cf); + } + else if (c && !cf) + { + if (c->channel_state != CS_DOWN) + { + log(L_INFO "Cannot remove channel %s.%s", c->proto->name, c->name); + return 0; + } + + proto_remove_channel(p, c); + *pc = NULL; + } + else if (c && cf) + { + if (!channel_reconfigure(c, cf)) + { + log(L_INFO "Cannot reconfigure channel %s.%s", c->proto->name, c->name); + return 0; + } + } + + return 1; +} + + static void -proto_free_ahooks(struct proto *p) +proto_event(void *ptr) { - struct announce_hook *h, *hn; + struct proto *p = ptr; + + if (p->do_start) + { + if_feed_baby(p); + p->do_start = 0; + } - for(h = p->ahooks; h; h = hn) + if (p->do_stop) { - hn = h->next; - mb_free(h); + if (p->proto == &proto_unix_iface) + if_flush_ifaces(p); + p->do_stop = 0; } - p->ahooks = NULL; - p->main_ahook = NULL; + if (proto_is_done(p)) + { + if (p->proto->cleanup) + p->proto->cleanup(p); + + p->active = 0; + proto_log_state_change(p); + proto_rethink_goal(p); + } +} + + +/** + * proto_new - create a new protocol instance + * @c: protocol configuration + * + * When a new configuration has been read in, the core code starts + * initializing all the protocol instances configured by calling their + * init() hooks with the corresponding instance configuration. The initialization + * code of the protocol is expected to create a new instance according to the + * configuration by calling this function and then modifying the default settings + * to values wanted by the protocol. 
+ */ +void * +proto_new(struct proto_config *cf) +{ + struct proto *p = mb_allocz(proto_pool, cf->protocol->proto_size); + + p->cf = cf; + p->debug = cf->debug; + p->mrtdump = cf->mrtdump; + p->name = cf->name; + p->proto = cf->protocol; + p->net_type = cf->net_type; + p->disabled = cf->disabled; + p->hash_key = random_u32(); + cf->proto = p; + + init_list(&p->channels); + + return p; +} + +static struct proto * +proto_init(struct proto_config *c, node *n) +{ + struct protocol *pr = c->protocol; + struct proto *p = pr->init(c); + + p->proto_state = PS_DOWN; + p->last_state_change = current_time(); + p->vrf = c->vrf; + insert_node(&p->n, n); + + p->event = ev_new(proto_pool); + p->event->hook = proto_event; + p->event->data = p; + + PD(p, "Initializing%s", p->disabled ? " [disabled]" : ""); + + return p; +} + +static void +proto_start(struct proto *p) +{ + /* Here we cannot use p->cf->name since it won't survive reconfiguration */ + p->pool = rp_new(proto_pool, p->proto->name); + + if (graceful_restart_state == GRS_INIT) + p->gr_recovery = 1; } @@ -263,22 +738,24 @@ proto_free_ahooks(struct proto *p) void * proto_config_new(struct protocol *pr, int class) { - struct proto_config *c = cfg_allocz(pr->config_size); + struct proto_config *cf = cfg_allocz(pr->config_size); if (class == SYM_PROTO) - add_tail(&new_config->protos, &c->n); - c->global = new_config; - c->protocol = pr; - c->name = pr->name; - c->preference = pr->preference; - c->class = class; - c->out_filter = FILTER_REJECT; - c->table = c->global->master_rtc; - c->debug = new_config->proto_default_debug; - c->mrtdump = new_config->proto_default_mrtdump; - return c; + add_tail(&new_config->protos, &cf->n); + + cf->global = new_config; + cf->protocol = pr; + cf->name = pr->name; + cf->class = class; + cf->debug = new_config->proto_default_debug; + cf->mrtdump = new_config->proto_default_mrtdump; + + init_list(&cf->channels); + + return cf; } + /** * proto_copy_config - copy a protocol configuration * @dest: 
destination protocol configuration @@ -293,6 +770,7 @@ proto_config_new(struct protocol *pr, int class) void proto_copy_config(struct proto_config *dest, struct proto_config *src) { + struct channel_config *cc; node old_node; int old_class; char *old_name; @@ -305,7 +783,7 @@ proto_copy_config(struct proto_config *dest, struct proto_config *src) DBG("Copying configuration from %s to %s\n", src->name, dest->name); - /* + /* * Copy struct proto_config here. Keep original node, class and name. * protocol-specific config copy is handled by protocol copy_config() hook */ @@ -314,12 +792,17 @@ proto_copy_config(struct proto_config *dest, struct proto_config *src) old_class = dest->class; old_name = dest->name; - memcpy(dest, src, sizeof(struct proto_config)); + memcpy(dest, src, src->protocol->config_size); dest->n = old_node; dest->class = old_class; dest->name = old_name; + init_list(&dest->channels); + + WALK_LIST(cc, src->channels) + channel_copy_config(cc, dest); + /* FIXME: allow for undefined copy_config */ dest->protocol->copy_config(dest, src); } @@ -339,67 +822,15 @@ protos_preconfig(struct config *c) init_list(&c->protos); DBG("Protocol preconfig:"); WALK_LIST(p, protocol_list) - { - DBG(" %s", p->name); - p->name_counter = 0; - if (p->preconfig) - p->preconfig(p, c); - } - DBG("\n"); -} - -/** - * protos_postconfig - post-configuration processing - * @c: new configuration - * - * This function calls the postconfig() hooks of all protocol - * instances specified in configuration @c. The hooks are not - * called for protocol templates. 
- */ -void -protos_postconfig(struct config *c) -{ - struct proto_config *x; - struct protocol *p; - - DBG("Protocol postconfig:"); - WALK_LIST(x, c->protos) - { - DBG(" %s", x->name); - - p = x->protocol; - if (p->postconfig) - p->postconfig(x); - } + { + DBG(" %s", p->name); + p->name_counter = 0; + if (p->preconfig) + p->preconfig(p, c); + } DBG("\n"); } -extern struct protocol proto_unix_iface; - -static struct proto * -proto_init(struct proto_config *c) -{ - struct protocol *p = c->protocol; - struct proto *q = p->init(c); - - q->proto_state = PS_DOWN; - q->core_state = FS_HUNGRY; - q->export_state = ES_DOWN; - q->last_state_change = now; - q->vrf = c->vrf; - - add_tail(&initial_proto_list, &q->n); - - if (p == &proto_unix_iface) - initial_device_proto = q; - - add_tail(&proto_list, &q->glob_node); - PD(q, "Initializing%s", q->disabled ? " [disabled]" : ""); - return q; -} - -int proto_reconfig_type; /* Hack to propagate type info to pipe reconfigure hook */ - static int proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config *nc, int type) { @@ -409,75 +840,23 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config /* If there is a too big change in core attributes, ... */ if ((nc->protocol != oc->protocol) || + (nc->net_type != oc->net_type) || (nc->disabled != p->disabled) || - (nc->vrf != oc->vrf) || - (nc->table->table != oc->table->table)) + (nc->vrf != oc->vrf)) return 0; + p->name = nc->name; p->debug = nc->debug; p->mrtdump = nc->mrtdump; - proto_reconfig_type = type; + reconfigure_type = type; /* Execute protocol specific reconfigure hook */ - if (! 
(p->proto->reconfigure && p->proto->reconfigure(p, nc))) + if (!p->proto->reconfigure || !p->proto->reconfigure(p, nc)) return 0; DBG("\t%s: same\n", oc->name); PD(p, "Reconfigured"); p->cf = nc; - p->name = nc->name; - p->preference = nc->preference; - - - /* Multitable protocols handle rest in their reconfigure hooks */ - if (p->proto->multitable) - return 1; - - /* Update filters and limits in the main announce hook - Note that this also resets limit state */ - if (p->main_ahook) - { - struct announce_hook *ah = p->main_ahook; - ah->in_filter = nc->in_filter; - ah->out_filter = nc->out_filter; - ah->rx_limit = nc->rx_limit; - ah->in_limit = nc->in_limit; - ah->out_limit = nc->out_limit; - ah->in_keep_filtered = nc->in_keep_filtered; - proto_verify_limits(ah); - } - - /* Update routes when filters changed. If the protocol in not UP, - it has no routes and we can ignore such changes */ - if ((p->proto_state != PS_UP) || (type == RECONFIG_SOFT)) - return 1; - - int import_changed = ! filter_same(nc->in_filter, oc->in_filter); - int export_changed = ! filter_same(nc->out_filter, oc->out_filter); - - /* We treat a change in preferences by reimporting routes */ - if (nc->preference != oc->preference) - import_changed = 1; - - if (import_changed || export_changed) - log(L_INFO "Reloading protocol %s", p->name); - - /* If import filter changed, call reload hook */ - if (import_changed && ! (p->reload_routes && p->reload_routes(p))) - { - /* Now, the protocol is reconfigured. But route reload failed - and we have to do regular protocol restart. 
*/ - log(L_INFO "Restarting protocol %s", p->name); - p->disabled = 1; - p->down_code = PDC_CF_RESTART; - proto_rethink_goal(p); - p->disabled = 0; - proto_rethink_goal(p); - return 1; - } - - if (export_changed) - proto_request_feeding(p); return 1; } @@ -514,85 +893,94 @@ void protos_commit(struct config *new, struct config *old, int force_reconfig, int type) { struct proto_config *oc, *nc; - struct proto *p, *n; struct symbol *sym; + struct proto *p; + node *n; + DBG("protos_commit:\n"); if (old) + { + WALK_LIST(oc, old->protos) { - WALK_LIST(oc, old->protos) - { - p = oc->proto; - sym = cf_find_symbol(new, oc->name); - if (sym && sym->class == SYM_PROTO && !new->shutdown) - { - /* Found match, let's check if we can smoothly switch to new configuration */ - /* No need to check description */ - nc = sym->def; - nc->proto = p; - - /* We will try to reconfigure protocol p */ - if (! force_reconfig && proto_reconfigure(p, oc, nc, type)) - continue; - - /* Unsuccessful, we will restart it */ - if (!p->disabled && !nc->disabled) - log(L_INFO "Restarting protocol %s", p->name); - else if (p->disabled && !nc->disabled) - log(L_INFO "Enabling protocol %s", p->name); - else if (!p->disabled && nc->disabled) - log(L_INFO "Disabling protocol %s", p->name); - - p->down_code = nc->disabled ? PDC_CF_DISABLE : PDC_CF_RESTART; - p->cf_new = nc; - } - else if (!new->shutdown) - { - log(L_INFO "Removing protocol %s", p->name); - p->down_code = PDC_CF_REMOVE; - p->cf_new = NULL; - } - else /* global shutdown */ - { - p->down_code = PDC_CMD_SHUTDOWN; - p->cf_new = NULL; - } - - p->reconfiguring = 1; - config_add_obstacle(old); - proto_rethink_goal(p); - } + p = oc->proto; + sym = cf_find_symbol(new, oc->name); + if (sym && sym->class == SYM_PROTO && !new->shutdown) + { + /* Found match, let's check if we can smoothly switch to new configuration */ + /* No need to check description */ + nc = sym->def; + nc->proto = p; + + /* We will try to reconfigure protocol p */ + if (! 
force_reconfig && proto_reconfigure(p, oc, nc, type)) + continue; + + /* Unsuccessful, we will restart it */ + if (!p->disabled && !nc->disabled) + log(L_INFO "Restarting protocol %s", p->name); + else if (p->disabled && !nc->disabled) + log(L_INFO "Enabling protocol %s", p->name); + else if (!p->disabled && nc->disabled) + log(L_INFO "Disabling protocol %s", p->name); + + p->down_code = nc->disabled ? PDC_CF_DISABLE : PDC_CF_RESTART; + p->cf_new = nc; + } + else if (!new->shutdown) + { + log(L_INFO "Removing protocol %s", p->name); + p->down_code = PDC_CF_REMOVE; + p->cf_new = NULL; + } + else /* global shutdown */ + { + p->down_code = PDC_CMD_SHUTDOWN; + p->cf_new = NULL; + } + + p->reconfiguring = 1; + config_add_obstacle(old); + proto_rethink_goal(p); } + } + struct proto *first_dev_proto = NULL; + + n = NODE &(proto_list.head); WALK_LIST(nc, new->protos) if (!nc->proto) - { - if (old) /* Not a first-time configuration */ - log(L_INFO "Adding protocol %s", nc->name); - proto_init(nc); - } - DBG("\tdone\n"); + { + /* Not a first-time configuration */ + if (old) + log(L_INFO "Adding protocol %s", nc->name); + + p = proto_init(nc, n); + n = NODE p; + + if (p->proto == &proto_unix_iface) + first_dev_proto = p; + } + else + n = NODE nc->proto; DBG("Protocol start\n"); /* Start device protocol first */ - if (initial_device_proto) - { - proto_rethink_goal(initial_device_proto); - initial_device_proto = NULL; - } + if (first_dev_proto) + proto_rethink_goal(first_dev_proto); /* Determine router ID for the first time - it has to be here and not in global_commit() because it is postponed after start of device protocol */ if (!config->router_id) - { - config->router_id = if_choose_router_id(config->router_id_from, 0); - if (!config->router_id) - die("Cannot determine router ID, please configure it manually"); - } + { + config->router_id = if_choose_router_id(config->router_id_from, 0); + if (!config->router_id) + die("Cannot determine router ID, please configure it 
manually"); + } - /* Start all other protocols */ - WALK_LIST_DELSAFE(p, n, initial_proto_list) + /* Start all new protocols */ + WALK_LIST_DELSAFE(p, n, proto_list) proto_rethink_goal(p); } @@ -602,20 +990,22 @@ proto_rethink_goal(struct proto *p) struct protocol *q; byte goal; - if (p->reconfiguring && p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN) - { - struct proto_config *nc = p->cf_new; - DBG("%s has shut down for reconfiguration\n", p->name); - p->cf->proto = NULL; - config_del_obstacle(p->cf->global); - rem_node(&p->n); - rem_node(&p->glob_node); - mb_free(p->message); - mb_free(p); - if (!nc) - return; - p = proto_init(nc); - } + if (p->reconfiguring && !p->active) + { + struct proto_config *nc = p->cf_new; + node *n = p->n.prev; + DBG("%s has shut down for reconfiguration\n", p->name); + p->cf->proto = NULL; + config_del_obstacle(p->cf->global); + proto_remove_channels(p); + rem_node(&p->n); + rfree(p->event); + mb_free(p->message); + mb_free(p); + if (!nc) + return; + p = proto_init(nc, n); + } /* Determine what state we want to reach */ if (p->disabled || p->reconfiguring) @@ -624,25 +1014,27 @@ proto_rethink_goal(struct proto *p) goal = PS_UP; q = p->proto; - if (goal == PS_UP) /* Going up */ + if (goal == PS_UP) + { + if (!p->active) { - if (p->proto_state == PS_DOWN && p->core_state == FS_HUNGRY) - { - DBG("Kicking %s up\n", p->name); - PD(p, "Starting"); - proto_init_instance(p); - proto_notify_state(p, (q->start ? q->start(p) : PS_UP)); - } + /* Going up */ + DBG("Kicking %s up\n", p->name); + PD(p, "Starting"); + proto_start(p); + proto_notify_state(p, (q->start ? q->start(p) : PS_UP)); } - else /* Going down */ + } + else + { + if (p->proto_state == PS_START || p->proto_state == PS_UP) { - if (p->proto_state == PS_START || p->proto_state == PS_UP) - { - DBG("Kicking %s down\n", p->name); - PD(p, "Shutting down"); - proto_notify_state(p, (q->shutdown ? 
q->shutdown(p) : PS_DOWN)); - } + /* Going down */ + DBG("Kicking %s down\n", p->name); + PD(p, "Shutting down"); + proto_notify_state(p, (q->shutdown ? q->shutdown(p) : PS_DOWN)); } + } } @@ -664,20 +1056,21 @@ proto_rethink_goal(struct proto *p) * When graceful restart recovery need is detected during initialization, then * enabled protocols are marked with @gr_recovery flag before start. Such * protocols then decide how to proceed with graceful restart, participation is - * voluntary. Protocols could lock the recovery by proto_graceful_restart_lock() - * (stored in @gr_lock flag), which means that they want to postpone the end of - * the recovery until they converge and then unlock it. They also could set - * @gr_wait before advancing to %PS_UP, which means that the core should defer - * route export to that protocol until the end of the recovery. This should be - * done by protocols that expect their neigbors to keep the proper routes - * (kernel table, BGP sessions with BGP graceful restart capability). + * voluntary. Protocols could lock the recovery for each channel by function + * channel_graceful_restart_lock() (state stored in @gr_lock flag), which means + * that they want to postpone the end of the recovery until they converge and + * then unlock it. They also could set @gr_wait before advancing to %PS_UP, + * which means that the core should defer route export to that channel until + * the end of the recovery. This should be done by protocols that expect their + * neigbors to keep the proper routes (kernel table, BGP sessions with BGP + * graceful restart capability). * * The graceful restart recovery is finished when either all graceful restart * locks are unlocked or when graceful restart wait timer fires. 
* */ -static void graceful_restart_done(struct timer *t); +static void graceful_restart_done(timer *t); /** * graceful_restart_recovery - request initial graceful restart recovery @@ -708,15 +1101,14 @@ graceful_restart_init(void) log(L_INFO "Graceful restart started"); if (!graceful_restart_locks) - { - graceful_restart_done(NULL); - return; - } + { + graceful_restart_done(NULL); + return; + } graceful_restart_state = GRS_ACTIVE; - gr_wait_timer = tm_new(proto_pool); - gr_wait_timer->hook = graceful_restart_done; - tm_start(gr_wait_timer, config->gr_wait); + gr_wait_timer = tm_new_init(proto_pool, graceful_restart_done, NULL, 0, 0); + tm_start(gr_wait_timer, config->gr_wait S); } /** @@ -730,32 +1122,32 @@ graceful_restart_init(void) * restart wait timer fires (but there are still some locks). */ static void -graceful_restart_done(struct timer *t UNUSED) +graceful_restart_done(timer *t UNUSED) { - struct proto *p; - node *n; - log(L_INFO "Graceful restart done"); graceful_restart_state = GRS_DONE; - WALK_LIST2(p, n, proto_list, glob_node) - { - if (!p->gr_recovery) - continue; + struct proto *p; + WALK_LIST(p, proto_list) + { + if (!p->gr_recovery) + continue; + struct channel *c; + WALK_LIST(c, p->channels) + { /* Resume postponed export of routes */ - if ((p->proto_state == PS_UP) && p->gr_wait) - { - proto_want_export_up(p); - proto_log_state_change(p); - } + if ((c->channel_state == CS_UP) && c->gr_wait && c->proto->rt_notify) + channel_start_export(c); /* Cleanup */ - p->gr_recovery = 0; - p->gr_wait = 0; - p->gr_lock = 0; + c->gr_wait = 0; + c->gr_lock = 0; } + p->gr_recovery = 0; + } + graceful_restart_locks = 0; } @@ -766,17 +1158,17 @@ graceful_restart_show_status(void) return; cli_msg(-24, "Graceful restart recovery in progress"); - cli_msg(-24, " Waiting for %d protocols to recover", graceful_restart_locks); - cli_msg(-24, " Wait timer is %d/%d", tm_remains(gr_wait_timer), config->gr_wait); + cli_msg(-24, " Waiting for %d channels to recover", 
graceful_restart_locks); + cli_msg(-24, " Wait timer is %t/%u", tm_remains(gr_wait_timer), config->gr_wait); } /** - * proto_graceful_restart_lock - lock graceful restart by protocol - * @p: protocol instance + * channel_graceful_restart_lock - lock graceful restart by channel + * @p: channel instance * * This function allows a protocol to postpone the end of graceful restart * recovery until it converges. The lock is removed when the protocol calls - * proto_graceful_restart_unlock() or when the protocol is stopped. + * channel_graceful_restart_unlock() or when the channel is closed. * * The function have to be called during the initial phase of graceful restart * recovery and only for protocols that are part of graceful restart (i.e. their @@ -784,32 +1176,32 @@ graceful_restart_show_status(void) * hooks. */ void -proto_graceful_restart_lock(struct proto *p) +channel_graceful_restart_lock(struct channel *c) { ASSERT(graceful_restart_state == GRS_INIT); - ASSERT(p->gr_recovery); + ASSERT(c->proto->gr_recovery); - if (p->gr_lock) + if (c->gr_lock) return; - p->gr_lock = 1; + c->gr_lock = 1; graceful_restart_locks++; } /** - * proto_graceful_restart_unlock - unlock graceful restart by protocol - * @p: protocol instance + * channel_graceful_restart_unlock - unlock graceful restart by channel + * @p: channel instance * - * This function unlocks a lock from proto_graceful_restart_lock(). It is also + * This function unlocks a lock from channel_graceful_restart_lock(). It is also * automatically called when the lock holding protocol went down. 
*/ void -proto_graceful_restart_unlock(struct proto *p) +channel_graceful_restart_unlock(struct channel *c) { - if (!p->gr_lock) + if (!c->gr_lock) return; - p->gr_lock = 0; + c->gr_lock = 0; graceful_restart_locks--; if ((graceful_restart_state == GRS_ACTIVE) && !graceful_restart_locks) @@ -830,34 +1222,26 @@ proto_graceful_restart_unlock(struct proto *p) void protos_dump_all(void) { - struct proto *p; - struct announce_hook *a; - debug("Protocols:\n"); - WALK_LIST(p, active_proto_list) + struct proto *p; + WALK_LIST(p, proto_list) + { + debug(" protocol %s state %s\n", p->name, p_states[p->proto_state]); + + struct channel *c; + WALK_LIST(c, p->channels) { - debug(" protocol %s state %s/%s\n", p->name, - p_states[p->proto_state], c_states[p->core_state]); - for (a = p->ahooks; a; a = a->next) - { - debug("\tTABLE %s\n", a->table->name); - if (a->in_filter) - debug("\tInput filter: %s\n", filter_name(a->in_filter)); - if (a->out_filter != FILTER_REJECT) - debug("\tOutput filter: %s\n", filter_name(a->out_filter)); - } - if (p->disabled) - debug("\tDISABLED\n"); - else if (p->proto->dump) - p->proto->dump(p); + debug("\tTABLE %s\n", c->table->name); + if (c->in_filter) + debug("\tInput filter: %s\n", filter_name(c->in_filter)); + if (c->out_filter) + debug("\tOutput filter: %s\n", filter_name(c->out_filter)); } - WALK_LIST(p, inactive_proto_list) - debug(" inactive %s: state %s/%s\n", p->name, p_states[p->proto_state], c_states[p->core_state]); - WALK_LIST(p, initial_proto_list) - debug(" initial %s\n", p->name); - WALK_LIST(p, flush_proto_list) - debug(" flushing %s\n", p->name); + + if (p->proto->dump && (p->proto_state != PS_DOWN)) + p->proto->dump(p); + } } /** @@ -894,12 +1278,9 @@ extern void bfd_init_all(void); void protos_build(void) { - init_list(&protocol_list); init_list(&proto_list); - init_list(&active_proto_list); - init_list(&inactive_proto_list); - init_list(&initial_proto_list); - init_list(&flush_proto_list); + init_list(&protocol_list); + 
proto_build(&proto_device); #ifdef CONFIG_RADV proto_build(&proto_radv); @@ -926,160 +1307,37 @@ protos_build(void) #ifdef CONFIG_BABEL proto_build(&proto_babel); #endif +#ifdef CONFIG_RPKI + proto_build(&proto_rpki); +#endif proto_pool = rp_new(&root_pool, "Protocols"); - proto_flush_event = ev_new(proto_pool); - proto_flush_event->hook = proto_flush_loop; proto_shutdown_timer = tm_new(proto_pool); proto_shutdown_timer->hook = proto_shutdown_loop; } -static void -proto_feed_more(void *P) -{ - struct proto *p = P; - - if (p->export_state != ES_FEEDING) - return; - - DBG("Feeding protocol %s continued\n", p->name); - if (rt_feed_baby(p)) - { - DBG("Feeding protocol %s finished\n", p->name); - p->export_state = ES_READY; - proto_log_state_change(p); - - if (p->feed_end) - p->feed_end(p); - } - else - { - p->attn->hook = proto_feed_more; - ev_schedule(p->attn); /* Will continue later... */ - } -} - -static void -proto_feed_initial(void *P) -{ - struct proto *p = P; - - if (p->export_state != ES_FEEDING) - return; - - DBG("Feeding protocol %s\n", p->name); - - if_feed_baby(p); - proto_feed_more(P); -} - -static void -proto_schedule_feed(struct proto *p, int initial) -{ - DBG("%s: Scheduling meal\n", p->name); - - p->export_state = ES_FEEDING; - p->refeeding = !initial; - - p->attn->hook = initial ? proto_feed_initial : proto_feed_more; - ev_schedule(p->attn); - - if (p->feed_begin) - p->feed_begin(p, initial); -} - -/* - * Flushing loop is responsible for flushing routes and protocols - * after they went down. It runs in proto_flush_event. At the start of - * one round, protocols waiting to flush are marked in - * proto_schedule_flush_loop(). At the end of the round (when routing - * table flush is complete), marked protocols are flushed and a next - * round may start. 
- */ - -static int flush_loop_state; /* 1 -> running */ - -static void -proto_schedule_flush_loop(void) -{ - struct proto *p; - struct announce_hook *h; - - if (flush_loop_state) - return; - flush_loop_state = 1; - - WALK_LIST(p, flush_proto_list) - { - p->flushing = 1; - for (h=p->ahooks; h; h=h->next) - rt_mark_for_prune(h->table); - } - - ev_schedule(proto_flush_event); -} - -static void -proto_flush_loop(void *unused UNUSED) -{ - struct proto *p; - - if (! rt_prune_loop()) - { - /* Rtable pruning is not finished */ - ev_schedule(proto_flush_event); - return; - } - - rt_prune_sources(); - - again: - WALK_LIST(p, flush_proto_list) - if (p->flushing) - { - /* This will flush interfaces in the same manner - like rt_prune_all() flushes routes */ - if (p->proto == &proto_unix_iface) - if_flush_ifaces(p); - - DBG("Flushing protocol %s\n", p->name); - p->flushing = 0; - p->core_state = FS_HUNGRY; - proto_relink(p); - proto_log_state_change(p); - if (p->proto_state == PS_DOWN) - proto_fell_down(p); - goto again; - } - - /* This round finished, perhaps there will be another one */ - flush_loop_state = 0; - if (!EMPTY_LIST(flush_proto_list)) - proto_schedule_flush_loop(); -} - /* Temporary hack to propagate restart to BGP */ int proto_restart; static void -proto_shutdown_loop(struct timer *t UNUSED) +proto_shutdown_loop(timer *t UNUSED) { struct proto *p, *p_next; - WALK_LIST_DELSAFE(p, p_next, active_proto_list) + WALK_LIST_DELSAFE(p, p_next, proto_list) if (p->down_sched) - { - proto_restart = (p->down_sched == PDS_RESTART); + { + proto_restart = (p->down_sched == PDS_RESTART); - p->disabled = 1; + p->disabled = 1; + proto_rethink_goal(p); + if (proto_restart) + { + p->disabled = 0; proto_rethink_goal(p); - if (proto_restart) - { - p->disabled = 0; - proto_rethink_goal(p); - } } + } } static inline void @@ -1094,7 +1352,7 @@ proto_schedule_down(struct proto *p, byte restart, byte code) p->down_sched = restart ? 
PDS_RESTART : PDS_DISABLE; p->down_code = code; - tm_start_max(proto_shutdown_timer, restart ? 2 : 0); + tm_start_max(proto_shutdown_timer, restart ? 250 MS : 0); } /** @@ -1131,50 +1389,8 @@ proto_set_message(struct proto *p, char *msg, int len) } -/** - * proto_request_feeding - request feeding routes to the protocol - * @p: given protocol - * - * Sometimes it is needed to send again all routes to the - * protocol. This is called feeding and can be requested by this - * function. This would cause protocol export state transition - * to ES_FEEDING (during feeding) and when completed, it will - * switch back to ES_READY. This function can be called even - * when feeding is already running, in that case it is restarted. - */ -void -proto_request_feeding(struct proto *p) -{ - ASSERT(p->proto_state == PS_UP); - - /* Do nothing if we are still waiting for feeding */ - if (p->export_state == ES_DOWN) - return; - - /* If we are already feeding, we want to restart it */ - if (p->export_state == ES_FEEDING) - { - /* Unless feeding is in initial state */ - if (p->attn->hook == proto_feed_initial) - return; - - rt_feed_baby_abort(p); - } - - /* FIXME: This should be changed for better support of multitable protos */ - struct announce_hook *ah; - for (ah = p->ahooks; ah; ah = ah->next) - proto_reset_limit(ah->out_limit); - - /* Hack: reset exp_routes during refeed, and do not decrease it later */ - p->stats.exp_routes = 0; - - proto_schedule_feed(p, 0); - proto_log_state_change(p); -} - static const char * -proto_limit_name(struct proto_limit *l) +channel_limit_name(struct channel_limit *l) { const char *actions[] = { [PLA_WARN] = "warn", @@ -1187,22 +1403,22 @@ proto_limit_name(struct proto_limit *l) } /** - * proto_notify_limit: notify about limit hit and take appropriate action - * @ah: announce hook + * channel_notify_limit: notify about limit hit and take appropriate action + * @c: channel * @l: limit being hit * @dir: limit direction (PLD_*) - * @rt_count: the number of 
routes + * @rt_count: the number of routes * * The function is called by the route processing core when limit @l * is breached. It activates the limit and tooks appropriate action * according to @l->action. */ void -proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 rt_count) +channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count) { const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; - struct proto *p = ah->proto; + struct proto *p = c->proto; if (l->state == PLS_BLOCKED) return; @@ -1210,148 +1426,112 @@ proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 /* For warning action, we want the log message every time we hit the limit */ if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit))) log(L_WARN "Protocol %s hits route %s limit (%d), action: %s", - p->name, dir_name[dir], l->limit, proto_limit_name(l)); + p->name, dir_name[dir], l->limit, channel_limit_name(l)); switch (l->action) - { - case PLA_WARN: - l->state = PLS_ACTIVE; - break; - - case PLA_BLOCK: - l->state = PLS_BLOCKED; - break; - - case PLA_RESTART: - case PLA_DISABLE: - l->state = PLS_BLOCKED; - if (p->proto_state == PS_UP) - proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); - break; - } + { + case PLA_WARN: + l->state = PLS_ACTIVE; + break; + + case PLA_BLOCK: + l->state = PLS_BLOCKED; + break; + + case PLA_RESTART: + case PLA_DISABLE: + l->state = PLS_BLOCKED; + if (p->proto_state == PS_UP) + proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); + break; + } } -void -proto_verify_limits(struct announce_hook *ah) +static void +channel_verify_limits(struct channel *c) { - struct proto_limit *l; - struct proto_stats *stats = ah->stats; - u32 all_routes = stats->imp_routes + stats->filt_routes; + struct channel_limit *l; + u32 all_routes = c->stats.imp_routes + 
c->stats.filt_routes; - l = ah->rx_limit; - if (l && (all_routes > l->limit)) - proto_notify_limit(ah, l, PLD_RX, all_routes); + l = &c->rx_limit; + if (l->action && (all_routes > l->limit)) + channel_notify_limit(c, l, PLD_RX, all_routes); - l = ah->in_limit; - if (l && (stats->imp_routes > l->limit)) - proto_notify_limit(ah, l, PLD_IN, stats->imp_routes); + l = &c->in_limit; + if (l->action && (c->stats.imp_routes > l->limit)) + channel_notify_limit(c, l, PLD_IN, c->stats.imp_routes); - l = ah->out_limit; - if (l && (stats->exp_routes > l->limit)) - proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes); + l = &c->out_limit; + if (l->action && (c->stats.exp_routes > l->limit)) + channel_notify_limit(c, l, PLD_OUT, c->stats.exp_routes); } - -static void -proto_want_core_up(struct proto *p) +static inline void +channel_reset_limit(struct channel_limit *l) { - ASSERT(p->core_state == FS_HUNGRY); - - if (!p->proto->multitable) - { - p->main_source = rt_get_source(p, 0); - rt_lock_source(p->main_source); - - /* Connect protocol to routing table */ - p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats); - p->main_ahook->in_filter = p->cf->in_filter; - p->main_ahook->out_filter = p->cf->out_filter; - p->main_ahook->rx_limit = p->cf->rx_limit; - p->main_ahook->in_limit = p->cf->in_limit; - p->main_ahook->out_limit = p->cf->out_limit; - p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered; - - proto_reset_limit(p->main_ahook->rx_limit); - proto_reset_limit(p->main_ahook->in_limit); - proto_reset_limit(p->main_ahook->out_limit); - } - - p->core_state = FS_HAPPY; - proto_relink(p); + if (l->action) + l->state = PLS_INITIAL; } -static void -proto_want_export_up(struct proto *p) +static inline void +proto_do_start(struct proto *p) { - ASSERT(p->core_state == FS_HAPPY); - ASSERT(p->export_state == ES_DOWN); - - proto_link_ahooks(p); - proto_schedule_feed(p, 1); /* Sets ES_FEEDING */ + p->active = 1; + p->do_start = 1; + ev_schedule(p->event); } static void 
-proto_want_export_down(struct proto *p) +proto_do_up(struct proto *p) { - ASSERT(p->export_state != ES_DOWN); - - /* Need to abort feeding */ - if (p->export_state == ES_FEEDING) - rt_feed_baby_abort(p); + if (!p->main_source) + { + p->main_source = rt_get_source(p, 0); + rt_lock_source(p->main_source); + } - p->export_state = ES_DOWN; - p->stats.exp_routes = 0; - proto_unlink_ahooks(p); + proto_start_channels(p); } -static void -proto_want_core_down(struct proto *p) +static inline void +proto_do_pause(struct proto *p) { - ASSERT(p->core_state == FS_HAPPY); - ASSERT(p->export_state == ES_DOWN); - - p->core_state = FS_FLUSHING; - proto_relink(p); - proto_schedule_flush_loop(); - - if (!p->proto->multitable) - { - rt_unlock_source(p->main_source); - p->main_source = NULL; - } + proto_pause_channels(p); } static void -proto_falling_down(struct proto *p) +proto_do_stop(struct proto *p) { + p->down_sched = 0; p->gr_recovery = 0; - p->gr_wait = 0; - if (p->gr_lock) - proto_graceful_restart_unlock(p); -} - -static void -proto_fell_down(struct proto *p) -{ - DBG("Protocol %s down\n", p->name); - - u32 all_routes = p->stats.imp_routes + p->stats.filt_routes; - if (all_routes != 0) - log(L_ERR "Protocol %s is down but still has %d routes", p->name, all_routes); - bzero(&p->stats, sizeof(struct proto_stats)); - proto_free_ahooks(p); + p->do_stop = 1; + ev_schedule(p->event); - if (! 
p->proto->multitable) - rt_unlock_table(p->table); + if (p->main_source) + { + rt_unlock_source(p->main_source); + p->main_source = NULL; + } - if (p->proto->cleanup) - p->proto->cleanup(p); + proto_stop_channels(p); +} - proto_rethink_goal(p); +static void +proto_do_down(struct proto *p) +{ + p->down_code = 0; + neigh_prune(); + rfree(p->pool); + p->pool = NULL; + + /* Shutdown is finished in the protocol event */ + if (proto_is_done(p)) + ev_schedule(p->event); } + /** * proto_notify_state - notify core about protocol state change * @p: protocol the state of which has changed @@ -1367,78 +1547,53 @@ proto_fell_down(struct proto *p) * it should be used at tail positions of protocol callbacks. */ void -proto_notify_state(struct proto *p, unsigned ps) +proto_notify_state(struct proto *p, uint state) { - unsigned ops = p->proto_state; - unsigned cs = p->core_state; - unsigned es = p->export_state; + uint ps = p->proto_state; - DBG("%s reporting state transition %s/%s -> */%s\n", p->name, c_states[cs], p_states[ops], p_states[ps]); - if (ops == ps) + DBG("%s reporting state transition %s -> %s\n", p->name, p_states[ps], p_states[state]); + if (state == ps) return; - p->proto_state = ps; - p->last_state_change = now; + p->proto_state = state; + p->last_state_change = current_time(); - switch (ps) - { - case PS_START: - ASSERT(ops == PS_DOWN || ops == PS_UP); - ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY); - - if (es != ES_DOWN) - proto_want_export_down(p); - break; - - case PS_UP: - ASSERT(ops == PS_DOWN || ops == PS_START); - ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY); - ASSERT(es == ES_DOWN); - - if (cs == FS_HUNGRY) - proto_want_core_up(p); - if (!p->gr_wait) - proto_want_export_up(p); - break; - - case PS_STOP: - ASSERT(ops == PS_START || ops == PS_UP); - - p->down_sched = 0; - - if (es != ES_DOWN) - proto_want_export_down(p); - if (cs == FS_HAPPY) - proto_want_core_down(p); - proto_falling_down(p); - break; - - case PS_DOWN: - p->down_code = 0; - p->down_sched = 0; - 
- if (es != ES_DOWN) - proto_want_export_down(p); - if (cs == FS_HAPPY) - proto_want_core_down(p); - if (ops != PS_STOP) - proto_falling_down(p); - - neigh_prune(); // FIXME convert neighbors to resource? - rfree(p->pool); - p->pool = NULL; - - if (cs == FS_HUNGRY) /* Shutdown finished */ - { - proto_log_state_change(p); - proto_fell_down(p); - return; /* The protocol might have ceased to exist */ - } - break; - - default: - bug("%s: Invalid state %d", p->name, ps); - } + switch (state) + { + case PS_START: + ASSERT(ps == PS_DOWN || ps == PS_UP); + + if (ps == PS_DOWN) + proto_do_start(p); + else + proto_do_pause(p); + break; + + case PS_UP: + ASSERT(ps == PS_DOWN || ps == PS_START); + + if (ps == PS_DOWN) + proto_do_start(p); + + proto_do_up(p); + break; + + case PS_STOP: + ASSERT(ps == PS_START || ps == PS_UP); + + proto_do_stop(p); + break; + + case PS_DOWN: + if (ps != PS_STOP) + proto_do_stop(p); + + proto_do_down(p); + break; + + default: + bug("%s: Invalid state %d", p->name, ps); + } proto_log_state_change(p); } @@ -1450,84 +1605,74 @@ proto_notify_state(struct proto *p, unsigned ps) static char * proto_state_name(struct proto *p) { -#define P(x,y) ((x << 4) | y) - switch (P(p->proto_state, p->core_state)) - { - case P(PS_DOWN, FS_HUNGRY): return "down"; - case P(PS_START, FS_HUNGRY): - case P(PS_START, FS_HAPPY): return "start"; - case P(PS_UP, FS_HAPPY): - switch (p->export_state) - { - case ES_DOWN: return "wait"; - case ES_FEEDING: return "feed"; - case ES_READY: return "up"; - default: return "???"; - } - case P(PS_STOP, FS_HUNGRY): - case P(PS_STOP, FS_FLUSHING): return "stop"; - case P(PS_DOWN, FS_FLUSHING): return "flush"; - default: return "???"; - } -#undef P + switch (p->proto_state) + { + case PS_DOWN: return p->active ? 
"flush" : "down"; + case PS_START: return "start"; + case PS_UP: return "up"; + case PS_STOP: return "stop"; + default: return "???"; + } } static void -proto_show_stats(struct proto_stats *s, int in_keep_filtered) +channel_show_stats(struct channel *c) { - if (in_keep_filtered) - cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", - s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes); + struct proto_stats *s = &c->stats; + + if (c->in_keep_filtered) + cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported", + s->imp_routes, s->filt_routes, s->exp_routes); else - cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", - s->imp_routes, s->exp_routes, s->pref_routes); + cli_msg(-1006, " Routes: %u imported, %u exported", + s->imp_routes, s->exp_routes); - cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); - cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", + cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); + cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", s->imp_updates_received, s->imp_updates_invalid, s->imp_updates_filtered, s->imp_updates_ignored, s->imp_updates_accepted); - cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", + cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", s->imp_withdraws_received, s->imp_withdraws_invalid, s->imp_withdraws_ignored, s->imp_withdraws_accepted); - cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", + cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", s->exp_updates_received, s->exp_updates_rejected, s->exp_updates_filtered, s->exp_updates_accepted); - cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", + cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", s->exp_withdraws_received, s->exp_withdraws_accepted); } void -proto_show_limit(struct proto_limit *l, const char *dsc) +channel_show_limit(struct channel_limit *l, const char *dsc) { 
- if (!l) + if (!l->action) return; - cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); - cli_msg(-1006, " Action: %s", proto_limit_name(l)); + cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); + cli_msg(-1006, " Action: %s", channel_limit_name(l)); } void -proto_show_basic_info(struct proto *p) +channel_show_info(struct channel *c) { - if (p->vrf) - cli_msg(-1006, " VRF: %s", p->vrf->name); - - cli_msg(-1006, " Preference: %d", p->preference); - cli_msg(-1006, " Input filter: %s", filter_name(p->cf->in_filter)); - cli_msg(-1006, " Output filter: %s", filter_name(p->cf->out_filter)); + cli_msg(-1006, " Channel %s", c->name); + cli_msg(-1006, " State: %s", c_states[c->channel_state]); + cli_msg(-1006, " Table: %s", c->table->name); + cli_msg(-1006, " Preference: %d", c->preference); + cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter)); + cli_msg(-1006, " Output filter: %s", filter_name(c->out_filter)); if (graceful_restart_state == GRS_ACTIVE) - cli_msg(-1006, " GR recovery: %s%s", - p->gr_lock ? " pending" : "", - p->gr_wait ? " waiting" : ""); + cli_msg(-1006, " GR recovery: %s%s", + c->gr_lock ? " pending" : "", + c->gr_wait ? 
" waiting" : ""); - proto_show_limit(p->cf->rx_limit, "Receive limit:"); - proto_show_limit(p->cf->in_limit, "Import limit:"); - proto_show_limit(p->cf->out_limit, "Export limit:"); + channel_show_limit(&c->rx_limit, "Receive limit:"); + channel_show_limit(&c->in_limit, "Import limit:"); + channel_show_limit(&c->out_limit, "Export limit:"); - if (p->proto_state != PS_DOWN) - proto_show_stats(&p->stats, p->cf->in_keep_filtered); + if (c->channel_state != CS_DOWN) + channel_show_stats(c); } void @@ -1537,47 +1682,53 @@ proto_cmd_show(struct proto *p, uintptr_t verbose, int cnt) /* First protocol - show header */ if (!cnt) - cli_msg(-2002, "name proto table state since info"); + cli_msg(-2002, "%-10s %-10s %-10s %-6s %-12s %s", + "Name", "Proto", "Table", "State", "Since", "Info"); buf[0] = 0; if (p->proto->get_status) p->proto->get_status(p, buf); - tm_format_datetime(tbuf, &config->tf_proto, p->last_state_change); - cli_msg(-1002, "%-8s %-8s %-8s %-5s %-10s %s", + tm_format_time(tbuf, &config->tf_proto, p->last_state_change); + cli_msg(-1002, "%-10s %-10s %-10s %-6s %-12s %s", p->name, p->proto->name, - p->table->name, + p->main_channel ? 
p->main_channel->table->name : "---", proto_state_name(p), tbuf, buf); + if (verbose) + { + if (p->cf->dsc) + cli_msg(-1006, " Description: %s", p->cf->dsc); + if (p->message) + cli_msg(-1006, " Message: %s", p->message); + if (p->cf->router_id) + cli_msg(-1006, " Router ID: %R", p->cf->router_id); + if (p->vrf) + cli_msg(-1006, " VRF: %s", p->vrf->name); + + if (p->proto->show_proto_info) + p->proto->show_proto_info(p); + else { - if (p->cf->dsc) - cli_msg(-1006, " Description: %s", p->cf->dsc); - - if (p->message) - cli_msg(-1006, " Message: %s", p->message); - - if (p->cf->router_id) - cli_msg(-1006, " Router ID: %R", p->cf->router_id); - - if (p->proto->show_proto_info) - p->proto->show_proto_info(p); - else - proto_show_basic_info(p); - - cli_msg(-1006, ""); + struct channel *c; + WALK_LIST(c, p->channels) + channel_show_info(c); } + + cli_msg(-1006, ""); + } } void proto_cmd_disable(struct proto *p, uintptr_t arg, int cnt UNUSED) { if (p->disabled) - { - cli_msg(-8, "%s: already disabled", p->name); - return; - } + { + cli_msg(-8, "%s: already disabled", p->name); + return; + } log(L_INFO "Disabling protocol %s", p->name); p->disabled = 1; @@ -1591,10 +1742,10 @@ void proto_cmd_enable(struct proto *p, uintptr_t arg, int cnt UNUSED) { if (!p->disabled) - { - cli_msg(-10, "%s: already enabled", p->name); - return; - } + { + cli_msg(-10, "%s: already enabled", p->name); + return; + } log(L_INFO "Enabling protocol %s", p->name); p->disabled = 0; @@ -1607,10 +1758,10 @@ void proto_cmd_restart(struct proto *p, uintptr_t arg, int cnt UNUSED) { if (p->disabled) - { - cli_msg(-8, "%s: already disabled", p->name); - return; - } + { + cli_msg(-8, "%s: already disabled", p->name); + return; + } log(L_INFO "Restarting protocol %s", p->name); p->disabled = 1; @@ -1625,41 +1776,38 @@ proto_cmd_restart(struct proto *p, uintptr_t arg, int cnt UNUSED) void proto_cmd_reload(struct proto *p, uintptr_t dir, int cnt UNUSED) { + struct channel *c; + if (p->disabled) - { - 
cli_msg(-8, "%s: already disabled", p->name); - return; - } + { + cli_msg(-8, "%s: already disabled", p->name); + return; + } /* If the protocol in not UP, it has no routes */ if (p->proto_state != PS_UP) return; + /* All channels must support reload */ + if (dir != CMD_RELOAD_OUT) + WALK_LIST(c, p->channels) + if (!channel_reloadable(c)) + { + cli_msg(-8006, "%s: reload failed", p->name); + return; + } + log(L_INFO "Reloading protocol %s", p->name); /* re-importing routes */ if (dir != CMD_RELOAD_OUT) - { - if (! (p->reload_routes && p->reload_routes(p))) - { - cli_msg(-8006, "%s: reload failed", p->name); - return; - } - - /* - * Should be done before reload_routes() hook? - * Perhaps, but these hooks work asynchronously. - */ - if (!p->proto->multitable) - { - proto_reset_limit(p->main_ahook->rx_limit); - proto_reset_limit(p->main_ahook->in_limit); - } - } + WALK_LIST(c, p->channels) + channel_request_reload(c); /* re-exporting routes */ if (dir != CMD_RELOAD_IN) - proto_request_feeding(p); + WALK_LIST(c, p->channels) + channel_request_feeding(c); cli_msg(-15, "%s: reloading", p->name); } @@ -1680,10 +1828,10 @@ static void proto_apply_cmd_symbol(struct symbol *s, void (* cmd)(struct proto *, uintptr_t, int), uintptr_t arg) { if (s->class != SYM_PROTO) - { - cli_msg(9002, "%s is not a protocol", s->name); - return; - } + { + cli_msg(9002, "%s is not a protocol", s->name); + return; + } cmd(((struct proto_config *)s->def)->proto, arg, 0); cli_msg(0, ""); @@ -1692,16 +1840,12 @@ proto_apply_cmd_symbol(struct symbol *s, void (* cmd)(struct proto *, uintptr_t, static void proto_apply_cmd_patt(char *patt, void (* cmd)(struct proto *, uintptr_t, int), uintptr_t arg) { + struct proto *p; int cnt = 0; - node *nn; - WALK_LIST(nn, proto_list) - { - struct proto *p = SKIP_BACK(struct proto, glob_node, nn); - - if (!patt || patmatch(patt, p->name)) - cmd(p, arg, cnt++); - } + WALK_LIST(p, proto_list) + if (!patt || patmatch(patt, p->name)) + cmd(p, arg, cnt++); if (!cnt) 
cli_msg(8003, "No protocols match"); @@ -1728,25 +1872,27 @@ proto_get_named(struct symbol *sym, struct protocol *pr) struct proto *p, *q; if (sym) - { - if (sym->class != SYM_PROTO) - cf_error("%s: Not a protocol", sym->name); - p = ((struct proto_config *)sym->def)->proto; - if (!p || p->proto != pr) - cf_error("%s: Not a %s protocol", sym->name, pr->name); - } + { + if (sym->class != SYM_PROTO) + cf_error("%s: Not a protocol", sym->name); + + p = ((struct proto_config *) sym->def)->proto; + if (!p || p->proto != pr) + cf_error("%s: Not a %s protocol", sym->name, pr->name); + } else - { - p = NULL; - WALK_LIST(q, active_proto_list) - if (q->proto == pr) - { - if (p) - cf_error("There are multiple %s protocols running", pr->name); - p = q; - } - if (!p) - cf_error("There is no %s protocol running", pr->name); - } + { + p = NULL; + WALK_LIST(q, proto_list) + if ((q->proto == pr) && (q->proto_state != PS_DOWN)) + { + if (p) + cf_error("There are multiple %s protocols running", pr->name); + p = q; + } + if (!p) + cf_error("There is no %s protocol running", pr->name); + } + return p; } diff --git a/nest/proto.sgml b/nest/proto.sgml index 1d4c31a7..53da78b8 100644 --- a/nest/proto.sgml +++ b/nest/proto.sgml @@ -69,23 +69,6 @@ its state by calling the <func/proto_notify_state/ function. <p>At any time, the core code can ask the protocol to shut itself down by calling its stop() hook. -<p>The <em/core state machine/ takes care of the core view of protocol state. -The states are traversed according to changes of the protocol state machine, but -sometimes the transitions are delayed if the core needs to finish some actions -(for example sending of new routes to the protocol) before proceeding to the -new state. There are the following core states: - -<descrip> - <tag/FS_HUNGRY/ The protocol is down, it doesn't have any routes and - doesn't want them. 
- <tag/FS_FEEDING/ The protocol has reached the <tt/PS_UP/ state, but - we are still busy sending the initial set of routes to it. - <tag/FS_HAPPY/ The protocol is up and has complete routing information. - <tag/FS_FLUSHING/ The protocol is shutting down (it's in either <tt/PS_STOP/ - or <tt/PS_DOWN/ state) and we're flushing all of its routes from the - routing tables. -</descrip> - <sect1>Functions of the protocol module <p>The protocol module provides the following functions: diff --git a/nest/protocol.h b/nest/protocol.h index 5aca9a4e..8a22d76b 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -11,7 +11,7 @@ #include "lib/lists.h" #include "lib/resource.h" -#include "lib/timer.h" +#include "lib/event.h" #include "nest/route.h" #include "conf/conf.h" @@ -23,13 +23,16 @@ struct neighbor; struct rta; struct network; struct proto_config; +struct channel_limit; +struct channel_config; struct config; struct proto; -struct event; +struct channel; struct ea_list; struct eattr; struct symbol; + /* * Routing Protocol */ @@ -40,9 +43,10 @@ struct protocol { char *template; /* Template for automatic generation of names */ int name_counter; /* Counter for automatic name generation */ int attr_class; /* Attribute class known to this protocol */ - int multitable; /* Protocol handles all announce hooks itself */ uint preference; /* Default protocol preference */ - uint config_size; /* Size of protocol config */ + uint channel_mask; /* Mask of accepted channel types (NB_*) */ + uint proto_size; /* Size of protocol data structure */ + uint config_size; /* Size of protocol config data structure */ void (*preconfig)(struct protocol *, struct config *); /* Just before configuring */ void (*postconfig)(struct proto_config *); /* After configuring each instance */ @@ -63,7 +67,6 @@ struct protocol { void protos_build(void); void proto_build(struct protocol *); void protos_preconfig(struct config *); -void protos_postconfig(struct config *); void protos_commit(struct config *new, 
struct config *old, int force_restart, int type); void protos_dump_all(void); @@ -77,7 +80,7 @@ void protos_dump_all(void); extern struct protocol proto_device, proto_radv, proto_rip, proto_static, - proto_ospf, proto_pipe, proto_bgp, proto_bfd, proto_babel; + proto_ospf, proto_pipe, proto_bgp, proto_bfd, proto_babel, proto_rpki; /* * Routing Protocol Instance @@ -91,17 +94,13 @@ struct proto_config { char *name; char *dsc; int class; /* SYM_PROTO or SYM_TEMPLATE */ + u8 net_type; /* Protocol network type (NET_*), 0 for undefined */ + u8 disabled; /* Protocol enabled/disabled by default */ u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */ - unsigned preference, disabled; /* Generic parameters */ - int in_keep_filtered; /* Routes rejected in import filter are kept */ u32 router_id; /* Protocol specific router ID */ + + list channels; /* List of channel configs (struct channel_config) */ struct iface *vrf; /* Related VRF instance, NULL if global */ - struct rtable_config *table; /* Table we're attached to */ - struct filter *in_filter, *out_filter; /* Attached filters */ - struct proto_limit *rx_limit; /* Limit for receiving routes from protocol - (relevant when in_keep_filtered is active) */ - struct proto_limit *in_limit; /* Limit for importing routes from protocol */ - struct proto_limit *out_limit; /* Limit for exporting routes to protocol */ /* Check proto_reconfigure() and proto_copy_config() after changing struct proto_config */ @@ -113,7 +112,6 @@ struct proto_stats { /* Import - from protocol to core */ u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */ - u32 pref_routes; /* Number of routes that are preferred, sum over all routing tables */ u32 imp_updates_received; /* Number of route updates received */ u32 imp_updates_invalid; /* Number of route updates rejected as invalid */ u32 
imp_updates_filtered; /* Number of route updates rejected by filters */ @@ -135,37 +133,36 @@ struct proto_stats { }; struct proto { - node n; /* Node in *_proto_list */ - node glob_node; /* Node in global proto_list */ + node n; /* Node in global proto_list */ struct protocol *proto; /* Protocol */ struct proto_config *cf; /* Configuration data */ struct proto_config *cf_new; /* Configuration we want to switch to after shutdown (NULL=delete) */ pool *pool; /* Pool containing local objects */ - struct event *attn; /* "Pay attention" event */ + event *event; /* Protocol event */ + + list channels; /* List of channels to rtables (struct channel) */ + struct channel *main_channel; /* Primary channel */ + struct rte_src *main_source; /* Primary route source */ + struct iface *vrf; /* Related VRF instance, NULL if global */ char *name; /* Name of this instance (== cf->name) */ u32 debug; /* Debugging flags */ u32 mrtdump; /* MRTDump flags */ - unsigned preference; /* Default route preference */ - byte accept_ra_types; /* Which types of route announcements are accepted (RA_OPTIMAL or RA_ANY) */ + uint active_channels; /* Number of active channels */ + byte net_type; /* Protocol network type (NET_*), 0 for undefined */ byte disabled; /* Manually disabled */ byte proto_state; /* Protocol state machine (PS_*, see below) */ - byte core_state; /* Core state machine (FS_*, see below) */ - byte export_state; /* Route export state (ES_*, see below) */ + byte active; /* From PS_START to cleanup after PS_STOP */ + byte do_start; /* Start actions are scheduled */ + byte do_stop; /* Stop actions are scheduled */ byte reconfiguring; /* We're shutting down due to reconfiguration */ - byte refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */ - byte flushing; /* Protocol is flushed in current flush loop round */ byte gr_recovery; /* Protocol should participate in graceful restart recovery */ - byte gr_lock; /* Graceful restart mechanism should wait for this proto 
*/ - byte gr_wait; /* Route export to protocol is postponed until graceful restart */ byte down_sched; /* Shutdown is scheduled for later (PDS_*) */ byte down_code; /* Reason for shutdown (PDC_* codes) */ - byte merge_limit; /* Maximal number of nexthops for RA_MERGED */ u32 hash_key; /* Random key used for hashing of neighbors */ - bird_clock_t last_state_change; /* Time of last state transition */ + btime last_state_change; /* Time of last state transition */ char *last_state_name_announced; /* Last state name we've announced to the user */ char *message; /* State-change message, allocated from proto_pool */ - struct proto_stats stats; /* Current protocol statistics */ /* * General protocol hooks: @@ -180,23 +177,23 @@ struct proto { * It can construct a new rte, add private attributes and * decide whether the route shall be imported: 1=yes, -1=no, * 0=process it through the import filter set by the user. - * reload_routes Request protocol to reload all its routes to the core + * reload_routes Request channel to reload all its routes to the core * (using rte_update()). Returns: 0=reload cannot be done, * 1= reload is scheduled and will happen (asynchronously). - * feed_begin Notify protocol about beginning of route feeding. - * feed_end Notify protocol about finish of route feeding. + * feed_begin Notify channel about beginning of route feeding. + * feed_end Notify channel about finish of route feeding. 
*/ void (*if_notify)(struct proto *, unsigned flags, struct iface *i); void (*ifa_notify)(struct proto *, unsigned flags, struct ifa *a); - void (*rt_notify)(struct proto *, struct rtable *table, struct network *net, struct rte *new, struct rte *old, struct ea_list *attrs); + void (*rt_notify)(struct proto *, struct channel *, struct network *net, struct rte *new, struct rte *old, struct ea_list *attrs); void (*neigh_notify)(struct neighbor *neigh); struct ea_list *(*make_tmp_attrs)(struct rte *rt, struct linpool *pool); void (*store_tmp_attrs)(struct rte *rt, struct ea_list *attrs); int (*import_control)(struct proto *, struct rte **rt, struct ea_list **attrs, struct linpool *pool); - int (*reload_routes)(struct proto *); - void (*feed_begin)(struct proto *, int initial); - void (*feed_end)(struct proto *); + void (*reload_routes)(struct channel *); + void (*feed_begin)(struct channel *, int initial); + void (*feed_end)(struct channel *); /* * Routing entry hooks (called only for routes belonging to this protocol): @@ -216,15 +213,6 @@ struct proto { void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); - struct iface *vrf; /* Related VRF instance, NULL if global */ - struct rtable *table; /* Our primary routing table */ - struct rte_src *main_source; /* Primary route source */ - struct announce_hook *main_ahook; /* Primary announcement hook */ - struct announce_hook *ahooks; /* Announcement hooks for this protocol */ - - struct fib_iterator *feed_iterator; /* Routing table iterator used during protocol feeding */ - struct announce_hook *feed_ahook; /* Announce hook we currently feed */ - /* Hic sunt protocol-specific data */ }; @@ -248,26 +236,21 @@ struct proto_spec { #define PDC_OUT_LIMIT_HIT 0x23 /* Route export limit reached */ -void *proto_new(struct proto_config *, unsigned size); +void *proto_new(struct proto_config *); void *proto_config_new(struct protocol *, int class); void proto_copy_config(struct 
proto_config *dest, struct proto_config *src); void proto_set_message(struct proto *p, char *msg, int len); -void proto_request_feeding(struct proto *p); - -static inline void -proto_copy_rest(struct proto_config *dest, struct proto_config *src, unsigned size) -{ memcpy(dest + 1, src + 1, size - sizeof(struct proto_config)); } void graceful_restart_recovery(void); void graceful_restart_init(void); void graceful_restart_show_status(void); -void proto_graceful_restart_lock(struct proto *p); -void proto_graceful_restart_unlock(struct proto *p); +void channel_graceful_restart_lock(struct channel *c); +void channel_graceful_restart_unlock(struct channel *c); #define DEFAULT_GR_WAIT 240 -void proto_show_limit(struct proto_limit *l, const char *dsc); -void proto_show_basic_info(struct proto *p); +void channel_show_limit(struct channel_limit *l, const char *dsc); +void channel_show_info(struct channel *c); void proto_cmd_show(struct proto *, uintptr_t, int); void proto_cmd_disable(struct proto *, uintptr_t, int); @@ -299,9 +282,10 @@ rte_make_tmp_attrs(struct rte *rt, struct linpool *pool) } /* Moved from route.h to avoid dependency conflicts */ -static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); } +static inline void rte_update(struct proto *p, const net_addr *n, rte *new) { rte_update2(p->main_channel, n, new, p->main_source); } -extern list active_proto_list; +extern pool *proto_pool; +extern list proto_list; /* * Each protocol instance runs two different state machines: @@ -353,7 +337,7 @@ void proto_notify_state(struct proto *p, unsigned state); * * HUNGRY ----> FEEDING * ^ | - * | V + * | V * FLUSHING <---- HAPPY * * States: HUNGRY Protocol either administratively down (i.e., @@ -377,16 +361,6 @@ void proto_notify_state(struct proto *p, unsigned state); * as a result of received ROUTE-REFRESH request). 
*/ -#define FS_HUNGRY 0 -#define FS_FEEDING 1 /* obsolete */ -#define FS_HAPPY 2 -#define FS_FLUSHING 3 - - -#define ES_DOWN 0 -#define ES_FEEDING 1 -#define ES_READY 2 - /* @@ -429,6 +403,7 @@ extern struct proto_config *cf_dev_proto; #define PLD_OUT 2 /* Export limit */ #define PLD_MAX 3 +#define PLA_NONE 0 /* No limit */ #define PLA_WARN 1 /* Issue log warning */ #define PLA_BLOCK 2 /* Block new routes */ #define PLA_RESTART 4 /* Force protocol restart */ @@ -438,42 +413,182 @@ extern struct proto_config *cf_dev_proto; #define PLS_ACTIVE 1 /* Limit was hit */ #define PLS_BLOCKED 2 /* Limit is active and blocking new routes */ -struct proto_limit { +struct channel_limit { u32 limit; /* Maximum number of prefixes */ - byte action; /* Action to take (PLA_*) */ - byte state; /* State of limit (PLS_*) */ + u8 action; /* Action to take (PLA_*) */ + u8 state; /* State of limit (PLS_*) */ }; -void proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 rt_count); -void proto_verify_limits(struct announce_hook *ah); - -static inline void -proto_reset_limit(struct proto_limit *l) -{ - if (l) - l->state = PLS_INITIAL; -} +void channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count); /* - * Route Announcement Hook + * Channels */ -struct announce_hook { +struct channel_class { + uint channel_size; /* Size of channel data structure */ + uint config_size; /* Size of channel config data structure */ + + void (*init)(struct channel *, struct channel_config *); /* Create new instance */ + int (*reconfigure)(struct channel *, struct channel_config *); /* Try to reconfigure instance, returns success */ + int (*start)(struct channel *); /* Start the instance */ + void (*shutdown)(struct channel *); /* Stop the instance */ + void (*cleanup)(struct channel *); /* Channel finished flush */ + + void (*copy_config)(struct channel_config *, struct channel_config *); /* Copy config from given channel instance */ +#if 0 + XXXX; + 
void (*preconfig)(struct protocol *, struct config *); /* Just before configuring */ + void (*postconfig)(struct proto_config *); /* After configuring each instance */ + + + void (*dump)(struct proto *); /* Debugging dump */ + void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */ + + void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ + void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); /* Get route information (for `show route' command) */ + int (*get_attr)(struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ + void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */ + +#endif +}; + +extern struct channel_class channel_bgp; + +struct channel_config { node n; - struct rtable *table; + const char *name; + const struct channel_class *channel; + + struct proto_config *parent; /* Where channel is defined (proto or template) */ + struct rtable_config *table; /* Table we're attached to */ + struct filter *in_filter, *out_filter; /* Attached filters */ + struct channel_limit rx_limit; /* Limit for receiving routes from protocol + (relevant when in_keep_filtered is active) */ + struct channel_limit in_limit; /* Limit for importing routes from protocol */ + struct channel_limit out_limit; /* Limit for exporting routes to protocol */ + + u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ + u8 ra_mode; /* Mode of received route advertisements (RA_*) */ + u16 preference; /* Default route preference */ + u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ + u8 in_keep_filtered; /* Routes rejected in import filter are kept */ +}; + +struct channel { + node n; /* Node in proto->channels */ + node table_node; /* Node in table->channels */ + + const char *name; /* Channel name (may be NULL) */ + const struct channel_class *channel; struct proto *proto; + + struct rtable *table; 
struct filter *in_filter; /* Input filter */ struct filter *out_filter; /* Output filter */ - struct proto_limit *rx_limit; /* Receive limit (for in_keep_filtered) */ - struct proto_limit *in_limit; /* Input limit */ - struct proto_limit *out_limit; /* Output limit */ - struct proto_stats *stats; /* Per-table protocol statistics */ - struct announce_hook *next; /* Next hook for the same protocol */ - int in_keep_filtered; /* Routes rejected in import filter are kept */ + struct channel_limit rx_limit; /* Receive limit (for in_keep_filtered) */ + struct channel_limit in_limit; /* Input limit */ + struct channel_limit out_limit; /* Output limit */ + + struct event *feed_event; /* Event responsible for feeding */ + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + struct proto_stats stats; /* Per-channel protocol statistics */ + + u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ + u8 ra_mode; /* Mode of received route advertisements (RA_*) */ + u16 preference; /* Default route preference */ + u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ + u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 disabled; + u8 stale; /* Used in reconfiguration */ + + u8 channel_state; + u8 export_state; /* Route export state (ES_*, see below) */ + u8 feed_active; + u8 flush_active; + u8 refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */ + u8 reloadable; /* Hook reload_routes() is allowed on the channel */ + u8 gr_lock; /* Graceful restart mechanism should wait for this channel */ + u8 gr_wait; /* Route export to channel is postponed until graceful restart */ + + btime last_state_change; /* Time of last state transition */ }; -struct announce_hook *proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *stats); -struct announce_hook *proto_find_announce_hook(struct proto *p, struct rtable *t); + +/* + * Channel states + * + * CS_DOWN - The initial and 
the final state of a channel. There is no route + * exchange between the protocol and the table. Channel is not counted as + * active. Channel keeps a ptr to the table, but do not lock the table and is + * not linked in the table. Generally, new closed channels are created in + * protocols' init() hooks. The protocol is expected to explicitly activate its + * channels (by calling channel_init() or channel_open()). + * + * CS_START - The channel as a connection between the protocol and the table is + * initialized (counted as active by the protocol, linked in the table and keeps + * the table locked), but there is no current route exchange. There still may be + * routes associated with the channel in the routing table if the channel falls + * to CS_START from CS_UP. Generally, channels are initialized in protocols' + * start() hooks when going to PS_START. + * + * CS_UP - The channel is initialized and the route exchange is allowed. Note + * that even in CS_UP state, route export may still be down (ES_DOWN) by the + * core decision (e.g. waiting for table convergence after graceful restart). + * I.e., the protocol decides to open the channel but the core decides to start + * route export. Route import (caused by rte_update() from the protocol) is not + * restricted by that and is on volition of the protocol. Generally, channels + * are opened in protocols' start() hooks when going to PS_UP. + * + * CS_FLUSHING - The transitional state between initialized channel and closed + * channel. The channel is still initialized, but no route exchange is allowed. + * Instead, the associated table is running flush loop to remove routes imported + * through the channel. After that, the channel changes state to CS_DOWN and + * is detached from the table (the table is unlocked and the channel is unlinked + * from it). Unlike other states, the CS_FLUSHING state is not explicitly + * entered or left by the protocol. 
A protocol may request to close a channel + * (by calling channel_close()), which causes the channel to change state to + * CS_FLUSHING and later to CS_DOWN. Also note that channels are closed + * automatically by the core when the protocol is going down. + * + * Allowed transitions: + * + * CS_DOWN -> CS_START / CS_UP + * CS_START -> CS_UP / CS_FLUSHING + * CS_UP -> CS_START / CS_FLUSHING + * CS_FLUSHING -> CS_DOWN (automatic) + */ + +#define CS_DOWN 0 +#define CS_START 1 +#define CS_UP 2 +#define CS_FLUSHING 3 + +#define ES_DOWN 0 +#define ES_FEEDING 1 +#define ES_READY 2 + + +struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_type); +static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) +{ struct channel_config *cc = HEAD(pc->channels); return NODE_VALID(cc) ? cc : NULL; } + +struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t); +struct channel *proto_find_channel_by_name(struct proto *p, const char *n); +struct channel *proto_add_channel(struct proto *p, struct channel_config *cf); +int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf); + +void channel_set_state(struct channel *c, uint state); + +static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); } +static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP); } +static inline void channel_close(struct channel *c) { channel_set_state(c, CS_FLUSHING); } + +void channel_request_feeding(struct channel *c); +void *channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); +void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); +int channel_reconfigure(struct channel *c, struct channel_config *cf); + #endif diff --git a/nest/route.h b/nest/route.h index 0834da45..79127519 100644 --- a/nest/route.h +++ b/nest/route.h @@ -11,7 
+11,7 @@ #include "lib/lists.h" #include "lib/resource.h" -#include "lib/timer.h" +#include "lib/net.h" struct ea_list; struct protocol; @@ -36,11 +36,8 @@ struct cli; struct fib_node { struct fib_node *next; /* Next in hash chain */ struct fib_iterator *readers; /* List of readers of this node */ - byte pxlen; - byte flags; /* User-defined */ - byte x0, x1; /* User-defined */ - u32 uid; /* Unique ID based on hash */ - ip_addr prefix; /* In host order */ + byte flags; /* User-defined, will be removed */ + net_addr addr[0]; }; struct fib_iterator { /* See lib/slists.h for an explanation */ @@ -51,7 +48,7 @@ struct fib_iterator { /* See lib/slists.h for an explanation */ uint hash; }; -typedef void (*fib_init_func)(struct fib_node *); +typedef void (*fib_init_fn)(void *); struct fib { pool *fib_pool; /* Pool holding all our data */ @@ -59,16 +56,26 @@ struct fib { struct fib_node **hash_table; /* Node hash table */ uint hash_size; /* Number of hash table entries (a power of two) */ uint hash_order; /* Binary logarithm of hash_size */ - uint hash_shift; /* 16 - hash_log */ + uint hash_shift; /* 32 - hash_order */ + uint addr_type; /* Type of address data stored in fib (NET_*) */ + uint node_size; /* FIB node size, 0 for nonuniform */ + uint node_offset; /* Offset of fib_node struct inside of user data */ uint entries; /* Number of entries */ uint entries_min, entries_max; /* Entry count limits (else start rehashing) */ - fib_init_func init; /* Constructor */ + fib_init_fn init; /* Constructor */ }; -void fib_init(struct fib *, pool *, unsigned node_size, unsigned hash_order, fib_init_func init); -void *fib_find(struct fib *, ip_addr *, int); /* Find or return NULL if doesn't exist */ -void *fib_get(struct fib *, ip_addr *, int); /* Find or create new if nonexistent */ -void *fib_route(struct fib *, ip_addr, int); /* Longest-match routing lookup */ +static inline void * fib_node_to_user(struct fib *f, struct fib_node *e) +{ return e ? 
(void *) ((char *) e - f->node_offset) : NULL; } + +static inline struct fib_node * fib_user_to_node(struct fib *f, void *e) +{ return e ? (void *) ((char *) e + f->node_offset) : NULL; } + +void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init); +void *fib_find(struct fib *, const net_addr *); /* Find or return NULL if doesn't exist */ +void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */ +void *fib_get(struct fib *, const net_addr *); /* Find or create new if nonexistent */ +void *fib_route(struct fib *, const net_addr *); /* Longest-match routing lookup */ void fib_delete(struct fib *, void *); /* Remove fib entry */ void fib_free(struct fib *); /* Destroy the fib */ void fib_check(struct fib *); /* Consistency check for debugging */ @@ -79,34 +86,37 @@ void fit_put(struct fib_iterator *, struct fib_node *); void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos); -#define FIB_WALK(fib, z) do { \ - struct fib_node *z, **ff = (fib)->hash_table; \ - uint count = (fib)->hash_size; \ - while (count--) \ - for(z = *ff++; z; z=z->next) +#define FIB_WALK(fib, type, z) do { \ + struct fib_node *fn_, **ff_ = (fib)->hash_table; \ + uint count_ = (fib)->hash_size; \ + type *z; \ + while (count_--) \ + for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next) #define FIB_WALK_END } while (0) #define FIB_ITERATE_INIT(it, fib) fit_init(it, fib) -#define FIB_ITERATE_START(fib, it, z) do { \ - struct fib_node *z = fit_get(fib, it); \ - uint count = (fib)->hash_size; \ - uint hpos = (it)->hash; \ +#define FIB_ITERATE_START(fib, it, type, z) do { \ + struct fib_node *fn_ = fit_get(fib, it); \ + uint count_ = (fib)->hash_size; \ + uint hpos_ = (it)->hash; \ + type *z; \ for(;;) { \ - if (!z) \ - { \ - if (++hpos >= count) \ + if (!fn_) \ + { \ + if (++hpos_ >= count_) \ break; \ - z = (fib)->hash_table[hpos]; \ + fn_ = 
(fib)->hash_table[hpos_]; \ continue; \ - } + } \ + z = fib_node_to_user(fib, fn_); -#define FIB_ITERATE_END(z) z = z->next; } } while(0) +#define FIB_ITERATE_END fn_ = fn_->next; } } while(0) -#define FIB_ITERATE_PUT(it, z) fit_put(it, z) +#define FIB_ITERATE_PUT(it) fit_put(it, fn_) -#define FIB_ITERATE_PUT_NEXT(it, fib, z) fit_put_next(fib, it, z, hpos) +#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_) #define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it) @@ -127,6 +137,7 @@ struct rtable_config { char *name; struct rtable *table; struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ int gc_max_ops; /* Maximum number of operations before GC is run */ int gc_min_time; /* Minimum time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ @@ -136,7 +147,8 @@ typedef struct rtable { node n; /* Node in list of all tables */ struct fib fib; char *name; /* Name of this table */ - list hooks; /* List of announcement hooks */ + list channels; /* List of attached channels (struct channel) */ + uint addr_type; /* Type of address data stored in table (NET_*) */ int pipe_busy; /* Pipe loop detection */ int use_count; /* Number of protocols using this table */ struct hostcache *hostcache; @@ -146,9 +158,8 @@ typedef struct rtable { * obstacle from this routing table. 
*/ struct event *rt_event; /* Routing table event */ + btime gc_time; /* Time of last GC */ int gc_counter; /* Number of operations since last GC */ - bird_clock_t gc_time; /* Time of last GC */ - byte gc_scheduled; /* GC is scheduled */ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ byte hcu_scheduled; /* Hostcache update is scheduled */ byte nhu_state; /* Next Hop Update state */ @@ -156,13 +167,14 @@ typedef struct rtable { struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ } rtable; -#define RPS_NONE 0 -#define RPS_SCHEDULED 1 -#define RPS_RUNNING 2 +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 typedef struct network { - struct fib_node n; /* FIB flags reserved for kernel syncer */ struct rte *routes; /* Available routes for this network */ + struct fib_node n; /* FIB flags reserved for kernel syncer */ } net; struct hostcache { @@ -187,20 +199,20 @@ struct hostentry { unsigned hash_key; /* Hash key */ unsigned uc; /* Use count */ struct rta *src; /* Source rta entry */ - ip_addr gw; /* Chosen next hop */ byte dest; /* Chosen route destination type (RTD_...) */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ u32 igp_metric; /* Chosen route IGP metric */ }; typedef struct rte { struct rte *next; net *net; /* Network this RTE belongs to */ - struct announce_hook *sender; /* Announce hook used to send the route to the routing table */ + struct channel *sender; /* Channel used to send the route to the routing table */ struct rta *attrs; /* Attributes of this route */ byte flags; /* Flags (REF_...) */ byte pflags; /* Protocol-specific flags */ word pref; /* Route preference */ - bird_clock_t lastmod; /* Last modified */ + btime lastmod; /* Last modified */ union { /* Protocol-dependent data (metrics etc.) 
*/ #ifdef CONFIG_RIP struct { @@ -223,6 +235,7 @@ typedef struct rte { #endif #ifdef CONFIG_BABEL struct { + u16 seqno; /* Babel seqno */ u16 metric; /* Babel metric */ u64 router_id; /* Babel router id */ } babel; @@ -250,6 +263,7 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); /* Types of route announcement, also used as flags */ +#define RA_UNDEF 0 /* Undefined RA type */ #define RA_OPTIMAL 1 /* Announcement of optimal route change */ #define RA_ACCEPTED 2 /* Announcement of first accepted route */ #define RA_ANY 3 /* Announcement of any route change */ @@ -268,17 +282,22 @@ void rt_preconfig(struct config *); void rt_commit(struct config *new, struct config *old); void rt_lock_table(rtable *); void rt_unlock_table(rtable *); -void rt_setup(pool *, rtable *, char *, struct rtable_config *); -static inline net *net_find(rtable *tab, ip_addr addr, unsigned len) { return (net *) fib_find(&tab->fib, &addr, len); } -static inline net *net_get(rtable *tab, ip_addr addr, unsigned len) { return (net *) fib_get(&tab->fib, &addr, len); } +void rt_setup(pool *, rtable *, struct rtable_config *); +static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(rtable *tab, const net_addr *addr) +{ net *n = net_find(tab, addr); return (n && rte_is_valid(n->routes)) ? 
n : NULL; } +static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +void *net_route(rtable *tab, const net_addr *n); +int net_roa_check(rtable *tab, const net_addr *n, u32 asn); rte *rte_find(net *net, struct rte_src *src); rte *rte_get_temp(struct rta *); -void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src); +void rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); /* rte_update() moved to protocol.h to avoid dependency conflicts */ -int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter); -rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct announce_hook *ah); -void rt_refresh_end(rtable *t, struct announce_hook *ah); +int rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter); +rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, struct ea_list **tmpa, linpool *pool, int silent); +void rt_refresh_begin(rtable *t, struct channel *c); +void rt_refresh_end(rtable *t, struct channel *c); +void rt_schedule_prune(rtable *t); void rte_dump(rte *); void rte_free(rte *); rte *rte_do_cow(rte *); @@ -286,35 +305,49 @@ static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? 
rte_do_cow(r rte *rte_cow_rta(rte *r, linpool *lp); void rt_dump(rtable *); void rt_dump_all(void); -int rt_feed_baby(struct proto *p); -void rt_feed_baby_abort(struct proto *p); -int rt_prune_loop(void); -struct rtable_config *rt_new_table(struct symbol *s); +int rt_feed_channel(struct channel *c); +void rt_feed_channel_abort(struct channel *c); +struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); -static inline void -rt_mark_for_prune(rtable *tab) -{ - if (tab->prune_state == RPS_RUNNING) - fit_get(&tab->fib, &tab->prune_fit); +/* Default limit for ECMP next hops, defined in sysdep code */ +extern const int rt_default_ecmp; - tab->prune_state = RPS_SCHEDULED; -} +struct rt_show_data_rtable { + node n; + rtable *table; + struct channel *export_channel; +}; struct rt_show_data { - ip_addr prefix; - unsigned pxlen; - rtable *table; + net_addr *addr; + list tables; + struct rt_show_data_rtable *tab; /* Iterator over table list */ + struct rt_show_data_rtable *last_table; /* Last table in output */ + struct fib_iterator fit; /* Iterator over networks in table */ + int verbose, tables_defined_by; struct filter *filter; - int verbose; - struct fib_iterator fit; struct proto *show_protocol; struct proto *export_protocol; - int export_mode, primary_only, filtered; + struct channel *export_channel; struct config *running_on_config; - int net_counter, rt_counter, show_counter; - int stats, show_for; + int export_mode, primary_only, filtered, stats, show_for; + + int table_open; /* Iteration (fit) is open */ + int net_counter, rt_counter, show_counter, table_counter; + int net_counter_last, rt_counter_last, show_counter_last; }; + void rt_show(struct rt_show_data *); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); + +/* Value of table definition mode in struct rt_show_data */ +#define RSD_TDB_DEFAULT 0 /* no table specified */ +#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... 
*/ +#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ +#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ + +#define RSD_TDB_SET 0x1 /* internal: show empty tables */ +#define RSD_TDB_NMN 0x2 /* internal: need matching net */ /* Value of export_mode in struct rt_show_data */ #define RSEM_NONE 0 /* Export mode not used */ @@ -330,14 +363,21 @@ void rt_show(struct rt_show_data *); * construction of BGP route attribute lists. */ -/* Multipath next-hop */ -struct mpnh { +/* Nexthop structure */ +struct nexthop { ip_addr gw; /* Next hop */ struct iface *iface; /* Outgoing interface */ - struct mpnh *next; + struct nexthop *next; + byte flags; byte weight; + byte labels_orig; /* Number of labels before hostentry was applied */ + byte labels; /* Number of all labels */ + u32 label[0]; }; +#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ + + struct rte_src { struct rte_src *next; /* Hash chain */ struct proto *proto; /* Protocol the source is based on */ @@ -349,22 +389,18 @@ struct rte_src { typedef struct rta { struct rta *next, **pprev; /* Hash chain */ + u32 uc; /* Use count */ + u32 hash_key; /* Hash over important fields */ + struct ea_list *eattrs; /* Extended Attribute chain */ struct rte_src *src; /* Route source that created the route */ - unsigned uc; /* Use count */ - byte source; /* Route source (RTS_...) */ - byte scope; /* Route scope (SCOPE_... -- see ip.h) */ - byte cast; /* Casting type (RTC_...) */ - byte dest; /* Route destination type (RTD_...) */ - byte flags; /* Route flags (RTF_...), now unused */ - byte aflags; /* Attribute cache flags (RTAF_...) 
*/ - u16 hash_key; /* Hash over important fields */ - u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - ip_addr gw; /* Next hop */ - ip_addr from; /* Advertising router */ struct hostentry *hostentry; /* Hostentry for recursive next-hops */ - struct iface *iface; /* Outgoing interface */ - struct mpnh *nexthops; /* Next-hops for multipath routes */ - struct ea_list *eattrs; /* Extended Attribute chain */ + ip_addr from; /* Advertising router */ + u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ + u8 source; /* Route source (RTS_...) */ + u8 scope; /* Route scope (SCOPE_... -- see ip.h) */ + u8 dest; /* Route destination type (RTD_...) */ + u8 aflags; + struct nexthop nh; /* Next hop */ } rta; #define RTS_DUMMY 0 /* Dummy route to be removed soon */ @@ -381,19 +417,20 @@ typedef struct rta { #define RTS_BGP 11 /* BGP route */ #define RTS_PIPE 12 /* Inter-table wormhole */ #define RTS_BABEL 13 /* Babel route */ +#define RTS_RPKI 14 /* Route Origin Authorization */ + #define RTC_UNICAST 0 #define RTC_BROADCAST 1 #define RTC_MULTICAST 2 #define RTC_ANYCAST 3 /* IPv6 Anycast */ -#define RTD_ROUTER 0 /* Next hop is neighbor router */ -#define RTD_DEVICE 1 /* Points to device */ +#define RTD_NONE 0 /* Undefined next hop */ +#define RTD_UNICAST 1 /* Next hop is neighbor router */ #define RTD_BLACKHOLE 2 /* Silently drop packets */ #define RTD_UNREACHABLE 3 /* Reject as unreachable */ #define RTD_PROHIBIT 4 /* Administratively prohibited */ -#define RTD_MULTIPATH 5 /* Multipath route (nexthops != NULL) */ -#define RTD_NONE 6 /* Invalid RTD */ +#define RTD_MAX 5 /* Flags for net->n.flags, used by kernel syncer */ #define KRF_INSTALLED 0x80 /* This route should be installed in the kernel */ @@ -405,9 +442,14 @@ typedef struct rta { protocol-specific metric is availabe */ +const char * rta_dest_names[RTD_MAX]; + +static inline const char *rta_dest_name(uint n) +{ return (n < RTD_MAX) ? 
rta_dest_names[n] : "???"; } + /* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ static inline int rte_is_reachable(rte *r) -{ uint d = r->attrs->dest; return (d == RTD_ROUTER) || (d == RTD_DEVICE) || (d == RTD_MULTIPATH); } +{ return r->attrs->dest == RTD_UNICAST; } /* @@ -456,13 +498,22 @@ typedef struct eattr { #define EAF_TYPE_UNDEF 0x1f /* `force undefined' entry */ #define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */ #define EAF_VAR_LENGTH 0x02 /* Attribute length is variable (part of type spec) */ -#define EAF_ORIGINATED 0x40 /* The attribute has originated locally */ +#define EAF_ORIGINATED 0x20 /* The attribute has originated locally */ +#define EAF_FRESH 0x40 /* An uncached attribute (e.g. modified in export filter) */ #define EAF_TEMP 0x80 /* A temporary attribute (the one stored in the tmp attr list) */ -struct adata { +typedef struct adata { uint length; /* Length of data */ byte data[0]; -}; +} adata; + +static inline struct adata * +lp_alloc_adata(struct linpool *pool, uint len) +{ + struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); + ad->length = len; + return ad; +} static inline int adata_same(struct adata *a, struct adata *b) { return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } @@ -504,14 +555,62 @@ uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ ea_list *ea_append(ea_list *to, ea_list *what); void ea_format_bitfield(struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); -int mpnh__same(struct mpnh *x, struct mpnh *y); /* Compare multipath nexthops */ -static inline int mpnh_same(struct mpnh *x, struct mpnh *y) -{ return (x == y) || mpnh__same(x, y); } -struct mpnh *mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp); -void mpnh_insert(struct mpnh **n, struct mpnh *y); -int mpnh_is_sorted(struct mpnh *x); +static inline eattr * +ea_set_attr(ea_list **to, struct linpool *pool, uint id, uint 
flags, uint type, uintptr_t val) +{ + ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); + eattr *e = &a->attrs[0]; + + a->flags = EALF_SORTED; + a->count = 1; + a->next = *to; + *to = a; + + e->id = id; + e->type = type; + e->flags = flags; + + if (type & EAF_EMBEDDED) + e->u.data = (u32) val; + else + e->u.ptr = (struct adata *) val; + + return e; +} + +static inline void +ea_set_attr_u32(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, u32 val) +{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } + +static inline void +ea_set_attr_ptr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, struct adata *val) +{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } + +static inline void +ea_set_attr_data(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, void *data, uint len) +{ + struct adata *a = lp_alloc_adata(pool, len); + memcpy(a->data, data, len); + ea_set_attr(to, pool, id, flags, type, (uintptr_t) a); +} + + +#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) + +static inline size_t nexthop_size(const struct nexthop *nh) +{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } +int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */ +static inline int nexthop_same(struct nexthop *x, struct nexthop *y) +{ return (x == y) || nexthop__same(x, y); } +struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp); +static inline void nexthop_link(struct rta *a, struct nexthop *from) +{ memcpy(&a->nh, from, nexthop_size(from)); } +void nexthop_insert(struct nexthop **n, struct nexthop *y); +int nexthop_is_sorted(struct nexthop *x); void rta_init(void); +static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } +#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) rta *rta_lookup(rta *); /* Get rta equivalent to this one, 
uc++ */ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; } static inline rta *rta_clone(rta *r) { r->uc++; return r; } @@ -522,7 +621,15 @@ static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *, ea_list *); -void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll); + +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); + +static inline void +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) +{ + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); +} /* * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills @@ -556,93 +663,21 @@ extern struct protocol *attr_class_to_protocol[EAP_MAX]; * Default protocol preferences */ -#define DEF_PREF_DIRECT 240 /* Directly connected */ +#define DEF_PREF_DIRECT 240 /* Directly connected */ #define DEF_PREF_STATIC 200 /* Static route */ #define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ #define DEF_PREF_BABEL 130 /* Babel */ #define DEF_PREF_RIP 120 /* RIP */ #define DEF_PREF_BGP 100 /* BGP */ -#define DEF_PREF_PIPE 70 /* Routes piped from other tables */ +#define DEF_PREF_RPKI 100 /* RPKI */ #define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ - /* * Route Origin Authorization */ -struct roa_item { - u32 asn; - byte maxlen; - byte src; - struct roa_item *next; -}; - -struct roa_node { - struct fib_node n; - struct roa_item *items; - // u32 cached_asn; -}; - -struct roa_table { - node n; /* Node in roa_table_list */ - struct fib fib; - char *name; /* Name of this ROA table */ - struct roa_table_config *cf; /* Configuration of this ROA table */ -}; - -struct roa_item_config { - ip_addr prefix; - byte 
pxlen, maxlen; - u32 asn; - struct roa_item_config *next; -}; - -struct roa_table_config { - node n; /* Node in config->rpa_tables */ - char *name; /* Name of this ROA table */ - struct roa_table *table; - - struct roa_item_config *roa_items; /* Preconfigured ROA items */ - - // char *filename; - // int gc_max_ops; /* Maximum number of operations before GC is run */ - // int gc_min_time; /* Minimum time between two consecutive GC runs */ -}; - -struct roa_show_data { - struct fib_iterator fit; - struct roa_table *table; - ip_addr prefix; - byte pxlen; - byte mode; /* ROA_SHOW_* values */ - u32 asn; /* Filter ASN, 0 -> all */ -}; - #define ROA_UNKNOWN 0 #define ROA_VALID 1 #define ROA_INVALID 2 -#define ROA_SRC_ANY 0 -#define ROA_SRC_CONFIG 1 -#define ROA_SRC_DYNAMIC 2 - -#define ROA_SHOW_ALL 0 -#define ROA_SHOW_PX 1 -#define ROA_SHOW_IN 2 -#define ROA_SHOW_FOR 3 - -extern struct roa_table *roa_table_default; - -void roa_add_item(struct roa_table *t, ip_addr prefix, byte pxlen, byte maxlen, u32 asn, byte src); -void roa_delete_item(struct roa_table *t, ip_addr prefix, byte pxlen, byte maxlen, u32 asn, byte src); -void roa_flush(struct roa_table *t, byte src); -byte roa_check(struct roa_table *t, ip_addr prefix, byte pxlen, u32 asn); -struct roa_table_config * roa_new_table_config(struct symbol *s); -void roa_add_item_config(struct roa_table_config *rtc, ip_addr prefix, byte pxlen, byte maxlen, u32 asn); -void roa_init(void); -void roa_preconfig(struct config *c); -void roa_commit(struct config *new, struct config *old); -void roa_show(struct roa_show_data *d); - - #endif diff --git a/nest/rt-attr.c b/nest/rt-attr.c index edf27d44..881687de 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -52,18 +52,27 @@ #include "nest/attrs.h" #include "lib/alloca.h" #include "lib/hash.h" +#include "lib/idm.h" #include "lib/resource.h" #include "lib/string.h" +#include <stddef.h> + +const char * rta_dest_names[RTD_MAX] = { + [RTD_NONE] = "", + [RTD_UNICAST] = "unicast", + 
[RTD_BLACKHOLE] = "blackhole", + [RTD_UNREACHABLE] = "unreachable", + [RTD_PROHIBIT] = "prohibited", +}; + pool *rta_pool; -static slab *rta_slab; -static slab *mpnh_slab; +static slab *rta_slab_[4]; +static slab *nexthop_slab_[4]; static slab *rte_src_slab; -/* rte source ID bitmap */ -static u32 *src_ids; -static u32 src_id_size, src_id_used, src_id_pos; +static struct idm src_ids; #define SRC_ID_INIT_SIZE 4 /* rte source hash */ @@ -87,64 +96,11 @@ rte_src_init(void) { rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src)); - src_id_pos = 0; - src_id_size = SRC_ID_INIT_SIZE; - src_ids = mb_allocz(rta_pool, src_id_size * sizeof(u32)); - - /* ID 0 is reserved */ - src_ids[0] = 1; - src_id_used = 1; + idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE); HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER); } -static inline int u32_cto(uint x) { return ffs(~x) - 1; } - -static inline u32 -rte_src_alloc_id(void) -{ - uint i, j; - for (i = src_id_pos; i < src_id_size; i++) - if (src_ids[i] != 0xffffffff) - goto found; - - /* If we are at least 7/8 full, expand */ - if (src_id_used > (src_id_size * 28)) - { - src_id_size *= 2; - src_ids = mb_realloc(src_ids, src_id_size * sizeof(u32)); - bzero(src_ids + i, (src_id_size - i) * sizeof(u32)); - goto found; - } - - for (i = 0; i < src_id_pos; i++) - if (src_ids[i] != 0xffffffff) - goto found; - - ASSERT(0); - - found: - ASSERT(i < 0x8000000); - - src_id_pos = i; - j = u32_cto(src_ids[i]); - - src_ids[i] |= (1 << j); - src_id_used++; - return 32 * i + j; -} - -static inline void -rte_src_free_id(u32 id) -{ - int i = id / 32; - int j = id % 32; - - ASSERT((i < src_id_size) && (src_ids[i] & (1 << j))); - src_ids[i] &= ~(1 << j); - src_id_used--; -} - HASH_DEFINE_REHASH_FN(RSH, struct rte_src) @@ -165,7 +121,7 @@ rt_get_source(struct proto *p, u32 id) src = sl_alloc(rte_src_slab); src->proto = p; src->private_id = id; - src->global_id = rte_src_alloc_id(); + src->global_id = idm_alloc(&src_ids); src->uc = 0; HASH_INSERT2(src_hash, RSH, 
rta_pool, src); @@ -181,7 +137,7 @@ rt_prune_sources(void) if (src->uc == 0) { HASH_DO_REMOVE(src_hash, RSH, sp); - rte_src_free_id(src->global_id); + idm_free(&src_ids, src->global_id); sl_free(rte_src_slab, src); } } @@ -195,28 +151,41 @@ rt_prune_sources(void) * Multipath Next Hop */ -static inline uint -mpnh_hash(struct mpnh *x) +static inline u32 +nexthop_hash(struct nexthop *x) { - uint h = 0; + u32 h = 0; for (; x; x = x->next) - h ^= ipa_hash(x->gw); + { + h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); + + for (int i = 0; i < x->labels; i++) + h ^= x->label[i] ^ (h << 6) ^ (h >> 7); + } return h; } int -mpnh__same(struct mpnh *x, struct mpnh *y) +nexthop__same(struct nexthop *x, struct nexthop *y) { for (; x && y; x = x->next, y = y->next) - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight)) + { + if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || + (x->flags != y->flags) || (x->weight != y->weight) || + (x->labels != y->labels)) return 0; + for (int i = 0; i < x->labels; i++) + if (x->label[i] != y->label[i]) + return 0; + } + return x == y; } static int -mpnh_compare_node(struct mpnh *x, struct mpnh *y) +nexthop_compare_node(struct nexthop *x, struct nexthop *y) { int r; @@ -226,6 +195,8 @@ mpnh_compare_node(struct mpnh *x, struct mpnh *y) if (!y) return -1; + /* Should we also compare flags ? 
*/ + r = ((int) y->weight) - ((int) x->weight); if (r) return r; @@ -234,22 +205,33 @@ mpnh_compare_node(struct mpnh *x, struct mpnh *y) if (r) return r; + r = ((int) y->labels) - ((int) x->labels); + if (r) + return r; + + for (int i = 0; i < y->labels; i++) + { + r = ((int) y->label[i]) - ((int) x->label[i]); + if (r) + return r; + } + return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct mpnh * -mpnh_copy_node(const struct mpnh *src, linpool *lp) +static inline struct nexthop * +nexthop_copy_node(const struct nexthop *src, linpool *lp) { - struct mpnh *n = lp_alloc(lp, sizeof(struct mpnh)); - n->gw = src->gw; - n->iface = src->iface; + struct nexthop *n = lp_alloc(lp, nexthop_size(src)); + + memcpy(n, src, nexthop_size(src)); n->next = NULL; - n->weight = src->weight; + return n; } /** - * mpnh_merge - merge nexthop lists + * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 * @rx: reusability of list @x @@ -257,7 +239,7 @@ mpnh_copy_node(const struct mpnh *src, linpool *lp) * @max: max number of nexthops * @lp: linpool for allocating nexthops * - * The mpnh_merge() function takes two nexthop lists @x and @y and merges them, + * The nexthop_merge() function takes two nexthop lists @x and @y and merges them, * eliminating possible duplicates. The input lists must be sorted and the * result is sorted too. The number of nexthops in result is limited by @max. * New nodes are allocated from linpool @lp. @@ -270,28 +252,28 @@ mpnh_copy_node(const struct mpnh *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. 
*/ -struct mpnh * -mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) +struct nexthop * +nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) { - struct mpnh *root = NULL; - struct mpnh **n = &root; + struct nexthop *root = NULL; + struct nexthop **n = &root; while ((x || y) && max--) { - int cmp = mpnh_compare_node(x, y); + int cmp = nexthop_compare_node(x, y); if (cmp < 0) { - *n = rx ? x : mpnh_copy_node(x, lp); + *n = rx ? x : nexthop_copy_node(x, lp); x = x->next; } else if (cmp > 0) { - *n = ry ? y : mpnh_copy_node(y, lp); + *n = ry ? y : nexthop_copy_node(y, lp); y = y->next; } else { - *n = rx ? x : (ry ? y : mpnh_copy_node(x, lp)); + *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp)); x = x->next; y = y->next; } @@ -303,11 +285,11 @@ mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) } void -mpnh_insert(struct mpnh **n, struct mpnh *x) +nexthop_insert(struct nexthop **n, struct nexthop *x) { for (; *n; n = &((*n)->next)) { - int cmp = mpnh_compare_node(*n, x); + int cmp = nexthop_compare_node(*n, x); if (cmp < 0) continue; @@ -322,28 +304,37 @@ mpnh_insert(struct mpnh **n, struct mpnh *x) } int -mpnh_is_sorted(struct mpnh *x) +nexthop_is_sorted(struct nexthop *x) { for (; x && x->next; x = x->next) - if (mpnh_compare_node(x, x->next) >= 0) + if (nexthop_compare_node(x, x->next) >= 0) return 0; return 1; } -static struct mpnh * -mpnh_copy(struct mpnh *o) +static inline slab * +nexthop_slab(struct nexthop *nh) +{ + return nexthop_slab_[MIN(nh->labels, 3)]; +} + +static struct nexthop * +nexthop_copy(struct nexthop *o) { - struct mpnh *first = NULL; - struct mpnh **last = &first; + struct nexthop *first = NULL; + struct nexthop **last = &first; for (; o; o = o->next) { - struct mpnh *n = sl_alloc(mpnh_slab); + struct nexthop *n = sl_alloc(nexthop_slab(o)); n->gw = o->gw; n->iface = o->iface; n->next = NULL; n->weight = o->weight; + n->labels = o->labels; + for (int 
i=0; i<o->labels; i++) + n->label[i] = o->label[i]; *last = n; last = &(n->next); @@ -353,14 +344,14 @@ mpnh_copy(struct mpnh *o) } static void -mpnh_free(struct mpnh *o) +nexthop_free(struct nexthop *o) { - struct mpnh *n; + struct nexthop *n; while (o) { n = o->next; - sl_free(mpnh_slab, o); + sl_free(nexthop_slab(o), o); o = n; } } @@ -580,7 +571,7 @@ ea_do_prune(ea_list *e) if ((s0->type & EAF_TYPE_MASK) != EAF_TYPE_UNDEF) { *d = *s0; - d->type = (d->type & ~EAF_ORIGINATED) | (s[-1].type & EAF_ORIGINATED); + d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED); d++; i++; } @@ -972,7 +963,8 @@ ea_dump(ea_list *e) inline uint ea_hash(ea_list *e) { - u32 h = 0; + const u64 mul = 0x68576150f3d6847; + u64 h = 0xafcef24eda8b29; int i; if (e) /* Assuming chain of length 1 */ @@ -980,29 +972,18 @@ ea_hash(ea_list *e) for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; - h ^= a->id; + h ^= a->id; h *= mul; if (a->type & EAF_EMBEDDED) h ^= a->u.data; else { struct adata *d = a->u.ptr; - int size = d->length; - byte *z = d->data; - while (size >= 4) - { - h ^= *(u32 *)z; - z += 4; - size -= 4; - } - while (size--) - h = (h >> 24) ^ (h << 8) ^ *z++; + h ^= mem_hash(d->data, d->length); } + h *= mul; } - h ^= h >> 16; - h ^= h >> 6; - h &= 0xffff; } - return h; + return (h >> 32) ^ (h & 0xffffffff); } /** @@ -1051,8 +1032,19 @@ rta_alloc_hash(void) static inline uint rta_hash(rta *a) { - return (((uint) (uintptr_t) a->src) ^ ipa_hash(a->gw) ^ - mpnh_hash(a->nexthops) ^ ea_hash(a->eattrs)) & 0xffff; + u64 h; + mem_hash_init(&h); +#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); + MIX(src); + MIX(hostentry); + MIX(from); + MIX(igp_metric); + MIX(source); + MIX(scope); + MIX(dest); +#undef MIX + + return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); } static inline int @@ -1061,26 +1053,28 @@ rta_same(rta *x, rta *y) return (x->src == y->src && x->source == y->source && x->scope == y->scope && - x->cast == y->cast 
&& x->dest == y->dest && - x->flags == y->flags && x->igp_metric == y->igp_metric && - ipa_equal(x->gw, y->gw) && ipa_equal(x->from, y->from) && - x->iface == y->iface && x->hostentry == y->hostentry && - mpnh_same(x->nexthops, y->nexthops) && + nexthop_same(&(x->nh), &(y->nh)) && ea_same(x->eattrs, y->eattrs)); } +static inline slab * +rta_slab(rta *a) +{ + return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; +} + static rta * rta_copy(rta *o) { - rta *r = sl_alloc(rta_slab); + rta *r = sl_alloc(rta_slab(o)); - memcpy(r, o, sizeof(rta)); + memcpy(r, o, rta_size(o)); r->uc = 1; - r->nexthops = mpnh_copy(o->nexthops); + r->nh.next = nexthop_copy(o->nh.next); r->eattrs = ea_list_copy(o->eattrs); return r; } @@ -1173,19 +1167,26 @@ rta__free(rta *a) *a->pprev = a->next; if (a->next) a->next->pprev = a->pprev; - a->aflags = 0; /* Poison the entry */ rt_unlock_hostentry(a->hostentry); rt_unlock_source(a->src); - mpnh_free(a->nexthops); + if (a->nh.next) + nexthop_free(a->nh.next); ea_free(a->eattrs); - sl_free(rta_slab, a); + a->aflags = 0; /* Poison the entry */ + sl_free(rta_slab(a), a); } rta * rta_do_cow(rta *o, linpool *lp) { - rta *r = lp_alloc(lp, sizeof(rta)); - memcpy(r, o, sizeof(rta)); + rta *r = lp_alloc(lp, rta_size(o)); + memcpy(r, o, rta_size(o)); + for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) + { + *nhn = lp_alloc(lp, nexthop_size(nho)); + memcpy(*nhn, nho, nexthop_size(nho)); + nhn = &((*nhn)->next); + } r->aflags = 0; r->uc = 0; return r; @@ -1203,20 +1204,24 @@ rta_dump(rta *a) static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE", "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", - "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtc[] = { "", " BC", " MC", " AC" }; + "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - debug("p=%s uc=%d %s %s%s%s h=%04x", - 
a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtc[a->cast], + debug("p=%s uc=%d %s %s%s h=%04x", + a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtd[a->dest], a->hash_key); if (!(a->aflags & RTAF_CACHED)) debug(" !CACHED"); debug(" <-%I", a->from); - if (a->dest == RTD_ROUTER) - debug(" ->%I", a->gw); - if (a->dest == RTD_DEVICE || a->dest == RTD_ROUTER) - debug(" [%s]", a->iface ? a->iface->name : "???" ); + if (a->dest == RTD_UNICAST) + for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); + } if (a->eattrs) { debug(" EA: "); @@ -1252,10 +1257,9 @@ rta_show(struct cli *c, rta *a, ea_list *eal) { static char *src_names[] = { "dummy", "static", "inherit", "device", "static-device", "redirect", "RIP", "OSPF", "OSPF-IA", "OSPF-E1", "OSPF-E2", "BGP", "pipe" }; - static char *cast_names[] = { "unicast", "broadcast", "multicast", "anycast" }; int i; - cli_printf(c, -1008, "\tType: %s %s %s", src_names[a->source], cast_names[a->cast], ip_scope_text(a->scope)); + cli_printf(c, -1008, "\tType: %s %s", src_names[a->source], ip_scope_text(a->scope)); if (!eal) eal = a->eattrs; for(; eal; eal=eal->next) @@ -1273,8 +1277,17 @@ void rta_init(void) { rta_pool = rp_new(&root_pool, "Attributes"); - rta_slab = sl_new(rta_pool, sizeof(rta)); - mpnh_slab = sl_new(rta_pool, sizeof(struct mpnh)); + + rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); + rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); + rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); + rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + + nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); + nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); + nexthop_slab_[2] = 
sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); + nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_alloc_hash(); rte_src_init(); } diff --git a/nest/rt-dev.c b/nest/rt-dev.c index ed6c06af..718c4578 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -24,14 +24,17 @@ #include "lib/resource.h" #include "lib/string.h" + static void -dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad) +dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) { - struct rt_dev_config *P = (void *) p->cf; + struct rt_dev_proto *p = (void *) P; + struct rt_dev_config *cf = (void *) P->cf; + struct channel *c; - if (!EMPTY_LIST(P->iface_list) && - !iface_patt_find(&P->iface_list, ad->iface, ad->iface->addr)) - /* Empty list is automagically treated as "*" */ + if (!EMPTY_LIST(cf->iface_list) && + !iface_patt_find(&cf->iface_list, ad->iface, ad)) + /* Empty list is automatically treated as "*" */ return; if (ad->flags & IA_SECONDARY) @@ -40,51 +43,49 @@ dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad) if (ad->scope <= SCOPE_LINK) return; - if (c & IF_CHANGE_DOWN) - { - net *n; + if (ad->prefix.type == NET_IP4) + c = p->ip4_channel; + else if (ad->prefix.type == NET_IP6) + c = p->ip6_channel; + else + return; + + if (!c) + return; + if (flags & IF_CHANGE_DOWN) + { DBG("dev_if_notify: %s:%I going down\n", ad->iface->name, ad->ip); - n = net_find(p->table, ad->prefix, ad->pxlen); - if (!n) - { - DBG("dev_if_notify: device shutdown: prefix not found\n"); - return; - } /* Use iface ID as local source ID */ - struct rte_src *src = rt_get_source(p, ad->iface->index); - rte_update2(p->main_ahook, n, NULL, src); + struct rte_src *src = rt_get_source(P, ad->iface->index); + rte_update2(c, &ad->prefix, NULL, src); } - else if (c & IF_CHANGE_UP) + else if (flags & IF_CHANGE_UP) { rta *a; - net *n; rte *e; DBG("dev_if_notify: %s:%I going up\n", ad->iface->name, ad->ip); - if (P->check_link && !(ad->iface->flags & 
IF_LINK_UP)) + if (cf->check_link && !(ad->iface->flags & IF_LINK_UP)) return; /* Use iface ID as local source ID */ - struct rte_src *src = rt_get_source(p, ad->iface->index); + struct rte_src *src = rt_get_source(P, ad->iface->index); rta a0 = { .src = src, .source = RTS_DEVICE, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST, - .dest = RTD_DEVICE, - .iface = ad->iface + .dest = RTD_UNICAST, + .nh.iface = ad->iface, }; a = rta_lookup(&a0); - n = net_get(p->table, ad->prefix, ad->pxlen); e = rte_get_temp(a); - e->net = n; e->pflags = 0; - rte_update2(p->main_ahook, n, e, src); + rte_update2(c, &ad->prefix, e, src); } } @@ -108,30 +109,44 @@ dev_if_notify(struct proto *p, uint c, struct iface *iface) static struct proto * -dev_init(struct proto_config *c) +dev_init(struct proto_config *CF) { - struct proto *p = proto_new(c, sizeof(struct proto)); + struct proto *P = proto_new(CF); + struct rt_dev_proto *p = (void *) P; + // struct rt_dev_config *cf = (void *) CF; + + proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)); + proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6)); + + P->if_notify = dev_if_notify; + P->ifa_notify = dev_ifa_notify; - p->if_notify = dev_if_notify; - p->ifa_notify = dev_ifa_notify; - return p; + return P; } static int -dev_reconfigure(struct proto *p, struct proto_config *new) +dev_reconfigure(struct proto *P, struct proto_config *CF) { - struct rt_dev_config *o = (struct rt_dev_config *) p->cf; - struct rt_dev_config *n = (struct rt_dev_config *) new; + struct rt_dev_proto *p = (void *) P; + struct rt_dev_config *o = (void *) P->cf; + struct rt_dev_config *n = (void *) CF; + + if (!iface_patts_equal(&o->iface_list, &n->iface_list, NULL) || + (o->check_link != n->check_link)) + return 0; + + return + proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)) && + proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6)); - return 
iface_patts_equal(&o->iface_list, &n->iface_list, NULL) && - (o->check_link == n->check_link); + return 1; } static void dev_copy_config(struct proto_config *dest, struct proto_config *src) { - struct rt_dev_config *d = (struct rt_dev_config *) dest; - struct rt_dev_config *s = (struct rt_dev_config *) src; + struct rt_dev_config *d = (void *) dest; + struct rt_dev_config *s = (void *) src; /* * We copy iface_list as ifaces can be shared by more direct protocols. @@ -144,11 +159,13 @@ dev_copy_config(struct proto_config *dest, struct proto_config *src) } struct protocol proto_device = { - .name = "Direct", - .template = "direct%d", - .preference = DEF_PREF_DIRECT, + .name = "Direct", + .template = "direct%d", + .preference = DEF_PREF_DIRECT, + .channel_mask = NB_IP, + .proto_size = sizeof(struct rt_dev_proto), .config_size = sizeof(struct rt_dev_config), - .init = dev_init, - .reconfigure = dev_reconfigure, - .copy_config = dev_copy_config + .init = dev_init, + .reconfigure = dev_reconfigure, + .copy_config = dev_copy_config }; diff --git a/nest/rt-dev.h b/nest/rt-dev.h index 191b9a02..20b88a64 100644 --- a/nest/rt-dev.h +++ b/nest/rt-dev.h @@ -15,4 +15,10 @@ struct rt_dev_config { int check_link; }; +struct rt_dev_proto { + struct proto p; + struct channel *ip4_channel; + struct channel *ip6_channel; +}; + #endif diff --git a/nest/rt-fib.c b/nest/rt-fib.c index 9af333c9..18ccbfc3 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -61,16 +61,17 @@ #define HASH_DEF_ORDER 10 #define HASH_HI_MARK *4 #define HASH_HI_STEP 2 -#define HASH_HI_MAX 16 /* Must be at most 16 */ +#define HASH_HI_MAX 16 #define HASH_LO_MARK /5 #define HASH_LO_STEP 2 #define HASH_LO_MIN 10 + static void fib_ht_alloc(struct fib *f) { f->hash_size = 1 << f->hash_order; - f->hash_shift = 16 - f->hash_order; + f->hash_shift = 32 - f->hash_order; if (f->hash_order > HASH_HI_MAX - HASH_HI_STEP) f->entries_max = ~0; else @@ -90,16 +91,8 @@ fib_ht_free(struct fib_node **h) mb_free(h); } -static inline 
unsigned -fib_hash(struct fib *f, ip_addr *a) -{ - return ipa_hash(*a) >> f->hash_shift; -} -static void -fib_dummy_init(struct fib_node *dummy UNUSED) -{ -} +static inline u32 fib_hash(struct fib *f, const net_addr *a); /** * fib_init - initialize a new FIB @@ -114,18 +107,23 @@ fib_dummy_init(struct fib_node *dummy UNUSED) * This function initializes a newly allocated FIB and prepares it for use. */ void -fib_init(struct fib *f, pool *p, unsigned node_size, unsigned hash_order, fib_init_func init) +fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init) { + uint addr_length = net_addr_length[addr_type]; + if (!hash_order) hash_order = HASH_DEF_ORDER; f->fib_pool = p; - f->fib_slab = sl_new(p, node_size); + f->fib_slab = addr_length ? sl_new(p, node_size + addr_length) : NULL; + f->addr_type = addr_type; + f->node_size = node_size; + f->node_offset = node_offset; f->hash_order = hash_order; fib_ht_alloc(f); bzero(f->hash_table, f->hash_size * sizeof(struct fib_node *)); f->entries = 0; f->entries_min = 0; - f->init = init ? 
: fib_dummy_init; + f->init = init; } static void @@ -151,7 +149,7 @@ fib_rehash(struct fib *f, int step) while (e = x) { x = e->next; - nh = fib_hash(f, &e->prefix); + nh = fib_hash(f, e->addr); while (nh > ni) { *t = NULL; @@ -171,127 +169,201 @@ fib_rehash(struct fib *f, int step) fib_ht_free(m); } +#define CAST(t) (const net_addr_##t *) +#define CAST2(t) (net_addr_##t *) + +#define FIB_HASH(f,a,t) (net_hash_##t(CAST(t) a) >> f->hash_shift) + +#define FIB_FIND(f,a,t) \ + ({ \ + struct fib_node *e = f->hash_table[FIB_HASH(f, a, t)]; \ + while (e && !net_equal_##t(CAST(t) e->addr, CAST(t) a)) \ + e = e->next; \ + fib_node_to_user(f, e); \ + }) + +#define FIB_INSERT(f,a,e,t) \ + ({ \ + u32 h = net_hash_##t(CAST(t) a); \ + struct fib_node **ee = f->hash_table + (h >> f->hash_shift); \ + struct fib_node *g; \ + \ + while ((g = *ee) && (net_hash_##t(CAST(t) g->addr) < h)) \ + ee = &g->next; \ + \ + net_copy_##t(CAST2(t) e->addr, CAST(t) a); \ + e->next = *ee; \ + *ee = e; \ + }) + + +static inline u32 +fib_hash(struct fib *f, const net_addr *a) +{ + /* Same as FIB_HASH() */ + return net_hash(a) >> f->hash_shift; +} + +void * +fib_get_chain(struct fib *f, const net_addr *a) +{ + ASSERT(f->addr_type == a->type); + + struct fib_node *e = f->hash_table[fib_hash(f, a)]; + return e; +} + /** * fib_find - search for FIB node by prefix * @f: FIB to search in - * @a: pointer to IP address of the prefix - * @len: prefix length + * @n: network address * * Search for a FIB node corresponding to the given prefix, return * a pointer to it or %NULL if no such node exists. 
*/ void * -fib_find(struct fib *f, ip_addr *a, int len) +fib_find(struct fib *f, const net_addr *a) { - struct fib_node *e = f->hash_table[fib_hash(f, a)]; - - while (e && (e->pxlen != len || !ipa_equal(*a, e->prefix))) - e = e->next; - return e; + ASSERT(f->addr_type == a->type); + + switch (f->addr_type) + { + case NET_IP4: return FIB_FIND(f, a, ip4); + case NET_IP6: return FIB_FIND(f, a, ip6); + case NET_VPN4: return FIB_FIND(f, a, vpn4); + case NET_VPN6: return FIB_FIND(f, a, vpn6); + case NET_ROA4: return FIB_FIND(f, a, roa4); + case NET_ROA6: return FIB_FIND(f, a, roa6); + case NET_FLOW4: return FIB_FIND(f, a, flow4); + case NET_FLOW6: return FIB_FIND(f, a, flow6); + case NET_IP6_SADR: return FIB_FIND(f, a, ip6_sadr); + case NET_MPLS: return FIB_FIND(f, a, mpls); + default: bug("invalid type"); + } } -/* -int -fib_histogram(struct fib *f) +static void +fib_insert(struct fib *f, const net_addr *a, struct fib_node *e) { - log(L_WARN "Histogram dump start %d %d", f->hash_size, f->entries); - - int i, j; - struct fib_node *e; - - for (i = 0; i < f->hash_size; i++) - { - j = 0; - for (e = f->hash_table[i]; e != NULL; e = e->next) - j++; - if (j > 0) - log(L_WARN "Histogram line %d: %d", i, j); - } - - log(L_WARN "Histogram dump end"); + ASSERT(f->addr_type == a->type); + + switch (f->addr_type) + { + case NET_IP4: FIB_INSERT(f, a, e, ip4); return; + case NET_IP6: FIB_INSERT(f, a, e, ip6); return; + case NET_VPN4: FIB_INSERT(f, a, e, vpn4); return; + case NET_VPN6: FIB_INSERT(f, a, e, vpn6); return; + case NET_ROA4: FIB_INSERT(f, a, e, roa4); return; + case NET_ROA6: FIB_INSERT(f, a, e, roa6); return; + case NET_FLOW4: FIB_INSERT(f, a, e, flow4); return; + case NET_FLOW6: FIB_INSERT(f, a, e, flow6); return; + case NET_IP6_SADR: FIB_INSERT(f, a, e, ip6_sadr); return; + case NET_MPLS: FIB_INSERT(f, a, e, mpls); return; + default: bug("invalid type"); + } } -*/ + /** * fib_get - find or create a FIB node * @f: FIB to work with - * @a: pointer to IP address of the 
prefix - * @len: prefix length + * @n: network address * * Search for a FIB node corresponding to the given prefix and * return a pointer to it. If no such node exists, create it. */ void * -fib_get(struct fib *f, ip_addr *a, int len) +fib_get(struct fib *f, const net_addr *a) { - uint h = ipa_hash(*a); - struct fib_node **ee = f->hash_table + (h >> f->hash_shift); - struct fib_node *g, *e = *ee; - u32 uid = h << 16; - - while (e && (e->pxlen != len || !ipa_equal(*a, e->prefix))) - e = e->next; - if (e) - return e; -#ifdef DEBUGGING - if (len < 0 || len > BITS_PER_IP_ADDRESS || !ip_is_prefix(*a,len)) - bug("fib_get() called for invalid address"); -#endif + void *b = fib_find(f, a); + if (b) + return b; - while ((g = *ee) && g->uid < uid) - ee = &g->next; - while ((g = *ee) && g->uid == uid) - { - ee = &g->next; - uid++; - } + if (f->fib_slab) + b = sl_alloc(f->fib_slab); + else + b = mb_alloc(f->fib_pool, f->node_size + a->length); - if ((uid >> 16) != h) - log(L_ERR "FIB hash table chains are too long"); + struct fib_node *e = fib_user_to_node(f, b); + e->readers = NULL; + e->flags = 0; + fib_insert(f, a, e); - // log (L_WARN "FIB_GET %I %x %x", *a, h, uid); + memset(b, 0, f->node_offset); + if (f->init) + f->init(b); - e = sl_alloc(f->fib_slab); - e->prefix = *a; - e->pxlen = len; - e->next = *ee; - e->uid = uid; - *ee = e; - e->readers = NULL; - f->init(e); if (f->entries++ > f->entries_max) fib_rehash(f, HASH_HI_STEP); - return e; + return b; +} + +static inline void * +fib_route_ip4(struct fib *f, net_addr_ip4 *n) +{ + void *r; + + while (!(r = fib_find(f, (net_addr *) n)) && (n->pxlen > 0)) + { + n->pxlen--; + ip4_clrbit(&n->prefix, n->pxlen); + } + + return r; +} + +static inline void * +fib_route_ip6(struct fib *f, net_addr_ip6 *n) +{ + void *r; + + while (!(r = fib_find(f, (net_addr *) n)) && (n->pxlen > 0)) + { + n->pxlen--; + ip6_clrbit(&n->prefix, n->pxlen); + } + + return r; } /** * fib_route - CIDR routing lookup * @f: FIB to search in - * @a: pointer 
to IP address of the prefix - * @len: prefix length + * @n: network address * * Search for a FIB node with longest prefix matching the given * network, that is a node which a CIDR router would use for routing * that network. */ void * -fib_route(struct fib *f, ip_addr a, int len) +fib_route(struct fib *f, const net_addr *n) { - ip_addr a0; - void *t; - - while (len >= 0) - { - a0 = ipa_and(a, ipa_mkmask(len)); - t = fib_find(f, &a0, len); - if (t) - return t; - len--; - } - return NULL; + ASSERT(f->addr_type == n->type); + + net_addr *n0 = alloca(n->length); + net_copy(n0, n); + + switch (n->type) + { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + case NET_FLOW4: + return fib_route_ip4(f, (net_addr_ip4 *) n0); + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + case NET_FLOW6: + return fib_route_ip6(f, (net_addr_ip6 *) n0); + + default: + return NULL; + } } + static inline void fib_merge_readers(struct fib_iterator *i, struct fib_node *to) { @@ -338,8 +410,8 @@ fib_merge_readers(struct fib_iterator *i, struct fib_node *to) void fib_delete(struct fib *f, void *E) { - struct fib_node *e = E; - uint h = fib_hash(f, &e->prefix); + struct fib_node *e = fib_user_to_node(f, E); + uint h = fib_hash(f, e->addr); struct fib_node **ee = f->hash_table + h; struct fib_iterator *it; @@ -361,7 +433,12 @@ fib_delete(struct fib *f, void *E) } fib_merge_readers(it, l); } - sl_free(f->fib_slab, e); + + if (f->fib_slab) + sl_free(f->fib_slab, E); + else + mb_free(E); + if (f->entries-- < f->entries_min) fib_rehash(f, -HASH_LO_STEP); return; @@ -431,7 +508,7 @@ fit_get(struct fib *f, struct fib_iterator *i) if (k = i->next) k->prev = j; j->next = k; - i->hash = fib_hash(f, &n->prefix); + i->hash = fib_hash(f, n->addr); return n; } @@ -479,21 +556,17 @@ found: void fib_check(struct fib *f) { - uint i, ec, lo, nulls; + uint i, ec, nulls; ec = 0; for(i=0; i<f->hash_size; i++) { struct fib_node *n; - lo = 0; for(n=f->hash_table[i]; n; n=n->next) { struct fib_iterator *j, *j0; - uint 
h0 = ipa_hash(n->prefix); - if (h0 < lo) - bug("fib_check: discord in hash chains"); - lo = h0; - if ((h0 >> f->hash_shift) != i) + uint h0 = fib_hash(f, n->addr); + if (h0 != i) bug("fib_check: mishashed %x->%x (order %d)", h0, i, f->hash_order); j0 = (struct fib_iterator *) n; nulls = 0; @@ -514,8 +587,31 @@ fib_check(struct fib *f) } if (ec != f->entries) bug("fib_check: invalid entry count (%d != %d)", ec, f->entries); + return; } +/* +int +fib_histogram(struct fib *f) +{ + log(L_WARN "Histogram dump start %d %d", f->hash_size, f->entries); + + int i, j; + struct fib_node *e; + + for (i = 0; i < f->hash_size; i++) + { + j = 0; + for (e = f->hash_table[i]; e != NULL; e = e->next) + j++; + if (j > 0) + log(L_WARN "Histogram line %d: %d", i, j); + } + + log(L_WARN "Histogram dump end"); +} +*/ + #endif #ifdef TEST @@ -535,7 +631,7 @@ void dump(char *m) struct fib_iterator *j; for(n=f.hash_table[i]; n; n=n->next) { - debug("%04x %04x %p %I/%2d", i, ipa_hash(n->prefix), n, n->prefix, n->pxlen); + debug("%04x %08x %p %N", i, ipa_hash(n->prefix), n, n->addr); for(j=n->readers; j; j=j->next) debug(" %p[%p]", j, j->node); debug("\n"); diff --git a/nest/rt-roa.c b/nest/rt-roa.c deleted file mode 100644 index bf457e30..00000000 --- a/nest/rt-roa.c +++ /dev/null @@ -1,440 +0,0 @@ -/* - * BIRD -- Route Origin Authorization - * - * - * Can be freely distributed and used under the terms of the GNU GPL. 
- */ - -#undef LOCAL_DEBUG - -#include "nest/bird.h" -#include "nest/route.h" -#include "nest/cli.h" -#include "lib/lists.h" -#include "lib/resource.h" -#include "lib/event.h" -#include "lib/string.h" -#include "conf/conf.h" - - -pool *roa_pool; -static slab *roa_slab; /* Slab of struct roa_item */ -static list roa_table_list; /* List of struct roa_table */ -struct roa_table *roa_table_default; /* The first ROA table in the config */ - -static inline int -src_match(struct roa_item *it, byte src) -{ return !src || it->src == src; } - -/** - * roa_add_item - add a ROA entry - * @t: ROA table - * @prefix: prefix of the ROA entry - * @pxlen: prefix length of the ROA entry - * @maxlen: max length field of the ROA entry - * @asn: AS number field of the ROA entry - * @src: source of the ROA entry (ROA_SRC_*) - * - * The function adds a new ROA entry to the ROA table. If the same ROA - * is already in the table, nothing is added. @src field is used to - * distinguish different sources of ROAs. - */ -void -roa_add_item(struct roa_table *t, ip_addr prefix, byte pxlen, byte maxlen, u32 asn, byte src) -{ - struct roa_node *n = fib_get(&t->fib, &prefix, pxlen); - - // if ((n->items == NULL) && (n->n.x0 != ROA_INVALID)) - // t->cached_items--; - - struct roa_item *it; - for (it = n->items; it; it = it->next) - if ((it->maxlen == maxlen) && (it->asn == asn) && src_match(it, src)) - return; - - it = sl_alloc(roa_slab); - it->asn = asn; - it->maxlen = maxlen; - it->src = src; - it->next = n->items; - n->items = it; -} - -/** - * roa_delete_item - delete a ROA entry - * @t: ROA table - * @prefix: prefix of the ROA entry - * @pxlen: prefix length of the ROA entry - * @maxlen: max length field of the ROA entry - * @asn: AS number field of the ROA entry - * @src: source of the ROA entry (ROA_SRC_*) - * - * The function removes a specified ROA entry from the ROA table and - * frees it. If @src field is not ROA_SRC_ANY, only entries from - * that source are considered. 
- */ -void -roa_delete_item(struct roa_table *t, ip_addr prefix, byte pxlen, byte maxlen, u32 asn, byte src) -{ - struct roa_node *n = fib_find(&t->fib, &prefix, pxlen); - - if (!n) - return; - - struct roa_item *it, **itp; - for (itp = &n->items; it = *itp; itp = &it->next) - if ((it->maxlen == maxlen) && (it->asn == asn) && src_match(it, src)) - break; - - if (!it) - return; - - *itp = it->next; - sl_free(roa_slab, it); - - // if ((n->items == NULL) && (n->n.x0 != ROA_INVALID)) - // t->cached_items++; -} - - -/** - * roa_flush - flush a ROA table - * @t: ROA table - * @src: source of ROA entries (ROA_SRC_*) - * - * The function removes and frees ROA entries from the ROA table. If - * @src is ROA_SRC_ANY, all entries in the table are removed, - * otherwise only all entries from that source are removed. - */ -void -roa_flush(struct roa_table *t, byte src) -{ - struct roa_item *it, **itp; - struct roa_node *n; - - FIB_WALK(&t->fib, fn) - { - n = (struct roa_node *) fn; - - itp = &n->items; - while (it = *itp) - if (src_match(it, src)) - { - *itp = it->next; - sl_free(roa_slab, it); - } - else - itp = &it->next; - } - FIB_WALK_END; - - // TODO add cleanup of roa_nodes -} - - - -/* -byte -roa_check(struct roa_table *t, ip_addr prefix, byte pxlen, u32 asn) -{ - struct roa_node *n = fib_find(&t->fib, &prefix, pxlen); - - if (n && n->n.x0 == ROA_UNKNOWN) - return ROA_UNKNOWN; - - if (n && n->n.x0 == ROA_VALID && asn == n->cached_asn) - return ROA_VALID; - - byte rv = roa_match(t, n, prefix, pxlen, asn); - - if (rv != ROA_INVALID) - { - if (!n) - { - if (t->cached_items >= t->cached_items_max) - n = fib_get(&t->fib, &prefix, pxlen); - t->cached_items++; - } - - n->cached_asn = asn; - n->n.x0 = rv; - } - - return rv; -} -*/ - -/** - * roa_check - check validity of route origination in a ROA table - * @t: ROA table - * @prefix: network prefix to check - * @pxlen: length of network prefix - * @asn: AS number of network prefix - * - * Implements RFC 6483 route validation for 
the given network - * prefix. The procedure is to find all candidate ROAs - ROAs whose - * prefixes cover the give network prefix. If there is no candidate - * ROA, return ROA_UNKNOWN. If there is a candidate ROA with matching - * ASN and maxlen field greater than or equal to the given prefix - * length, return ROA_VALID. Otherwise return ROA_INVALID. If caller - * cannot determine origin AS, 0 could be used (in that case ROA_VALID - * cannot happen). - */ -byte -roa_check(struct roa_table *t, ip_addr prefix, byte pxlen, u32 asn) -{ - struct roa_node *n; - ip_addr px; - byte anything = 0; - - int len; - for (len = pxlen; len >= 0; len--) - { - px = ipa_and(prefix, ipa_mkmask(len)); - n = fib_find(&t->fib, &px, len); - - if (!n) - continue; - - struct roa_item *it; - for (it = n->items; it; it = it->next) - { - anything = 1; - if ((it->maxlen >= pxlen) && (it->asn == asn) && asn) - return ROA_VALID; - } - } - - return anything ? ROA_INVALID : ROA_UNKNOWN; -} - -static void -roa_node_init(struct fib_node *fn) -{ - struct roa_node *n = (struct roa_node *) fn; - n->items = NULL; -} - -static inline void -roa_populate(struct roa_table *t) -{ - struct roa_item_config *ric; - for (ric = t->cf->roa_items; ric; ric = ric->next) - roa_add_item(t, ric->prefix, ric->pxlen, ric->maxlen, ric->asn, ROA_SRC_CONFIG); -} - -static void -roa_new_table(struct roa_table_config *cf) -{ - struct roa_table *t; - - t = mb_allocz(roa_pool, sizeof(struct roa_table)); - fib_init(&t->fib, roa_pool, sizeof(struct roa_node), 0, roa_node_init); - t->name = cf->name; - t->cf = cf; - - cf->table = t; - add_tail(&roa_table_list, &t->n); - - roa_populate(t); -} - -struct roa_table_config * -roa_new_table_config(struct symbol *s) -{ - struct roa_table_config *rtc = cfg_allocz(sizeof(struct roa_table_config)); - - cf_define_symbol(s, SYM_ROA, rtc); - rtc->name = s->name; - add_tail(&new_config->roa_tables, &rtc->n); - return rtc; -} - -/** - * roa_add_item_config - add a static ROA entry to a ROA table 
configuration - * - * Arguments are self-explanatory. The first is the ROA table config, rest - * are specifying the ROA entry. - */ -void -roa_add_item_config(struct roa_table_config *rtc, ip_addr prefix, byte pxlen, byte maxlen, u32 asn) -{ - struct roa_item_config *ric = cfg_allocz(sizeof(struct roa_item_config)); - - ric->prefix = prefix; - ric->pxlen = pxlen; - ric->maxlen = maxlen; - ric->asn = asn; - ric->next = rtc->roa_items; - rtc->roa_items = ric; -} - -/** - * roa_init - initialize ROA tables - * - * This function is called during BIRD startup. It initializes - * the ROA table module. - */ -void -roa_init(void) -{ - roa_pool = rp_new(&root_pool, "ROA tables"); - roa_slab = sl_new(roa_pool, sizeof(struct roa_item)); - init_list(&roa_table_list); -} - -void -roa_preconfig(struct config *c) -{ - init_list(&c->roa_tables); -} - - -/** - * roa_commit - commit new ROA table configuration - * @new: new configuration - * @old: original configuration or %NULL if it's boot time config - * - * Scan differences between @old and @new configuration and modify the - * ROA tables according to these changes. If @new defines a previously - * unknown table, create it, if it omits a table existing in @old, - * delete it (there are no references, only indirect through struct - * roa_table_config). If it exists in both configurations, update the - * configured ROA entries. 
- */ -void -roa_commit(struct config *new, struct config *old) -{ - struct roa_table_config *cf; - struct roa_table *t, *tx; - - if (old) - WALK_LIST_DELSAFE(t, tx, roa_table_list) - { - struct symbol *sym = cf_find_symbol(new, t->name); - if (sym && sym->class == SYM_ROA) - { - /* Found old table in new config */ - cf = sym->def; - cf->table = t; - t->name = cf->name; - t->cf = cf; - - /* Reconfigure it */ - roa_flush(t, ROA_SRC_CONFIG); - roa_populate(t); - } - else - { - t->cf->table = NULL; - - /* Free it now */ - roa_flush(t, ROA_SRC_ANY); - rem_node(&t->n); - fib_free(&t->fib); - mb_free(t); - } - } - - /* Add new tables */ - WALK_LIST(cf, new->roa_tables) - if (! cf->table) - roa_new_table(cf); - - roa_table_default = EMPTY_LIST(new->roa_tables) ? NULL : - ((struct roa_table_config *) HEAD(new->roa_tables))->table; -} - - - -static void -roa_show_node(struct cli *c, struct roa_node *rn, int len, u32 asn) -{ - struct roa_item *ri; - - for (ri = rn->items; ri; ri = ri->next) - if ((ri->maxlen >= len) && (!asn || (ri->asn == asn))) - cli_printf(c, -1019, "%I/%d max %d as %u", rn->n.prefix, rn->n.pxlen, ri->maxlen, ri->asn); -} - -static void -roa_show_cont(struct cli *c) -{ - struct roa_show_data *d = c->rover; - struct fib *fib = &d->table->fib; - struct fib_iterator *it = &d->fit; - struct roa_node *rn; - unsigned max = 32; - - FIB_ITERATE_START(fib, it, f) - { - rn = (struct roa_node *) f; - - if (!max--) - { - FIB_ITERATE_PUT(it, f); - return; - } - - if ((d->mode == ROA_SHOW_ALL) || - net_in_net(rn->n.prefix, rn->n.pxlen, d->prefix, d->pxlen)) - roa_show_node(c, rn, 0, d->asn); - } - FIB_ITERATE_END(f); - - cli_printf(c, 0, ""); - c->cont = c->cleanup = NULL; -} - -static void -roa_show_cleanup(struct cli *c) -{ - struct roa_show_data *d = c->rover; - - /* Unlink the iterator */ - fit_get(&d->table->fib, &d->fit); -} - -void -roa_show(struct roa_show_data *d) -{ - struct roa_node *rn; - ip_addr px; - int len; - - switch (d->mode) - { - case ROA_SHOW_ALL: - 
case ROA_SHOW_IN: - FIB_ITERATE_INIT(&d->fit, &d->table->fib); - this_cli->cont = roa_show_cont; - this_cli->cleanup = roa_show_cleanup; - this_cli->rover = d; - break; - - case ROA_SHOW_PX: - rn = fib_find(&d->table->fib, &d->prefix, d->pxlen); - if (rn) - { - roa_show_node(this_cli, rn, 0, d->asn); - cli_msg(0, ""); - } - else - cli_msg(-8001, "Network not in table"); - break; - - case ROA_SHOW_FOR: - for (len = d->pxlen; len >= 0; len--) - { - px = ipa_and(d->prefix, ipa_mkmask(len)); - rn = fib_find(&d->table->fib, &px, len); - - if (!rn) - continue; - - roa_show_node(this_cli, rn, 0, d->asn); - } - cli_msg(0, ""); - break; - } -} diff --git a/nest/rt-show.c b/nest/rt-show.c new file mode 100644 index 00000000..1f1b73d2 --- /dev/null +++ b/nest/rt-show.c @@ -0,0 +1,421 @@ +/* + * BIRD -- Route Display Routines + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2017 Jan Moskyto Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#undef LOCAL_DEBUG + +#include "nest/bird.h" +#include "nest/route.h" +#include "nest/protocol.h" +#include "nest/cli.h" +#include "nest/iface.h" +#include "filter/filter.h" + +static void +rt_show_table(struct cli *c, struct rt_show_data *d) +{ + /* No table blocks in 'show route count' */ + if (d->stats == 2) + return; + + if (d->last_table) cli_printf(c, -1007, ""); + cli_printf(c, -1007, "Table %s:", d->tab->table->name); + d->last_table = d->tab; +} + +static void +rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa) +{ + byte from[IPA_MAX_TEXT_LENGTH+8]; + byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; + rta *a = e->attrs; + int primary = (e->net->routes == e); + int sync_error = (e->net->n.flags & KRF_SYNC_ERROR); + void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); + struct nexthop *nh; + + tm_format_time(tm, &config->tf_route, e->lastmod); + if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw)) + bsprintf(from, " from 
%I", a->from); + else + from[0] = 0; + + get_route_info = a->src->proto->proto->get_route_info; + if (get_route_info || d->verbose) + { + /* Need to normalize the extended attributes */ + ea_list *t = tmpa; + t = ea_append(t, a->eattrs); + tmpa = alloca(ea_scan(t)); + ea_merge(t, tmpa); + ea_sort(tmpa); + } + if (get_route_info) + get_route_info(e, info, tmpa); + else + bsprintf(info, " (%d)", e->pref); + + if (d->last_table != d->tab) + rt_show_table(c, d); + + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), + a->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + + if (a->dest == RTD_UNICAST) + for (nh = &(a->nh); nh; nh = nh->next) + { + char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; + char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; + char weight[16] = ""; + + if (nh->labels) + { + lsp += bsprintf(lsp, " mpls %d", nh->label[0]); + for (int i=1;i<nh->labels; i++) + lsp += bsprintf(lsp, "/%d", nh->label[i]); + } + *lsp = '\0'; + + if (a->nh.next) + bsprintf(weight, " weight %d", nh->weight + 1); + + if (ipa_nonzero(nh->gw)) + cli_printf(c, -1007, "\tvia %I on %s%s%s%s", + nh->gw, nh->iface->name, mpls, onlink, weight); + else + cli_printf(c, -1007, "\tdev %s%s%s", + nh->iface->name, mpls, onlink, weight); + } + + if (d->verbose) + rta_show(c, a, tmpa); +} + +static void +rt_show_net(struct cli *c, net *n, struct rt_show_data *d) +{ + rte *e, *ee; + byte ia[NET_MAX_TEXT_LENGTH+1]; + struct ea_list *tmpa; + struct channel *ec = d->tab->export_channel; + int first = 1; + int pass = 0; + + bsnprintf(ia, sizeof(ia), "%N", n->n.addr); + + for (e = n->routes; e; e = e->next) + { + if (rte_is_filtered(e) != d->filtered) + continue; + + d->rt_counter++; + d->net_counter += first; + first = 0; + + if (pass) + continue; + + ee = e; + tmpa = rte_make_tmp_attrs(e, c->show_pool); + + /* Export channel is down, do not try to export routes to it */ + if (ec && (ec->export_state == ES_DOWN)) + goto skip; + + /* 
Special case for merged export */ + if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED)) + { + rte *rt_free; + e = rt_export_merged(ec, n, &rt_free, &tmpa, c->show_pool, 1); + pass = 1; + + if (!e) + { e = ee; goto skip; } + } + else if (d->export_mode) + { + struct proto *ep = ec->proto; + int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, c->show_pool) : 0; + + if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED) + pass = 1; + + if (ic < 0) + goto skip; + + if (d->export_mode > RSEM_PREEXPORT) + { + /* + * FIXME - This shows what should be exported according to current + * filters, but not what was really exported. 'configure soft' + * command may change the export filter and do not update routes. + */ + int do_export = (ic > 0) || + (f_run(ec->out_filter, &e, &tmpa, c->show_pool, + FF_FORCE_TMPATTR | FF_SILENT) <= F_ACCEPT); + + if (do_export != (d->export_mode == RSEM_EXPORT)) + goto skip; + + if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_ACCEPTED)) + pass = 1; + } + } + + if (d->show_protocol && (d->show_protocol != e->attrs->src->proto)) + goto skip; + + if (f_run(d->filter, &e, &tmpa, c->show_pool, FF_FORCE_TMPATTR) > F_ACCEPT) + goto skip; + + if (d->stats < 2) + rt_show_rte(c, ia, e, d, tmpa); + + d->show_counter++; + ia[0] = 0; + + skip: + if (e != ee) + { + rte_free(e); + e = ee; + } + lp_flush(c->show_pool); + + if (d->primary_only) + break; + } +} + +static void +rt_show_cleanup(struct cli *c) +{ + struct rt_show_data *d = c->rover; + struct rt_show_data_rtable *tab; + + /* Unlink the iterator */ + if (d->table_open) + fit_get(&d->tab->table->fib, &d->fit); + + /* Unlock referenced tables */ + WALK_LIST(tab, d->tables) + rt_unlock_table(tab->table); +} + +static void +rt_show_cont(struct cli *c) +{ + struct rt_show_data *d = c->rover; +#ifdef DEBUGGING + unsigned max = 4; +#else + unsigned max = 64; +#endif + struct fib *fib = &d->tab->table->fib; + struct fib_iterator *it = &d->fit; + + if 
(d->running_on_config && (d->running_on_config != config)) + { + cli_printf(c, 8004, "Stopped due to reconfiguration"); + goto done; + } + + if (!d->table_open) + { + FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib); + d->table_open = 1; + d->table_counter++; + + d->show_counter_last = d->show_counter; + d->rt_counter_last = d->rt_counter; + d->net_counter_last = d->net_counter; + + if (d->tables_defined_by & RSD_TDB_SET) + rt_show_table(c, d); + } + + FIB_ITERATE_START(fib, it, net, n) + { + if (!max--) + { + FIB_ITERATE_PUT(it); + return; + } + rt_show_net(c, n, d); + } + FIB_ITERATE_END; + + if (d->stats) + { + if (d->last_table != d->tab) + rt_show_table(c, d); + + cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", + d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, + d->net_counter - d->net_counter_last, d->tab->table->name); + } + + d->table_open = 0; + d->tab = NODE_NEXT(d->tab); + + if (NODE_VALID(d->tab)) + return; + + if (d->stats && (d->table_counter > 1)) + { + if (d->last_table) cli_printf(c, -1007, ""); + cli_printf(c, 14, "Total: %d of %d routes for %d networks in %d tables", + d->show_counter, d->rt_counter, d->net_counter, d->table_counter); + } + else + cli_printf(c, 0, ""); + +done: + rt_show_cleanup(c); + c->cont = c->cleanup = NULL; +} + +struct rt_show_data_rtable * +rt_show_add_table(struct rt_show_data *d, rtable *t) +{ + struct rt_show_data_rtable *tab = cfg_allocz(sizeof(struct rt_show_data_rtable)); + tab->table = t; + add_tail(&(d->tables), &(tab->n)); + return tab; +} + +static inline void +rt_show_get_default_tables(struct rt_show_data *d) +{ + struct channel *c; + struct rt_show_data_rtable *tab; + + if (d->export_channel) + { + c = d->export_channel; + tab = rt_show_add_table(d, c->table); + tab->export_channel = c; + return; + } + + if (d->export_protocol) + { + WALK_LIST(c, d->export_protocol->channels) + { + if (c->export_state == ES_DOWN) + continue; + + tab = rt_show_add_table(d, 
c->table); + tab->export_channel = c; + } + return; + } + + if (d->show_protocol) + { + WALK_LIST(c, d->show_protocol->channels) + rt_show_add_table(d, c->table); + return; + } + + for (int i=1; i<NET_MAX; i++) + if (config->def_tables[i]) + rt_show_add_table(d, config->def_tables[i]->table); +} + +static inline void +rt_show_prepare_tables(struct rt_show_data *d) +{ + struct rt_show_data_rtable *tab, *tabx; + + /* Add implicit tables if no table is specified */ + if (EMPTY_LIST(d->tables)) + rt_show_get_default_tables(d); + + WALK_LIST_DELSAFE(tab, tabx, d->tables) + { + /* Ensure there is defined export_channel for each table */ + if (d->export_mode) + { + if (!tab->export_channel && d->export_channel && + (tab->table == d->export_channel->table)) + tab->export_channel = d->export_channel; + + if (!tab->export_channel && d->export_protocol) + tab->export_channel = proto_find_channel_by_table(d->export_protocol, tab->table); + + if (!tab->export_channel) + { + if (d->tables_defined_by & RSD_TDB_NMN) + cf_error("No export channel for table %s", tab->table->name); + + rem_node(&(tab->n)); + continue; + } + } + + /* Ensure specified network is compatible with each table */ + if (d->addr && (tab->table->addr_type != d->addr->type)) + { + if (d->tables_defined_by & RSD_TDB_NMN) + cf_error("Incompatible type of prefix/ip for table %s", tab->table->name); + + rem_node(&(tab->n)); + continue; + } + } + + /* Ensure there is at least one table */ + if (EMPTY_LIST(d->tables)) + cf_error("No valid tables"); +} + +void +rt_show(struct rt_show_data *d) +{ + struct rt_show_data_rtable *tab; + net *n; + + /* Filtered routes are neither exported nor have sensible ordering */ + if (d->filtered && (d->export_mode || d->primary_only)) + cf_error("Incompatible show route options"); + + rt_show_prepare_tables(d); + + if (!d->addr) + { + WALK_LIST(tab, d->tables) + rt_lock_table(tab->table); + + /* There is at least one table */ + d->tab = HEAD(d->tables); + this_cli->cont = 
rt_show_cont; + this_cli->cleanup = rt_show_cleanup; + this_cli->rover = d; + } + else + { + WALK_LIST(tab, d->tables) + { + d->tab = tab; + + if (d->show_for) + n = net_route(tab->table, d->addr); + else + n = net_find(tab->table, d->addr); + + if (n) + rt_show_net(this_cli, n, d); + } + + if (d->rt_counter) + cli_msg(0, ""); + else + cli_msg(8001, "Network not found"); + } +} diff --git a/nest/rt-table.c b/nest/rt-table.c index 28fe5baa..686d0e84 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -33,7 +33,6 @@ #include "nest/bird.h" #include "nest/route.h" #include "nest/protocol.h" -#include "nest/cli.h" #include "nest/iface.h" #include "lib/resource.h" #include "lib/event.h" @@ -50,41 +49,198 @@ static linpool *rte_update_pool; static list routing_tables; -static byte *rt_format_via(rte *e); static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); -static inline int rt_prune_table(rtable *tab); -static inline void rt_schedule_gc(rtable *tab); -static inline void rt_schedule_prune(rtable *tab); +static inline void rt_prune_table(rtable *tab); /* Like fib_route(), but skips empty net entries */ -static net * -net_route(rtable *tab, ip_addr a, int len) +static inline void * +net_route_ip4(rtable *t, net_addr_ip4 *n) { - ip_addr a0; - net *n; + net *r; - while (len >= 0) + while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) + { + n->pxlen--; + ip4_clrbit(&n->prefix, n->pxlen); + } + + return r; +} + +static inline void * +net_route_ip6(rtable *t, net_addr_ip6 *n) +{ + net *r; + + while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) + { + n->pxlen--; + ip6_clrbit(&n->prefix, n->pxlen); + } + + return r; +} + +static inline void * +net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) +{ + struct fib_node *fn; + + while (1) + { + net *best = NULL; + int best_pxlen = 0; + + /* We need to do dst first 
matching. Since sadr addresses are hashed on dst + prefix only, find the hash table chain and go through it to find the + match with the smallest matching src prefix. */ + for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next) { - a0 = ipa_and(a, ipa_mkmask(len)); - n = fib_find(&tab->fib, &a0, len); - if (n && rte_is_valid(n->routes)) - return n; - len--; + net_addr_ip6_sadr *a = (void *) fn->addr; + + if (net_equal_dst_ip6_sadr(n, a) && + net_in_net_src_ip6_sadr(n, a) && + (a->src_pxlen >= best_pxlen)) + { + best = fib_node_to_user(&t->fib, fn); + best_pxlen = a->src_pxlen; + } } + + if (best) + return best; + + if (!n->dst_pxlen) + break; + + n->dst_pxlen--; + ip6_clrbit(&n->dst_prefix, n->dst_pxlen); + } + return NULL; } -static void -rte_init(struct fib_node *N) +void * +net_route(rtable *tab, const net_addr *n) { - net *n = (net *) N; + ASSERT(tab->addr_type == n->type); + + net_addr *n0 = alloca(n->length); + net_copy(n0, n); + + switch (n->type) + { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + return net_route_ip4(tab, (net_addr_ip4 *) n0); + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + return net_route_ip6(tab, (net_addr_ip6 *) n0); + + case NET_IP6_SADR: + return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0); - N->flags = 0; - n->routes = NULL; + default: + return NULL; + } +} + + +static int +net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) +{ + struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); + struct fib_node *fn; + int anything = 0; + + while (1) + { + for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next) + { + net_addr_roa4 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + + if (n.pxlen == 0) + break; + + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); + } + + return 
anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) +{ + struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); + struct fib_node *fn; + int anything = 0; + + while (1) + { + for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next) + { + net_addr_roa6 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + + if (n.pxlen == 0) + break; + + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); + } + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +/** + * roa_check - check validity of route origination in a ROA table + * @tab: ROA table + * @n: network prefix to check + * @asn: AS number of network prefix + * + * Implements RFC 6483 route validation for the given network prefix. The + * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given + * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is + * a candidate ROA with matching ASN and maxlen field greater than or equal to + * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If + * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID + * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n + * must have type NET_IP4 or NET_IP6, respectively. 
+ */ +int +net_roa_check(rtable *tab, const net_addr *n, u32 asn) +{ + if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) + return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn); + else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) + return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn); + else + return ROA_UNKNOWN; /* Should not happen */ } /** @@ -121,7 +277,7 @@ rte_get_temp(rta *a) e->attrs = a; e->flags = 0; - e->pref = a->src->proto->preference; + e->pref = 0; return e; } @@ -219,7 +375,7 @@ rte_mergable(rte *pri, rte *sec) static void rte_trace(struct proto *p, rte *e, int dir, char *msg) { - log(L_TRACE "%s %c %s %I/%d %s", p->name, dir, msg, e->net->n.prefix, e->net->n.pxlen, rt_format_via(e)); + log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest)); } static inline void @@ -237,11 +393,11 @@ rte_trace_out(uint flag, struct proto *p, rte *e, char *msg) } static rte * -export_filter_(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) +export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { - struct proto *p = ah->proto; - struct filter *filter = ah->out_filter; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; + struct filter *filter = c->out_filter; + struct proto_stats *stats = &c->stats; ea_list *tmpb = NULL; rte *rt; int v; @@ -298,16 +454,16 @@ export_filter_(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa } static inline rte * -export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) +export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) { - return export_filter_(ah, rt0, rt_free, tmpa, rte_update_pool, silent); + return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent); } static void -do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, 
int refeed) +do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed) { - struct proto *p = ah->proto; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; + struct proto_stats *stats = &c->stats; /* @@ -337,11 +493,11 @@ do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tm * also non-new updates (contrary to import blocking). */ - struct proto_limit *l = ah->out_limit; - if (l && new) + struct channel_limit *l = &c->out_limit; + if (l->action && new) { if ((!old || refeed) && (stats->exp_routes >= l->limit)) - proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes); + channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); if (l->state == PLS_BLOCKED) { @@ -378,25 +534,24 @@ do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tm rte_trace_out(D_ROUTES, p, old, "removed"); } if (!new) - p->rt_notify(p, ah->table, net, NULL, old, NULL); + p->rt_notify(p, c, net, NULL, old, NULL); else if (tmpa) { ea_list *t = tmpa; while (t->next) t = t->next; t->next = new->attrs->eattrs; - p->rt_notify(p, ah->table, net, new, old, tmpa); + p->rt_notify(p, c, net, new, old, tmpa); t->next = NULL; } else - p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs); + p->rt_notify(p, c, net, new, old, new->attrs->eattrs); } static void -rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed) +rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed) { - struct proto *p = ah->proto; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; rte *new = new0; rte *old = old0; @@ -405,9 +560,9 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int re ea_list *tmpa = NULL; if (new) - stats->exp_updates_received++; + c->stats.exp_updates_received++; else - stats->exp_withdraws_received++; + c->stats.exp_withdraws_received++; /* * This is a tricky part - we don't know whether route 'old' was @@ 
-430,10 +585,10 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int re */ if (new) - new = export_filter(ah, new, &new_free, &tmpa, 0); + new = export_filter(c, new, &new_free, &tmpa, 0); if (old && !refeed) - old = export_filter(ah, old, &old_free, NULL, 1); + old = export_filter(c, old, &old_free, NULL, 1); if (!new && !old) { @@ -450,13 +605,13 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int re #ifdef CONFIG_PIPE if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto)) - p->rt_notify(p, ah->table, net, NULL, old0, NULL); + p->rt_notify(p, c, net, NULL, old0, NULL); #endif return; } - do_rt_notify(ah, net, new, old, tmpa, refeed); + do_rt_notify(c, net, new, old, tmpa, refeed); /* Discard temporary rte's */ if (new_free) @@ -466,10 +621,9 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int re } static void -rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed) +rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed) { - // struct proto *p = ah->proto; - struct proto_stats *stats = ah->stats; + // struct proto *p = c->proto; rte *r; rte *new_best = NULL; @@ -487,14 +641,14 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol was not valid, caller must use NULL for both old_changed and before_old. 
*/ if (new_changed) - stats->exp_updates_received++; + c->stats.exp_updates_received++; else - stats->exp_withdraws_received++; + c->stats.exp_withdraws_received++; /* First, find the new_best route - first accepted by filters */ for (r=net->routes; rte_is_valid(r); r=r->next) { - if (new_best = export_filter(ah, r, &new_free, &tmpa, 0)) + if (new_best = export_filter(c, r, &new_free, &tmpa, 0)) break; /* Note if we walked around the position of old_changed route */ @@ -502,9 +656,9 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol old_meet = 1; } - /* + /* * Second, handle the feed case. That means we do not care for - * old_best. It is NULL for feed, and the new_best for refeed. + * old_best. It is NULL for feed, and the new_best for refeed. * For refeed, there is a hack similar to one in rt_notify_basic() * to ensure withdraws in case of changed filters */ @@ -545,7 +699,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol /* First case */ if (old_meet) - if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1)) + if (old_best = export_filter(c, old_changed, &old_free, NULL, 1)) goto found; /* Second case */ @@ -563,18 +717,18 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol /* Fourth case */ for (r=r->next; rte_is_valid(r); r=r->next) { - if (old_best = export_filter(ah, r, &old_free, NULL, 1)) + if (old_best = export_filter(c, r, &old_free, NULL, 1)) goto found; if (r == before_old) - if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1)) + if (old_best = export_filter(c, old_changed, &old_free, NULL, 1)) goto found; } /* Implicitly, old_best is NULL and new_best is non-NULL */ found: - do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2)); + do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2)); /* Discard temporary rte's */ if (new_free) @@ -584,19 +738,17 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte 
*new_changed, rte *ol } -static struct mpnh * -mpnh_merge_rta(struct mpnh *nhs, rta *a, linpool *pool, int max) +static struct nexthop * +nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) { - struct mpnh nh = { .gw = a->gw, .iface = a->iface }; - struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh; - return mpnh_merge(nhs, nh2, 1, 0, max, pool); + return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); } rte * -rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) +rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { - // struct proto *p = ah->proto; - struct mpnh *nhs = NULL; + // struct proto *p = c->proto; + struct nexthop *nhs = NULL; rte *best0, *best, *rt0, *rt, *tmp; best0 = net->routes; @@ -605,7 +757,7 @@ rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tm if (!rte_is_valid(best0)) return NULL; - best = export_filter_(ah, best0, rt_free, tmpa, pool, silent); + best = export_filter_(c, best0, rt_free, tmpa, pool, silent); if (!best || !rte_is_reachable(best)) return best; @@ -615,13 +767,13 @@ rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tm if (!rte_mergable(best0, rt0)) continue; - rt = export_filter_(ah, rt0, &tmp, NULL, pool, 1); + rt = export_filter_(c, rt0, &tmp, NULL, pool, 1); if (!rt) continue; if (rte_is_reachable(rt)) - nhs = mpnh_merge_rta(nhs, rt->attrs, pool, ah->proto->merge_limit); + nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); if (tmp) rte_free(tmp); @@ -629,13 +781,12 @@ rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tm if (nhs) { - nhs = mpnh_merge_rta(nhs, best->attrs, pool, ah->proto->merge_limit); + nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); if (nhs->next) { best = rte_cow_rta(best, pool); - best->attrs->dest = RTD_MULTIPATH; - best->attrs->nexthops = nhs; + 
nexthop_link(best->attrs, nhs); } } @@ -647,10 +798,10 @@ rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tm static void -rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, +rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *new_best, rte*old_best, int refeed) { - // struct proto *p = ah->proto; + // struct proto *p = c->proto; rte *new_best_free = NULL; rte *old_best_free = NULL; @@ -668,31 +819,31 @@ rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_ if ((new_best == old_best) && !refeed) { new_changed = rte_mergable(new_best, new_changed) ? - export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL; + export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL; old_changed = rte_mergable(old_best, old_changed) ? - export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL; + export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL; if (!new_changed && !old_changed) return; } if (new_best) - ah->stats->exp_updates_received++; + c->stats.exp_updates_received++; else - ah->stats->exp_withdraws_received++; + c->stats.exp_withdraws_received++; /* Prepare new merged route */ if (new_best) - new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, rte_update_pool, 0); + new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0); /* Prepare old merged route (without proper merged next hops) */ /* There are some issues with running filter on old route - see rt_notify_basic() */ if (old_best && !refeed) - old_best = export_filter(ah, old_best, &old_best_free, NULL, 1); + old_best = export_filter(c, old_best, &old_best_free, NULL, 1); if (new_best || old_best) - do_rt_notify(ah, net, new_best, old_best, tmpa, refeed); + do_rt_notify(c, net, new_best, old_best, tmpa, refeed); /* Discard temporary rte's */ if (new_best_free) @@ -716,7 +867,7 @@ rt_notify_merged(struct announce_hook *ah, net 
*net, rte *new_changed, rte *old_ * @new_best: the new best route for the same network * @old_best: the previous best route for the same network * @before_old: The previous route before @old for the same network. - * If @before_old is NULL @old was the first. + * If @before_old is NULL @old was the first. * * This function gets a routing table update and announces it * to all protocols that acccepts given type of route announcement @@ -757,28 +908,22 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, if (!old && !new) return; - if (type == RA_OPTIMAL) - { - if (new) - new->attrs->src->proto->stats.pref_routes++; - if (old) - old->attrs->src->proto->stats.pref_routes--; - - if (tab->hostcache) - rt_notify_hostcache(tab, net); - } + if ((type == RA_OPTIMAL) && tab->hostcache) + rt_notify_hostcache(tab, net); - struct announce_hook *a; - WALK_LIST(a, tab->hooks) + struct channel *c; node *n; + WALK_LIST2(c, n, tab->channels, table_node) { - ASSERT(a->proto->export_state != ES_DOWN); - if (a->proto->accept_ra_types == type) + if (c->export_state == ES_DOWN) + continue; + + if (c->ra_mode == type) if (type == RA_ACCEPTED) - rt_notify_accepted(a, net, new, old, before_old, 0); + rt_notify_accepted(c, net, new, old, before_old, 0); else if (type == RA_MERGED) - rt_notify_merged(a, net, new, old, new_best, old_best, 0); + rt_notify_merged(c, net, new, old, new_best, old_best, 0); else - rt_notify_basic(a, net, new, old, 0); + rt_notify_basic(c, net, new, old, 0); } } @@ -788,27 +933,36 @@ rte_validate(rte *e) int c; net *n = e->net; - if ((n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen)) - { - log(L_WARN "Ignoring bogus prefix %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->sender->proto->name); - return 0; - } + if (!net_validate(n->n.addr)) + { + log(L_WARN "Ignoring bogus prefix %N received via %s", + n->n.addr, e->sender->proto->name); + return 0; + } - c = ipa_classify_net(n->n.prefix); + /* FIXME: better 
handling different nettypes */ + c = !net_is_flow(n->n.addr) ? + net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) - { - log(L_WARN "Ignoring bogus route %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->sender->proto->name); - return 0; - } + { + log(L_WARN "Ignoring bogus route %N received via %s", + n->n.addr, e->sender->proto->name); + return 0; + } - if ((e->attrs->dest == RTD_MULTIPATH) && !mpnh_is_sorted(e->attrs->nexthops)) - { - log(L_WARN "Ignoring unsorted multipath route %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->sender->proto->name); - return 0; - } + if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest) + { + log(L_WARN "Ignoring route %N with invalid dest %d received via %s", + n->n.addr, e->attrs->dest, e->sender->proto->name); + return 0; + } + + if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n->n.addr, e->sender->proto->name); + return 0; + } return 1; } @@ -848,11 +1002,11 @@ rte_same(rte *x, rte *y) static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) +rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) { - struct proto *p = ah->proto; - struct rtable *table = ah->table; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; + struct rtable *table = c->table; + struct proto_stats *stats = &c->stats; static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS; rte *before_old = NULL; rte *old_best = net->routes; @@ -877,8 +1031,8 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr { if (new) { - log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %I/%d to table %s", - net->n.prefix, net->n.pxlen, table->name); + log_rl(&rl_pipe, L_ERR "Pipe collision detected 
when sending %N to table %s", + net->n.addr, table->name); rte_free_quick(new); } return; @@ -916,13 +1070,13 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct proto_limit *l = ah->rx_limit; - if (l && !old && new) + struct channel_limit *l = &c->rx_limit; + if (l->action && !old && new) { u32 all_routes = stats->imp_routes + stats->filt_routes; if (all_routes >= l->limit) - proto_notify_limit(ah, l, PLD_RX, all_routes); + channel_notify_limit(c, l, PLD_RX, all_routes); if (l->state == PLS_BLOCKED) { @@ -936,11 +1090,11 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr } } - l = ah->in_limit; - if (l && !old_ok && new_ok) + l = &c->in_limit; + if (l->action && !old_ok && new_ok) { if (stats->imp_routes >= l->limit) - proto_notify_limit(ah, l, PLD_IN, stats->imp_routes); + channel_notify_limit(c, l, PLD_IN, stats->imp_routes); if (l->state == PLS_BLOCKED) { @@ -954,13 +1108,13 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr stats->imp_updates_ignored++; rte_trace_in(D_FILTERS, p, new, "ignored [limit]"); - if (ah->in_keep_filtered) + if (c->in_keep_filtered) new->flags |= REF_FILTERED; else { rte_free_quick(new); new = NULL; } /* Note that old && !new could be possible when - ah->in_keep_filtered changed in the recent past. */ + c->in_keep_filtered changed in the recent past. 
*/ if (!old && !new) return; @@ -1064,7 +1218,7 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr } if (new) - new->lastmod = now; + new->lastmod = current_time(); /* Log the route change */ if (p->debug & D_ROUTES) @@ -1092,8 +1246,8 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr if (!net->routes && (table->gc_counter++ >= table->config->gc_max_ops) && - (table->gc_time + table->config->gc_min_time <= now)) - rt_schedule_gc(table); + (table->gc_time + table->config->gc_min_time <= current_time())) + rt_schedule_prune(table); if (old_ok && p->rte_remove) p->rte_remove(net, old); @@ -1142,7 +1296,7 @@ rte_unhide_dummy_routes(net *net, rte **dummy) /** * rte_update - enter a new update to a routing table * @table: table to be updated - * @ah: pointer to table announce hook + * @c: channel doing the update * @net: network node * @p: protocol submitting the update * @src: protocol originating the update @@ -1182,18 +1336,27 @@ rte_unhide_dummy_routes(net *net, rte **dummy) */ void -rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) +rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { - struct proto *p = ah->proto; - struct proto_stats *stats = ah->stats; - struct filter *filter = ah->in_filter; + struct proto *p = c->proto; + struct proto_stats *stats = &c->stats; + struct filter *filter = c->in_filter; ea_list *tmpa = NULL; rte *dummy = NULL; + net *nn; + + ASSERT(c->channel_state == CS_UP); rte_update_lock(); if (new) { - new->sender = ah; + nn = net_get(c->table, n); + + new->net = nn; + new->sender = c; + + if (!new->pref) + new->pref = c->preference; stats->imp_updates_received++; if (!rte_validate(new)) @@ -1208,7 +1371,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) stats->imp_updates_filtered++; rte_trace_in(D_FILTERS, p, new, "filtered out"); - if (! ah->in_keep_filtered) + if (! 
c->in_keep_filtered) goto drop; /* new is a private copy, i could modify it */ @@ -1226,7 +1389,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) stats->imp_updates_filtered++; rte_trace_in(D_FILTERS, p, new, "filtered out"); - if (! ah->in_keep_filtered) + if (! c->in_keep_filtered) goto drop; new->flags |= REF_FILTERED; @@ -1243,7 +1406,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) { stats->imp_withdraws_received++; - if (!net || !src) + if (!(nn = net_find(c->table, n)) || !src) { stats->imp_withdraws_ignored++; rte_update_unlock(); @@ -1252,9 +1415,9 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) } recalc: - rte_hide_dummy_routes(net, &dummy); - rte_recalculate(ah, net, new, src); - rte_unhide_dummy_routes(net, &dummy); + rte_hide_dummy_routes(nn, &dummy); + rte_recalculate(c, nn, new, src); + rte_unhide_dummy_routes(nn, &dummy); rte_update_unlock(); return; @@ -1266,7 +1429,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) /* Independent call to rte_announce(), used from next hop recalculation, outside of rte_update(). new must be non-NULL */ -static inline void +static inline void rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *new_best, rte *old_best) { @@ -1285,9 +1448,9 @@ rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collec /* Check rtable for best route to given net whether it would be exported do p */ int -rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter) +rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter) { - net *n = net_find(t, prefix, pxlen); + net *n = net_find(t, a); rte *rt = n ? 
n->routes : NULL; if (!rte_is_valid(rt)) @@ -1315,28 +1478,25 @@ rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter /** * rt_refresh_begin - start a refresh cycle * @t: related routing table - * @ah: related announce hook + * @c related channel * * This function starts a refresh cycle for given routing table and announce * hook. The refresh cycle is a sequence where the protocol sends all its valid * routes to the routing table (by rte_update()). After that, all protocol - * routes (more precisely routes with @ah as @sender) not sent during the + * routes (more precisely routes with @c as @sender) not sent during the * refresh cycle but still in the table from the past are pruned. This is * implemented by marking all related routes as stale by REF_STALE flag in * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct announce_hook *ah) +rt_refresh_begin(rtable *t, struct channel *c) { - net *n; - rte *e; - - FIB_WALK(&t->fib, fn) + FIB_WALK(&t->fib, net, n) { - n = (net *) fn; + rte *e; for (e = n->routes; e; e = e->next) - if (e->sender == ah) + if (e->sender == c) e->flags |= REF_STALE; } FIB_WALK_END; @@ -1345,23 +1505,21 @@ rt_refresh_begin(rtable *t, struct announce_hook *ah) /** * rt_refresh_end - end a refresh cycle * @t: related routing table - * @ah: related announce hook + * @c: related channel * - * This function starts a refresh cycle for given routing table and announce + * This function ends a refresh cycle for given routing table and announce * hook. See rt_refresh_begin() for description of refresh cycles. 
*/ void -rt_refresh_end(rtable *t, struct announce_hook *ah) +rt_refresh_end(rtable *t, struct channel *c) { int prune = 0; - net *n; - rte *e; - FIB_WALK(&t->fib, fn) + FIB_WALK(&t->fib, net, n) { - n = (net *) fn; + rte *e; for (e = n->routes; e; e = e->next) - if ((e->sender == ah) && (e->flags & REF_STALE)) + if ((e->sender == c) && (e->flags & REF_STALE)) { e->flags |= REF_DISCARD; prune = 1; @@ -1384,8 +1542,8 @@ void rte_dump(rte *e) { net *n = e->net; - debug("%-1I/%2d ", n->n.prefix, n->n.pxlen); - debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod); + debug("%-1N ", n->n.addr); + debug("KF=%02x PF=%02x pref=%d ", n->n.flags, e->pflags, e->pref); rta_dump(e->attrs); if (e->attrs->src->proto->proto->dump_attrs) e->attrs->src->proto->proto->dump_attrs(e); @@ -1401,23 +1559,17 @@ rte_dump(rte *e) void rt_dump(rtable *t) { - rte *e; - net *n; - struct announce_hook *a; - debug("Dump of routing table <%s>\n", t->name); #ifdef DEBUGGING fib_check(&t->fib); #endif - FIB_WALK(&t->fib, fn) + FIB_WALK(&t->fib, net, n) { - n = (net *) fn; + rte *e; for(e=n->routes; e; e=e->next) rte_dump(e); } FIB_WALK_END; - WALK_LIST(a, t->hooks) - debug("\tAnnounces routes to protocol %s\n", a->proto->name); debug("\n"); } @@ -1436,23 +1588,6 @@ rt_dump_all(void) } static inline void -rt_schedule_prune(rtable *tab) -{ - rt_mark_for_prune(tab); - ev_schedule(tab->rt_event); -} - -static inline void -rt_schedule_gc(rtable *tab) -{ - if (tab->gc_scheduled) - return; - - tab->gc_scheduled = 1; - ev_schedule(tab->rt_event); -} - -static inline void rt_schedule_hcu(rtable *tab) { if (tab->hcu_scheduled) @@ -1465,51 +1600,34 @@ rt_schedule_hcu(rtable *tab) static inline void rt_schedule_nhu(rtable *tab) { - if (tab->nhu_state == 0) + if (tab->nhu_state == NHU_CLEAN) ev_schedule(tab->rt_event); - /* state change 0->1, 2->3 */ - tab->nhu_state |= 1; + /* state change: + * NHU_CLEAN -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_DIRTY + */ + tab->nhu_state |= 
NHU_SCHEDULED; } - -static void -rt_prune_nets(rtable *tab) +void +rt_schedule_prune(rtable *tab) { - struct fib_iterator fit; - int ncnt = 0, ndel = 0; - -#ifdef DEBUGGING - fib_check(&tab->fib); -#endif - - FIB_ITERATE_INIT(&fit, &tab->fib); -again: - FIB_ITERATE_START(&tab->fib, &fit, f) - { - net *n = (net *) f; - ncnt++; - if (!n->routes) /* Orphaned FIB entry */ - { - FIB_ITERATE_PUT(&fit, f); - fib_delete(&tab->fib, f); - ndel++; - goto again; - } - } - FIB_ITERATE_END(f); - DBG("Pruned %d of %d networks\n", ndel, ncnt); + if (tab->prune_state == 0) + ev_schedule(tab->rt_event); - tab->gc_counter = 0; - tab->gc_time = now; - tab->gc_scheduled = 0; + /* state change 0->1, 2->3 */ + tab->prune_state |= 1; } + static void rt_event(void *ptr) { rtable *tab = ptr; + rt_lock_table(tab); + if (tab->hcu_scheduled) rt_update_hostcache(tab); @@ -1517,35 +1635,25 @@ rt_event(void *ptr) rt_next_hop_update(tab); if (tab->prune_state) - if (!rt_prune_table(tab)) - { - /* Table prune unfinished */ - ev_schedule(tab->rt_event); - return; - } + rt_prune_table(tab); - if (tab->gc_scheduled) - { - rt_prune_nets(tab); - rt_prune_sources(); // FIXME this should be moved to independent event - } + rt_unlock_table(tab); } void -rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf) +rt_setup(pool *p, rtable *t, struct rtable_config *cf) { bzero(t, sizeof(*t)); - fib_init(&t->fib, p, sizeof(net), 0, rte_init); - t->name = name; + t->name = cf->name; t->config = cf; - init_list(&t->hooks); - if (cf) - { - t->rt_event = ev_new(p); - t->rt_event->hook = rt_event; - t->rt_event->data = t; - t->gc_time = now; - } + t->addr_type = cf->addr_type; + fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); + init_list(&t->channels); + + t->rt_event = ev_new(p); + t->rt_event->hook = rt_event; + t->rt_event->data = t; + t->gc_time = current_time(); } /** @@ -1559,121 +1667,123 @@ rt_init(void) { rta_init(); rt_table_pool = rp_new(&root_pool, "Routing tables"); 
- rte_update_pool = lp_new(rt_table_pool, 4080); + rte_update_pool = lp_new_default(rt_table_pool); rte_slab = sl_new(rt_table_pool, sizeof(rte)); init_list(&routing_tables); } -static int -rt_prune_step(rtable *tab, int *limit) +/** + * rt_prune_table - prune a routing table + * + * The prune loop scans routing tables and removes routes belonging to flushing + * protocols, discarded routes and also stale network entries. It is called from + * rt_event(). The event is rescheduled if the current iteration do not finish + * the table. The pruning is directed by the prune state (@prune_state), + * specifying whether the prune cycle is scheduled or running, and there + * is also a persistent pruning iterator (@prune_fit). + * + * The prune loop is used also for channel flushing. For this purpose, the + * channels to flush are marked before the iteration and notified after the + * iteration. + */ +static void +rt_prune_table(rtable *tab) { struct fib_iterator *fit = &tab->prune_fit; + int limit = 512; + + struct channel *c; + node *n, *x; DBG("Pruning route table %s\n", tab->name); #ifdef DEBUGGING fib_check(&tab->fib); #endif - if (tab->prune_state == RPS_NONE) - return 1; + if (tab->prune_state == 0) + return; - if (tab->prune_state == RPS_SCHEDULED) - { - FIB_ITERATE_INIT(fit, &tab->fib); - tab->prune_state = RPS_RUNNING; - } + if (tab->prune_state == 1) + { + /* Mark channels to flush */ + WALK_LIST2(c, n, tab->channels, table_node) + if (c->channel_state == CS_FLUSHING) + c->flush_active = 1; + + FIB_ITERATE_INIT(fit, &tab->fib); + tab->prune_state = 2; + } again: - FIB_ITERATE_START(&tab->fib, fit, fn) + FIB_ITERATE_START(&tab->fib, fit, net, n) { - net *n = (net *) fn; rte *e; rescan: for (e=n->routes; e; e=e->next) - if (e->sender->proto->flushing || (e->flags & REF_DISCARD)) + if (e->sender->flush_active || (e->flags & REF_DISCARD)) { - if (*limit <= 0) + if (limit <= 0) { - FIB_ITERATE_PUT(fit, fn); - return 0; + FIB_ITERATE_PUT(fit); + 
ev_schedule(tab->rt_event); + return; } rte_discard(e); - (*limit)--; + limit--; goto rescan; } + if (!n->routes) /* Orphaned FIB entry */ { - FIB_ITERATE_PUT(fit, fn); - fib_delete(&tab->fib, fn); + FIB_ITERATE_PUT(fit); + fib_delete(&tab->fib, n); goto again; } } - FIB_ITERATE_END(fn); + FIB_ITERATE_END; #ifdef DEBUGGING fib_check(&tab->fib); #endif - tab->prune_state = RPS_NONE; - return 1; -} + tab->gc_counter = 0; + tab->gc_time = current_time(); -/** - * rt_prune_table - prune a routing table - * @tab: a routing table for pruning - * - * This function scans the routing table @tab and removes routes belonging to - * flushing protocols, discarded routes and also stale network entries, in a - * similar fashion like rt_prune_loop(). Returns 1 when all such routes are - * pruned. Contrary to rt_prune_loop(), this function is not a part of the - * protocol flushing loop, but it is called from rt_event() for just one routing - * table. - * - * Note that rt_prune_table() and rt_prune_loop() share (for each table) the - * prune state (@prune_state) and also the pruning iterator (@prune_fit). - */ -static inline int -rt_prune_table(rtable *tab) -{ - int limit = 512; - return rt_prune_step(tab, &limit); -} + /* state change 2->0, 3->1 */ + tab->prune_state &= 1; -/** - * rt_prune_loop - prune routing tables - * - * The prune loop scans routing tables and removes routes belonging to flushing - * protocols, discarded routes and also stale network entries. Returns 1 when - * all such routes are pruned. It is a part of the protocol flushing loop. - */ -int -rt_prune_loop(void) -{ - int limit = 512; - rtable *t; + if (tab->prune_state > 0) + ev_schedule(tab->rt_event); - WALK_LIST(t, routing_tables) - if (! 
rt_prune_step(t, &limit)) - return 0; + /* FIXME: This should be handled in a better way */ + rt_prune_sources(); - return 1; + /* Close flushed channels */ + WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node) + if (c->flush_active) + { + c->flush_active = 0; + channel_set_state(c, CS_DOWN); + } + + return; } void rt_preconfig(struct config *c) { - struct symbol *s = cf_get_symbol("master"); - init_list(&c->tables); - c->master_rtc = rt_new_table(s); + + rt_new_table(cf_get_symbol("master4"), NET_IP4); + rt_new_table(cf_get_symbol("master6"), NET_IP6); } -/* +/* * Some functions for handing internal next hop updates * triggered by rt_schedule_nhu(). */ @@ -1689,33 +1799,105 @@ rta_next_hop_outdated(rta *a) if (!he->src) return a->dest != RTD_UNREACHABLE; - return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) || - (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - !mpnh_same(a->nexthops, he->src->nexthops); + return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || + (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); } -static inline void -rta_apply_hostentry(rta *a, struct hostentry *he) +void +rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) { a->hostentry = he; - a->iface = he->src ? he->src->iface : NULL; - a->gw = he->gw; a->dest = he->dest; a->igp_metric = he->igp_metric; - a->nexthops = he->src ? he->src->nexthops : NULL; + + if (a->dest != RTD_UNICAST) + { + /* No nexthop */ +no_nexthop: + a->nh = (struct nexthop) {}; + if (mls) + { /* Store the label stack for later changes */ + a->nh.labels_orig = a->nh.labels = mls->len; + memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); + } + return; + } + + if (((!mls) || (!mls->len)) && he->nexthop_linkable) + { /* Just link the nexthop chain, no label append happens. 
*/ + memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + return; + } + + struct nexthop *nhp = NULL, *nhr = NULL; + int skip_nexthop = 0; + + for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + { + if (skip_nexthop) + skip_nexthop--; + else + { + nhr = nhp; + nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + } + + nhp->iface = nh->iface; + nhp->weight = nh->weight; + if (mls) + { + nhp->labels = nh->labels + mls->len; + nhp->labels_orig = mls->len; + if (nhp->labels <= MPLS_MAX_LABEL_STACK) + { + memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ + memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ + } + else + { + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); + skip_nexthop++; + continue; + } + } + if (ipa_nonzero(nh->gw)) + { + nhp->gw = nh->gw; /* Router nexthop */ + nhp->flags |= (nh->flags & RNF_ONLINK); + } + else if (ipa_nonzero(he->link)) + nhp->gw = he->link; /* Device nexthop with link-local address known */ + else + nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + } + + if (skip_nexthop) + if (nhr) + nhr->next = NULL; + else + { + a->dest = RTD_UNREACHABLE; + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + goto no_nexthop; + } } static inline rte * rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) { - rta a; - memcpy(&a, old->attrs, sizeof(rta)); - rta_apply_hostentry(&a, old->attrs->hostentry); - a.aflags = 0; + rta *a = alloca(RTA_MAX_SIZE); + memcpy(a, old->attrs, rta_size(old->attrs)); + + mpls_label_stack mls = { .len = a->nh.labels_orig }; + memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); + + rta_apply_hostentry(a, old->attrs->hostentry, &mls); + a->aflags = 0; rte *e = sl_alloc(rte_slab); memcpy(e, old, sizeof(rte)); - 
e->attrs = rta_lookup(&a); + e->attrs = rta_lookup(a); return e; } @@ -1796,49 +1978,61 @@ rt_next_hop_update(rtable *tab) struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; - if (tab->nhu_state == 0) + if (tab->nhu_state == NHU_CLEAN) return; - if (tab->nhu_state == 1) + if (tab->nhu_state == NHU_SCHEDULED) { FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = 2; + tab->nhu_state = NHU_RUNNING; } - FIB_ITERATE_START(&tab->fib, fit, fn) + FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { - FIB_ITERATE_PUT(fit, fn); + FIB_ITERATE_PUT(fit); ev_schedule(tab->rt_event); return; } - max_feed -= rt_next_hop_update_net(tab, (net *) fn); + max_feed -= rt_next_hop_update_net(tab, n); } - FIB_ITERATE_END(fn); + FIB_ITERATE_END; - /* state change 2->0, 3->1 */ + /* State change: + * NHU_DIRTY -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_CLEAN + */ tab->nhu_state &= 1; - if (tab->nhu_state > 0) + if (tab->nhu_state != NHU_CLEAN) ev_schedule(tab->rt_event); } struct rtable_config * -rt_new_table(struct symbol *s) +rt_new_table(struct symbol *s, uint addr_type) { /* Hack that allows to 'redefine' the master table */ - if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc)) + if ((s->class == SYM_TABLE) && + (s->def == new_config->def_tables[addr_type]) && + ((addr_type == NET_IP4) || (addr_type == NET_IP6))) return s->def; struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config)); cf_define_symbol(s, SYM_TABLE, c); c->name = s->name; - add_tail(&new_config->tables, &c->n); + c->addr_type = addr_type; c->gc_max_ops = 1000; c->gc_min_time = 5; + + add_tail(&new_config->tables, &c->n); + + /* First table of each type is kept as default */ + if (! 
new_config->def_tables[addr_type]) + new_config->def_tables[addr_type] = c; + return c; } @@ -1935,7 +2129,7 @@ rt_commit(struct config *new, struct config *old) { rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable)); DBG("\t%s: created\n", r->name); - rt_setup(rt_table_pool, t, r->name, r); + rt_setup(rt_table_pool, t, r); add_tail(&routing_tables, &t->n); r->table = t; } @@ -1943,119 +2137,104 @@ rt_commit(struct config *new, struct config *old) } static inline void -do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e) +do_feed_channel(struct channel *c, net *n, rte *e) { rte_update_lock(); - if (type == RA_ACCEPTED) - rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1); - else if (type == RA_MERGED) - rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding); - else - rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding); + if (c->ra_mode == RA_ACCEPTED) + rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1); + else if (c->ra_mode == RA_MERGED) + rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding); + else /* RA_BASIC */ + rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding); rte_update_unlock(); } /** - * rt_feed_baby - advertise routes to a new protocol - * @p: protocol to be fed + * rt_feed_channel - advertise all routes to a channel + * @c: channel to be fed * - * This function performs one pass of advertisement of routes to a newly - * initialized protocol. It's called by the protocol code as long as it - * has something to do. (We avoid transferring all the routes in single - * pass in order not to monopolize CPU time.) + * This function performs one pass of advertisement of routes to a channel that + * is in the ES_FEEDING state. It is called by the protocol code as long as it + * has something to do. (We avoid transferring all the routes in single pass in + * order not to monopolize CPU time.) 
*/ int -rt_feed_baby(struct proto *p) +rt_feed_channel(struct channel *c) { - struct announce_hook *h; - struct fib_iterator *fit; + struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - if (!p->feed_ahook) /* Need to initialize first */ + ASSERT(c->export_state == ES_FEEDING); + + if (!c->feed_active) { - if (!p->ahooks) - return 1; - DBG("Announcing routes to new protocol %s\n", p->name); - p->feed_ahook = p->ahooks; - fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator)); - goto next_hook; + FIB_ITERATE_INIT(fit, &c->table->fib); + c->feed_active = 1; } - fit = p->feed_iterator; -again: - h = p->feed_ahook; - FIB_ITERATE_START(&h->table->fib, fit, fn) + FIB_ITERATE_START(&c->table->fib, fit, net, n) { - net *n = (net *) fn; rte *e = n->routes; if (max_feed <= 0) { - FIB_ITERATE_PUT(fit, fn); + FIB_ITERATE_PUT(fit); return 0; } - /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */ + /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */ - if ((p->accept_ra_types == RA_OPTIMAL) || - (p->accept_ra_types == RA_ACCEPTED) || - (p->accept_ra_types == RA_MERGED)) + if ((c->ra_mode == RA_OPTIMAL) || + (c->ra_mode == RA_ACCEPTED) || + (c->ra_mode == RA_MERGED)) if (rte_is_valid(e)) { - if (p->export_state != ES_FEEDING) - return 1; /* In the meantime, the protocol fell down. */ + /* In the meantime, the protocol may fell down */ + if (c->export_state != ES_FEEDING) + goto done; - do_feed_baby(p, p->accept_ra_types, h, n, e); + do_feed_channel(c, n, e); max_feed--; } - if (p->accept_ra_types == RA_ANY) + if (c->ra_mode == RA_ANY) for(e = n->routes; e; e = e->next) { - if (p->export_state != ES_FEEDING) - return 1; /* In the meantime, the protocol fell down. 
*/ + /* In the meantime, the protocol may fell down */ + if (c->export_state != ES_FEEDING) + goto done; if (!rte_is_valid(e)) continue; - do_feed_baby(p, RA_ANY, h, n, e); + do_feed_channel(c, n, e); max_feed--; } } - FIB_ITERATE_END(fn); - p->feed_ahook = h->next; - if (!p->feed_ahook) - { - mb_free(p->feed_iterator); - p->feed_iterator = NULL; - return 1; - } + FIB_ITERATE_END; -next_hook: - h = p->feed_ahook; - FIB_ITERATE_INIT(fit, &h->table->fib); - goto again; +done: + c->feed_active = 0; + return 1; } /** * rt_feed_baby_abort - abort protocol feeding - * @p: protocol + * @c: channel * - * This function is called by the protocol code when the protocol - * stops or ceases to exist before the last iteration of rt_feed_baby() - * has finished. + * This function is called by the protocol code when the protocol stops or + * ceases to exist during the feeding. */ void -rt_feed_baby_abort(struct proto *p) +rt_feed_channel_abort(struct channel *c) { - if (p->feed_ahook) + if (c->feed_active) { - /* Unlink the iterator and exit */ - fit_get(&p->feed_ahook->table->fib, p->feed_iterator); - p->feed_ahook = NULL; + /* Unlink the iterator */ + fit_get(&c->table->fib, &c->feed_fit); + c->feed_active = 0; } } - static inline unsigned ptr_hash(void *ptr) { @@ -2063,10 +2242,10 @@ ptr_hash(void *ptr) return p ^ (p << 8) ^ (p >> 16); } -static inline unsigned +static inline u32 hc_hash(ip_addr a, rtable *dep) { - return (ipa_hash(a) ^ ptr_hash(dep)) & 0xffff; + return ipa_hash(a) ^ ptr_hash(dep); } static inline void @@ -2100,7 +2279,7 @@ hc_alloc_table(struct hostcache *hc, unsigned order) { uint hsize = 1 << order; hc->hash_order = order; - hc->hash_shift = 16 - order; + hc->hash_shift = 32 - order; hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK); hc->hash_min = (order <= HC_LO_ORDER) ? 
0U : (hsize HC_LO_MARK); @@ -2130,12 +2309,12 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig { struct hostentry *he = sl_alloc(hc->slab); - he->addr = a; - he->link = ll; - he->tab = dep; - he->hash_key = k; - he->uc = 0; - he->src = NULL; + *he = (struct hostentry) { + .addr = a, + .link = ll, + .tab = dep, + .hash_key = k, + }; add_tail(&hc->hostentries, &he->ln); hc_insert(hc, he); @@ -2171,7 +2350,7 @@ rt_init_hostcache(rtable *tab) hc_alloc_table(hc, HC_DEF_ORDER); hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry)); - hc->lp = lp_new(rt_table_pool, 1008); + hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024)); hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node)); tab->hostcache = hc; @@ -2201,12 +2380,10 @@ rt_free_hostcache(rtable *tab) static void rt_notify_hostcache(rtable *tab, net *net) { - struct hostcache *hc = tab->hostcache; - if (tab->hcu_scheduled) return; - if (trie_match_prefix(hc->trie, net->n.prefix, net->n.pxlen)) + if (trie_match_net(tab->hostcache->trie, net->n.addr)) rt_schedule_hcu(tab); } @@ -2222,7 +2399,7 @@ if_local_addr(ip_addr a, struct iface *i) return 0; } -static u32 +static u32 rt_get_igp_metric(rte *rt) { eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC); @@ -2244,8 +2421,7 @@ rt_get_igp_metric(rte *rt) return rt->u.rip.metric; #endif - /* Device routes */ - if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH)) + if (a->source == RTS_DEVICE) return 0; return IGP_METRIC_UNKNOWN; @@ -2255,57 +2431,58 @@ static int rt_update_hostentry(rtable *tab, struct hostentry *he) { rta *old_src = he->src; + int direct = 0; int pxlen = 0; - /* Reset the hostentry */ + /* Reset the hostentry */ he->src = NULL; - he->gw = IPA_NONE; he->dest = RTD_UNREACHABLE; + he->nexthop_linkable = 0; he->igp_metric = 0; - net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH); + net_addr he_addr; + net_fill_ip_host(&he_addr, he->addr); + net *n = net_route(tab, &he_addr); if (n) { rte *e = n->routes; 
rta *a = e->attrs; - pxlen = n->n.pxlen; + pxlen = n->n.addr->pxlen; if (a->hostentry) { /* Recursive route should not depend on another recursive route */ - log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d", - he->addr, n->n.prefix, pxlen); + log(L_WARN "Next hop address %I resolvable through recursive route for %N", + he->addr, n->n.addr); goto done; } - if (a->dest == RTD_DEVICE) - { - if (if_local_addr(he->addr, a->iface)) - { - /* The host address is a local address, this is not valid */ - log(L_WARN "Next hop address %I is a local address of iface %s", - he->addr, a->iface->name); - goto done; - } - - /* The host is directly reachable, use link as a gateway */ - he->gw = he->link; - he->dest = RTD_ROUTER; - } - else + if (a->dest == RTD_UNICAST) { - /* The host is reachable through some route entry */ - he->gw = a->gw; - he->dest = a->dest; + for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + if (ipa_zero(nh->gw)) + { + if (if_local_addr(he->addr, nh->iface)) + { + /* The host address is a local address, this is not valid */ + log(L_WARN "Next hop address %I is a local address of iface %s", + he->addr, nh->iface->name); + goto done; + } + + direct++; + } } he->src = rta_clone(a); + he->dest = a->dest; + he->nexthop_linkable = !direct; he->igp_metric = rt_get_igp_metric(e); } - done: +done: /* Add a prefix range to the trie */ - trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH); + trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen); rta_free(old_src); return old_src != he->src; @@ -2338,7 +2515,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -static struct hostentry * +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; @@ -2346,277 +2523,17 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) if (!tab->hostcache) rt_init_hostcache(tab); - uint k = hc_hash(a, dep); + u32 k = 
hc_hash(a, dep); struct hostcache *hc = tab->hostcache; for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next) if (ipa_equal(he->addr, a) && (he->tab == dep)) return he; - he = hc_new_hostentry(hc, a, ll, dep, k); + he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k); rt_update_hostentry(tab, he); return he; } -void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll) -{ - rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep)); -} - - -/* - * CLI commands - */ - -static byte * -rt_format_via(rte *e) -{ - rta *a = e->attrs; - - /* Max text length w/o IP addr and interface name is 16 */ - static byte via[STD_ADDRESS_P_LENGTH+sizeof(a->iface->name)+16]; - - switch (a->dest) - { - case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break; - case RTD_DEVICE: bsprintf(via, "dev %s", a->iface->name); break; - case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break; - case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break; - case RTD_PROHIBIT: bsprintf(via, "prohibited"); break; - case RTD_MULTIPATH: bsprintf(via, "multipath"); break; - default: bsprintf(via, "???"); - } - return via; -} - -static void -rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa) -{ - byte from[STD_ADDRESS_P_LENGTH+8]; - byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; - rta *a = e->attrs; - int primary = (e->net->routes == e); - int sync_error = (e->net->n.flags & KRF_SYNC_ERROR); - void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); - struct mpnh *nh; - - tm_format_datetime(tm, &config->tf_route, e->lastmod); - if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw)) - bsprintf(from, " from %I", a->from); - else - from[0] = 0; - - get_route_info = a->src->proto->proto->get_route_info; - if (get_route_info || d->verbose) - { - /* Need to normalize the extended attributes */ - ea_list *t = tmpa; - t = ea_append(t, a->eattrs); - tmpa = alloca(ea_scan(t)); - 
ea_merge(t, tmpa); - ea_sort(tmpa); - } - if (get_route_info) - get_route_info(e, info, tmpa); - else - bsprintf(info, " (%d)", e->pref); - cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rt_format_via(e), a->src->proto->name, - tm, from, primary ? (sync_error ? " !" : " *") : "", info); - for (nh = a->nexthops; nh; nh = nh->next) - cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1); - if (d->verbose) - rta_show(c, a, tmpa); -} - -static void -rt_show_net(struct cli *c, net *n, struct rt_show_data *d) -{ - rte *e, *ee; - byte ia[STD_ADDRESS_P_LENGTH+8]; - struct ea_list *tmpa; - struct announce_hook *a = NULL; - int first = 1; - int pass = 0; - - bsprintf(ia, "%I/%d", n->n.prefix, n->n.pxlen); - - if (d->export_mode) - { - if (! d->export_protocol->rt_notify) - return; - - a = proto_find_announce_hook(d->export_protocol, d->table); - if (!a) - return; - } - - for (e = n->routes; e; e = e->next) - { - if (rte_is_filtered(e) != d->filtered) - continue; - - d->rt_counter++; - d->net_counter += first; - first = 0; - - if (pass) - continue; - - ee = e; - rte_update_lock(); /* We use the update buffer for filtering */ - tmpa = rte_make_tmp_attrs(e, rte_update_pool); - - /* Special case for merged export */ - if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED)) - { - rte *rt_free; - e = rt_export_merged(a, n, &rt_free, &tmpa, rte_update_pool, 1); - pass = 1; - - if (!e) - { e = ee; goto skip; } - } - else if (d->export_mode) - { - struct proto *ep = d->export_protocol; - int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0; - - if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED) - pass = 1; - - if (ic < 0) - goto skip; - - if (d->export_mode > RSEM_PREEXPORT) - { - /* - * FIXME - This shows what should be exported according to current - * filters, but not what was really exported. 
'configure soft' - * command may change the export filter and do not update routes. - */ - int do_export = (ic > 0) || - (f_run(a->out_filter, &e, &tmpa, rte_update_pool, - FF_FORCE_TMPATTR | FF_SILENT) <= F_ACCEPT); - - if (do_export != (d->export_mode == RSEM_EXPORT)) - goto skip; - - if ((d->export_mode == RSEM_EXPORT) && (ep->accept_ra_types == RA_ACCEPTED)) - pass = 1; - } - } - - if (d->show_protocol && (d->show_protocol != e->attrs->src->proto)) - goto skip; - - if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT) - goto skip; - - d->show_counter++; - if (d->stats < 2) - rt_show_rte(c, ia, e, d, tmpa); - ia[0] = 0; - - skip: - if (e != ee) - { - rte_free(e); - e = ee; - } - rte_update_unlock(); - - if (d->primary_only) - break; - } -} - -static void -rt_show_cont(struct cli *c) -{ - struct rt_show_data *d = c->rover; -#ifdef DEBUGGING - unsigned max = 4; -#else - unsigned max = 64; -#endif - struct fib *fib = &d->table->fib; - struct fib_iterator *it = &d->fit; - - FIB_ITERATE_START(fib, it, f) - { - net *n = (net *) f; - if (d->running_on_config && d->running_on_config != config) - { - cli_printf(c, 8004, "Stopped due to reconfiguration"); - goto done; - } - if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN)) - { - cli_printf(c, 8005, "Protocol is down"); - goto done; - } - if (!max--) - { - FIB_ITERATE_PUT(it, f); - return; - } - rt_show_net(c, n, d); - } - FIB_ITERATE_END(f); - if (d->stats) - cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter); - else - cli_printf(c, 0, ""); -done: - c->cont = c->cleanup = NULL; -} - -static void -rt_show_cleanup(struct cli *c) -{ - struct rt_show_data *d = c->rover; - - /* Unlink the iterator */ - fit_get(&d->table->fib, &d->fit); -} - -void -rt_show(struct rt_show_data *d) -{ - net *n; - - /* Default is either a master table or a table related to a respective protocol */ - if (!d->table && d->export_protocol) d->table = 
d->export_protocol->table; - if (!d->table && d->show_protocol) d->table = d->show_protocol->table; - if (!d->table) d->table = config->master_rtc->table; - - /* Filtered routes are neither exported nor have sensible ordering */ - if (d->filtered && (d->export_mode || d->primary_only)) - cli_msg(0, ""); - - if (d->pxlen == 256) - { - FIB_ITERATE_INIT(&d->fit, &d->table->fib); - this_cli->cont = rt_show_cont; - this_cli->cleanup = rt_show_cleanup; - this_cli->rover = d; - } - else - { - if (d->show_for) - n = net_route(d->table, d->prefix, d->pxlen); - else - n = net_find(d->table, d->prefix, d->pxlen); - - if (n) - rt_show_net(this_cli, n, d); - - if (d->rt_counter) - cli_msg(0, ""); - else - cli_msg(8001, "Network not in table"); - } -} /* * Documentation for functions declared inline in route.h @@ -2627,26 +2544,24 @@ rt_show(struct rt_show_data *d) * net_find - find a network entry * @tab: a routing table * @addr: address of the network - * @len: length of the network prefix * * net_find() looks up the given network in routing table @tab and * returns a pointer to its &net entry or %NULL if no such network * exists. */ -static inline net *net_find(rtable *tab, ip_addr addr, unsigned len) +static inline net *net_find(rtable *tab, net_addr *addr) { DUMMY; } /** * net_get - obtain a network entry * @tab: a routing table * @addr: address of the network - * @len: length of the network prefix * * net_get() looks up the given network in routing table @tab and * returns a pointer to its &net entry. If no such entry exists, it's * created. 
*/ -static inline net *net_get(rtable *tab, ip_addr addr, unsigned len) +static inline net *net_get(rtable *tab, net_addr *addr) { DUMMY; } /** @@ -4,7 +4,8 @@ C bfd C bgp C ospf C pipe -C rip C radv +C rip +C rpki C static S ../nest/rt-dev.c diff --git a/proto/babel/Makefile b/proto/babel/Makefile index 400ffbac..a5b4a13b 100644 --- a/proto/babel/Makefile +++ b/proto/babel/Makefile @@ -1,5 +1,6 @@ -source=babel.c packets.c -root-rel=../../ -dir-name=proto/babel +src := babel.c packets.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 38be6909..88c4711e 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -2,6 +2,8 @@ * BIRD -- The Babel protocol * * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2016--2017 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. * @@ -29,17 +31,14 @@ * * The main route selection is done in babel_select_route(). This is called when * an entry is updated by receiving updates from the network or when modified by - * internal timers. It performs feasibility checks on the available routes for - * the prefix and selects the one with the lowest metric to be announced to the - * core. + * internal timers. The function selects from feasible and reachable routes the + * one with the lowest metric to be announced to the core. */ #include <stdlib.h> #include "babel.h" -#define OUR_ROUTE(r) (r->neigh == NULL) - /* * Is one number greater or equal than another mod 2^16? This is based on the * definition of serial number space in RFC 1982. 
Note that arguments are of @@ -48,47 +47,49 @@ static inline int ge_mod64k(uint a, uint b) { return (u16)(a - b) < 0x8000; } -static void babel_dump_entry(struct babel_entry *e); -static void babel_dump_route(struct babel_route *r); -static void babel_select_route(struct babel_entry *e); -static void babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n); -static void babel_send_wildcard_request(struct babel_iface *ifa); -static int babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen, - u64 router_id, u16 seqno); -static void babel_trigger_iface_update(struct babel_iface *ifa); -static void babel_trigger_update(struct babel_proto *p); -static void babel_send_seqno_request(struct babel_entry *e); +static void babel_expire_requests(struct babel_proto *p, struct babel_entry *e); +static void babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod); +static inline void babel_announce_retraction(struct babel_proto *p, struct babel_entry *e); +static void babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n); +static void babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr); +static void babel_update_cost(struct babel_neighbor *n); static inline void babel_kick_timer(struct babel_proto *p); static inline void babel_iface_kick_timer(struct babel_iface *ifa); +static inline void babel_lock_neighbor(struct babel_neighbor *nbr) +{ if (nbr) nbr->uc++; } + +static inline void babel_unlock_neighbor(struct babel_neighbor *nbr) +{ if (nbr && !--nbr->uc) mb_free(nbr); } + /* * Functions to maintain data structures */ static void -babel_init_entry(struct fib_node *n) +babel_init_entry(void *E) { - struct babel_entry *e = (void *) n; - e->proto = NULL; - e->selected_in = NULL; - e->selected_out = NULL; - e->updated = now; + struct babel_entry *e = E; + + e->updated = current_time(); + init_list(&e->requests); 
init_list(&e->sources); init_list(&e->routes); } static inline struct babel_entry * -babel_find_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +babel_find_entry(struct babel_proto *p, const net_addr *n) { - return fib_find(&p->rtable, &prefix, plen); + struct fib *rtable = (n->type == NET_IP4) ? &p->ip4_rtable : &p->ip6_rtable; + return fib_find(rtable, n); } static struct babel_entry * -babel_get_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +babel_get_entry(struct babel_proto *p, const net_addr *n) { - struct babel_entry *e = fib_get(&p->rtable, &prefix, plen); - e->proto = p; + struct fib *rtable = (n->type == NET_IP4) ? &p->ip4_rtable : &p->ip6_rtable; + struct babel_entry *e = fib_get(rtable, n); return e; } @@ -105,9 +106,8 @@ babel_find_source(struct babel_entry *e, u64 router_id) } static struct babel_source * -babel_get_source(struct babel_entry *e, u64 router_id) +babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id) { - struct babel_proto *p = e->proto; struct babel_source *s = babel_find_source(e, router_id); if (s) @@ -115,7 +115,7 @@ babel_get_source(struct babel_entry *e, u64 router_id) s = sl_alloc(p->source_slab); s->router_id = router_id; - s->expires = now + BABEL_GARBAGE_INTERVAL; + s->expires = current_time() + BABEL_GARBAGE_INTERVAL; s->seqno = 0; s->metric = BABEL_INFINITY; add_tail(&e->sources, NODE s); @@ -124,14 +124,14 @@ babel_get_source(struct babel_entry *e, u64 router_id) } static void -babel_expire_sources(struct babel_entry *e) +babel_expire_sources(struct babel_proto *p, struct babel_entry *e) { - struct babel_proto *p = e->proto; struct babel_source *n, *nx; + btime now_ = current_time(); WALK_LIST_DELSAFE(n, nx, e->sources) { - if (n->expires && n->expires <= now) + if (n->expires && n->expires <= now_) { rem_node(NODE n); sl_free(p->source_slab, n); @@ -152,9 +152,8 @@ babel_find_route(struct babel_entry *e, struct babel_neighbor *n) } static struct babel_route * -babel_get_route(struct 
babel_entry *e, struct babel_neighbor *nbr) +babel_get_route(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *nbr) { - struct babel_proto *p = e->proto; struct babel_route *r = babel_find_route(e, nbr); if (r) @@ -162,94 +161,91 @@ babel_get_route(struct babel_entry *e, struct babel_neighbor *nbr) r = sl_alloc(p->route_slab); memset(r, 0, sizeof(*r)); + r->e = e; + r->neigh = nbr; add_tail(&e->routes, NODE r); - - if (nbr) - { - r->neigh = nbr; - r->expires = now + BABEL_GARBAGE_INTERVAL; - add_tail(&nbr->routes, NODE &r->neigh_route); - } + add_tail(&nbr->routes, NODE &r->neigh_route); return r; } -static void -babel_flush_route(struct babel_route *r) +static inline void +babel_retract_route(struct babel_proto *p, struct babel_route *r) { - struct babel_proto *p = r->e->proto; - - DBG("Babel: Flush route %I/%d router_id %lR neigh %I\n", - r->e->n.prefix, r->e->n.pxlen, r->router_id, r->neigh ? r->neigh->addr : IPA_NONE); + r->metric = r->advert_metric = BABEL_INFINITY; - rem_node(NODE r); + if (r == r->e->selected) + babel_select_route(p, r->e, r); +} - if (r->neigh) - rem_node(&r->neigh_route); +static void +babel_flush_route(struct babel_proto *p, struct babel_route *r) +{ + DBG("Babel: Flush route %N router_id %lR neigh %I\n", + r->e->n.addr, r->router_id, r->neigh->addr); - if (r->e->selected_in == r) - r->e->selected_in = NULL; + rem_node(NODE r); + rem_node(&r->neigh_route); - if (r->e->selected_out == r) - r->e->selected_out = NULL; + if (r->e->selected == r) + r->e->selected = NULL; sl_free(p->route_slab, r); } static void -babel_expire_route(struct babel_route *r) +babel_expire_route(struct babel_proto *p, struct babel_route *r) { - struct babel_proto *p = r->e->proto; - struct babel_entry *e = r->e; + struct babel_config *cf = (void *) p->p.cf; - TRACE(D_EVENTS, "Route expiry timer for %I/%d router-id %lR fired", - e->n.prefix, e->n.pxlen, r->router_id); + TRACE(D_EVENTS, "Route expiry timer for %N router-id %lR fired", + 
r->e->n.addr, r->router_id); if (r->metric < BABEL_INFINITY) { - r->metric = BABEL_INFINITY; - r->expires = now + r->expiry_interval; + r->metric = r->advert_metric = BABEL_INFINITY; + r->expires = current_time() + cf->hold_time; } else { - babel_flush_route(r); + babel_flush_route(p, r); } } static void -babel_refresh_route(struct babel_route *r) +babel_refresh_route(struct babel_proto *p, struct babel_route *r) { - if (!OUR_ROUTE(r) && (r == r->e->selected_in)) - babel_send_route_request(r->e, r->neigh); + if (r == r->e->selected) + babel_send_route_request(p, r->e, r->neigh); r->refresh_time = 0; } static void -babel_expire_routes(struct babel_proto *p) +babel_expire_routes_(struct babel_proto *p, struct fib *rtable) { - struct babel_entry *e; + struct babel_config *cf = (void *) p->p.cf; struct babel_route *r, *rx; struct fib_iterator fit; + btime now_ = current_time(); - FIB_ITERATE_INIT(&fit, &p->rtable); + FIB_ITERATE_INIT(&fit, rtable); loop: - FIB_ITERATE_START(&p->rtable, &fit, n) + FIB_ITERATE_START(rtable, &fit, struct babel_entry, e) { - e = (struct babel_entry *) n; int changed = 0; WALK_LIST_DELSAFE(r, rx, e->routes) { - if (r->refresh_time && r->refresh_time <= now) - babel_refresh_route(r); + if (r->refresh_time && r->refresh_time <= now_) + babel_refresh_route(p, r); - if (r->expires && r->expires <= now) + if (r->expires && r->expires <= now_) { - babel_expire_route(r); - changed = 1; + changed = changed || (r == e->selected); + babel_expire_route(p, r); } } @@ -258,25 +254,148 @@ loop: /* * We have to restart the iteration because there may be a cascade of * synchronous events babel_select_route() -> nest table change -> - * babel_rt_notify() -> p->rtable change, invalidating hidden variables. + * babel_rt_notify() -> rtable change, invalidating hidden variables. 
*/ + FIB_ITERATE_PUT(&fit); + babel_select_route(p, e, NULL); + goto loop; + } - FIB_ITERATE_PUT(&fit, n); - babel_select_route(e); + /* Clean up stale entries */ + if ((e->valid == BABEL_ENTRY_STALE) && ((e->updated + cf->hold_time) <= now_)) + e->valid = BABEL_ENTRY_DUMMY; + + /* Clean up unreachable route */ + if (e->unreachable && (!e->valid || (e->router_id == p->router_id))) + { + FIB_ITERATE_PUT(&fit); + babel_announce_retraction(p, e); goto loop; } - babel_expire_sources(e); + babel_expire_sources(p, e); + babel_expire_requests(p, e); /* Remove empty entries */ - if (EMPTY_LIST(e->sources) && EMPTY_LIST(e->routes)) + if (!e->valid && EMPTY_LIST(e->routes) && EMPTY_LIST(e->sources) && EMPTY_LIST(e->requests)) { - FIB_ITERATE_PUT(&fit, n); - fib_delete(&p->rtable, e); + FIB_ITERATE_PUT(&fit); + fib_delete(rtable, e); goto loop; } } - FIB_ITERATE_END(n); + FIB_ITERATE_END; +} + +static void +babel_expire_routes(struct babel_proto *p) +{ + babel_expire_routes_(p, &p->ip4_rtable); + babel_expire_routes_(p, &p->ip6_rtable); +} + +static inline int seqno_request_valid(struct babel_seqno_request *sr) +{ return !sr->nbr || sr->nbr->ifa; } + +/* + * Add seqno request to the table of pending requests (RFC 6216 3.2.6) and send + * it to network. Do nothing if it is already in the table. 
+ */ + +static void +babel_add_seqno_request(struct babel_proto *p, struct babel_entry *e, + u64 router_id, u16 seqno, u8 hop_count, + struct babel_neighbor *nbr) +{ + struct babel_seqno_request *sr; + + WALK_LIST(sr, e->requests) + if (sr->router_id == router_id) + { + /* Found matching or newer */ + if (ge_mod64k(sr->seqno, seqno) && seqno_request_valid(sr)) + return; + + /* Found older */ + babel_unlock_neighbor(sr->nbr); + rem_node(NODE sr); + goto found; + } + + /* No entries found */ + sr = sl_alloc(p->seqno_slab); + +found: + sr->router_id = router_id; + sr->seqno = seqno; + sr->hop_count = hop_count; + sr->count = 0; + sr->expires = current_time() + BABEL_SEQNO_REQUEST_EXPIRY; + babel_lock_neighbor(sr->nbr = nbr); + add_tail(&e->requests, NODE sr); + + babel_send_seqno_request(p, e, sr); +} + +static void +babel_remove_seqno_request(struct babel_proto *p, struct babel_seqno_request *sr) +{ + babel_unlock_neighbor(sr->nbr); + rem_node(NODE sr); + sl_free(p->seqno_slab, sr); +} + +static int +babel_satisfy_seqno_request(struct babel_proto *p, struct babel_entry *e, + u64 router_id, u16 seqno) +{ + struct babel_seqno_request *sr; + + WALK_LIST(sr, e->requests) + if ((sr->router_id == router_id) && ge_mod64k(seqno, sr->seqno)) + { + /* Found the request, remove it */ + babel_remove_seqno_request(p, sr); + return 1; + } + + return 0; +} + +static void +babel_expire_requests(struct babel_proto *p, struct babel_entry *e) +{ + struct babel_seqno_request *sr, *srx; + btime now_ = current_time(); + + WALK_LIST_DELSAFE(sr, srx, e->requests) + { + /* Remove seqno requests sent to dead neighbors */ + if (!seqno_request_valid(sr)) + { + babel_remove_seqno_request(p, sr); + continue; + } + + /* Handle expired requests - resend or remove */ + if (sr->expires && sr->expires <= now_) + { + if (sr->count < BABEL_SEQNO_REQUEST_RETRY) + { + sr->count++; + sr->expires += (BABEL_SEQNO_REQUEST_EXPIRY << sr->count); + babel_send_seqno_request(p, e, sr); + } + else + { + 
TRACE(D_EVENTS, "Seqno request for %N router-id %lR expired", + e->n.addr, sr->router_id); + + babel_remove_seqno_request(p, sr); + continue; + } + } + } } static struct babel_neighbor * @@ -294,61 +413,79 @@ babel_find_neighbor(struct babel_iface *ifa, ip_addr addr) static struct babel_neighbor * babel_get_neighbor(struct babel_iface *ifa, ip_addr addr) { + struct babel_proto *p = ifa->proto; struct babel_neighbor *nbr = babel_find_neighbor(ifa, addr); if (nbr) return nbr; + TRACE(D_EVENTS, "New neighbor %I on %s", addr, ifa->iface->name); + nbr = mb_allocz(ifa->pool, sizeof(struct babel_neighbor)); nbr->ifa = ifa; nbr->addr = addr; + nbr->rxcost = BABEL_INFINITY; nbr->txcost = BABEL_INFINITY; + nbr->cost = BABEL_INFINITY; init_list(&nbr->routes); + babel_lock_neighbor(nbr); add_tail(&ifa->neigh_list, NODE nbr); return nbr; } static void -babel_flush_neighbor(struct babel_neighbor *nbr) +babel_flush_neighbor(struct babel_proto *p, struct babel_neighbor *nbr) { - struct babel_proto *p = nbr->ifa->proto; + struct babel_route *r; node *n; - TRACE(D_EVENTS, "Flushing neighbor %I", nbr->addr); + TRACE(D_EVENTS, "Removing neighbor %I on %s", nbr->addr, nbr->ifa->iface->name); WALK_LIST_FIRST(n, nbr->routes) { - struct babel_route *r = SKIP_BACK(struct babel_route, neigh_route, n); - struct babel_entry *e = r->e; - int selected = (r == e->selected_in); - - babel_flush_route(r); - - if (selected) - babel_select_route(e); + r = SKIP_BACK(struct babel_route, neigh_route, n); + babel_retract_route(p, r); + babel_flush_route(p, r); } + nbr->ifa = NULL; rem_node(NODE nbr); - mb_free(nbr); + babel_unlock_neighbor(nbr); } static void -babel_expire_ihu(struct babel_neighbor *nbr) +babel_expire_ihu(struct babel_proto *p, struct babel_neighbor *nbr) { + TRACE(D_EVENTS, "IHU from nbr %I on %s expired", nbr->addr, nbr->ifa->iface->name); + nbr->txcost = BABEL_INFINITY; + nbr->ihu_expiry = 0; + babel_update_cost(nbr); } static void -babel_expire_hello(struct babel_neighbor *nbr) 
+babel_expire_hello(struct babel_proto *p, struct babel_neighbor *nbr, btime now_) { +again: nbr->hello_map <<= 1; if (nbr->hello_cnt < 16) nbr->hello_cnt++; - if (!nbr->hello_map) - babel_flush_neighbor(nbr); + nbr->hello_expiry += nbr->last_hello_int; + + /* We may expire multiple hellos if last_hello_int is too short */ + if (nbr->hello_map && nbr->hello_expiry <= now_) + goto again; + + TRACE(D_EVENTS, "Hello from nbr %I on %s expired, %d left", + nbr->addr, nbr->ifa->iface->name, u32_popcount(nbr->hello_map)); + + if (nbr->hello_map) + babel_update_cost(nbr); + else + babel_flush_neighbor(p, nbr); } static void @@ -356,16 +493,17 @@ babel_expire_neighbors(struct babel_proto *p) { struct babel_iface *ifa; struct babel_neighbor *nbr, *nbx; + btime now_ = current_time(); WALK_LIST(ifa, p->interfaces) { WALK_LIST_DELSAFE(nbr, nbx, ifa->neigh_list) { - if (nbr->ihu_expiry && nbr->ihu_expiry <= now) - babel_expire_ihu(nbr); + if (nbr->ihu_expiry && nbr->ihu_expiry <= now_) + babel_expire_ihu(p, nbr); - if (nbr->hello_expiry && nbr->hello_expiry <= now) - babel_expire_hello(nbr); + if (nbr->hello_expiry && nbr->hello_expiry <= now_) + babel_expire_hello(p, nbr, now_); } } } @@ -399,66 +537,81 @@ babel_is_feasible(struct babel_source *s, u16 seqno, u16 metric) ((seqno == s->seqno) && (metric < s->metric)); } -static u16 -babel_compute_rxcost(struct babel_neighbor *n) +/* Simple additive metric - Appendix 3.1 in the RFC */ +static inline u16 +babel_compute_metric(struct babel_neighbor *n, uint metric) { - struct babel_iface *ifa = n->ifa; - u8 cnt, missed; - u16 map=n->hello_map; - - if (!map) return BABEL_INFINITY; - cnt = u32_popcount(map); // number of bits set - missed = n->hello_cnt-cnt; + return MIN(metric + n->cost, BABEL_INFINITY); +} - if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) - { - /* ETX - Appendix 2.2 in the RFC. 
+static void +babel_update_cost(struct babel_neighbor *nbr) +{ + struct babel_proto *p = nbr->ifa->proto; + struct babel_iface_config *cf = nbr->ifa->cf; + uint rcv = u32_popcount(nbr->hello_map); // number of bits set + uint max = nbr->hello_cnt; + uint rxcost = BABEL_INFINITY; /* Cost to announce in IHU */ + uint txcost = BABEL_INFINITY; /* Effective cost for route selection */ - beta = prob. of successful transmission. - rxcost = BABEL_RXCOST_WIRELESS/beta + if (!rcv || !nbr->ifa->up) + goto done; - Since: beta = 1-missed/n->hello_cnt = cnt/n->hello_cnt - Then: rxcost = BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt - */ - if (!cnt) return BABEL_INFINITY; - return BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt; - } - else + switch (cf->type) { + case BABEL_IFACE_TYPE_WIRED: /* k-out-of-j selection - Appendix 2.1 in the RFC. */ - DBG("Babel: Missed %d hellos from %I\n", missed, n->addr); - /* Link is bad if more than half the expected hellos were lost */ - return (missed > n->hello_cnt/2) ? BABEL_INFINITY : ifa->cf->rxcost; - } -} + /* Link is bad if less than cf->limit/16 of expected hellos were received */ + if (rcv * 16 < cf->limit * max) + break; -static u16 -babel_compute_cost(struct babel_neighbor *n) -{ - struct babel_iface *ifa = n->ifa; - u16 rxcost = babel_compute_rxcost(n); - if (rxcost == BABEL_INFINITY) return rxcost; - else if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) - { - /* ETX - Appendix 2.2 in the RFC */ - return (MAX(n->txcost, BABEL_RXCOST_WIRELESS) * rxcost)/BABEL_RXCOST_WIRELESS; + rxcost = cf->rxcost; + txcost = nbr->txcost; + break; + + case BABEL_IFACE_TYPE_WIRELESS: + /* + * ETX - Appendix 2.2 in the RFC. + * + * alpha = prob. of successful transmission estimated by the neighbor + * beta = prob. of successful transmission estimated by the router + * rxcost = nominal rxcost of the router / beta + * txcost = nominal rxcost of the neighbor / (alpha * beta) + * = received txcost / beta + * + * Note that received txcost is just neighbor's rxcost. 
Beta is rcv/max, + * we use inverse values of beta (i.e. max/rcv) to stay in integers. + */ + rxcost = MIN( cf->rxcost * max / rcv, BABEL_INFINITY); + txcost = MIN(nbr->txcost * max / rcv, BABEL_INFINITY); + break; } - else + +done: + /* If RX cost changed, send IHU with next Hello */ + if (rxcost != nbr->rxcost) { - /* k-out-of-j selection - Appendix 2.1 in the RFC. */ - return n->txcost; + nbr->rxcost = rxcost; + nbr->ihu_cnt = 0; } -} -/* Simple additive metric - Appendix 3.1 in the RFC */ -static u16 -babel_compute_metric(struct babel_neighbor *n, uint metric) -{ - metric += babel_compute_cost(n); - return MIN(metric, BABEL_INFINITY); -} + /* If link cost changed, run route selection */ + if (txcost != nbr->cost) + { + TRACE(D_EVENTS, "Cost of nbr %I on %s changed from %u to %u", + nbr->addr, nbr->ifa->iface->name, nbr->cost, txcost); + nbr->cost = txcost; + + struct babel_route *r; node *n; + WALK_LIST2(r, n, nbr->routes, neigh_route) + { + r->metric = babel_compute_metric(nbr, r->advert_metric); + babel_select_route(p, r->e, r); + } + } +} /** * babel_announce_rte - announce selected route to the core @@ -466,123 +619,151 @@ babel_compute_metric(struct babel_neighbor *n, uint metric) * @e: Babel route entry to announce * * This function announces a Babel entry to the core if it has a selected - * incoming path, and retracts it otherwise. If the selected entry has infinite - * metric, the route is announced as unreachable. + * incoming path, and retracts it otherwise. If there is no selected route but + * the entry is valid and ours, the unreachable route is announced instead. */ static void babel_announce_rte(struct babel_proto *p, struct babel_entry *e) { - struct babel_route *r = e->selected_in; + struct babel_route *r = e->selected; + struct channel *c = (e->n.addr->type == NET_IP4) ? 
p->ip4_channel : p->ip6_channel; if (r) { - net *n = net_get(p->p.table, e->n.prefix, e->n.pxlen); - rta A = { + rta a0 = { .src = p->p.main_source, .source = RTS_BABEL, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST, - .dest = r->metric == BABEL_INFINITY ? RTD_UNREACHABLE : RTD_ROUTER, - .flags = 0, + .dest = RTD_UNICAST, .from = r->neigh->addr, - .iface = r->neigh->ifa->iface, + .nh.gw = r->next_hop, + .nh.iface = r->neigh->ifa->iface, }; - if (r->metric < BABEL_INFINITY) - A.gw = r->next_hop; - - rta *a = rta_lookup(&A); + rta *a = rta_lookup(&a0); rte *rte = rte_get_temp(a); + rte->u.babel.seqno = r->seqno; rte->u.babel.metric = r->metric; rte->u.babel.router_id = r->router_id; - rte->net = n; rte->pflags = 0; - rte_update(&p->p, n, rte); + e->unreachable = 0; + rte_update2(c, e->n.addr, rte, p->p.main_source); + } + else if (e->valid && (e->router_id != p->router_id)) + { + /* Unreachable */ + rta a0 = { + .src = p->p.main_source, + .source = RTS_BABEL, + .scope = SCOPE_UNIVERSE, + .dest = RTD_UNREACHABLE, + }; + + rta *a = rta_lookup(&a0); + rte *rte = rte_get_temp(a); + memset(&rte->u.babel, 0, sizeof(rte->u.babel)); + rte->pflags = 0; + rte->pref = 1; + + e->unreachable = 1; + rte_update2(c, e->n.addr, rte, p->p.main_source); } else { /* Retraction */ - net *n = net_find(p->p.table, e->n.prefix, e->n.pxlen); - rte_update(&p->p, n, NULL); + e->unreachable = 0; + rte_update2(c, e->n.addr, NULL, p->p.main_source); } } +/* Special case of babel_announce_rte() just for retraction */ +static inline void +babel_announce_retraction(struct babel_proto *p, struct babel_entry *e) +{ + struct channel *c = (e->n.addr->type == NET_IP4) ? 
p->ip4_channel : p->ip6_channel; + e->unreachable = 0; + rte_update2(c, e->n.addr, NULL, p->p.main_source); +} + + /** * babel_select_route - select best route for given route entry + * @p: Babel protocol instance * @e: Babel entry to select the best route for + * @mod: Babel route that was modified or NULL if unspecified * - * Select the best feasible route for a given prefix among the routes received - * from peers, and propagate it to the nest. This just selects the feasible - * route with the lowest metric. + * Select the best reachable and feasible route for a given prefix among the + * routes received from peers, and propagate it to the nest. This just selects + * the reachable and feasible route with the lowest metric, but keeps selected + * the old one in case of tie. * * If no feasible route is available for a prefix that previously had a route - * selected, a seqno request is sent to try to get a valid route. In the - * meantime, the route is marked as infeasible in the nest (to blackhole packets - * going to it, as per the RFC). + * selected, a seqno request is sent to try to get a valid route. If the entry + * is valid and not owned by us, the unreachable route is announced to the nest + * (to blackhole packets going to it, as per section 2.8). It is later removed + * by babel_expire_routes(). Otherwise, the route is just removed from the nest. + * + * Argument @mod is used to optimize best route calculation. When specified, the + * function can assume that only the @mod route was modified to avoid full best + * route selection and announcement when non-best route was modified in minor + * way. The caller is advised to not call babel_select_route() when no change is + * done (e.g. periodic route updates) to avoid unnecessary announcements of the + * same best route. The caller is not required to call the function in case of a + * retraction of a non-best route. 
* - * If no feasible route is available, and no previous route is selected, the - * route is removed from the nest entirely. + * Note that the function does not activate triggered updates. That is done by + * babel_rt_notify() when the change is propagated back to Babel. */ static void -babel_select_route(struct babel_entry *e) +babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod) { - struct babel_proto *p = e->proto; - struct babel_route *r, *cur = e->selected_in; + struct babel_route *r, *best = e->selected; - /* try to find the best feasible route */ - WALK_LIST(r, e->routes) - if (!OUR_ROUTE(r) && /* prevent propagating our own routes back to core */ - (!cur || r->metric < cur->metric) && - babel_is_feasible(babel_find_source(e, r->router_id), r->seqno, r->advert_metric)) - cur = r; - - if (cur && !OUR_ROUTE(cur) && - ((!e->selected_in && cur->metric < BABEL_INFINITY) || - (e->selected_in && cur->metric < e->selected_in->metric))) + /* Shortcut if only non-best was modified */ + if (mod && (mod != best)) { - TRACE(D_EVENTS, "Picked new route for prefix %I/%d: router id %lR metric %d", - e->n.prefix, e->n.pxlen, cur->router_id, cur->metric); - - e->selected_in = cur; - e->updated = now; - babel_announce_rte(p, e); + /* Either select modified route, or keep old best route */ + if ((mod->metric < (best ? best->metric : BABEL_INFINITY)) && mod->feasible) + best = mod; + else + return; } - else if (!cur || cur->metric == BABEL_INFINITY) + else { - /* Couldn't find a feasible route. If we have a selected route, that means - it just became infeasible; so set it's metric to infinite and install it - (as unreachable), then send a seqno request. 
- - babel_build_rte() will set the unreachable flag if the metric is BABEL_INFINITY.*/ - if (e->selected_in) - { - TRACE(D_EVENTS, "Lost feasible route for prefix %I/%d", - e->n.prefix, e->n.pxlen); - - e->selected_in->metric = BABEL_INFINITY; - e->updated = now; + /* Selected route may be modified and no longer admissible */ + if (!best || (best->metric == BABEL_INFINITY) || !best->feasible) + best = NULL; + + /* Find the best feasible route from all routes */ + WALK_LIST(r, e->routes) + if ((r->metric < (best ? best->metric : BABEL_INFINITY)) && r->feasible) + best = r; + } - babel_send_seqno_request(e); - babel_announce_rte(p, e); + if (best) + { + if (best != e->selected) + TRACE(D_EVENTS, "Picked new route for prefix %N: router-id %lR metric %d", + e->n.addr, best->router_id, best->metric); + } + else if (e->selected) + { + /* + * We have lost all feasible routes. We have to broadcast seqno request + * (Section 3.8.2.1) and keep unreachable route for a while (section 2.8). + * The latter is done automatically by babel_announce_rte(). + */ - /* Section 3.6 of the RFC forbids an infeasible from being selected. This - is cleared after announcing the route to the core to make sure an - unreachable route is propagated first. */ - e->selected_in = NULL; - } - else - { - /* No route currently selected, and no new one selected; this means we - don't have a route to this destination anymore (and were probably - called from an expiry timer). Remove the route from the nest. 
*/ - TRACE(D_EVENTS, "Flushing route for prefix %I/%d", e->n.prefix, e->n.pxlen); - - e->selected_in = NULL; - e->updated = now; - babel_announce_rte(p, e); - } + TRACE(D_EVENTS, "Lost feasible route for prefix %N", e->n.addr); + if (e->valid && (e->selected->router_id == e->router_id)) + babel_add_seqno_request(p, e, e->selected->router_id, e->selected->seqno + 1, 0, NULL); } + else + return; + + e->selected = best; + babel_announce_rte(p, e); } /* @@ -610,11 +791,11 @@ babel_build_ihu(union babel_msg *msg, struct babel_iface *ifa, struct babel_neig msg->type = BABEL_TLV_IHU; msg->ihu.addr = n->addr; - msg->ihu.rxcost = babel_compute_rxcost(n); + msg->ihu.rxcost = n->rxcost; msg->ihu.interval = ifa->cf->ihu_interval; - TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %d", - msg->ihu.addr, msg->ihu.rxcost, msg->ihu.interval); + TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %t", + msg->ihu.addr, msg->ihu.rxcost, (btime) msg->ihu.interval); } static void @@ -623,6 +804,7 @@ babel_send_ihu(struct babel_iface *ifa, struct babel_neighbor *n) union babel_msg msg = {}; babel_build_ihu(&msg, ifa, n); babel_send_unicast(&msg, ifa, n->addr); + n->ihu_cnt = BABEL_IHU_INTERVAL_FACTOR; } static void @@ -631,14 +813,18 @@ babel_send_ihus(struct babel_iface *ifa) struct babel_neighbor *n; WALK_LIST(n, ifa->neigh_list) { - union babel_msg msg = {}; - babel_build_ihu(&msg, ifa, n); - babel_enqueue(&msg, ifa); + if (n->hello_cnt && (--n->ihu_cnt <= 0)) + { + union babel_msg msg = {}; + babel_build_ihu(&msg, ifa, n); + babel_enqueue(&msg, ifa); + n->ihu_cnt = BABEL_IHU_INTERVAL_FACTOR; + } } } static void -babel_send_hello(struct babel_iface *ifa, u8 send_ihu) +babel_send_hello(struct babel_iface *ifa) { struct babel_proto *p = ifa->proto; union babel_msg msg = {}; @@ -647,30 +833,26 @@ babel_send_hello(struct babel_iface *ifa, u8 send_ihu) msg.hello.seqno = ifa->hello_seqno++; msg.hello.interval = ifa->cf->hello_interval; - TRACE(D_PACKETS, "Sending hello 
on %s with seqno %d interval %d", - ifa->ifname, msg.hello.seqno, msg.hello.interval); + TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t", + ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval); babel_enqueue(&msg, ifa); - if (send_ihu) - babel_send_ihus(ifa); + babel_send_ihus(ifa); } static void -babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n) +babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n) { - struct babel_proto *p = e->proto; - struct babel_iface *ifa = n->ifa; union babel_msg msg = {}; - TRACE(D_PACKETS, "Sending route request for %I/%d to %I", - e->n.prefix, e->n.pxlen, n->addr); + TRACE(D_PACKETS, "Sending route request for %N to %I", + e->n.addr, n->addr); msg.type = BABEL_TLV_ROUTE_REQUEST; - msg.route_request.prefix = e->n.prefix; - msg.route_request.plen = e->n.pxlen; + net_copy(&msg.route_request.net, e->n.addr); - babel_send_unicast(&msg, ifa, n->addr); + babel_send_unicast(&msg, n->ifa, n->addr); } static void @@ -689,56 +871,32 @@ babel_send_wildcard_request(struct babel_iface *ifa) } static void -babel_send_seqno_request(struct babel_entry *e) +babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr) { - struct babel_proto *p = e->proto; - struct babel_route *r = e->selected_in; - struct babel_iface *ifa = NULL; - struct babel_source *s = NULL; union babel_msg msg = {}; - s = babel_find_source(e, r->router_id); - if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1)) - return; - - TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d", - e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1); - msg.type = BABEL_TLV_SEQNO_REQUEST; - msg.seqno_request.plen = e->n.pxlen; - msg.seqno_request.seqno = s->seqno + 1; - msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT; - msg.seqno_request.router_id = r->router_id; - msg.seqno_request.prefix = e->n.prefix; - 
- WALK_LIST(ifa, p->interfaces) - babel_enqueue(&msg, ifa); -} - -static void -babel_unicast_seqno_request(struct babel_route *r) -{ - struct babel_entry *e = r->e; - struct babel_proto *p = e->proto; - struct babel_iface *ifa = r->neigh->ifa; - struct babel_source *s = NULL; - union babel_msg msg = {}; + msg.seqno_request.hop_count = sr->hop_count ?: BABEL_INITIAL_HOP_COUNT; + msg.seqno_request.seqno = sr->seqno; + msg.seqno_request.router_id = sr->router_id; + net_copy(&msg.seqno_request.net, e->n.addr); - s = babel_find_source(e, r->router_id); - if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1)) - return; - - TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d", - e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1); + if (sr->nbr) + { + TRACE(D_PACKETS, "Sending seqno request for %N router-id %lR seqno %d to %I on %s", + e->n.addr, sr->router_id, sr->seqno, sr->nbr->addr, sr->nbr->ifa->ifname); - msg.type = BABEL_TLV_SEQNO_REQUEST; - msg.seqno_request.plen = e->n.pxlen; - msg.seqno_request.seqno = s->seqno + 1; - msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT; - msg.seqno_request.router_id = r->router_id; - msg.seqno_request.prefix = e->n.prefix; + babel_send_unicast(&msg, sr->nbr->ifa, sr->nbr->addr); + } + else + { + TRACE(D_PACKETS, "Sending broadcast seqno request for %N router-id %lR seqno %d", + e->n.addr, sr->router_id, sr->seqno); - babel_send_unicast(&msg, ifa, r->neigh->addr); + struct babel_iface *ifa; + WALK_LIST(ifa, p->interfaces) + babel_enqueue(&msg, ifa); + } } /** @@ -752,49 +910,55 @@ babel_unicast_seqno_request(struct babel_route *r) * transmitted entry is updated. 
*/ static void -babel_send_update(struct babel_iface *ifa, bird_clock_t changed) +babel_send_update_(struct babel_iface *ifa, btime changed, struct fib *rtable) { struct babel_proto *p = ifa->proto; - FIB_WALK(&p->rtable, n) + /* Update increase was requested */ + if (p->update_seqno_inc) { - struct babel_entry *e = (void *) n; - struct babel_route *r = e->selected_out; + p->update_seqno++; + p->update_seqno_inc = 0; + } - if (!r) + FIB_WALK(rtable, struct babel_entry, e) + { + if (!e->valid) continue; /* Our own seqno might have changed, in which case we update the routes we originate. */ - if ((r->router_id == p->router_id) && (r->seqno < p->update_seqno)) + if ((e->router_id == p->router_id) && (e->seqno < p->update_seqno)) { - r->seqno = p->update_seqno; - e->updated = now; + e->seqno = p->update_seqno; + e->updated = current_time(); } /* Skip routes that weren't updated since 'changed' time */ if (e->updated < changed) continue; - TRACE(D_PACKETS, "Sending update for %I/%d router-id %lR seqno %d metric %d", - e->n.prefix, e->n.pxlen, r->router_id, r->seqno, r->metric); + TRACE(D_PACKETS, "Sending update for %N router-id %lR seqno %d metric %d", + e->n.addr, e->router_id, e->seqno, e->metric); union babel_msg msg = {}; msg.type = BABEL_TLV_UPDATE; - msg.update.plen = e->n.pxlen; msg.update.interval = ifa->cf->update_interval; - msg.update.seqno = r->seqno; - msg.update.metric = r->metric; - msg.update.prefix = e->n.prefix; - msg.update.router_id = r->router_id; + msg.update.seqno = e->seqno; + msg.update.metric = e->metric; + msg.update.router_id = e->router_id; + net_copy(&msg.update.net, e->n.addr); + + msg.update.next_hop = ((e->n.addr->type == NET_IP4) ? 
+ ifa->next_hop_ip4 : ifa->next_hop_ip6); babel_enqueue(&msg, ifa); /* Update feasibility distance for redistributed routes */ - if (!OUR_ROUTE(r)) + if (e->router_id != p->router_id) { - struct babel_source *s = babel_get_source(e, r->router_id); - s->expires = now + BABEL_GARBAGE_INTERVAL; + struct babel_source *s = babel_get_source(p, e, e->router_id); + s->expires = current_time() + BABEL_GARBAGE_INTERVAL; if ((msg.update.seqno > s->seqno) || ((msg.update.seqno == s->seqno) && (msg.update.metric < s->metric))) @@ -808,6 +972,15 @@ babel_send_update(struct babel_iface *ifa, bird_clock_t changed) } static void +babel_send_update(struct babel_iface *ifa, btime changed) +{ + struct babel_proto *p = ifa->proto; + + babel_send_update_(ifa, changed, &p->ip4_rtable); + babel_send_update_(ifa, changed, &p->ip6_rtable); +} + +static void babel_trigger_iface_update(struct babel_iface *ifa) { struct babel_proto *p = ifa->proto; @@ -819,7 +992,7 @@ babel_trigger_iface_update(struct babel_iface *ifa) TRACE(D_EVENTS, "Scheduling triggered updates for %s seqno %d", ifa->iface->name, p->update_seqno); - ifa->want_triggered = now; + ifa->want_triggered = current_time(); babel_iface_kick_timer(ifa); } @@ -839,20 +1012,18 @@ babel_trigger_update(struct babel_proto *p) /* A retraction is an update with an infinite metric */ static void -babel_send_retraction(struct babel_iface *ifa, ip_addr prefix, int plen) +babel_send_retraction(struct babel_iface *ifa, net_addr *n) { struct babel_proto *p = ifa->proto; union babel_msg msg = {}; - TRACE(D_PACKETS, "Sending retraction for %I/%d seqno %d", - prefix, plen, p->update_seqno); + TRACE(D_PACKETS, "Sending retraction for %N seqno %d", n, p->update_seqno); msg.type = BABEL_TLV_UPDATE; - msg.update.plen = plen; msg.update.interval = ifa->cf->update_interval; msg.update.seqno = p->update_seqno; msg.update.metric = BABEL_INFINITY; - msg.update.prefix = prefix; + msg.update.net = *n; babel_enqueue(&msg, ifa); } @@ -881,7 +1052,7 @@ 
babel_send_wildcard_retraction(struct babel_iface *ifa) /* Update hello history according to Appendix A1 of the RFC */ static void -babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval) +babel_update_hello_history(struct babel_neighbor *n, u16 seqno, uint interval) { /* * Compute the difference between expected and received seqno (modulo 2^16). @@ -892,7 +1063,7 @@ babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval) u16 delta = ((uint) seqno - (uint) n->next_hello_seqno); - if (delta == 0) + if ((delta == 0) || (n->hello_cnt == 0)) { /* Do nothing */ } @@ -919,84 +1090,10 @@ babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval) n->hello_map = (n->hello_map << 1) | 1; n->next_hello_seqno = seqno+1; if (n->hello_cnt < 16) n->hello_cnt++; - n->hello_expiry = now + BABEL_HELLO_EXPIRY_FACTOR(interval); -} -static void -babel_expire_seqno_requests(struct babel_proto *p) -{ - struct babel_seqno_request *n, *nx; - WALK_LIST_DELSAFE(n, nx, p->seqno_cache) - { - if ((n->updated + BABEL_SEQNO_REQUEST_EXPIRY) <= now) - { - rem_node(NODE n); - sl_free(p->seqno_slab, n); - } - } -} - -/* - * Checks the seqno request cache for a matching request and returns failure if - * found. Otherwise, a new entry is stored in the cache. 
- */ -static int -babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen, - u64 router_id, u16 seqno) -{ - struct babel_seqno_request *r; - - WALK_LIST(r, p->seqno_cache) - { - if (ipa_equal(r->prefix, prefix) && (r->plen == plen) && - (r->router_id == router_id) && (r->seqno == seqno)) - return 0; - } - - /* no entries found */ - r = sl_alloc(p->seqno_slab); - r->prefix = prefix; - r->plen = plen; - r->router_id = router_id; - r->seqno = seqno; - r->updated = now; - add_tail(&p->seqno_cache, NODE r); - - return 1; -} - -static void -babel_forward_seqno_request(struct babel_entry *e, - struct babel_msg_seqno_request *in, - ip_addr sender) -{ - struct babel_proto *p = e->proto; - struct babel_route *r; - - TRACE(D_PACKETS, "Forwarding seqno request for %I/%d router-id %lR seqno %d", - e->n.prefix, e->n.pxlen, in->router_id, in->seqno); - - WALK_LIST(r, e->routes) - { - if ((r->router_id == in->router_id) && - !OUR_ROUTE(r) && - !ipa_equal(r->neigh->addr, sender)) - { - if (!babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, in->router_id, in->seqno)) - return; - - union babel_msg msg = {}; - msg.type = BABEL_TLV_SEQNO_REQUEST; - msg.seqno_request.plen = in->plen; - msg.seqno_request.seqno = in->seqno; - msg.seqno_request.hop_count = in->hop_count-1; - msg.seqno_request.router_id = in->router_id; - msg.seqno_request.prefix = e->n.prefix; - - babel_send_unicast(&msg, r->neigh->ifa, r->neigh->addr); - return; - } - } + /* Update expiration */ + n->hello_expiry = current_time() + BABEL_HELLO_EXPIRY_FACTOR(interval); + n->last_hello_int = interval; } @@ -1010,8 +1107,8 @@ babel_handle_ack_req(union babel_msg *m, struct babel_iface *ifa) struct babel_proto *p = ifa->proto; struct babel_msg_ack_req *msg = &m->ack_req; - TRACE(D_PACKETS, "Handling ACK request nonce %d interval %d", - msg->nonce, msg->interval); + TRACE(D_PACKETS, "Handling ACK request nonce %d interval %t", + msg->nonce, (btime) msg->interval); babel_send_ack(ifa, msg->sender, 
msg->nonce); } @@ -1022,12 +1119,17 @@ babel_handle_hello(union babel_msg *m, struct babel_iface *ifa) struct babel_proto *p = ifa->proto; struct babel_msg_hello *msg = &m->hello; - TRACE(D_PACKETS, "Handling hello seqno %d interval %d", - msg->seqno, msg->interval); + TRACE(D_PACKETS, "Handling hello seqno %d interval %t", + msg->seqno, (btime) msg->interval); struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); + int first_hello = !n->hello_cnt; + babel_update_hello_history(n, msg->seqno, msg->interval); - if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) + babel_update_cost(n); + + /* Speed up session establishment by sending IHU immediately */ + if (first_hello) babel_send_ihu(ifa, n); } @@ -1041,12 +1143,13 @@ babel_handle_ihu(union babel_msg *m, struct babel_iface *ifa) if ((msg->ae != BABEL_AE_WILDCARD) && !ipa_equal(msg->addr, ifa->addr)) return; - TRACE(D_PACKETS, "Handling IHU rxcost %d interval %d", - msg->rxcost, msg->interval); + TRACE(D_PACKETS, "Handling IHU rxcost %d interval %t", + msg->rxcost, (btime) msg->interval); struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); n->txcost = msg->rxcost; - n->ihu_expiry = now + BABEL_IHU_EXPIRY_FACTOR(msg->interval); + n->ihu_expiry = current_time() + BABEL_IHU_EXPIRY_FACTOR(msg->interval); + babel_update_cost(n); } /** @@ -1069,12 +1172,15 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) struct babel_neighbor *nbr; struct babel_entry *e; struct babel_source *s; - struct babel_route *r; + struct babel_route *r, *best; node *n; - int feasible; + int feasible, metric; - TRACE(D_PACKETS, "Handling update for %I/%d with seqno %d metric %d", - msg->prefix, msg->plen, msg->seqno, msg->metric); + if (msg->wildcard) + TRACE(D_PACKETS, "Handling wildcard retraction", msg->seqno); + else + TRACE(D_PACKETS, "Handling update for %N with seqno %d metric %d", + &msg->net, msg->seqno, msg->metric); nbr = babel_find_neighbor(ifa, msg->sender); if (!nbr) @@ -1089,38 +1195,12 @@ 
babel_handle_update(union babel_msg *m, struct babel_iface *ifa) return; } - /* - * RFC section 3.5.4: - * - * When a Babel node receives an update (id, prefix, seqno, metric) from a - * neighbour neigh with a link cost value equal to cost, it checks whether it - * already has a routing table entry indexed by (neigh, id, prefix). - * - * If no such entry exists: - * - * o if the update is unfeasible, it is ignored; - * - * o if the metric is infinite (the update is a retraction), the update is - * ignored; - * - * o otherwise, a new route table entry is created, indexed by (neigh, id, - * prefix), with seqno equal to seqno and an advertised metric equal to the - * metric carried by the update. - * - * If such an entry exists: - * - * o if the entry is currently installed and the update is unfeasible, then - * the behaviour depends on whether the router-ids of the two entries match. - * If the router-ids are different, the update is treated as though it were - * a retraction (i.e., as though the metric were FFFF hexadecimal). If the - * router-ids are equal, the update is ignored; - * - * o otherwise (i.e., if either the update is feasible or the entry is not - * currently installed), then the entry's sequence number, advertised - * metric, metric, and router-id are updated and, unless the advertised - * metric is infinite, the route's expiry timer is reset to a small multiple - * of the Interval value included in the update. - */ + struct channel *c = (msg->net.type == NET_IP4) ? 
p->ip4_channel : p->ip6_channel; + if (!c || (c->channel_state != CS_UP)) + { + DBG("Babel: Ignoring update for inactive address family.\n"); + return; + } /* Retraction */ if (msg->metric == BABEL_INFINITY) @@ -1134,13 +1214,12 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) WALK_LIST(n, nbr->routes) { r = SKIP_BACK(struct babel_route, neigh_route, n); - r->metric = BABEL_INFINITY; - babel_select_route(r->e); + babel_retract_route(p, r); } } else { - e = babel_find_entry(p, msg->prefix, msg->plen); + e = babel_find_entry(p, &msg->net); if (!e) return; @@ -1151,68 +1230,56 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) if (!r) return; - r->metric = BABEL_INFINITY; - babel_select_route(e); + /* Router-id, next-hop and seqno are ignored for retractions */ + babel_retract_route(p, r); } /* Done with retractions */ return; } - e = babel_get_entry(p, msg->prefix, msg->plen); - r = babel_find_route(e, nbr); /* the route entry indexed by neighbour */ + /* Regular update */ + e = babel_get_entry(p, &msg->net); + r = babel_get_route(p, e, nbr); /* the route entry indexed by neighbour */ s = babel_find_source(e, msg->router_id); /* for feasibility */ feasible = babel_is_feasible(s, msg->seqno, msg->metric); + metric = babel_compute_metric(nbr, msg->metric); + best = e->selected; - if (!r) - { - if (!feasible) - return; + /* RFC section 3.8.2.2 - Dealing with unfeasible updates */ + if (!feasible && (metric != BABEL_INFINITY) && + (!best || (r == best) || (metric < best->metric))) + babel_add_seqno_request(p, e, s->router_id, s->seqno + 1, 0, nbr); - r = babel_get_route(e, nbr); - r->advert_metric = msg->metric; - r->router_id = msg->router_id; - r->metric = babel_compute_metric(nbr, msg->metric); - r->next_hop = msg->next_hop; - r->seqno = msg->seqno; - } - else if (r == r->e->selected_in && !feasible) - { - /* - * Route is installed and update is infeasible - we may lose the route, - * so send a unicast seqno request (section 3.8.2.2 
second paragraph). - */ - babel_unicast_seqno_request(r); + /* Special case - ignore unfeasible update to best route */ + if (r == best && !feasible && (msg->router_id == r->router_id)) + return; - if (msg->router_id == r->router_id) - return; + r->expires = current_time() + BABEL_ROUTE_EXPIRY_FACTOR(msg->interval); + r->refresh_time = current_time() + BABEL_ROUTE_REFRESH_FACTOR(msg->interval); - /* Treat as retraction */ - r->metric = BABEL_INFINITY; - } - else + /* No further processing if there is no change */ + if ((r->feasible == feasible) && (r->seqno == msg->seqno) && + (r->metric == metric) && (r->advert_metric == msg->metric) && + (r->router_id == msg->router_id) && ipa_equal(r->next_hop, msg->next_hop)) + return; + + /* Last paragraph above - update the entry */ + r->feasible = feasible; + r->seqno = msg->seqno; + r->metric = metric; + r->advert_metric = msg->metric; + r->router_id = msg->router_id; + r->next_hop = msg->next_hop; + + /* If received update satisfies seqno request, we send triggered updates */ + if (babel_satisfy_seqno_request(p, e, msg->router_id, msg->seqno)) { - /* Last paragraph above - update the entry */ - r->advert_metric = msg->metric; - r->metric = babel_compute_metric(nbr, msg->metric); - r->next_hop = msg->next_hop; - - r->router_id = msg->router_id; - r->seqno = msg->seqno; - - r->expiry_interval = BABEL_ROUTE_EXPIRY_FACTOR(msg->interval); - r->expires = now + r->expiry_interval; - if (r->expiry_interval > BABEL_ROUTE_REFRESH_INTERVAL) - r->refresh_time = now + r->expiry_interval - BABEL_ROUTE_REFRESH_INTERVAL; - - /* If the route is not feasible at this point, it means it is from another - neighbour than the one currently selected; so send a unicast seqno - request to try to get a better route (section 3.8.2.2 last paragraph). 
*/ - if (!feasible) - babel_unicast_seqno_request(r); + babel_trigger_update(p); + e->updated = current_time(); } - babel_select_route(e); + babel_select_route(p, e, r); } void @@ -1231,23 +1298,22 @@ babel_handle_route_request(union babel_msg *m, struct babel_iface *ifa) return; } - TRACE(D_PACKETS, "Handling route request for %I/%d", msg->prefix, msg->plen); + TRACE(D_PACKETS, "Handling route request for %N", &msg->net); /* Non-wildcard request - see if we have an entry for the route. If not, send a retraction, otherwise send an update. */ - struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen); + struct babel_entry *e = babel_find_entry(p, &msg->net); if (!e) { - babel_send_retraction(ifa, msg->prefix, msg->plen); + babel_send_retraction(ifa, &msg->net); } else { babel_trigger_iface_update(ifa); - e->updated = now; + e->updated = current_time(); } } - void babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa) { @@ -1256,36 +1322,54 @@ babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa) /* RFC 6126 3.8.1.2 */ - TRACE(D_PACKETS, "Handling seqno request for %I/%d router-id %lR seqno %d hop count %d", - msg->prefix, msg->plen, msg->router_id, msg->seqno, msg->hop_count); + TRACE(D_PACKETS, "Handling seqno request for %N router-id %lR seqno %d hop count %d", + &msg->net, msg->router_id, msg->seqno, msg->hop_count); /* Ignore if we have no such entry or entry has infinite metric */ - struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen); - if (!e || !e->selected_out || (e->selected_out->metric == BABEL_INFINITY)) + struct babel_entry *e = babel_find_entry(p, &msg->net); + if (!e || !e->valid || (e->metric == BABEL_INFINITY)) return; /* Trigger update on incoming interface if we have a selected route with different router id or seqno no smaller than requested */ - struct babel_route *r = e->selected_out; - if ((r->router_id != msg->router_id) || ge_mod64k(r->seqno, msg->seqno)) + if ((e->router_id != 
msg->router_id) || ge_mod64k(e->seqno, msg->seqno)) { babel_trigger_iface_update(ifa); - e->updated = now; + e->updated = current_time(); return; } /* Seqno is larger; check if we own the router id */ if (msg->router_id == p->router_id) { - /* Ours; update seqno and trigger global update */ - p->update_seqno++; + /* Ours; seqno increase and trigger global update */ + p->update_seqno_inc = 1; babel_trigger_update(p); } - else + else if (msg->hop_count > 1) { /* Not ours; forward if TTL allows it */ - if (msg->hop_count > 1) - babel_forward_seqno_request(e, msg, msg->sender); + + /* Find best admissible route */ + struct babel_route *r, *best1 = NULL, *best2 = NULL; + WALK_LIST(r, e->routes) + if ((r->router_id == msg->router_id) && !ipa_equal(r->neigh->addr, msg->sender)) + { + /* Find best feasible route */ + if ((!best1 || r->metric < best1->metric) && r->feasible) + best1 = r; + + /* Find best not necessary feasible route */ + if (!best2 || r->metric < best2->metric) + best2 = r; + } + + /* If no route is found, do nothing */ + r = best1 ?: best2; + if (!r) + return; + + babel_add_seqno_request(p, e, msg->router_id, msg->seqno, msg->hop_count-1, r->neigh); } } @@ -1320,42 +1404,43 @@ babel_iface_timer(timer *t) { struct babel_iface *ifa = t->data; struct babel_proto *p = ifa->proto; - bird_clock_t hello_period = ifa->cf->hello_interval; - bird_clock_t update_period = ifa->cf->update_interval; + btime hello_period = ifa->cf->hello_interval; + btime update_period = ifa->cf->update_interval; + btime now_ = current_time(); - if (now >= ifa->next_hello) + if (now_ >= ifa->next_hello) { - babel_send_hello(ifa, (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS || - ifa->hello_seqno % BABEL_IHU_INTERVAL_FACTOR == 0)); - ifa->next_hello += hello_period * (1 + (now - ifa->next_hello) / hello_period); + babel_send_hello(ifa); + ifa->next_hello += hello_period * (1 + (now_ - ifa->next_hello) / hello_period); } - if (now >= ifa->next_regular) + if (now_ >= ifa->next_regular) { 
TRACE(D_EVENTS, "Sending regular updates on %s", ifa->ifname); babel_send_update(ifa, 0); - ifa->next_regular += update_period * (1 + (now - ifa->next_regular) / update_period); + ifa->next_regular += update_period * (1 + (now_ - ifa->next_regular) / update_period); ifa->want_triggered = 0; p->triggered = 0; } - else if (ifa->want_triggered && (now >= ifa->next_triggered)) + else if (ifa->want_triggered && (now_ >= ifa->next_triggered)) { TRACE(D_EVENTS, "Sending triggered updates on %s", ifa->ifname); babel_send_update(ifa, ifa->want_triggered); - ifa->next_triggered = now + MIN(5, update_period / 2 + 1); + ifa->next_triggered = now_ + MIN(1 S, update_period / 2); ifa->want_triggered = 0; p->triggered = 0; } - bird_clock_t next_event = MIN(ifa->next_hello, ifa->next_regular); - tm_start(ifa->timer, ifa->want_triggered ? 1 : (next_event - now)); + btime next_event = MIN(ifa->next_hello, ifa->next_regular); + if (ifa->want_triggered) next_event = MIN(next_event, ifa->next_triggered); + tm_set(ifa->timer, next_event); } static inline void babel_iface_kick_timer(struct babel_iface *ifa) { - if (ifa->timer->expires > (now + 1)) - tm_start(ifa->timer, 1); + if (ifa->timer->expires > (current_time() + 100 MS)) + tm_start(ifa->timer, 100 MS); } static void @@ -1365,14 +1450,14 @@ babel_iface_start(struct babel_iface *ifa) TRACE(D_EVENTS, "Starting interface %s", ifa->ifname); - ifa->next_hello = now + (random() % ifa->cf->hello_interval) + 1; - ifa->next_regular = now + (random() % ifa->cf->update_interval) + 1; - ifa->next_triggered = now + MIN(5, ifa->cf->update_interval / 2 + 1); + ifa->next_hello = current_time() + (random() % ifa->cf->hello_interval); + ifa->next_regular = current_time() + (random() % ifa->cf->update_interval); + ifa->next_triggered = current_time() + MIN(1 S, ifa->cf->update_interval / 2); ifa->want_triggered = 0; /* We send an immediate update (below) */ - tm_start(ifa->timer, 1); + tm_start(ifa->timer, 100 MS); ifa->up = 1; - babel_send_hello(ifa, 
0); + babel_send_hello(ifa); babel_send_wildcard_retraction(ifa); babel_send_wildcard_request(ifa); babel_send_update(ifa, 0); /* Full update */ @@ -1398,9 +1483,7 @@ babel_iface_stop(struct babel_iface *ifa) WALK_LIST(n, nbr->routes) { r = SKIP_BACK(struct babel_route, neigh_route, n); - r->metric = BABEL_INFINITY; - r->expires = now + r->expiry_interval; - babel_select_route(r->e); + babel_retract_route(p, r); } } @@ -1488,21 +1571,21 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con ifa->cf = ic; ifa->pool = pool; ifa->ifname = new->name; + ifa->addr = new->llv6->ip; add_tail(&p->interfaces, NODE ifa); - struct ifa *addr; - WALK_LIST(addr, new->addrs) - if (ipa_is_link_local(addr->ip)) - ifa->addr = addr->ip; + ip_addr addr4 = new->addr4 ? new->addr4->ip : IPA_NONE; + ifa->next_hop_ip4 = ipa_nonzero(ic->next_hop_ip4) ? ic->next_hop_ip4 : addr4; + ifa->next_hop_ip6 = ipa_nonzero(ic->next_hop_ip6) ? ic->next_hop_ip6 : ifa->addr; - if (ipa_zero(ifa->addr)) - log(L_WARN "%s: Cannot find link-local addr on %s", p->p.name, new->name); + if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel) + log(L_WARN "%s: Cannot find IPv4 next hop addr on %s", p->p.name, new->name); init_list(&ifa->neigh_list); ifa->hello_seqno = 1; - ifa->timer = tm_new_set(ifa->pool, babel_iface_timer, ifa, 0, 0); + ifa->timer = tm_new_init(ifa->pool, babel_iface_timer, ifa, 0, 0); init_list(&ifa->msg_queue); ifa->send_event = ev_new(ifa->pool); @@ -1527,7 +1610,7 @@ babel_remove_iface(struct babel_proto *p, struct babel_iface *ifa) struct babel_neighbor *n; WALK_LIST_FIRST(n, ifa->neigh_list) - babel_flush_neighbor(n); + babel_flush_neighbor(p, n); rem_node(NODE ifa); @@ -1545,12 +1628,16 @@ babel_if_notify(struct proto *P, unsigned flags, struct iface *iface) if (flags & IF_CHANGE_UP) { - struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, iface->addr); + struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, 
NULL); /* we only speak multicast */ if (!(iface->flags & IF_MULTICAST)) return; + /* Ignore ifaces without link-local address */ + if (!iface->llv6) + return; + if (ic) babel_add_iface(p, iface, ic); @@ -1590,11 +1677,18 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b ifa->cf = new; - if (ifa->next_hello > (now + new->hello_interval)) - ifa->next_hello = now + (random() % new->hello_interval) + 1; + ip_addr addr4 = ifa->iface->addr4 ? ifa->iface->addr4->ip : IPA_NONE; + ifa->next_hop_ip4 = ipa_nonzero(new->next_hop_ip4) ? new->next_hop_ip4 : addr4; + ifa->next_hop_ip6 = ipa_nonzero(new->next_hop_ip6) ? new->next_hop_ip6 : ifa->addr; - if (ifa->next_regular > (now + new->update_interval)) - ifa->next_regular = now + (random() % new->update_interval) + 1; + if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel) + log(L_WARN "%s: Cannot find IPv4 next hop addr on %s", p->p.name, ifa->ifname); + + if (ifa->next_hello > (current_time() + new->hello_interval)) + ifa->next_hello = current_time() + (random() % new->hello_interval); + + if (ifa->next_regular > (current_time() + new->update_interval)) + ifa->next_regular = current_time() + (random() % new->update_interval); if ((new->tx_length != old->tx_length) || (new->rx_buffer != old->rx_buffer)) babel_iface_update_buffers(ifa); @@ -1615,7 +1709,15 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) WALK_LIST(iface, iface_list) { - if (! 
(iface->flags & IF_UP)) + if (!(iface->flags & IF_UP)) + continue; + + /* Ignore non-multicast ifaces */ + if (!(iface->flags & IF_MULTICAST)) + continue; + + /* Ignore ifaces without link-local address */ + if (!iface->llv6) continue; struct babel_iface *ifa = babel_find_iface(p, iface); @@ -1648,18 +1750,17 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) static void babel_dump_source(struct babel_source *s) { - debug("Source router_id %lR seqno %d metric %d expires %d\n", - s->router_id, s->seqno, s->metric, s->expires ? s->expires-now : 0); + debug("Source router_id %lR seqno %d metric %d expires %t\n", + s->router_id, s->seqno, s->metric, + s->expires ? s->expires - current_time() : 0); } static void babel_dump_route(struct babel_route *r) { - debug("Route neigh %I if %s seqno %d metric %d/%d router_id %lR expires %d\n", - r->neigh ? r->neigh->addr : IPA_NONE, - r->neigh ? r->neigh->ifa->ifname : "(none)", - r->seqno, r->advert_metric, r->metric, - r->router_id, r->expires ? r->expires-now : 0); + debug("Route neigh %I if %s seqno %d metric %d/%d router_id %lR expires %t\n", + r->neigh->addr, r->neigh->ifa->ifname, r->seqno, r->advert_metric, r->metric, + r->router_id, r->expires ? 
r->expires - current_time() : 0); } static void @@ -1668,7 +1769,7 @@ babel_dump_entry(struct babel_entry *e) struct babel_source *s; struct babel_route *r; - debug("Babel: Entry %I/%d:\n", e->n.prefix, e->n.pxlen); + debug("Babel: Entry %N:\n", e->n.addr); WALK_LIST(s,e->sources) { debug(" "); babel_dump_source(s); } @@ -1676,8 +1777,7 @@ babel_dump_entry(struct babel_entry *e) WALK_LIST(r,e->routes) { debug(" "); - if (r == e->selected_out) debug("*"); - if (r == e->selected_in) debug("+"); + if (r == e->selected) debug("*"); babel_dump_route(r); } } @@ -1685,10 +1785,10 @@ babel_dump_entry(struct babel_entry *e) static void babel_dump_neighbor(struct babel_neighbor *n) { - debug("Neighbor %I txcost %d hello_map %x next seqno %d expires %d/%d\n", + debug("Neighbor %I txcost %d hello_map %x next seqno %d expires %t/%t\n", n->addr, n->txcost, n->hello_map, n->next_hello_seqno, - n->hello_expiry ? n->hello_expiry - now : 0, - n->ihu_expiry ? n->ihu_expiry - now : 0); + n->hello_expiry ? n->hello_expiry - current_time() : 0, + n->ihu_expiry ? 
n->ihu_expiry - current_time() : 0); } static void @@ -1696,9 +1796,10 @@ babel_dump_iface(struct babel_iface *ifa) { struct babel_neighbor *n; - debug("Babel: Interface %s addr %I rxcost %d type %d hello seqno %d intervals %d %d\n", + debug("Babel: Interface %s addr %I rxcost %d type %d hello seqno %d intervals %t %t", ifa->ifname, ifa->addr, ifa->cf->rxcost, ifa->cf->type, ifa->hello_seqno, ifa->cf->hello_interval, ifa->cf->update_interval); + debug(" next hop v4 %I next hop v6 %I\n", ifa->next_hop_ip4, ifa->next_hop_ip6); WALK_LIST(n, ifa->neigh_list) { debug(" "); babel_dump_neighbor(n); } @@ -1715,9 +1816,14 @@ babel_dump(struct proto *P) WALK_LIST(ifa, p->interfaces) babel_dump_iface(ifa); - FIB_WALK(&p->rtable, n) + FIB_WALK(&p->ip4_rtable, struct babel_entry, e) + { + babel_dump_entry(e); + } + FIB_WALK_END; + FIB_WALK(&p->ip6_rtable, struct babel_entry, e) { - babel_dump_entry((struct babel_entry *) n); + babel_dump_entry(e); } FIB_WALK_END; } @@ -1765,8 +1871,9 @@ babel_show_interfaces(struct proto *P, char *iff) } cli_msg(-1023, "%s:", p->p.name); - cli_msg(-1023, "%-10s %-6s %7s %6s %6s", - "Interface", "State", "RX cost", "Nbrs", "Timer"); + cli_msg(-1023, "%-10s %-6s %7s %6s %7s %-15s %s", + "Interface", "State", "RX cost", "Nbrs", "Timer", + "Next hop (v4)", "Next hop (v6)"); WALK_LIST(ifa, p->interfaces) { @@ -1777,9 +1884,11 @@ babel_show_interfaces(struct proto *P, char *iff) WALK_LIST(nbr, ifa->neigh_list) nbrs++; - int timer = MIN(ifa->next_regular, ifa->next_hello) - now; - cli_msg(-1023, "%-10s %-6s %7u %6u %6u", - ifa->iface->name, (ifa->up ? "Up" : "Down"), ifa->cf->rxcost, nbrs, MAX(timer, 0)); + btime timer = MIN(ifa->next_regular, ifa->next_hello) - current_time(); + cli_msg(-1023, "%-10s %-6s %7u %6u %7t %-15I %I", + ifa->iface->name, (ifa->up ? 
"Up" : "Down"), + ifa->cf->rxcost, nbrs, MAX(timer, 0), + ifa->next_hop_ip4, ifa->next_hop_ip6); } cli_msg(0, ""); @@ -1801,8 +1910,8 @@ babel_show_neighbors(struct proto *P, char *iff) } cli_msg(-1024, "%s:", p->p.name); - cli_msg(-1024, "%-25s %-10s %6s %6s %10s", - "IP address", "Interface", "Metric", "Routes", "Next hello"); + cli_msg(-1024, "%-25s %-10s %6s %6s %6s %7s", + "IP address", "Interface", "Metric", "Routes", "Hellos", "Expires"); WALK_LIST(ifa, p->interfaces) { @@ -1815,25 +1924,51 @@ babel_show_neighbors(struct proto *P, char *iff) WALK_LIST(r, n->routes) rts++; - int timer = n->hello_expiry - now; - cli_msg(-1024, "%-25I %-10s %6u %6u %10u", - n->addr, ifa->iface->name, n->txcost, rts, MAX(timer, 0)); + uint hellos = u32_popcount(n->hello_map); + btime timer = n->hello_expiry - current_time(); + cli_msg(-1024, "%-25I %-10s %6u %6u %6u %7t", + n->addr, ifa->iface->name, n->cost, rts, hellos, MAX(timer, 0)); } } cli_msg(0, ""); } +static void +babel_show_entries_(struct babel_proto *p, struct fib *rtable) +{ + int width = babel_sadr_enabled(p) ? -54 : -24; + + FIB_WALK(rtable, struct babel_entry, e) + { + struct babel_route *r = NULL; + uint rts = 0, srcs = 0; + node *n; + + WALK_LIST(n, e->routes) + rts++; + + WALK_LIST(n, e->sources) + srcs++; + + if (e->valid) + cli_msg(-1025, "%-*N %-23lR %6u %5u %7u %7u", width, + e->n.addr, e->router_id, e->metric, e->seqno, rts, srcs); + else if (r = e->selected) + cli_msg(-1025, "%-*N %-23lR %6u %5u %7u %7u", width, + e->n.addr, r->router_id, r->metric, r->seqno, rts, srcs); + else + cli_msg(-1025, "%-*N %-23s %6s %5s %7u %7u", width, + e->n.addr, "<none>", "-", "-", rts, srcs); + } + FIB_WALK_END; +} + void babel_show_entries(struct proto *P) { struct babel_proto *p = (void *) P; - struct babel_entry *e = NULL; - struct babel_source *s = NULL; - struct babel_route *r = NULL; - - char ipbuf[STD_ADDRESS_P_LENGTH+5]; - char ridbuf[ROUTER_ID_64_LENGTH+1]; + int width = babel_sadr_enabled(p) ? 
-54 : -24; if (p->p.proto_state != PS_UP) { @@ -1843,37 +1978,54 @@ babel_show_entries(struct proto *P) } cli_msg(-1025, "%s:", p->p.name); - cli_msg(-1025, "%-29s %-23s %6s %5s %7s %7s", - "Prefix", "Router ID", "Metric", "Seqno", "Expires", "Sources"); + cli_msg(-1025, "%-*s %-23s %6s %5s %7s %7s", width, + "Prefix", "Router ID", "Metric", "Seqno", "Routes", "Sources"); - FIB_WALK(&p->rtable, n) - { - e = (struct babel_entry *) n; - r = e->selected_in ? e->selected_in : e->selected_out; - - int srcs = 0; - WALK_LIST(s, e->sources) - srcs++; + babel_show_entries_(p, &p->ip4_rtable); + babel_show_entries_(p, &p->ip6_rtable); - bsprintf(ipbuf, "%I/%u", e->n.prefix, e->n.pxlen); + cli_msg(0, ""); +} - if (r) - { - if (r->router_id == p->router_id) - bsprintf(ridbuf, "%s", "<self>"); - else - bsprintf(ridbuf, "%lR", r->router_id); +static void +babel_show_routes_(struct babel_proto *p, struct fib *rtable) +{ + int width = babel_sadr_enabled(p) ? -54 : -24; - int time = r->expires ? r->expires - now : 0; - cli_msg(-1025, "%-29s %-23s %6u %5u %7u %7u", - ipbuf, ridbuf, r->metric, r->seqno, MAX(time, 0), srcs); - } - else + FIB_WALK(rtable, struct babel_entry, e) + { + struct babel_route *r; + WALK_LIST(r, e->routes) { - cli_msg(-1025, "%-29s %-44s %7u", ipbuf, "<pending>", srcs); + char c = (r == e->selected) ? '*' : (r->feasible ? '+' : ' '); + btime time = r->expires ? r->expires - current_time() : 0; + cli_msg(-1025, "%-*N %-25I %-10s %5u %c %5u %7t", width, + e->n.addr, r->next_hop, r->neigh->ifa->ifname, + r->metric, c, r->seqno, MAX(time, 0)); } } FIB_WALK_END; +} + +void +babel_show_routes(struct proto *P) +{ + struct babel_proto *p = (void *) P; + int width = babel_sadr_enabled(p) ? 
-54 : -24; + + if (p->p.proto_state != PS_UP) + { + cli_msg(-1025, "%s: is not up", p->p.name); + cli_msg(0, ""); + return; + } + + cli_msg(-1025, "%s:", p->p.name); + cli_msg(-1025, "%-*s %-25s %-9s %6s F %5s %7s", width, + "Prefix", "Nexthop", "Interface", "Metric", "Seqno", "Expires"); + + babel_show_routes_(p, &p->ip4_rtable); + babel_show_routes_(p, &p->ip6_rtable); cli_msg(0, ""); } @@ -1897,15 +2049,14 @@ babel_timer(timer *t) struct babel_proto *p = t->data; babel_expire_routes(p); - babel_expire_seqno_requests(p); babel_expire_neighbors(p); } static inline void babel_kick_timer(struct babel_proto *p) { - if (p->timer->expires > (now + 1)) - tm_start(p->timer, 1); + if (p->timer->expires > (current_time() + 100 MS)) + tm_start(p->timer, 100 MS); } @@ -1936,12 +2087,18 @@ babel_prepare_attrs(struct linpool *pool, ea_list *next, uint metric, u64 router static int -babel_import_control(struct proto *P, struct rte **rt, struct ea_list **attrs, struct linpool *pool) +babel_import_control(struct proto *P, struct rte **new, struct ea_list **attrs, struct linpool *pool) { struct babel_proto *p = (void *) P; + rte *rt = *new; + + /* Reject our own unreachable routes */ + if ((rt->attrs->dest == RTD_UNREACHABLE) && (rt->attrs->src->proto == P)) + return -1; + /* Prepare attributes with initial values */ - if ((*rt)->attrs->source != RTS_BABEL) + if (rt->attrs->source != RTS_BABEL) *attrs = babel_prepare_attrs(pool, NULL, 0, p->router_id); return 0; @@ -1964,70 +2121,55 @@ babel_store_tmp_attrs(struct rte *rt, struct ea_list *attrs) * so store it into our data structures. 
*/ static void -babel_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, +babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, struct rte *new, struct rte *old UNUSED, struct ea_list *attrs UNUSED) { struct babel_proto *p = (void *) P; struct babel_entry *e; - struct babel_route *r; if (new) { /* Update */ - e = babel_get_entry(p, net->n.prefix, net->n.pxlen); + uint internal = (new->attrs->src->proto == P); + uint rt_seqno = internal ? new->u.babel.seqno : p->update_seqno; + uint rt_metric = ea_get_int(attrs, EA_BABEL_METRIC, 0); + uint rt_router_id = internal ? new->u.babel.router_id : p->router_id; - if (new->attrs->src->proto != P) + if (rt_metric > BABEL_INFINITY) { - r = babel_get_route(e, NULL); - r->seqno = p->update_seqno; - r->router_id = p->router_id; - r->metric = 0; /* FIXME: should be selectable */ + log(L_WARN "%s: Invalid babel_metric value %u for route %N", + p->p.name, rt_metric, net->n.addr); + rt_metric = BABEL_INFINITY; } - else - r = e->selected_in; - if (r != e->selected_out) + e = babel_get_entry(p, net->n.addr); + + /* Activate triggered updates */ + if ((e->valid != BABEL_ENTRY_VALID) || + (e->router_id != rt_router_id)) { - e->selected_out = r; - e->updated = now; babel_trigger_update(p); + e->updated = current_time(); } + + e->valid = BABEL_ENTRY_VALID; + e->seqno = rt_seqno; + e->metric = rt_metric; + e->router_id = rt_router_id; } else { /* Withdraw */ - e = babel_find_entry(p, net->n.prefix, net->n.pxlen); - if (!e || !e->selected_out) + e = babel_find_entry(p, net->n.addr); + + if (!e || e->valid != BABEL_ENTRY_VALID) return; - if (OUR_ROUTE(e->selected_out)) - { - /* - * We originate this route, so set its metric to infinity and set an - * expiry time. This causes a retraction to be sent, and later the route - * to be flushed once the hold time has passed. 
- */ - e->selected_out->metric = BABEL_INFINITY; - e->selected_out->expires = now + BABEL_HOLD_TIME; - e->updated = now; - babel_trigger_update(p); - } - else - { - /* - * This is a route originating from someone else that was lost; presumably - * because an export filter was updated to filter it. This means we can't - * set the metric to infinity (it would be overridden on subsequent - * updates from the peer originating the route), so just clear the - * exported route. - * - * This causes peers to expire the route after a while (like if we just - * shut down), but it's the best we can do in these circumstances; and - * since export filters presumably aren't updated that often this is - * acceptable. - */ - e->selected_out = NULL; - } + e->valid = BABEL_ENTRY_STALE; + e->metric = BABEL_INFINITY; + + babel_trigger_update(p); + e->updated = current_time(); } } @@ -2040,17 +2182,39 @@ babel_rte_better(struct rte *new, struct rte *old) static int babel_rte_same(struct rte *new, struct rte *old) { - return ((new->u.babel.router_id == old->u.babel.router_id) && - (new->u.babel.metric == old->u.babel.metric)); + return ((new->u.babel.seqno == old->u.babel.seqno) && + (new->u.babel.metric == old->u.babel.metric) && + (new->u.babel.router_id == old->u.babel.router_id)); } +static void +babel_postconfig(struct proto_config *CF) +{ + struct babel_config *cf = (void *) CF; + struct channel_config *ip4, *ip6, *ip6_sadr; + + ip4 = proto_cf_find_channel(CF, NET_IP4); + ip6 = proto_cf_find_channel(CF, NET_IP6); + ip6_sadr = proto_cf_find_channel(CF, NET_IP6_SADR); + + if (ip6 && ip6_sadr) + cf_error("Both ipv6 and ipv6-sadr channels"); + + cf->ip4_channel = ip4; + cf->ip6_channel = ip6 ?: ip6_sadr; +} + static struct proto * -babel_init(struct proto_config *cfg) +babel_init(struct proto_config *CF) { - struct proto *P = proto_new(cfg, sizeof(struct babel_proto)); + struct proto *P = proto_new(CF); + struct babel_proto *p = (void *) P; + struct babel_config *cf = (void *) CF; + + 
proto_configure_channel(P, &p->ip4_channel, cf->ip4_channel); + proto_configure_channel(P, &p->ip6_channel, cf->ip6_channel); - P->accept_ra_types = RA_OPTIMAL; P->if_notify = babel_if_notify; P->rt_notify = babel_rt_notify; P->import_control = babel_import_control; @@ -2067,11 +2231,16 @@ babel_start(struct proto *P) { struct babel_proto *p = (void *) P; struct babel_config *cf = (void *) P->cf; + u8 ip6_type = cf->ip6_channel ? cf->ip6_channel->net_type : NET_IP6; + + fib_init(&p->ip4_rtable, P->pool, NET_IP4, sizeof(struct babel_entry), + OFFSETOF(struct babel_entry, n), 0, babel_init_entry); + fib_init(&p->ip6_rtable, P->pool, ip6_type, sizeof(struct babel_entry), + OFFSETOF(struct babel_entry, n), 0, babel_init_entry); - fib_init(&p->rtable, P->pool, sizeof(struct babel_entry), 0, babel_init_entry); init_list(&p->interfaces); - p->timer = tm_new_set(P->pool, babel_timer, p, 0, 1); - tm_start(p->timer, 2); + p->timer = tm_new_init(P->pool, babel_timer, p, 1 S, 0); + tm_start(p->timer, 1 S); p->update_seqno = 1; p->router_id = proto_get_router_id(&cf->c); @@ -2079,7 +2248,6 @@ babel_start(struct proto *P) p->source_slab = sl_new(P->pool, sizeof(struct babel_source)); p->msg_slab = sl_new(P->pool, sizeof(struct babel_msg_node)); p->seqno_slab = sl_new(P->pool, sizeof(struct babel_seqno_request)); - init_list(&p->seqno_cache); p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 }; @@ -2111,14 +2279,22 @@ babel_shutdown(struct proto *P) } static int -babel_reconfigure(struct proto *P, struct proto_config *c) +babel_reconfigure(struct proto *P, struct proto_config *CF) { struct babel_proto *p = (void *) P; - struct babel_config *new = (void *) c; + struct babel_config *new = (void *) CF; + u8 ip6_type = new->ip6_channel ? 
new->ip6_channel->net_type : NET_IP6; TRACE(D_EVENTS, "Reconfiguring"); - p->p.cf = c; + if (p->ip6_rtable.addr_type != ip6_type) + return 0; + + if (!proto_configure_channel(P, &p->ip4_channel, new->ip4_channel) || + !proto_configure_channel(P, &p->ip6_channel, new->ip6_channel)) + return 0; + + p->p.cf = CF; babel_reconfigure_ifaces(p, new); babel_trigger_update(p); @@ -2133,7 +2309,10 @@ struct protocol proto_babel = { .template = "babel%d", .attr_class = EAP_BABEL, .preference = DEF_PREF_BABEL, + .channel_mask = NB_IP | NB_IP6_SADR, + .proto_size = sizeof(struct babel_proto), .config_size = sizeof(struct babel_config), + .postconfig = babel_postconfig, .init = babel_init, .dump = babel_dump, .start = babel_start, diff --git a/proto/babel/babel.h b/proto/babel/babel.h index 6a95d82f..b194ce30 100644 --- a/proto/babel/babel.h +++ b/proto/babel/babel.h @@ -2,6 +2,8 @@ * BIRD -- The Babel protocol * * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2016--2017 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. 
* @@ -23,10 +25,6 @@ #include "lib/string.h" #include "lib/timer.h" -#ifndef IPV6 -#error "The Babel protocol only speaks IPv6" -#endif - #define EA_BABEL_METRIC EA_CODE(EAP_BABEL, 0) #define EA_BABEL_ROUTER_ID EA_CODE(EAP_BABEL, 1) @@ -36,27 +34,30 @@ #define BABEL_INFINITY 0xFFFF -#define BABEL_HELLO_INTERVAL_WIRED 4 /* Default hello intervals in seconds */ -#define BABEL_HELLO_INTERVAL_WIRELESS 4 +#define BABEL_HELLO_INTERVAL_WIRED (4 S_) /* Default hello intervals in seconds */ +#define BABEL_HELLO_INTERVAL_WIRELESS (4 S_) +#define BABEL_HELLO_LIMIT 12 #define BABEL_UPDATE_INTERVAL_FACTOR 4 #define BABEL_IHU_INTERVAL_FACTOR 3 -#define BABEL_IHU_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */ -#define BABEL_HELLO_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */ -#define BABEL_ROUTE_EXPIRY_FACTOR(X) ((X)*7/2) /* 3.5 */ -#define BABEL_ROUTE_REFRESH_INTERVAL 2 /* Seconds before route expiry to send route request */ -#define BABEL_HOLD_TIME 10 /* Expiry time for our own routes */ +#define BABEL_HOLD_TIME_FACTOR 4 /* How long we keep unreachable route relative to update interval */ +#define BABEL_IHU_EXPIRY_FACTOR(X) ((btime)(X)*7/2) /* 3.5 */ +#define BABEL_HELLO_EXPIRY_FACTOR(X) ((btime)(X)*3/2) /* 1.5 */ +#define BABEL_ROUTE_EXPIRY_FACTOR(X) ((btime)(X)*7/2) /* 3.5 */ +#define BABEL_ROUTE_REFRESH_FACTOR(X) ((btime)(X)*5/2) /* 2.5 */ +#define BABEL_SEQNO_REQUEST_RETRY 4 +#define BABEL_SEQNO_REQUEST_EXPIRY (2 S_) +#define BABEL_GARBAGE_INTERVAL (300 S_) #define BABEL_RXCOST_WIRED 96 #define BABEL_RXCOST_WIRELESS 256 #define BABEL_INITIAL_HOP_COUNT 255 -#define BABEL_MAX_SEND_INTERVAL 5 -#define BABEL_TIME_UNITS 100 /* On-wire times are counted in centiseconds */ -#define BABEL_SEQNO_REQUEST_EXPIRY 60 -#define BABEL_GARBAGE_INTERVAL 300 +#define BABEL_MAX_SEND_INTERVAL 5 /* Unused ? 
*/ /* Max interval that will not overflow when carried as 16-bit centiseconds */ -#define BABEL_MAX_INTERVAL (0xFFFF/BABEL_TIME_UNITS) +#define BABEL_TIME_UNITS 10000 /* On-wire times are counted in centiseconds */ +#define BABEL_MIN_INTERVAL (0x0001 * BABEL_TIME_UNITS) +#define BABEL_MAX_INTERVAL (0xFFFF * BABEL_TIME_UNITS) -#define BABEL_OVERHEAD (SIZE_OF_IP_HEADER+UDP_HEADER_LENGTH) +#define BABEL_OVERHEAD (IP6_HEADER_LENGTH+UDP_HEADER_LENGTH) #define BABEL_MIN_MTU (512 + BABEL_OVERHEAD) @@ -82,6 +83,14 @@ enum babel_tlv_type { BABEL_TLV_MAX }; +enum babel_subtlv_type { + BABEL_SUBTLV_PAD1 = 0, + BABEL_SUBTLV_PADN = 1, + + /* Mandatory subtlvs */ + BABEL_SUBTLV_SOURCE_PREFIX = 128, +}; + enum babel_iface_type { /* In practice, UNDEF and WIRED give equivalent behaviour */ BABEL_IFACE_TYPE_UNDEF = 0, @@ -101,8 +110,11 @@ enum babel_ae_type { struct babel_config { struct proto_config c; + list iface_list; /* List of iface configs (struct babel_iface_config) */ + uint hold_time; /* Time to hold stale entries and unreachable routes */ - list iface_list; /* Patterns configured -- keep it first; see babel_reconfigure why */ + struct channel_config *ip4_channel; + struct channel_config *ip6_channel; }; struct babel_iface_config { @@ -110,33 +122,41 @@ struct babel_iface_config { u16 rxcost; u8 type; + u8 limit; /* Minimum number of Hellos to keep link up */ u8 check_link; uint port; - u16 hello_interval; - u16 ihu_interval; - u16 update_interval; + uint hello_interval; /* Hello interval, in us */ + uint ihu_interval; /* IHU interval, in us */ + uint update_interval; /* Update interval, in us */ u16 rx_buffer; /* RX buffer size, 0 for MTU */ u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */ int tx_tos; int tx_priority; + + ip_addr next_hop_ip4; + ip_addr next_hop_ip6; }; struct babel_proto { struct proto p; timer *timer; - struct fib rtable; + struct fib ip4_rtable; + struct fib ip6_rtable; + + struct channel *ip4_channel; + struct channel 
*ip6_channel; + list interfaces; /* Interfaces we really know about (struct babel_iface) */ u64 router_id; u16 update_seqno; /* To be increased on request */ + u8 update_seqno_inc; /* Request for update_seqno increase */ u8 triggered; /* For triggering global updates */ slab *route_slab; slab *source_slab; slab *msg_slab; - slab *seqno_slab; - list seqno_cache; /* Seqno requests in the cache (struct babel_seqno_request) */ struct tbf log_pkt_tbf; /* TBF for packet messages */ }; @@ -155,16 +175,18 @@ struct babel_iface { char *ifname; sock *sk; ip_addr addr; + ip_addr next_hop_ip4; + ip_addr next_hop_ip6; int tx_length; list neigh_list; /* List of neighbors seen on this iface (struct babel_neighbor) */ list msg_queue; u16 hello_seqno; /* To be increased on each hello */ - bird_clock_t next_hello; - bird_clock_t next_regular; - bird_clock_t next_triggered; - bird_clock_t want_triggered; + btime next_hello; + btime next_regular; + btime next_triggered; + btime want_triggered; timer *timer; event *send_event; @@ -175,13 +197,18 @@ struct babel_neighbor { struct babel_iface *ifa; ip_addr addr; - u16 txcost; + uint uc; /* Reference counter for seqno requests */ + u16 rxcost; /* Sent in last IHU */ + u16 txcost; /* Received in last IHU */ + u16 cost; /* Computed neighbor cost */ + s8 ihu_cnt; /* IHU countdown, 0 to send it */ u8 hello_cnt; u16 hello_map; u16 next_hello_seqno; + uint last_hello_int; /* expiry timers */ - bird_clock_t hello_expiry; - bird_clock_t ihu_expiry; + btime hello_expiry; + btime ihu_expiry; list routes; /* Routes this neighbour has sent us (struct babel_route) */ }; @@ -192,7 +219,7 @@ struct babel_source { u64 router_id; u16 seqno; u16 metric; - bird_clock_t expires; + btime expires; }; struct babel_route { @@ -201,38 +228,47 @@ struct babel_route { struct babel_entry *e; struct babel_neighbor *neigh; + u8 feasible; u16 seqno; - u16 advert_metric; u16 metric; + u16 advert_metric; u64 router_id; ip_addr next_hop; - bird_clock_t refresh_time; - 
bird_clock_t expires; - u16 expiry_interval; + btime refresh_time; + btime expires; }; -struct babel_entry { - struct fib_node n; - struct babel_proto *proto; - struct babel_route *selected_in; - struct babel_route *selected_out; - - bird_clock_t updated; - - list sources; /* Source entries for this prefix (struct babel_source). */ - list routes; /* Routes for this prefix (struct babel_route) */ -}; - -/* Stores forwarded seqno requests for duplicate suppression. */ struct babel_seqno_request { node n; - ip_addr prefix; - u8 plen; u64 router_id; u16 seqno; - bird_clock_t updated; + u8 hop_count; + u8 count; + btime expires; + struct babel_neighbor *nbr; +}; + +struct babel_entry { + struct babel_route *selected; + + list routes; /* Routes for this prefix (struct babel_route) */ + list sources; /* Source entries for this prefix (struct babel_source). */ + list requests; + + u8 valid; /* Entry validity state (BABEL_ENTRY_*) */ + u8 unreachable; /* Unreachable route is announced */ + u16 seqno; /* Outgoing seqno */ + u16 metric; /* Outgoing metric */ + u64 router_id; /* Outgoing router ID */ + btime updated; /* Last change of outgoing rte, for triggered updates */ + + struct fib_node n; }; +#define BABEL_ENTRY_DUMMY 0 /* No outgoing route */ +#define BABEL_ENTRY_VALID 1 /* Valid outgoing route */ +#define BABEL_ENTRY_STALE 2 /* Stale outgoing route, waiting for GC */ + /* * Internal TLV messages @@ -241,7 +277,7 @@ struct babel_seqno_request { struct babel_msg_ack_req { u8 type; u16 nonce; - u16 interval; + uint interval; ip_addr sender; }; @@ -253,7 +289,7 @@ struct babel_msg_ack { struct babel_msg_hello { u8 type; u16 seqno; - u16 interval; + uint interval; ip_addr sender; }; @@ -261,7 +297,7 @@ struct babel_msg_ihu { u8 type; u8 ae; u16 rxcost; - u16 interval; + uint interval; ip_addr addr; ip_addr sender; }; @@ -269,12 +305,14 @@ struct babel_msg_ihu { struct babel_msg_update { u8 type; u8 wildcard; - u8 plen; - u16 interval; + uint interval; u16 seqno; u16 
metric; - ip_addr prefix; u64 router_id; + union { + net_addr net; + net_addr_ip6_sadr net_sadr; + }; ip_addr next_hop; ip_addr sender; }; @@ -282,17 +320,21 @@ struct babel_msg_update { struct babel_msg_route_request { u8 type; u8 full; - u8 plen; - ip_addr prefix; + union { + net_addr net; + net_addr_ip6_sadr net_sadr; + }; }; struct babel_msg_seqno_request { u8 type; - u8 plen; - u16 seqno; u8 hop_count; + u16 seqno; u64 router_id; - ip_addr prefix; + union { + net_addr net; + net_addr_ip6_sadr net_sadr; + }; ip_addr sender; }; @@ -312,6 +354,8 @@ struct babel_msg_node { union babel_msg msg; }; +static inline int babel_sadr_enabled(struct babel_proto *p) +{ return p->ip6_rtable.addr_type == NET_IP6_SADR; } /* babel.c */ void babel_handle_ack_req(union babel_msg *msg, struct babel_iface *ifa); @@ -326,6 +370,7 @@ void babel_handle_seqno_request(union babel_msg *msg, struct babel_iface *ifa); void babel_show_interfaces(struct proto *P, char *iff); void babel_show_neighbors(struct proto *P, char *iff); void babel_show_entries(struct proto *P); +void babel_show_routes(struct proto *P); /* packets.c */ void babel_enqueue(union babel_msg *msg, struct babel_iface *ifa); diff --git a/proto/babel/config.Y b/proto/babel/config.Y index dcc0847e..7adfb4bb 100644 --- a/proto/babel/config.Y +++ b/proto/babel/config.Y @@ -2,6 +2,8 @@ * BIRD -- Babel Configuration * * Copyright (c) 2015-2016 Toke Hoiland-Jorgensen + * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2016--2017 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. 
*/ @@ -22,7 +24,8 @@ CF_DECLS CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK, - BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS, ENTRIES) + NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS, + ENTRIES) CF_GRAMMAR @@ -32,10 +35,12 @@ babel_proto_start: proto_start BABEL { this_proto = proto_config_new(&proto_babel, $1); init_list(&BABEL_CFG->iface_list); + BABEL_CFG->hold_time = 1 S_; }; babel_proto_item: proto_item + | proto_channel | INTERFACE babel_iface ; @@ -55,6 +60,7 @@ babel_iface_start: init_list(&this_ipatt->ipn_list); BABEL_IFACE->port = BABEL_PORT; BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; + BABEL_IFACE->limit = BABEL_HELLO_LIMIT; BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL; BABEL_IFACE->tx_priority = sk_priority_control; BABEL_IFACE->check_link = 1; @@ -82,21 +88,26 @@ babel_iface_finish: if (!BABEL_IFACE->update_interval) BABEL_IFACE->update_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR, BABEL_MAX_INTERVAL); BABEL_IFACE->ihu_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_IHU_INTERVAL_FACTOR, BABEL_MAX_INTERVAL); + + BABEL_CFG->hold_time = MAX_(BABEL_CFG->hold_time, BABEL_IFACE->update_interval*BABEL_HOLD_TIME_FACTOR); }; babel_iface_item: | PORT expr { BABEL_IFACE->port = $2; if (($2<1) || ($2>65535)) cf_error("Invalid port number"); } | RXCOST expr { BABEL_IFACE->rxcost = $2; if (($2<1) || ($2>65535)) cf_error("Invalid rxcost"); } - | HELLO INTERVAL expr { BABEL_IFACE->hello_interval = $3; if (($3<1) || ($3>BABEL_MAX_INTERVAL)) cf_error("Invalid hello interval"); } - | UPDATE INTERVAL expr { BABEL_IFACE->update_interval = $3; if (($3<1) || ($3>BABEL_MAX_INTERVAL)) cf_error("Invalid update interval"); } + | LIMIT expr { BABEL_IFACE->limit = $2; if (($2<1) || ($2>16)) cf_error("Limit must be in range 1-16"); } | TYPE WIRED { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; } | TYPE WIRELESS { BABEL_IFACE->type = 
BABEL_IFACE_TYPE_WIRELESS; } + | HELLO INTERVAL expr_us { BABEL_IFACE->hello_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Hello interval must be in range 10 ms - 655 s"); } + | UPDATE INTERVAL expr_us { BABEL_IFACE->update_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Update interval must be in range 10 ms - 655 s"); } | RX BUFFER expr { BABEL_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX buffer must be in range 256-65535"); } | TX LENGTH expr { BABEL_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } | TX tos { BABEL_IFACE->tx_tos = $2; } | TX PRIORITY expr { BABEL_IFACE->tx_priority = $3; } | CHECK LINK bool { BABEL_IFACE->check_link = $3; } + | NEXT HOP IPV4 ipa { BABEL_IFACE->next_hop_ip4 = $4; if (!ipa_is_ip4($4)) cf_error("Must be an IPv4 address"); } + | NEXT HOP IPV6 ipa { BABEL_IFACE->next_hop_ip6 = $4; if (!ipa_is_ip6($4)) cf_error("Must be an IPv6 address"); } ; babel_iface_opts: @@ -126,6 +137,9 @@ CF_CLI(SHOW BABEL NEIGHBORS, optsym opttext, [<name>] [\"<interface>\"], [[Show CF_CLI(SHOW BABEL ENTRIES, optsym opttext, [<name>], [[Show information about Babel prefix entries]]) { babel_show_entries(proto_get_named($4, &proto_babel)); }; +CF_CLI(SHOW BABEL ROUTES, optsym opttext, [<name>], [[Show information about Babel route entries]]) +{ babel_show_routes(proto_get_named($4, &proto_babel)); }; + CF_CODE CF_END diff --git a/proto/babel/packets.c b/proto/babel/packets.c index 768858d0..59678678 100644 --- a/proto/babel/packets.c +++ b/proto/babel/packets.c @@ -2,6 +2,8 @@ * BIRD -- The Babel protocol * * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2016--2017 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. 
* @@ -103,6 +105,13 @@ struct babel_tlv_seqno_request { u8 addr[0]; } PACKED; +struct babel_subtlv_source_prefix { + u8 type; + u8 length; + u8 plen; + u8 addr[0]; +} PACKED; + /* Hello flags */ #define BABEL_HF_UNICAST 0x8000 @@ -116,13 +125,16 @@ struct babel_parse_state { struct babel_proto *proto; struct babel_iface *ifa; ip_addr saddr; - ip_addr next_hop; + ip_addr next_hop_ip4; + ip_addr next_hop_ip6; u64 router_id; /* Router ID used in subsequent updates */ u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */ u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix in network order */ u8 router_id_seen; /* router_id field is valid */ u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */ u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */ + u8 current_tlv_endpos; /* End of self-terminating TLVs (offset from start) */ + u8 sadr_enabled; }; enum parse_result { @@ -134,7 +146,10 @@ enum parse_result { struct babel_write_state { u64 router_id; u8 router_id_seen; -// ip_addr next_hop; + ip_addr next_hop_ip4; + ip_addr next_hop_ip6; + u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */ + u8 def_ip6_pxlen; }; @@ -150,32 +165,58 @@ struct babel_write_state { #define TLV_HDR(tlv,t,l) ({ tlv->type = t; tlv->length = l - sizeof(struct babel_tlv); }) #define TLV_HDR0(tlv,t) TLV_HDR(tlv, t, tlv_data[t].min_length) -static inline u16 +#define NET_SIZE(n) BYTES(net_pxlen(n)) + +static inline uint +bytes_equal(u8 *b1, u8 *b2, uint maxlen) +{ + uint i; + for (i = 0; (i < maxlen) && (*b1 == *b2); i++, b1++, b2++) + ; + return i; +} + +static inline uint get_time16(const void *p) { - u16 v = get_u16(p) / BABEL_TIME_UNITS; - return MAX(1, v); + uint v = get_u16(p) * BABEL_TIME_UNITS; + return MAX(BABEL_MIN_INTERVAL, v); } static inline void -put_time16(void *p, u16 v) +put_time16(void *p, uint v) { - put_u16(p, v * BABEL_TIME_UNITS); + put_u16(p, v / BABEL_TIME_UNITS); } -static inline ip6_addr -get_ip6_px(const void *p, uint plen) +static inline void 
+read_ip4_px(net_addr *n, const void *p, uint plen) +{ + ip4_addr addr = {0}; + memcpy(&addr, p, BYTES(plen)); + net_fill_ip4(n, ip4_ntoh(addr), plen); +} + +static inline void +put_ip4_px(void *p, net_addr *n) +{ + ip4_addr addr = ip4_hton(net4_prefix(n)); + memcpy(p, &addr, NET_SIZE(n)); +} + +static inline void +read_ip6_px(net_addr *n, const void *p, uint plen) { ip6_addr addr = IPA_NONE; memcpy(&addr, p, BYTES(plen)); - return ip6_ntoh(addr); + net_fill_ip6(n, ip6_ntoh(addr), plen); } static inline void -put_ip6_px(void *p, ip6_addr addr, uint plen) +put_ip6_px(void *p, net_addr *n) { - addr = ip6_hton(addr); - memcpy(p, &addr, BYTES(plen)); + ip6_addr addr = ip6_hton(net6_prefix(n)); + memcpy(p, &addr, NET_SIZE(n)); } static inline ip6_addr @@ -204,6 +245,7 @@ static int babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *msg, stru static int babel_read_update(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); static int babel_read_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); static int babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_source_prefix(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); static uint babel_write_ack(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len); static uint babel_write_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len); @@ -211,6 +253,7 @@ static uint babel_write_ihu(struct babel_tlv *hdr, union babel_msg *msg, struct static uint babel_write_update(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len); static uint babel_write_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len); static uint babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, 
struct babel_write_state *state, uint max_len); +static int babel_write_source_prefix(struct babel_tlv *hdr, net_addr *net, uint max_len); struct babel_tlv_data { u8 min_length; @@ -358,14 +401,33 @@ babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m, if (msg->ae >= BABEL_AE_MAX) return PARSE_IGNORE; - // We handle link-local IPs. In every other case, the addr field will be 0 but - // validation will succeed. The handler takes care of these cases. - if (msg->ae == BABEL_AE_IP6_LL) + /* + * We only actually read link-local IPs. In every other case, the addr field + * will be 0 but validation will succeed. The handler takes care of these + * cases. We handle them here anyway because we need the length for parsing + * subtlvs. + */ + switch (msg->ae) { + case BABEL_AE_IP4: + if (TLV_OPT_LENGTH(tlv) < 4) + return PARSE_ERROR; + state->current_tlv_endpos += 4; + break; + + case BABEL_AE_IP6: + if (TLV_OPT_LENGTH(tlv) < 16) + return PARSE_ERROR; + state->current_tlv_endpos += 16; + break; + + case BABEL_AE_IP6_LL: if (TLV_OPT_LENGTH(tlv) < 8) return PARSE_ERROR; msg->addr = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + state->current_tlv_endpos += 8; + break; } return PARSE_SUCCESS; @@ -438,21 +500,27 @@ babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED, return PARSE_ERROR; case BABEL_AE_IP4: - /* TODO */ + if (TLV_OPT_LENGTH(tlv) < sizeof(ip4_addr)) + return PARSE_ERROR; + + state->next_hop_ip4 = ipa_from_ip4(get_ip4(&tlv->addr)); + state->current_tlv_endpos += sizeof(ip4_addr); return PARSE_IGNORE; case BABEL_AE_IP6: if (TLV_OPT_LENGTH(tlv) < sizeof(ip6_addr)) return PARSE_ERROR; - state->next_hop = ipa_from_ip6(get_ip6(&tlv->addr)); + state->next_hop_ip6 = ipa_from_ip6(get_ip6(&tlv->addr)); + state->current_tlv_endpos += sizeof(ip6_addr); return PARSE_IGNORE; case BABEL_AE_IP6_LL: if (TLV_OPT_LENGTH(tlv) < 8) return PARSE_ERROR; - state->next_hop = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + state->next_hop_ip6 = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + 
state->current_tlv_endpos += 8; return PARSE_IGNORE; default: @@ -462,6 +530,51 @@ babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED, return PARSE_IGNORE; } +/* This is called directly from babel_write_update() and returns -1 if a next + hop should be written but there is not enough space. */ +static int +babel_write_next_hop(struct babel_tlv *hdr, ip_addr addr, + struct babel_write_state *state, uint max_len) +{ + struct babel_tlv_next_hop *tlv = (void *) hdr; + + if (ipa_zero(addr)) + { + /* Should not happen */ + return 0; + } + else if (ipa_is_ip4(addr) && !ipa_equal(addr, state->next_hop_ip4)) + { + uint len = sizeof(struct babel_tlv_next_hop) + sizeof(ip4_addr); + if (len > max_len) + return -1; + + TLV_HDR(tlv, BABEL_TLV_NEXT_HOP, len); + + tlv->ae = BABEL_AE_IP4; + put_ip4(&tlv->addr, ipa_to_ip4(addr)); + state->next_hop_ip4 = addr; + + return len; + } + else if (ipa_is_ip6(addr) && !ipa_equal(addr, state->next_hop_ip6)) + { + uint len = sizeof(struct babel_tlv_next_hop) + sizeof(ip6_addr); + if (len > max_len) + return -1; + + TLV_HDR(tlv, BABEL_TLV_NEXT_HOP, len); + + tlv->ae = BABEL_AE_IP6; + put_ip6(&tlv->addr, ipa_to_ip6(addr)); + state->next_hop_ip6 = addr; + + return len; + } + + return 0; +} + static int babel_read_update(struct babel_tlv *hdr, union babel_msg *m, struct babel_parse_state *state) @@ -487,15 +600,43 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, if (tlv->plen > 0) return PARSE_ERROR; + if (msg->metric != 65535) + return PARSE_ERROR; + msg->wildcard = 1; break; case BABEL_AE_IP4: - /* TODO */ - return PARSE_IGNORE; + if (tlv->plen > IP4_MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + /* Cannot omit data if there is no saved prefix */ + if (tlv->omitted && !state->def_ip4_prefix_seen) + return PARSE_ERROR; + + /* Update must have next hop, unless it is retraction */ + if (ipa_zero(state->next_hop_ip4) && (msg->metric != BABEL_INFINITY)) + return PARSE_ERROR; + + /* Merge saved prefix and received prefix 
parts */ + memcpy(buf, state->def_ip4_prefix, tlv->omitted); + memcpy(buf + tlv->omitted, tlv->addr, len); + + ip4_addr prefix4 = get_ip4(buf); + net_fill_ip4(&msg->net, prefix4, tlv->plen); + + if (tlv->flags & BABEL_UF_DEF_PREFIX) + { + put_ip4(state->def_ip4_prefix, prefix4); + state->def_ip4_prefix_seen = 1; + } + + msg->next_hop = state->next_hop_ip4; + + break; case BABEL_AE_IP6: - if (tlv->plen > MAX_PREFIX_LENGTH) + if (tlv->plen > IP6_MAX_PREFIX_LENGTH) return PARSE_ERROR; /* Cannot omit data if there is no saved prefix */ @@ -506,20 +647,26 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, memcpy(buf, state->def_ip6_prefix, tlv->omitted); memcpy(buf + tlv->omitted, tlv->addr, len); - msg->plen = tlv->plen; - msg->prefix = ipa_from_ip6(get_ip6(buf)); + ip6_addr prefix6 = get_ip6(buf); + net_fill_ip6(&msg->net, prefix6, tlv->plen); + + if (state->sadr_enabled) + net_make_ip6_sadr(&msg->net); if (tlv->flags & BABEL_UF_DEF_PREFIX) { - put_ip6(state->def_ip6_prefix, msg->prefix); + put_ip6(state->def_ip6_prefix, prefix6); state->def_ip6_prefix_seen = 1; } if (tlv->flags & BABEL_UF_ROUTER_ID) { - state->router_id = ((u64) _I2(msg->prefix)) << 32 | _I3(msg->prefix); + state->router_id = ((u64) _I2(prefix6)) << 32 | _I3(prefix6); state->router_id_seen = 1; } + + msg->next_hop = state->next_hop_ip6; + break; case BABEL_AE_IP6_LL: @@ -538,8 +685,8 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, } msg->router_id = state->router_id; - msg->next_hop = state->next_hop; msg->sender = state->saddr; + state->current_tlv_endpos += len; return PARSE_SUCCESS; } @@ -548,7 +695,6 @@ static uint babel_write_update(struct babel_tlv *hdr, union babel_msg *m, struct babel_write_state *state, uint max_len) { - struct babel_tlv_update *tlv = (void *) hdr; struct babel_msg_update *msg = &m->update; uint len0 = 0; @@ -557,16 +703,35 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m, * both of them. 
There is enough space for the Router-ID TLV, because * sizeof(struct babel_tlv_router_id) == sizeof(struct babel_tlv_update). * - * Router ID is not used for retractions, so do not us it in such case. + * Router ID is not used for retractions, so do not use it in such case. */ if ((msg->metric < BABEL_INFINITY) && (!state->router_id_seen || (msg->router_id != state->router_id))) { len0 = babel_write_router_id(hdr, msg->router_id, state, max_len); - tlv = (struct babel_tlv_update *) NEXT_TLV(tlv); + hdr = NEXT_TLV(hdr); + } + + /* + * We also may add Next Hop TLV for regular updates. It may fail for not + * enough space or it may be unnecessary as the next hop is the same as the + * last one already announced. So we handle all three cases. + */ + if (msg->metric < BABEL_INFINITY) + { + int l = babel_write_next_hop(hdr, msg->next_hop, state, max_len - len0); + if (l < 0) + return 0; + + if (l) + { + len0 += l; + hdr = NEXT_TLV(hdr); + } } - uint len = sizeof(struct babel_tlv_update) + BYTES(msg->plen); + struct babel_tlv_update *tlv = (void *) hdr; + uint len = sizeof(struct babel_tlv_update) + NET_SIZE(&msg->net); if (len0 + len > max_len) return 0; @@ -579,23 +744,60 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m, tlv->ae = BABEL_AE_WILDCARD; tlv->plen = 0; } + else if (msg->net.type == NET_IP4) + { + tlv->ae = BABEL_AE_IP4; + tlv->plen = net4_pxlen(&msg->net); + put_ip4_px(tlv->addr, &msg->net); + } else { tlv->ae = BABEL_AE_IP6; - tlv->plen = msg->plen; - put_ip6_px(tlv->addr, msg->prefix, msg->plen); + tlv->plen = net6_pxlen(&msg->net); + + /* Address compression - omit initial matching bytes */ + u8 buf[16], omit; + put_ip6(buf, net6_prefix(&msg->net)); + omit = bytes_equal(buf, state->def_ip6_prefix, + MIN(tlv->plen, state->def_ip6_pxlen) / 8); + + if (omit > 0) + { + memcpy(tlv->addr, buf + omit, NET_SIZE(&msg->net) - omit); + + tlv->omitted = omit; + tlv->length -= omit; + len -= omit; + } + else + { + put_ip6_px(tlv->addr, &msg->net); + 
tlv->flags |= BABEL_UF_DEF_PREFIX; + + put_ip6(state->def_ip6_prefix, net6_prefix(&msg->net)); + state->def_ip6_pxlen = tlv->plen; + } } put_time16(&tlv->interval, msg->interval); put_u16(&tlv->seqno, msg->seqno); put_u16(&tlv->metric, msg->metric); + if (msg->net.type == NET_IP6_SADR) + { + int l = babel_write_source_prefix(hdr, &msg->net, max_len - (len0 + len)); + if (l < 0) + return 0; + + len += l; + } + return len0 + len; } static int babel_read_route_request(struct babel_tlv *hdr, union babel_msg *m, - struct babel_parse_state *state UNUSED) + struct babel_parse_state *state) { struct babel_tlv_route_request *tlv = (void *) hdr; struct babel_msg_route_request *msg = &m->route_request; @@ -613,18 +815,29 @@ babel_read_route_request(struct babel_tlv *hdr, union babel_msg *m, return PARSE_SUCCESS; case BABEL_AE_IP4: - /* TODO */ - return PARSE_IGNORE; + if (tlv->plen > IP4_MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen)) + return PARSE_ERROR; + + read_ip4_px(&msg->net, tlv->addr, tlv->plen); + state->current_tlv_endpos += BYTES(tlv->plen); + return PARSE_SUCCESS; case BABEL_AE_IP6: - if (tlv->plen > MAX_PREFIX_LENGTH) + if (tlv->plen > IP6_MAX_PREFIX_LENGTH) return PARSE_ERROR; if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen)) return PARSE_ERROR; - msg->plen = tlv->plen; - msg->prefix = get_ip6_px(tlv->addr, tlv->plen); + read_ip6_px(&msg->net, tlv->addr, tlv->plen); + state->current_tlv_endpos += BYTES(tlv->plen); + + if (state->sadr_enabled) + net_make_ip6_sadr(&msg->net); + return PARSE_SUCCESS; case BABEL_AE_IP6_LL: @@ -644,7 +857,7 @@ babel_write_route_request(struct babel_tlv *hdr, union babel_msg *m, struct babel_tlv_route_request *tlv = (void *) hdr; struct babel_msg_route_request *msg = &m->route_request; - uint len = sizeof(struct babel_tlv_route_request) + BYTES(msg->plen); + uint len = sizeof(struct babel_tlv_route_request) + NET_SIZE(&msg->net); if (len > max_len) return 0; @@ -656,11 +869,26 @@ 
babel_write_route_request(struct babel_tlv *hdr, union babel_msg *m, tlv->ae = BABEL_AE_WILDCARD; tlv->plen = 0; } + else if (msg->net.type == NET_IP4) + { + tlv->ae = BABEL_AE_IP4; + tlv->plen = net4_pxlen(&msg->net); + put_ip4_px(tlv->addr, &msg->net); + } else { tlv->ae = BABEL_AE_IP6; - tlv->plen = msg->plen; - put_ip6_px(tlv->addr, msg->prefix, msg->plen); + tlv->plen = net6_pxlen(&msg->net); + put_ip6_px(tlv->addr, &msg->net); + } + + if (msg->net.type == NET_IP6_SADR) + { + int l = babel_write_source_prefix(hdr, &msg->net, max_len - len); + if (l < 0) + return 0; + + len += l; } return len; @@ -688,18 +916,29 @@ babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *m, return PARSE_ERROR; case BABEL_AE_IP4: - /* TODO */ - return PARSE_IGNORE; + if (tlv->plen > IP4_MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen)) + return PARSE_ERROR; + + read_ip4_px(&msg->net, tlv->addr, tlv->plen); + state->current_tlv_endpos += BYTES(tlv->plen); + return PARSE_SUCCESS; case BABEL_AE_IP6: - if (tlv->plen > MAX_PREFIX_LENGTH) + if (tlv->plen > IP6_MAX_PREFIX_LENGTH) return PARSE_ERROR; if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen)) return PARSE_ERROR; - msg->plen = tlv->plen; - msg->prefix = get_ip6_px(tlv->addr, tlv->plen); + read_ip6_px(&msg->net, tlv->addr, tlv->plen); + state->current_tlv_endpos += BYTES(tlv->plen); + + if (state->sadr_enabled) + net_make_ip6_sadr(&msg->net); + return PARSE_SUCCESS; case BABEL_AE_IP6_LL: @@ -719,22 +958,178 @@ babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *m, struct babel_tlv_seqno_request *tlv = (void *) hdr; struct babel_msg_seqno_request *msg = &m->seqno_request; - uint len = sizeof(struct babel_tlv_seqno_request) + BYTES(msg->plen); + uint len = sizeof(struct babel_tlv_seqno_request) + NET_SIZE(&msg->net); if (len > max_len) return 0; TLV_HDR(tlv, BABEL_TLV_SEQNO_REQUEST, len); - tlv->ae = BABEL_AE_IP6; - tlv->plen = msg->plen; + + if (msg->net.type == NET_IP4) + { + 
tlv->ae = BABEL_AE_IP4; + tlv->plen = net4_pxlen(&msg->net); + put_ip4_px(tlv->addr, &msg->net); + } + else + { + tlv->ae = BABEL_AE_IP6; + tlv->plen = net6_pxlen(&msg->net); + put_ip6_px(tlv->addr, &msg->net); + } + put_u16(&tlv->seqno, msg->seqno); tlv->hop_count = msg->hop_count; put_u64(&tlv->router_id, msg->router_id); - put_ip6_px(tlv->addr, msg->prefix, msg->plen); + + if (msg->net.type == NET_IP6_SADR) + { + int l = babel_write_source_prefix(hdr, &msg->net, max_len - len); + if (l < 0) + return 0; + + len += l; + } + + return len; +} + +static int +babel_read_source_prefix(struct babel_tlv *hdr, union babel_msg *msg, + struct babel_parse_state *state UNUSED) +{ + struct babel_subtlv_source_prefix *tlv = (void *) hdr; + net_addr_ip6_sadr *net; + + /* + * We would like to skip the sub-TLV if SADR is not enabled, but we do not + * know AF of the enclosing TLV yet. We will do that later. + */ + + /* Check internal consistency */ + if ((tlv->length < 1) || + (tlv->plen > IP6_MAX_PREFIX_LENGTH) || + (tlv->length < (1 + BYTES(tlv->plen)))) + return PARSE_ERROR; + + /* Plen MUST NOT be 0 */ + if (tlv->plen == 0) + return PARSE_ERROR; + + switch(msg->type) + { + case BABEL_TLV_UPDATE: + /* Wildcard updates with source prefix MUST be silently ignored */ + if (msg->update.wildcard) + return PARSE_IGNORE; + + net = (void *) &msg->update.net; + break; + + case BABEL_TLV_ROUTE_REQUEST: + /* Wildcard requests with source addresses MUST be silently ignored */ + if (msg->route_request.full) + return PARSE_IGNORE; + + net = (void *) &msg->route_request.net; + break; + + case BABEL_TLV_SEQNO_REQUEST: + net = (void *) &msg->seqno_request.net; + break; + + default: + return PARSE_ERROR; + } + + /* If SADR is active, the net has appropriate type */ + if (net->type != NET_IP6_SADR) + return PARSE_IGNORE; + + /* Duplicate Source Prefix sub-TLV; SHOULD ignore whole TLV */ + if (net->src_pxlen > 0) + return PARSE_IGNORE; + + net_addr_ip6 src; + read_ip6_px((void *) &src, tlv->addr, 
tlv->plen); + net->src_prefix = src.prefix; + net->src_pxlen = src.pxlen; + + return PARSE_SUCCESS; +} + +static int +babel_write_source_prefix(struct babel_tlv *hdr, net_addr *n, uint max_len) +{ + struct babel_subtlv_source_prefix *tlv = (void *) NEXT_TLV(hdr); + net_addr_ip6_sadr *net = (void *) n; + + /* Do not use this sub-TLV for default prefix */ + if (net->src_pxlen == 0) + return 0; + + uint len = sizeof(*tlv) + BYTES(net->src_pxlen); + + if (len > max_len) + return -1; + + TLV_HDR(tlv, BABEL_SUBTLV_SOURCE_PREFIX, len); + hdr->length += len; + + net_addr_ip6 src = NET_ADDR_IP6(net->src_prefix, net->src_pxlen); + tlv->plen = src.pxlen; + put_ip6_px(tlv->addr, (void *) &src); return len; } + +static inline int +babel_read_subtlvs(struct babel_tlv *hdr, + union babel_msg *msg, + struct babel_parse_state *state) +{ + struct babel_tlv *tlv; + byte *pos, *end = (byte *) hdr + TLV_LENGTH(hdr); + int res; + + for (tlv = (void *) hdr + state->current_tlv_endpos; + (byte *) tlv < end; + tlv = NEXT_TLV(tlv)) + { + /* Ugly special case */ + if (tlv->type == BABEL_TLV_PAD1) + continue; + + /* The end of the common TLV header */ + pos = (byte *)tlv + sizeof(struct babel_tlv); + if ((pos > end) || (pos + tlv->length > end)) + return PARSE_ERROR; + + /* + * The subtlv type space is non-contiguous (due to the mandatory bit), so + * use a switch for dispatch instead of the mapping array we use for TLVs + */ + switch (tlv->type) + { + case BABEL_SUBTLV_SOURCE_PREFIX: + res = babel_read_source_prefix(tlv, msg, state); + if (res != PARSE_SUCCESS) + return res; + break; + + case BABEL_SUBTLV_PADN: + default: + /* Unknown mandatory subtlv; PARSE_IGNORE ignores the whole TLV */ + if (tlv->type >= 128) + return PARSE_IGNORE; + break; + } + } + + return PARSE_SUCCESS; +} + static inline int babel_read_tlv(struct babel_tlv *hdr, union babel_msg *msg, @@ -748,8 +1143,14 @@ babel_read_tlv(struct babel_tlv *hdr, if (TLV_LENGTH(hdr) < tlv_data[hdr->type].min_length) return PARSE_ERROR; 
+ state->current_tlv_endpos = tlv_data[hdr->type].min_length; memset(msg, 0, sizeof(*msg)); - return tlv_data[hdr->type].read_tlv(hdr, msg, state); + + int res = tlv_data[hdr->type].read_tlv(hdr, msg, state); + if (res != PARSE_SUCCESS) + return res; + + return babel_read_subtlvs(hdr, msg, state); } static uint @@ -804,7 +1205,7 @@ static uint babel_write_queue(struct babel_iface *ifa, list *queue) { struct babel_proto *p = ifa->proto; - struct babel_write_state state = {}; + struct babel_write_state state = { .next_hop_ip6 = ifa->addr }; if (EMPTY_LIST(*queue)) return 0; @@ -940,10 +1341,11 @@ babel_process_packet(struct babel_pkt_header *pkt, int len, byte *end = (byte *)pkt + plen; struct babel_parse_state state = { - .proto = p, - .ifa = ifa, - .saddr = saddr, - .next_hop = saddr, + .proto = p, + .ifa = ifa, + .saddr = saddr, + .next_hop_ip6 = saddr, + .sadr_enabled = babel_sadr_enabled(p), }; if ((pkt->magic != BABEL_MAGIC) || (pkt->version != BABEL_VERSION)) @@ -1052,7 +1454,7 @@ babel_rx_hook(sock *sk, uint len) sk->iface->name, sk->faddr, sk->laddr); /* Silently ignore my own packets */ - if (ipa_equal(ifa->iface->addr->ip, sk->faddr)) + if (ipa_equal(sk->faddr, sk->saddr)) return 1; if (!ipa_is_link_local(sk->faddr)) @@ -1087,6 +1489,7 @@ babel_open_socket(struct babel_iface *ifa) sk->sport = ifa->cf->port; sk->dport = ifa->cf->port; sk->iface = ifa->iface; + sk->saddr = ifa->addr; sk->vrf = p->p.vrf; sk->rx_hook = babel_rx_hook; diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile index c28cedec..402122fc 100644 --- a/proto/bfd/Makefile +++ b/proto/bfd/Makefile @@ -1,5 +1,6 @@ -source=bfd.c packets.c io.c -root-rel=../../ -dir-name=proto/bfd +src := bfd.c io.c packets.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 79135fae..67ec2270 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -64,16 +64,15 @@ * ready, the protocol just creates a BFD request like any other protocol. * * The protocol uses a new generic event loop (structure &birdloop) from |io.c|, - * which supports sockets, timers and events like the main loop. Timers - * (structure &timer2) are new microsecond based timers, while sockets and - * events are the same. A birdloop is associated with a thread (field @thread) - * in which event hooks are executed. Most functions for setting event sources - * (like sk_start() or tm2_start()) must be called from the context of that - * thread. Birdloop allows to temporarily acquire the context of that thread for - * the main thread by calling birdloop_enter() and then birdloop_leave(), which - * also ensures mutual exclusion with all event hooks. Note that resources - * associated with a birdloop (like timers) should be attached to the - * independent resource pool, detached from the main resource tree. + * which supports sockets, timers and events like the main loop. A birdloop is + * associated with a thread (field @thread) in which event hooks are executed. + * Most functions for setting event sources (like sk_start() or tm_start()) must + * be called from the context of that thread. Birdloop allows to temporarily + * acquire the context of that thread for the main thread by calling + * birdloop_enter() and then birdloop_leave(), which also ensures mutual + * exclusion with all event hooks. Note that resources associated with a + * birdloop (like timers) should be attached to the independent resource pool, + * detached from the main resource tree. * * There are two kinds of interaction between the BFD core (running in the BFD * thread) and the rest of BFD (running in the main thread). 
The first kind are @@ -112,7 +111,7 @@ #define HASH_IP_KEY(n) n->addr #define HASH_IP_NEXT(n) n->next_ip #define HASH_IP_EQ(a,b) ipa_equal(a,b) -#define HASH_IP_FN(k) ipa_hash32(k) +#define HASH_IP_FN(k) ipa_hash(k) static list bfd_proto_list; static list bfd_wait_list; @@ -145,6 +144,7 @@ bfd_session_update_state(struct bfd_session *s, uint state, uint diag) bfd_lock_sessions(p); s->loc_state = state; s->loc_diag = diag; + s->last_state_change = current_time(); notify = !NODE_VALID(&s->n); if (notify) @@ -176,7 +176,7 @@ bfd_session_update_tx_interval(struct bfd_session *s) return; /* Set timer relative to last tx_timer event */ - tm2_set(s->tx_timer, s->last_tx + tx_int_l); + tm_set(s->tx_timer, s->last_tx + tx_int_l); } static void @@ -190,7 +190,7 @@ bfd_session_update_detection_time(struct bfd_session *s, int kick) if (!s->last_rx) return; - tm2_set(s->hold_timer, s->last_rx + timeout); + tm_set(s->hold_timer, s->last_rx + timeout); } static void @@ -211,16 +211,16 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset) goto stop; /* So TX timer should run */ - if (reset || !tm2_active(s->tx_timer)) + if (reset || !tm_active(s->tx_timer)) { s->last_tx = 0; - tm2_start(s->tx_timer, 0); + tm_start(s->tx_timer, 0); } return; stop: - tm2_stop(s->tx_timer); + tm_stop(s->tx_timer); s->last_tx = 0; } @@ -379,7 +379,7 @@ bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr) } static void -bfd_tx_timer_hook(timer2 *t) +bfd_tx_timer_hook(timer *t) { struct bfd_session *s = t->data; @@ -388,7 +388,7 @@ bfd_tx_timer_hook(timer2 *t) } static void -bfd_hold_timer_hook(timer2 *t) +bfd_hold_timer_hook(timer *t) { bfd_session_timeout(t->data); } @@ -432,13 +432,13 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * s->passive = ifa->cf->passive; s->tx_csn = random_u32(); - s->tx_timer = tm2_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0); - s->hold_timer = tm2_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0); + s->tx_timer = 
tm_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0); + s->hold_timer = tm_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0); bfd_session_update_tx_interval(s); bfd_session_control_tx_timer(s, 1); init_list(&s->request_list); - s->last_state_change = now; + s->last_state_change = current_time(); TRACE(D_EVENTS, "Session to %I added", s->addr); @@ -879,9 +879,6 @@ bfd_notify_hook(sock *sk, uint len UNUSED) diag = s->loc_diag; bfd_unlock_sessions(p); - /* FIXME: convert to btime and move to bfd_session_update_state() */ - s->last_state_change = now; - s->notify_running = 1; WALK_LIST_DELSAFE(n, nn, s->request_list) bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag); @@ -954,7 +951,7 @@ bfd_init_all(void) static struct proto * bfd_init(struct proto_config *c) { - struct proto *p = proto_new(c, sizeof(struct bfd_proto)); + struct proto *p = proto_new(c); p->neigh_notify = bfd_neigh_notify; @@ -983,8 +980,10 @@ bfd_start(struct proto *P) add_tail(&bfd_proto_list, &p->bfd_node); birdloop_enter(p->loop); - p->rx_1 = bfd_open_rx_sk(p, 0); - p->rx_m = bfd_open_rx_sk(p, 1); + p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); + p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); + p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); + p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); birdloop_leave(p->loop); bfd_take_requests(p); @@ -1078,7 +1077,7 @@ bfd_show_sessions(struct proto *P) byte tbuf[TM_DATETIME_BUFFER_SIZE]; struct bfd_proto *p = (struct bfd_proto *) P; uint state, diag UNUSED; - u32 tx_int, timeout; + btime tx_int, timeout; const char *ifname; if (p->p.proto_state != PS_UP) @@ -1099,15 +1098,14 @@ bfd_show_sessions(struct proto *P) state = s->loc_state; diag = s->loc_diag; ifname = (s->ifa && s->ifa->iface) ? s->ifa->iface->name : "---"; - tx_int = s->last_tx ? (MAX(s->des_min_tx_int, s->rem_min_rx_int) TO_MS) : 0; - timeout = (MAX(s->req_min_rx_int, s->rem_min_tx_int) TO_MS) * s->rem_detect_mult; + tx_int = s->last_tx ? 
MAX(s->des_min_tx_int, s->rem_min_rx_int) : 0; + timeout = (btime) MAX(s->req_min_rx_int, s->rem_min_tx_int) * s->rem_detect_mult; state = (state < 4) ? state : 0; - tm_format_datetime(tbuf, &config->tf_proto, s->last_state_change); + tm_format_time(tbuf, &config->tf_proto, s->last_state_change); - cli_msg(-1020, "%-25I %-10s %-10s %-10s %3u.%03u %3u.%03u", - s->addr, ifname, bfd_state_names[state], tbuf, - tx_int / 1000, tx_int % 1000, timeout / 1000, timeout % 1000); + cli_msg(-1020, "%-25I %-10s %-10s %-10s %7t %7t", + s->addr, ifname, bfd_state_names[state], tbuf, tx_int, timeout); } HASH_WALK_END; @@ -1118,6 +1116,7 @@ bfd_show_sessions(struct proto *P) struct protocol proto_bfd = { .name = "BFD", .template = "bfd%d", + .proto_size = sizeof(struct bfd_proto), .config_size = sizeof(struct bfd_config), .init = bfd_init, .start = bfd_start, diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index 46e09879..bc4fe969 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -87,8 +87,10 @@ struct bfd_proto sock *notify_ws; list notify_list; - sock *rx_1; - sock *rx_m; + sock *rx4_1; + sock *rx6_1; + sock *rx4_m; + sock *rx6_m; list iface_list; }; @@ -138,11 +140,11 @@ struct bfd_session btime last_tx; /* Time of last sent periodic control packet */ btime last_rx; /* Time of last received valid control packet */ - timer2 *tx_timer; /* Periodic control packet timer */ - timer2 *hold_timer; /* Timer for session down detection time */ + timer *tx_timer; /* Periodic control packet timer */ + timer *hold_timer; /* Timer for session down detection time */ list request_list; /* List of client requests (struct bfd_request) */ - bird_clock_t last_state_change; /* Time of last state change */ + btime last_state_change; /* Time of last state change */ u8 notify_running; /* 1 if notify hooks are running */ u8 rx_csn_known; /* Received crypto sequence number is known */ @@ -201,7 +203,7 @@ void bfd_show_sessions(struct proto *P); /* packets.c */ void bfd_send_ctl(struct bfd_proto *p, 
struct bfd_session *s, int final); -sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop); +sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int inet_version); sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa); diff --git a/proto/bfd/io.c b/proto/bfd/io.c index 8f4f5007..1cd9365a 100644 --- a/proto/bfd/io.c +++ b/proto/bfd/io.c @@ -18,10 +18,10 @@ #include "proto/bfd/io.h" #include "lib/buffer.h" -#include "lib/heap.h" #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/socket.h" @@ -31,16 +31,12 @@ struct birdloop pthread_t thread; pthread_mutex_t mutex; - btime last_time; - btime real_time; - u8 use_monotonic_clock; - u8 stop_called; u8 poll_active; u8 wakeup_masked; int wakeup_fds[2]; - BUFFER(timer2 *) timers; + struct timeloop time; list event_list; list sock_list; uint sock_num; @@ -57,6 +53,7 @@ struct birdloop */ static pthread_key_t current_loop_key; +extern pthread_key_t current_time_key; static inline struct birdloop * birdloop_current(void) @@ -68,6 +65,7 @@ static inline void birdloop_set_current(struct birdloop *loop) { pthread_setspecific(current_loop_key, loop); + pthread_setspecific(current_time_key, loop ? 
&loop->time : &main_timeloop); } static inline void @@ -78,98 +76,6 @@ birdloop_init_current(void) /* - * Time clock - */ - -static void times_update_alt(struct birdloop *loop); - -static void -times_init(struct birdloop *loop) -{ - struct timespec ts; - int rv; - - rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv < 0) - { - log(L_WARN "Monotonic clock is missing"); - - loop->use_monotonic_clock = 0; - loop->last_time = 0; - loop->real_time = 0; - times_update_alt(loop); - return; - } - - if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40))) - log(L_WARN "Monotonic clock is crazy"); - - loop->use_monotonic_clock = 1; - loop->last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000); - loop->real_time = 0; -} - -static void -times_update_pri(struct birdloop *loop) -{ - struct timespec ts; - int rv; - - rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv < 0) - die("clock_gettime: %m"); - - btime new_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000); - - if (new_time < loop->last_time) - log(L_ERR "Monotonic clock is broken"); - - loop->last_time = new_time; - loop->real_time = 0; -} - -static void -times_update_alt(struct birdloop *loop) -{ - struct timeval tv; - int rv; - - rv = gettimeofday(&tv, NULL); - if (rv < 0) - die("gettimeofday: %m"); - - btime new_time = ((s64) tv.tv_sec S) + tv.tv_usec; - btime delta = new_time - loop->real_time; - - if ((delta < 0) || (delta > (60 S))) - { - if (loop->real_time) - log(L_WARN "Time jump, delta %d us", (int) delta); - - delta = 100 MS; - } - - loop->last_time += delta; - loop->real_time = new_time; -} - -static void -times_update(struct birdloop *loop) -{ - if (loop->use_monotonic_clock) - times_update_pri(loop); - else - times_update_alt(loop); -} - -btime -current_time(void) -{ - return birdloop_current()->last_time; -} - - -/* * Wakeup code for birdloop */ @@ -238,7 +144,7 @@ wakeup_drain(struct birdloop *loop) } static inline void -wakeup_do_kick(struct birdloop *loop) +wakeup_do_kick(struct birdloop *loop) { 
pipe_kick(loop->wakeup_fds[1]); } @@ -252,6 +158,16 @@ wakeup_kick(struct birdloop *loop) loop->wakeup_masked = 2; } +/* For notifications from outside */ +void +wakeup_kick_current(void) +{ + struct birdloop *loop = birdloop_current(); + + if (loop && loop->poll_active) + wakeup_kick(loop); +} + /* * Events @@ -272,7 +188,7 @@ events_init(struct birdloop *loop) static void events_fire(struct birdloop *loop) { - times_update(loop); + times_update(&loop->time); ev_run_list(&loop->event_list); } @@ -292,154 +208,6 @@ ev2_schedule(event *e) /* - * Timers - */ - -#define TIMER_LESS(a,b) ((a)->expires < (b)->expires) -#define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \ - heap[a]->index = (a), heap[b]->index = (b)) - -static inline uint timers_count(struct birdloop *loop) -{ return loop->timers.used - 1; } - -static inline timer2 *timers_first(struct birdloop *loop) -{ return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; } - - -static void -tm2_free(resource *r) -{ - timer2 *t = (timer2 *) r; - - tm2_stop(t); -} - -static void -tm2_dump(resource *r) -{ - timer2 *t = (timer2 *) r; - - debug("(code %p, data %p, ", t->hook, t->data); - if (t->randomize) - debug("rand %d, ", t->randomize); - if (t->recurrent) - debug("recur %d, ", t->recurrent); - if (t->expires) - debug("expires in %d ms)\n", (t->expires - current_time()) TO_MS); - else - debug("inactive)\n"); -} - - -static struct resclass tm2_class = { - "Timer", - sizeof(timer2), - tm2_free, - tm2_dump, - NULL, - NULL -}; - -timer2 * -tm2_new(pool *p) -{ - timer2 *t = ralloc(p, &tm2_class); - t->index = -1; - return t; -} - -void -tm2_set(timer2 *t, btime when) -{ - struct birdloop *loop = birdloop_current(); - uint tc = timers_count(loop); - - if (!t->expires) - { - t->index = ++tc; - t->expires = when; - BUFFER_PUSH(loop->timers) = t; - HEAP_INSERT(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP); - } - else if (t->expires < when) - { - t->expires = when; - 
HEAP_INCREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index); - } - else if (t->expires > when) - { - t->expires = when; - HEAP_DECREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index); - } - - if (loop->poll_active && (t->index == 1)) - wakeup_kick(loop); -} - -void -tm2_start(timer2 *t, btime after) -{ - tm2_set(t, current_time() + MAX(after, 0)); -} - -void -tm2_stop(timer2 *t) -{ - if (!t->expires) - return; - - struct birdloop *loop = birdloop_current(); - uint tc = timers_count(loop); - - HEAP_DELETE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index); - BUFFER_POP(loop->timers); - - t->index = -1; - t->expires = 0; -} - -static void -timers_init(struct birdloop *loop) -{ - BUFFER_INIT(loop->timers, loop->pool, 4); - BUFFER_PUSH(loop->timers) = NULL; -} - -static void -timers_fire(struct birdloop *loop) -{ - btime base_time; - timer2 *t; - - times_update(loop); - base_time = loop->last_time; - - while (t = timers_first(loop)) - { - if (t->expires > base_time) - return; - - if (t->recurrent) - { - btime when = t->expires + t->recurrent; - - if (when <= loop->last_time) - when = loop->last_time + t->recurrent; - - if (t->randomize) - when += random() % (t->randomize + 1); - - tm2_set(t, when); - } - else - tm2_stop(t); - - t->hook(t); - } -} - - -/* * Sockets */ @@ -530,7 +298,7 @@ sockets_prepare(struct birdloop *loop) struct pollfd *pfd = loop->poll_fd.data; sock **psk = loop->poll_sk.data; - int i = 0; + uint i = 0; node *n; WALK_LIST(n, loop->sock_list) @@ -586,7 +354,7 @@ sockets_fire(struct birdloop *loop) sock **psk = loop->poll_sk.data; int poll_num = loop->poll_fd.used - 1; - times_update(loop); + times_update(&loop->time); /* Last fd is internal wakeup fd */ if (pfd[poll_num].revents & POLLIN) @@ -634,11 +402,10 @@ birdloop_new(void) loop->pool = p; pthread_mutex_init(&loop->mutex, NULL); - times_init(loop); wakeup_init(loop); events_init(loop); - timers_init(loop); + timers_init(&loop->time, p); 
sockets_init(loop); return loop; @@ -710,7 +477,7 @@ static void * birdloop_main(void *arg) { struct birdloop *loop = arg; - timer2 *t; + timer *t; int rv, timeout; birdloop_set_current(loop); @@ -719,13 +486,13 @@ birdloop_main(void *arg) while (1) { events_fire(loop); - timers_fire(loop); + timers_fire(&loop->time); - times_update(loop); + times_update(&loop->time); if (events_waiting(loop)) timeout = 0; - else if (t = timers_first(loop)) - timeout = (tm2_remains(t) TO_MS) + 1; + else if (t = timers_first(&loop->time)) + timeout = (tm_remains(t) TO_MS) + 1; else timeout = -1; @@ -756,7 +523,7 @@ birdloop_main(void *arg) if (rv) sockets_fire(loop); - timers_fire(loop); + timers_fire(&loop->time); } loop->stop_called = 0; diff --git a/proto/bfd/io.h b/proto/bfd/io.h index 641ee054..ec706e9a 100644 --- a/proto/bfd/io.h +++ b/proto/bfd/io.h @@ -11,80 +11,15 @@ #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/socket.h" -// #include "lib/timer.h" -typedef struct timer2 -{ - resource r; - void (*hook)(struct timer2 *); - void *data; - - btime expires; /* 0=inactive */ - uint randomize; /* Amount of randomization */ - uint recurrent; /* Timer recurrence */ - - int index; -} timer2; - - -btime current_time(void); - void ev2_schedule(event *e); - -timer2 *tm2_new(pool *p); -void tm2_set(timer2 *t, btime when); -void tm2_start(timer2 *t, btime after); -void tm2_stop(timer2 *t); - -static inline int -tm2_active(timer2 *t) -{ - return t->expires != 0; -} - -static inline btime -tm2_remains(timer2 *t) -{ - btime now = current_time(); - return (t->expires > now) ? 
(t->expires - now) : 0; -} - -static inline timer2 * -tm2_new_init(pool *p, void (*hook)(struct timer2 *), void *data, uint rec, uint rand) -{ - timer2 *t = tm2_new(p); - t->hook = hook; - t->data = data; - t->recurrent = rec; - t->randomize = rand; - return t; -} - -static inline void -tm2_set_max(timer2 *t, btime when) -{ - if (when > t->expires) - tm2_set(t, when); -} - -/* -static inline void -tm2_start_max(timer2 *t, btime after) -{ - btime rem = tm2_remains(t); - tm2_start(t, MAX_(rem, after)); -} -*/ - - void sk_start(sock *s); void sk_stop(sock *s); - - struct birdloop *birdloop_new(void); void birdloop_start(struct birdloop *loop); void birdloop_stop(struct birdloop *loop); diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 129db72f..b76efda6 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -248,7 +248,7 @@ bfd_check_authentication(struct bfd_proto *p, struct bfd_session *s, struct bfd_ /* BFD CSNs are in 32-bit circular number space */ u32 csn = ntohl(auth->csn); if (s->rx_csn_known && - (((csn - s->rx_csn) > (3 * s->detect_mult)) || + (((csn - s->rx_csn) > (3 * (uint) s->detect_mult)) || (meticulous && (csn == s->rx_csn)))) { /* We want to report both new and old CSN */ @@ -405,10 +405,11 @@ bfd_err_hook(sock *sk, int err) } sock * -bfd_open_rx_sk(struct bfd_proto *p, int multihop) +bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) { sock *sk = sk_new(p->tpool); sk->type = SK_UDP; + sk->subtype = af; sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; sk->data = p; @@ -421,10 +422,6 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop) sk->priority = sk_priority_control; sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0); -#ifdef IPV6 - sk->flags |= SKF_V6ONLY; -#endif - if (sk_open(sk) < 0) goto err; @@ -456,10 +453,6 @@ bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) sk->ttl = ifa ? 
255 : -1; sk->flags = SKF_THREAD | SKF_BIND | SKF_HIGH_PORT; -#ifdef IPV6 - sk->flags |= SKF_V6ONLY; -#endif - if (sk_open(sk) < 0) goto err; diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile index a634cf0d..00aaef5e 100644 --- a/proto/bgp/Makefile +++ b/proto/bgp/Makefile @@ -1,5 +1,6 @@ -source=bgp.c attrs.c packets.c -root-rel=../../ -dir-name=proto/bgp +src := attrs.c bgp.c packets.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 73eb4040..0f41f818 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -2,6 +2,8 @@ * BIRD -- BGP Attributes * * (c) 2000 Martin Mares <mj@ucw.cz> + * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2008--2016 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -39,888 +41,1266 @@ * specifies that such updates should be ignored, but that is generally * a bad idea. * - * Error checking of optional transitive attributes is done according to - * draft-ietf-idr-optional-transitive-03, but errors are handled always - * as withdraws. + * BGP attribute table has several hooks: * - * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed, - * but unknown segments cause a session drop with Malformed AS_PATH - * error (see validate_path()). The behavior in such case is not - * explicitly specified by RFC 4271. RFC 5065 specifies that - * inconsistent AS_CONFED_* segments should cause a session drop, but - * implementations that pass invalid AS_CONFED_* segments are - * widespread. + * export - Hook that validates and normalizes attribute during export phase. + * Receives eattr, may modify it (e.g., sort community lists for canonical + * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if + * necessary. May assume that eattr has value valid w.r.t. its type, but may be + * invalid w.r.t. BGP constraints. Optional. * - * Error handling of AS4_* attributes is done as specified by RFC 6793. There - * are several possible inconsistencies between AGGREGATOR and AS4_AGGREGATOR - * that are not handled by that RFC, these are logged and ignored (see - * bgp_reconstruct_4b_attrs()). + * encode - Hook that converts internal representation to external one during + * packet writing. Receives eattr and puts it in the buffer (including attribute + * header). Returns number of bytes, or -1 if not enough space. 
May assume that + * eattr has value valid w.r.t. its type and validated by export hook. Mandatory + * for all known attributes that exist internally after export phase (i.e., all + * except pseudoattributes MP_(UN)REACH_NLRI). + * + * decode - Hook that converts external representation to internal one during + * packet parsing. Receives attribute data in buffer, validates it and adds + * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or + * bgp_parse_error() may be used to escape. Mandatory for all known attributes. + * + * format - Optional hook that converts eattr to textual representation. */ -static byte bgp_mandatory_attrs[] = { BA_ORIGIN, BA_AS_PATH -#ifndef IPV6 -,BA_NEXT_HOP -#endif +struct bgp_attr_desc { + const char *name; + uint type; + uint flags; + void (*export)(struct bgp_export_state *s, eattr *a); + int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); + void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); + void (*format)(eattr *ea, byte *buf, uint size); }; -struct attr_desc { - char *name; - int expected_length; - int expected_flags; - int type; - int allow_in_ebgp; - int (*validate)(struct bgp_proto *p, byte *attr, int len); - void (*format)(eattr *ea, byte *buf, int buflen); -}; +static const struct bgp_attr_desc bgp_attr_table[]; + +static inline int bgp_attr_known(uint code); + +eattr * +bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val) +{ + ASSERT(bgp_attr_known(code)); + + ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); + eattr *e = &a->attrs[0]; + + a->flags = EALF_SORTED; + a->count = 1; + a->next = *attrs; + *attrs = a; + + e->id = EA_CODE(EAP_BGP, code); + e->type = bgp_attr_table[code].type; + e->flags = flags; + + if (e->type & EAF_EMBEDDED) + e->u.data = (u32) val; + else + e->u.ptr = (struct adata *) val; + + return e; +} + + + +#define REPORT(msg, args...) 
\ + ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); }) + +#define DISCARD(msg, args...) \ + ({ REPORT(msg, ## args); return; }) -#define IGNORE -1 -#define WITHDRAW -2 +#define WITHDRAW(msg, args...) \ + ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) + +#define UNSET(a) \ + ({ a->type = EAF_TYPE_UNDEF; return; }) + +#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor" +#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor" +#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)" +#define BAD_VALUE "Malformed %s attribute - invalid value (%u)" +#define NO_MANDATORY "Missing mandatory %s attribute" + + +static inline int +bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len) +{ + *buf++ = flags; + *buf++ = code; + *buf++ = len; + return 3; +} + +static inline int +bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len) +{ + *buf++ = flags | BAF_EXT_LEN; + *buf++ = code; + put_u16(buf, len); + return 4; +} + +static inline int +bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len) +{ + if (len < 256) + return bgp_put_attr_hdr3(buf, code, flags, len); + else + return bgp_put_attr_hdr4(buf, code, flags, len); +} static int -bgp_check_origin(struct bgp_proto *p UNUSED, byte *a, int len UNUSED) +bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - if (*a > 2) - return 6; - return 0; + if (size < (3+1)) + return -1; + + bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1); + buf[3] = a->u.data; + + return 3+1; } -static void -bgp_format_origin(eattr *a, byte *buf, int buflen UNUSED) +static int +bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - static char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" }; + if (size < (3+4)) + return -1; - bsprintf(buf, bgp_origin_names[a->u.data]); + bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4); + put_u32(buf+3, a->u.data); + + return 3+4; } static int -path_segment_contains(byte 
*p, int bs, u32 asn) +bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - int i; - int len = p[1]; - p += 2; + uint len = a->u.ptr->length; - for(i=0; i<len; i++) - { - u32 asn2 = (bs == 4) ? get_u32(p) : get_u16(p); - if (asn2 == asn) - return 1; - p += bs; - } + if (size < (4+len)) + return -1; - return 0; + uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len); + put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4); + + return hdr + len; } -/* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */ static int -validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ilength) +bgp_put_attr(byte *buf, uint size, uint code, uint flags, byte *data, uint len) { - int res = 0; - u8 *a, *dst; - int len, plen; + if (size < (4+len)) + return -1; - dst = a = idata; - len = *ilength; + uint hdr = bgp_put_attr_hdr(buf, code, flags, len); + memcpy(buf + hdr, data, len); - while (len) - { - if (len < 2) - return -1; - - plen = 2 + bs * a[1]; - if (len < plen) - return -1; - - if (a[1] == 0) - { - log(L_WARN "%s: %s_PATH attribute contains empty segment, skipping it", - p->p.name, as_path ? "AS" : "AS4"); - goto skip; - } - - switch (a[0]) - { - case AS_PATH_SET: - res++; - break; - - case AS_PATH_SEQUENCE: - res += a[1]; - break; - - case AS_PATH_CONFED_SEQUENCE: - case AS_PATH_CONFED_SET: - if (as_path && path_segment_contains(a, bs, p->remote_as)) - { - log(L_WARN "%s: AS_CONFED_* segment with peer ASN found, misconfigured confederation?", p->p.name); - return -1; - } - - log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment", - p->p.name, as_path ? 
"AS" : "AS4"); - goto skip; - - default: - return -1; - } - - if (dst != a) - memmove(dst, a, plen); - dst += plen; - - skip: - len -= plen; - a += plen; - } + return hdr + len; +} - *ilength = dst - idata; - return res; +static int +bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) +{ + return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); } -static inline int -validate_as_path(struct bgp_proto *p, byte *a, int *len) + +/* + * Attribute hooks + */ + +static void +bgp_export_origin(struct bgp_export_state *s, eattr *a) { - return validate_path(p, 1, p->as4_session ? 4 : 2, a, len); + if (a->u.data > 2) + WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data); } -static inline int -validate_as4_path(struct bgp_proto *p, struct adata *path) +static void +bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) +{ + if (len != 1) + WITHDRAW(BAD_LENGTH, "ORIGIN", len); + + if (data[0] > 2) + WITHDRAW(BAD_VALUE, "ORIGIN", data[0]); + + bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]); +} + +static void +bgp_format_origin(eattr *a, byte *buf, uint size UNUSED) +{ + static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" }; + + bsprintf(buf, (a->u.data <= 2) ? 
bgp_origin_names[a->u.data] : "?"); +} + + +static int +bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size) +{ + byte *data = a->u.ptr->data; + uint len = a->u.ptr->length; + + if (!s->as4_session) + { + /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */ + byte *src = data; + data = alloca(len); + len = as_path_32to16(data, src, len); + } + + return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len); +} + +static void +bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - return validate_path(p, 0, 4, path->data, &path->length); + struct bgp_proto *p = s->proto; + int as_length = s->as4_session ? 4 : 2; + int as_confed = p->cf->confederation && p->is_interior; + char err[128]; + + if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err))) + WITHDRAW("Malformed AS_PATH attribute - %s", err); + + /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */ + if (p->is_interior && !p->is_internal && + ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE))) + WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE"); + + if (!s->as4_session) + { + /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */ + byte *src = data; + data = alloca(2*len); + len = as_path_16to32(data, src, len); + } + + bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len); } + static int -bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a UNUSED6, int len UNUSED6) +bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { -#ifdef IPV6 - return IGNORE; -#else - ip_addr addr; + /* + * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP, + * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we + * store it and encode it later by AFI-specific hooks. 
+ */ + + if (s->channel->afi == BGP_AF_IPV4) + { + ASSERT(a->u.ptr->length == sizeof(ip_addr)); - memcpy(&addr, a, len); - ipa_ntoh(addr); - if (ipa_classify(addr) & IADDR_HOST) + if (size < (3+4)) + return -1; + + bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4); + put_ip4(buf+3, ipa_to_ip4( *(ip_addr *) a->u.ptr->data )); + + return 3+4; + } + else + { + s->mp_next_hop = a; return 0; + } +} + +static void +bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED) +{ + if (len != 4) + WITHDRAW(BAD_LENGTH, "NEXT_HOP", len); + + /* Semantic checks are done later */ + s->ip_next_hop_len = len; + s->ip_next_hop_data = data; +} + +/* TODO: This function should use AF-specific hook */ +static void +bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED) +{ + ip_addr *nh = (void *) a->u.ptr->data; + uint len = a->u.ptr->length; + + ASSERT((len == 16) || (len == 32)); + + /* in IPv6, we may have two addresses in NEXT HOP */ + if ((len == 16) || ipa_zero(nh[1])) + bsprintf(buf, "%I", nh[0]); else - return 8; -#endif + bsprintf(buf, "%I %I", nh[0], nh[1]); } + static void -bgp_format_next_hop(eattr *a, byte *buf, int buflen UNUSED) +bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - ip_addr *ipp = (ip_addr *) a->u.ptr->data; -#ifdef IPV6 - /* in IPv6, we might have two addresses in NEXT HOP */ - if ((a->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(ipp[1])) - { - bsprintf(buf, "%I %I", ipp[0], ipp[1]); - return; - } -#endif + if (len != 4) + WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len); - bsprintf(buf, "%I", ipp[0]); + u32 val = get_u32(data); + bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val); } -static int -bgp_check_aggregator(struct bgp_proto *p, byte *a UNUSED, int len) + +static void +bgp_export_local_pref(struct bgp_export_state *s, eattr *a) { - int exp_len = p->as4_session ? 8 : 6; - - return (len == exp_len) ? 
0 : WITHDRAW; + if (!s->proto->is_interior && !s->proto->cf->allow_local_pref) + UNSET(a); } static void -bgp_format_aggregator(eattr *a, byte *buf, int buflen UNUSED) +bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - struct adata *ad = a->u.ptr; - byte *data = ad->data; - u32 as; + if (!s->proto->is_interior && !s->proto->cf->allow_local_pref) + DISCARD(BAD_EBGP, "LOCAL_PREF"); - as = get_u32(data); - data += 4; + if (len != 4) + WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len); - bsprintf(buf, "%d.%d.%d.%d AS%u", data[0], data[1], data[2], data[3], as); + u32 val = get_u32(data); + bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val); } -static int -bgp_check_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len) + +static void +bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to) { - return ((len % 4) == 0) ? 0 : WITHDRAW; + if (len != 0) + DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len); + + bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0); } static int -bgp_check_cluster_list(struct bgp_proto *p UNUSED, byte *a UNUSED, int len) +bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { - return ((len % 4) == 0) ? 0 : 5; + byte *data = a->u.ptr->data; + uint len = a->u.ptr->length; + + if (!s->as4_session) + { + /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */ + byte *src = data; + data = alloca(6); + len = aggregator_32to16(data, src); + } + + return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len); } static void -bgp_format_cluster_list(eattr *a, byte *buf, int buflen) +bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - /* Truncates cluster lists larger than buflen, probably not a problem */ - int_set_format(a->u.ptr, 0, -1, buf, buflen); + if (len != (s->as4_session ? 
8 : 6)) + DISCARD(BAD_LENGTH, "AGGREGATOR", len); + + if (!s->as4_session) + { + /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */ + byte *src = data; + data = alloca(8); + len = aggregator_16to32(data, src); + } + + bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len); } -static int -bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) +static void +bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED) { -#ifdef IPV6 - p->mp_reach_start = a; - p->mp_reach_len = len; -#endif - return IGNORE; + byte *data = a->u.ptr->data; + + bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0)); } -static int -bgp_check_unreach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) + +static void +bgp_export_community(struct bgp_export_state *s, eattr *a) { -#ifdef IPV6 - p->mp_unreach_start = a; - p->mp_unreach_len = len; -#endif - return IGNORE; + if (a->u.ptr->length == 0) + UNSET(a); + + a->u.ptr = int_set_sort(s->pool, a->u.ptr); } -static int -bgp_check_ext_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len) +static void +bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - return ((len % 8) == 0) ? 0 : WITHDRAW; + if (!len || (len % 4)) + WITHDRAW(BAD_LENGTH, "COMMUNITY", len); + + struct adata *ad = lp_alloc_adata(s->pool, len); + get_u32s(data, (u32 *) ad->data, len / 4); + bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad); } -static int -bgp_check_large_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len) -{ - return ((len % 12) == 0) ? 
0 : WITHDRAW; -} - - -static struct attr_desc bgp_attr_table[] = { - { NULL, -1, 0, 0, 0, /* Undefined */ - NULL, NULL }, - { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */ - bgp_check_origin, bgp_format_origin }, - { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */ - NULL, NULL }, /* is checked by validate_as_path() as a special case */ - { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */ - bgp_check_next_hop, bgp_format_next_hop }, - { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */ - NULL, NULL }, - { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_LOCAL_PREF */ - NULL, NULL }, - { "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */ - NULL, NULL }, - { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */ - bgp_check_aggregator, bgp_format_aggregator }, - { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */ - bgp_check_community, NULL }, - { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_ROUTER_ID, 0, /* BA_ORIGINATOR_ID */ - NULL, NULL }, - { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */ - bgp_check_cluster_list, bgp_format_cluster_list }, - { .name = NULL }, /* BA_DPA */ - { .name = NULL }, /* BA_ADVERTISER */ - { .name = NULL }, /* BA_RCID_PATH */ - { "mp_reach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_REACH_NLRI */ - bgp_check_reach_nlri, NULL }, - { "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_UNREACH_NLRI */ - bgp_check_unreach_nlri, NULL }, - { "ext_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_EC_SET, 1, /* BA_EXT_COMMUNITY */ - bgp_check_ext_community, NULL }, - { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */ - NULL, NULL }, - { "as4_aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */ - NULL, NULL }, - [BA_LARGE_COMMUNITY] = - { 
"large_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_LC_SET, 1, - bgp_check_large_community, NULL } -}; -/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH. - * It does not matter as this attribute does not appear on routes in the routing table. - */ +static void +bgp_export_originator_id(struct bgp_export_state *s, eattr *a) +{ + if (!s->proto->is_internal) + UNSET(a); +} -#define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name) +static void +bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) +{ + if (!s->proto->is_internal) + DISCARD(BAD_EBGP, "ORIGINATOR_ID"); -static inline struct adata * -bgp_alloc_adata(struct linpool *pool, unsigned len) + if (len != 4) + WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len); + + u32 val = get_u32(data); + bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val); +} + + +static void +bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a) { - struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); - ad->length = len; - return ad; + if (!s->proto->is_internal) + UNSET(a); + + if (a->u.ptr->length == 0) + UNSET(a); } static void -bgp_set_attr(eattr *e, unsigned attr, uintptr_t val) +bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - ASSERT(ATTR_KNOWN(attr)); - e->id = EA_CODE(EAP_BGP, attr); - e->type = bgp_attr_table[attr].type; - e->flags = bgp_attr_table[attr].expected_flags; - if (e->type & EAF_EMBEDDED) - e->u.data = val; - else - e->u.ptr = (struct adata *) val; + if (!s->proto->is_internal) + DISCARD(BAD_EBGP, "CLUSTER_LIST"); + + if (!len || (len % 4)) + WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len); + + struct adata *ad = lp_alloc_adata(s->pool, len); + get_u32s(data, (u32 *) ad->data, len / 4); + bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad); } -static byte * -bgp_set_attr_wa(eattr *e, struct linpool 
*pool, unsigned attr, unsigned len) +static void +bgp_format_cluster_list(eattr *a, byte *buf, uint size) { - struct adata *ad = bgp_alloc_adata(pool, len); - bgp_set_attr(e, attr, (uintptr_t) ad); - return ad->data; + /* Truncates cluster lists larger than buflen, probably not a problem */ + int_set_format(a->u.ptr, 0, -1, buf, size); } -void -bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val) + +static inline u32 +get_af3(byte *buf) { - ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - a->next = *to; - *to = a; - a->flags = EALF_SORTED; - a->count = 1; - bgp_set_attr(a->attrs, attr, val); + return (get_u16(buf) << 16) | buf[2]; } -byte * -bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len) +static void +bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED) { - struct adata *ad = bgp_alloc_adata(pool, len); - bgp_attach_attr(to, pool, attr, (uintptr_t) ad); - return ad->data; + /* + * 2 B MP_REACH_NLRI data - Address Family Identifier + * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier + * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address + * var MP_REACH_NLRI data - Network Address of Next Hop + * 1 B MP_REACH_NLRI data - Reserved (zero) + * var MP_REACH_NLRI data - Network Layer Reachability Information + */ + + if ((len < 5) || (len < (5 + (uint) data[3]))) + bgp_parse_error(s, 9); + + s->mp_reach_af = get_af3(data); + s->mp_next_hop_len = data[3]; + s->mp_next_hop_data = data + 4; + s->mp_reach_len = len - 5 - s->mp_next_hop_len; + s->mp_reach_nlri = data + 5 + s->mp_next_hop_len; } -static int -bgp_encode_attr_hdr(byte *dst, uint flags, unsigned code, int len) + +static void +bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED) { - int wlen; + /* + * 2 B MP_UNREACH_NLRI data - Address Family Identifier + 
* 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier + * var MP_UNREACH_NLRI data - Network Layer Reachability Information + */ - DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags); + if (len < 3) + bgp_parse_error(s, 9); + + s->mp_unreach_af = get_af3(data); + s->mp_unreach_len = len - 3; + s->mp_unreach_nlri = data + 3; +} - if (len < 256) - { - *dst++ = flags; - *dst++ = code; - *dst++ = len; - wlen = 3; - } - else - { - *dst++ = flags | BAF_EXT_LEN; - *dst++ = code; - put_u16(dst, len); - wlen = 4; - } - return wlen; +static void +bgp_export_ext_community(struct bgp_export_state *s, eattr *a) +{ + a->u.ptr = ec_set_del_nontrans(s->pool, a->u.ptr); + + if (a->u.ptr->length == 0) + UNSET(a); + + ec_set_sort_x(a->u.ptr); } static void -aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used) +bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - byte *src = aggr->data; - *new_used = 0; + if (!len || (len % 8)) + WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len); - u32 as = get_u32(src); - if (as > 0xFFFF) - { - as = AS_TRANS; - *new_used = 1; - } - put_u16(dst, as); + struct adata *ad = lp_alloc_adata(s->pool, len); + get_u32s(data, (u32 *) ad->data, len / 4); + bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad); +} + + +static void +bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) +{ + if (s->as4_session) + DISCARD(NEW_BGP, "AS4_AGGREGATOR"); - /* Copy IPv4 address */ - memcpy(dst + 2, src + 4, 4); + if (len != 8) + DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len); + + bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len); } static void -aggregator_convert_to_new(struct adata *aggr, byte *dst) +bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - byte *src = aggr->data; + char err[128]; + + if (s->as4_session) + 
DISCARD(NEW_BGP, "AS4_PATH"); + + if (len < 6) + DISCARD(BAD_LENGTH, "AS4_PATH", len); + + if (!as_path_valid(data, len, 4, 1, err, sizeof(err))) + DISCARD("Malformed AS4_PATH attribute - %s", err); + + struct adata *a = lp_alloc_adata(s->pool, len); + memcpy(a->data, data, len); - u32 as = get_u16(src); - put_u32(dst, as); + /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */ + if (as_path_contains_confed(a)) + { + REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute"); + a = as_path_strip_confed(s->pool, a); + } - /* Copy IPv4 address */ - memcpy(dst + 4, src + 2, 4); + bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a); +} + +static void +bgp_export_large_community(struct bgp_export_state *s, eattr *a) +{ + if (a->u.ptr->length == 0) + UNSET(a); + + a->u.ptr = lc_set_sort(s->pool, a->u.ptr); +} + +static void +bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) +{ + if (!len || (len % 12)) + WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len); + + struct adata *ad = lp_alloc_adata(s->pool, len); + get_u32s(data, (u32 *) ad->data, len / 4); + bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); +} + +static void +bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) +{ + net_addr *n = s->route->net->n.addr; + u32 *labels = (u32 *) a->u.ptr->data; + uint lnum = a->u.ptr->length / 4; + + /* Perhaps we should just ignore it? */ + if (!s->mpls) + WITHDRAW("Unexpected MPLS stack"); + + /* Empty MPLS stack is not allowed */ + if (!lnum) + WITHDRAW("Malformed MPLS stack - empty"); + + /* This is ugly, but we must ensure that labels fit into NLRI field */ + if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255) + WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum); + + for (uint i = 0; i < lnum; i++) + { + if (labels[i] > 0xfffff) + WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]); + + /* TODO: Check for special-purpose label values? 
*/ + } } static int -bgp_get_attr_len(eattr *a) +bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED) { - int len; - if (ATTR_KNOWN(EA_ID(a->id))) + /* + * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute, + * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks. + */ + + s->mpls_labels = a->u.ptr; + return 0; +} + +static void +bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED) +{ + DISCARD("Discarding received attribute #0"); +} + +static void +bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size) +{ + u32 *labels = (u32 *) a->u.ptr->data; + uint lnum = a->u.ptr->length / 4; + char *pos = buf; + + for (uint i = 0; i < lnum; i++) + { + if (size < 20) { - int code = EA_ID(a->id); - struct attr_desc *desc = &bgp_attr_table[code]; - len = desc->expected_length; - if (len < 0) - { - ASSERT(!(a->type & EAF_EMBEDDED)); - len = a->u.ptr->length; - } + bsprintf(pos, "..."); + return; } + + uint l = bsprintf(pos, "%d/", labels[i]); + ADVANCE(pos, size, l); + } + + /* Clear last slash or terminate empty string */ + pos[lnum ? 
-1 : 0] = 0; +} + +static inline void +bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +{ + /* Cannot use bgp_set_attr_data() as it works on known attributes only */ + ea_set_attr_data(to, s->pool, EA_CODE(EAP_BGP, code), flags, EAF_TYPE_OPAQUE, data, len); +} + + +/* + * Attribute table + */ + +static const struct bgp_attr_desc bgp_attr_table[] = { + [BA_ORIGIN] = { + .name = "origin", + .type = EAF_TYPE_INT, + .flags = BAF_TRANSITIVE, + .export = bgp_export_origin, + .encode = bgp_encode_u8, + .decode = bgp_decode_origin, + .format = bgp_format_origin, + }, + [BA_AS_PATH] = { + .name = "as_path", + .type = EAF_TYPE_AS_PATH, + .flags = BAF_TRANSITIVE, + .encode = bgp_encode_as_path, + .decode = bgp_decode_as_path, + }, + [BA_NEXT_HOP] = { + .name = "next_hop", + .type = EAF_TYPE_IP_ADDRESS, + .flags = BAF_TRANSITIVE, + .encode = bgp_encode_next_hop, + .decode = bgp_decode_next_hop, + .format = bgp_format_next_hop, + }, + [BA_MULTI_EXIT_DISC] = { + .name = "med", + .type = EAF_TYPE_INT, + .flags = BAF_OPTIONAL, + .encode = bgp_encode_u32, + .decode = bgp_decode_med, + }, + [BA_LOCAL_PREF] = { + .name = "local_pref", + .type = EAF_TYPE_INT, + .flags = BAF_TRANSITIVE, + .export = bgp_export_local_pref, + .encode = bgp_encode_u32, + .decode = bgp_decode_local_pref, + }, + [BA_ATOMIC_AGGR] = { + .name = "atomic_aggr", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_TRANSITIVE, + .encode = bgp_encode_raw, + .decode = bgp_decode_atomic_aggr, + }, + [BA_AGGREGATOR] = { + .name = "aggregator", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .encode = bgp_encode_aggregator, + .decode = bgp_decode_aggregator, + .format = bgp_format_aggregator, + }, + [BA_COMMUNITY] = { + .name = "community", + .type = EAF_TYPE_INT_SET, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .export = bgp_export_community, + .encode = bgp_encode_u32s, + .decode = bgp_decode_community, + }, + [BA_ORIGINATOR_ID] = { + .name = 
"originator_id", + .type = EAF_TYPE_ROUTER_ID, + .flags = BAF_OPTIONAL, + .export = bgp_export_originator_id, + .encode = bgp_encode_u32, + .decode = bgp_decode_originator_id, + }, + [BA_CLUSTER_LIST] = { + .name = "cluster_list", + .type = EAF_TYPE_INT_SET, + .flags = BAF_OPTIONAL, + .export = bgp_export_cluster_list, + .encode = bgp_encode_u32s, + .decode = bgp_decode_cluster_list, + .format = bgp_format_cluster_list, + }, + [BA_MP_REACH_NLRI] = { + .name = "mp_reach_nlri", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_OPTIONAL, + .decode = bgp_decode_mp_reach_nlri, + }, + [BA_MP_UNREACH_NLRI] = { + .name = "mp_unreach_nlri", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_OPTIONAL, + .decode = bgp_decode_mp_unreach_nlri, + }, + [BA_EXT_COMMUNITY] = { + .name = "ext_community", + .type = EAF_TYPE_EC_SET, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .export = bgp_export_ext_community, + .encode = bgp_encode_u32s, + .decode = bgp_decode_ext_community, + }, + [BA_AS4_PATH] = { + .name = "as4_path", + .type = EAF_TYPE_AS_PATH, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .encode = bgp_encode_raw, + .decode = bgp_decode_as4_path, + }, + [BA_AS4_AGGREGATOR] = { + .name = "as4_aggregator", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .encode = bgp_encode_raw, + .decode = bgp_decode_as4_aggregator, + .format = bgp_format_aggregator, + }, + [BA_LARGE_COMMUNITY] = { + .name = "large_community", + .type = EAF_TYPE_LC_SET, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .export = bgp_export_large_community, + .encode = bgp_encode_u32s, + .decode = bgp_decode_large_community, + }, + [BA_MPLS_LABEL_STACK] = { + .name = "mpls_label_stack", + .type = EAF_TYPE_INT_SET, + .export = bgp_export_mpls_label_stack, + .encode = bgp_encode_mpls_label_stack, + .decode = bgp_decode_mpls_label_stack, + .format = bgp_format_mpls_label_stack, + }, +}; + +static inline int +bgp_attr_known(uint code) +{ + return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name; +} + + 
+/* + * Attribute export + */ + +static inline void +bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) +{ + if (EA_PROTO(a->id) != EAP_BGP) + return; + + uint code = EA_ID(a->id); + + if (bgp_attr_known(code)) + { + const struct bgp_attr_desc *desc = &bgp_attr_table[code]; + + /* The flags might have been zero if the attr was added by filters */ + a->flags = (a->flags & BAF_PARTIAL) | desc->flags; + + /* Set partial bit if new opt-trans attribute is attached to non-local route */ + if ((s->src != NULL) && (a->type & EAF_ORIGINATED) && + (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) + a->flags |= BAF_PARTIAL; + + /* Call specific hook */ + CALL(desc->export, s, a); + + /* Attribute might become undefined in hook */ + if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + return; + } else - { - ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE); - len = a->u.ptr->length; - } - - return len; + { + /* Don't re-export unknown non-transitive attributes */ + if (!(a->flags & BAF_TRANSITIVE)) + return; + + a->flags |= BAF_PARTIAL; + } + + /* Append updated attribute */ + to->attrs[to->count++] = *a; +} + +/** + * bgp_export_attrs - export BGP attributes + * @s: BGP export state + * @attrs: a list of extended attributes + * + * The bgp_export_attrs() function takes a list of attributes and merges it to + * one newly allocated and sorted segment. Attributes are validated and + * normalized by type-specific export hooks and attribute flags are updated. + * Some attributes may be eliminated (e.g. unknown non-transitive attributes, or + * empty community sets). + * + * Result: one sorted attribute list segment, or NULL if attributes are unsuitable. 
+ */ +static inline ea_list * +bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) +{ + /* Merge the attribute list */ + ea_list *new = lp_alloc(s->pool, ea_scan(attrs)); + ea_merge(attrs, new); + ea_sort(new); + + uint i, count; + count = new->count; + new->count = 0; + + /* Export each attribute */ + for (i = 0; i < count; i++) + bgp_export_attr(s, &new->attrs[i], new); + + if (s->err_withdraw) + return NULL; + + return new; +} + + +/* + * Attribute encoding + */ + +static inline int +bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size) +{ + ASSERT(EA_PROTO(a->id) == EAP_BGP); + + uint code = EA_ID(a->id); + + if (bgp_attr_known(code)) + return bgp_attr_table[code].encode(s, a, buf, size); + else + return bgp_encode_raw(s, a, buf, size); } /** * bgp_encode_attrs - encode BGP attributes - * @p: BGP instance - * @w: buffer + * @s: BGP write state * @attrs: a list of extended attributes - * @remains: remaining space in the buffer + * @buf: buffer + * @end: buffer end * * The bgp_encode_attrs() function takes a list of extended attributes * and converts it to its BGP representation (a part of an Update message). * * Result: Length of the attribute block generated or -1 if not enough space. */ -uint -bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) +int +bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end) { - uint i, code, type, flags; - byte *start = w; - int len, rv; + byte *pos = buf; + int i, len; - for(i=0; i<attrs->count; i++) - { - eattr *a = &attrs->attrs[i]; - ASSERT(EA_PROTO(a->id) == EAP_BGP); - code = EA_ID(a->id); - -#ifdef IPV6 - /* When talking multiprotocol BGP, the NEXT_HOP attributes are used only temporarily. */ - if (code == BA_NEXT_HOP) - continue; -#endif - - /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker, - * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH - * as optional AS4_PATH attribute. 
- */ - if ((code == BA_AS_PATH) && (! p->as4_session)) - { - len = a->u.ptr->length; - - if (remains < (len + 4)) - goto err_no_buffer; - - /* Using temporary buffer because don't know a length of created attr - * and therefore a length of a header. Perhaps i should better always - * use BAF_EXT_LEN. */ - - byte buf[len]; - int new_used; - int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used); - - DBG("BGP: Encoding old AS_PATH\n"); - rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl); - ADVANCE(w, remains, rv); - memcpy(w, buf, nl); - ADVANCE(w, remains, nl); - - if (! new_used) - continue; - - if (remains < (len + 4)) - goto err_no_buffer; - - /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments - * here but we don't support confederations and such paths we already - * discarded in bgp_check_as_path(). - */ - - DBG("BGP: Encoding AS4_PATH\n"); - rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len); - ADVANCE(w, remains, rv); - memcpy(w, a->u.ptr->data, len); - ADVANCE(w, remains, len); - - continue; - } - - /* The same issue with AGGREGATOR attribute */ - if ((code == BA_AGGREGATOR) && (! p->as4_session)) - { - int new_used; - - len = 6; - if (remains < (len + 3)) - goto err_no_buffer; - - rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len); - ADVANCE(w, remains, rv); - aggregator_convert_to_old(a->u.ptr, w, &new_used); - ADVANCE(w, remains, len); - - if (! new_used) - continue; - - len = 8; - if (remains < (len + 3)) - goto err_no_buffer; - - rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len); - ADVANCE(w, remains, rv); - memcpy(w, a->u.ptr->data, len); - ADVANCE(w, remains, len); - - continue; - } - - /* Standard path continues here ... 
*/ - - type = a->type & EAF_TYPE_MASK; - flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL); - len = bgp_get_attr_len(a); - - /* Skip empty sets */ - if (((type == EAF_TYPE_INT_SET) || (type == EAF_TYPE_EC_SET) || (type == EAF_TYPE_LC_SET)) && (len == 0)) - continue; - - if (remains < len + 4) - goto err_no_buffer; - - rv = bgp_encode_attr_hdr(w, flags, code, len); - ADVANCE(w, remains, rv); - - switch (type) - { - case EAF_TYPE_INT: - case EAF_TYPE_ROUTER_ID: - if (len == 4) - put_u32(w, a->u.data); - else - *w = a->u.data; - break; - case EAF_TYPE_IP_ADDRESS: - { - ip_addr ip = *(ip_addr *)a->u.ptr->data; - ipa_hton(ip); - memcpy(w, &ip, len); - break; - } - case EAF_TYPE_INT_SET: - case EAF_TYPE_LC_SET: - case EAF_TYPE_EC_SET: - { - u32 *z = int_set_get_data(a->u.ptr); - int i; - for(i=0; i<len; i+=4) - put_u32(w+i, *z++); - break; - } - case EAF_TYPE_OPAQUE: - case EAF_TYPE_AS_PATH: - memcpy(w, a->u.ptr->data, len); - break; - default: - bug("bgp_encode_attrs: unknown attribute type %02x", a->type); - } - ADVANCE(w, remains, len); - } - return w - start; + for (i = 0; i < attrs->count; i++) + { + len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos); + + if (len < 0) + return -1; + + pos += len; + } - err_no_buffer: - return -1; + return pos - buf; } + /* -static void -bgp_init_prefix(struct fib_node *N) + * Attribute decoding + */ + +static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool); + +static inline int +bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn) { - struct bgp_prefix *p = (struct bgp_prefix *) N; - p->bucket_node.next = NULL; + eattr *e = bgp_find_attr(attrs, BA_AS_PATH); + int num = p->cf->allow_local_as + 1; + return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num)); } -*/ -static int -bgp_compare_u32(const u32 *x, const u32 *y) +static inline int +bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs) { - return (*x < *y) ? -1 : (*x > *y) ? 
1 : 0; + eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID); + return (e && (e->u.data == p->local_id)); } -static inline void -bgp_normalize_int_set(u32 *dest, u32 *src, unsigned cnt) +static inline int +bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs) { - memcpy(dest, src, sizeof(u32) * cnt); - qsort(dest, cnt, sizeof(u32), (int(*)(const void *, const void *)) bgp_compare_u32); + eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST); + return (e && int_set_contains(e->u.ptr, p->rr_cluster_id)); } -static int -bgp_compare_ec(const u32 *xp, const u32 *yp) +static inline void +bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) { - u64 x = ec_get(xp, 0); - u64 y = ec_get(yp, 0); - return (x < y) ? -1 : (x > y) ? 1 : 0; + /* Handle duplicate attributes; RFC 7606 3 (g) */ + if (BIT32_TEST(s->attrs_seen, code)) + { + if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI)) + bgp_parse_error(s, 1); + else + DISCARD("Discarding duplicate attribute (code %u)", code); + } + BIT32_SET(s->attrs_seen, code); + + if (bgp_attr_known(code)) + { + const struct bgp_attr_desc *desc = &bgp_attr_table[code]; + + /* Handle conflicting flags; RFC 7606 3 (c) */ + if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) + WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags); + + desc->decode(s, code, flags, data, len, to); + } + else /* Unknown attribute */ + { + if (!(flags & BAF_OPTIONAL)) + WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); + + bgp_decode_unknown(s, code, flags, data, len, to); + } } -static inline void -bgp_normalize_ec_set(struct adata *ad, u32 *src, int internal) +/** + * bgp_decode_attrs - check and decode BGP attributes + * @s: BGP parse state + * @data: start of attribute block + * @len: length of attribute block + * + * This function takes a BGP attribute block (a part of an Update message), checks + * its consistency and converts it to a 
list of BIRD route attributes represented + * by an (uncached) &rta. + */ +ea_list * +bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) { - u32 *dst = int_set_get_data(ad); + struct bgp_proto *p = s->proto; + ea_list *attrs = NULL; + uint code, flags, alen; + byte *pos = data; - /* Remove non-transitive communities (EC_TBIT active) on external sessions */ - if (! internal) + /* Parse the attributes */ + while (len) + { + alen = 0; + + /* Read attribute type */ + if (len < 2) + goto framing_error; + flags = pos[0]; + code = pos[1]; + ADVANCE(pos, len, 2); + + /* Read attribute length */ + if (flags & BAF_EXT_LEN) { - int len = int_set_get_size(ad); - u32 *t = dst; - int i; - - for (i=0; i < len; i += 2) - { - if (src[i] & EC_TBIT) - continue; - - *t++ = src[i]; - *t++ = src[i+1]; - } - - ad->length = (t - dst) * 4; + if (len < 2) + goto framing_error; + alen = get_u16(pos); + ADVANCE(pos, len, 2); + } + else + { + if (len < 1) + goto framing_error; + alen = *pos; + ADVANCE(pos, len, 1); } - else - memcpy(dst, src, ad->length); - qsort(dst, ad->length / 8, 8, (int(*)(const void *, const void *)) bgp_compare_ec); -} + if (alen > len) + goto framing_error; -static int -bgp_compare_lc(const u32 *x, const u32 *y) -{ - if (x[0] != y[0]) - return (x[0] > y[0]) ? 1 : -1; - if (x[1] != y[1]) - return (x[1] > y[1]) ? 1 : -1; - if (x[2] != y[2]) - return (x[2] > y[2]) ? 
1 : -1; - return 0; + DBG("Attr %02x %02x %u\n", code, flags, alen); + + bgp_decode_attr(s, code, flags, pos, alen, &attrs); + ADVANCE(pos, len, alen); + } + + if (s->err_withdraw) + goto withdraw; + + /* If there is no reachability NLRI, we are finished */ + if (!s->ip_reach_len && !s->mp_reach_len) + return NULL; + + + /* Handle missing mandatory attributes; RFC 7606 3 (d) */ + if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN)) + { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; } + + if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH)) + { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; } + + /* When receiving attributes from non-AS4-aware BGP speaker, we have to + reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */ + if (!p->as4_session) + bgp_process_as4_attrs(&attrs, s->pool); + + /* Reject routes with our ASN in AS_PATH attribute */ + if (bgp_as_path_loopy(p, attrs, p->local_as)) + goto withdraw; + + /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */ + if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as)) + goto withdraw; + + /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */ + if (p->is_internal && bgp_originator_id_loopy(p, attrs)) + goto withdraw; + + /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */ + if (p->rr_client && bgp_cluster_list_loopy(p, attrs)) + goto withdraw; + + /* If there is no local preference, define one */ + if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF)) + bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + + return attrs; + + +framing_error: + /* RFC 7606 4 - handle attribute framing errors */ + REPORT("Malformed attribute list - framing error (%u/%u) at %d", + alen, len, (int) (pos - s->attrs)); + +withdraw: + /* RFC 7606 5.2 - handle missing NLRI during errors */ + if (!s->ip_reach_len && !s->mp_reach_len) + bgp_parse_error(s, 1); + + s->err_withdraw = 1; + return NULL; } -static inline void 
-bgp_normalize_lc_set(u32 *dest, u32 *src, unsigned cnt) + +/* + * Route bucket hash table + */ + +#define RBH_KEY(b) b->eattrs, b->hash +#define RBH_NEXT(b) b->next +#define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2) +#define RBH_FN(a,h) h + +#define RBH_REHASH bgp_rbh_rehash +#define RBH_PARAMS /8, *2, 2, 2, 8, 20 + + +HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket) + +void +bgp_init_bucket_table(struct bgp_channel *c) { - memcpy(dest, src, LCOMM_LENGTH * cnt); - qsort(dest, cnt, LCOMM_LENGTH, (int(*)(const void *, const void *)) bgp_compare_lc); + HASH_INIT(c->bucket_hash, c->pool, 8); + + init_list(&c->bucket_queue); + c->withdraw_bucket = NULL; } -static void -bgp_rehash_buckets(struct bgp_proto *p) +void +bgp_free_bucket_table(struct bgp_channel *c) { - struct bgp_bucket **old = p->bucket_hash; - struct bgp_bucket **new; - unsigned oldn = p->hash_size; - unsigned i, e, mask; + HASH_FREE(c->bucket_hash); + struct bgp_bucket *b; + WALK_LIST_FIRST(b, c->bucket_queue) + { + rem_node(&b->send_node); + mb_free(b); + } - p->hash_size = p->hash_limit; - DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, p->hash_size); - p->hash_limit *= 4; - if (p->hash_limit >= 65536) - p->hash_limit = ~0; - new = p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *)); - mask = p->hash_size - 1; - for (i=0; i<oldn; i++) - while (b = old[i]) - { - old[i] = b->hash_next; - e = b->hash & mask; - b->hash_next = new[e]; - if (b->hash_next) - b->hash_next->hash_prev = b; - b->hash_prev = NULL; - new[e] = b; - } - mb_free(old); + mb_free(c->withdraw_bucket); + c->withdraw_bucket = NULL; } static struct bgp_bucket * -bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash) +bgp_get_bucket(struct bgp_channel *c, ea_list *new) { - struct bgp_bucket *b; - unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr); - unsigned ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN); - unsigned size = sizeof(struct bgp_bucket) + ea_size_aligned; - 
unsigned i; + /* Hash and lookup */ + u32 hash = ea_hash(new); + struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash); + + if (b) + return b; + + uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr); + uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN); + uint size = sizeof(struct bgp_bucket) + ea_size_aligned; + uint i; byte *dest; - unsigned index = hash & (p->hash_size - 1); /* Gather total size of non-inline attributes */ - for (i=0; i<new->count; i++) - { - eattr *a = &new->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) - size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN); - } + for (i = 0; i < new->count; i++) + { + eattr *a = &new->attrs[i]; - /* Create the bucket and hash it */ - b = mb_alloc(p->p.pool, size); - b->hash_next = p->bucket_hash[index]; - if (b->hash_next) - b->hash_next->hash_prev = b; - p->bucket_hash[index] = b; - b->hash_prev = NULL; - b->hash = hash; - add_tail(&p->bucket_queue, &b->send_node); + if (!(a->type & EAF_EMBEDDED)) + size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN); + } + + /* Create the bucket */ + b = mb_alloc(c->pool, size); init_list(&b->prefixes); + b->hash = hash; + + /* Copy list of extended attributes */ memcpy(b->eattrs, new, ea_size); - dest = ((byte *)b->eattrs) + ea_size_aligned; + dest = ((byte *) b->eattrs) + ea_size_aligned; /* Copy values of non-inline attributes */ - for (i=0; i<new->count; i++) + for (i = 0; i < new->count; i++) + { + eattr *a = &b->eattrs->attrs[i]; + + if (!(a->type & EAF_EMBEDDED)) { - eattr *a = &b->eattrs->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) - { - struct adata *oa = a->u.ptr; - struct adata *na = (struct adata *) dest; - memcpy(na, oa, sizeof(struct adata) + oa->length); - a->u.ptr = na; - dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN); - } + struct adata *oa = a->u.ptr; + struct adata *na = (struct adata *) dest; + memcpy(na, oa, sizeof(struct adata) + oa->length); + a->u.ptr 
= na; + dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN); } + } - /* If needed, rehash */ - p->hash_count++; - if (p->hash_count > p->hash_limit) - bgp_rehash_buckets(p); + /* Insert the bucket to send queue and bucket hash */ + add_tail(&c->bucket_queue, &b->send_node); + HASH_INSERT2(c->bucket_hash, RBH, c->pool, b); return b; } static struct bgp_bucket * -bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate) +bgp_get_withdraw_bucket(struct bgp_channel *c) { - ea_list *new; - unsigned i, cnt, hash, code; - eattr *a, *d; - u32 seen = 0; - struct bgp_bucket *b; - - /* Merge the attribute list */ - new = alloca(ea_scan(attrs)); - ea_merge(attrs, new); - ea_sort(new); + if (!c->withdraw_bucket) + { + c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket)); + init_list(&c->withdraw_bucket->prefixes); + } - /* Normalize attributes */ - d = new->attrs; - cnt = new->count; - new->count = 0; - for(i=0; i<cnt; i++) - { - a = &new->attrs[i]; - if (EA_PROTO(a->id) != EAP_BGP) - continue; - code = EA_ID(a->id); - if (ATTR_KNOWN(code)) - { - if (!p->is_internal) - { - if (!bgp_attr_table[code].allow_in_ebgp) - continue; - if ((code == BA_LOCAL_PREF) && !p->cf->allow_local_pref) - continue; - } - /* The flags might have been zero if the attr was added by filters */ - a->flags = (a->flags & BAF_PARTIAL) | bgp_attr_table[code].expected_flags; - if (code < 32) - seen |= 1 << code; - } - else - { - /* Don't re-export unknown non-transitive attributes */ - if (!(a->flags & BAF_TRANSITIVE)) - continue; - } - *d = *a; - if ((d->type & EAF_ORIGINATED) && !originate && (d->flags & BAF_TRANSITIVE) && (d->flags & BAF_OPTIONAL)) - d->flags |= BAF_PARTIAL; - switch (d->type & EAF_TYPE_MASK) - { - case EAF_TYPE_INT_SET: - { - struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length); - z->length = d->u.ptr->length; - bgp_normalize_int_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / 4); - d->u.ptr = z; - break; - } - case 
EAF_TYPE_EC_SET: - { - struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length); - z->length = d->u.ptr->length; - bgp_normalize_ec_set(z, (u32 *) d->u.ptr->data, p->is_internal); - d->u.ptr = z; - break; - } - case EAF_TYPE_LC_SET: - { - struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length); - z->length = d->u.ptr->length; - bgp_normalize_lc_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / LCOMM_LENGTH); - d->u.ptr = z; - break; - } - default: ; - } - d++; - new->count++; - } + return c->withdraw_bucket; +} - /* Hash */ - hash = ea_hash(new); - for(b=p->bucket_hash[hash & (p->hash_size - 1)]; b; b=b->hash_next) - if (b->hash == hash && ea_same(b->eattrs, new)) - { - DBG("Found bucket.\n"); - return b; - } - - /* Ensure that there are all mandatory attributes */ - for(i=0; i<ARRAY_SIZE(bgp_mandatory_attrs); i++) - if (!(seen & (1 << bgp_mandatory_attrs[i]))) - { - log(L_ERR "%s: Mandatory attribute %s missing in route %I/%d", p->p.name, bgp_attr_table[bgp_mandatory_attrs[i]].name, n->n.prefix, n->n.pxlen); - return NULL; - } - - /* Check if next hop is valid */ - a = ea_find(new, EA_CODE(EAP_BGP, BA_NEXT_HOP)); - if (!a || ipa_equal(p->cf->remote_ip, *(ip_addr *)a->u.ptr->data)) - { - log(L_ERR "%s: Invalid NEXT_HOP attribute in route %I/%d", p->p.name, n->n.prefix, n->n.pxlen); - return NULL; - } +void +bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b) +{ + rem_node(&b->send_node); + HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b); + mb_free(b); +} - /* Create new bucket */ - DBG("Creating bucket.\n"); - return bgp_new_bucket(p, new, hash); +void +bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b) +{ + rem_node(&b->send_node); + add_tail(&c->bucket_queue, &b->send_node); } void -bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck) +bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) { - if (buck->hash_next) - buck->hash_next->hash_prev = buck->hash_prev; - if (buck->hash_prev) - 
buck->hash_prev->hash_next = buck->hash_next; - else - p->bucket_hash[buck->hash & (p->hash_size-1)] = buck->hash_next; - mb_free(buck); + struct bgp_proto *p = (void *) c->c.proto; + struct bgp_bucket *wb = bgp_get_withdraw_bucket(c); + + log(L_ERR "%s: Attribute list too long", p->p.name); + while (!EMPTY_LIST(b->prefixes)) + { + struct bgp_prefix *px = HEAD(b->prefixes); + + log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net); + rem_node(&px->buck_node); + add_tail(&wb->prefixes, &px->buck_node); + } } -/* Prefix hash table */ +/* + * Prefix hash table + */ -#define PXH_KEY(n1) n1->n.prefix, n1->n.pxlen, n1->path_id -#define PXH_NEXT(n) n->next -#define PXH_EQ(p1,l1,i1,p2,l2,i2) ipa_equal(p1, p2) && l1 == l2 && i1 == i2 -#define PXH_FN(p,l,i) ipa_hash32(p) ^ u32_hash((l << 16) ^ i) +#define PXH_KEY(px) px->net, px->path_id, px->hash +#define PXH_NEXT(px) px->next +#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2) +#define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash #define PXH_PARAMS /8, *2, 2, 2, 8, 20 @@ -929,308 +1309,282 @@ bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck) HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) void -bgp_init_prefix_table(struct bgp_proto *p, u32 order) +bgp_init_prefix_table(struct bgp_channel *c) { - HASH_INIT(p->prefix_hash, p->p.pool, order); + HASH_INIT(c->prefix_hash, c->pool, 8); - p->prefix_slab = sl_new(p->p.pool, sizeof(struct bgp_prefix)); + uint alen = net_addr_length[c->c.net_type]; + c->prefix_slab = alen ? 
sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL; } void -bgp_free_prefix_table(struct bgp_proto *p) +bgp_free_prefix_table(struct bgp_channel *c) { - HASH_FREE(p->prefix_hash); + HASH_FREE(c->prefix_hash); - rfree(p->prefix_slab); - p->prefix_slab = NULL; + rfree(c->prefix_slab); + c->prefix_slab = NULL; } static struct bgp_prefix * -bgp_get_prefix(struct bgp_proto *p, ip_addr prefix, int pxlen, u32 path_id) +bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id) { - struct bgp_prefix *bp = HASH_FIND(p->prefix_hash, PXH, prefix, pxlen, path_id); + u32 hash = net_hash(net) ^ u32_hash(path_id); + struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); - if (bp) - return bp; + if (px) + { + rem_node(&px->buck_node); + return px; + } - bp = sl_alloc(p->prefix_slab); - bp->n.prefix = prefix; - bp->n.pxlen = pxlen; - bp->path_id = path_id; - bp->bucket_node.next = NULL; + if (c->prefix_slab) + px = sl_alloc(c->prefix_slab); + else + px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length); - HASH_INSERT2(p->prefix_hash, PXH, p->p.pool, bp); + px->buck_node.next = NULL; + px->buck_node.prev = NULL; + px->hash = hash; + px->path_id = path_id; + net_copy(px->net, net); - return bp; + HASH_INSERT2(c->prefix_hash, PXH, c->pool, px); + + return px; } void -bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp) +bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) { - HASH_REMOVE2(p->prefix_hash, PXH, p->p.pool, bp); - sl_free(p->prefix_slab, bp); + rem_node(&px->buck_node); + HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); + + if (c->prefix_slab) + sl_free(c->prefix_slab, px); + else + mb_free(px); } -void -bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs) +/* + * BGP protocol glue + */ + +int +bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED) { + rte *e = *new; + struct proto *SRC = e->attrs->src->proto; struct 
bgp_proto *p = (struct bgp_proto *) P; - struct bgp_bucket *buck; - struct bgp_prefix *px; - rte *key; - u32 path_id; + struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL; - DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down"); + /* Reject our routes */ + if (src == p) + return -1; - if (new) - { - key = new; - buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP); - if (!buck) /* Inconsistent attribute list */ - return; - } - else - { - key = old; - if (!(buck = p->withdraw_bucket)) - { - buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket)); - init_list(&buck->prefixes); - } - } - path_id = p->add_path_tx ? key->attrs->src->global_id : 0; - px = bgp_get_prefix(p, n->n.prefix, n->n.pxlen, path_id); - if (px->bucket_node.next) - { - DBG("\tRemoving old entry.\n"); - rem_node(&px->bucket_node); - } - add_tail(&buck->prefixes, &px->bucket_node); - bgp_schedule_packet(p->conn, PKT_UPDATE); -} + /* Accept non-BGP routes */ + if (src == NULL) + return 0; -static int -bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool) -{ - ea_list *ea = lp_alloc(pool, sizeof(ea_list) + 4*sizeof(eattr)); - rta *rta = e->attrs; - byte *z; + // XXXX: Check next hop AF - ea->next = *attrs; - *attrs = ea; - ea->flags = EALF_SORTED; - ea->count = 4; + /* IBGP route reflection, RFC 4456 */ + if (p->is_internal && src->is_internal && (p->local_as == src->local_as)) + { + /* Rejected unless configured as route reflector */ + if (!p->rr_client && !src->rr_client) + return -1; + + /* Generally, this should be handled when path is received, but we check it + also here as rr_cluster_id may be undefined or different in src. */ + if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs)) + return -1; + } - bgp_set_attr(ea->attrs, BA_ORIGIN, - ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? 
ORIGIN_INCOMPLETE : ORIGIN_IGP); + /* Handle well-known communities, RFC 1997 */ + struct eattr *c; + if (p->cf->interpret_communities && + (c = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY)))) + { + struct adata *d = c->u.ptr; - if (p->is_internal) - bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0); - else - { - z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 6); - z[0] = AS_PATH_SEQUENCE; - z[1] = 1; /* 1 AS */ - put_u32(z+2, p->local_as); - } + /* Do not export anywhere */ + if (int_set_contains(d, BGP_COMM_NO_ADVERTISE)) + return -1; - /* iBGP -> use gw, eBGP multi-hop -> use source_addr, - eBGP single-hop -> use gw if on the same iface */ - z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH); - if (p->cf->next_hop_self || - rta->dest != RTD_ROUTER || - ipa_equal(rta->gw, IPA_NONE) || - ipa_is_link_local(rta->gw) || - (!p->is_internal && !p->cf->next_hop_keep && - (!p->neigh || (rta->iface != p->neigh->iface)))) - set_next_hop(z, p->source_addr); - else - set_next_hop(z, rta->gw); + /* Do not export outside of AS (or member-AS) */ + if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED)) + return -1; - bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, p->cf->default_local_pref); + /* Do not export outside of AS (or confederation) */ + if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT)) + return -1; + } - return 0; /* Leave decision to the filters */ + return 0; } -static inline int -bgp_as_path_loopy(struct bgp_proto *p, rta *a) -{ - int num = p->cf->allow_local_as + 1; - eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - return (e && (num > 0) && as_path_contains(e->u.ptr, p->local_as, num)); -} - -static inline int -bgp_originator_id_loopy(struct bgp_proto *p, rta *a) -{ - eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); - return (e && (e->u.data == p->local_id)); -} +static adata null_adata; /* adata of length 0 */ -static inline int -bgp_cluster_list_loopy(struct bgp_proto 
*p, rta *a) +static ea_list * +bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool) { - eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); - return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id)); -} + struct proto *SRC = e->attrs->src->proto; + struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL; + struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls }; + ea_list *attrs = attrs0; + eattr *a; + adata *ad; + /* ORIGIN attribute - mandatory, attach if missing */ + if (! bgp_find_attr(attrs0, BA_ORIGIN)) + bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); -static inline void -bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as) -{ - eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as)); -} + /* AS_PATH attribute - mandatory */ + a = bgp_find_attr(attrs0, BA_AS_PATH); + ad = a ? a->u.ptr : &null_adata; -static inline void -bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid) -{ - eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); - bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_prepend(pool, a ? 
a->u.ptr : NULL, cid)); -} + /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */ + if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad)) + ad = as_path_strip_confed(pool, ad); -static int -bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr) -{ - eattr *a; + /* AS_PATH attribute - keep or prepend ASN */ + if (p->is_internal || + (p->rs_client && src && src->rs_client)) + { + /* IBGP or route server -> just ensure there is one */ + if (!a) + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata); + } + else if (p->is_interior) + { + /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */ + ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as); + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + } + else /* Regular EBGP (no RS, no confederation) */ + { + /* Regular EBGP -> prepend ASN as regular sequence */ + ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as); + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + + /* MULTI_EXIT_DISC attribute - accept only if set in export filter */ + a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC); + if (a && !(a->type & EAF_FRESH)) + bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC); + } - if (!p->is_internal && !p->rs_client) - { - bgp_path_prepend(e, attrs, pool, p->local_as); - - /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be - * propagated to other neighboring ASes. - * Perhaps it would be better to undefine it. - */ - a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - if (a) - bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0); - } + /* NEXT_HOP attribute - delegated to AF-specific hook */ + a = bgp_find_attr(attrs0, BA_NEXT_HOP); + bgp_update_next_hop(&s, a, &attrs); - /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr, - * eBGP single-hop -> keep next_hop if on the same iface. - * If the next_hop is zero (i.e. 
link-local), keep only if on the same iface. - * - * Note that same-iface-check uses iface from route, which is based on gw. - */ - a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); - if (a && !p->cf->next_hop_self && - (p->cf->next_hop_keep || - (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) || - (p->neigh && (e->attrs->iface == p->neigh->iface)))) - { - /* Leave the original next hop attribute, will check later where does it point */ - } - else - { - /* Need to create new one */ - byte *b = bgp_attach_attr_wa(attrs, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH); - set_next_hop(b, p->source_addr); - } + /* LOCAL_PREF attribute - required for IBGP, attach if missing */ + if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF)) + bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); - if (rr) - { - /* Handling route reflection, RFC 4456 */ - struct bgp_proto *src = (struct bgp_proto *) e->attrs->src->proto; + /* IBGP route reflection, RFC 4456 */ + if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as)) + { + /* ORIGINATOR_ID attribute - attach if not already set */ + if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID)) + bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id); - a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); - if (!a) - bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id); + /* CLUSTER_LIST attribute - prepend cluster ID */ + a = bgp_find_attr(attrs0, BA_CLUSTER_LIST); + ad = a ? a->u.ptr : NULL; - /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */ - bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? 
src->rr_cluster_id : p->rr_cluster_id); + /* Prepend src cluster ID */ + if (src->rr_cluster_id) + ad = int_set_prepend(pool, ad, src->rr_cluster_id); - /* Two RR clients with different cluster ID, hmmm */ - if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id)) - bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id); - } + /* Prepend dst cluster ID if src and dst clusters are different */ + if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id)) + ad = int_set_prepend(pool, ad, p->rr_cluster_id); - return 0; /* Leave decision to the filters */ -} + /* Should be at least one prepended cluster ID */ + bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad); + } -static int -bgp_community_filter(struct bgp_proto *p, rte *e) -{ - eattr *a; - struct adata *d; + /* AS4_* transition attributes, RFC 6793 4.2.2 */ + if (! p->as4_session) + { + a = bgp_find_attr(attrs, BA_AS_PATH); + if (a && as_path_contains_as4(a->u.ptr)) + { + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); + } - /* Check if we aren't forbidden to export the route by communities */ - a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY)); - if (a) + a = bgp_find_attr(attrs, BA_AGGREGATOR); + if (a && aggregator_contains_as4(a->u.ptr)) { - d = a->u.ptr; - if (int_set_contains(d, BGP_COMM_NO_ADVERTISE)) - { - DBG("\tNO_ADVERTISE\n"); - return 1; - } - if (!p->is_internal && - (int_set_contains(d, BGP_COMM_NO_EXPORT) || - int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))) - { - DBG("\tNO_EXPORT\n"); - return 1; - } + bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr); } + } - return 0; + /* + * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above + * conditions. 
Presence and validity of quasi-mandatory NEXT_HOP attribute + * should be checked in AF-specific hooks. + */ + + /* Apply per-attribute export hooks for validation and normalization */ + return bgp_export_attrs(&s, attrs); } -int -bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool) +void +bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea_list *attrs) { - rte *e = *new; - struct bgp_proto *p = (struct bgp_proto *) P; - struct bgp_proto *new_bgp = (e->attrs->src->proto->proto == &proto_bgp) ? - (struct bgp_proto *) e->attrs->src->proto : NULL; + struct bgp_proto *p = (void *) P; + struct bgp_channel *c = (void *) C; + struct bgp_bucket *buck; + struct bgp_prefix *px; + u32 path; - if (p == new_bgp) /* Poison reverse updates */ - return -1; - if (new_bgp) - { - /* We should check here for cluster list loop, because the receiving BGP instance - might have different cluster ID */ - if (bgp_cluster_list_loopy(p, e->attrs)) - return -1; - - if (p->cf->interpret_communities && bgp_community_filter(p, e)) - return -1; - - if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal) - { - /* Redistribution of internal routes with IBGP */ - if (p->rr_client || new_bgp->rr_client) - /* Route reflection, RFC 4456 */ - return bgp_update_attrs(p, e, attrs, pool, 1); - else - return -1; - } - else - return bgp_update_attrs(p, e, attrs, pool, 0); - } + if (new) + { + attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool2); + + /* If attributes are invalid, we fail back to withdraw */ + buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); + path = new->attrs->src->global_id; + + lp_flush(bgp_linpool2); + } else - return bgp_create_attrs(p, e, attrs, pool); + { + buck = bgp_get_withdraw_bucket(c); + path = old->attrs->src->global_id; + } + + px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? 
path : 0); + add_tail(&buck->prefixes, &px->buck_node); + + bgp_schedule_packet(p->conn, c, PKT_UPDATE); } + static inline u32 bgp_get_neighbor(rte *r) { eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); u32 as; - if (e && as_path_get_first(e->u.ptr, &as)) + if (e && as_path_get_first_regular(e->u.ptr, &as)) return as; - else - return ((struct bgp_proto *) r->attrs->src->proto)->remote_as; + + /* If AS_PATH is not defined, we treat rte as locally originated */ + struct bgp_proto *p = (void *) r->attrs->src->proto; + return p->cf->confederation ?: p->local_as; } static inline int rte_resolvable(rte *rt) { - int rd = rt->attrs->dest; - return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH); + return rt->attrs->dest == RTD_UNICAST; } int @@ -1269,16 +1623,16 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) - { - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; - o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; - if (n < o) - return 1; - if (n > o) - return 0; - } + { + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; + o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; + if (n < o) + return 1; + if (n > o) + return 0; + } /* RFC 4271 9.1.2.2. b) Use origins */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); @@ -1303,21 +1657,21 @@ bgp_rte_better(rte *new, rte *old) */ if (new_bgp->cf->med_metric || old_bgp->cf->med_metric || (bgp_get_neighbor(new) == bgp_get_neighbor(old))) - { - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - n = x ? 
x->u.data : new_bgp->cf->default_med; - o = y ? y->u.data : old_bgp->cf->default_med; - if (n < o) - return 1; - if (n > o) - return 0; - } + { + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + n = x ? x->u.data : new_bgp->cf->default_med; + o = y ? y->u.data : old_bgp->cf->default_med; + if (n < o) + return 1; + if (n > o) + return 0; + } /* RFC 4271 9.1.2.2. d) Prefer external peers */ - if (new_bgp->is_internal > old_bgp->is_internal) + if (new_bgp->is_interior > old_bgp->is_interior) return 0; - if (new_bgp->is_internal < old_bgp->is_internal) + if (new_bgp->is_interior < old_bgp->is_interior) return 1; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ @@ -1329,7 +1683,7 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ - /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */ + /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); n = x ? x->u.data : new_bgp->remote_id; @@ -1388,18 +1742,18 @@ bgp_rte_mergable(rte *pri, rte *sec) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths) - { - x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; - s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; + { + x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; + s = y ? 
as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; - if (p != s) - return 0; + if (p != s) + return 0; -// if (DELTA(p, s) > pri_bgp->cf->relax_multipath) -// return 0; - } +// if (DELTA(p, s) > pri_bgp->cf->relax_multipath) +// return 0; + } /* RFC 4271 9.1.2.2. b) Use origins */ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); @@ -1412,17 +1766,17 @@ bgp_rte_mergable(rte *pri, rte *sec) /* RFC 4271 9.1.2.2. c) Compare MED's */ if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric || (bgp_get_neighbor(pri) == bgp_get_neighbor(sec))) - { - x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - p = x ? x->u.data : pri_bgp->cf->default_med; - s = y ? y->u.data : sec_bgp->cf->default_med; - if (p != s) - return 0; - } + { + x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + p = x ? x->u.data : pri_bgp->cf->default_med; + s = y ? y->u.data : sec_bgp->cf->default_med; + if (p != s) + return 0; + } /* RFC 4271 9.1.2.2. d) Prefer external peers */ - if (pri_bgp->is_internal != sec_bgp->is_internal) + if (pri_bgp->is_interior != sec_bgp->is_interior) return 0; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ @@ -1437,7 +1791,6 @@ bgp_rte_mergable(rte *pri, rte *sec) } - static inline int same_group(rte *r, u32 lpref, u32 lasn) { @@ -1482,7 +1835,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) * that this fn is not called for them. * * The idea is simple, the implementation is more problematic, - * mostly because of optimizations in rte_recalculate() that + * mostly because of optimizations in rte_recalculate() that * avoids full recalculation in most cases. 
* * We can assume that at least one of new, old is non-NULL and both @@ -1494,14 +1847,14 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) /* If new and old are from different groups, we just process that as two independent events */ if (new && old && !same_group(old, lpref, lasn)) - { - int i1, i2; - i1 = bgp_rte_recalculate(table, net, NULL, old, old_best); - i2 = bgp_rte_recalculate(table, net, new, NULL, old_best); - return i1 || i2; - } + { + int i1, i2; + i1 = bgp_rte_recalculate(table, net, NULL, old, old_best); + i2 = bgp_rte_recalculate(table, net, new, NULL, old_best); + return i1 || i2; + } - /* + /* * We could find the best-in-group and then make some shortcuts like * in rte_recalculate, but as we would have to walk through all * net->routes just to find it, it is probably not worth. So we @@ -1513,35 +1866,35 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) new->u.bgp.suppressed = 1; if (old) + { + old_is_group_best = !old->u.bgp.suppressed; + old->u.bgp.suppressed = 1; + int new_is_better = new && bgp_rte_better(new, old); + + /* The first case - replace not best with worse (or remove not best) */ + if (!old_is_group_best && !new_is_better) + return 0; + + /* The second case - replace the best with better */ + if (old_is_group_best && new_is_better) { - old_is_group_best = !old->u.bgp.suppressed; - old->u.bgp.suppressed = 1; - int new_is_better = new && bgp_rte_better(new, old); - - /* The first case - replace not best with worse (or remove not best) */ - if (!old_is_group_best && !new_is_better) - return 0; - - /* The second case - replace the best with better */ - if (old_is_group_best && new_is_better) - { - /* new is best-in-group, the see discussion below - this is - a special variant of NBG && OBG. 
From OBG we can deduce - that same_group(old_best) iff (old == old_best) */ - new->u.bgp.suppressed = 0; - return (old == old_best); - } + /* new is best-in-group, the see discussion below - this is + a special variant of NBG && OBG. From OBG we can deduce + that same_group(old_best) iff (old == old_best) */ + new->u.bgp.suppressed = 0; + return (old == old_best); } + } /* The default case - find a new best-in-group route */ r = new; /* new may not be in the list */ for (s=net->routes; rte_is_valid(s); s=s->next) if (use_deterministic_med(s) && same_group(s, lpref, lasn)) - { - s->u.bgp.suppressed = 1; - if (!r || bgp_rte_better(s, r)) - r = s; - } + { + s->u.bgp.suppressed = 1; + if (!r || bgp_rte_better(s, r)) + r = s; + } /* Simple case - the last route in group disappears */ if (!r) @@ -1580,397 +1933,77 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) return old_is_group_best; } -static struct adata * -bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool) -{ - struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8); - newa->length = 8; - aggregator_convert_to_new(old, newa->data); - return newa; -} - -/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format - * and append path old4 (in 4B format). +/* + * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3 */ -static struct adata * -bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool) -{ - byte buf[old2->length * 2]; - - int ol = as_path_convert_to_new(old2, buf, req_as); - int nl = ol + (old4 ? 
old4->length : 0); - - struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl); - newa->length = nl; - memcpy(newa->data, buf, ol); - if (old4) memcpy(newa->data + ol, old4->data, old4->length); - - return newa; -} - -static int -as4_aggregator_valid(struct adata *aggr) -{ - return aggr->length == 8; -} - - -/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */ -static void -bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool) -{ - eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH)); - eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR)); - eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR)); - int a4_removed = 0; - - if (a4 && !as4_aggregator_valid(a4->u.ptr)) - { - log(L_WARN "%s: AS4_AGGREGATOR attribute is invalid, skipping attribute", p->p.name); - a4 = NULL; - a4_removed = 1; - } - - if (a2) - { - u32 a2_as = get_u16(a2->u.ptr->data); - - if (a4) - { - if (a2_as != AS_TRANS) - { - /* Routes were aggregated by old router and therefore AS4_PATH - * and AS4_AGGREGATOR is invalid - * - * Convert AS_PATH and AGGREGATOR to 4B format and finish. - */ - - a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool); - p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool); - - return; - } - else - { - /* Common case, use AS4_AGGREGATOR attribute */ - a2->u.ptr = a4->u.ptr; - } - } - else - { - /* Common case, use old AGGREGATOR attribute */ - a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool); - - if ((a2_as == AS_TRANS) && !a4_removed) - log(L_WARN "%s: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing", p->p.name); - } - } - else - if (a4) - log(L_WARN "%s: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing", p->p.name); - - int p2_len = as_path_getlen_int(p2->u.ptr, 2); - int p4_len = p4 ? 
validate_as4_path(p, p4->u.ptr) : -1; - - if (p4 && (p4_len < 0)) - log(L_WARN "%s: AS4_PATH attribute is malformed, skipping attribute", p->p.name); - - if ((p4_len <= 0) || (p2_len < p4_len)) - p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool); - else - p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool); -} - static void -bgp_remove_as4_attrs(struct bgp_proto *p, rta *a) +bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) { - unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH); - unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR); - ea_list **el = &(a->eattrs); - - /* We know that ea_lists constructed in bgp_decode attrs have one attribute per ea_list struct */ - while (*el != NULL) - { - unsigned fid = (*el)->attrs[0].id; - - if ((fid == id1) || (fid == id2)) - { - *el = (*el)->next; - if (p->as4_session) - log(L_WARN "%s: Unexpected AS4_* attributes received", p->p.name); - } - else - el = &((*el)->next); - } -} - -/** - * bgp_decode_attrs - check and decode BGP attributes - * @conn: connection - * @attr: start of attribute block - * @len: length of attribute block - * @pool: linear pool to make all the allocations in - * @mandatory: 1 iff presence of mandatory attributes has to be checked - * - * This function takes a BGP attribute block (a part of an Update message), checks - * its consistency and converts it to a list of BIRD route attributes represented - * by a &rta. 
- */ -struct rta * -bgp_decode_attrs(struct bgp_conn *conn, byte *attr, uint len, struct linpool *pool, int mandatory) -{ - struct bgp_proto *bgp = conn->bgp; - rta *a = lp_alloc(pool, sizeof(struct rta)); - uint flags, code, l, i, type; - int errcode; - byte *z, *attr_start; - byte seen[256/8]; - ea_list *ea; - struct adata *ad; - int withdraw = 0; - - bzero(a, sizeof(rta)); - a->source = RTS_BGP; - a->scope = SCOPE_UNIVERSE; - a->cast = RTC_UNICAST; - /* a->dest = RTD_ROUTER; -- set in bgp_set_next_hop() */ - a->from = bgp->cf->remote_ip; - - /* Parse the attributes */ - bzero(seen, sizeof(seen)); - DBG("BGP: Parsing attributes\n"); - while (len) - { - if (len < 2) - goto malformed; - attr_start = attr; - flags = *attr++; - code = *attr++; - len -= 2; - if (flags & BAF_EXT_LEN) - { - if (len < 2) - goto malformed; - l = get_u16(attr); - attr += 2; - len -= 2; - } - else - { - if (len < 1) - goto malformed; - l = *attr++; - len--; - } - if (l > len) - goto malformed; - len -= l; - z = attr; - attr += l; - DBG("Attr %02x %02x %d\n", code, flags, l); - if (seen[code/8] & (1 << (code%8))) - goto malformed; - if (ATTR_KNOWN(code)) - { - struct attr_desc *desc = &bgp_attr_table[code]; - if (desc->expected_length >= 0 && desc->expected_length != (int) l) - { errcode = 5; goto err; } - if ((desc->expected_flags ^ flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) - { errcode = 4; goto err; } - if (!bgp->is_internal) - { - if (!desc->allow_in_ebgp) - continue; - if ((code == BA_LOCAL_PREF) && !bgp->cf->allow_local_pref) - continue; - } - if (desc->validate) - { - errcode = desc->validate(bgp, z, l); - if (errcode > 0) - goto err; - if (errcode == IGNORE) - continue; - if (errcode <= WITHDRAW) - { - log(L_WARN "%s: Attribute %s is malformed, withdrawing update", - bgp->p.name, desc->name); - withdraw = 1; - } - } - else if (code == BA_AS_PATH) - { - /* Special case as it might also trim the attribute */ - if (validate_as_path(bgp, z, &l) < 0) - { errcode = 11; goto err; } - } - 
type = desc->type; - } - else /* Unknown attribute */ - { - if (!(flags & BAF_OPTIONAL)) - { errcode = 2; goto err; } - type = EAF_TYPE_OPAQUE; - } - - // Only OPTIONAL and TRANSITIVE attributes may have non-zero PARTIAL flag - // if (!((flags & BAF_OPTIONAL) && (flags & BAF_TRANSITIVE)) && (flags & BAF_PARTIAL)) - // { errcode = 4; goto err; } - - seen[code/8] |= (1 << (code%8)); - ea = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = a->eattrs; - a->eattrs = ea; - ea->flags = 0; - ea->count = 1; - ea->attrs[0].id = EA_CODE(EAP_BGP, code); - ea->attrs[0].flags = flags; - ea->attrs[0].type = type; - if (type & EAF_EMBEDDED) - ad = NULL; - else - { - ad = lp_alloc(pool, sizeof(struct adata) + l); - ea->attrs[0].u.ptr = ad; - ad->length = l; - memcpy(ad->data, z, l); - } - switch (type) - { - case EAF_TYPE_ROUTER_ID: - case EAF_TYPE_INT: - if (l == 1) - ea->attrs[0].u.data = *z; - else - ea->attrs[0].u.data = get_u32(z); - break; - case EAF_TYPE_IP_ADDRESS: - ipa_ntoh(*(ip_addr *)ad->data); - break; - case EAF_TYPE_INT_SET: - case EAF_TYPE_LC_SET: - case EAF_TYPE_EC_SET: - { - u32 *z = (u32 *) ad->data; - for(i=0; i<ad->length/4; i++) - z[i] = ntohl(z[i]); - break; - } - } - } - - if (withdraw) - goto withdraw; + eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH); + eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH); + eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR); + eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR); -#ifdef IPV6 - /* If we received MP_REACH_NLRI we should check mandatory attributes */ - if (bgp->mp_reach_len != 0) - mandatory = 1; -#endif + /* First, unset AS4_* attributes */ + if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH); + if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR); - /* If there is no (reachability) NLRI, we should exit now */ - if (! 
mandatory) - return a; - - /* Check if all mandatory attributes are present */ - for(i=0; i < ARRAY_SIZE(bgp_mandatory_attrs); i++) - { - code = bgp_mandatory_attrs[i]; - if (!(seen[code/8] & (1 << (code%8)))) - { - bgp_error(conn, 3, 3, &bgp_mandatory_attrs[i], 1); - return NULL; - } - } - - /* When receiving attributes from non-AS4-aware BGP speaker, - * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes - */ - if (! bgp->as4_session) - bgp_reconstruct_4b_atts(bgp, a, pool); - - bgp_remove_as4_attrs(bgp, a); - - /* If the AS path attribute contains our AS, reject the routes */ - if (bgp_as_path_loopy(bgp, a)) - goto withdraw; - - /* Two checks for IBGP loops caused by route reflection, RFC 4456 */ - if (bgp_originator_id_loopy(bgp, a) || - bgp_cluster_list_loopy(bgp, a)) - goto withdraw; + /* Handle AGGREGATOR attribute */ + if (a2 && a4) + { + u32 a2_asn = get_u32(a2->u.ptr->data); - /* If there's no local preference, define one */ - if (!(seen[0] & (1 << BA_LOCAL_PREF))) - bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, bgp->cf->default_local_pref); + /* If routes were aggregated by an old router, then AS4_PATH and + AS4_AGGREGATOR are invalid. In that case we give up. 
*/ + if (a2_asn != AS_TRANS) + return; - return a; + /* Use AS4_AGGREGATOR instead of AGGREGATOR */ + a2->u.ptr = a4->u.ptr; + } -withdraw: - return NULL; + /* Handle AS_PATH attribute */ + if (p2 && p4) + { + /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */ + int p2_len = as_path_getlen(p2->u.ptr); + int p4_len = as_path_getlen(p4->u.ptr); -malformed: - bgp_error(conn, 3, 1, NULL, 0); - return NULL; + /* AS_PATH is too short, give up */ + if (p2_len < p4_len) + return; -err: - bgp_error(conn, 3, errcode, attr_start, z+l-attr_start); - return NULL; + /* Merge AS_PATH and AS4_PATH */ + as_path_cut(p2->u.ptr, p2_len - p4_len); + p2->u.ptr = as_path_merge(pool, p2->u.ptr, p4->u.ptr); + } } int bgp_get_attr(eattr *a, byte *buf, int buflen) { uint i = EA_ID(a->id); - struct attr_desc *d; + const struct bgp_attr_desc *d; int len; - if (ATTR_KNOWN(i)) + if (bgp_attr_known(i)) + { + d = &bgp_attr_table[i]; + len = bsprintf(buf, "%s", d->name); + buf += len; + if (d->format) { - d = &bgp_attr_table[i]; - len = bsprintf(buf, "%s", d->name); - buf += len; - if (d->format) - { - *buf++ = ':'; - *buf++ = ' '; - d->format(a, buf, buflen - len - 2); - return GA_FULL; - } - return GA_NAME; + *buf++ = ':'; + *buf++ = ' '; + d->format(a, buf, buflen - len - 2); + return GA_FULL; } - bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? 
" [t]" : ""); - return GA_NAME; -} - -void -bgp_init_bucket_table(struct bgp_proto *p) -{ - p->hash_size = 256; - p->hash_limit = p->hash_size * 4; - p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *)); - init_list(&p->bucket_queue); - p->withdraw_bucket = NULL; - // fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix); -} - -void -bgp_free_bucket_table(struct bgp_proto *p) -{ - mb_free(p->bucket_hash); - p->bucket_hash = NULL; - - struct bgp_bucket *b; - WALK_LIST_FIRST(b, p->bucket_queue) - { - rem_node(&b->send_node); - mb_free(b); + return GA_NAME; } - mb_free(p->withdraw_bucket); - p->withdraw_bucket = NULL; + bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : ""); + return GA_NAME; } void @@ -1986,14 +2019,14 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs) buf += bsprintf(buf, "-"); if (e->attrs->hostentry) - { - if (!rte_resolvable(e)) - buf += bsprintf(buf, "/-"); - else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) - buf += bsprintf(buf, "/?"); - else - buf += bsprintf(buf, "/%d", e->attrs->igp_metric); - } + { + if (!rte_resolvable(e)) + buf += bsprintf(buf, "/-"); + else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) + buf += bsprintf(buf, "/?"); + else + buf += bsprintf(buf, "/%d", e->attrs->igp_metric); + } buf += bsprintf(buf, ") ["); if (p && as_path_get_last(p->u.ptr, &origas)) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index b99672f5..9db26050 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -2,6 +2,8 @@ * BIRD -- The Border Gateway Protocol * * (c) 2000 Martin Mares <mj@ucw.cz> + * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2008--2016 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. 
*/ @@ -9,48 +11,52 @@ /** * DOC: Border Gateway Protocol * - * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the - * connection and most of the interface with BIRD core, |packets.c| handling + * The BGP protocol is implemented in three parts: |bgp.c| which takes care of + * the connection and most of the interface with BIRD core, |packets.c| handling * both incoming and outgoing BGP packets and |attrs.c| containing functions for * manipulation with BGP attribute lists. * - * As opposed to the other existing routing daemons, BIRD has a sophisticated core - * architecture which is able to keep all the information needed by BGP in the - * primary routing table, therefore no complex data structures like a central - * BGP table are needed. This increases memory footprint of a BGP router with - * many connections, but not too much and, which is more important, it makes - * BGP much easier to implement. + * As opposed to the other existing routing daemons, BIRD has a sophisticated + * core architecture which is able to keep all the information needed by BGP in + * the primary routing table, therefore no complex data structures like a + * central BGP table are needed. This increases memory footprint of a BGP router + * with many connections, but not too much and, which is more important, it + * makes BGP much easier to implement. * - * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto - * structure to which are attached individual connections represented by &bgp_connection - * (usually, there exists only one connection, but during BGP session setup, there - * can be more of them). The connections are handled according to the BGP state machine - * defined in the RFC with all the timers and all the parameters configurable. 
+ * Each instance of BGP (corresponding to a single BGP peer) is described by a + * &bgp_proto structure to which are attached individual connections represented + * by &bgp_connection (usually, there exists only one connection, but during BGP + * session setup, there can be more of them). The connections are handled + * according to the BGP state machine defined in the RFC with all the timers and + * all the parameters configurable. * - * In incoming direction, we listen on the connection's socket and each time we receive - * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and - * passes complete packets to bgp_rx_packet() which distributes the packet according - * to its type. + * In incoming direction, we listen on the connection's socket and each time we + * receive some input, we pass it to bgp_rx(). It decodes packet headers and the + * markers and passes complete packets to bgp_rx_packet() which distributes the + * packet according to its type. * - * In outgoing direction, we gather all the routing updates and sort them to buckets - * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison - * of &rta's and a &fib which helps us to find if we already have another route for - * the same destination queued for sending, so that we can replace it with the new one - * immediately instead of sending both updates). There also exists a special bucket holding - * all the route withdrawals which cannot be queued anywhere else as they don't have any - * attributes. If we have any packet to send (due to either new routes or the connection - * tracking code wanting to send a Open, Keepalive or Notification message), we call - * bgp_schedule_packet() which sets the corresponding bit in a @packet_to_send - * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty, - * we call bgp_fire_tx(). 
It inspects state of all the packet type bits and calls - * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet - * type if we have more data of the same type to send. + * In outgoing direction, we gather all the routing updates and sort them to + * buckets (&bgp_bucket) according to their attributes (we keep a hash table for + * fast comparison of &rta's and a &fib which helps us to find if we already + * have another route for the same destination queued for sending, so that we + * can replace it with the new one immediately instead of sending both + * updates). There also exists a special bucket holding all the route + * withdrawals which cannot be queued anywhere else as they don't have any + * attributes. If we have any packet to send (due to either new routes or the + * connection tracking code wanting to send a Open, Keepalive or Notification + * message), we call bgp_schedule_packet() which sets the corresponding bit in a + * @packet_to_send bit field in &bgp_conn and as soon as the transmit socket + * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the + * packet type bits and calls the corresponding bgp_create_xx() functions, + * eventually rescheduling the same packet type if we have more data of the same + * type to send. * - * The processing of attributes consists of two functions: bgp_decode_attrs() for checking - * of the attribute blocks and translating them to the language of BIRD's extended attributes - * and bgp_encode_attrs() which does the converse. Both functions are built around a - * @bgp_attr_table array describing all important characteristics of all known attributes. - * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams. 
+ * The processing of attributes consists of two functions: bgp_decode_attrs() + * for checking of the attribute blocks and translating them to the language of + * BIRD's extended attributes and bgp_encode_attrs() which does the + * converse. Both functions are built around a @bgp_attr_table array describing + * all important characteristics of all known attributes. Unknown transitive + * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams. * * BGP protocol implements graceful restart in both restarting (local restart) * and receiving (neighbor restart) roles. The first is handled mostly by the @@ -61,10 +67,46 @@ * point of view and therefore maintaining received routes. Routing table * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing * stale routes after reestablishment of BGP session during graceful restart. - */ + * + * Supported standards: + * <itemize> + * <item> <rfc id="4271"> - Border Gateway Protocol 4 (BGP) + * <item> <rfc id="1997"> - BGP Communities Attribute + * <item> <rfc id="2385"> - Protection of BGP Sessions via TCP MD5 Signature + * <item> <rfc id="2545"> - Use of BGP Multiprotocol Extensions for IPv6 + * <item> <rfc id="2918"> - Route Refresh Capability + * <item> <rfc id="3107"> - Carrying Label Information in BGP + * <item> <rfc id="4360"> - BGP Extended Communities Attribute + * <item> <rfc id="4364"> - BGP/MPLS IPv4 Virtual Private Networks + * <item> <rfc id="4456"> - BGP Route Reflection + * <item> <rfc id="4486"> - Subcodes for BGP Cease Notification Message + * <item> <rfc id="4659"> - BGP/MPLS IPv6 Virtual Private Networks + * <item> <rfc id="4724"> - Graceful Restart Mechanism for BGP + * <item> <rfc id="4760"> - Multiprotocol extensions for BGP + * <item> <rfc id="4798"> - Connecting IPv6 Islands over IPv4 MPLS + * <item> <rfc id="5065"> - AS confederations for BGP + * <item> <rfc id="5082"> - Generalized TTL Security Mechanism + * <item> <rfc id="5492"> - Capabilities Advertisement with BGP 
+ * <item> <rfc id="5549"> - Advertising IPv4 NLRI with an IPv6 Next Hop + * <item> <rfc id="5575"> - Dissemination of Flow Specification Rules + * <item> <rfc id="5668"> - 4-Octet AS Specific BGP Extended Community + * <item> <rfc id="6286"> - AS-Wide Unique BGP Identifier + * <item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error + * <item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers + * <item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP + * <item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages + * <item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP + * <item> <rfc id="7947"> - Internet Exchange BGP Route Server + * <item> <rfc id="8092"> - BGP Large Communities Attribute + * <item> <rfc id="8203"> - BGP Administrative Shutdown Communication + * <item> <rfc id="8212"> - Default EBGP Route Propagation Behavior without Policies + * </itemize> +*/ #undef LOCAL_DEBUG +#include <stdlib.h> + #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" @@ -72,6 +114,7 @@ #include "nest/cli.h" #include "nest/locks.h" #include "conf/conf.h" +#include "filter/filter.h" #include "lib/socket.h" #include "lib/resource.h" #include "lib/string.h" @@ -80,70 +123,150 @@ struct linpool *bgp_linpool; /* Global temporary pool */ -static sock *bgp_listen_sk; /* Global listening socket */ -static int bgp_counter; /* Number of protocol instances using the listening socket */ +struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */ +static list bgp_sockets; /* Global list of listening sockets */ + -static void bgp_close(struct bgp_proto *p, int apply_md5); static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); -static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags); static void bgp_update_bfd(struct bgp_proto *p, int use_bfd); +static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); +static void bgp_listen_sock_err(sock 
*sk UNUSED, int err); /** * bgp_open - open a BGP instance * @p: BGP instance * - * This function allocates and configures shared BGP resources. - * Should be called as the last step during initialization - * (when lock is acquired and neighbor is ready). - * When error, state changed to PS_DOWN, -1 is returned and caller - * should return immediately. + * This function allocates and configures shared BGP resources, mainly listening + * sockets. Should be called as the last step during initialization (when lock + * is acquired and neighbor is ready). When error, caller should change state to + * PS_DOWN and return immediately. */ static int bgp_open(struct bgp_proto *p) { - struct config *cfg = p->cf->c.global; - int errcode; + struct bgp_socket *bs = NULL; + struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; + ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : + (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6); + uint port = p->cf->local_port; - if (!bgp_listen_sk) - bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags); + /* FIXME: Add some global init? 
*/ + if (!bgp_linpool) + init_list(&bgp_sockets); + + /* We assume that cf->iface is defined iff cf->local_ip is link-local */ - if (!bgp_listen_sk) + WALK_LIST(bs, bgp_sockets) + if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->iface == ifa) && (bs->sk->sport == port)) { - errcode = BEM_NO_SOCKET; - goto err; + bs->uc++; + p->sock = bs; + return 0; } - if (!bgp_linpool) - bgp_linpool = lp_new(&root_pool, 4080); + sock *sk = sk_new(proto_pool); + sk->type = SK_TCP_PASSIVE; + sk->ttl = 255; + sk->saddr = addr; + sk->sport = port; + sk->flags = 0; + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->rbsize = BGP_RX_BUFFER_SIZE; + sk->tbsize = BGP_TX_BUFFER_SIZE; + sk->rx_hook = bgp_incoming_connection; + sk->err_hook = bgp_listen_sock_err; + + if (sk_open(sk) < 0) + goto err; - bgp_counter++; + bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); + bs->sk = sk; + bs->uc = 1; + p->sock = bs; - if (p->cf->password) - if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip, - p->cf->iface, p->cf->password, p->cf->setkey) < 0) - { - sk_log_error(bgp_listen_sk, p->p.name); - bgp_close(p, 0); - errcode = BEM_INVALID_MD5; - goto err; - } + add_tail(&bgp_sockets, &bs->n); + + if (!bgp_linpool) + { + bgp_linpool = lp_new_default(proto_pool); + bgp_linpool2 = lp_new_default(proto_pool); + } return 0; err: - p->p.disabled = 1; - bgp_store_error(p, NULL, BE_MISC, errcode); - proto_notify_state(&p->p, PS_DOWN); + sk_log_error(sk, p->p.name); + log(L_ERR "%s: Cannot open listening socket", p->p.name); + rfree(sk); return -1; } +/** + * bgp_close - close a BGP instance + * @p: BGP instance + * + * This function frees and deconfigures shared BGP resources. 
+ */ +static void +bgp_close(struct bgp_proto *p) +{ + struct bgp_socket *bs = p->sock; + + ASSERT(bs && bs->uc); + + if (--bs->uc) + return; + + rfree(bs->sk); + rem_node(&bs->n); + mb_free(bs); + + if (!EMPTY_LIST(bgp_sockets)) + return; + + rfree(bgp_linpool); + bgp_linpool = NULL; + + rfree(bgp_linpool2); + bgp_linpool2 = NULL; +} + +static inline int +bgp_setup_auth(struct bgp_proto *p, int enable) +{ + if (p->cf->password) + { + int rv = sk_set_md5_auth(p->sock->sk, + p->cf->local_ip, p->cf->remote_ip, p->cf->iface, + enable ? p->cf->password : NULL, p->cf->setkey); + + if (rv < 0) + sk_log_error(p->sock->sk, p->p.name); + + return rv; + } + else + return 0; +} + +static inline struct bgp_channel * +bgp_find_channel(struct bgp_proto *p, u32 afi) +{ + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + if (c->afi == afi) + return c; + + return NULL; +} + static void bgp_startup(struct bgp_proto *p) { BGP_TRACE(D_EVENTS, "Started"); - p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP; + p->start_state = BSS_CONNECT; if (!p->cf->passive) bgp_active(p); @@ -159,70 +282,57 @@ bgp_startup_timeout(timer *t) static void bgp_initiate(struct bgp_proto *p) { - int rv = bgp_open(p); - if (rv < 0) - return; + int err_val; + + if (bgp_open(p) < 0) + { err_val = BEM_NO_SOCKET; goto err1; } + + if (bgp_setup_auth(p, 1) < 0) + { err_val = BEM_INVALID_MD5; goto err2; } if (p->cf->bfd) bgp_update_bfd(p, p->cf->bfd); if (p->startup_delay) - { - p->start_state = BSS_DELAY; - BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay); - bgp_start_timer(p->startup_timer, p->startup_delay); - } + { + p->start_state = BSS_DELAY; + BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay); + bgp_start_timer(p->startup_timer, p->startup_delay); + } else bgp_startup(p); -} -/** - * bgp_close - close a BGP instance - * @p: BGP instance - * @apply_md5: 0 to disable unsetting MD5 auth - * - * This function frees 
and deconfigures shared BGP resources. - * @apply_md5 is set to 0 when bgp_close is called as a cleanup - * from failed bgp_open(). - */ -static void -bgp_close(struct bgp_proto *p, int apply_md5) -{ - ASSERT(bgp_counter); - bgp_counter--; + return; - if (p->cf->password && apply_md5) - if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip, - p->cf->iface, NULL, p->cf->setkey) < 0) - sk_log_error(bgp_listen_sk, p->p.name); +err2: + bgp_close(p); +err1: + p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, err_val); + proto_notify_state(&p->p, PS_DOWN); - if (!bgp_counter) - { - rfree(bgp_listen_sk); - bgp_listen_sk = NULL; - rfree(bgp_linpool); - bgp_linpool = NULL; - } + return; } /** * bgp_start_timer - start a BGP timer * @t: timer - * @value: time to fire (0 to disable the timer) + * @value: time (in seconds) to fire (0 to disable the timer) * - * This functions calls tm_start() on @t with time @value and the - * amount of randomization suggested by the BGP standard. Please use - * it for all BGP timers. + * This functions calls tm_start() on @t with time @value and the amount of + * randomization suggested by the BGP standard. Please use it for all BGP + * timers. */ void -bgp_start_timer(timer *t, int value) +bgp_start_timer(timer *t, uint value) { if (value) - { - /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */ - t->randomize = value / 4; - tm_start(t, value - t->randomize); - } + { + /* The randomization procedure is specified in RFC 4271 section 10 */ + btime time = value S; + btime randomize = random() % ((time / 4) + 1); + tm_start(t, time - randomize); + } else tm_stop(t); } @@ -231,8 +341,8 @@ bgp_start_timer(timer *t, int value) * bgp_close_conn - close a BGP connection * @conn: connection to close * - * This function takes a connection described by the &bgp_conn structure, - * closes its socket and frees all resources associated with it. 
+ * This function takes a connection described by the &bgp_conn structure, closes + * its socket and frees all resources associated with it. */ void bgp_close_conn(struct bgp_conn *conn) @@ -241,16 +351,22 @@ bgp_close_conn(struct bgp_conn *conn) DBG("BGP: Closing connection\n"); conn->packets_to_send = 0; - rfree(conn->connect_retry_timer); - conn->connect_retry_timer = NULL; + conn->channels_to_send = 0; + rfree(conn->connect_timer); + conn->connect_timer = NULL; rfree(conn->keepalive_timer); conn->keepalive_timer = NULL; rfree(conn->hold_timer); conn->hold_timer = NULL; - rfree(conn->sk); - conn->sk = NULL; rfree(conn->tx_ev); conn->tx_ev = NULL; + rfree(conn->sk); + conn->sk = NULL; + + mb_free(conn->local_caps); + conn->local_caps = NULL; + mb_free(conn->remote_caps); + conn->remote_caps = NULL; } @@ -258,9 +374,9 @@ bgp_close_conn(struct bgp_conn *conn) * bgp_update_startup_delay - update a startup delay * @p: BGP instance * - * This function updates a startup delay that is used to postpone next BGP connect. - * It also handles disable_after_error and might stop BGP instance when error - * happened and disable_after_error is on. + * This function updates a startup delay that is used to postpone next BGP + * connect. It also handles disable_after_error and might stop BGP instance + * when error happened and disable_after_error is on. * * It should be called when BGP protocol error happened. 
*/ @@ -271,17 +387,17 @@ bgp_update_startup_delay(struct bgp_proto *p) DBG("BGP: Updating startup delay\n"); - if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time)) + if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S)) p->startup_delay = 0; - p->last_proto_error = now; + p->last_proto_error = current_time(); if (cf->disable_after_error) - { - p->startup_delay = 0; - p->p.disabled = 1; - return; - } + { + p->startup_delay = 0; + p->p.disabled = 1; + return; + } if (!p->startup_delay) p->startup_delay = cf->error_delay_time_min; @@ -293,29 +409,35 @@ static void bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len) { switch (conn->state) - { - case BS_IDLE: - case BS_CLOSE: - return; - case BS_CONNECT: - case BS_ACTIVE: - bgp_conn_enter_idle_state(conn); - return; - case BS_OPENSENT: - case BS_OPENCONFIRM: - case BS_ESTABLISHED: - bgp_error(conn, 6, subcode, data, len); - return; - default: - bug("bgp_graceful_close_conn: Unknown state %d", conn->state); - } + { + case BS_IDLE: + case BS_CLOSE: + return; + + case BS_CONNECT: + case BS_ACTIVE: + bgp_conn_enter_idle_state(conn); + return; + + case BS_OPENSENT: + case BS_OPENCONFIRM: + case BS_ESTABLISHED: + bgp_error(conn, 6, subcode, data, len); + return; + + default: + bug("bgp_graceful_close_conn: Unknown state %d", conn->state); + } } static void bgp_down(struct bgp_proto *p) { if (p->start_state > BSS_PREPARE) - bgp_close(p, 1); + { + bgp_setup_auth(p, 0); + bgp_close(p); + } BGP_TRACE(D_EVENTS, "Down"); proto_notify_state(&p->p, PS_DOWN); @@ -327,15 +449,15 @@ bgp_decision(void *vp) struct bgp_proto *p = vp; DBG("BGP: Decision start\n"); - if ((p->p.proto_state == PS_START) - && (p->outgoing_conn.state == BS_IDLE) - && (p->incoming_conn.state != BS_OPENCONFIRM) - && (!p->cf->passive)) + if ((p->p.proto_state == PS_START) && + (p->outgoing_conn.state == BS_IDLE) && + (p->incoming_conn.state != 
BS_OPENCONFIRM) && + !p->cf->passive) bgp_active(p); - if ((p->p.proto_state == PS_STOP) - && (p->outgoing_conn.state == BS_IDLE) - && (p->incoming_conn.state == BS_IDLE)) + if ((p->p.proto_state == PS_STOP) && + (p->outgoing_conn.state == BS_IDLE) && + (p->incoming_conn.state == BS_IDLE)) bgp_down(p); } @@ -349,7 +471,7 @@ bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len) } static inline void -bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state) +bgp_conn_set_state(struct bgp_conn *conn, uint new_state) { if (conn->bgp->p.mrtdump & MD_STATES) mrt_dump_bgp_state_change(conn, conn->state, new_state); @@ -364,13 +486,17 @@ bgp_conn_enter_openconfirm_state(struct bgp_conn *conn) bgp_conn_set_state(conn, BS_OPENCONFIRM); } +static const struct bgp_af_caps dummy_af_caps = { }; + void bgp_conn_enter_established_state(struct bgp_conn *conn) { struct bgp_proto *p = conn->bgp; + struct bgp_caps *local = conn->local_caps; + struct bgp_caps *peer = conn->remote_caps; + struct bgp_channel *c; BGP_TRACE(D_EVENTS, "BGP session established"); - DBG("BGP: UP!!!\n"); /* For multi-hop BGP sessions */ if (ipa_zero(p->source_addr)) @@ -381,30 +507,92 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) p->conn = conn; p->last_error_class = 0; p->last_error_code = 0; - p->feed_state = BFS_NONE; - p->load_state = BFS_NONE; - bgp_init_bucket_table(p); - bgp_init_prefix_table(p, 8); - int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART); + p->as4_session = conn->as4_session; - if (p->p.gr_recovery && !peer_gr_ready) - proto_graceful_restart_unlock(&p->p); + p->route_refresh = peer->route_refresh; + p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh; - if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready) - p->p.gr_wait = 1; + /* Whether we may handle possible GR of peer (it has some AF GR-able) */ + p->gr_ready = 0; /* Updated later */ - if (p->gr_active) + /* Whether peer is ready to 
handle our GR recovery */ + int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART); + + if (p->gr_active_num) tm_stop(p->gr_timer); - if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING))) - bgp_graceful_restart_done(p); + /* Number of active channels */ + int num = 0; + + WALK_LIST(c, p->p.channels) + { + const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); + const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi); - /* GR capability implies that neighbor will send End-of-RIB */ - if (conn->peer_gr_aware) - p->load_state = BFS_LOADING; + /* Ignore AFIs that were not announced in multiprotocol capability */ + if (!loc || !loc->ready) + loc = &dummy_af_caps; - /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */ + if (!rem || !rem->ready) + rem = &dummy_af_caps; + + int active = loc->ready && rem->ready; + c->c.disabled = !active; + c->c.reloadable = p->route_refresh; + + c->index = active ? num++ : 0; + + c->feed_state = BFS_NONE; + c->load_state = BFS_NONE; + + /* Channels where peer may do GR */ + c->gr_ready = active && local->gr_aware && rem->gr_able; + p->gr_ready = p->gr_ready || c->gr_ready; + + /* Channels not able to recover gracefully */ + if (p->p.gr_recovery && (!active || !peer_gr_ready)) + channel_graceful_restart_unlock(&c->c); + + /* Channels waiting for local convergence */ + if (p->p.gr_recovery && loc->gr_able && peer_gr_ready) + c->c.gr_wait = 1; + + /* Channels where peer is not able to recover gracefully */ + if (c->gr_active && ! 
(c->gr_ready && (rem->gr_af_flags & BGP_GRF_FORWARDING))) + bgp_graceful_restart_done(c); + + /* GR capability implies that neighbor will send End-of-RIB */ + if (peer->gr_aware) + c->load_state = BFS_LOADING; + + c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop); + c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX); + c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX); + + /* Update RA mode */ + if (c->add_path_tx) + c->c.ra_mode = RA_ANY; + else if (c->cf->secondary) + c->c.ra_mode = RA_ACCEPTED; + else + c->c.ra_mode = RA_OPTIMAL; + } + + p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32)); + p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *)); + p->channel_count = num; + + WALK_LIST(c, p->p.channels) + { + if (c->c.disabled) + continue; + + p->afi_map[c->index] = c->afi; + p->channel_map[c->index] = c; + } + + /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */ bgp_conn_set_state(conn, BS_ESTABLISHED); proto_notify_state(&p->p, PS_UP); @@ -416,9 +604,6 @@ bgp_conn_leave_established_state(struct bgp_proto *p) BGP_TRACE(D_EVENTS, "BGP session closed"); p->conn = NULL; - bgp_free_prefix_table(p); - bgp_free_bucket_table(p); - if (p->p.proto_state == PS_UP) bgp_stop(p, 0, NULL, 0); } @@ -471,34 +656,71 @@ bgp_handle_graceful_restart(struct bgp_proto *p) ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready); BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s", - p->gr_active ? " - already pending" : ""); - proto_notify_state(&p->p, PS_START); + p->gr_active_num ? " - already pending" : ""); + + p->gr_active_num = 0; + + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + { + /* FIXME: perhaps check for channel state instead of disabled flag? 
*/ + if (c->c.disabled) + continue; + + if (c->gr_ready) + { + if (c->gr_active) + rt_refresh_end(c->c.table, &c->c); + + c->gr_active = 1; + p->gr_active_num++; + rt_refresh_begin(c->c.table, &c->c); + } + else + { + /* Just flush the routes */ + rt_refresh_begin(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c); + } - if (p->gr_active) - rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); + /* Reset bucket and prefix tables */ + bgp_free_bucket_table(c); + bgp_free_prefix_table(c); + bgp_init_bucket_table(c); + bgp_init_prefix_table(c); + c->packets_to_send = 0; + } + + /* p->gr_ready -> at least one active channel is c->gr_ready */ + ASSERT(p->gr_active_num > 0); - p->gr_active = 1; - bgp_start_timer(p->gr_timer, p->conn->peer_gr_time); - rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook); + proto_notify_state(&p->p, PS_START); + bgp_start_timer(p->gr_timer, p->conn->remote_caps->gr_time); } /** * bgp_graceful_restart_done - finish active BGP graceful restart - * @p: BGP instance + * @c: BGP channel * * This function is called when the active BGP graceful restart of the neighbor - * should be finished - either successfully (the neighbor sends all paths and - * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does - * not support BGP graceful restart on the new session). The function ends - * routing table refresh cycle and stops BGP restart timer. + * should be finished for channel @c - either successfully (the neighbor sends + * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or + * unsuccessfully (the neighbor does not support BGP graceful restart on the new + * session). The function ends the routing table refresh cycle. 
*/ void -bgp_graceful_restart_done(struct bgp_proto *p) +bgp_graceful_restart_done(struct bgp_channel *c) { - BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); - p->gr_active = 0; - tm_stop(p->gr_timer); - rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); + struct bgp_proto *p = (void *) c->c.proto; + + ASSERT(c->gr_active); + c->gr_active = 0; + p->gr_active_num--; + + if (!p->gr_active_num) + BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); + + rt_refresh_end(c->c.table, &c->c); } /** @@ -522,7 +744,7 @@ bgp_graceful_restart_timeout(timer *t) /** * bgp_refresh_begin - start incoming enhanced route refresh sequence - * @p: BGP instance + * @c: BGP channel * * This function is called when an incoming enhanced route refresh sequence is * started by the neighbor, demarcated by the BoRR packet. The function updates @@ -531,18 +753,20 @@ bgp_graceful_restart_timeout(timer *t) * ensure that these two sequences do not overlap. */ void -bgp_refresh_begin(struct bgp_proto *p) +bgp_refresh_begin(struct bgp_channel *c) { - if (p->load_state == BFS_LOADING) - { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } + struct bgp_proto *p = (void *) c->c.proto; + + if (c->load_state == BFS_LOADING) + { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } - p->load_state = BFS_REFRESHING; - rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook); + c->load_state = BFS_REFRESHING; + rt_refresh_begin(c->c.table, &c->c); } /** * bgp_refresh_end - finish incoming enhanced route refresh sequence - * @p: BGP instance + * @c: BGP channel * * This function is called when an incoming enhanced route refresh sequence is * finished by the neighbor, demarcated by the EoRR packet. The function updates @@ -550,39 +774,26 @@ bgp_refresh_begin(struct bgp_proto *p) * during the sequence are removed by the nest. 
*/ void -bgp_refresh_end(struct bgp_proto *p) +bgp_refresh_end(struct bgp_channel *c) { - if (p->load_state != BFS_REFRESHING) - { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } + struct bgp_proto *p = (void *) c->c.proto; + + if (c->load_state != BFS_REFRESHING) + { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } - p->load_state = BFS_NONE; - rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); + c->load_state = BFS_NONE; + rt_refresh_end(c->c.table, &c->c); } static void bgp_send_open(struct bgp_conn *conn) { - conn->start_state = conn->bgp->start_state; - - // Default values, possibly changed by receiving capabilities. - conn->advertised_as = 0; - conn->peer_refresh_support = 0; - conn->peer_as4_support = 0; - conn->peer_add_path = 0; - conn->peer_enhanced_refresh_support = 0; - conn->peer_gr_aware = 0; - conn->peer_gr_able = 0; - conn->peer_gr_time = 0; - conn->peer_gr_flags = 0; - conn->peer_gr_aflags = 0; - conn->peer_ext_messages_support = 0; - DBG("BGP: Sending open\n"); conn->sk->rx_hook = bgp_rx; conn->sk->tx_hook = bgp_tx; - tm_stop(conn->connect_retry_timer); - bgp_schedule_packet(conn, PKT_OPEN); + tm_stop(conn->connect_timer); + bgp_schedule_packet(conn, NULL, PKT_OPEN); bgp_conn_set_state(conn, BS_OPENSENT); bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time); } @@ -605,10 +816,10 @@ bgp_connect_timeout(timer *t) DBG("BGP: connect_timeout\n"); if (p->p.proto_state == PS_START) - { - bgp_close_conn(conn); - bgp_connect(p); - } + { + bgp_close_conn(conn); + bgp_connect(p); + } else bgp_conn_enter_idle_state(conn); } @@ -672,7 +883,7 @@ bgp_keepalive_timeout(timer *t) struct bgp_conn *conn = t->data; DBG("BGP: Keepalive timer\n"); - bgp_schedule_packet(conn, PKT_KEEPALIVE); + bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE); /* Kick TX a bit faster */ if (ev_active(conn->tx_ev)) @@ -682,21 +893,18 @@ bgp_keepalive_timeout(timer *t) static 
void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) { - timer *t; - conn->sk = NULL; conn->bgp = p; + conn->packets_to_send = 0; + conn->channels_to_send = 0; + conn->last_channel = 0; + conn->last_channel_count = 0; + + conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0); + conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0); + conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0); - t = conn->connect_retry_timer = tm_new(p->p.pool); - t->hook = bgp_connect_timeout; - t->data = conn; - t = conn->hold_timer = tm_new(p->p.pool); - t->hook = bgp_hold_timeout; - t->data = conn; - t = conn->keepalive_timer = tm_new(p->p.pool); - t->hook = bgp_keepalive_timeout; - t->data = conn; conn->tx_ev = ev_new(p->p.pool); conn->tx_ev->hook = bgp_kick_tx; conn->tx_ev->data = conn; @@ -720,7 +928,7 @@ bgp_active(struct bgp_proto *p) BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); bgp_setup_conn(p, conn); bgp_conn_set_state(conn, BS_ACTIVE); - bgp_start_timer(conn->connect_retry_timer, delay); + bgp_start_timer(conn->connect_timer, delay); } /** @@ -734,12 +942,11 @@ bgp_active(struct bgp_proto *p) static void bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */ { - sock *s; struct bgp_conn *conn = &p->outgoing_conn; int hops = p->cf->multihop ? 
: 1; DBG("BGP: Connecting\n"); - s = sk_new(p->p.pool); + sock *s = sk_new(p->p.pool); s->type = SK_TCP_ACTIVE; s->saddr = p->source_addr; s->daddr = p->cf->remote_ip; @@ -767,10 +974,10 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c goto err; DBG("BGP: Waiting for connect success\n"); - bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time); + bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time); return; - err: +err: sk_log_error(s, p->p.name); bgp_sock_err(s, 0); return; @@ -784,16 +991,15 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c static struct bgp_proto * bgp_find_proto(sock *sk) { - struct proto_config *pc; + struct bgp_proto *p; - WALK_LIST(pc, config->protos) - if ((pc->protocol == &proto_bgp) && pc->proto) - { - struct bgp_proto *p = (struct bgp_proto *) pc->proto; - if (ipa_equal(p->cf->remote_ip, sk->daddr) && - (!p->cf->iface || (p->cf->iface == sk->iface))) - return p; - } + WALK_LIST(p, proto_list) + if ((p->p.proto == &proto_bgp) && + ipa_equal(p->cf->remote_ip, sk->daddr) && + (!p->cf->iface || (p->cf->iface == sk->iface)) && + (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)) && + (p->cf->local_port == sk->sport)) + return p; return NULL; } @@ -819,12 +1025,12 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport); p = bgp_find_proto(sk); if (!p) - { - log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)", - sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport); - rfree(sk); - return 0; - } + { + log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)", + sk->daddr, ipa_is_link_local(sk->daddr) ? 
sk->iface : NULL, sk->dport); + rfree(sk); + return 0; + } /* * BIRD should keep multiple incoming connections in OpenSent state (for @@ -837,26 +1043,26 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk); if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready) - { - bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART); - bgp_handle_graceful_restart(p); - bgp_conn_enter_idle_state(p->conn); - acc = 1; - - /* There might be separate incoming connection in OpenSent state */ - if (p->incoming_conn.state > BS_ACTIVE) - bgp_close_conn(&p->incoming_conn); - } + { + bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART); + bgp_handle_graceful_restart(p); + bgp_conn_enter_idle_state(p->conn); + acc = 1; + + /* There might be separate incoming connection in OpenSent state */ + if (p->incoming_conn.state > BS_ACTIVE) + bgp_close_conn(&p->incoming_conn); + } BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s", sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport, acc ? "accepted" : "rejected"); if (!acc) - { - rfree(sk); - return 0; - } + { + rfree(sk); + return 0; + } hops = p->cf->multihop ? : 1; @@ -868,11 +1074,11 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) goto err; if (p->cf->enable_extended_messages) - { - sk->rbsize = BGP_RX_BUFFER_EXT_SIZE; - sk->tbsize = BGP_TX_BUFFER_EXT_SIZE; - sk_reallocate(sk); - } + { + sk->rbsize = BGP_RX_BUFFER_EXT_SIZE; + sk->tbsize = BGP_TX_BUFFER_EXT_SIZE; + sk_reallocate(sk); + } bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, sk); @@ -895,34 +1101,6 @@ bgp_listen_sock_err(sock *sk UNUSED, int err) log(L_ERR "BGP: Error on listening socket: %M", err); } -static sock * -bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags) -{ - sock *s = sk_new(&root_pool); - DBG("BGP: Creating listening socket\n"); - s->type = SK_TCP_PASSIVE; - s->ttl = 255; - s->saddr = addr; - s->sport = port ? 
port : BGP_PORT; - s->flags = flags ? 0 : SKF_V6ONLY; - s->tos = IP_PREC_INTERNET_CONTROL; - s->rbsize = BGP_RX_BUFFER_SIZE; - s->tbsize = BGP_TX_BUFFER_SIZE; - s->rx_hook = bgp_incoming_connection; - s->err_hook = bgp_listen_sock_err; - - if (sk_open(s) < 0) - goto err; - - return s; - - err: - sk_log_error(s, "BGP"); - log(L_ERR "BGP: Cannot open listening socket"); - rfree(s); - return NULL; -} - static void bgp_start_neighbor(struct bgp_proto *p) { @@ -931,23 +1109,10 @@ bgp_start_neighbor(struct bgp_proto *p) if (ipa_zero(p->source_addr)) p->source_addr = p->neigh->ifa->ip; -#ifdef IPV6 - { - struct ifa *a; - p->local_link = IPA_NONE; - WALK_LIST(a, p->neigh->iface->addrs) - if (a->scope == SCOPE_LINK) - { - p->local_link = a->ip; - break; - } - - if (! ipa_nonzero(p->local_link)) - log(L_WARN "%s: Missing link local address on interface %s", p->p.name, p->neigh->iface->name); - - DBG("BGP: Selected link-level address %I\n", p->local_link); - } -#endif + if (ipa_is_link_local(p->source_addr)) + p->link_addr = p->source_addr; + else if (p->neigh->iface->llv6) + p->link_addr = p->neigh->iface->llv6->ip; bgp_initiate(p); } @@ -967,34 +1132,34 @@ bgp_neigh_notify(neighbor *n) int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE); if (n->scope <= 0) + { + if (!prepare) { - if (!prepare) - { - BGP_TRACE(D_EVENTS, "Neighbor lost"); - bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST); - /* Perhaps also run bgp_update_startup_delay(p)? */ - bgp_stop(p, 0, NULL, 0); - } + BGP_TRACE(D_EVENTS, "Neighbor lost"); + bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST); + /* Perhaps also run bgp_update_startup_delay(p)? 
*/ + bgp_stop(p, 0, NULL, 0); } + } else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP)) + { + if (!prepare) { - if (!prepare) - { - BGP_TRACE(D_EVENTS, "Link down"); - bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN); - if (ps == PS_UP) - bgp_update_startup_delay(p); - bgp_stop(p, 0, NULL, 0); - } + BGP_TRACE(D_EVENTS, "Link down"); + bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN); + if (ps == PS_UP) + bgp_update_startup_delay(p); + bgp_stop(p, 0, NULL, 0); } + } else + { + if (prepare) { - if (prepare) - { - BGP_TRACE(D_EVENTS, "Neighbor ready"); - bgp_start_neighbor(p); - } + BGP_TRACE(D_EVENTS, "Neighbor ready"); + bgp_start_neighbor(p); } + } } static void @@ -1004,13 +1169,13 @@ bgp_bfd_notify(struct bfd_request *req) int ps = p->p.proto_state; if (req->down && ((ps == PS_START) || (ps == PS_UP))) - { - BGP_TRACE(D_EVENTS, "BFD session down"); - bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN); - if (ps == PS_UP) - bgp_update_startup_delay(p); - bgp_stop(p, 0, NULL, 0); - } + { + BGP_TRACE(D_EVENTS, "BFD session down"); + bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN); + if (ps == PS_UP) + bgp_update_startup_delay(p); + bgp_stop(p, 0, NULL, 0); + } } static void @@ -1022,71 +1187,72 @@ bgp_update_bfd(struct bgp_proto *p, int use_bfd) bgp_bfd_notify, p); if (!use_bfd && p->bfd_req) - { - rfree(p->bfd_req); - p->bfd_req = NULL; - } + { + rfree(p->bfd_req); + p->bfd_req = NULL; + } } -static int -bgp_reload_routes(struct proto *P) +static void +bgp_reload_routes(struct channel *C) { - struct bgp_proto *p = (struct bgp_proto *) P; - if (!p->conn || !p->conn->peer_refresh_support) - return 0; + struct bgp_proto *p = (void *) C->proto; + struct bgp_channel *c = (void *) C; - bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH); - return 1; + ASSERT(p->conn && p->route_refresh); + + bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); } static void -bgp_feed_begin(struct proto *P, int initial) +bgp_feed_begin(struct channel *C, int initial) { - struct 
bgp_proto *p = (struct bgp_proto *) P; + struct bgp_proto *p = (void *) C->proto; + struct bgp_channel *c = (void *) C; /* This should not happen */ if (!p->conn) return; if (initial && p->cf->gr_mode) - p->feed_state = BFS_LOADING; + c->feed_state = BFS_LOADING; /* It is refeed and both sides support enhanced route refresh */ - if (!initial && p->cf->enable_refresh && - p->conn->peer_enhanced_refresh_support) - { - /* BoRR must not be sent before End-of-RIB */ - if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED) - return; + if (!initial && p->enhanced_refresh) + { + /* BoRR must not be sent before End-of-RIB */ + if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED) + return; - p->feed_state = BFS_REFRESHING; - bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH); - } + c->feed_state = BFS_REFRESHING; + bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH); + } } static void -bgp_feed_end(struct proto *P) +bgp_feed_end(struct channel *C) { - struct bgp_proto *p = (struct bgp_proto *) P; + struct bgp_proto *p = (void *) C->proto; + struct bgp_channel *c = (void *) C; /* This should not happen */ if (!p->conn) return; /* Non-demarcated feed ended, nothing to do */ - if (p->feed_state == BFS_NONE) + if (c->feed_state == BFS_NONE) return; /* Schedule End-of-RIB packet */ - if (p->feed_state == BFS_LOADING) - p->feed_state = BFS_LOADED; + if (c->feed_state == BFS_LOADING) + c->feed_state = BFS_LOADED; /* Schedule EoRR packet */ - if (p->feed_state == BFS_REFRESHING) - p->feed_state = BFS_REFRESHED; + if (c->feed_state == BFS_REFRESHING) + c->feed_state = BFS_REFRESHED; /* Kick TX hook */ - bgp_schedule_packet(p->conn, PKT_UPDATE); + bgp_schedule_packet(p->conn, c, PKT_UPDATE); } @@ -1097,30 +1263,30 @@ bgp_start_locked(struct object_lock *lock) struct bgp_config *cf = p->cf; if (p->p.proto_state != PS_START) - { - DBG("BGP: Got lock in different state %d\n", p->p.proto_state); - return; - } + { + DBG("BGP: Got lock in different state %d\n", 
p->p.proto_state); + return; + } DBG("BGP: Got lock\n"); if (cf->multihop) - { - /* Multi-hop sessions do not use neighbor entries */ - bgp_initiate(p); - return; - } + { + /* Multi-hop sessions do not use neighbor entries */ + bgp_initiate(p); + return; + } neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY); if (!n) - { - log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface); - /* As we do not start yet, we can just disable protocol */ - p->p.disabled = 1; - bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); - proto_notify_state(&p->p, PS_DOWN); - return; - } + { + log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface); + /* As we do not start yet, we can just disable protocol */ + p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); + proto_notify_state(&p->p, PS_DOWN); + return; + } p->neigh = n; @@ -1145,36 +1311,34 @@ bgp_start(struct proto *P) p->neigh = NULL; p->bfd_req = NULL; p->gr_ready = 0; - p->gr_active = 0; - - rt_lock_table(p->igp_table); + p->gr_active_num = 0; p->event = ev_new(p->p.pool); p->event->hook = bgp_decision; p->event->data = p; - p->startup_timer = tm_new(p->p.pool); - p->startup_timer->hook = bgp_startup_timeout; - p->startup_timer->data = p; - - p->gr_timer = tm_new(p->p.pool); - p->gr_timer->hook = bgp_graceful_restart_timeout; - p->gr_timer->data = p; + p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0); + p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0); p->local_id = proto_get_router_id(P->cf); if (p->rr_client) p->rr_cluster_id = p->cf->rr_cluster_id ? 
p->cf->rr_cluster_id : p->local_id; p->remote_id = 0; - p->source_addr = p->cf->source_addr; + p->source_addr = p->cf->local_ip; + p->link_addr = IPA_NONE; + /* Lock all channels when in GR recovery mode */ if (p->p.gr_recovery && p->cf->gr_mode) - proto_graceful_restart_lock(P); + { + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + channel_graceful_restart_lock(&c->c); + } /* - * Before attempting to create the connection, we need to lock the - * port, so that are sure we're the only instance attempting to talk - * with that neighbor. + * Before attempting to create the connection, we need to lock the port, + * so that we are the only instance attempting to talk with that neighbor. */ lock = p->lock = olock_new(P->pool); @@ -1205,45 +1369,45 @@ bgp_shutdown(struct proto *P) BGP_TRACE(D_EVENTS, "Shutdown requested"); switch (P->down_code) - { - case PDC_CF_REMOVE: - case PDC_CF_DISABLE: - subcode = 3; // Errcode 6, 3 - peer de-configured - break; - - case PDC_CF_RESTART: - subcode = 6; // Errcode 6, 6 - other configuration change - break; - - case PDC_CMD_DISABLE: - case PDC_CMD_SHUTDOWN: - subcode = 2; // Errcode 6, 2 - administrative shutdown - message = P->message; - break; - - case PDC_CMD_RESTART: - subcode = 4; // Errcode 6, 4 - administrative reset - message = P->message; - break; - - case PDC_RX_LIMIT_HIT: - case PDC_IN_LIMIT_HIT: - subcode = 1; // Errcode 6, 1 - max number of prefixes reached - /* log message for compatibility */ - log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name); - goto limit; - - case PDC_OUT_LIMIT_HIT: - subcode = proto_restart ? 
4 : 2; // Administrative reset or shutdown - - limit: - bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED); - if (proto_restart) - bgp_update_startup_delay(p); - else - p->startup_delay = 0; - goto done; - } + { + case PDC_CF_REMOVE: + case PDC_CF_DISABLE: + subcode = 3; // Errcode 6, 3 - peer de-configured + break; + + case PDC_CF_RESTART: + subcode = 6; // Errcode 6, 6 - other configuration change + break; + + case PDC_CMD_DISABLE: + case PDC_CMD_SHUTDOWN: + subcode = 2; // Errcode 6, 2 - administrative shutdown + message = P->message; + break; + + case PDC_CMD_RESTART: + subcode = 4; // Errcode 6, 4 - administrative reset + message = P->message; + break; + + case PDC_RX_LIMIT_HIT: + case PDC_IN_LIMIT_HIT: + subcode = 1; // Errcode 6, 1 - max number of prefixes reached + /* log message for compatibility */ + log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name); + goto limit; + + case PDC_OUT_LIMIT_HIT: + subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown + + limit: + bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED); + if (proto_restart) + bgp_update_startup_delay(p); + else + p->startup_delay = 0; + goto done; + } bgp_store_error(p, NULL, BE_MAN_DOWN, 0); p->startup_delay = 0; @@ -1267,27 +1431,13 @@ done: return p->p.proto_state; } -static void -bgp_cleanup(struct proto *P) -{ - struct bgp_proto *p = (struct bgp_proto *) P; - rt_unlock_table(p->igp_table); -} - -static rtable * -get_igp_table(struct bgp_config *cf) -{ - return cf->igp_table ? cf->igp_table->table : cf->c.table->table; -} - static struct proto * -bgp_init(struct proto_config *C) +bgp_init(struct proto_config *CF) { - struct proto *P = proto_new(C, sizeof(struct bgp_proto)); - struct bgp_config *c = (struct bgp_config *) C; + struct proto *P = proto_new(CF); struct bgp_proto *p = (struct bgp_proto *) P; + struct bgp_config *cf = (struct bgp_config *) CF; - P->accept_ra_types = c->secondary ? 
RA_ACCEPTED : RA_OPTIMAL; P->rt_notify = bgp_rt_notify; P->import_control = bgp_import_control; P->neigh_notify = bgp_neigh_notify; @@ -1296,102 +1446,308 @@ bgp_init(struct proto_config *C) P->feed_end = bgp_feed_end; P->rte_better = bgp_rte_better; P->rte_mergable = bgp_rte_mergable; - P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL; - - p->cf = c; - p->local_as = c->local_as; - p->remote_as = c->remote_as; - p->is_internal = (c->local_as == c->remote_as); - p->rs_client = c->rs_client; - p->rr_client = c->rr_client; - p->igp_table = get_igp_table(c); + P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; + + p->cf = cf; + p->local_as = cf->local_as; + p->remote_as = cf->remote_as; + p->public_as = cf->local_as; + p->is_internal = (cf->local_as == cf->remote_as); + p->is_interior = p->is_internal || cf->confederation_member; + p->rs_client = cf->rs_client; + p->rr_client = cf->rr_client; + + /* Confederation ID is used for truly external peers */ + if (cf->confederation && !p->is_interior) + p->public_as = cf->confederation; + + /* Add all channels */ + struct bgp_channel_config *cc; + WALK_LIST(cc, CF->channels) + proto_add_channel(P, &cc->c); return P; } +static void +bgp_channel_init(struct channel *C, struct channel_config *CF) +{ + struct bgp_channel *c = (void *) C; + struct bgp_channel_config *cf = (void *) CF; + + c->cf = cf; + c->afi = cf->afi; + c->desc = cf->desc; + + if (cf->igp_table_ip4) + c->igp_table_ip4 = cf->igp_table_ip4->table; + + if (cf->igp_table_ip6) + c->igp_table_ip6 = cf->igp_table_ip6->table; +} + +static int +bgp_channel_start(struct channel *C) +{ + struct bgp_proto *p = (void *) C->proto; + struct bgp_channel *c = (void *) C; + ip_addr src = p->source_addr; + + if (c->igp_table_ip4) + rt_lock_table(c->igp_table_ip4); + + if (c->igp_table_ip6) + rt_lock_table(c->igp_table_ip6); + + c->pool = p->p.pool; // XXXX + bgp_init_bucket_table(c); + bgp_init_prefix_table(c); + + c->next_hop_addr = 
c->cf->next_hop_addr; + c->link_addr = IPA_NONE; + c->packets_to_send = 0; + + /* Try to use source address as next hop address */ + if (ipa_zero(c->next_hop_addr)) + { + if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop)) + c->next_hop_addr = src; + + if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop)) + c->next_hop_addr = src; + } + + /* Use preferred addresses associated with interface / source address */ + if (ipa_zero(c->next_hop_addr)) + { + /* We know the iface for single-hop, we make lookup for multihop */ + struct neighbor *nbr = p->neigh ?: neigh_find2(&p->p, &src, NULL, 0); + struct iface *iface = nbr ? nbr->iface : NULL; + + if (bgp_channel_is_ipv4(c) && iface && iface->addr4) + c->next_hop_addr = iface->addr4->ip; + + if (bgp_channel_is_ipv6(c) && iface && iface->addr6) + c->next_hop_addr = iface->addr6->ip; + } + + /* Exit if no feasible next hop address is found */ + if (ipa_zero(c->next_hop_addr)) + { + log(L_WARN "%s: Missing next hop address", p->p.name); + return 0; + } + + /* Set link-local address for IPv6 single-hop BGP */ + if (ipa_is_ip6(c->next_hop_addr) && p->neigh) + { + c->link_addr = p->link_addr; + + if (ipa_zero(c->link_addr)) + log(L_WARN "%s: Missing link-local address", p->p.name); + } + + /* Link local address is already in c->link_addr */ + if (ipa_is_link_local(c->next_hop_addr)) + c->next_hop_addr = IPA_NONE; + + return 0; /* XXXX: Currently undefined */ +} + +static void +bgp_channel_shutdown(struct channel *C) +{ + struct bgp_channel *c = (void *) C; + + c->next_hop_addr = IPA_NONE; + c->link_addr = IPA_NONE; + c->packets_to_send = 0; +} + +static void +bgp_channel_cleanup(struct channel *C) +{ + struct bgp_channel *c = (void *) C; + + if (c->igp_table_ip4) + rt_unlock_table(c->igp_table_ip4); + + if (c->igp_table_ip6) + rt_unlock_table(c->igp_table_ip6); +} + +static inline struct bgp_channel_config * +bgp_find_channel_config(struct bgp_config *cf, u32 afi) +{ + struct bgp_channel_config *cc; 
+ + WALK_LIST(cc, cf->c.channels) + if (cc->afi == afi) + return cc; + + return NULL; +} + +struct rtable_config * +bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type) +{ + struct bgp_channel_config *cc2; + struct rtable_config *tab; + + /* First, try table connected by the channel */ + if (cc->c.table->addr_type == type) + return cc->c.table; + + /* Find paired channel with the same SAFI but the other AFI */ + u32 afi2 = cc->afi ^ 0x30000; + cc2 = bgp_find_channel_config(cf, afi2); + + /* Second, try IGP table configured in the paired channel */ + if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6)) + return tab; + + /* Third, try table connected by the paired channel */ + if (cc2 && (cc2->c.table->addr_type == type)) + return cc2->c.table; + + /* Last, try default table of given type */ + if (tab = cf->c.global->def_tables[type]) + return tab; + + cf_error("Undefined IGP table"); +} + void -bgp_check_config(struct bgp_config *c) +bgp_postconfig(struct proto_config *CF) { - int internal = (c->local_as == c->remote_as); + struct bgp_config *cf = (void *) CF; + int internal = (cf->local_as == cf->remote_as); + int interior = internal || cf->confederation_member; /* Do not check templates at all */ - if (c->c.class == SYM_TEMPLATE) + if (cf->c.class == SYM_TEMPLATE) return; /* EBGP direct by default, IBGP multihop by default */ - if (c->multihop < 0) - c->multihop = internal ? 64 : 0; - - /* Different default for gw_mode */ - if (!c->gw_mode) - c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT; - - /* Different default based on rs_client */ - if (!c->missing_lladdr) - c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF; + if (cf->multihop < 0) + cf->multihop = internal ? 
64 : 0; - /* Disable after error incompatible with restart limit action */ - if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error) - c->c.in_limit->action = PLA_DISABLE; + /* Link check for single-hop BGP by default */ + if (cf->check_link < 0) + cf->check_link = !cf->multihop; - if (!c->local_as) + if (!cf->local_as) cf_error("Local AS number must be set"); - if (ipa_zero(c->remote_ip)) + if (ipa_zero(cf->remote_ip)) cf_error("Neighbor must be configured"); - if (!c->remote_as) + if (!cf->remote_as) cf_error("Remote AS number must be set"); - if (ipa_is_link_local(c->remote_ip) && !c->iface) + if (ipa_is_link_local(cf->remote_ip) && !cf->iface) cf_error("Link-local neighbor address requires specified interface"); - if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF)) + if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF)) cf_error("Neighbor AS number out of range (AS4 not available)"); - if (!internal && c->rr_client) + if (!internal && cf->rr_client) cf_error("Only internal neighbor can be RR client"); - if (internal && c->rs_client) + if (internal && cf->rs_client) cf_error("Only external neighbor can be RS client"); - if (c->multihop && (c->gw_mode == GW_DIRECT)) - cf_error("Multihop BGP cannot use direct gateway mode"); + if (!cf->confederation && cf->confederation_member) + cf_error("Confederation ID must be set for member sessions"); - if (c->multihop && (ipa_is_link_local(c->remote_ip) || - ipa_is_link_local(c->source_addr))) + if (cf->multihop && (ipa_is_link_local(cf->local_ip) || + ipa_is_link_local(cf->remote_ip))) cf_error("Multihop BGP cannot be used with link-local addresses"); - if (c->multihop && c->iface) + if (cf->multihop && cf->iface) cf_error("Multihop BGP cannot be bound to interface"); - if (c->multihop && c->check_link) + if (cf->multihop && cf->check_link) cf_error("Multihop BGP cannot depend on link state"); - if (c->multihop && c->bfd && ipa_zero(c->source_addr)) - 
cf_error("Multihop BGP with BFD requires specified source address"); + if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip)) + cf_error("Multihop BGP with BFD requires specified local address"); - if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted) - cf_error("BGP in recursive mode prohibits sorted table"); - if (c->deterministic_med && c->c.table->sorted) - cf_error("BGP with deterministic MED prohibits sorted table"); + struct bgp_channel_config *cc; + WALK_LIST(cc, CF->channels) + { + /* Handle undefined import filter */ + if (cc->c.in_filter == FILTER_UNDEF) + if (interior) + cc->c.in_filter = FILTER_ACCEPT; + else + cf_error("EBGP requires explicit import policy"); - if (c->secondary && !c->c.table->sorted) - cf_error("BGP with secondary option requires sorted table"); + /* Handle undefined export filter */ + if (cc->c.out_filter == FILTER_UNDEF) + if (interior) + cc->c.out_filter = FILTER_REJECT; + else + cf_error("EBGP requires explicit export policy"); + + /* Disable after error incompatible with restart limit action */ + if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error) + cc->c.in_limit.action = PLA_DISABLE; + + /* Different default based on rs_client */ + if (!cc->missing_lladdr) + cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF; + + /* Different default for gw_mode */ + if (!cc->gw_mode) + cc->gw_mode = cf->multihop ? 
GW_RECURSIVE : GW_DIRECT; + + /* Default based on proto config */ + if (cc->gr_able == 0xff) + cc->gr_able = (cf->gr_mode == BGP_GR_ABLE); + + /* Default values of IGP tables */ + if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp) + { + if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop)) + cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4); + + if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop)) + cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6); + + if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop) + cf_error("Mismatched IGP table type"); + + if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop) + cf_error("Mismatched IGP table type"); + } + + if (cf->multihop && (cc->gw_mode == GW_DIRECT)) + cf_error("Multihop BGP cannot use direct gateway mode"); + + if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted) + cf_error("BGP in recursive mode prohibits sorted table"); + + if (cf->deterministic_med && cc->c.table->sorted) + cf_error("BGP with deterministic MED prohibits sorted table"); + + if (cc->secondary && !cc->c.table->sorted) + cf_error("BGP with secondary option requires sorted table"); + } } static int -bgp_reconfigure(struct proto *P, struct proto_config *C) +bgp_reconfigure(struct proto *P, struct proto_config *CF) { - struct bgp_config *new = (struct bgp_config *) C; - struct bgp_proto *p = (struct bgp_proto *) P; + struct bgp_proto *p = (void *) P; + struct bgp_config *new = (void *) CF; struct bgp_config *old = p->cf; - if (proto_get_router_id(C) != p->local_id) + if (proto_get_router_id(CF) != p->local_id) return 0; int same = !memcmp(((byte *) old) + sizeof(struct proto_config), @@ -1399,8 +1755,26 @@ bgp_reconfigure(struct proto *P, struct proto_config *C) // password item is last and must be checked separately OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config)) && ((!old->password && !new->password) - || (old->password && new->password && 
!strcmp(old->password, new->password))) - && (get_igp_table(old) == get_igp_table(new)); + || (old->password && new->password && !strcmp(old->password, new->password))); + + /* FIXME: Move channel reconfiguration to generic protocol code ? */ + struct channel *C, *C2; + struct bgp_channel_config *cc; + + WALK_LIST(C, p->p.channels) + C->stale = 1; + + WALK_LIST(cc, new->c.channels) + { + C = (struct channel *) bgp_find_channel(p, cc->afi); + same = proto_configure_channel(P, &C, &cc->c) && same; + C->stale = 0; + } + + WALK_LIST_DELSAFE(C, C2, p->p.channels) + if (C->stale) + same = proto_configure_channel(P, &C, NULL) && same; + if (same && (p->start_state > BSS_PREPARE)) bgp_update_bfd(p, new->bfd); @@ -1412,11 +1786,34 @@ bgp_reconfigure(struct proto *P, struct proto_config *C) return same; } +#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL ) + +static int +bgp_channel_reconfigure(struct channel *C, struct channel_config *CC) +{ + struct bgp_channel *c = (void *) C; + struct bgp_channel_config *new = (void *) CC; + struct bgp_channel_config *old = c->cf; + + if (memcmp(((byte *) old) + sizeof(struct channel_config), + ((byte *) new) + sizeof(struct channel_config), + /* Remaining items must be checked separately */ + OFFSETOF(struct bgp_channel_config, rest) - sizeof(struct channel_config))) + return 0; + + /* Check change in IGP tables */ + if ((IGP_TABLE(old, ip4) != IGP_TABLE(new, ip4)) || + (IGP_TABLE(old, ip6) != IGP_TABLE(new, ip6))) + return 0; + + c->cf = new; + return 1; +} + static void -bgp_copy_config(struct proto_config *dest, struct proto_config *src) +bgp_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) { /* Just a shallow copy */ - proto_copy_rest(dest, src, sizeof(struct bgp_config)); } @@ -1433,14 +1830,14 @@ bgp_copy_config(struct proto_config *dest, struct proto_config *src) * closes the connection. 
*/ void -bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len) +bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len) { struct bgp_proto *p = c->bgp; if (c->state == BS_CLOSE) return; - bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len); + bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len)); bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode); bgp_conn_enter_close_state(c); @@ -1448,13 +1845,13 @@ bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int l c->notify_subcode = subcode; c->notify_data = data; c->notify_size = (len > 0) ? len : 0; - bgp_schedule_packet(c, PKT_NOTIFICATION); + bgp_schedule_packet(c, NULL, PKT_NOTIFICATION); if (code != 6) - { - bgp_update_startup_delay(p); - bgp_stop(p, 0, NULL, 0); - } + { + bgp_update_startup_delay(p); + bgp_stop(p, 0, NULL, 0); + } } /** @@ -1493,19 +1890,19 @@ static const char * bgp_last_errmsg(struct bgp_proto *p) { switch (p->last_error_class) - { - case BE_MISC: - return bgp_misc_errors[p->last_error_code]; - case BE_SOCKET: - return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code); - case BE_BGP_RX: - case BE_BGP_TX: - return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF); - case BE_AUTO_DOWN: - return bgp_auto_errors[p->last_error_code]; - default: - return ""; - } + { + case BE_MISC: + return bgp_misc_errors[p->last_error_code]; + case BE_SOCKET: + return (p->last_error_code == 0) ? 
"Connection closed" : strerror(p->last_error_code); + case BE_BGP_RX: + case BE_BGP_TX: + return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF); + case BE_AUTO_DOWN: + return bgp_auto_errors[p->last_error_code]; + default: + return ""; + } } static const char * @@ -1536,86 +1933,230 @@ bgp_get_status(struct proto *P, byte *buf) } static void +bgp_show_afis(int code, char *s, u32 *afis, uint count) +{ + buffer b; + LOG_BUFFER_INIT(b); + + buffer_puts(&b, s); + + for (u32 *af = afis; af < (afis + count); af++) + { + const struct bgp_af_desc *desc = bgp_get_af_desc(*af); + if (desc) + buffer_print(&b, " %s", desc->name); + else + buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af)); + } + + if (b.pos == b.end) + strcpy(b.end - 32, " ... <too long>"); + + cli_msg(code, b.start); +} + +static void +bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps) +{ + struct bgp_af_caps *ac; + uint any_mp_bgp = 0; + uint any_gr_able = 0; + uint any_add_path = 0; + uint any_ext_next_hop = 0; + u32 *afl1 = alloca(caps->af_count * sizeof(u32)); + u32 *afl2 = alloca(caps->af_count * sizeof(u32)); + uint afn1, afn2; + + WALK_AF_CAPS(caps, ac) + { + any_mp_bgp |= ac->ready; + any_gr_able |= ac->gr_able; + any_add_path |= ac->add_path; + any_ext_next_hop |= ac->ext_next_hop; + } + + if (any_mp_bgp) + { + cli_msg(-1006, " Multiprotocol"); + + afn1 = 0; + WALK_AF_CAPS(caps, ac) + if (ac->ready) + afl1[afn1++] = ac->afi; + + bgp_show_afis(-1006, " AF announced:", afl1, afn1); + } + + if (caps->route_refresh) + cli_msg(-1006, " Route refresh"); + + if (any_ext_next_hop) + { + cli_msg(-1006, " Extended next hop"); + + afn1 = 0; + WALK_AF_CAPS(caps, ac) + if (ac->ext_next_hop) + afl1[afn1++] = ac->afi; + + bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1); + } + + if (caps->ext_messages) + cli_msg(-1006, " Extended message"); + + if (caps->gr_aware) + cli_msg(-1006, " Graceful restart"); + + if (any_gr_able) + { + /* Continues from gr_aware */ + 
cli_msg(-1006, " Restart time: %u", caps->gr_time); + if (caps->gr_flags & BGP_GRF_RESTART) + cli_msg(-1006, " Restart recovery"); + + afn1 = afn2 = 0; + WALK_AF_CAPS(caps, ac) + { + if (ac->gr_able) + afl1[afn1++] = ac->afi; + + if (ac->gr_af_flags & BGP_GRF_FORWARDING) + afl2[afn2++] = ac->afi; + } + + bgp_show_afis(-1006, " AF supported:", afl1, afn1); + bgp_show_afis(-1006, " AF preserved:", afl2, afn2); + } + + if (caps->as4_support) + cli_msg(-1006, " 4-octet AS numbers"); + + if (any_add_path) + { + cli_msg(-1006, " ADD-PATH"); + + afn1 = afn2 = 0; + WALK_AF_CAPS(caps, ac) + { + if (ac->add_path & BGP_ADD_PATH_RX) + afl1[afn1++] = ac->afi; + + if (ac->add_path & BGP_ADD_PATH_TX) + afl2[afn2++] = ac->afi; + } + + bgp_show_afis(-1006, " RX:", afl1, afn1); + bgp_show_afis(-1006, " TX:", afl2, afn2); + } + + if (caps->enhanced_refresh) + cli_msg(-1006, " Enhanced refresh"); +} + +static void bgp_show_proto_info(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - struct bgp_conn *c = p->conn; - - proto_show_basic_info(P); cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p)); cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface); cli_msg(-1006, " Neighbor AS: %u", p->remote_as); - if (p->gr_active) + if (p->gr_active_num) cli_msg(-1006, " Neighbor graceful restart active"); if (P->proto_state == PS_START) - { - struct bgp_conn *oc = &p->outgoing_conn; + { + struct bgp_conn *oc = &p->outgoing_conn; - if ((p->start_state < BSS_CONNECT) && - (p->startup_timer->expires)) - cli_msg(-1006, " Error wait: %d/%d", - p->startup_timer->expires - now, p->startup_delay); + if ((p->start_state < BSS_CONNECT) && + (tm_active(p->startup_timer))) + cli_msg(-1006, " Error wait: %t/%u", + tm_remains(p->startup_timer), p->startup_delay); - if ((oc->state == BS_ACTIVE) && - (oc->connect_retry_timer->expires)) - cli_msg(-1006, " Connect delay: %d/%d", - oc->connect_retry_timer->expires - now, p->cf->connect_delay_time); + if ((oc->state == BS_ACTIVE) && 
+ (tm_active(oc->connect_timer))) + cli_msg(-1006, " Connect delay: %t/%u", + tm_remains(oc->connect_timer), p->cf->connect_delay_time); - if (p->gr_active && p->gr_timer->expires) - cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now); - } + if (p->gr_active_num && tm_active(p->gr_timer)) + cli_msg(-1006, " Restart timer: %t/-", + tm_remains(p->gr_timer)); + } else if (P->proto_state == PS_UP) - { - cli_msg(-1006, " Neighbor ID: %R", p->remote_id); - cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s", - c->peer_refresh_support ? " refresh" : "", - c->peer_enhanced_refresh_support ? " enhanced-refresh" : "", - c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""), - c->peer_as4_support ? " AS4" : "", - (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "", - (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "", - c->peer_ext_messages_support ? " ext-messages" : ""); - cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s", - p->is_internal ? "internal" : "external", - p->cf->multihop ? " multihop" : "", - p->rr_client ? " route-reflector" : "", - p->rs_client ? " route-server" : "", - p->as4_session ? " AS4" : "", - p->add_path_rx ? " add-path-rx" : "", - p->add_path_tx ? " add-path-tx" : "", - p->ext_messages ? " ext-messages" : ""); - cli_msg(-1006, " Source address: %I", p->source_addr); - if (P->cf->in_limit) - cli_msg(-1006, " Route limit: %d/%d", - p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit); - cli_msg(-1006, " Hold timer: %d/%d", - tm_remains(c->hold_timer), c->hold_time); - cli_msg(-1006, " Keepalive timer: %d/%d", - tm_remains(c->keepalive_timer), c->keepalive_time); - } + { + cli_msg(-1006, " Neighbor ID: %R", p->remote_id); + cli_msg(-1006, " Local capabilities"); + bgp_show_capabilities(p, p->conn->local_caps); + cli_msg(-1006, " Neighbor capabilities"); + bgp_show_capabilities(p, p->conn->remote_caps); + cli_msg(-1006, " Session: %s%s%s%s%s", + p->is_internal ? 
"internal" : "external", + p->cf->multihop ? " multihop" : "", + p->rr_client ? " route-reflector" : "", + p->rs_client ? " route-server" : "", + p->as4_session ? " AS4" : ""); + cli_msg(-1006, " Source address: %I", p->source_addr); + cli_msg(-1006, " Hold timer: %t/%u", + tm_remains(p->conn->hold_timer), p->conn->hold_time); + cli_msg(-1006, " Keepalive timer: %t/%u", + tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time); + } if ((p->last_error_class != BE_NONE) && (p->last_error_class != BE_MAN_DOWN)) + { + const char *err1 = bgp_err_classes[p->last_error_class]; + const char *err2 = bgp_last_errmsg(p); + cli_msg(-1006, " Last error: %s%s", err1, err2); + } + + { + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) { - const char *err1 = bgp_err_classes[p->last_error_class]; - const char *err2 = bgp_last_errmsg(p); - cli_msg(-1006, " Last error: %s%s", err1, err2); + channel_show_info(&c->c); + + if (c->c.channel_state == CS_UP) + { + if (ipa_zero(c->link_addr)) + cli_msg(-1006, " BGP Next hop: %I", c->next_hop_addr); + else + cli_msg(-1006, " BGP Next hop: %I %I", c->next_hop_addr, c->link_addr); + } + + if (c->igp_table_ip4) + cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name); + + if (c->igp_table_ip6) + cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name); } + } } +struct channel_class channel_bgp = { + .channel_size = sizeof(struct bgp_channel), + .config_size = sizeof(struct bgp_channel_config), + .init = bgp_channel_init, + .start = bgp_channel_start, + .shutdown = bgp_channel_shutdown, + .cleanup = bgp_channel_cleanup, + .reconfigure = bgp_channel_reconfigure, +}; + struct protocol proto_bgp = { .name = "BGP", .template = "bgp%d", .attr_class = EAP_BGP, .preference = DEF_PREF_BGP, + .channel_mask = NB_IP | NB_VPN | NB_FLOW, + .proto_size = sizeof(struct bgp_proto), .config_size = sizeof(struct bgp_config), + .postconfig = bgp_postconfig, .init = bgp_init, .start = bgp_start, .shutdown = bgp_shutdown, - .cleanup = 
bgp_cleanup, .reconfigure = bgp_reconfigure, .copy_config = bgp_copy_config, .get_status = bgp_get_status, diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index b3db8b7e..30424abb 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -2,6 +2,8 @@ * BIRD -- The Border Gateway Protocol * * (c) 2000 Martin Mares <mj@ucw.cz> + * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2008--2016 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -10,26 +12,80 @@ #define _BIRD_BGP_H_ #include <stdint.h> +#include <setjmp.h> +#include "nest/bird.h" #include "nest/route.h" #include "nest/bfd.h" +//#include "lib/lists.h" #include "lib/hash.h" +#include "lib/socket.h" struct linpool; struct eattr; + +/* Address families */ + +#define BGP_AFI_IPV4 1 +#define BGP_AFI_IPV6 2 + +#define BGP_SAFI_UNICAST 1 +#define BGP_SAFI_MULTICAST 2 +#define BGP_SAFI_MPLS 4 +#define BGP_SAFI_MPLS_VPN 128 +#define BGP_SAFI_VPN_MULTICAST 129 +#define BGP_SAFI_FLOW 133 + +/* Internal AF codes */ + +#define BGP_AF(A, B) (((u32)(A) << 16) | (u32)(B)) +#define BGP_AFI(A) ((u32)(A) >> 16) +#define BGP_SAFI(A) ((u32)(A) & 0xFFFF) + +#define BGP_AF_IPV4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_UNICAST ) +#define BGP_AF_IPV6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_UNICAST ) +#define BGP_AF_IPV4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MULTICAST ) +#define BGP_AF_IPV6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MULTICAST ) +#define BGP_AF_IPV4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS ) +#define BGP_AF_IPV6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS ) +#define BGP_AF_VPN4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS_VPN ) +#define BGP_AF_VPN6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS_VPN ) +#define BGP_AF_VPN4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_VPN_MULTICAST ) +#define BGP_AF_VPN6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_VPN_MULTICAST ) +#define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW ) +#define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW ) + + +struct bgp_write_state; +struct bgp_parse_state; 
+struct bgp_export_state; +struct bgp_bucket; + +struct bgp_af_desc { + u32 afi; + u32 net; + u8 mpls; + u8 no_igp; + const char *name; + uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size); + void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); + void (*update_next_hop)(struct bgp_export_state *s, eattr *nh, ea_list **to); + uint (*encode_next_hop)(struct bgp_write_state *s, eattr *nh, byte *buf, uint size); + void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); +}; + + struct bgp_config { struct proto_config c; u32 local_as, remote_as; + ip_addr local_ip; /* Source address to use */ ip_addr remote_ip; - ip_addr source_addr; /* Source address to use */ struct iface *iface; /* Interface for link-local addresses */ + u16 local_port; /* Local listening port */ u16 remote_port; /* Neighbor destination port */ int multihop; /* Number of hops if multihop */ - int ttl_security; /* Enable TTL security [RFC5082] */ - int next_hop_self; /* Always set next hop to local IP address */ - int next_hop_keep; /* Do not touch next hop attribute */ - int missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ - int gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ + int strict_bind; /* Bind listening socket to local address */ + int ttl_security; /* Enable TTL security [RFC 5082] */ int compare_path_lengths; /* Use path lengths when selecting best route */ int med_metric; /* Compare MULTI_EXIT_DISC even between routes from differen ASes */ int igp_metric; /* Use IGP metrics when selecting best route */ @@ -37,22 +93,22 @@ struct bgp_config { int deterministic_med; /* Use more complicated algo to have strict RFC 4271 MED comparison */ u32 default_local_pref; /* Default value for LOCAL_PREF attribute */ u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */ - int capabilities; /* Enable capability handshake [RFC3392] */ - int 
enable_refresh; /* Enable local support for route refresh [RFC2918] */ - int enable_as4; /* Enable local support for 4B AS numbers [RFC4893] */ + int capabilities; /* Enable capability handshake [RFC 5492] */ + int enable_refresh; /* Enable local support for route refresh [RFC 2918] */ + int enable_as4; /* Enable local support for 4B AS numbers [RFC 6793] */ int enable_extended_messages; /* Enable local support for extended messages [draft] */ u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */ int rr_client; /* Whether neighbor is RR client of me */ int rs_client; /* Whether neighbor is RS client of me */ - int advertise_ipv4; /* Whether we should add IPv4 capability advertisement to OPEN message */ + u32 confederation; /* Confederation ID, or zero if confeds not active */ + int confederation_member; /* Whether neighbor AS is member of our confederation */ int passive; /* Do not initiate outgoing connection */ int interpret_communities; /* Hardwired handling of well-known communities */ - int secondary; /* Accept also non-best routes (i.e. 
RA_ACCEPTED) */ - int add_path; /* Use ADD-PATH extension [RFC7911] */ int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */ int allow_local_pref; /* Allow LOCAL_PREF in EBGP sessions */ int gr_mode; /* Graceful restart mode (BGP_GR_*) */ int setkey; /* Set MD5 password to system SA/SP database */ + /* Times below are in seconds */ unsigned gr_time; /* Graceful restart timeout */ unsigned connect_delay_time; /* Minimum delay between connect attempts */ unsigned connect_retry_time; /* Timeout for connect attempts */ @@ -65,11 +121,31 @@ struct bgp_config { u32 disable_after_cease; /* Disable it when cease is received, bitfield */ char *password; /* Password used for MD5 authentication */ - struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ int check_link; /* Use iface link state for liveness detection */ int bfd; /* Use BFD for liveness detection */ }; +struct bgp_channel_config { + struct channel_config c; + + u32 afi; + const struct bgp_af_desc *desc; + + ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ + u8 next_hop_self; /* Always set next hop to local IP address */ + u8 next_hop_keep; /* Do not touch next hop attribute */ + u8 missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ + u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ + u8 secondary; /* Accept also non-best routes (i.e. 
RA_ACCEPTED) */ + u8 gr_able; /* Allow full graceful restart for the channel */ + u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */ + u8 add_path; /* Use ADD-PATH extension [RFC 7911] */ + + uint rest[0]; /* Remaining items are reconfigured separately */ + struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ + struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ +}; + #define MLL_SELF 1 #define MLL_DROP 2 #define MLL_IGNORE 3 @@ -77,112 +153,241 @@ struct bgp_config { #define GW_DIRECT 1 #define GW_RECURSIVE 2 -#define ADD_PATH_RX 1 -#define ADD_PATH_TX 2 -#define ADD_PATH_FULL 3 +#define BGP_ADD_PATH_RX 1 +#define BGP_ADD_PATH_TX 2 +#define BGP_ADD_PATH_FULL 3 -#define BGP_GR_ABLE 1 -#define BGP_GR_AWARE 2 +#define BGP_GR_ABLE 1 +#define BGP_GR_AWARE 2 -/* For peer_gr_flags */ +/* For GR capability common flags */ #define BGP_GRF_RESTART 0x80 -/* For peer_gr_aflags */ +/* For GR capability per-AF flags */ #define BGP_GRF_FORWARDING 0x80 +struct bgp_af_caps { + u32 afi; + u8 ready; /* Multiprotocol capability, RFC 4760 */ + u8 gr_able; /* Graceful restart support, RFC 4724 */ + u8 gr_af_flags; /* Graceful restart per-AF flags */ + u8 ext_next_hop; /* Extended IPv6 next hop, RFC 5549 */ + u8 add_path; /* Multiple paths support, RFC 7911 */ +}; + +struct bgp_caps { + u32 as4_number; /* Announced ASN */ + + u8 as4_support; /* Four-octet AS capability, RFC 6793 */ + u8 ext_messages; /* Extended message length, RFC draft */ + u8 route_refresh; /* Route refresh capability, RFC 2918 */ + u8 enhanced_refresh; /* Enhanced route refresh, RFC 7313 */ + + u8 gr_aware; /* Graceful restart capability, RFC 4724 */ + u8 gr_flags; /* Graceful restart flags */ + u16 gr_time; /* Graceful restart time in seconds */ + + u16 af_count; /* Number of af_data items */ + + struct bgp_af_caps af_data[0]; /* Per-AF capability data */ +}; + +#define WALK_AF_CAPS(caps,ac) \ + for (ac = caps->af_data; ac < 
&caps->af_data[caps->af_count]; ac++) + + +struct bgp_socket { + node n; /* Node in global bgp_sockets */ + sock *sk; /* Real listening socket */ + u32 uc; /* Use count */ +}; + struct bgp_conn { struct bgp_proto *bgp; struct birdsock *sk; - uint state; /* State of connection state machine */ - struct timer *connect_retry_timer; - struct timer *hold_timer; - struct timer *keepalive_timer; - struct event *tx_ev; - int packets_to_send; /* Bitmap of packet types to be sent */ + u8 state; /* State of connection state machine */ + u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ + u8 ext_messages; /* Session uses extended message length */ + + struct bgp_caps *local_caps; + struct bgp_caps *remote_caps; + timer *connect_timer; + timer *hold_timer; + timer *keepalive_timer; + event *tx_ev; + u32 packets_to_send; /* Bitmap of packet types to be sent */ + u32 channels_to_send; /* Bitmap of channels with packets to be sent */ + u8 last_channel; /* Channel used last time for TX */ + u8 last_channel_count; /* Number of times the last channel was used in succession */ int notify_code, notify_subcode, notify_size; byte *notify_data; - u32 advertised_as; /* Temporary value for AS number received */ - int start_state; /* protocol start_state snapshot when connection established */ - u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */ - u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */ - u8 peer_add_path; /* Peer supports ADD-PATH [RFC7911] */ - u8 peer_enhanced_refresh_support; /* Peer supports enhanced refresh [RFC7313] */ - u8 peer_gr_aware; - u8 peer_gr_able; - u16 peer_gr_time; - u8 peer_gr_flags; - u8 peer_gr_aflags; - u8 peer_ext_messages_support; /* Peer supports extended message length [draft] */ - unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ + + uint hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; struct bgp_proto { 
struct proto p; struct bgp_config *cf; /* Shortcut to BGP configuration */ u32 local_as, remote_as; - int start_state; /* Substates that partitions BS_START */ - u8 is_internal; /* Internal BGP connection (local_as == remote_as) */ - u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ - u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ - u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ - u8 ext_messages; /* Session allows to use extended messages (both sides support it) */ + u32 public_as; /* Externally visible ASN (local_as or confederation id) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ u32 rr_cluster_id; /* Route reflector cluster ID */ - int rr_client; /* Whether neighbor is RR client of me */ - int rs_client; /* Whether neighbor is RS client of me */ + int start_state; /* Substates that partitions BS_START */ + u8 is_internal; /* Internal BGP session (local_as == remote_as) */ + u8 is_interior; /* Internal or intra-confederation BGP session */ + u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ + u8 rr_client; /* Whether neighbor is RR client of me */ + u8 rs_client; /* Whether neighbor is RS client of me */ + u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */ + u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */ u8 gr_ready; /* Neighbor could do graceful restart */ - u8 gr_active; /* Neighbor is doing graceful restart */ - u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */ - u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */ + u8 gr_active_num; /* Neighbor is doing GR, number of active channels */ + u8 channel_count; /* Number of active channels */ + u32 *afi_map; /* Map channel index -> AFI */ + struct bgp_channel **channel_map; /* Map channel index -> channel */ struct bgp_conn *conn; /* Connection we have established */ 
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ struct object_lock *lock; /* Lock for neighbor connection */ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ + struct bgp_socket *sock; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ ip_addr source_addr; /* Local address used as an advertised next hop */ - rtable *igp_table; /* Table used for recursive next hop lookups */ - struct event *event; /* Event for respawning and shutting process */ - struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ - struct timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ - struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ - uint hash_size, hash_count, hash_limit; - HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ - slab *prefix_slab; /* Slab holding prefix nodes */ - list bucket_queue; /* Queue of buckets to send */ - struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ - unsigned startup_delay; /* Time to delay protocol startup by due to errors */ - bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */ + ip_addr link_addr; /* Link-local version of source_addr */ + event *event; /* Event for respawning and shutting process */ + timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ + timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ + uint startup_delay; /* Delay (in seconds) of protocol startup due to previous errors */ + btime last_proto_error; /* Time of last error that leads to protocol stop */ u8 last_error_class; /* Error class of last error */ u32 last_error_code; /* Error code of last error. 
BGP protocol errors are encoded as (bgp_err_code << 16 | bgp_err_subcode) */ -#ifdef IPV6 - byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */ - unsigned mp_reach_len, mp_unreach_len; - ip_addr local_link; /* Link-level version of source_addr */ -#endif +}; + +struct bgp_channel { + struct channel c; + + /* Rest are BGP specific data */ + struct bgp_channel_config *cf; + pool *pool; /* XXXX */ + + u32 afi; + u32 index; + const struct bgp_af_desc *desc; + + HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */ + struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ + list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */ + + HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ + slab *prefix_slab; /* Slab holding prefix nodes */ + + rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ + rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ + ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ + ip_addr link_addr; /* Link-local version of next_hop_addr */ + + u32 packets_to_send; /* Bitmap of packet types to be sent */ + + u8 gr_ready; /* Neighbor could do GR on this AF */ + u8 gr_active; /* Neighbor is doing GR and keeping fwd state */ + + u8 ext_next_hop; /* Session allows both IPv4 and IPv6 next hops */ + + u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ + u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ + + u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */ + u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */ }; struct bgp_prefix { - struct { - ip_addr prefix; - int pxlen; - } n; + node buck_node; /* Node in per-bucket list */ + struct bgp_prefix *next; /* Node in prefix hash table */ + u32 hash; u32 path_id; - struct bgp_prefix *next; - node bucket_node; /* Node in per-bucket list */ + net_addr net[0]; }; struct bgp_bucket { node send_node; /* Node in send 
queue */ - struct bgp_bucket *hash_next, *hash_prev; /* Node in bucket hash table */ - unsigned hash; /* Hash over extended attributes */ - list prefixes; /* Prefixes in this buckets */ + struct bgp_bucket *next; /* Node in bucket hash table */ + list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */ + u32 hash; /* Hash over extended attributes */ ea_list eattrs[0]; /* Per-bucket extended attributes */ }; +struct bgp_export_state { + struct bgp_proto *proto; + struct bgp_channel *channel; + struct linpool *pool; + + struct bgp_proto *src; + rte *route; + int mpls; + + u32 attrs_seen[1]; + uint err_withdraw; +}; + +struct bgp_write_state { + struct bgp_proto *proto; + struct bgp_channel *channel; + struct linpool *pool; + + int as4_session; + int add_path; + int mpls; + + eattr *mp_next_hop; + adata *mpls_labels; +}; + +struct bgp_parse_state { + struct bgp_proto *proto; + struct bgp_channel *channel; + struct linpool *pool; + + int as4_session; + int add_path; + int mpls; + + u32 attrs_seen[256/32]; + + u32 mp_reach_af; + u32 mp_unreach_af; + + uint attr_len; + uint ip_reach_len; + uint ip_unreach_len; + uint ip_next_hop_len; + uint mp_reach_len; + uint mp_unreach_len; + uint mp_next_hop_len; + + byte *attrs; + byte *ip_reach_nlri; + byte *ip_unreach_nlri; + byte *ip_next_hop_data; + byte *mp_reach_nlri; + byte *mp_unreach_nlri; + byte *mp_next_hop_data; + + uint err_withdraw; + uint err_subcode; + jmp_buf err_jmpbuf; + + struct hostentry *hostentry; + adata *mpls_labels; + + /* Cached state for bgp_rte_update() */ + u32 last_id; + struct rte_src *last_src; + rta *cached_rta; +}; + #define BGP_PORT 179 #define BGP_VERSION 4 #define BGP_HEADER_LENGTH 19 @@ -193,13 +398,33 @@ struct bgp_bucket { #define BGP_RX_BUFFER_EXT_SIZE 65535 #define BGP_TX_BUFFER_EXT_SIZE 65535 -static inline uint bgp_max_packet_length(struct bgp_proto *p) -{ return p->ext_messages ? 
BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; } +static inline int bgp_channel_is_ipv4(struct bgp_channel *c) +{ return BGP_AFI(c->afi) == BGP_AFI_IPV4; } + +static inline int bgp_channel_is_ipv6(struct bgp_channel *c) +{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; } + +static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c) +{ return BGP_AFI(c->afi) == BGP_AFI_IPV4; } + +static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c) +{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; } + +static inline uint bgp_max_packet_length(struct bgp_conn *conn) +{ return conn->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; } + +static inline void +bgp_parse_error(struct bgp_parse_state *s, uint subcode) +{ + s->err_subcode = subcode; + longjmp(s->err_jmpbuf, 1); +} extern struct linpool *bgp_linpool; +extern struct linpool *bgp_linpool2; -void bgp_start_timer(struct timer *t, int value); +void bgp_start_timer(timer *t, uint value); void bgp_check_config(struct bgp_config *c); void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len); void bgp_close_conn(struct bgp_conn *c); @@ -209,9 +434,9 @@ void bgp_conn_enter_established_state(struct bgp_conn *conn); void bgp_conn_enter_close_state(struct bgp_conn *conn); void bgp_conn_enter_idle_state(struct bgp_conn *conn); void bgp_handle_graceful_restart(struct bgp_proto *p); -void bgp_graceful_restart_done(struct bgp_proto *p); -void bgp_refresh_begin(struct bgp_proto *p); -void bgp_refresh_end(struct bgp_proto *p); +void bgp_graceful_restart_done(struct bgp_channel *c); +void bgp_refresh_begin(struct bgp_channel *c); +void bgp_refresh_end(struct bgp_channel *c); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); void bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len); @@ -234,48 +459,73 @@ struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); /* attrs.c */ -/* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, 
in IPv6 - * we store two addesses in it - a global address and a link local address. - */ -#ifdef IPV6 -#define NEXT_HOP_LENGTH (2*sizeof(ip_addr)) -static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; ((ip_addr *) b)[1] = IPA_NONE; } -#else -#define NEXT_HOP_LENGTH sizeof(ip_addr) -static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; } -#endif +static inline eattr * +bgp_find_attr(ea_list *attrs, uint code) +{ + return ea_find(attrs, EA_CODE(EAP_BGP, code)); +} + +eattr * +bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val); + +static inline void +bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val) +{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } + +static inline void +bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, struct adata *val) +{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } + +static inline void +bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len) +{ + struct adata *a = lp_alloc_adata(pool, len); + memcpy(a->data, data, len); + bgp_set_attr(to, pool, code, flags, (uintptr_t) a); +} + +static inline void +bgp_unset_attr(ea_list **to, struct linpool *pool, uint code) +{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; } + + +int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end); +ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len); + +void bgp_init_bucket_table(struct bgp_channel *c); +void bgp_free_bucket_table(struct bgp_channel *c); +void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b); +void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b); +void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b); + +void bgp_init_prefix_table(struct bgp_channel *c); +void bgp_free_prefix_table(struct bgp_channel *c); +void 
bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); -void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val); -byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len); -struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory); -int bgp_get_attr(struct eattr *e, byte *buf, int buflen); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); -void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs); +void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea_list *attrs); int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *); -void bgp_init_bucket_table(struct bgp_proto *); -void bgp_free_bucket_table(struct bgp_proto *p); -void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck); -void bgp_init_prefix_table(struct bgp_proto *p, u32 order); -void bgp_free_prefix_table(struct bgp_proto *p); -void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp); -uint bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains); +int bgp_get_attr(struct eattr *e, byte *buf, int buflen); void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs); -inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a) -{ *(ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)) = a; } /* packets.c */ void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new); -void bgp_schedule_packet(struct bgp_conn *conn, int type); +const struct bgp_af_desc *bgp_get_af_desc(u32 afi); +const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi); +void bgp_schedule_packet(struct bgp_conn *conn, struct 
bgp_channel *c, int type); void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); int bgp_rx(struct birdsock *sk, uint size); const char * bgp_error_dsc(unsigned code, unsigned subcode); void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len); +void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); + + /* Packet types */ #define PKT_OPEN 0x01 @@ -293,26 +543,25 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define BAF_PARTIAL 0x20 #define BAF_EXT_LEN 0x10 -#define BA_ORIGIN 0x01 /* [RFC1771] */ /* WM */ +#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */ #define BA_AS_PATH 0x02 /* WM */ #define BA_NEXT_HOP 0x03 /* WM */ #define BA_MULTI_EXIT_DISC 0x04 /* ON */ #define BA_LOCAL_PREF 0x05 /* WD */ #define BA_ATOMIC_AGGR 0x06 /* WD */ #define BA_AGGREGATOR 0x07 /* OT */ -#define BA_COMMUNITY 0x08 /* [RFC1997] */ /* OT */ -#define BA_ORIGINATOR_ID 0x09 /* [RFC1966] */ /* ON */ -#define BA_CLUSTER_LIST 0x0a /* ON */ -/* We don't support these: */ -#define BA_DPA 0x0b /* ??? 
*/ -#define BA_ADVERTISER 0x0c /* [RFC1863] */ -#define BA_RCID_PATH 0x0d -#define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */ -#define BA_MP_UNREACH_NLRI 0x0f -#define BA_EXT_COMMUNITY 0x10 /* [RFC4360] */ -#define BA_AS4_PATH 0x11 /* [RFC4893] */ -#define BA_AS4_AGGREGATOR 0x12 -#define BA_LARGE_COMMUNITY 0x20 /* [RFC8092] */ +#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */ +#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */ +#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */ +#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */ +#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */ +#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */ +#define BA_AS4_PATH 0x11 /* RFC 6793 */ +#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ +#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ + +/* Bird's private internal BGP attributes */ +#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */ /* BGP connection states */ @@ -332,14 +581,12 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi * * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP * protocol done what is neccessary to start itself (like acquiring the lock), - * it goes to BSS_CONNECT. When some connection attempt failed because of - * option or capability error, it goes to BSS_CONNECT_NOCAP. + * it goes to BSS_CONNECT. */ #define BSS_PREPARE 0 /* Used before ordinary BGP started, i. e. waiting for lock */ #define BSS_DELAY 1 /* Startup delay due to previous errors */ #define BSS_CONNECT 2 /* Ordinary BGP connecting */ -#define BSS_CONNECT_NOCAP 3 /* Legacy BGP connecting (without capabilities) */ /* BGP feed states (TX) @@ -348,7 +595,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi * * RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets. * - * These states (stored in p->feed_state) are used to keep track of these + * These states (stored in c->feed_state) are used to keep track of these * requirements. 
When such feed is started, BFS_LOADING / BFS_REFRESHING is * set. When it ended, BFS_LOADED / BFS_REFRESHED is set to schedule End-of-RIB * or EoRR packet. When the packet is sent, the state returned to BFS_NONE. @@ -404,15 +651,5 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define ORIGIN_EGP 1 #define ORIGIN_INCOMPLETE 2 -/* Address families */ - -#define BGP_AF_IPV4 1 -#define BGP_AF_IPV6 2 - -#ifdef IPV6 -#define BGP_AF BGP_AF_IPV6 -#else -#define BGP_AF BGP_AF_IPV4 -#endif #endif diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 075403a3..41eaa729 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -13,21 +13,24 @@ CF_HDR CF_DEFINES #define BGP_CFG ((struct bgp_config *) this_proto) +#define BGP_CC ((struct bgp_channel_config *) this_channel) CF_DECLS -CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, - KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, - PATH, METRIC, ERROR, START, DELAY, FORGET, WAIT, ENABLE, - DISABLE, AFTER, BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, - BGP_NEXT_HOP, BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, - BGP_EXT_COMMUNITY, SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, - CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE, - PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH, - INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, - TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, - SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, - CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, BGP_LARGE_COMMUNITY) +CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, + MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR, + START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH, + BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR, + BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY, + SOURCE, ADDRESS, 
PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE, + IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR, + DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, + BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, + SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, + GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, + STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6) + +%type <i32> bgp_afi CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER, CONFIGURATION, CHANGE, DECONFIGURED, CONNECTION, REJECTED, COLLISION, @@ -37,10 +40,11 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER, CF_GRAMMAR -CF_ADDTO(proto, bgp_proto '}' { bgp_check_config(BGP_CFG); } ) +CF_ADDTO(proto, bgp_proto '}' ) bgp_proto_start: proto_start BGP { this_proto = proto_config_new(&proto_bgp, $1); + BGP_CFG->local_port = BGP_PORT; BGP_CFG->remote_port = BGP_PORT; BGP_CFG->multihop = -1; /* undefined */ BGP_CFG->hold_time = 240; @@ -55,18 +59,24 @@ bgp_proto_start: proto_start BGP { BGP_CFG->enable_refresh = 1; BGP_CFG->enable_as4 = 1; BGP_CFG->capabilities = 2; - BGP_CFG->advertise_ipv4 = 1; BGP_CFG->interpret_communities = 1; BGP_CFG->default_local_pref = 100; BGP_CFG->gr_mode = BGP_GR_AWARE; BGP_CFG->gr_time = 120; BGP_CFG->setkey = 1; - } + BGP_CFG->check_link = -1; + } + ; + +bgp_loc_opts: + /* empty */ + | bgp_loc_opts PORT expr { BGP_CFG->local_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); } + | bgp_loc_opts AS expr { BGP_CFG->local_as = $3; } ; bgp_nbr_opts: /* empty */ - | bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); } + | bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); } | bgp_nbr_opts AS expr { BGP_CFG->remote_as = $3; } ; @@ -96,8 +106,12 @@ bgp_cease_flag: bgp_proto: 
bgp_proto_start proto_name '{' | bgp_proto proto_item ';' - | bgp_proto LOCAL AS expr ';' { BGP_CFG->local_as = $4; } - | bgp_proto LOCAL ipa AS expr ';' { BGP_CFG->source_addr = $3; BGP_CFG->local_as = $5; } + | bgp_proto bgp_proto_channel ';' + | bgp_proto LOCAL bgp_loc_opts ';' + | bgp_proto LOCAL ipa ipa_scope bgp_loc_opts ';' { + BGP_CFG->local_ip = $3; + if ($4) BGP_CFG->iface = $4; + } | bgp_proto NEIGHBOR bgp_nbr_opts ';' | bgp_proto NEIGHBOR ipa ipa_scope bgp_nbr_opts ';' { if (ipa_nonzero(BGP_CFG->remote_ip)) @@ -107,20 +121,16 @@ bgp_proto: } | bgp_proto INTERFACE TEXT ';' { BGP_CFG->iface = if_get_by_name($3); } | bgp_proto RR CLUSTER ID idval ';' { BGP_CFG->rr_cluster_id = $5; } - | bgp_proto RR CLIENT ';' { BGP_CFG->rr_client = 1; } - | bgp_proto RS CLIENT ';' { BGP_CFG->rs_client = 1; } + | bgp_proto RR CLIENT bool ';' { BGP_CFG->rr_client = $4; } + | bgp_proto RS CLIENT bool ';' { BGP_CFG->rs_client = $4; } + | bgp_proto CONFEDERATION expr ';' { BGP_CFG->confederation = $3; } + | bgp_proto CONFEDERATION MEMBER bool ';' { BGP_CFG->confederation_member = $4; } | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; } | bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; } | bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; } | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } - | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; BGP_CFG->next_hop_keep = 0; } - | bgp_proto NEXT HOP KEEP ';' { BGP_CFG->next_hop_keep = 1; BGP_CFG->next_hop_self = 0; } - | bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; } - | bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; } - | bgp_proto MISSING LLADDR IGNORE ';' { BGP_CFG->missing_lladdr = MLL_IGNORE; } - | bgp_proto GATEWAY DIRECT ';' { BGP_CFG->gw_mode = GW_DIRECT; } - | bgp_proto GATEWAY RECURSIVE ';' { 
BGP_CFG->gw_mode = GW_RECURSIVE; } + | bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; } | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } | bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; } @@ -128,7 +138,7 @@ bgp_proto: | bgp_proto DETERMINISTIC MED bool ';' { BGP_CFG->deterministic_med = $4; } | bgp_proto DEFAULT BGP_MED expr ';' { BGP_CFG->default_med = $4; } | bgp_proto DEFAULT BGP_LOCAL_PREF expr ';' { BGP_CFG->default_local_pref = $4; } - | bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->source_addr = $4; } + | bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->local_ip = $4; } | bgp_proto START DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; log(L_WARN "%s: Start delay time option is deprecated, use connect delay time", this_proto->name); } | bgp_proto CONNECT DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; } | bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; } @@ -141,33 +151,107 @@ bgp_proto: | bgp_proto ENABLE AS4 bool ';' { BGP_CFG->enable_as4 = $4; } | bgp_proto ENABLE EXTENDED MESSAGES bool ';' { BGP_CFG->enable_extended_messages = $5; } | bgp_proto CAPABILITIES bool ';' { BGP_CFG->capabilities = $3; } - | bgp_proto ADVERTISE IPV4 bool ';' { BGP_CFG->advertise_ipv4 = $4; } | bgp_proto PASSWORD text ';' { BGP_CFG->password = $3; } | bgp_proto SETKEY bool ';' { BGP_CFG->setkey = $3; } - | bgp_proto ROUTE LIMIT expr ';' { - this_proto->in_limit = cfg_allocz(sizeof(struct proto_limit)); - this_proto->in_limit->limit = $4; - this_proto->in_limit->action = PLA_RESTART; - log(L_WARN "%s: Route limit option is deprecated, use import limit", this_proto->name); - } | bgp_proto PASSIVE bool ';' { BGP_CFG->passive = $3; } | bgp_proto INTERPRET COMMUNITIES bool ';' { BGP_CFG->interpret_communities = $4; } - | bgp_proto SECONDARY bool ';' { BGP_CFG->secondary = $3; } - | bgp_proto ADD PATHS RX ';' { BGP_CFG->add_path = 
ADD_PATH_RX; } - | bgp_proto ADD PATHS TX ';' { BGP_CFG->add_path = ADD_PATH_TX; } - | bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; } - | bgp_proto ALLOW BGP_LOCAL_PREF bool ';' { BGP_CFG->allow_local_pref = $4; } | bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; } | bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; } + | bgp_proto ALLOW BGP_LOCAL_PREF bool ';' { BGP_CFG->allow_local_pref = $4; } | bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; } | bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; } | bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; } - | bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; } | bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; } | bgp_proto CHECK LINK bool ';' { BGP_CFG->check_link = $4; } | bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); } ; +bgp_afi: + IPV4 { $$ = BGP_AF_IPV4; } + | IPV6 { $$ = BGP_AF_IPV6; } + | IPV4 MULTICAST { $$ = BGP_AF_IPV4_MC; } + | IPV6 MULTICAST { $$ = BGP_AF_IPV6_MC; } + | IPV4 MPLS { $$ = BGP_AF_IPV4_MPLS; } + | IPV6 MPLS { $$ = BGP_AF_IPV6_MPLS; } + | VPN4 MPLS { $$ = BGP_AF_VPN4_MPLS; } + | VPN6 MPLS { $$ = BGP_AF_VPN6_MPLS; } + | VPN4 MULTICAST { $$ = BGP_AF_VPN4_MC; } + | VPN6 MULTICAST { $$ = BGP_AF_VPN6_MC; } + | FLOW4 { $$ = BGP_AF_FLOW4; } + | FLOW6 { $$ = BGP_AF_FLOW6; } + ; + +bgp_channel_start: bgp_afi +{ + const struct bgp_af_desc *desc = bgp_get_af_desc($1); + + if (!desc) + cf_error("Unknown AFI/SAFI"); + + this_channel = channel_config_get(&channel_bgp, desc->name, desc->net, this_proto); + + /* New channel */ + if (!BGP_CC->desc) + { + BGP_CC->c.in_filter = FILTER_UNDEF; + BGP_CC->c.out_filter = FILTER_UNDEF; + BGP_CC->c.ra_mode = RA_UNDEF; + BGP_CC->afi = $1; + BGP_CC->desc = desc; + BGP_CC->gr_able = 0xff; /* undefined */ + } +}; + +bgp_channel_item: + channel_item + | NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; } + | NEXT HOP SELF { 
BGP_CC->next_hop_self = 1; BGP_CC->next_hop_keep = 0; } + | NEXT HOP KEEP { BGP_CC->next_hop_keep = 1; BGP_CC->next_hop_self = 0; } + | MISSING LLADDR SELF { BGP_CC->missing_lladdr = MLL_SELF; } + | MISSING LLADDR DROP { BGP_CC->missing_lladdr = MLL_DROP; } + | MISSING LLADDR IGNORE { BGP_CC->missing_lladdr = MLL_IGNORE; } + | GATEWAY DIRECT { BGP_CC->gw_mode = GW_DIRECT; } + | GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; } + | SECONDARY bool { BGP_CC->secondary = $2; } + | GRACEFUL RESTART bool { BGP_CC->gr_able = $3; } + | EXTENDED NEXT HOP bool { BGP_CC->ext_next_hop = $4; } + | ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; } + | ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; } + | ADD PATHS bool { BGP_CC->add_path = $3 ? BGP_ADD_PATH_FULL : 0; } + | IGP TABLE rtable { + if (BGP_CC->desc->no_igp) + cf_error("IGP table not allowed here"); + + if ($3->addr_type == NET_IP4) + BGP_CC->igp_table_ip4 = $3; + else if ($3->addr_type == NET_IP6) + BGP_CC->igp_table_ip6 = $3; + else + cf_error("Mismatched IGP table type"); + } + ; + +bgp_channel_opts: + /* empty */ + | bgp_channel_opts bgp_channel_item ';' + ; + +bgp_channel_opt_list: + /* empty */ + | '{' bgp_channel_opts '}' + ; + +bgp_channel_end: +{ + if (!this_channel->table) + cf_error("Routing table not specified"); + + this_channel = NULL; +}; + +bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end; + + CF_ADDTO(dynamic_attr, BGP_ORIGIN { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(EAP_BGP, BA_ORIGIN)); }) CF_ADDTO(dynamic_attr, BGP_PATH diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index f0049d3a..aa08732d 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -2,12 +2,16 @@ * BIRD -- BGP Packet Processing * * (c) 2000 Martin Mares <mj@ucw.cz> + * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2008--2016 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. 
*/ #undef LOCAL_DEBUG +#include <stdlib.h> + #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" @@ -16,6 +20,7 @@ #include "nest/mrtdump.h" #include "conf/conf.h" #include "lib/unaligned.h" +#include "lib/flowspec.h" #include "lib/socket.h" #include "nest/cli.h" @@ -27,6 +32,13 @@ #define BGP_RR_BEGIN 1 #define BGP_RR_END 2 +#define BGP_NLRI_MAX (4 + 1 + 32) + +#define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */ +#define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */ +#define BGP_MPLS_NULL 3 /* Implicit NULL label */ +#define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */ + static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS; static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS; @@ -38,6 +50,46 @@ static byte fsm_err_subcode[BS_MAX] = { [BS_ESTABLISHED] = 3 }; + +static struct bgp_channel * +bgp_get_channel(struct bgp_proto *p, u32 afi) +{ + uint i; + + for (i = 0; i < p->channel_count; i++) + if (p->afi_map[i] == afi) + return p->channel_map[i]; + + return NULL; +} + +static inline void +put_af3(byte *buf, u32 id) +{ + put_u16(buf, id >> 16); + buf[2] = id & 0xff; +} + +static inline void +put_af4(byte *buf, u32 id) +{ + put_u16(buf, id >> 16); + buf[2] = 0; + buf[3] = id & 0xff; +} + +static inline u32 +get_af3(byte *buf) +{ + return (get_u16(buf) << 16) | buf[2]; +} + +static inline u32 +get_af4(byte *buf) +{ + return (get_u16(buf) << 16) | buf[3]; +} + /* * MRT Dump format is not semantically specified. * We will use these values in appropriate fields: @@ -58,31 +110,41 @@ static byte * mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4) { struct bgp_proto *p = conn->bgp; + uint v4 = ipa_is_ip4(p->cf->remote_ip); if (as4) - { - put_u32(buf+0, p->remote_as); - put_u32(buf+4, p->local_as); - buf+=8; - } + { + put_u32(buf+0, p->remote_as); + put_u32(buf+4, p->public_as); + buf+=8; + } else - { - put_u16(buf+0, (p->remote_as <= 0xFFFF) ? 
p->remote_as : AS_TRANS); - put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS); - buf+=4; - } + { + put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS); + put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS); + buf+=4; + } put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0); - put_u16(buf+2, BGP_AF); + put_u16(buf+2, v4 ? BGP_AFI_IPV4 : BGP_AFI_IPV6); buf+=4; - buf = put_ipa(buf, conn->sk ? conn->sk->daddr : IPA_NONE); - buf = put_ipa(buf, conn->sk ? conn->sk->saddr : IPA_NONE); + + if (v4) + { + buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE); + buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->saddr) : IP4_NONE); + } + else + { + buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE); + buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE); + } return buf; } static void -mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len) +mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len) { byte *buf = alloca(128+len); /* 128 is enough for MRT headers */ byte *bp = buf + MRTDUMP_HDR_LENGTH; @@ -96,14 +158,14 @@ mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len) } static inline u16 -convert_state(unsigned state) +convert_state(uint state) { /* Convert state from our BS_* values to values used in MRTDump */ return (state == BS_CLOSE) ? 
1 : state + 1; } void -mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new) +mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new) { byte buf[128]; byte *bp = buf + MRTDUMP_HDR_LENGTH; @@ -127,1303 +189,2429 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf) return buf + 2 + conn->notify_size; } -#ifdef IPV6 -static byte * -bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf) -{ - *buf++ = 1; /* Capability 1: Multiprotocol extensions */ - *buf++ = 4; /* Capability data length */ - *buf++ = 0; /* We support AF IPv6 */ - *buf++ = BGP_AF_IPV6; - *buf++ = 0; /* RFU */ - *buf++ = 1; /* and SAFI 1 */ - return buf; -} -#else +/* Capability negotiation as per RFC 5492 */ -static byte * -bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf) -{ - *buf++ = 1; /* Capability 1: Multiprotocol extensions */ - *buf++ = 4; /* Capability data length */ - *buf++ = 0; /* We support AF IPv4 */ - *buf++ = BGP_AF_IPV4; - *buf++ = 0; /* RFU */ - *buf++ = 1; /* and SAFI 1 */ - return buf; +const struct bgp_af_caps * +bgp_find_af_caps(struct bgp_caps *caps, u32 afi) +{ + struct bgp_af_caps *ac; + + WALK_AF_CAPS(caps, ac) + if (ac->afi == afi) + return ac; + + return NULL; } -#endif -static byte * -bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf) +static struct bgp_af_caps * +bgp_get_af_caps(struct bgp_caps *caps, u32 afi) { - *buf++ = 2; /* Capability 2: Support for route refresh */ - *buf++ = 0; /* Capability data length */ - return buf; + struct bgp_af_caps *ac; + + WALK_AF_CAPS(caps, ac) + if (ac->afi == afi) + return ac; + + ac = &caps->af_data[caps->af_count++]; + memset(ac, 0, sizeof(struct bgp_af_caps)); + ac->afi = afi; + + return ac; } -static byte * -bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf) +static int +bgp_af_caps_cmp(const void *X, const void *Y) { - *buf++ = 6; /* Capability 6: Support for extended messages */ - *buf++ = 0; /* Capability data length */ - return buf; + const struct bgp_af_caps *x = X, 
*y = Y; + return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0; } + static byte * -bgp_put_cap_gr1(struct bgp_proto *p, byte *buf) +bgp_write_capabilities(struct bgp_conn *conn, byte *buf) { - *buf++ = 64; /* Capability 64: Support for graceful restart */ - *buf++ = 6; /* Capability data length */ + struct bgp_proto *p = conn->bgp; + struct bgp_channel *c; + struct bgp_caps *caps; + struct bgp_af_caps *ac; + uint any_ext_next_hop = 0; + uint any_add_path = 0; + byte *data; - put_u16(buf, p->cf->gr_time); - if (p->p.gr_recovery) - buf[0] |= BGP_GRF_RESTART; - buf += 2; + /* Prepare bgp_caps structure */ + + int n = list_length(&p->p.channels); + caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps)); + conn->local_caps = caps; + + caps->as4_support = p->cf->enable_as4; + caps->ext_messages = p->cf->enable_extended_messages; + caps->route_refresh = p->cf->enable_refresh; + caps->enhanced_refresh = p->cf->enable_refresh; + + if (caps->as4_support) + caps->as4_number = p->public_as; + + if (p->cf->gr_mode) + { + caps->gr_aware = 1; + caps->gr_time = p->cf->gr_time; + caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0; + } + + /* Allocate and fill per-AF fields */ + WALK_LIST(c, p->p.channels) + { + ac = &caps->af_data[caps->af_count++]; + ac->afi = c->afi; + ac->ready = 1; + + ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop; + any_ext_next_hop |= ac->ext_next_hop; + + ac->add_path = c->cf->add_path; + any_add_path |= ac->add_path; + + if (c->cf->gr_able) + { + ac->gr_able = 1; + + if (p->p.gr_recovery) + ac->gr_af_flags |= BGP_GRF_FORWARDING; + } + } + + /* Sort capability fields by AFI/SAFI */ + qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp); - *buf++ = 0; /* Appropriate AF */ - *buf++ = BGP_AF; - *buf++ = 1; /* and SAFI 1 */ - *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0; + + /* Create capability list in buffer */ + + /* + * Note that max length is ~ 20+14*af_count. 
With max 12 channels that is + * 188. Option limit is 253 and buffer size is 4096, so we cannot overflow + * unless we add new capabilities or more AFs. + */ + + WALK_AF_CAPS(caps, ac) + if (ac->ready) + { + *buf++ = 1; /* Capability 1: Multiprotocol extensions */ + *buf++ = 4; /* Capability data length */ + put_af4(buf, ac->afi); + buf += 4; + } + + if (caps->route_refresh) + { + *buf++ = 2; /* Capability 2: Support for route refresh */ + *buf++ = 0; /* Capability data length */ + } + + if (any_ext_next_hop) + { + *buf++ = 5; /* Capability 5: Support for extended next hop */ + *buf++ = 0; /* Capability data length, will be fixed later */ + data = buf; + + WALK_AF_CAPS(caps, ac) + if (ac->ext_next_hop) + { + put_af4(buf, ac->afi); + put_u16(buf+4, BGP_AFI_IPV6); + buf += 6; + } + + data[-1] = buf - data; + } + + if (caps->ext_messages) + { + *buf++ = 6; /* Capability 6: Support for extended messages */ + *buf++ = 0; /* Capability data length */ + } + + if (caps->gr_aware) + { + *buf++ = 64; /* Capability 64: Support for graceful restart */ + *buf++ = 0; /* Capability data length, will be fixed later */ + data = buf; + + put_u16(buf, caps->gr_time); + buf[0] |= caps->gr_flags; + buf += 2; + + WALK_AF_CAPS(caps, ac) + if (ac->gr_able) + { + put_af3(buf, ac->afi); + buf[3] = ac->gr_af_flags; + buf += 4; + } + + data[-1] = buf - data; + } + + if (caps->as4_support) + { + *buf++ = 65; /* Capability 65: Support for 4-octet AS number */ + *buf++ = 4; /* Capability data length */ + put_u32(buf, p->public_as); + buf += 4; + } + + if (any_add_path) + { + *buf++ = 69; /* Capability 69: Support for ADD-PATH */ + *buf++ = 0; /* Capability data length, will be fixed later */ + data = buf; + + WALK_AF_CAPS(caps, ac) + if (ac->add_path) + { + put_af3(buf, ac->afi); + buf[3] = ac->add_path; + buf += 4; + } + + data[-1] = buf - data; + } + + if (caps->enhanced_refresh) + { + *buf++ = 70; /* Capability 70: Support for enhanced route refresh */ + *buf++ = 0; /* Capability data length 
*/ + } return buf; } -static byte * -bgp_put_cap_gr2(struct bgp_proto *p UNUSED, byte *buf) +static void +bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, int len) { - *buf++ = 64; /* Capability 64: Support for graceful restart */ - *buf++ = 2; /* Capability data length */ - put_u16(buf, 0); - return buf + 2; -} + struct bgp_proto *p = conn->bgp; + struct bgp_af_caps *ac; + int i, cl; + u32 af; -static byte * -bgp_put_cap_as4(struct bgp_proto *p, byte *buf) -{ - *buf++ = 65; /* Capability 65: Support for 4-octet AS number */ - *buf++ = 4; /* Capability data length */ - put_u32(buf, p->local_as); - return buf + 4; -} + while (len > 0) + { + if (len < 2 || len < (2 + pos[1])) + goto err; -static byte * -bgp_put_cap_add_path(struct bgp_proto *p, byte *buf) -{ - *buf++ = 69; /* Capability 69: Support for ADD-PATH */ - *buf++ = 4; /* Capability data length */ + /* Capability length */ + cl = pos[1]; + + /* Capability type */ + switch (pos[0]) + { + case 1: /* Multiprotocol capability, RFC 4760 */ + if (cl != 4) + goto err; - *buf++ = 0; /* Appropriate AF */ - *buf++ = BGP_AF; - *buf++ = 1; /* SAFI 1 */ + af = get_af4(pos+2); + ac = bgp_get_af_caps(caps, af); + ac->ready = 1; + break; - *buf++ = p->cf->add_path; + case 2: /* Route refresh capability, RFC 2918 */ + if (cl != 0) + goto err; - return buf; + caps->route_refresh = 1; + break; + + case 5: /* Extended next hop encoding capability, RFC 5549 */ + if (cl % 6) + goto err; + + for (i = 0; i < cl; i += 6) + { + /* Specified only for IPv4 prefixes with IPv6 next hops */ + if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) || + (get_u16(pos+2+i+4) != BGP_AFI_IPV6)) + continue; + + af = get_af4(pos+2+i); + ac = bgp_get_af_caps(caps, af); + ac->ext_next_hop = 1; + } + break; + + case 6: /* Extended message length capability, RFC draft */ + if (cl != 0) + goto err; + + caps->ext_messages = 1; + break; + + case 64: /* Graceful restart capability, RFC 4724 */ + if (cl % 4 != 2) + goto err; + + /* Only 
the last instance is valid */ + WALK_AF_CAPS(caps, ac) + { + ac->gr_able = 0; + ac->gr_af_flags = 0; + } + + caps->gr_aware = 1; + caps->gr_flags = pos[2] & 0xf0; + caps->gr_time = get_u16(pos + 2) & 0x0fff; + + for (i = 2; i < cl; i += 4) + { + af = get_af3(pos+2+i); + ac = bgp_get_af_caps(caps, af); + ac->gr_able = 1; + ac->gr_af_flags = pos[2+i+3]; + } + break; + + case 65: /* AS4 capability, RFC 6793 */ + if (cl != 4) + goto err; + + caps->as4_support = 1; + caps->as4_number = get_u32(pos + 2); + break; + + case 69: /* ADD-PATH capability, RFC 7911 */ + if (cl % 4) + goto err; + + for (i = 0; i < cl; i += 4) + { + byte val = pos[2+i+3]; + if (!val || (val > BGP_ADD_PATH_FULL)) + { + log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring", + p->p.name, val); + break; + } + } + + for (i = 0; i < cl; i += 4) + { + af = get_af3(pos+2+i); + ac = bgp_get_af_caps(caps, af); + ac->add_path = pos[2+i+3]; + } + break; + + case 70: /* Enhanced route refresh capability, RFC 7313 */ + if (cl != 0) + goto err; + + caps->enhanced_refresh = 1; + break; + + /* We can safely ignore all other capabilities */ + } + + ADVANCE(pos, len, 2 + cl); + } + return; + +err: + bgp_error(conn, 2, 0, NULL, 0); + return; } -static byte * -bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf) +static int +bgp_read_options(struct bgp_conn *conn, byte *pos, int len) { - *buf++ = 70; /* Capability 70: Support for enhanced route refresh */ - *buf++ = 0; /* Capability data length */ - return buf; -} + struct bgp_proto *p = conn->bgp; + struct bgp_caps *caps; + int ol; + + /* Max number of announced AFIs is limited by max option length (255) */ + caps = alloca(sizeof(struct bgp_caps) + 64 * sizeof(struct bgp_af_caps)); + memset(caps, 0, sizeof(struct bgp_caps)); + + while (len > 0) + { + if ((len < 2) || (len < (2 + pos[1]))) + { bgp_error(conn, 2, 0, NULL, 0); return -1; } + + ol = pos[1]; + if (pos[0] == 2) + { + /* BGP capabilities, RFC 5492 */ + if (p->cf->capabilities) + 
bgp_read_capabilities(conn, caps, pos + 2, ol); + } + else + { + /* Unknown option */ + bgp_error(conn, 2, 4, pos, ol); /* FIXME: ol or ol+2 ? */ + return -1; + } + ADVANCE(pos, len, 2 + ol); + } + + uint n = sizeof(struct bgp_caps) + caps->af_count * sizeof(struct bgp_af_caps); + conn->remote_caps = mb_allocz(p->p.pool, n); + memcpy(conn->remote_caps, caps, n); + + return 0; +} static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; - byte *cap; - int cap_len; BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)", - BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id); + BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id); + buf[0] = BGP_VERSION; - put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS); + put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS); put_u16(buf+3, p->cf->hold_time); put_u32(buf+5, p->local_id); - if (conn->start_state == BSS_CONNECT_NOCAP) - { - BGP_TRACE(D_PACKETS, "Skipping capabilities"); - buf[9] = 0; - return buf + 10; - } + if (p->cf->capabilities) + { + /* Prepare local_caps and write capabilities to buffer */ + byte *end = bgp_write_capabilities(conn, buf+12); + uint len = end - (buf+12); + + buf[9] = len + 2; /* Optional parameters length */ + buf[10] = 2; /* Option 2: Capability list */ + buf[11] = len; /* Option data length */ + + return end; + } + else + { + /* Prepare empty local_caps */ + conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps)); + + buf[9] = 0; /* No optional parameters */ + return buf + 10; + } + + return buf; +} + +static void +bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) +{ + struct bgp_proto *p = conn->bgp; + struct bgp_conn *other; + u32 asn, hold, id; - /* Skipped 3 B for length field and Capabilities parameter header */ - cap = buf + 12; + /* Check state */ + if (conn->state != BS_OPENSENT) + { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } -#ifndef IPV6 - if 
(p->cf->advertise_ipv4) - cap = bgp_put_cap_ipv4(p, cap); -#endif + /* Check message contents */ + if (len < 29 || len != 29 + (uint) pkt[28]) + { bgp_error(conn, 1, 2, pkt+16, 2); return; } -#ifdef IPV6 - cap = bgp_put_cap_ipv6(p, cap); -#endif + if (pkt[19] != BGP_VERSION) + { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; } + + asn = get_u16(pkt+20); + hold = get_u16(pkt+22); + id = get_u32(pkt+24); + BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id); - if (p->cf->enable_refresh) - cap = bgp_put_cap_rr(p, cap); + if (bgp_read_options(conn, pkt+29, pkt[28]) < 0) + return; - if (p->cf->gr_mode == BGP_GR_ABLE) - cap = bgp_put_cap_gr1(p, cap); - else if (p->cf->gr_mode == BGP_GR_AWARE) - cap = bgp_put_cap_gr2(p, cap); + if (hold > 0 && hold < 3) + { bgp_error(conn, 2, 6, pkt+22, 2); return; } - if (p->cf->enable_as4) - cap = bgp_put_cap_as4(p, cap); + /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */ + if (!id || (p->is_internal && id == p->local_id)) + { bgp_error(conn, 2, 3, pkt+24, -4); return; } - if (p->cf->add_path) - cap = bgp_put_cap_add_path(p, cap); + struct bgp_caps *caps = conn->remote_caps; - if (p->cf->enable_refresh) - cap = bgp_put_cap_err(p, cap); + if (caps->as4_support) + { + u32 as4 = caps->as4_number; - if (p->cf->enable_extended_messages) - cap = bgp_put_cap_ext_msg(p, cap); + if ((as4 != asn) && (asn != AS_TRANS)) + log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name); - cap_len = cap - buf - 12; - if (cap_len > 0) - { - buf[9] = cap_len + 2; /* Optional params len */ - buf[10] = 2; /* Option: Capability list */ - buf[11] = cap_len; /* Option length */ - return cap; - } + if (as4 != p->remote_as) + { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; } + } else + { + if (asn != p->remote_as) + { bgp_error(conn, 2, 2, pkt+20, 2); return; } + } + + /* Check the other connection */ + other = (conn == &p->outgoing_conn) ? 
&p->incoming_conn : &p->outgoing_conn; + switch (other->state) + { + case BS_CONNECT: + case BS_ACTIVE: + /* Stop outgoing connection attempts */ + bgp_conn_enter_idle_state(other); + break; + + case BS_IDLE: + case BS_OPENSENT: + case BS_CLOSE: + break; + + case BS_OPENCONFIRM: + /* + * Description of collision detection rules in RFC 4271 is confusing and + * contradictory, but it is essentially: + * + * 1. Router with higher ID is dominant + * 2. If both have the same ID, router with higher ASN is dominant [RFC6286] + * 3. When both connections are in OpenConfirm state, one initiated by + * the dominant router is kept. + * + * The first line in the expression below evaluates whether the neighbor + * is dominant, the second line whether the new connection was initiated + * by the neighbor. If both are true (or both are false), we keep the new + * connection, otherwise we keep the old one. + */ + if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as))) + == (conn == &p->incoming_conn)) { - buf[9] = 0; /* No optional parameters */ - return buf + 10; + /* Should close the other connection */ + BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection"); + bgp_error(other, 6, 7, NULL, 0); + break; } + /* Fall thru */ + case BS_ESTABLISHED: + /* Should close this connection */ + BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection"); + bgp_error(conn, 6, 7, NULL, 0); + return; + + default: + bug("bgp_rx_open: Unknown state"); + } + + /* Update our local variables */ + conn->hold_time = MIN(hold, p->cf->hold_time); + conn->keepalive_time = p->cf->keepalive_time ? 
: conn->hold_time / 3; + conn->as4_session = conn->local_caps->as4_support && caps->as4_support; + conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages; + p->remote_id = id; + + DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", + conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session); + + bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE); + bgp_start_timer(conn->hold_timer, conn->hold_time); + bgp_conn_enter_openconfirm_state(conn); } -static uint -bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, uint remains) + +/* + * Next hop handling + */ + +#define REPORT(msg, args...) \ + ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); }) + +#define DISCARD(msg, args...) \ + ({ REPORT(msg, ## args); return; }) + +#define WITHDRAW(msg, args...) \ + ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) + +#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE" +#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute" +#define NO_NEXT_HOP "Missing NEXT_HOP attribute" +#define NO_LABEL_STACK "Missing MPLS stack" + + +static void +bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) { - byte *start = w; - ip_addr a; - int bytes; + struct bgp_proto *p = s->proto; + struct bgp_channel *c = s->channel; - while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr)))) - { - struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes)); - DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen); + if (c->cf->gw_mode == GW_DIRECT) + { + neighbor *nbr = NULL; - if (p->add_path_tx) - { - put_u32(w, px->path_id); - w += 4; - remains -= 4; - } + /* GW_DIRECT -> single_hop -> p->neigh != NULL */ + if (ipa_nonzero(gw)) + nbr = neigh_find2(&p->p, &gw, NULL, 0); + else if (ipa_nonzero(ll)) + nbr = neigh_find2(&p->p, &ll, p->neigh->iface, 0); - *w++ = px->n.pxlen; - bytes = (px->n.pxlen + 7) / 8; - a = px->n.prefix; - 
ipa_hton(a); - memcpy(w, &a, bytes); - w += bytes; - remains -= bytes + 1; - rem_node(&px->bucket_node); - bgp_free_prefix(p, px); - // fib_delete(&p->prefix_fib, px); - } - return w - start; + if (!nbr || (nbr->scope == SCOPE_HOST)) + WITHDRAW(BAD_NEXT_HOP); + + a->dest = RTD_UNICAST; + a->nh.gw = nbr->addr; + a->nh.iface = nbr->iface; + } + else /* GW_RECURSIVE */ + { + if (ipa_zero(gw)) + WITHDRAW(BAD_NEXT_HOP); + + rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6; + s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table); + + if (!s->mpls) + rta_apply_hostentry(a, s->hostentry, NULL); + + /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */ + } } static void -bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck) +bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum) { - while (!EMPTY_LIST(buck->prefixes)) - { - struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes)); - log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen); - rem_node(&px->bucket_node); - bgp_free_prefix(p, px); - // fib_delete(&p->prefix_fib, px); - } + if (lnum > MPLS_MAX_LABEL_STACK) + { + REPORT("Too many MPLS labels ($u)", lnum); + + a->dest = RTD_UNREACHABLE; + a->hostentry = NULL; + a->nh = (struct nexthop) { }; + return; + } + + /* Handle implicit NULL as empty MPLS stack */ + if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL)) + lnum = 0; + + if (s->channel->cf->gw_mode == GW_DIRECT) + { + a->nh.labels = lnum; + memcpy(a->nh.label, labels, 4*lnum); + } + else /* GW_RECURSIVE */ + { + mpls_label_stack ms; + + ms.len = lnum; + memcpy(ms.stack, labels, 4*lnum); + rta_apply_hostentry(a, s->hostentry, &ms); + } } -#ifndef IPV6 /* IPv4 version */ -static byte * -bgp_create_update(struct bgp_conn *conn, byte *buf) +static inline int +bgp_use_next_hop(struct bgp_export_state *s, eattr *a) { - struct bgp_proto *p = conn->bgp; - struct bgp_bucket *buck; - int remains = 
bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4; - byte *w; - int wd_size = 0; - int r_size = 0; - int a_size = 0; - - w = buf+2; - if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) - { - DBG("Withdrawn routes:\n"); - wd_size = bgp_encode_prefixes(p, w, buck, remains); - w += wd_size; - remains -= wd_size; - } - put_u16(buf, wd_size); + struct bgp_proto *p = s->proto; + ip_addr *nh = (void *) a->u.ptr->data; - if (!wd_size) - { - while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next) - { - if (EMPTY_LIST(buck->prefixes)) - { - DBG("Deleting empty bucket %p\n", buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - } - - DBG("Processing bucket %p\n", buck); - a_size = bgp_encode_attrs(p, w+2, buck->eattrs, remains - 1024); - - if (a_size < 0) - { - log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name); - bgp_flush_prefixes(p, buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - } - - put_u16(w, a_size); - w += a_size + 2; - r_size = bgp_encode_prefixes(p, w, buck, remains - a_size); - w += r_size; - break; - } - } - if (!a_size) /* Attributes not already encoded */ + if (s->channel->cf->next_hop_self) + return 0; + + if (s->channel->cf->next_hop_keep) + return 1; + + /* Keep it when explicitly set in export filter */ + if (a->type & EAF_FRESH) + return 1; + + /* Keep it when exported to internal peers */ + if (p->is_interior && ipa_nonzero(*nh)) + return 1; + + /* Keep it when forwarded between single-hop BGPs on the same iface */ + struct iface *ifa = (s->src && s->src->neigh) ? 
s->src->neigh->iface : NULL; + return p->neigh && (p->neigh->iface == ifa); +} + +static inline int +bgp_use_gateway(struct bgp_export_state *s) +{ + struct bgp_proto *p = s->proto; + rta *ra = s->route->attrs; + + if (s->channel->cf->next_hop_self) + return 0; + + /* We need one valid global gateway */ + if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw)) + return 0; + + /* Use it when exported to internal peers */ + if (p->is_interior) + return 1; + + /* Use it when forwarded to single-hop BGP peer on on the same iface */ + return p->neigh && (p->neigh->iface == ra->nh.iface); +} + +static void +bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) +{ + if (!a || !bgp_use_next_hop(s, a)) + { + if (bgp_use_gateway(s)) { - put_u16(w, 0); - w += 2; + rta *ra = s->route->attrs; + ip_addr nh[1] = { ra->nh.gw }; + bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); + + if (s->mpls) + { + u32 implicit_null = BGP_MPLS_NULL; + u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null; + uint lnum = ra->nh.labels ? ra->nh.labels : 1; + bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); + } } - if (wd_size || r_size) + else { - BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); - return w; + ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr }; + bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 
32 : 16); + + /* TODO: Use local MPLS assigned label */ + if (s->mpls) + bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK); } + } + + /* Check if next hop is valid */ + a = bgp_find_attr(*to, BA_NEXT_HOP); + if (!a) + WITHDRAW(NO_NEXT_HOP); + + ip_addr *nh = (void *) a->u.ptr->data; + ip_addr peer = s->proto->cf->remote_ip; + uint len = a->u.ptr->length; + + /* Forbid zero next hop */ + if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1]))) + WITHDRAW(BAD_NEXT_HOP); + + /* Forbid next hop equal to neighbor IP */ + if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1]))) + WITHDRAW(BAD_NEXT_HOP); + + /* Forbid next hop with non-matching AF */ + if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) && + !s->channel->ext_next_hop) + WITHDRAW(BAD_NEXT_HOP); + + /* Just check if MPLS stack */ + if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK)) + WITHDRAW(NO_LABEL_STACK); +} + +static uint +bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED) +{ + /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */ + ip_addr *nh = (void *) a->u.ptr->data; + uint len = a->u.ptr->length; + + ASSERT((len == 16) || (len == 32)); + + /* + * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This + * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference + * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped + * IPv6 address with IPv6 NLRI. 
+ */ + + if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0])) + { + put_ip4(buf, ipa_to_ip4(nh[0])); + return 4; + } + + put_ip6(buf, ipa_to_ip6(nh[0])); + + if (len == 32) + put_ip6(buf+16, ipa_to_ip6(nh[1])); + + return len; +} + +static void +bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a) +{ + struct bgp_channel *c = s->channel; + struct adata *ad = lp_alloc_adata(s->pool, 32); + ip_addr *nh = (void *) ad->data; + + if (len == 4) + { + nh[0] = ipa_from_ip4(get_ip4(data)); + nh[1] = IPA_NONE; + } + else if (len == 16) + { + nh[0] = ipa_from_ip6(get_ip6(data)); + nh[1] = IPA_NONE; + + if (ipa_is_link_local(nh[0])) + { nh[1] = nh[0]; nh[0] = IPA_NONE; } + } + else if (len == 32) + { + nh[0] = ipa_from_ip6(get_ip6(data)); + nh[1] = ipa_from_ip6(get_ip6(data+16)); + + if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1])) + nh[1] = IPA_NONE; + } else - return NULL; + bgp_parse_error(s, 9); + + if (ipa_zero(nh[1])) + ad->length = 16; + + if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) + WITHDRAW(BAD_NEXT_HOP); + + // XXXX validate next hop + + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, a, nh[0], nh[1]); } -static byte * -bgp_create_end_mark(struct bgp_conn *conn, byte *buf) +static uint +bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED) { - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending END-OF-RIB"); + ip_addr *nh = (void *) a->u.ptr->data; + uint len = a->u.ptr->length; - put_u32(buf, 0); - return buf+4; + ASSERT((len == 16) || (len == 32)); + + /* + * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This + * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference + * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped + * IPv6 address with VPNv6 NLRI. 
+ */ + + if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0])) + { + put_u64(buf, 0); /* VPN RD is 0 */ + put_ip4(buf+8, ipa_to_ip4(nh[0])); + return 12; + } + + put_u64(buf, 0); /* VPN RD is 0 */ + put_ip6(buf+8, ipa_to_ip6(nh[0])); + + if (len == 16) + return 24; + + put_u64(buf+24, 0); /* VPN RD is 0 */ + put_ip6(buf+32, ipa_to_ip6(nh[1])); + + return 48; } -#else /* IPv6 version */ +static void +bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a) +{ + struct bgp_channel *c = s->channel; + struct adata *ad = lp_alloc_adata(s->pool, 32); + ip_addr *nh = (void *) ad->data; -static inline int -same_iface(struct bgp_proto *p, ip_addr *ip) + if (len == 12) + { + nh[0] = ipa_from_ip4(get_ip4(data+8)); + nh[1] = IPA_NONE; + } + else if (len == 24) + { + nh[0] = ipa_from_ip6(get_ip6(data+8)); + nh[1] = IPA_NONE; + + if (ipa_is_link_local(nh[0])) + { nh[1] = nh[0]; nh[0] = IPA_NONE; } + } + else if (len == 48) + { + nh[0] = ipa_from_ip6(get_ip6(data+8)); + nh[1] = ipa_from_ip6(get_ip6(data+32)); + + if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1])) + nh[1] = IPA_NONE; + } + else + bgp_parse_error(s, 9); + + if (ipa_zero(nh[1])) + ad->length = 16; + + /* XXXX which error */ + if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0))) + bgp_parse_error(s, 9); + + if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) + WITHDRAW(BAD_NEXT_HOP); + + // XXXX validate next hop + + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, a, nh[0], nh[1]); +} + + + +static uint +bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED) { - neighbor *n = neigh_find(&p->p, ip, 0); - return n && p->neigh && n->iface == p->neigh->iface; + return 0; } -static byte * -bgp_create_update(struct bgp_conn *conn, byte *buf) +static void +bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED) { - 
struct bgp_proto *p = conn->bgp; - struct bgp_bucket *buck; - int size, second, rem_stored; - int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4; - byte *w, *w_stored, *tmp, *tstart; - ip_addr *ipp, ip, ip_ll; - ea_list *ea; - eattr *nh; + /* + * Although we expect no next hop and RFC 7606 7.11 states that attribute + * MP_REACH_NLRI with unexpected next hop length is considered malformed, + * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt. + */ + + return; +} + +static void +bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) +{ + /* NEXT_HOP shall not pass */ + if (a) + bgp_unset_attr(to, s->pool, BA_NEXT_HOP); +} + + +/* + * UPDATE + */ + +static void +bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) +{ + if (path_id != s->last_id) + { + s->last_src = rt_get_source(&s->proto->p, path_id); + s->last_id = path_id; + + rta_free(s->cached_rta); + s->cached_rta = NULL; + } + + if (!a0) + { + /* Route withdraw */ + rte_update2(&s->channel->c, n, NULL, s->last_src); + return; + } + + /* Prepare cached route attributes */ + if (s->cached_rta == NULL) + { + a0->src = s->last_src; + + /* Workaround for rta_lookup() breaking eattrs */ + ea_list *ea = a0->eattrs; + s->cached_rta = rta_lookup(a0); + a0->eattrs = ea; + } + + rta *a = rta_clone(s->cached_rta); + rte *e = rte_get_temp(a); + + e->pflags = 0; + e->u.bgp.suppressed = 0; + rte_update2(&s->channel->c, n, e, s->last_src); +} + +static void +bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen) +{ + u32 dummy = 0; + u32 *labels = mpls ? (u32 *) mpls->data : &dummy; + uint lnum = mpls ? 
(mpls->length / 4) : 1; + + for (uint i = 0; i < lnum; i++) + { + put_u24(*pos, labels[i] << 4); + ADVANCE(*pos, *size, 3); + } + + /* Add bottom-of-stack flag */ + (*pos)[-1] |= BGP_MPLS_BOS; + + *pxlen += 24 * lnum; +} + +static void +bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a) +{ + u32 labels[BGP_MPLS_MAX], label; + uint lnum = 0; + + do { + if (*pxlen < 24) + bgp_parse_error(s, 1); + + label = get_u24(*pos); + labels[lnum++] = label >> 4; + ADVANCE(*pos, *len, 3); + *pxlen -= 24; + + /* Withdraw: Magic label stack value 0x800000 according to RFC 3107, section 3, last paragraph */ + if (!a && !s->err_withdraw && (lnum == 1) && (label == BGP_MPLS_MAGIC)) + break; + } + while (!(label & BGP_MPLS_BOS)); + + if (!a) + return; - put_u16(buf, 0); - w = buf+4; + /* Attach MPLS attribute unless we already have one */ + if (!s->mpls_labels) + { + s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX); + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels); + } + + /* Overwrite data in the attribute */ + s->mpls_labels->length = 4*lnum; + memcpy(s->mpls_labels->data, labels, 4*lnum); + + /* Update next hop entry in rta */ + bgp_apply_mpls_labels(s, a, labels, lnum); + + /* Attributes were changed, invalidate cached entry */ + rta_free(s->cached_rta); + s->cached_rta = NULL; + + return; +} + +static uint +bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; - if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_ip4 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) { - DBG("Withdrawn routes:\n"); - tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8); - *tmp++ = 0; - *tmp++ = BGP_AF_IPV6; - *tmp++ = 1; - ea->attrs[0].u.ptr->length = 3 + 
bgp_encode_prefixes(p, tmp, buck, remains-11); - size = bgp_encode_attrs(p, w, ea, remains); - ASSERT(size >= 0); - w += size; - remains -= size; + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); } - else + + /* Encode prefix length */ + *pos = net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode prefix body */ + ip4_addr a = ip4_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + net_addr_ip4 net; + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) { - while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next) - { - if (EMPTY_LIST(buck->prefixes)) - { - DBG("Deleting empty bucket %p\n", buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - } - - DBG("Processing bucket %p\n", buck); - rem_stored = remains; - w_stored = w; - - size = bgp_encode_attrs(p, w, buck->eattrs, remains - 1024); - if (size < 0) - { - log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name); - bgp_flush_prefixes(p, buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - } - w += size; - remains -= size; - - /* We have two addresses here in NEXT_HOP eattr. Really. - Unless NEXT_HOP was modified by filter */ - nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); - ASSERT(nh); - second = (nh->u.ptr->length == NEXT_HOP_LENGTH); - ipp = (ip_addr *) nh->u.ptr->data; - ip = ipp[0]; - ip_ll = IPA_NONE; - - if (ipa_equal(ip, p->source_addr)) - ip_ll = p->local_link; - else - { - /* If we send a route with 'third party' next hop destinated - * in the same interface, we should also send a link local - * next hop address. 
We use the received one (stored in the - * other part of BA_NEXT_HOP eattr). If we didn't received - * it (for example it is a static route), we can't use - * 'third party' next hop and we have to use local IP address - * as next hop. Sending original next hop address without - * link local address seems to be a natural way to solve that - * problem, but it is contrary to RFC 2545 and Quagga does not - * accept such routes. - * - * There are two cases, either we have global IP, or - * IPA_NONE if the neighbor is link-local. For IPA_NONE, - * we suppose it is on the same iface, see bgp_update_attrs(). - */ - - if (ipa_zero(ip) || same_iface(p, &ip)) - { - if (second && ipa_nonzero(ipp[1])) - ip_ll = ipp[1]; - else - { - switch (p->cf->missing_lladdr) - { - case MLL_SELF: - ip = p->source_addr; - ip_ll = p->local_link; - break; - case MLL_DROP: - log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name); - w = w_stored; - remains = rem_stored; - bgp_flush_prefixes(p, buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - case MLL_IGNORE: - break; - } - } - } - } - - tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8); - *tmp++ = 0; - *tmp++ = BGP_AF_IPV6; - *tmp++ = 1; - - if (ipa_is_link_local(ip)) - ip = IPA_NONE; - - if (ipa_nonzero(ip_ll)) - { - *tmp++ = 32; - ipa_hton(ip); - memcpy(tmp, &ip, 16); - ipa_hton(ip_ll); - memcpy(tmp+16, &ip_ll, 16); - tmp += 32; - } - else - { - *tmp++ = 16; - ipa_hton(ip); - memcpy(tmp, &ip, 16); - tmp += 16; - } - - *tmp++ = 0; /* No SNPA information */ - tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1)); - ea->attrs[0].u.ptr->length = tmp - tstart; - size = bgp_encode_attrs(p, w, ea, remains); - ASSERT(size >= 0); - w += size; - break; - } + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); } - size = w - (buf+4); - put_u16(buf+2, size); - lp_flush(bgp_linpool); - if (size) + /* Decode 
prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + if (l > IP4_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip4_addr addr = IP4_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_IP4(ip4_ntoh(addr), l); + net_normalize_ip4(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } +} + + +static uint +bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_ip6 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) { - BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); - return w; + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); } - else - return NULL; + + /* Encode prefix length */ + *pos = net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode prefix body */ + ip6_addr a = ip6_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; } -static byte * -bgp_create_end_mark(struct bgp_conn *conn, byte *buf) +static void +bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) { - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending END-OF-RIB"); + while (len) + { + net_addr_ip6 net; + u32 path_id = 0; - put_u16(buf+0, 0); - put_u16(buf+2, 6); /* length 4-9 */ - buf += 4; + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); - /* Empty MP_UNREACH_NLRI atribute */ - *buf++ = BAF_OPTIONAL; - *buf++ = 
BA_MP_UNREACH_NLRI; - *buf++ = 3; /* Length 7-9 */ - *buf++ = 0; /* AFI */ - *buf++ = BGP_AF_IPV6; - *buf++ = 1; /* SAFI */ - return buf; -} + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } -#endif + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); -static inline byte * -bgp_create_route_refresh(struct bgp_conn *conn, byte *buf) -{ - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH"); + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); - /* Original original route refresh request, RFC 2918 */ - *buf++ = 0; - *buf++ = BGP_AF; - *buf++ = BGP_RR_REQUEST; - *buf++ = 1; /* SAFI */ - return buf; + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + if (l > IP6_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip6_addr addr = IP6_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_IP6(ip6_ntoh(addr), l); + net_normalize_ip6(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } } -static inline byte * -bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf) +static uint +bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) { - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR"); + byte *pos = buf; - /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */ - *buf++ = 0; - *buf++ = BGP_AF; - *buf++ = BGP_RR_BEGIN; - *buf++ = 1; /* SAFI */ - return buf; + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_vpn4 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); + } + + /* Encode prefix length */ + *pos = 64 + net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, 
&size, pos - 1); + + /* Encode route distinguisher */ + put_u64(pos, net->rd); + ADVANCE(pos, size, 8); + + /* Encode prefix body */ + ip4_addr a = ip4_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; } -static inline byte * -bgp_create_end_refresh(struct bgp_conn *conn, byte *buf) +static void +bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) { - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending END-OF-RR"); + while (len) + { + net_addr_vpn4 net; + u32 path_id = 0; - /* Demarcation of ending of route refresh (EoRR), RFC 7313 */ - *buf++ = 0; - *buf++ = BGP_AF; - *buf++ = BGP_RR_END; - *buf++ = 1; /* SAFI */ - return buf; + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + /* Decode route distinguisher */ + if (l < 64) + bgp_parse_error(s, 1); + + u64 rd = get_u64(pos); + ADVANCE(pos, len, 8); + l -= 64; + + if (l > IP4_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip4_addr addr = IP4_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd); + net_normalize_vpn4(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } } +static uint +bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_vpn6 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, 
px->path_id); + ADVANCE(pos, size, 4); + } + + /* Encode prefix length */ + *pos = 64 + net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode route distinguisher */ + put_u64(pos, net->rd); + ADVANCE(pos, size, 8); + + /* Encode prefix body */ + ip6_addr a = ip6_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + static void -bgp_create_header(byte *buf, uint len, uint type) +bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) { - memset(buf, 0xff, 16); /* Marker */ - put_u16(buf+16, len); - buf[18] = type; + while (len) + { + net_addr_vpn6 net; + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + /* Decode route distinguisher */ + if (l < 64) + bgp_parse_error(s, 1); + + u64 rd = get_u64(pos); + ADVANCE(pos, len, 8); + l -= 64; + + if (l > IP6_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip6_addr addr = IP6_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd); + net_normalize_vpn6(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } } -/** - * bgp_fire_tx - transmit packets - * @conn: connection - * - * Whenever the transmit buffers of the underlying TCP connection - * are free and we have any packets queued for sending, the socket functions - * call bgp_fire_tx() which takes care of selecting the highest priority packet - * queued (Notification > 
Keepalive > Open > Update), assembling its header - * and body and sending it to the connection. - */ -static int -bgp_fire_tx(struct bgp_conn *conn) + +static uint +bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) { - struct bgp_proto *p = conn->bgp; - uint s = conn->packets_to_send; - sock *sk = conn->sk; - byte *buf, *pkt, *end; - int type; + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= 4)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_flow4 *net = (void *) px->net; + uint flen = net->length - sizeof(net_addr_flow4); - if (!sk) + /* Encode path ID */ + if (s->add_path) { - conn->packets_to_send = 0; - return 0; + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); } - buf = sk->tbuf; - pkt = buf + BGP_HEADER_LENGTH; - if (s & (1 << PKT_SCHEDULE_CLOSE)) + if (flen > size) + break; + + /* Copy whole flow data including length */ + memcpy(pos, net->data, flen); + ADVANCE(pos, size, flen); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) { - /* We can finally close connection and enter idle state */ - bgp_conn_enter_idle_state(conn); - return 0; + if (len < 4) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); } - if (s & (1 << PKT_NOTIFICATION)) + + if (len < 2) + bgp_parse_error(s, 1); + + /* Decode flow length */ + uint hlen = flow_hdr_length(pos); + uint dlen = flow_read_length(pos); + uint flen = hlen + dlen; + byte *data = pos + hlen; + + if (len < flen) + bgp_parse_error(s, 1); + + /* Validate flow data */ + enum flow_validated_state r = flow4_validate(data, dlen); + if (r != FLOW_ST_VALID) { - s = 1 << PKT_SCHEDULE_CLOSE; - type = PKT_NOTIFICATION; - end = bgp_create_notification(conn, pkt); + log(L_REMOTE "%s: Invalid flow route: %s", 
s->proto->p.name, flow_validated_state_str(r)); + bgp_parse_error(s, 1); } - else if (s & (1 << PKT_KEEPALIVE)) + + if (data[0] != FLOW_TYPE_DST_PREFIX) { - s &= ~(1 << PKT_KEEPALIVE); - type = PKT_KEEPALIVE; - end = pkt; /* Keepalives carry no data */ - BGP_TRACE(D_PACKETS, "Sending KEEPALIVE"); - bgp_start_timer(conn->keepalive_timer, conn->keepalive_time); + log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name); + bgp_parse_error(s, 1); } - else if (s & (1 << PKT_OPEN)) + + /* Decode dst prefix */ + ip4_addr px = IP4_NONE; + uint pxlen = data[1]; + + // FIXME: Use some generic function + memcpy(&px, data+2, BYTES(pxlen)); + px = ip4_and(ip4_ntoh(px), ip4_mkmask(pxlen)); + + /* Prepare the flow */ + net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen); + net_fill_flow4(n, px, pxlen, pos, flen); + ADVANCE(pos, len, flen); + + bgp_rte_update(s, n, path_id, a); + } +} + + +static uint +bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= 4)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_flow6 *net = (void *) px->net; + uint flen = net->length - sizeof(net_addr_flow6); + + /* Encode path ID */ + if (s->add_path) { - s &= ~(1 << PKT_OPEN); - type = PKT_OPEN; - end = bgp_create_open(conn, pkt); + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); } - else if (s & (1 << PKT_ROUTE_REFRESH)) + + if (flen > size) + break; + + /* Copy whole flow data including length */ + memcpy(pos, net->data, flen); + ADVANCE(pos, size, flen); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) { - s &= ~(1 << PKT_ROUTE_REFRESH); - type = PKT_ROUTE_REFRESH; - end = bgp_create_route_refresh(conn, pkt); + if (len < 4) + bgp_parse_error(s, 1); + + 
path_id = get_u32(pos); + ADVANCE(pos, len, 4); } - else if (s & (1 << PKT_BEGIN_REFRESH)) + + if (len < 2) + bgp_parse_error(s, 1); + + /* Decode flow length */ + uint hlen = flow_hdr_length(pos); + uint dlen = flow_read_length(pos); + uint flen = hlen + dlen; + byte *data = pos + hlen; + + if (len < flen) + bgp_parse_error(s, 1); + + /* Validate flow data */ + enum flow_validated_state r = flow6_validate(data, dlen); + if (r != FLOW_ST_VALID) { - s &= ~(1 << PKT_BEGIN_REFRESH); - type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */ - end = bgp_create_begin_refresh(conn, pkt); + log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r)); + bgp_parse_error(s, 1); } - else if (s & (1 << PKT_UPDATE)) + + if (data[0] != FLOW_TYPE_DST_PREFIX) { - type = PKT_UPDATE; - end = bgp_create_update(conn, pkt); + log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name); + bgp_parse_error(s, 1); + } - if (!end) - { - /* No update to send, perhaps we need to send End-of-RIB or EoRR */ + /* Decode dst prefix */ + ip6_addr px = IP6_NONE; + uint pxlen = data[1]; - conn->packets_to_send = 0; + // FIXME: Use some generic function + memcpy(&px, data+2, BYTES(pxlen)); + px = ip6_and(ip6_ntoh(px), ip6_mkmask(pxlen)); - if (p->feed_state == BFS_LOADED) - { - type = PKT_UPDATE; - end = bgp_create_end_mark(conn, pkt); - } + /* Prepare the flow */ + net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen); + net_fill_flow6(n, px, pxlen, pos, flen); + ADVANCE(pos, len, flen); - else if (p->feed_state == BFS_REFRESHED) - { - type = PKT_ROUTE_REFRESH; - end = bgp_create_end_refresh(conn, pkt); - } + bgp_rte_update(s, n, path_id, a); + } +} - else /* Really nothing to send */ - return 0; - p->feed_state = BFS_NONE; - } - } - else - return 0; +static const struct bgp_af_desc bgp_af_table[] = { + { + .afi = BGP_AF_IPV4, + .net = NET_IP4, + .name = "ipv4", + .encode_nlri = bgp_encode_nlri_ip4, + .decode_nlri = bgp_decode_nlri_ip4, + .encode_next_hop = 
bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV4_MC, + .net = NET_IP4, + .name = "ipv4-mc", + .encode_nlri = bgp_encode_nlri_ip4, + .decode_nlri = bgp_decode_nlri_ip4, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV4_MPLS, + .net = NET_IP4, + .mpls = 1, + .name = "ipv4-mpls", + .encode_nlri = bgp_encode_nlri_ip4, + .decode_nlri = bgp_decode_nlri_ip4, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV6, + .net = NET_IP6, + .name = "ipv6", + .encode_nlri = bgp_encode_nlri_ip6, + .decode_nlri = bgp_decode_nlri_ip6, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV6_MC, + .net = NET_IP6, + .name = "ipv6-mc", + .encode_nlri = bgp_encode_nlri_ip6, + .decode_nlri = bgp_decode_nlri_ip6, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV6_MPLS, + .net = NET_IP6, + .mpls = 1, + .name = "ipv6-mpls", + .encode_nlri = bgp_encode_nlri_ip6, + .decode_nlri = bgp_decode_nlri_ip6, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN4_MPLS, + .net = NET_VPN4, + .mpls = 1, + .name = "vpn4-mpls", + .encode_nlri = bgp_encode_nlri_vpn4, + .decode_nlri = bgp_decode_nlri_vpn4, + .encode_next_hop = bgp_encode_next_hop_vpn, + .decode_next_hop = bgp_decode_next_hop_vpn, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN6_MPLS, + .net = NET_VPN6, + .mpls = 1, + .name = "vpn6-mpls", + .encode_nlri = bgp_encode_nlri_vpn6, + 
.decode_nlri = bgp_decode_nlri_vpn6, + .encode_next_hop = bgp_encode_next_hop_vpn, + .decode_next_hop = bgp_decode_next_hop_vpn, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN4_MC, + .net = NET_VPN4, + .name = "vpn4-mc", + .encode_nlri = bgp_encode_nlri_vpn4, + .decode_nlri = bgp_decode_nlri_vpn4, + .encode_next_hop = bgp_encode_next_hop_vpn, + .decode_next_hop = bgp_decode_next_hop_vpn, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN6_MC, + .net = NET_VPN6, + .name = "vpn6-mc", + .encode_nlri = bgp_encode_nlri_vpn6, + .decode_nlri = bgp_decode_nlri_vpn6, + .encode_next_hop = bgp_encode_next_hop_vpn, + .decode_next_hop = bgp_decode_next_hop_vpn, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_FLOW4, + .net = NET_FLOW4, + .no_igp = 1, + .name = "flow4", + .encode_nlri = bgp_encode_nlri_flow4, + .decode_nlri = bgp_decode_nlri_flow4, + .encode_next_hop = bgp_encode_next_hop_none, + .decode_next_hop = bgp_decode_next_hop_none, + .update_next_hop = bgp_update_next_hop_none, + }, + { + .afi = BGP_AF_FLOW6, + .net = NET_FLOW6, + .no_igp = 1, + .name = "flow6", + .encode_nlri = bgp_encode_nlri_flow6, + .decode_nlri = bgp_decode_nlri_flow6, + .encode_next_hop = bgp_encode_next_hop_none, + .decode_next_hop = bgp_decode_next_hop_none, + .update_next_hop = bgp_update_next_hop_none, + }, +}; - conn->packets_to_send = s; - bgp_create_header(buf, end - buf, type); - return sk_send(sk, end - buf); +const struct bgp_af_desc * +bgp_get_af_desc(u32 afi) +{ + uint i; + for (i = 0; i < ARRAY_SIZE(bgp_af_table); i++) + if (bgp_af_table[i].afi == afi) + return &bgp_af_table[i]; + + return NULL; } -/** - * bgp_schedule_packet - schedule a packet for transmission - * @conn: connection - * @type: packet type - * - * Schedule a packet of type @type to be sent as soon as possible. 
- */ -void -bgp_schedule_packet(struct bgp_conn *conn, int type) +static inline uint +bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - DBG("BGP: Scheduling packet type %d\n", type); - conn->packets_to_send |= 1 << type; - if (conn->sk && conn->sk->tpos == conn->sk->tbuf && !ev_active(conn->tx_ev)) - ev_schedule(conn->tx_ev); + return s->channel->desc->encode_nlri(s, buck, buf, end - buf); } -void -bgp_kick_tx(void *vconn) +static inline uint +bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf) { - struct bgp_conn *conn = vconn; - - DBG("BGP: kicking TX\n"); - while (bgp_fire_tx(conn) > 0) - ; + return s->channel->desc->encode_next_hop(s, nh, buf, 255); } void -bgp_tx(sock *sk) +bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to) { - struct bgp_conn *conn = sk->data; - - DBG("BGP: TX hook\n"); - while (bgp_fire_tx(conn) > 0) - ; + s->channel->desc->update_next_hop(s, a, to); } -/* Capatibility negotiation as per RFC 2842 */ +#define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024) -void -bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) +static byte * +bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - // struct bgp_proto *p = conn->bgp; - int i, cl; + /* + * 2 B Withdrawn Routes Length (zero) + * --- IPv4 Withdrawn Routes NLRI (unused) + * 2 B Total Path Attribute Length + * var Path Attributes + * var IPv4 Network Layer Reachability Information + */ + + int lr, la; + + la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH); + if (la < 0) + { + /* Attribute list too long */ + bgp_withdraw_bucket(s->channel, buck); + return NULL; + } - while (len > 0) - { - if (len < 2 || len < 2 + opt[1]) - goto err; + put_u16(buf+0, 0); + put_u16(buf+2, la); - cl = opt[1]; + lr = bgp_encode_nlri(s, buck, buf+4+la, end); - switch (opt[0]) - { - case 2: /* Route refresh capability, RFC 2918 */ - if (cl != 0) - goto err; - 
conn->peer_refresh_support = 1; - break; + return buf+4+la+lr; +} - case 6: /* Extended message length capability, draft */ - if (cl != 0) - goto err; - conn->peer_ext_messages_support = 1; - break; +static byte * +bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) +{ + /* + * 2 B IPv4 Withdrawn Routes Length (zero) + * --- IPv4 Withdrawn Routes NLRI (unused) + * 2 B Total Path Attribute Length + * 1 B MP_REACH_NLRI hdr - Attribute Flags + * 1 B MP_REACH_NLRI hdr - Attribute Type Code + * 2 B MP_REACH_NLRI hdr - Length of Attribute Data + * 2 B MP_REACH_NLRI data - Address Family Identifier + * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier + * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address + * var MP_REACH_NLRI data - Network Address of Next Hop + * 1 B MP_REACH_NLRI data - Reserved (zero) + * var MP_REACH_NLRI data - Network Layer Reachability Information + * var Rest of Path Attributes + * --- IPv4 Network Layer Reachability Information (unused) + */ + + int lh, lr, la; /* Lengths of next hop, NLRI and attributes */ + + /* Begin of MP_REACH_NLRI atribute */ + buf[4] = BAF_OPTIONAL | BAF_EXT_LEN; + buf[5] = BA_MP_REACH_NLRI; + put_u16(buf+6, 0); /* Will be fixed later */ + put_af3(buf+8, s->channel->afi); + byte *pos = buf+11; + + /* Encode attributes to temporary buffer */ + byte *abuf = alloca(MAX_ATTRS_LENGTH); + la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH); + if (la < 0) + { + /* Attribute list too long */ + bgp_withdraw_bucket(s->channel, buck); + return NULL; + } - case 64: /* Graceful restart capability, RFC 4724 */ - if (cl % 4 != 2) - goto err; - conn->peer_gr_aware = 1; - conn->peer_gr_able = 0; - conn->peer_gr_time = get_u16(opt + 2) & 0x0fff; - conn->peer_gr_flags = opt[2] & 0xf0; - conn->peer_gr_aflags = 0; - for (i = 2; i < cl; i += 4) - if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */ - { - conn->peer_gr_able = 1; - 
conn->peer_gr_aflags = opt[2+i+3]; - } - break; + /* Encode the next hop */ + lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1); + *pos = lh; + pos += 1+lh; - case 65: /* AS4 capability, RFC 4893 */ - if (cl != 4) - goto err; - conn->peer_as4_support = 1; - if (conn->bgp->cf->enable_as4) - conn->advertised_as = get_u32(opt + 2); - break; + /* Reserved field */ + *pos++ = 0; - case 69: /* ADD-PATH capability, RFC 7911 */ - if (cl % 4) - goto err; - for (i = 0; i < cl; i += 4) - if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */ - conn->peer_add_path = opt[2+i+3]; - if (conn->peer_add_path > ADD_PATH_FULL) - goto err; - break; + /* Encode the NLRI */ + lr = bgp_encode_nlri(s, buck, pos, end - la); + pos += lr; - case 70: /* Enhanced route refresh capability, RFC 7313 */ - if (cl != 0) - goto err; - conn->peer_enhanced_refresh_support = 1; - break; + /* End of MP_REACH_NLRI atribute, update data length */ + put_u16(buf+6, pos-buf-8); - /* We can safely ignore all other capabilities */ - } - len -= 2 + cl; - opt += 2 + cl; - } - return; + /* Copy remaining attributes */ + memcpy(pos, abuf, la); + pos += la; - err: - bgp_error(conn, 2, 0, NULL, 0); - return; + /* Initial UPDATE fields */ + put_u16(buf+0, 0); + put_u16(buf+2, pos-buf-4); + + return pos; } -static int -bgp_parse_options(struct bgp_conn *conn, byte *opt, int len) +#undef MAX_ATTRS_LENGTH + +static byte * +bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - struct bgp_proto *p = conn->bgp; - int ol; + /* + * 2 B Withdrawn Routes Length + * var IPv4 Withdrawn Routes NLRI + * 2 B Total Path Attribute Length (zero) + * --- Path Attributes (unused) + * --- IPv4 Network Layer Reachability Information (unused) + */ - while (len > 0) - { - if (len < 2 || len < 2 + opt[1]) - { bgp_error(conn, 2, 0, NULL, 0); return 0; } -#ifdef LOCAL_DEBUG - { - int i; - DBG("\tOption %02x:", opt[0]); - for(i=0; i<opt[1]; i++) - DBG(" %02x", 
opt[2+i]); - DBG("\n"); - } -#endif + uint len = bgp_encode_nlri(s, buck, buf+2, end); - ol = opt[1]; - switch (opt[0]) - { - case 2: - if (conn->start_state == BSS_CONNECT_NOCAP) - BGP_TRACE(D_PACKETS, "Ignoring received capabilities"); - else - bgp_parse_capabilities(conn, opt + 2, ol); - break; + put_u16(buf+0, len); + put_u16(buf+2+len, 0); - default: - /* - * BGP specs don't tell us to send which option - * we didn't recognize, but it's common practice - * to do so. Also, capability negotiation with - * Cisco routers doesn't work without that. - */ - bgp_error(conn, 2, 4, opt, ol); - return 0; - } - len -= 2 + ol; - opt += 2 + ol; - } - return 0; + return buf+4+len; } -static void -bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) +static byte * +bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - struct bgp_conn *other; - struct bgp_proto *p = conn->bgp; - unsigned hold; - u16 base_as; - u32 id; + /* + * 2 B Withdrawn Routes Length (zero) + * --- IPv4 Withdrawn Routes NLRI (unused) + * 2 B Total Path Attribute Length + * 1 B MP_UNREACH_NLRI hdr - Attribute Flags + * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code + * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data + * 2 B MP_UNREACH_NLRI data - Address Family Identifier + * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier + * var MP_UNREACH_NLRI data - Network Layer Reachability Information + * --- IPv4 Network Layer Reachability Information (unused) + */ + + uint len = bgp_encode_nlri(s, buck, buf+11, end); - /* Check state */ - if (conn->state != BS_OPENSENT) - { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } + put_u16(buf+0, 0); + put_u16(buf+2, 7+len); - /* Check message contents */ - if (len < 29 || len != 29U + pkt[28]) - { bgp_error(conn, 1, 2, pkt+16, 2); return; } - if (pkt[19] != BGP_VERSION) - { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */ - 
conn->advertised_as = base_as = get_u16(pkt+20); - hold = get_u16(pkt+22); - id = get_u32(pkt+24); - BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id); + /* Begin of MP_UNREACH_NLRI atribute */ + buf[4] = BAF_OPTIONAL | BAF_EXT_LEN; + buf[5] = BA_MP_UNREACH_NLRI; + put_u16(buf+6, 3+len); + put_af3(buf+8, s->channel->afi); - if (bgp_parse_options(conn, pkt+29, pkt[28])) - return; + return buf+11+len; +} - if (hold > 0 && hold < 3) - { bgp_error(conn, 2, 6, pkt+22, 2); return; } +static byte * +bgp_create_update(struct bgp_channel *c, byte *buf) +{ + struct bgp_proto *p = (void *) c->c.proto; + struct bgp_bucket *buck; + byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH); + byte *res = NULL; + +again: ; + + /* Initialize write state */ + struct bgp_write_state s = { + .proto = p, + .channel = c, + .pool = bgp_linpool, + .as4_session = p->as4_session, + .add_path = c->add_path_tx, + .mpls = c->desc->mpls, + }; + + /* Try unreachable bucket */ + if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) + { + res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ? 
+ bgp_create_ip_unreach(&s, buck, buf, end): + bgp_create_mp_unreach(&s, buck, buf, end); - /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */ - if (!id || (p->is_internal && id == p->local_id)) - { bgp_error(conn, 2, 3, pkt+24, -4); return; } + goto done; + } - if ((conn->advertised_as != base_as) && (base_as != AS_TRANS)) - log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name); + /* Try reachable buckets */ + if (!EMPTY_LIST(c->bucket_queue)) + { + buck = HEAD(c->bucket_queue); - if (conn->advertised_as != p->remote_as) + /* Cleanup empty buckets */ + if (EMPTY_LIST(buck->prefixes)) { - if (conn->peer_as4_support) - { - u32 val = htonl(conn->advertised_as); - bgp_error(conn, 2, 2, (byte *) &val, 4); - } - else - bgp_error(conn, 2, 2, pkt+20, 2); - - return; + bgp_free_bucket(c, buck); + goto again; } - /* Check the other connection */ - other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn; - switch (other->state) - { - case BS_CONNECT: - case BS_ACTIVE: - /* Stop outgoing connection attempts */ - bgp_conn_enter_idle_state(other); - break; + res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ? + bgp_create_ip_reach(&s, buck, buf, end): + bgp_create_mp_reach(&s, buck, buf, end); - case BS_IDLE: - case BS_OPENSENT: - case BS_CLOSE: - break; + if (EMPTY_LIST(buck->prefixes)) + bgp_free_bucket(c, buck); + else + bgp_defer_bucket(c, buck); - case BS_OPENCONFIRM: - /* - * Description of collision detection rules in RFC 4271 is confusing and - * contradictory, but it is essentially: - * - * 1. Router with higher ID is dominant - * 2. If both have the same ID, router with higher ASN is dominant [RFC6286] - * 3. When both connections are in OpenConfirm state, one initiated by - * the dominant router is kept. - * - * The first line in the expression below evaluates whether the neighbor - * is dominant, the second line whether the new connection was initiated - * by the neighbor. 
If both are true (or both are false), we keep the new - * connection, otherwise we keep the old one. - */ - if (((p->local_id < id) || ((p->local_id == id) && (p->local_as < p->remote_as))) - == (conn == &p->incoming_conn)) - { - /* Should close the other connection */ - BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection"); - bgp_error(other, 6, 7, NULL, 0); - break; - } - /* Fall thru */ - case BS_ESTABLISHED: - /* Should close this connection */ - BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection"); - bgp_error(conn, 6, 7, NULL, 0); - return; - default: - bug("bgp_rx_open: Unknown state"); - } + if (!res) + goto again; - /* Update our local variables */ - conn->hold_time = MIN(hold, p->cf->hold_time); - conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3; - p->remote_id = id; - p->as4_session = p->cf->enable_as4 && conn->peer_as4_support; - p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX); - p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX); - p->gr_ready = p->cf->gr_mode && conn->peer_gr_able; - p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support; - - /* Update RA mode */ - if (p->add_path_tx) - p->p.accept_ra_types = RA_ANY; - else if (p->cf->secondary) - p->p.accept_ra_types = RA_ACCEPTED; - else - p->p.accept_ra_types = RA_OPTIMAL; + goto done; + } - DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session); + /* No more prefixes to send */ + return NULL; - bgp_schedule_packet(conn, PKT_KEEPALIVE); - bgp_start_timer(conn->hold_timer, conn->hold_time); - bgp_conn_enter_openconfirm_state(conn); +done: + BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); + lp_flush(s.pool); + + return res; } +static byte * +bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf) +{ + 
/* Empty update packet */ + put_u32(buf, 0); + + return buf+4; +} -static inline void -bgp_rx_end_mark(struct bgp_proto *p) +static byte * +bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf) { - BGP_TRACE(D_PACKETS, "Got END-OF-RIB"); + put_u16(buf+0, 0); + put_u16(buf+2, 6); /* length 4--9 */ - if (p->load_state == BFS_LOADING) - p->load_state = BFS_NONE; + /* Empty MP_UNREACH_NLRI atribute */ + buf[4] = BAF_OPTIONAL; + buf[5] = BA_MP_UNREACH_NLRI; + buf[6] = 3; /* Length 7--9 */ + put_af3(buf+7, c->afi); - if (p->p.gr_recovery) - proto_graceful_restart_unlock(&p->p); - - if (p->gr_active) - bgp_graceful_restart_done(p); -} - - -#define DECODE_PREFIX(pp, ll) do { \ - if (p->add_path_rx) \ - { \ - if (ll < 5) { err=1; goto done; } \ - path_id = get_u32(pp); \ - pp += 4; \ - ll -= 4; \ - } \ - int b = *pp++; \ - int q; \ - ll--; \ - if (b > BITS_PER_IP_ADDRESS) { err=10; goto done; } \ - q = (b+7) / 8; \ - if (ll < q) { err=1; goto done; } \ - memcpy(&prefix, pp, q); \ - pp += q; \ - ll -= q; \ - ipa_ntoh(prefix); \ - prefix = ipa_and(prefix, ipa_mkmask(b)); \ - pxlen = b; \ -} while (0) + return buf+10; +} +static byte * +bgp_create_end_mark(struct bgp_channel *c, byte *buf) +{ + struct bgp_proto *p = (void *) c->c.proto; + + BGP_TRACE(D_PACKETS, "Sending END-OF-RIB"); + + return (c->afi == BGP_AF_IPV4) ? 
+ bgp_create_ip_end_mark(c, buf): + bgp_create_mp_end_mark(c, buf); +} static inline void -bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen, - u32 path_id, u32 *last_id, struct rte_src **src, - rta *a0, rta **a) +bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi) { - if (path_id != *last_id) - { - *src = rt_get_source(&p->p, path_id); - *last_id = path_id; + struct bgp_proto *p = s->proto; + struct bgp_channel *c = bgp_get_channel(p, afi); - if (*a) - { - rta_free(*a); - *a = NULL; - } - } + BGP_TRACE(D_PACKETS, "Got END-OF-RIB"); - /* Prepare cached route attributes */ - if (!*a) - { - a0->src = *src; + if (!c) + DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi)); - /* Workaround for rta_lookup() breaking eattrs */ - ea_list *ea = a0->eattrs; - *a = rta_lookup(a0); - a0->eattrs = ea; - } + if (c->load_state == BFS_LOADING) + c->load_state = BFS_NONE; - net *n = net_get(p->p.table, prefix, pxlen); - rte *e = rte_get_temp(rta_clone(*a)); - e->net = n; - e->pflags = 0; - e->u.bgp.suppressed = 0; - rte_update2(p->p.main_ahook, n, e, *src); + if (p->p.gr_recovery) + channel_graceful_restart_unlock(&c->c); + + if (c->gr_active) + bgp_graceful_restart_done(c); } static inline void -bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen, - u32 path_id, u32 *last_id, struct rte_src **src) +bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len) { - if (path_id != *last_id) - { - *src = rt_find_source(&p->p, path_id); - *last_id = path_id; - } + struct bgp_channel *c = bgp_get_channel(s->proto, afi); + rta *a = NULL; - net *n = net_find(p->p.table, prefix, pxlen); - rte_update2( p->p.main_ahook, n, NULL, *src); -} + if (!c) + DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi)); -static inline int -bgp_set_next_hop(struct bgp_proto *p, rta *a) -{ - struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); - ip_addr *nexthop = (ip_addr *) nh->u.ptr->data; + s->channel = c; + s->add_path = 
c->add_path_rx; + s->mpls = c->desc->mpls; -#ifdef IPV6 - int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]); + s->last_id = 0; + s->last_src = s->proto->p.main_source; - /* First address should not be link-local, but may be zero in direct mode */ - if (ipa_is_link_local(*nexthop)) - *nexthop = IPA_NONE; -#else - int second = 0; -#endif + /* + * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not + * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for + * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by + * decode_next_hop hooks) by restoring a->eattrs afterwards. + */ - if (p->cf->gw_mode == GW_DIRECT) - { - neighbor *ng = NULL; - - if (ipa_nonzero(*nexthop)) - ng = neigh_find(&p->p, nexthop, 0); - else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */ - ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0); + if (ea) + { + a = allocz(RTA_MAX_SIZE); - /* Fallback */ - if (!ng) - ng = p->neigh; + a->source = RTS_BGP; + a->scope = SCOPE_UNIVERSE; + a->from = s->proto->cf->remote_ip; + a->eattrs = ea; - if (ng->scope == SCOPE_HOST) - return 0; + c->desc->decode_next_hop(s, nh, nh_len, a); - a->dest = RTD_ROUTER; - a->gw = ng->addr; - a->iface = ng->iface; - a->hostentry = NULL; - a->igp_metric = 0; - } - else /* GW_RECURSIVE */ - { - if (ipa_zero(*nexthop)) - return 0; + /* Handle withdraw during next hop decoding */ + if (s->err_withdraw) + a = NULL; + } - rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second); - } + c->desc->decode_nlri(s, nlri, len, a); - return 1; + rta_free(s->cached_rta); + s->cached_rta = NULL; } -#ifndef IPV6 /* IPv4 version */ - static void -bgp_do_rx_update(struct bgp_conn *conn, - byte *withdrawn, int withdrawn_len, - byte *nlri, int nlri_len, - byte *attrs, int attr_len) +bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) { struct bgp_proto *p = conn->bgp; - struct rte_src *src = 
p->p.main_source; - rta *a0, *a = NULL; - ip_addr prefix; - int pxlen, err = 0; - u32 path_id = 0; - u32 last_id = 0; + ea_list *ea = NULL; - /* Check for End-of-RIB marker */ - if (!withdrawn_len && !attr_len && !nlri_len) - { - bgp_rx_end_mark(p); - return; - } + BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE"); - /* Withdraw routes */ - while (withdrawn_len) - { - DECODE_PREFIX(withdrawn, withdrawn_len); - DBG("Withdraw %I/%d\n", prefix, pxlen); + /* Workaround for some BGP implementations that skip initial KEEPALIVE */ + if (conn->state == BS_OPENCONFIRM) + bgp_conn_enter_established_state(conn); - bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src); - } + if (conn->state != BS_ESTABLISHED) + { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } - if (!attr_len && !nlri_len) /* shortcut */ - return; + bgp_start_timer(conn->hold_timer, conn->hold_time); - a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len); + /* Initialize parse state */ + struct bgp_parse_state s = { + .proto = p, + .pool = bgp_linpool, + .as4_session = p->as4_session, + }; - if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */ - return; + /* Parse error handler */ + if (setjmp(s.err_jmpbuf)) + { + bgp_error(conn, 3, s.err_subcode, NULL, 0); + goto done; + } + + /* Check minimal length */ + if (len < 23) + { bgp_error(conn, 1, 2, pkt+16, 2); return; } - if (a0 && nlri_len && !bgp_set_next_hop(p, a0)) - a0 = NULL; + /* Skip fixed header */ + uint pos = 19; - last_id = 0; - src = p->p.main_source; + /* + * UPDATE message format + * + * 2 B IPv4 Withdrawn Routes Length + * var IPv4 Withdrawn Routes NLRI + * 2 B Total Path Attribute Length + * var Path Attributes + * var IPv4 Reachable Routes NLRI + */ - while (nlri_len) - { - DECODE_PREFIX(nlri, nlri_len); - DBG("Add %I/%d\n", prefix, pxlen); + s.ip_unreach_len = get_u16(pkt + pos); + s.ip_unreach_nlri = pkt + pos + 2; + pos += 2 + s.ip_unreach_len; - if (a0) - bgp_rte_update(p, prefix, 
pxlen, path_id, &last_id, &src, a0, &a); - else /* Forced withdraw as a result of soft error */ - bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src); - } + if (pos + 2 > len) + bgp_parse_error(&s, 1); - done: - if (a) - rta_free(a); + s.attr_len = get_u16(pkt + pos); + s.attrs = pkt + pos + 2; + pos += 2 + s.attr_len; - if (err) - bgp_error(conn, 3, err, NULL, 0); + if (pos > len) + bgp_parse_error(&s, 1); + + s.ip_reach_len = len - pos; + s.ip_reach_nlri = pkt + pos; + + + if (s.attr_len) + ea = bgp_decode_attrs(&s, s.attrs, s.attr_len); + else + ea = NULL; + /* Check for End-of-RIB marker */ + if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len) + { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; } + + /* Check for MP End-of-RIB marker */ + if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len && + !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af) + { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; } + + if (s.ip_unreach_len) + bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0); + + if (s.mp_unreach_len) + bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0); + + if (s.ip_reach_len) + bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len, + ea, s.ip_next_hop_data, s.ip_next_hop_len); + + if (s.mp_reach_len) + bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len, + ea, s.mp_next_hop_data, s.mp_next_hop_len); + +done: + rta_free(s.cached_rta); + lp_flush(s.pool); return; } -#else /* IPv6 version */ -#define DO_NLRI(name) \ - x = p->name##_start; \ - len = len0 = p->name##_len; \ - if (len) \ - { \ - if (len < 3) { err=9; goto done; } \ - af = get_u16(x); \ - x += 3; \ - len -= 3; \ - DBG("\tNLRI AF=%d sub=%d len=%d\n", af, x[-1], len);\ - } \ - else \ - af = 0; \ - if (af == BGP_AF_IPV6) +/* + * ROUTE-REFRESH + */ -static void -bgp_attach_next_hop(rta *a0, byte *x) +static inline byte * +bgp_create_route_refresh(struct bgp_channel *c, byte *buf) { 
- ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH); - memcpy(nh, x+1, 16); - ipa_ntoh(nh[0]); + struct bgp_proto *p = (void *) c->c.proto; - /* We store received link local address in the other part of BA_NEXT_HOP eattr. */ - if (*x == 32) - { - memcpy(nh+1, x+17, 16); - ipa_ntoh(nh[1]); - } - else - nh[1] = IPA_NONE; + BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH"); + + /* Original route refresh request, RFC 2918 */ + put_af4(buf, c->afi); + buf[2] = BGP_RR_REQUEST; + + return buf+4; +} + +static inline byte * +bgp_create_begin_refresh(struct bgp_channel *c, byte *buf) +{ + struct bgp_proto *p = (void *) c->c.proto; + + BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR"); + + /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */ + put_af4(buf, c->afi); + buf[2] = BGP_RR_BEGIN; + + return buf+4; } +static inline byte * +bgp_create_end_refresh(struct bgp_channel *c, byte *buf) +{ + struct bgp_proto *p = (void *) c->c.proto; + + BGP_TRACE(D_PACKETS, "Sending END-OF-RR"); + + /* Demarcation of ending of route refresh (EoRR), RFC 7313 */ + put_af4(buf, c->afi); + buf[2] = BGP_RR_END; + + return buf+4; +} static void -bgp_do_rx_update(struct bgp_conn *conn, - byte *withdrawn UNUSED, int withdrawn_len, - byte *nlri UNUSED, int nlri_len, - byte *attrs, int attr_len) +bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) { struct bgp_proto *p = conn->bgp; - struct rte_src *src = p->p.main_source; - byte *x; - int len, len0; - unsigned af; - rta *a0, *a = NULL; - ip_addr prefix; - int pxlen, err = 0; - u32 path_id = 0; - u32 last_id = 0; - - p->mp_reach_len = 0; - p->mp_unreach_len = 0; - a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0); - - if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */ - return; - /* Check for End-of-RIB marker */ - if ((attr_len < 8) && !withdrawn_len && !nlri_len && !p->mp_reach_len && - (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == 
BGP_AF_IPV6)) - { - bgp_rx_end_mark(p); - return; - } + if (conn->state != BS_ESTABLISHED) + { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } - DO_NLRI(mp_unreach) - { - while (len) - { - DECODE_PREFIX(x, len); - DBG("Withdraw %I/%d\n", prefix, pxlen); - bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src); - } - } + if (!conn->local_caps->route_refresh) + { bgp_error(conn, 1, 3, pkt+18, 1); return; } - DO_NLRI(mp_reach) - { - /* Create fake NEXT_HOP attribute */ - if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2) - { err = 9; goto done; } + if (len < (BGP_HEADER_LENGTH + 4)) + { bgp_error(conn, 1, 2, pkt+16, 2); return; } - if (a0) - bgp_attach_next_hop(a0, x); + if (len > (BGP_HEADER_LENGTH + 4)) + { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; } - /* Also ignore one reserved byte */ - len -= *x + 2; - x += *x + 2; + struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19)); + if (!c) + { + log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring", + p->p.name, pkt[21], get_u16(pkt+19), pkt[22]); + return; + } - if (a0 && ! bgp_set_next_hop(p, a0)) - a0 = NULL; + /* RFC 7313 redefined reserved field as RR message subtype */ + uint subtype = p->enhanced_refresh ? 
pkt[21] : BGP_RR_REQUEST; - last_id = 0; - src = p->p.main_source; + switch (subtype) + { + case BGP_RR_REQUEST: + BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); + channel_request_feeding(&c->c); + break; - while (len) - { - DECODE_PREFIX(x, len); - DBG("Add %I/%d\n", prefix, pxlen); + case BGP_RR_BEGIN: + BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR"); + bgp_refresh_begin(c); + break; - if (a0) - bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a); - else /* Forced withdraw as a result of soft error */ - bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src); - } - } + case BGP_RR_END: + BGP_TRACE(D_PACKETS, "Got END-OF-RR"); + bgp_refresh_end(c); + break; - done: - if (a) - rta_free(a); + default: + log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring", + p->p.name, subtype); + break; + } +} - if (err) /* Use subcode 9, not err */ - bgp_error(conn, 3, 9, NULL, 0); +static inline struct bgp_channel * +bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn) +{ + uint i = conn->last_channel; - return; + /* Try the last channel, but at most several times */ + if ((conn->channels_to_send & (1 << i)) && + (conn->last_channel_count < 16)) + goto found; + + /* Find channel with non-zero channels_to_send */ + do + { + i++; + if (i >= p->channel_count) + i = 0; + } + while (! 
(conn->channels_to_send & (1 << i))); + + /* Use that channel */ + conn->last_channel = i; + conn->last_channel_count = 0; + +found: + conn->last_channel_count++; + return p->channel_map[i]; } -#endif +static inline int +bgp_send(struct bgp_conn *conn, uint type, uint len) +{ + sock *sk = conn->sk; + byte *buf = sk->tbuf; -static void -bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) + memset(buf, 0xff, 16); /* Marker */ + put_u16(buf+16, len); + buf[18] = type; + + return sk_send(sk, len); +} + +/** + * bgp_fire_tx - transmit packets + * @conn: connection + * + * Whenever the transmit buffers of the underlying TCP connection + * are free and we have any packets queued for sending, the socket functions + * call bgp_fire_tx() which takes care of selecting the highest priority packet + * queued (Notification > Keepalive > Open > Update), assembling its header + * and body and sending it to the connection. + */ +static int +bgp_fire_tx(struct bgp_conn *conn) { struct bgp_proto *p = conn->bgp; - byte *withdrawn, *attrs, *nlri; - uint withdrawn_len, attr_len, nlri_len; + struct bgp_channel *c; + byte *buf, *pkt, *end; + uint s; - BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE"); + if (!conn->sk) + return 0; - /* Workaround for some BGP implementations that skip initial KEEPALIVE */ - if (conn->state == BS_OPENCONFIRM) - bgp_conn_enter_established_state(conn); + buf = conn->sk->tbuf; + pkt = buf + BGP_HEADER_LENGTH; + s = conn->packets_to_send; - if (conn->state != BS_ESTABLISHED) - { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } - bgp_start_timer(conn->hold_timer, conn->hold_time); + if (s & (1 << PKT_SCHEDULE_CLOSE)) + { + /* We can finally close connection and enter idle state */ + bgp_conn_enter_idle_state(conn); + return 0; + } + if (s & (1 << PKT_NOTIFICATION)) + { + conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE; + end = bgp_create_notification(conn, pkt); + return bgp_send(conn, PKT_NOTIFICATION, end - buf); + } + else if (s 
& (1 << PKT_KEEPALIVE)) + { + conn->packets_to_send &= ~(1 << PKT_KEEPALIVE); + BGP_TRACE(D_PACKETS, "Sending KEEPALIVE"); + bgp_start_timer(conn->keepalive_timer, conn->keepalive_time); + return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH); + } + else if (s & (1 << PKT_OPEN)) + { + conn->packets_to_send &= ~(1 << PKT_OPEN); + end = bgp_create_open(conn, pkt); + return bgp_send(conn, PKT_OPEN, end - buf); + } + else while (conn->channels_to_send) + { + c = bgp_get_channel_to_send(p, conn); + s = c->packets_to_send; - /* Find parts of the packet and check sizes */ - if (len < 23) + if (s & (1 << PKT_ROUTE_REFRESH)) { - bgp_error(conn, 1, 2, pkt+16, 2); - return; + c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH); + end = bgp_create_route_refresh(c, pkt); + return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf); } - withdrawn = pkt + 21; - withdrawn_len = get_u16(pkt + 19); - if (withdrawn_len + 23 > len) - goto malformed; - attrs = withdrawn + withdrawn_len + 2; - attr_len = get_u16(attrs - 2); - if (withdrawn_len + attr_len + 23 > len) - goto malformed; - nlri = attrs + attr_len; - nlri_len = len - withdrawn_len - attr_len - 23; - if (!attr_len && nlri_len) - goto malformed; - DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len); - - lp_flush(bgp_linpool); - - bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len); - return; + else if (s & (1 << PKT_BEGIN_REFRESH)) + { + /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */ + c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH); + end = bgp_create_begin_refresh(c, pkt); + return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf); + } + else if (s & (1 << PKT_UPDATE)) + { + end = bgp_create_update(c, pkt); + if (end) + return bgp_send(conn, PKT_UPDATE, end - buf); + + /* No update to send, perhaps we need to send End-of-RIB or EoRR */ + c->packets_to_send = 0; + conn->channels_to_send &= ~(1 << c->index); + + if (c->feed_state == BFS_LOADED) + { + 
c->feed_state = BFS_NONE; + end = bgp_create_end_mark(c, pkt); + return bgp_send(conn, PKT_UPDATE, end - buf); + } + + else if (c->feed_state == BFS_REFRESHED) + { + c->feed_state = BFS_NONE; + end = bgp_create_end_refresh(c, pkt); + return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf); + } + } + else if (s) + bug("Channel packets_to_send: %x", s); + + c->packets_to_send = 0; + conn->channels_to_send &= ~(1 << c->index); + } + + return 0; +} + +/** + * bgp_schedule_packet - schedule a packet for transmission + * @conn: connection + * @c: channel + * @type: packet type + * + * Schedule a packet of type @type to be sent as soon as possible. + */ +void +bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) +{ + ASSERT(conn->sk); + + DBG("BGP: Scheduling packet type %d\n", type); + + if (c) + { + if (! conn->channels_to_send) + { + conn->last_channel = c->index; + conn->last_channel_count = 0; + } + + c->packets_to_send |= 1 << type; + conn->channels_to_send |= 1 << c->index; + } + else + conn->packets_to_send |= 1 << type; + + if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); +} + +void +bgp_kick_tx(void *vconn) +{ + struct bgp_conn *conn = vconn; + + DBG("BGP: kicking TX\n"); + while (bgp_fire_tx(conn) > 0) + ; +} -malformed: - bgp_error(conn, 3, 1, NULL, 0); +void +bgp_tx(sock *sk) +{ + struct bgp_conn *conn = sk->data; + + DBG("BGP: TX hook\n"); + while (bgp_fire_tx(conn) > 0) + ; } + static struct { byte major, minor; byte *msg; @@ -1480,17 +2668,16 @@ static struct { * which might be static string or given temporary buffer. 
*/ const char * -bgp_error_dsc(unsigned code, unsigned subcode) +bgp_error_dsc(uint code, uint subcode) { static char buff[32]; - unsigned i; + uint i; + for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++) if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode) - { - return bgp_msg_table[i].msg; - } + return bgp_msg_table[i].msg; - bsprintf(buff, "Unknown error %d.%d", code, subcode); + bsprintf(buff, "Unknown error %u.%u", code, subcode); return buff; } @@ -1521,10 +2708,10 @@ bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp) } void -bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len) +bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len) { byte argbuf[256], *t = argbuf; - unsigned i; + uint i; /* Don't report Cease messages generated by myself */ if (code == 6 && class == BE_BGP_TX) @@ -1566,57 +2753,35 @@ static void bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len) { struct bgp_proto *p = conn->bgp; + if (len < 21) - { - bgp_error(conn, 1, 2, pkt+16, 2); - return; - } + { bgp_error(conn, 1, 2, pkt+16, 2); return; } - unsigned code = pkt[19]; - unsigned subcode = pkt[20]; + uint code = pkt[19]; + uint subcode = pkt[20]; int err = (code != 6); bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21); bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode); -#ifndef IPV6 - if ((code == 2) && ((subcode == 4) || (subcode == 7)) - /* Error related to capability: - * 4 - Peer does not support capabilities at all. - * 7 - Peer request some capability. Strange unless it is IPv6 only peer. 
- */ - && (p->cf->capabilities == 2) - /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */ - && (conn->start_state == BSS_CONNECT) - /* Failed connection attempt have used capabilities */ - && (p->cf->remote_as <= 0xFFFF)) - /* Not possible with disabled capabilities */ - { - /* We try connect without capabilities */ - log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name); - p->start_state = BSS_CONNECT_NOCAP; - err = 0; - } -#endif - bgp_conn_enter_close_state(conn); - bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE); + bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE); - if (err) - { - bgp_update_startup_delay(p); - bgp_stop(p, 0, NULL, 0); - } + if (err) + { + bgp_update_startup_delay(p); + bgp_stop(p, 0, NULL, 0); + } else + { + uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0); + if (p->cf->disable_after_cease & subcode_bit) { - uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0); - if (p->cf->disable_after_cease & subcode_bit) - { - log(L_INFO "%s: Disabled after Cease notification", p->p.name); - p->startup_delay = 0; - p->p.disabled = 1; - } + log(L_INFO "%s: Disabled after Cease notification", p->p.name); + p->startup_delay = 0; + p->p.disabled = 1; } + } } static void @@ -1626,64 +2791,12 @@ bgp_rx_keepalive(struct bgp_conn *conn) BGP_TRACE(D_PACKETS, "Got KEEPALIVE"); bgp_start_timer(conn->hold_timer, conn->hold_time); - switch (conn->state) - { - case BS_OPENCONFIRM: - bgp_conn_enter_established_state(conn); - break; - case BS_ESTABLISHED: - break; - default: - bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); - } -} -static void -bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) -{ - struct bgp_proto *p = conn->bgp; + if (conn->state == BS_OPENCONFIRM) + { bgp_conn_enter_established_state(conn); return; } if (conn->state != BS_ESTABLISHED) - { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } - - if (!p->cf->enable_refresh) - 
{ bgp_error(conn, 1, 3, pkt+18, 1); return; } - - if (len < (BGP_HEADER_LENGTH + 4)) - { bgp_error(conn, 1, 2, pkt+16, 2); return; } - - if (len > (BGP_HEADER_LENGTH + 4)) - { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; } - - /* FIXME - we ignore AFI/SAFI values, as we support - just one value and even an error code for an invalid - request is not defined */ - - /* RFC 7313 redefined reserved field as RR message subtype */ - uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST; - - switch (subtype) - { - case BGP_RR_REQUEST: - BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - proto_request_feeding(&p->p); - break; - - case BGP_RR_BEGIN: - BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR"); - bgp_refresh_begin(p); - break; - - case BGP_RR_END: - BGP_TRACE(D_PACKETS, "Got END-OF-RR"); - bgp_refresh_end(p); - break; - - default: - log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring", - p->p.name, subtype); - break; - } + bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); } @@ -1697,7 +2810,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) * packet handler according to the packet type. 
*/ static void -bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len) +bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len) { byte type = pkt[18]; @@ -1707,14 +2820,14 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len) mrt_dump_bgp_packet(conn, pkt, len); switch (type) - { - case PKT_OPEN: return bgp_rx_open(conn, pkt, len); - case PKT_UPDATE: return bgp_rx_update(conn, pkt, len); - case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len); - case PKT_KEEPALIVE: return bgp_rx_keepalive(conn); - case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len); - default: bgp_error(conn, 1, 3, pkt+18, 1); - } + { + case PKT_OPEN: return bgp_rx_open(conn, pkt, len); + case PKT_UPDATE: return bgp_rx_update(conn, pkt, len); + case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len); + case PKT_KEEPALIVE: return bgp_rx_keepalive(conn); + case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len); + default: bgp_error(conn, 1, 3, pkt+18, 1); + } } /** @@ -1731,10 +2844,9 @@ int bgp_rx(sock *sk, uint size) { struct bgp_conn *conn = sk->data; - struct bgp_proto *p = conn->bgp; byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; - unsigned i, len; + uint i, len; DBG("BGP: RX hook: Got %d bytes\n", size); while (end >= pkt_start + BGP_HEADER_LENGTH) @@ -1748,7 +2860,7 @@ bgp_rx(sock *sk, uint size) break; } len = get_u16(pkt_start+16); - if (len < BGP_HEADER_LENGTH || len > bgp_max_packet_length(p)) + if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn))) { bgp_error(conn, 1, 2, pkt_start+16, 2); break; diff --git a/proto/ospf/Makefile b/proto/ospf/Makefile index f90222cf..39e74f71 100644 --- a/proto/ospf/Makefile +++ b/proto/ospf/Makefile @@ -1,5 +1,6 @@ -source=ospf.c topology.c packet.c hello.c neighbor.c iface.c dbdes.c lsreq.c lsupd.c lsack.c lsalib.c rt.c -root-rel=../../ -dir-name=proto/ospf +src := dbdes.c hello.c iface.c lsack.c lsalib.c lsreq.c lsupd.c neighbor.c ospf.c packet.c rt.c 
topology.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 46b8b238..005f4381 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -78,14 +78,66 @@ static void ospf_proto_finish(void) { struct ospf_config *cf = OSPF_CFG; + struct ospf_area_config *ac; + struct ospf_iface_patt *ic; + + /* Define default channel */ + if (EMPTY_LIST(this_proto->channels)) + { + uint net_type = this_proto->net_type = ospf_cfg_is_v2() ? NET_IP4 : NET_IP6; + channel_config_new(NULL, net_label[net_type], net_type, this_proto); + } + + /* Propagate global instance ID to interfaces */ + if (cf->instance_id_set) + { + WALK_LIST(ac, cf->area_list) + WALK_LIST(ic, ac->patt_list) + if (!ic->instance_id_set) + { ic->instance_id = cf->instance_id; ic->instance_id_set = 1; } + + WALK_LIST(ic, cf->vlink_list) + if (!ic->instance_id_set) + { ic->instance_id = cf->instance_id; ic->instance_id_set = 1; } + } + + if (ospf_cfg_is_v3()) + { + uint ipv4 = (this_proto->net_type == NET_IP4); + uint base = (ipv4 ? 64 : 0) + (cf->af_mc ? 
32 : 0); + + /* RFC 5838 - OSPFv3-AF */ + if (cf->af_ext) + { + /* RFC 5838 2.1 - instance IDs based on AFs */ + WALK_LIST(ac, cf->area_list) + WALK_LIST(ic, ac->patt_list) + { + if (!ic->instance_id_set) + ic->instance_id = base; + else if (ic->instance_id >= 128) + log(L_WARN "Instance ID %d from unassigned/private range", ic->instance_id); + else if ((ic->instance_id < base) || (ic->instance_id >= (base + 32))) + cf_error("Instance ID %d invalid for given channel type", ic->instance_id); + } + + /* RFC 5838 2.8 - vlinks limited to IPv6 unicast */ + if ((ipv4 || cf->af_mc) && !EMPTY_LIST(cf->vlink_list)) + cf_error("Vlinks not supported in AFs other than IPv6 unicast"); + } + else + { + if (ipv4 || cf->af_mc) + cf_error("Different channel type"); + } + } if (EMPTY_LIST(cf->area_list)) - cf_error( "No configured areas in OSPF"); + cf_error("No configured areas in OSPF"); int areano = 0; int backbone = 0; int nssa = 0; - struct ospf_area_config *ac; WALK_LIST(ac, cf->area_list) { areano++; @@ -98,7 +150,7 @@ ospf_proto_finish(void) cf->abr = areano > 1; /* Route export or NSSA translation (RFC 3101 3.1) */ - cf->asbr = (this_proto->out_filter != FILTER_REJECT) || (nssa && cf->abr); + cf->asbr = (proto_cf_main_channel(this_proto)->out_filter != FILTER_REJECT) || (nssa && cf->abr); if (cf->abr && !backbone) { @@ -122,7 +174,7 @@ static inline void ospf_check_defcost(int cost) { if ((cost <= 0) || (cost >= LSINFINITY)) - cf_error("Default cost must be in range 1-%d", LSINFINITY-1); + cf_error("Default cost must be in range 1-%u", LSINFINITY-1); } static inline void @@ -135,8 +187,8 @@ ospf_check_auth(void) CF_DECLS -CF_KEYWORDS(OSPF, AREA, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID) -CF_KEYWORDS(NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT) +CF_KEYWORDS(OSPF, V2, V3, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID) +CF_KEYWORDS(AREA, NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT) CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, 
DEAD, TYPE, BROADCAST, BCAST, DEFAULT) CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP) CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC, TTL, SECURITY) @@ -144,38 +196,69 @@ CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK, ONLY, BFD) CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY) CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY, LENGTH) -CF_KEYWORDS(SECONDARY, MERGE, LSA, SUPPRESSION) +CF_KEYWORDS(SECONDARY, MERGE, LSA, SUPPRESSION, MULTICAST, RFC5838) %type <ld> lsadb_args -%type <i> nbma_eligible +%type <i> ospf_variant ospf_af_mc nbma_eligible +%type <cc> ospf_channel_start ospf_channel CF_GRAMMAR CF_ADDTO(proto, ospf_proto '}' { ospf_proto_finish(); } ) -ospf_proto_start: proto_start OSPF { - this_proto = proto_config_new(&proto_ospf, $1); - init_list(&OSPF_CFG->area_list); - init_list(&OSPF_CFG->vlink_list); - OSPF_CFG->tick = OSPF_DEFAULT_TICK; - OSPF_CFG->ospf2 = OSPF_IS_V2; - } +ospf_variant: + OSPF { $$ = 1; } + | OSPF V2 { $$ = 1; } + | OSPF V3 { $$ = 0; } ; +ospf_proto_start: proto_start ospf_variant +{ + this_proto = proto_config_new(&proto_ospf, $1); + this_proto->net_type = $2 ? 
NET_IP4 : 0; + + init_list(&OSPF_CFG->area_list); + init_list(&OSPF_CFG->vlink_list); + OSPF_CFG->ecmp = rt_default_ecmp; + OSPF_CFG->tick = OSPF_DEFAULT_TICK; + OSPF_CFG->ospf2 = $2; + OSPF_CFG->af_ext = !$2; +}; + ospf_proto: ospf_proto_start proto_name '{' | ospf_proto ospf_proto_item ';' ; +ospf_af_mc: + { $$ = 0; } + | MULTICAST { $$ = 1; } + ; + +/* We redefine proto_channel to add multicast flag */ +ospf_channel_start: net_type ospf_af_mc +{ + /* TODO: change name for multicast channels */ + $$ = this_channel = channel_config_get(NULL, net_label[$1], $1, this_proto); + + /* Save the multicast flag */ + if (this_channel == proto_cf_main_channel(this_proto)) + OSPF_CFG->af_mc = $2; +}; + +ospf_channel: ospf_channel_start channel_opt_list channel_end; + ospf_proto_item: proto_item + | ospf_channel { this_proto->net_type = $1->net_type; } | RFC1583COMPAT bool { OSPF_CFG->rfc1583 = $2; } + | RFC5838 bool { OSPF_CFG->af_ext = $2; if (!ospf_cfg_is_v3()) cf_error("RFC5838 option requires OSPFv3"); } | STUB ROUTER bool { OSPF_CFG->stub_router = $3; } | ECMP bool { OSPF_CFG->ecmp = $2 ? OSPF_DEFAULT_ECMP_LIMIT : 0; } - | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? $4 : 0; if ($4 < 0) cf_error("ECMP limit cannot be negative"); } + | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? 
$4 : 0; } | MERGE EXTERNAL bool { OSPF_CFG->merge_external = $3; } - | TICK expr { OSPF_CFG->tick = $2; if($2<=0) cf_error("Tick must be greater than zero"); } - | INSTANCE ID expr { OSPF_CFG->instance_id = $3; if (($3<0) || ($3>255)) cf_error("Instance ID must be in range 0-255"); } + | TICK expr { OSPF_CFG->tick = $2; if($2 <= 0) cf_error("Tick must be greater than zero"); } + | INSTANCE ID expr { OSPF_CFG->instance_id = $3; OSPF_CFG->instance_id_set = 1; if ($3 > 255) cf_error("Instance ID must be in range 0-255"); } | ospf_area ; @@ -225,10 +308,10 @@ ospf_stubnet: ; ospf_stubnet_start: - prefix { + net_ip { this_stubnet = cfg_allocz(sizeof(struct ospf_stubnet_config)); add_tail(&this_area->stubnet_list, NODE this_stubnet); - this_stubnet->px = $1; + this_stubnet->prefix = $1; this_stubnet->cost = COST_D; } ; @@ -280,7 +363,6 @@ ospf_vlink_start: VIRTUAL LINK idval OSPF_PATT->inftransdelay = INFTRANSDELAY_D; OSPF_PATT->deadc = DEADC_D; OSPF_PATT->type = OSPF_IT_VLINK; - OSPF_PATT->instance_id = OSPF_CFG->instance_id; init_list(&OSPF_PATT->nbma_list); reset_passwords(); } @@ -305,7 +387,7 @@ ospf_iface_item: | REAL BROADCAST bool { OSPF_PATT->real_bcast = $3; if (!ospf_cfg_is_v2()) cf_error("Real broadcast option requires OSPFv2"); } | PTP NETMASK bool { OSPF_PATT->ptp_netmask = $3; if (!ospf_cfg_is_v2()) cf_error("PtP netmask option requires OSPFv2"); } | TRANSMIT DELAY expr { OSPF_PATT->inftransdelay = $3 ; if (($3<=0) || ($3>65535)) cf_error("Transmit delay must be in range 1-65535"); } - | PRIORITY expr { OSPF_PATT->priority = $2 ; if (($2<0) || ($2>255)) cf_error("Priority must be in range 0-255"); } + | PRIORITY expr { OSPF_PATT->priority = $2 ; if ($2>255) cf_error("Priority must be in range 0-255"); } | STRICT NONBROADCAST bool { OSPF_PATT->strictnbma = $3 ; } | STUB bool { OSPF_PATT->stub = $2 ; } | CHECK LINK bool { OSPF_PATT->check_link = $3; } @@ -324,7 +406,6 @@ ospf_iface_item: | TTL SECURITY bool { OSPF_PATT->ttl_security = $3; } | TTL SECURITY TX 
ONLY { OSPF_PATT->ttl_security = 2; } | BFD bool { OSPF_PATT->bfd = $2; cf_check_bfd($2); } - | SECONDARY bool { OSPF_PATT->bsd_secondary = $2; } | password_list { ospf_check_auth(); } ; @@ -335,12 +416,11 @@ pref_list: pref_item: pref_base pref_opt ';' ; -pref_base: prefix +pref_base: net_ip { this_pref = cfg_allocz(sizeof(struct area_net_config)); add_tail(this_nets, NODE this_pref); - this_pref->px.addr = $1.addr; - this_pref->px.len = $1.len; + this_pref->prefix = $1; } ; @@ -382,8 +462,8 @@ ospf_iface_start: OSPF_PATT->priority = PRIORITY_D; OSPF_PATT->deadc = DEADC_D; OSPF_PATT->type = OSPF_IT_UNDEF; - OSPF_PATT->instance_id = OSPF_CFG->instance_id; init_list(&OSPF_PATT->nbma_list); + OSPF_PATT->check_link = 1; OSPF_PATT->ptp_netmask = 2; /* not specified */ OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL; OSPF_PATT->tx_priority = sk_priority_control; @@ -393,7 +473,7 @@ ospf_iface_start: ospf_instance_id: /* empty */ - | INSTANCE expr { OSPF_PATT->instance_id = $2; if (($2<0) || ($2>255)) cf_error("Instance ID must be in range 0-255"); } + | INSTANCE expr { OSPF_PATT->instance_id = $2; OSPF_PATT->instance_id_set = 1; if ($2 > 255) cf_error("Instance ID must be in range 0-255"); } ; ospf_iface_patt_list: @@ -420,7 +500,7 @@ CF_ADDTO(dynamic_attr, OSPF_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEM CF_ADDTO(dynamic_attr, OSPF_ROUTER_ID { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID | EAF_TEMP, T_QUAD, EA_OSPF_ROUTER_ID); }) CF_CLI_HELP(SHOW OSPF, ..., [[Show information about OSPF protocol]]); -CF_CLI(SHOW OSPF, optsym, [<name>], [[Show information about OSPF protocol XXX]]) +CF_CLI(SHOW OSPF, optsym, [<name>], [[Show information about OSPF protocol]]) { ospf_sh(proto_get_named($3, &proto_ospf)); }; CF_CLI(SHOW OSPF NEIGHBORS, optsym opttext, [<name>] [\"<interface>\"], [[Show information about OSPF neighbors]]) diff --git a/proto/ospf/dbdes.c b/proto/ospf/dbdes.c index d6904343..f211935f 100644 --- a/proto/ospf/dbdes.c +++ b/proto/ospf/dbdes.c @@ -39,7 +39,7 
@@ struct ospf_dbdes3_packet static inline uint -ospf_dbdes_hdrlen(struct ospf_proto *p UNUSED4 UNUSED6) +ospf_dbdes_hdrlen(struct ospf_proto *p) { return ospf_is_v2(p) ? sizeof(struct ospf_dbdes2_packet) : sizeof(struct ospf_dbdes3_packet); @@ -356,7 +356,7 @@ ospf_receive_dbdes(struct ospf_packet *pkt, struct ospf_iface *ifa, LOG_PKT_WARN("MTU mismatch with nbr %R on %s (remote %d, local %d)", n->rid, ifa->ifname, rcv_iface_mtu, ifa->iface->mtu); - if ((rcv_imms == DBDES_IMMS) && + if (((rcv_imms & DBDES_IMMS) == DBDES_IMMS) && (n->rid > p->router_id) && (plen == ospf_dbdes_hdrlen(p))) { @@ -428,7 +428,7 @@ ospf_receive_dbdes(struct ospf_packet *pkt, struct ospf_iface *ifa, } ospf_send_dbdes(p, n); - tm_start(n->dbdes_timer, n->ifa->rxmtint); + tm_start(n->dbdes_timer, n->ifa->rxmtint S); } else { diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c index e00487dc..e706ea0f 100644 --- a/proto/ospf/hello.c +++ b/proto/ospf/hello.c @@ -32,10 +32,7 @@ struct ospf_hello3_packet struct ospf_packet hdr; u32 iface_id; - u8 priority; - u8 options3; - u8 options2; - u8 options; + u32 options; u16 helloint; u16 deadint; u32 dr; @@ -74,7 +71,7 @@ ospf_send_hello(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) ((ifa->type == OSPF_IT_PTP) && !ifa->ptp_netmask)) ps->netmask = 0; else - ps->netmask = htonl(u32_mkmask(ifa->addr->pxlen)); + ps->netmask = htonl(u32_mkmask(ifa->addr->prefix.pxlen)); ps->helloint = ntohs(ifa->helloint); ps->options = ifa->oa->options; @@ -91,10 +88,7 @@ ospf_send_hello(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) struct ospf_hello3_packet *ps = (void *) pkt; ps->iface_id = htonl(ifa->iface_id); - ps->priority = ifa->priority; - ps->options3 = ifa->oa->options >> 16; - ps->options2 = ifa->oa->options >> 8; - ps->options = ifa->oa->options; + ps->options = ntohl(ifa->oa->options | (ifa->priority << 24)); ps->helloint = ntohs(ifa->helloint); ps->deadint = htons(ifa->deadint); ps->dr = htonl(ifa->drid); @@ -190,7 +184,8 @@ 
ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, struct ospf_proto *p = ifa->oa->po; const char *err_dsc = NULL; u32 rcv_iface_id, rcv_helloint, rcv_deadint, rcv_dr, rcv_bdr; - u8 rcv_options, rcv_priority; + uint rcv_options, rcv_priority; + uint loc_options = ifa->oa->options; u32 *neighbors; u32 neigh_count; uint plen, i, err_val = 0; @@ -198,7 +193,7 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, /* RFC 2328 10.5 */ /* - * We may not yet havethe associate neighbor, so we use Router ID from the + * We may not yet have the associate neighbor, so we use Router ID from the * packet instead of one from the neighbor structure for log messages. */ u32 rcv_rid = ntohl(pkt->routerid); @@ -227,7 +222,7 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, if ((ifa->type != OSPF_IT_VLINK) && (ifa->type != OSPF_IT_PTP) && - ((uint) pxlen != ifa->addr->pxlen)) + ((uint) pxlen != ifa->addr->prefix.pxlen)) DROP("prefix length mismatch", pxlen); neighbors = ps->neighbors; @@ -245,8 +240,8 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, rcv_deadint = ntohs(ps->deadint); rcv_dr = ntohl(ps->dr); rcv_bdr = ntohl(ps->bdr); - rcv_options = ps->options; - rcv_priority = ps->priority; + rcv_options = ntohl(ps->options) & 0x00FFFFFF; + rcv_priority = ntohl(ps->options) >> 24; neighbors = ps->neighbors; neigh_count = (plen - sizeof(struct ospf_hello3_packet)) / sizeof(u32); @@ -259,9 +254,13 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, DROP("dead interval mismatch", rcv_deadint); /* Check whether bits E, N match */ - if ((rcv_options ^ ifa->oa->options) & (OPT_E | OPT_N)) + if ((rcv_options ^ loc_options) & (OPT_E | OPT_N)) DROP("area type mismatch", rcv_options); + /* RFC 5838 2.4 - AF-bit check unless on IPv6 unicast */ + if ((loc_options & OPT_AF) && !(loc_options & OPT_V6) && !(rcv_options & OPT_AF)) + DROP("AF-bit mismatch", rcv_options); + /* Check consistency of existing 
neighbor entry */ if (n) { diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 1795ec22..e3d8d61b 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -55,7 +55,9 @@ ifa_tx_length(struct ospf_iface *ifa) static inline uint ifa_tx_hdrlen(struct ospf_iface *ifa) { - uint hlen = SIZE_OF_IP_HEADER; + struct ospf_proto *p = ifa->oa->po; + + uint hlen = ospf_is_v2(p) ? IP4_HEADER_LENGTH : IP6_HEADER_LENGTH; /* Relevant just for OSPFv2 */ if (ifa->autype == OSPF_AUTH_CRYPT) @@ -115,6 +117,7 @@ ospf_sk_open(struct ospf_iface *ifa) sock *sk = sk_new(ifa->pool); sk->type = SK_IP; + sk->subtype = ospf_is_v2(p) ? SK_IPV4 : SK_IPV6; sk->dport = OSPF_PROTO; sk->saddr = ifa->addr->ip; sk->iface = ifa->iface; @@ -200,6 +203,7 @@ ospf_open_vlink_sk(struct ospf_proto *p) { sock *sk = sk_new(p->p.pool); sk->type = SK_IP; + sk->subtype = ospf_is_v2(p) ? SK_IPV4 : SK_IPV6; sk->dport = OSPF_PROTO; sk->vrf = p->p.vrf; @@ -246,8 +250,8 @@ ospf_iface_down(struct ospf_iface *ifa) OSPF_TRACE(D_EVENTS, "Removing interface %s (peer %I) from area %R", ifa->ifname, ifa->addr->opposite, ifa->oa->areaid); else - OSPF_TRACE(D_EVENTS, "Removing interface %s (%I/%d) from area %R", - ifa->ifname, ifa->addr->prefix, ifa->addr->pxlen, ifa->oa->areaid); + OSPF_TRACE(D_EVENTS, "Removing interface %s (%N) from area %R", + ifa->ifname, &ifa->addr->prefix, ifa->oa->areaid); /* First of all kill all the related vlinks */ WALK_LIST(iff, p->iface_list) @@ -394,15 +398,15 @@ ospf_iface_sm(struct ospf_iface *ifa, int event) { ospf_iface_chstate(ifa, OSPF_IS_WAITING); if (ifa->wait_timer) - tm_start(ifa->wait_timer, ifa->waitint); + tm_start(ifa->wait_timer, ifa->waitint S); } } if (ifa->hello_timer) - tm_start(ifa->hello_timer, ifa->helloint); + tm_start(ifa->hello_timer, ifa->helloint S); if (ifa->poll_timer) - tm_start(ifa->poll_timer, ifa->pollint); + tm_start(ifa->poll_timer, ifa->pollint S); ospf_send_hello(ifa, OHS_HELLO, NULL); } @@ -492,13 +496,13 @@ ospf_iface_add(struct object_lock *lock) 
if (! ifa->stub) { - ifa->hello_timer = tm_new_set(ifa->pool, hello_timer_hook, ifa, 0, ifa->helloint); + ifa->hello_timer = tm_new_init(ifa->pool, hello_timer_hook, ifa, ifa->helloint S, 0); if (ifa->type == OSPF_IT_NBMA) - ifa->poll_timer = tm_new_set(ifa->pool, poll_timer_hook, ifa, 0, ifa->pollint); + ifa->poll_timer = tm_new_init(ifa->pool, poll_timer_hook, ifa, ifa->pollint S, 0); if ((ifa->type == OSPF_IT_BCAST) || (ifa->type == OSPF_IT_NBMA)) - ifa->wait_timer = tm_new_set(ifa->pool, wait_timer_hook, ifa, 0, 0); + ifa->wait_timer = tm_new_init(ifa->pool, wait_timer_hook, ifa, 0, 0); ifa->flood_queue_size = ifa_flood_queue_size(ifa); ifa->flood_queue = mb_allocz(ifa->pool, ifa->flood_queue_size * sizeof(void *)); @@ -532,15 +536,6 @@ ospf_iface_stubby(struct ospf_iface_patt *ip, struct ifa *addr) if (addr->iface->flags & IF_LOOPBACK) return 1; - /* - * For compatibility reasons on BSD systems, we force OSPF - * interfaces with non-primary IP prefixes to be stub. - */ -#if defined(OSPFv2) && !defined(CONFIG_MC_PROPER_SRC) - if (!ip->bsd_secondary && !(addr->flags & IA_PRIMARY)) - return 1; -#endif - return ip->stub; } @@ -559,8 +554,8 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i OSPF_TRACE(D_EVENTS, "Adding interface %s (peer %I) to area %R", iface->name, addr->opposite, oa->areaid); else - OSPF_TRACE(D_EVENTS, "Adding interface %s (%I/%d) to area %R", - iface->name, addr->prefix, addr->pxlen, oa->areaid); + OSPF_TRACE(D_EVENTS, "Adding interface %s (%N) to area %R", + iface->name, &addr->prefix, oa->areaid); pool = rp_new(p->p.pool, "OSPF Interface"); ifa = mb_allocz(pool, sizeof(struct ospf_iface)); @@ -598,6 +593,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i if (ip->ptp_netmask < 2) ifa->ptp_netmask = ip->ptp_netmask; + ifa->drip = ifa->bdrip = ospf_is_v2(p) ? 
IPA_NONE4 : IPA_NONE6; ifa->type = ospf_iface_classify(ip->type, addr); @@ -637,7 +633,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i should be used). Because OSPFv3 iface is not subnet-specific, there is no need for ipa_in_net() check */ - if (ospf_is_v2(p) && !ipa_in_net(nb->ip, addr->prefix, addr->pxlen)) + if (ospf_is_v2(p) && !ipa_in_netX(nb->ip, &addr->prefix)) continue; if (ospf_is_v3(p) && !ipa_is_link_local(nb->ip)) @@ -650,7 +646,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i add_tail(&oa->po->iface_list, NODE ifa); struct object_lock *lock = olock_new(pool); - lock->addr = ospf_is_v2(p) ? ifa->addr->prefix : IPA_NONE; + lock->addr = ospf_is_v2(p) ? ipa_from_ip4(net4_prefix(&ifa->addr->prefix)) : IPA_NONE; lock->type = OBJLOCK_IP; lock->port = OSPF_PROTO; lock->inst = ifa->instance_id; @@ -709,7 +705,7 @@ ospf_iface_new_vlink(struct ospf_proto *p, struct ospf_iface_patt *ip) add_tail(&p->iface_list, NODE ifa); - ifa->hello_timer = tm_new_set(ifa->pool, hello_timer_hook, ifa, 0, ifa->helloint); + ifa->hello_timer = tm_new_init(ifa->pool, hello_timer_hook, ifa, ifa->helloint S, 0); ifa->flood_queue_size = ifa_flood_queue_size(ifa); ifa->flood_queue = mb_allocz(ifa->pool, ifa->flood_queue_size * sizeof(void *)); @@ -721,10 +717,10 @@ ospf_iface_change_timer(timer *tm, uint val) if (!tm) return; - tm->recurrent = val; + tm->recurrent = val S; - if (tm->expires) - tm_start(tm, val); + if (tm_active(tm)) + tm_start(tm, val S); } static inline void @@ -807,8 +803,8 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) ifname, ifa->waitint, new->waitint); ifa->waitint = new->waitint; - if (ifa->wait_timer && ifa->wait_timer->expires) - tm_start(ifa->wait_timer, ifa->waitint); + if (ifa->wait_timer && tm_active(ifa->wait_timer)) + tm_start(ifa->wait_timer, ifa->waitint S); } /* DEAD TIMER */ @@ -900,7 +896,7 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, 
struct ospf_iface_patt *new) WALK_LIST(nb, new->nbma_list) { /* See related note in ospf_iface_new() */ - if (ospf_is_v2(p) && !ipa_in_net(nb->ip, ifa->addr->prefix, ifa->addr->pxlen)) + if (ospf_is_v2(p) && !ipa_in_netX(nb->ip, &ifa->addr->prefix)) continue; if (ospf_is_v3(p) && !ipa_is_link_local(nb->ip)) @@ -1087,6 +1083,9 @@ ospf_ifa_notify2(struct proto *P, uint flags, struct ifa *a) { struct ospf_proto *p = (struct ospf_proto *) P; + if (a->prefix.type != NET_IP4) + return; + if (a->flags & IA_SECONDARY) return; @@ -1126,6 +1125,9 @@ ospf_ifa_notify3(struct proto *P, uint flags, struct ifa *a) other addresses are used for link-LSA. */ if (a->scope == SCOPE_LINK) { + if (a->prefix.type != NET_IP6) + return; + if (flags & IF_CHANGE_UP) { struct ospf_mip_walk s = { .iface = a->iface }; @@ -1143,6 +1145,9 @@ ospf_ifa_notify3(struct proto *P, uint flags, struct ifa *a) } else { + if (a->prefix.type != ospf_get_af(p)) + return; + struct ospf_iface *ifa; WALK_LIST(ifa, p->iface_list) if (ifa->iface == a->iface) @@ -1168,6 +1173,9 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p) WALK_LIST(a, iface->addrs) { + if (a->prefix.type != NET_IP4) + continue; + if (a->flags & IA_SECONDARY) continue; @@ -1186,8 +1194,8 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p) continue; /* Hard restart */ - log(L_INFO "%s: Restarting interface %s (%I/%d) in area %R", - p->p.name, ifa->ifname, a->prefix, a->pxlen, s.oa->areaid); + log(L_INFO "%s: Restarting interface %s (%N) in area %R", + p->p.name, ifa->ifname, &a->prefix, s.oa->areaid); ospf_iface_shutdown(ifa); ospf_iface_remove(ifa); } @@ -1211,6 +1219,9 @@ ospf_reconfigure_ifaces3(struct ospf_proto *p) WALK_LIST(a, iface->addrs) { + if (a->prefix.type != NET_IP6) + continue; + if (a->flags & IA_SECONDARY) continue; @@ -1342,7 +1353,7 @@ ospf_iface_info(struct ospf_iface *ifa) else if (ifa->addr->flags & IA_PEER) cli_msg(-1015, "Interface %s (peer %I)", ifa->ifname, ifa->addr->opposite); else - cli_msg(-1015, "Interface %s 
(%I/%d)", ifa->ifname, ifa->addr->prefix, ifa->addr->pxlen); + cli_msg(-1015, "Interface %s (%N)", ifa->ifname, &ifa->addr->prefix); cli_msg(-1015, "\tType: %s%s", ospf_it[ifa->type], more); cli_msg(-1015, "\tArea: %R (%u)", ifa->oa->areaid, ifa->oa->areaid); diff --git a/proto/ospf/lsalib.c b/proto/ospf/lsalib.c index cb7b186a..fbfd8d29 100644 --- a/proto/ospf/lsalib.c +++ b/proto/ospf/lsalib.c @@ -280,21 +280,19 @@ lsa_walk_rt(struct ospf_lsa_rt_walk *rt) void -lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, ip_addr *ip, int *pxlen, u8 *pxopts, u32 *metric) +lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, int af, net_addr *net, u8 *pxopts, u32 *metric) { if (ospf2) { struct ospf_lsa_sum2 *ls = en->lsa_body; - *ip = ipa_from_u32(en->lsa.id & ls->netmask); - *pxlen = u32_masklen(ls->netmask); + net_fill_ip4(net, ip4_from_u32(en->lsa.id & ls->netmask), u32_masklen(ls->netmask)); *pxopts = 0; *metric = ls->metric & LSA_METRIC_MASK; } else { struct ospf_lsa_sum3_net *ls = en->lsa_body; - u16 rest; - lsa_get_ipv6_prefix(ls->prefix, ip, pxlen, pxopts, &rest); + ospf3_get_prefix(ls->prefix, af, net, pxopts, NULL); *metric = ls->metric & LSA_METRIC_MASK; } } @@ -319,13 +317,14 @@ lsa_parse_sum_rt(struct top_hash_entry *en, int ospf2, u32 *drid, u32 *metric, u } void -lsa_parse_ext(struct top_hash_entry *en, int ospf2, struct ospf_lsa_ext_local *rt) +lsa_parse_ext(struct top_hash_entry *en, int ospf2, int af, struct ospf_lsa_ext_local *rt) { if (ospf2) { struct ospf_lsa_ext2 *ext = en->lsa_body; - rt->ip = ipa_from_u32(en->lsa.id & ext->netmask); - rt->pxlen = u32_masklen(ext->netmask); + net_fill_ip4(&rt->net, + ip4_from_u32(en->lsa.id & ext->netmask), + u32_masklen(ext->netmask)); rt->pxopts = 0; rt->metric = ext->metric & LSA_METRIC_MASK; rt->ebit = ext->metric & LSA_EXT2_EBIT; @@ -339,14 +338,13 @@ lsa_parse_ext(struct top_hash_entry *en, int ospf2, struct ospf_lsa_ext_local *r else { struct ospf_lsa_ext3 *ext = en->lsa_body; - u16 rest; - u32 *buf = 
lsa_get_ipv6_prefix(ext->rest, &rt->ip, &rt->pxlen, &rt->pxopts, &rest); + u32 *buf = ospf3_get_prefix(ext->rest, af, &rt->net, &rt->pxopts, NULL); rt->metric = ext->metric & LSA_METRIC_MASK; rt->ebit = ext->metric & LSA_EXT3_EBIT; rt->fbit = ext->metric & LSA_EXT3_FBIT; if (rt->fbit) - buf = lsa_get_ipv6_addr(buf, &rt->fwaddr); + buf = ospf3_get_addr(buf, af, &rt->fwaddr); else rt->fwaddr = IPA_NONE; @@ -452,7 +450,7 @@ lsa_validate_sum3_net(struct ospf_lsa_header *lsa, struct ospf_lsa_sum3_net *bod return 0; u8 pxl = pxlen(body->prefix); - if (pxl > MAX_PREFIX_LENGTH) + if (pxl > IP6_MAX_PREFIX_LENGTH) return 0; if (lsa->length != (HDRLEN + sizeof(struct ospf_lsa_sum3_net) + @@ -491,11 +489,11 @@ lsa_validate_ext3(struct ospf_lsa_header *lsa, struct ospf_lsa_ext3 *body) return 0; u8 pxl = pxlen(body->rest); - if (pxl > MAX_PREFIX_LENGTH) + if (pxl > IP6_MAX_PREFIX_LENGTH) return 0; int len = IPV6_PREFIX_SPACE(pxl); - if (body->metric & LSA_EXT3_FBIT) // forwardinf address + if (body->metric & LSA_EXT3_FBIT) // forwarding address len += 16; if (body->metric & LSA_EXT3_TBIT) // route tag len += 4; @@ -520,7 +518,7 @@ lsa_validate_pxlist(struct ospf_lsa_header *lsa, u32 pxcount, uint offset, u8 *p return 0; u8 pxl = pxlen((u32 *) (pbuf + offset)); - if (pxl > MAX_PREFIX_LENGTH) + if (pxl > IP6_MAX_PREFIX_LENGTH) return 0; offset += IPV6_PREFIX_SPACE(pxl); @@ -554,8 +552,8 @@ lsa_validate_prefix(struct ospf_lsa_header *lsa, struct ospf_lsa_prefix *body) /** * lsa_validate - check whether given LSA is valid * @lsa: LSA header - * @lsa_type: one of %LSA_T_xxx - * @ospf2: %true means OSPF version 2, %false means OSPF version 3 + * @lsa_type: internal LSA type (%LSA_T_xxx) + * @ospf2: %true for OSPFv2, %false for OSPFv3 * @body: pointer to LSA body * * Checks internal structure of given LSA body (minimal length, diff --git a/proto/ospf/lsalib.h b/proto/ospf/lsalib.h index 638b3525..fca7faec 100644 --- a/proto/ospf/lsalib.h +++ b/proto/ospf/lsalib.h @@ -41,7 +41,7 @@ void 
lsa_get_type_domain_(u32 itype, struct ospf_iface *ifa, u32 *otype, u32 *do static inline void lsa_get_type_domain(struct ospf_lsa_header *lsa, struct ospf_iface *ifa, u32 *otype, u32 *domain) { lsa_get_type_domain_(lsa->type_raw, ifa, otype, domain); } -static inline u32 lsa_get_etype(struct ospf_lsa_header *h, struct ospf_proto *p UNUSED4 UNUSED6) +static inline u32 lsa_get_etype(struct ospf_lsa_header *h, struct ospf_proto *p) { return ospf_is_v2(p) ? (h->type_raw & LSA_T_V2_MASK) : h->type_raw; } @@ -55,9 +55,12 @@ u16 lsa_verify_checksum(const void *lsa_n, int lsa_len); int lsa_comp(struct ospf_lsa_header *l1, struct ospf_lsa_header *l2); void lsa_walk_rt_init(struct ospf_proto *po, struct top_hash_entry *act, struct ospf_lsa_rt_walk *rt); int lsa_walk_rt(struct ospf_lsa_rt_walk *rt); -void lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, ip_addr *ip, int *pxlen, u8 *pxopts, u32 *metric); +void lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, int af, net_addr *net, u8 *pxopts, u32 *metric); void lsa_parse_sum_rt(struct top_hash_entry *en, int ospf2, u32 *drid, u32 *metric, u32 *options); -void lsa_parse_ext(struct top_hash_entry *en, int ospf2, struct ospf_lsa_ext_local *rt); +void lsa_parse_ext(struct top_hash_entry *en, int ospf2, int af, struct ospf_lsa_ext_local *rt); int lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body); +static inline btime lsa_inst_age(struct top_hash_entry *en) +{ return current_time() - en->inst_time; } + #endif /* _BIRD_OSPF_LSALIB_H_ */ diff --git a/proto/ospf/lsupd.c b/proto/ospf/lsupd.c index 157d9628..a98c9098 100644 --- a/proto/ospf/lsupd.c +++ b/proto/ospf/lsupd.c @@ -137,7 +137,7 @@ ospf_lsa_lsrt_up(struct top_hash_entry *en, struct ospf_neighbor *n) ret->lsa_body = LSA_BODY_DUMMY; if (!tm_active(n->lsrt_timer)) - tm_start(n->lsrt_timer, n->ifa->rxmtint); + tm_start(n->lsrt_timer, n->ifa->rxmtint S); } void @@ -572,7 +572,7 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct 
ospf_iface *ifa, { /* 13. (5a) - enforce minimum time between updates for received LSAs */ /* We also use this to ratelimit reactions to received self-originated LSAs */ - if (en && ((now - en->inst_time) < MINLSARRIVAL)) + if (en && (lsa_inst_age(en) < MINLSARRIVAL)) { OSPF_TRACE(D_EVENTS, "Skipping LSA received in less that MinLSArrival"); continue; @@ -700,7 +700,7 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa, if (!EMPTY_SLIST(n->lsrql) && (n->lsrqi == SHEAD(n->lsrql))) { ospf_send_lsreq(p, n); - tm_start(n->lsrq_timer, n->ifa->rxmtint); + tm_start(n->lsrq_timer, n->ifa->rxmtint S); } return; diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index 9fe3c028..f2d3505e 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -94,11 +94,11 @@ ospf_neighbor_new(struct ospf_iface *ifa) init_list(&n->ackl[ACKL_DIRECT]); init_list(&n->ackl[ACKL_DELAY]); - n->inactim = tm_new_set(pool, inactivity_timer_hook, n, 0, 0); - n->dbdes_timer = tm_new_set(pool, dbdes_timer_hook, n, 0, ifa->rxmtint); - n->lsrq_timer = tm_new_set(pool, lsrq_timer_hook, n, 0, ifa->rxmtint); - n->lsrt_timer = tm_new_set(pool, lsrt_timer_hook, n, 0, ifa->rxmtint); - n->ackd_timer = tm_new_set(pool, ackd_timer_hook, n, 0, ifa->rxmtint / 2); + n->inactim = tm_new_init(pool, inactivity_timer_hook, n, 0, 0); + n->dbdes_timer = tm_new_init(pool, dbdes_timer_hook, n, ifa->rxmtint S, 0); + n->lsrq_timer = tm_new_init(pool, lsrq_timer_hook, n, ifa->rxmtint S, 0); + n->lsrt_timer = tm_new_init(pool, lsrt_timer_hook, n, ifa->rxmtint S, 0); + n->ackd_timer = tm_new_init(pool, ackd_timer_hook, n, ifa->rxmtint S / 2, 0); return (n); } @@ -186,7 +186,7 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state) n->myimms = DBDES_IMMS; tm_start(n->dbdes_timer, 0); - tm_start(n->ackd_timer, ifa->rxmtint / 2); + tm_start(n->ackd_timer, ifa->rxmtint S / 2); } if (state > NEIGHBOR_EXSTART) @@ -231,7 +231,7 @@ ospf_neigh_sm(struct ospf_neighbor *n, int event) ospf_neigh_chstate(n, 
NEIGHBOR_INIT); /* Restart inactivity timer */ - tm_start(n->inactim, n->ifa->deadint); + tm_start(n->inactim, n->ifa->deadint S); break; case INM_2WAYREC: @@ -359,7 +359,7 @@ can_do_adj(struct ospf_neighbor *n) } -static inline u32 neigh_get_id(struct ospf_proto *p UNUSED4 UNUSED6, struct ospf_neighbor *n) +static inline u32 neigh_get_id(struct ospf_proto *p, struct ospf_neighbor *n) { return ospf_is_v2(p) ? ipa_to_u32(n->ip) : n->rid; } static struct ospf_neighbor * @@ -507,13 +507,14 @@ ospf_dr_election(struct ospf_iface *ifa) u32 old_drid = ifa->drid; u32 old_bdrid = ifa->bdrid; + ip_addr none = ospf_is_v2(p) ? IPA_NONE4 : IPA_NONE6; ifa->drid = ndr ? ndr->rid : 0; - ifa->drip = ndr ? ndr->ip : IPA_NONE; + ifa->drip = ndr ? ndr->ip : none; ifa->dr_iface_id = ndr ? ndr->iface_id : 0; ifa->bdrid = nbdr ? nbdr->rid : 0; - ifa->bdrip = nbdr ? nbdr->ip : IPA_NONE; + ifa->bdrip = nbdr ? nbdr->ip : none; DBG("DR=%R, BDR=%R\n", ifa->drid, ifa->bdrid); @@ -650,20 +651,6 @@ ospf_sh_neigh_info(struct ospf_neighbor *n) { struct ospf_iface *ifa = n->ifa; char *pos = "PtP "; - char etime[6]; - int exp, sec, min; - - exp = n->inactim->expires - now; - sec = exp % 60; - min = exp / 60; - if (min > 59) - { - bsprintf(etime, "-Inf-"); - } - else - { - bsprintf(etime, "%02u:%02u", min, sec); - } if ((ifa->type == OSPF_IT_BCAST) || (ifa->type == OSPF_IT_NBMA)) { @@ -675,6 +662,7 @@ ospf_sh_neigh_info(struct ospf_neighbor *n) pos = "Other"; } - cli_msg(-1013, "%-1R\t%3u\t%s/%s\t%-5s\t%-10s %-1I", n->rid, n->priority, - ospf_ns_names[n->state], pos, etime, ifa->ifname, n->ip); + cli_msg(-1013, "%-1R\t%3u\t%s/%s\t%7t\t%-10s %-1I", + n->rid, n->priority, ospf_ns_names[n->state], pos, + tm_remains(n->inactim), ifa->ifname, n->ip); } diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index d5d5d354..df6c452e 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -92,8 +92,10 @@ * - RFC 2328 - main OSPFv2 standard * - RFC 5340 - main OSPFv3 standard * - RFC 3101 - OSPFv2 NSSA areas - 
* - RFC 6549 - OSPFv2 multi-instance extensions - * - RFC 6987 - OSPF stub router advertisement + * - RFC 5709 - OSPFv2 HMAC-SHA Cryptographic Authentication + * - RFC 5838 - OSPFv3 Support of Address Families + * - RFC 6549 - OSPFv2 Multi-Instance Extensions + * - RFC 6987 - OSPF Stub Router Advertisement */ #include <stdlib.h> @@ -102,18 +104,11 @@ static int ospf_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool); static struct ea_list *ospf_make_tmp_attrs(struct rte *rt, struct linpool *pool); static void ospf_store_tmp_attrs(struct rte *rt, struct ea_list *attrs); -static int ospf_reload_routes(struct proto *P); +static void ospf_reload_routes(struct channel *C); static int ospf_rte_better(struct rte *new, struct rte *old); static int ospf_rte_same(struct rte *new, struct rte *old); static void ospf_disp(timer *timer); -static void -ospf_area_initfib(struct fib_node *fn) -{ - struct area_net *an = (struct area_net *) fn; - an->hidden = 0; - an->active = 0; -} static void add_area_nets(struct ospf_area *oa, struct ospf_area_config *ac) @@ -122,23 +117,35 @@ add_area_nets(struct ospf_area *oa, struct ospf_area_config *ac) struct area_net_config *anc; struct area_net *an; - fib_init(&oa->net_fib, p->p.pool, sizeof(struct area_net), 0, ospf_area_initfib); - fib_init(&oa->enet_fib, p->p.pool, sizeof(struct area_net), 0, ospf_area_initfib); + fib_init(&oa->net_fib, p->p.pool, ospf_get_af(p), + sizeof(struct area_net), OFFSETOF(struct area_net, fn), 0, NULL); + fib_init(&oa->enet_fib, p->p.pool, ospf_get_af(p), + sizeof(struct area_net), OFFSETOF(struct area_net, fn), 0, NULL); WALK_LIST(anc, ac->net_list) { - an = (struct area_net *) fib_get(&oa->net_fib, &anc->px.addr, anc->px.len); + an = fib_get(&oa->net_fib, &anc->prefix); an->hidden = anc->hidden; } WALK_LIST(anc, ac->enet_list) { - an = (struct area_net *) fib_get(&oa->enet_fib, &anc->px.addr, anc->px.len); + an = fib_get(&oa->enet_fib, &anc->prefix); an->hidden = anc->hidden; 
an->tag = anc->tag; } } +static inline uint +ospf_opts(struct ospf_proto *p) +{ + if (ospf_is_v2(p)) + return 0; + + return ((ospf_is_ip6(p) && !p->af_mc) ? OPT_V6 : 0) | + (!p->stub_router ? OPT_R : 0) | (p->af_ext ? OPT_AF : 0); +} + static void ospf_area_add(struct ospf_proto *p, struct ospf_area_config *ac) { @@ -154,16 +161,13 @@ ospf_area_add(struct ospf_proto *p, struct ospf_area_config *ac) oa->areaid = ac->areaid; oa->rt = NULL; oa->po = p; - fib_init(&oa->rtr, p->p.pool, sizeof(ort), 0, ospf_rt_initort); + fib_init(&oa->rtr, p->p.pool, NET_IP4, sizeof(ort), OFFSETOF(ort, fn), 0, NULL); add_area_nets(oa, ac); if (oa->areaid == 0) p->backbone = oa; - if (ospf_is_v2(p)) - oa->options = ac->type; - else - oa->options = ac->type | OPT_V6 | (p->stub_router ? 0 : OPT_R); + oa->options = ac->type | ospf_opts(p); ospf_notify_rt_lsa(oa); } @@ -229,21 +233,25 @@ ospf_start(struct proto *P) p->router_id = proto_get_router_id(P->cf); p->ospf2 = c->ospf2; + p->af_ext = c->af_ext; + p->af_mc = c->af_mc; p->rfc1583 = c->rfc1583; p->stub_router = c->stub_router; p->merge_external = c->merge_external; p->asbr = c->asbr; p->ecmp = c->ecmp; p->tick = c->tick; - p->disp_timer = tm_new_set(P->pool, ospf_disp, p, 0, p->tick); - tm_start(p->disp_timer, 1); + p->disp_timer = tm_new_init(P->pool, ospf_disp, p, p->tick S, 0); + tm_start(p->disp_timer, 100 MS); p->lsab_size = 256; p->lsab_used = 0; p->lsab = mb_alloc(P->pool, p->lsab_size); - p->nhpool = lp_new(P->pool, 12*sizeof(struct mpnh)); + p->nhpool = lp_new(P->pool, 12*sizeof(struct nexthop)); init_list(&(p->iface_list)); init_list(&(p->area_list)); - fib_init(&p->rtf, P->pool, sizeof(ort), 0, ospf_rt_initort); + fib_init(&p->rtf, P->pool, ospf_get_af(p), sizeof(ort), OFFSETOF(ort, fn), 0, NULL); + if (ospf_is_v3(p)) + idm_init(&p->idm, P->pool, 16); p->areano = 0; p->gr = ospf_top_new(p, P->pool); s_init_list(&(p->lsal)); @@ -299,15 +307,16 @@ ospf_dump(struct proto *P) } static struct proto * -ospf_init(struct proto_config 
*c) +ospf_init(struct proto_config *CF) { - struct ospf_config *oc = (struct ospf_config *) c; - struct proto *P = proto_new(c, sizeof(struct ospf_proto)); + struct ospf_config *cf = (struct ospf_config *) CF; + struct proto *P = proto_new(CF); + + P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->accept_ra_types = RA_OPTIMAL; P->rt_notify = ospf_rt_notify; P->if_notify = ospf_if_notify; - P->ifa_notify = oc->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3; + P->ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3; P->import_control = ospf_import_control; P->reload_routes = ospf_reload_routes; P->make_tmp_attrs = ospf_make_tmp_attrs; @@ -391,17 +400,16 @@ ospf_schedule_rtcalc(struct ospf_proto *p) p->calcrt = 1; } -static int -ospf_reload_routes(struct proto *P) +static void +ospf_reload_routes(struct channel *C) { - struct ospf_proto *p = (struct ospf_proto *) P; + struct ospf_proto *p = (struct ospf_proto *) C->proto; - if (p->calcrt != 2) - OSPF_TRACE(D_EVENTS, "Scheduling routing table calculation with route reload"); + if (p->calcrt == 2) + return; + OSPF_TRACE(D_EVENTS, "Scheduling routing table calculation with route reload"); p->calcrt = 2; - - return 1; } @@ -506,9 +514,9 @@ ospf_shutdown(struct proto *P) ospf_iface_shutdown(ifa); /* Cleanup locked rta entries */ - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, nf) { - rta_free(((ort *) nftmp)->old_rta); + rta_free(nf->old_rta); } FIB_WALK_END; @@ -603,11 +611,7 @@ ospf_area_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) struct ospf_iface *ifa; oa->ac = nac; - - if (ospf_is_v2(p)) - oa->options = nac->type; - else - oa->options = nac->type | OPT_V6 | (p->stub_router ? 0 : OPT_R); + oa->options = nac->type | ospf_opts(p); if (nac->type != oac->type) { @@ -639,17 +643,20 @@ ospf_area_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) * nonbroadcast network, cost of interface, etc. 
*/ static int -ospf_reconfigure(struct proto *P, struct proto_config *c) +ospf_reconfigure(struct proto *P, struct proto_config *CF) { struct ospf_proto *p = (struct ospf_proto *) P; struct ospf_config *old = (struct ospf_config *) (P->cf); - struct ospf_config *new = (struct ospf_config *) c; + struct ospf_config *new = (struct ospf_config *) CF; struct ospf_area_config *nac; struct ospf_area *oa, *oax; struct ospf_iface *ifa, *ifx; struct ospf_iface_patt *ip; - if (proto_get_router_id(c) != p->router_id) + if (proto_get_router_id(CF) != p->router_id) + return 0; + + if (p->ospf2 != new->ospf2) return 0; if (p->rfc1583 != new->rfc1583) @@ -658,13 +665,19 @@ ospf_reconfigure(struct proto *P, struct proto_config *c) if (old->abr != new->abr) return 0; + if ((p->af_ext != new->af_ext) || (p->af_mc != new->af_mc)) + return 0; + + if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF))) + return 0; + p->stub_router = new->stub_router; p->merge_external = new->merge_external; p->asbr = new->asbr; p->ecmp = new->ecmp; p->tick = new->tick; - p->disp_timer->recurrent = p->tick; - tm_start(p->disp_timer, 1); + p->disp_timer->recurrent = p->tick S; + tm_start(p->disp_timer, 100 MS); /* Mark all areas and ifaces */ WALK_LIST(oa, p->area_list) @@ -746,7 +759,6 @@ ospf_sh(struct proto *P) struct ospf_iface *ifa; struct ospf_neighbor *n; int ifano, nno, adjno, firstfib; - struct area_net *anet; if (p->p.proto_state != PS_UP) { @@ -795,29 +807,27 @@ ospf_sh(struct proto *P) cli_msg(-1014, "\t\tNumber of adjacent neighbors:\t%u", adjno); firstfib = 1; - FIB_WALK(&oa->net_fib, nftmp) + FIB_WALK(&oa->net_fib, struct area_net, anet) { - anet = (struct area_net *) nftmp; if(firstfib) { cli_msg(-1014, "\t\tArea networks:"); firstfib = 0; } - cli_msg(-1014, "\t\t\t%1I/%u\t%s\t%s", anet->fn.prefix, anet->fn.pxlen, + cli_msg(-1014, "\t\t\t%1N\t%s\t%s", anet->fn.addr, anet->hidden ? "Hidden" : "Advertise", anet->active ? 
"Active" : ""); } FIB_WALK_END; firstfib = 1; - FIB_WALK(&oa->enet_fib, nftmp) + FIB_WALK(&oa->enet_fib, struct area_net, anet) { - anet = (struct area_net *) nftmp; if(firstfib) { cli_msg(-1014, "\t\tArea external networks:"); firstfib = 0; } - cli_msg(-1014, "\t\t\t%1I/%u\t%s\t%s", anet->fn.prefix, anet->fn.pxlen, + cli_msg(-1014, "\t\t\t%1N\t%s\t%s", anet->fn.addr, anet->hidden ? "Hidden" : "Advertise", anet->active ? "Active" : ""); } FIB_WALK_END; @@ -1072,15 +1082,14 @@ show_lsa_network(struct top_hash_entry *he, int ospf2) } static inline void -show_lsa_sum_net(struct top_hash_entry *he, int ospf2) +show_lsa_sum_net(struct top_hash_entry *he, int ospf2, int af) { - ip_addr ip; - int pxlen; + net_addr net; u8 pxopts; u32 metric; - lsa_parse_sum_net(he, ospf2, &ip, &pxlen, &pxopts, &metric); - cli_msg(-1016, "\t\txnetwork %I/%d metric %u", ip, pxlen, metric); + lsa_parse_sum_net(he, ospf2, af, &net, &pxopts, &metric); + cli_msg(-1016, "\t\txnetwork %N metric %u", &net, metric); } static inline void @@ -1096,16 +1105,16 @@ show_lsa_sum_rt(struct top_hash_entry *he, int ospf2) static inline void -show_lsa_external(struct top_hash_entry *he, int ospf2) +show_lsa_external(struct top_hash_entry *he, int ospf2, int af) { struct ospf_lsa_ext_local rt; - char str_via[STD_ADDRESS_P_LENGTH + 8] = ""; + char str_via[IPA_MAX_TEXT_LENGTH + 8] = ""; char str_tag[16] = ""; if (he->lsa_type == LSA_T_EXT) he->domain = 0; /* Unmark the LSA */ - lsa_parse_ext(he, ospf2, &rt); + lsa_parse_ext(he, ospf2, af, &rt); if (rt.fbit) bsprintf(str_via, " via %I", rt.fwaddr); @@ -1113,19 +1122,15 @@ show_lsa_external(struct top_hash_entry *he, int ospf2) if (rt.tag) bsprintf(str_tag, " tag %08x", rt.tag); - cli_msg(-1016, "\t\t%s %I/%d metric%s %u%s%s", + cli_msg(-1016, "\t\t%s %N metric%s %u%s%s", (he->lsa_type == LSA_T_NSSA) ? "nssa-ext" : "external", - rt.ip, rt.pxlen, rt.ebit ? "2" : "", rt.metric, str_via, str_tag); + &rt.net, rt.ebit ? 
"2" : "", rt.metric, str_via, str_tag); } static inline void -show_lsa_prefix(struct top_hash_entry *he, struct top_hash_entry *cnode) +show_lsa_prefix(struct top_hash_entry *he, struct top_hash_entry *cnode, int af) { struct ospf_lsa_prefix *px = he->lsa_body; - ip_addr pxa; - int pxlen; - u8 pxopts; - u16 metric; u32 *buf; int i; @@ -1141,14 +1146,18 @@ show_lsa_prefix(struct top_hash_entry *he, struct top_hash_entry *cnode) buf = px->rest; for (i = 0; i < px->pxcount; i++) - { - buf = lsa_get_ipv6_prefix(buf, &pxa, &pxlen, &pxopts, &metric); + { + net_addr net; + u8 pxopts; + u16 metric; - if (px->ref_type == LSA_T_RT) - cli_msg(-1016, "\t\tstubnet %I/%d metric %u", pxa, pxlen, metric); - else - cli_msg(-1016, "\t\taddress %I/%d", pxa, pxlen); - } + buf = ospf3_get_prefix(buf, af, &net, &pxopts, &metric); + + if (px->ref_type == LSA_T_RT) + cli_msg(-1016, "\t\tstubnet %N metric %u", &net, metric); + else + cli_msg(-1016, "\t\taddress %N", &net); + } } void @@ -1156,6 +1165,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) { struct ospf_proto *p = (struct ospf_proto *) P; int ospf2 = ospf_is_v2(p); + int af = ospf_get_af(p); uint i, ix, j1, jx; u32 last_area = 0xFFFFFFFF; @@ -1169,7 +1179,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) /* We store interesting area-scoped LSAs in array hea and global-scoped (LSA_T_EXT) LSAs in array hex */ - int num = p->gr->hash_entries; + uint num = p->gr->hash_entries; struct top_hash_entry *hea[num]; struct top_hash_entry *hex[verbose ? 
num : 0]; struct top_hash_entry *he; @@ -1276,7 +1286,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) case LSA_T_SUM_NET: if (cnode->lsa_type == LSA_T_RT) - show_lsa_sum_net(he, ospf2); + show_lsa_sum_net(he, ospf2, af); break; case LSA_T_SUM_RT: @@ -1286,11 +1296,11 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) case LSA_T_EXT: case LSA_T_NSSA: - show_lsa_external(he, ospf2); + show_lsa_external(he, ospf2, af); break; case LSA_T_PREFIX: - show_lsa_prefix(he, cnode); + show_lsa_prefix(he, cnode, af); break; } @@ -1304,7 +1314,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) ix++; while ((ix < jx) && (hex[ix]->lsa.rt == cnode->lsa.rt)) - show_lsa_external(hex[ix++], ospf2); + show_lsa_external(hex[ix++], ospf2, af); cnode = NULL; } @@ -1338,7 +1348,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) last_rt = he->lsa.rt; } - show_lsa_external(he, ospf2); + show_lsa_external(he, ospf2, af); } } @@ -1468,6 +1478,8 @@ struct protocol proto_ospf = { .template = "ospf%d", .attr_class = EAP_OSPF, .preference = DEF_PREF_OSPF, + .channel_mask = NB_IP, + .proto_size = sizeof(struct ospf_proto), .config_size = sizeof(struct ospf_config), .init = ospf_init, .dump = ospf_dump, diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 81c610d5..54eeb74c 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -14,7 +14,7 @@ #include "nest/bird.h" #include "lib/checksum.h" -#include "lib/ip.h" +#include "lib/idm.h" #include "lib/lists.h" #include "lib/slists.h" #include "lib/socket.h" @@ -37,14 +37,6 @@ #endif -#ifdef IPV6 -#define OSPF_IS_V2 0 -#else -#define OSPF_IS_V2 1 -#endif - -// FIXME: MAX_PREFIX_LENGTH - #define OSPF_TRACE(flags, msg, args...) \ do { if ((p->p.debug & flags) || OSPF_FORCE_DEBUG) \ log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0) @@ -66,16 +58,16 @@ log_rl(&p->log_lsa_tbf, L_REMOTE "%s: " msg, p->p.name, args) #define LOG_LSA2(msg, args...) \ - do { if (! 
p->log_lsa_tbf.mark) \ + do { if (! p->log_lsa_tbf.drop) \ log(L_REMOTE "%s: " msg, p->p.name, args); } while(0) #define OSPF_PROTO 89 -#define LSREFRESHTIME 1800 /* 30 minutes */ -#define MINLSINTERVAL 5 -#define MINLSARRIVAL 1 -#define LSINFINITY 0xffffff +#define LSREFRESHTIME 1800 /* 30 minutes */ +#define MINLSINTERVAL (5 S_) +#define MINLSARRIVAL (1 S_) +#define LSINFINITY 0xffffff #define OSPF_DEFAULT_TICK 1 #define OSPF_DEFAULT_STUB_COST 1000 @@ -87,16 +79,18 @@ #define OSPF_VLINK_ID_OFFSET 0x80000000 - struct ospf_config { struct proto_config c; uint tick; u8 ospf2; + u8 af_ext; + u8 af_mc; u8 rfc1583; u8 stub_router; u8 merge_external; u8 instance_id; + u8 instance_id_set; u8 abr; u8 asbr; int ecmp; @@ -125,24 +119,24 @@ struct ospf_area_config struct area_net_config { node n; - struct prefix px; + net_addr prefix; u32 tag; u8 hidden; }; struct area_net { - struct fib_node fn; u32 metric; /* With possible LSA_EXT3_EBIT for NSSA area nets */ u32 tag; u8 hidden; u8 active; + struct fib_node fn; }; struct ospf_stubnet_config { node n; - struct prefix px; + net_addr prefix; u32 cost; u8 hidden; u8 summary; @@ -177,9 +171,9 @@ struct ospf_iface_patt int tx_priority; u16 tx_length; u16 rx_buffer; - #define OSPF_RXBUF_MINSIZE 256 /* Minimal allowed size */ u8 instance_id; + u8 instance_id_set; u8 autype; /* OSPF_AUTH_*, not really used in OSPFv3 */ u8 strictnbma; u8 check_link; @@ -189,7 +183,6 @@ struct ospf_iface_patt u8 ptp_netmask; /* bool + 2 for unspecified */ u8 ttl_security; /* bool + 2 for TX only */ u8 bfd; - u8 bsd_secondary; list *passwords; }; @@ -220,12 +213,15 @@ struct ospf_proto int areano; /* Number of area I belong to */ int padj; /* Number of neighbors in Exchange or Loading state */ struct fib rtf; /* Routing table */ - byte ospf2; /* OSPF v2 or v3 */ - byte rfc1583; /* RFC1583 compatibility */ - byte stub_router; /* Do not forward transit traffic */ - byte merge_external; /* Should i merge external routes? 
*/ - byte asbr; /* May i originate any ext/NSSA lsa? */ - byte ecmp; /* Maximal number of nexthops in ECMP route, or 0 */ + struct idm idm; /* OSPFv3 LSA ID map */ + u8 ospf2; /* OSPF v2 or v3 */ + u8 af_ext; /* OSPFv3-AF extension */ + u8 af_mc; /* OSPFv3-AF multicast */ + u8 rfc1583; /* RFC1583 compatibility */ + u8 stub_router; /* Do not forward transit traffic */ + u8 merge_external; /* Should i merge external routes? */ + u8 asbr; /* May i originate any ext/NSSA lsa? */ + u8 ecmp; /* Maximal number of nexthops in ECMP route, or 0 */ struct ospf_area *backbone; /* If exists */ event *flood_event; /* Event for flooding LS updates */ void *lsab; /* LSA buffer used when originating router LSAs */ @@ -273,10 +269,10 @@ struct ospf_iface sock *sk; /* IP socket */ list neigh_list; /* List of neighbors (struct ospf_neighbor) */ u32 cost; /* Cost of iface */ - u32 waitint; /* number of sec before changing state from wait */ - u32 rxmtint; /* number of seconds between LSA retransmissions */ - u32 pollint; /* Poll interval */ - u32 deadint; /* after "deadint" missing hellos is router dead */ + u32 waitint; /* Number of seconds before changing state from wait */ + u32 rxmtint; /* Number of seconds between LSA retransmissions */ + u32 pollint; /* Poll interval in seconds */ + u32 deadint; /* After deadint seconds without hellos is router dead */ u32 iface_id; /* Interface ID (iface->index or new value for vlinks) */ u32 vid; /* ID of peer of virtual link */ ip_addr vip; /* IP of peer of virtual link */ @@ -287,8 +283,8 @@ struct ospf_iface interface. 
LSAs contained in the update */ u16 helloint; /* number of seconds between hello sending */ list *passwords; - u32 csn; /* Last used crypt seq number */ - bird_clock_t csn_use; /* Last time when packet with that CSN was sent */ + u32 csn; /* Last used crypt seq number */ + btime csn_use; /* Last time when packet with that CSN was sent */ ip_addr all_routers; /* Multicast (or broadcast) address for all routers */ ip_addr des_routers; /* Multicast (or NULL) address for designated routers */ ip_addr drip; /* Designated router IP */ @@ -458,14 +454,15 @@ struct ospf_neighbor /* Generic option flags */ -#define OPT_V6 0x01 /* OSPFv3, LSA relevant for IPv6 routing calculation */ -#define OPT_E 0x02 /* Related to AS-external LSAs */ -#define OPT_MC 0x04 /* Related to MOSPF, not used and obsolete */ -#define OPT_N 0x08 /* Related to NSSA */ -#define OPT_P 0x08 /* OSPFv2, flags P and N share position, see NSSA RFC */ -#define OPT_EA 0x10 /* OSPFv2, external attributes, not used and obsolete */ -#define OPT_R 0x10 /* OSPFv3, originator is active router */ -#define OPT_DC 0x20 /* Related to demand circuits, not used */ +#define OPT_V6 0x0001 /* OSPFv3, LSA relevant for IPv6 routing calculation */ +#define OPT_E 0x0002 /* Related to AS-external LSAs */ +#define OPT_MC 0x0004 /* Related to MOSPF, not used and obsolete */ +#define OPT_N 0x0008 /* Related to NSSA */ +#define OPT_P 0x0008 /* OSPFv2, flags P and N share position, see NSSA RFC */ +#define OPT_EA 0x0010 /* OSPFv2, external attributes, not used and obsolete */ +#define OPT_R 0x0010 /* OSPFv3, originator is active router */ +#define OPT_DC 0x0020 /* Related to demand circuits, not used */ +#define OPT_AF 0x0100 /* OSPFv3 Address Families (RFC 5838) */ /* Router-LSA VEB flags are are stored together with links (OSPFv2) or options (OSPFv3) */ #define OPT_RT_B (0x01 << 24) @@ -682,8 +679,8 @@ struct ospf_lsa_ext3 struct ospf_lsa_ext_local { - ip_addr ip, fwaddr; - int pxlen; + net_addr net; + ip_addr fwaddr; u32 metric, 
ebit, fbit, tag, propagate; u8 pxopts; }; @@ -721,73 +718,102 @@ lsa_net_count(struct ospf_lsa_header *lsa) /* In ospf_area->rtr we store paths to routers, but we use RID (and not IP address) as index, so we need to encapsulate RID to IP address */ -#define ipa_from_rid(x) ipa_from_u32(x) -#define ipa_to_rid(x) ipa_to_u32(x) +#define net_from_rid(x) NET_ADDR_IP4(ip4_from_u32(x), IP4_MAX_PREFIX_LENGTH) +#define rid_from_net(x) ip4_to_u32(((net_addr_ip4 *) x)->prefix) #define IPV6_PREFIX_SPACE(x) ((((x) + 63) / 32) * 4) #define IPV6_PREFIX_WORDS(x) (((x) + 63) / 32) -/* FIXME: these four functions should be significantly redesigned w.r.t. integration, - also should be named as ospf3_* instead of *_ipv6_* */ + +static inline int +ospf_valid_prefix(net_addr *n) +{ + /* + * In OSPFv2, prefix is stored as netmask; ip4_masklen() returns 255 for + * invalid one. But OSPFv3-AF may receive IPv4 net with 32 < pxlen < 128. + */ + uint max = (n->type == NET_IP4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; + return n->pxlen <= max; +} + +/* + * In OSPFv3-AF (RFC 5835), IPv4 address is encoded by just placing it in the + * first 32 bits of IPv6 address and setting remaining bits to zero. Likewise + * for IPv4 prefix, where remaining bits do not matter. 
We use following + * functions to convert between IPv4 and IPv4-in-IPv6 representations: + */ + +static inline ip4_addr ospf3_6to4(ip6_addr a) +{ return _MI4(_I0(a)); } + +static inline ip6_addr ospf3_4to6(ip4_addr a) +{ return _MI6(_I(a), 0, 0, 0); } + static inline u32 * -lsa_get_ipv6_prefix(u32 *buf, ip_addr *addr, int *pxlen, u8 *pxopts, u16 *rest) +ospf3_get_prefix(u32 *buf, int af, net_addr *n, u8 *pxopts, u16 *rest) { - u8 pxl = (*buf >> 24); - *pxopts = (*buf >> 16); - *rest = *buf; - *pxlen = pxl; + ip6_addr px = IP6_NONE; + uint pxlen = (*buf >> 24); + *pxopts = (*buf >> 16) & 0xff; + if (rest) *rest = *buf & 0xffff; buf++; - *addr = IPA_NONE; - -#ifdef IPV6 - if (pxl > 0) - _I0(*addr) = *buf++; - if (pxl > 32) - _I1(*addr) = *buf++; - if (pxl > 64) - _I2(*addr) = *buf++; - if (pxl > 96) - _I3(*addr) = *buf++; + if (pxlen > 0) + _I0(px) = *buf++; + if (pxlen > 32) + _I1(px) = *buf++; + if (pxlen > 64) + _I2(px) = *buf++; + if (pxlen > 96) + _I3(px) = *buf++; /* Clean up remaining bits */ - if (pxl < 128) - addr->addr[pxl / 32] &= u32_mkmask(pxl % 32); -#endif + if (pxlen < 128) + px.addr[pxlen / 32] &= u32_mkmask(pxlen % 32); + + if (af == NET_IP4) + net_fill_ip4(n, ospf3_6to4(px), pxlen); + else + net_fill_ip6(n, px, pxlen); return buf; } static inline u32 * -lsa_get_ipv6_addr(u32 *buf, ip_addr *addr) +ospf3_put_prefix(u32 *buf, net_addr *n, u8 pxopts, u16 rest) { - *addr = *(ip_addr *) buf; - return buf + 4; -} + ip6_addr px = (n->type == NET_IP4) ? 
ospf3_4to6(net4_prefix(n)) : net6_prefix(n); + uint pxlen = n->pxlen; -static inline u32 * -put_ipv6_prefix(u32 *buf, ip_addr addr UNUSED4, u8 pxlen UNUSED4, u8 pxopts UNUSED4, u16 lh UNUSED4) -{ -#ifdef IPV6 - *buf++ = ((pxlen << 24) | (pxopts << 16) | lh); + *buf++ = ((pxlen << 24) | (pxopts << 16) | rest); if (pxlen > 0) - *buf++ = _I0(addr); + *buf++ = _I0(px); if (pxlen > 32) - *buf++ = _I1(addr); + *buf++ = _I1(px); if (pxlen > 64) - *buf++ = _I2(addr); + *buf++ = _I2(px); if (pxlen > 96) - *buf++ = _I3(addr); -#endif + *buf++ = _I3(px); + return buf; } static inline u32 * -put_ipv6_addr(u32 *buf, ip_addr addr) +ospf3_get_addr(u32 *buf, int af, ip_addr *addr) { - *(ip_addr *) buf = addr; + ip6_addr a; + memcpy(&a, buf, 16); + *addr = (af == NET_IP4) ? ipa_from_ip4(ospf3_6to4(a)) : ipa_from_ip6(a); + return buf + 4; +} + +static inline u32 * +ospf3_put_addr(u32 *buf, ip_addr addr) +{ + ip6_addr a = ipa_is_ip4(addr) ? ospf3_4to6(ipa_to_ip4(addr)) : ipa_to_ip6(addr); + memcpy(buf, &a, 16); return buf + 4; } @@ -831,19 +857,24 @@ static inline void ospf_notify_net_lsa(struct ospf_iface *ifa) static inline void ospf_notify_link_lsa(struct ospf_iface *ifa) { ifa->update_link_lsa = 1; } - -#define ospf_is_v2(X) OSPF_IS_V2 -#define ospf_is_v3(X) (!OSPF_IS_V2) -/* static inline int ospf_is_v2(struct ospf_proto *p) { return p->ospf2; } static inline int ospf_is_v3(struct ospf_proto *p) { return ! p->ospf2; } -*/ -static inline int ospf_get_version(struct ospf_proto *p UNUSED4 UNUSED6) + +static inline int ospf_get_version(struct ospf_proto *p) { return ospf_is_v2(p) ? 
2 : 3; } +static inline int ospf_is_ip4(struct ospf_proto *p) +{ return p->p.net_type == NET_IP4; } + +static inline int ospf_is_ip6(struct ospf_proto *p) +{ return p->p.net_type == NET_IP6; } + +static inline int ospf_get_af(struct ospf_proto *p) +{ return p->p.net_type; } + struct ospf_area *ospf_find_area(struct ospf_proto *p, u32 aid); static inline struct ospf_area *ospf_main_area(struct ospf_proto *p) @@ -925,7 +956,7 @@ static inline void ospf_send_to_des(struct ospf_iface *ifa) #define SKIP(DSC) do { err_dsc = DSC; goto skip; } while(0) #endif -static inline uint ospf_pkt_hdrlen(struct ospf_proto *p UNUSED4 UNUSED6) +static inline uint ospf_pkt_hdrlen(struct ospf_proto *p) { return ospf_is_v2(p) ? (sizeof(struct ospf_packet) + sizeof(union ospf_auth)) : sizeof(struct ospf_packet); } static inline void * ospf_tx_buffer(struct ospf_iface *ifa) diff --git a/proto/ospf/packet.c b/proto/ospf/packet.c index 6b6a97a4..38d7a75f 100644 --- a/proto/ospf/packet.c +++ b/proto/ospf/packet.c @@ -77,16 +77,16 @@ ospf_pkt_finalize(struct ospf_iface *ifa, struct ospf_packet *pkt, uint *plen) reboot when system does not have independent RTC? */ if (!ifa->csn) { - ifa->csn = (u32) now; - ifa->csn_use = now; + ifa->csn = (u32) (current_real_time() TO_S); + ifa->csn_use = current_time(); } /* We must have sufficient delay between sending a packet and increasing CSN to prevent reordering of packets (in a network) with different CSNs */ - if ((now - ifa->csn_use) > 1) + if ((current_time() - ifa->csn_use) > 1 S) ifa->csn++; - ifa->csn_use = now; + ifa->csn_use = current_time(); uint auth_len = mac_type_length(pass->alg); byte *auth_tail = ((byte *) pkt + *plen); @@ -203,7 +203,7 @@ drop: /** * ospf_rx_hook * @sk: socket we received the packet. - * @len: size of the packet + * @len: length of the packet * * This is the entry point for messages from neighbors. 
Many checks (like * authentication, checksums, size) are done before the packet is passed to @@ -231,7 +231,7 @@ ospf_rx_hook(sock *sk, uint len) return 1; int src_local, dst_local, dst_mcast; - src_local = ipa_in_net(sk->faddr, ifa->addr->prefix, ifa->addr->pxlen); + src_local = ipa_in_netX(sk->faddr, &ifa->addr->prefix); dst_local = ipa_equal(sk->laddr, ifa->addr->ip); dst_mcast = ipa_equal(sk->laddr, ifa->all_routers) || ipa_equal(sk->laddr, ifa->des_routers); @@ -270,9 +270,6 @@ ospf_rx_hook(sock *sk, uint len) if (pkt == NULL) DROP("bad IP header", len); - if (ifa->check_ttl && (sk->rcv_ttl < 255)) - DROP("wrong TTL", sk->rcv_ttl); - if (len < sizeof(struct ospf_packet)) DROP("too short", len); @@ -379,6 +376,10 @@ found: if (ipa_equal(sk->laddr, ifa->des_routers) && (ifa->sk_dr == 0)) return 1; + /* TTL check must be done after instance dispatch */ + if (ifa->check_ttl && (sk->rcv_ttl < 255)) + DROP("wrong TTL", sk->rcv_ttl); + if (rid == p->router_id) DROP1("my own router ID"); @@ -491,8 +492,8 @@ ospf_send_to_agt(struct ospf_iface *ifa, u8 state) void ospf_send_to_bdr(struct ospf_iface *ifa) { - if (ipa_nonzero(ifa->drip)) + if (ipa_nonzero2(ifa->drip)) ospf_send_to(ifa, ifa->drip); - if (ipa_nonzero(ifa->bdrip)) + if (ipa_nonzero2(ifa->bdrip)) ospf_send_to(ifa, ifa->bdrip); } diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 368e3d05..c0fe218a 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -10,9 +10,7 @@ #include "ospf.h" -static void add_cand(list * l, struct top_hash_entry *en, - struct top_hash_entry *par, u32 dist, - struct ospf_area *oa, int i); +static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, u32 dist, int i, uint lif, uint nif); static void rt_sync(struct ospf_proto *p); @@ -21,17 +19,8 @@ static inline void reset_ri(ort *ort) bzero(&ort->n, sizeof(orta)); } -void -ospf_rt_initort(struct fib_node *fn) -{ - ort *ri = (ort *) fn; - reset_ri(ri); - ri->old_rta = NULL; - ri->fn.flags = 0; -} - 
static inline int -nh_is_vlink(struct mpnh *nhs) +nh_is_vlink(struct nexthop *nhs) { return !nhs->iface; } @@ -42,20 +31,19 @@ unresolved_vlink(ort *ort) return ort->n.nhs && nh_is_vlink(ort->n.nhs); } -static inline struct mpnh * +static inline struct nexthop * new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight) { - struct mpnh *nh = lp_alloc(p->nhpool, sizeof(struct mpnh)); + struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop)); nh->gw = gw; nh->iface = iface; - nh->next = NULL; nh->weight = weight; return nh; } /* Returns true if there are device nexthops in n */ static inline int -has_device_nexthops(const struct mpnh *n) +has_device_nexthops(const struct nexthop *n) { for (; n; n = n->next) if (ipa_zero(n->gw)) @@ -65,13 +53,13 @@ has_device_nexthops(const struct mpnh *n) } /* Replace device nexthops with nexthops to gw */ -static struct mpnh * -fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) +static struct nexthop * +fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw) { - struct mpnh *root1 = NULL; - struct mpnh *root2 = NULL; - struct mpnh **nn1 = &root1; - struct mpnh **nn2 = &root2; + struct nexthop *root1 = NULL; + struct nexthop *root2 = NULL; + struct nexthop **nn1 = &root1; + struct nexthop **nn2 = &root2; if (!p->ecmp) return new_nexthop(p, gw, n->iface, n->weight); @@ -82,7 +70,7 @@ fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) for (; n; n = n->next) { - struct mpnh *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight); + struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? 
gw : n->gw, n->iface, n->weight); if (ipa_zero(n->gw)) { @@ -96,7 +84,7 @@ fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) } } - return mpnh_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); + return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); } @@ -292,7 +280,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new) if (old->nhs != new->nhs) { - old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, + old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, p->ecmp, p->nhpool); old->nhs_reuse = 1; } @@ -308,7 +296,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) if (old->nhs != new->nhs) { - old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, + old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, p->ecmp, p->nhpool); old->nhs_reuse = 1; } @@ -334,9 +322,9 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) static inline void -ri_install_net(struct ospf_proto *p, ip_addr prefix, int pxlen, const orta *new) +ri_install_net(struct ospf_proto *p, net_addr *net, const orta *new) { - ort *old = (ort *) fib_get(&p->rtf, &prefix, pxlen); + ort *old = fib_get(&p->rtf, net); int cmp = orta_compare(p, new, &old->n); if (cmp > 0) @@ -348,8 +336,8 @@ ri_install_net(struct ospf_proto *p, ip_addr prefix, int pxlen, const orta *new) static inline void ri_install_rt(struct ospf_area *oa, u32 rid, const orta *new) { - ip_addr addr = ipa_from_rid(rid); - ort *old = (ort *) fib_get(&oa->rtr, &addr, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(rid); + ort *old = fib_get(&oa->rtr, (net_addr *) &nrid); int cmp = orta_compare(oa->po, new, &old->n); if (cmp > 0) @@ -359,17 +347,19 @@ ri_install_rt(struct ospf_area *oa, u32 rid, const orta *new) } static inline void -ri_install_asbr(struct ospf_proto *p, ip_addr *addr, const orta *new) +ri_install_asbr(struct ospf_proto *p, u32 rid, const orta *new) { - ort *old = (ort *) 
fib_get(&p->backbone->rtr, addr, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(rid); + ort *old = fib_get(&p->backbone->rtr, (net_addr *) &nrid); + if (orta_compare_asbr(p, new, &old->n) > 0) ort_replace(old, new); } static inline void -ri_install_ext(struct ospf_proto *p, ip_addr prefix, int pxlen, const orta *new) +ri_install_ext(struct ospf_proto *p, net_addr *net, const orta *new) { - ort *old = (ort *) fib_get(&p->rtf, &prefix, pxlen); + ort *old = fib_get(&p->rtf, net); int cmp = orta_compare_ext(p, new, &old->n); if (cmp > 0) @@ -404,7 +394,7 @@ px_pos_to_ifa(struct ospf_area *oa, int pos) static void -add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_hash_entry *en, int pos) +add_network(struct ospf_area *oa, net_addr *net, int metric, struct top_hash_entry *en, int pos) { struct ospf_proto *p = oa->po; @@ -419,7 +409,7 @@ add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_ .nhs = en->nhs }; - if (pxlen < 0 || pxlen > MAX_PREFIX_LENGTH) + if (!ospf_valid_prefix(net)) { log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)", p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt); @@ -440,7 +430,7 @@ add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_ nf.nhs = ifa ? new_nexthop(p, IPA_NONE, ifa->iface, ifa->ecmp_weight) : NULL; } - ri_install_net(p, px, pxlen, &nf); + ri_install_net(p, net, &nf); } @@ -451,8 +441,7 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr struct ospf_lsa_rt *rt = act->lsa_body; struct ospf_lsa_rt_walk rtl; struct top_hash_entry *tmp; - ip_addr prefix; - int pxlen, i; + int i; if (rt->options & OPT_RT_V) oa->trcap = 1; @@ -502,9 +491,10 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr * the same result by handing them here because add_network() * will keep the best (not the first) found route. 
*/ - prefix = ipa_from_u32(rtl.id & rtl.data); - pxlen = u32_masklen(rtl.data); - add_network(oa, prefix, pxlen, act->dist + rtl.metric, act, i); + net_addr_ip4 net = + NET_ADDR_IP4(ip4_from_u32(rtl.id & rtl.data), u32_masklen(rtl.data)); + + add_network(oa, (net_addr *) &net, act->dist + rtl.metric, act, i); break; case LSART_NET: @@ -517,7 +507,7 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr break; } - add_cand(&oa->cand, tmp, act, act->dist + rtl.metric, oa, i); + add_cand(oa, tmp, act, act->dist + rtl.metric, i, rtl.lif, rtl.nif); } } @@ -526,21 +516,21 @@ spfa_process_net(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_ent { struct ospf_lsa_net *ln = act->lsa_body; struct top_hash_entry *tmp; - ip_addr prefix; - int pxlen, i, cnt; + int i, cnt; if (ospf_is_v2(p)) { - prefix = ipa_from_u32(act->lsa.id & ln->optx); - pxlen = u32_masklen(ln->optx); - add_network(oa, prefix, pxlen, act->dist, act, -1); + net_addr_ip4 net = + NET_ADDR_IP4(ip4_from_u32(act->lsa.id & ln->optx), u32_masklen(ln->optx)); + + add_network(oa, (net_addr *) &net, act->dist, act, -1); } cnt = lsa_net_count(&act->lsa); for (i = 0; i < cnt; i++) { tmp = ospf_hash_find_rt(p->gr, oa->areaid, ln->routers[i]); - add_cand(&oa->cand, tmp, act, act->dist, oa, -1); + add_cand(oa, tmp, act, act->dist, -1, 0, 0); } } @@ -549,10 +539,6 @@ spfa_process_prefixes(struct ospf_proto *p, struct ospf_area *oa) { struct top_hash_entry *en, *src; struct ospf_lsa_prefix *px; - ip_addr pxa; - int pxlen; - u8 pxopts; - u16 metric; u32 *buf; int i; @@ -587,18 +573,22 @@ spfa_process_prefixes(struct ospf_proto *p, struct ospf_area *oa) buf = px->rest; for (i = 0; i < px->pxcount; i++) - { - buf = lsa_get_ipv6_prefix(buf, &pxa, &pxlen, &pxopts, &metric); + { + net_addr net; + u8 pxopts; + u16 metric; - if (pxopts & OPT_PX_NU) - continue; + buf = ospf3_get_prefix(buf, ospf_get_af(p), &net, &pxopts, &metric); - /* Store the first global address to use it later as a vlink 
endpoint */ - if ((pxopts & OPT_PX_LA) && ipa_zero(src->lb)) - src->lb = pxa; + if (pxopts & OPT_PX_NU) + continue; - add_network(oa, pxa, pxlen, src->dist + metric, src, i); - } + /* Store the first global address to use it later as a vlink endpoint */ + if ((pxopts & OPT_PX_LA) && (net.type == NET_IP6) && ipa_zero(src->lb)) + src->lb = ipa_from_ip6(net6_prefix(&net)); + + add_network(oa, &net, src->dist + metric, src, i); + } } } @@ -659,7 +649,8 @@ ospf_rt_spfa(struct ospf_area *oa) } static int -link_back(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par) +link_back(struct ospf_area *oa, struct top_hash_entry *en, + struct top_hash_entry *par, uint lif, uint nif) { struct ospf_proto *p = oa->po; struct ospf_lsa_rt_walk rtl; @@ -697,6 +688,10 @@ link_back(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry tmp = ospf_hash_find_net(p->gr, oa->areaid, rtl.id, rtl.nif); if (tmp == par) { + /* + * Note that there may be multiple matching Rt-fields if router 'en' + * have multiple interfaces to net 'par'. Perhaps we should do ECMP. + */ if (ospf_is_v2(p)) en->lb = ipa_from_u32(rtl.data); else @@ -708,7 +703,13 @@ link_back(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry case LSART_VLNK: case LSART_PTP: - /* Not necessary the same link, see RFC 2328 [23] */ + /* + * For OSPFv2, not necessary the same link, see RFC 2328 [23]. + * For OSPFv3, we verify that by comparing nif and lif fields. 
+ */ + if (ospf_is_v3(p) && ((rtl.lif != nif) || (rtl.nif != lif))) + break; + tmp = ospf_hash_find_rt(p->gr, oa->areaid, rtl.id); if (tmp == par) return 1; @@ -741,13 +742,12 @@ ospf_rt_sum(struct ospf_area *oa) { struct ospf_proto *p = oa->po; struct top_hash_entry *en; - ip_addr ip, abrip; + net_addr net; u32 dst_rid, metric, options; ort *abr; - int pxlen = -1, type = -1; + int type; u8 pxopts; - OSPF_TRACE(D_EVENTS, "Starting routing table calculation for inter-area (area %R)", oa->areaid); WALK_SLIST(en, p->lsal) @@ -770,18 +770,18 @@ ospf_rt_sum(struct ospf_area *oa) if (en->lsa_type == LSA_T_SUM_NET) { - lsa_parse_sum_net(en, ospf_is_v2(p), &ip, &pxlen, &pxopts, &metric); - - if (pxopts & OPT_PX_NU) - continue; + lsa_parse_sum_net(en, ospf_is_v2(p), ospf_get_af(p), &net, &pxopts, &metric); - if (pxlen < 0 || pxlen > MAX_PREFIX_LENGTH) + if (!ospf_valid_prefix(&net)) { log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)", p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt); continue; } + if (pxopts & OPT_PX_NU) + continue; + options = 0; type = ORT_NET; } @@ -802,8 +802,8 @@ ospf_rt_sum(struct ospf_area *oa) continue; /* 16.2. 
(4) */ - abrip = ipa_from_rid(en->lsa.rt); - abr = (ort *) fib_find(&oa->rtr, &abrip, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(en->lsa.rt); + abr = fib_find(&oa->rtr, (net_addr *) &nrid); if (!abr || !abr->n.type) continue; @@ -827,7 +827,7 @@ ospf_rt_sum(struct ospf_area *oa) }; if (type == ORT_NET) - ri_install_net(p, ip, pxlen, &nf); + ri_install_net(p, &net, &nf); else ri_install_rt(oa, dst_rid, &nf); } @@ -841,11 +841,7 @@ ospf_rt_sum_tr(struct ospf_area *oa) struct ospf_area *bb = p->backbone; struct top_hash_entry *en; ort *re, *abr; - ip_addr ip, abrip; - u32 dst_rid, metric, options; - int pxlen; - u8 pxopts; - + u32 metric; if (!bb) return; @@ -868,26 +864,31 @@ ospf_rt_sum_tr(struct ospf_area *oa) if (en->lsa_type == LSA_T_SUM_NET) { - lsa_parse_sum_net(en, ospf_is_v2(p), &ip, &pxlen, &pxopts, &metric); + net_addr net; + u8 pxopts; - if (pxopts & OPT_PX_NU) - continue; + lsa_parse_sum_net(en, ospf_is_v2(p), ospf_get_af(p), &net, &pxopts, &metric); - if (pxlen < 0 || pxlen > MAX_PREFIX_LENGTH) + if (!ospf_valid_prefix(&net)) { log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)", p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt); continue; } - re = fib_find(&p->rtf, &ip, pxlen); + if (pxopts & OPT_PX_NU) + continue; + + re = fib_find(&p->rtf, &net); } else // en->lsa_type == LSA_T_SUM_RT { + u32 dst_rid, options; + lsa_parse_sum_rt(en, ospf_is_v2(p), &dst_rid, &metric, &options); - ip = ipa_from_rid(dst_rid); - re = fib_find(&bb->rtr, &ip, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(dst_rid); + re = fib_find(&bb->rtr, (net_addr *) &nrid); } /* 16.3 (1b) */ @@ -905,8 +906,8 @@ ospf_rt_sum_tr(struct ospf_area *oa) continue; /* 16.3. 
(4) */ - abrip = ipa_from_rid(en->lsa.rt); - abr = fib_find(&oa->rtr, &abrip, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(en->lsa.rt); + abr = fib_find(&oa->rtr, (net_addr *) &nrid); if (!abr || !abr->n.type) continue; @@ -997,7 +998,7 @@ decide_sum_lsa(struct ospf_area *oa, ort *nf, int dest) return 1; struct area_net *anet = (struct area_net *) - fib_route(&nf->n.oa->net_fib, nf->fn.prefix, nf->fn.pxlen); + fib_route(&nf->n.oa->net_fib, nf->fn.addr); /* Condensed area network found */ if (anet) @@ -1016,13 +1017,13 @@ check_sum_net_lsa(struct ospf_proto *p, ort *nf) if (nf->area_net) { /* It is a default route for stub areas, handled entirely in ospf_rt_abr() */ - if (nf->fn.pxlen == 0) + if (nf->fn.addr->pxlen == 0) return; /* Find that area network */ WALK_LIST(anet_oa, p->area_list) { - anet = (struct area_net *) fib_find(&anet_oa->net_fib, &nf->fn.prefix, nf->fn.pxlen); + anet = fib_find(&anet_oa->net_fib, nf->fn.addr); if (anet) break; } @@ -1041,14 +1042,16 @@ check_sum_net_lsa(struct ospf_proto *p, ort *nf) static inline void check_sum_rt_lsa(struct ospf_proto *p, ort *nf) { + u32 rid = rid_from_net(nf->fn.addr); + struct ospf_area *oa; WALK_LIST(oa, p->area_list) if (decide_sum_lsa(oa, nf, ORT_ROUTER)) - ospf_originate_sum_rt_lsa(p, oa, nf, nf->n.metric1, nf->n.options); + ospf_originate_sum_rt_lsa(p, oa, rid, nf->n.metric1, nf->n.options); } static inline int -decide_nssa_lsa(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf, struct ospf_lsa_ext_local *rt) +decide_nssa_lsa(struct ospf_proto *p, ort *nf, struct ospf_lsa_ext_local *rt) { struct ospf_area *oa = nf->n.oa; struct top_hash_entry *en = nf->n.en; @@ -1057,14 +1060,14 @@ decide_nssa_lsa(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf, struct ospf_lsa_e return 0; /* Condensed area network found */ - if (fib_route(&oa->enet_fib, nf->fn.prefix, nf->fn.pxlen)) + if (fib_route(&oa->enet_fib, nf->fn.addr)) return 0; if (!en || (en->lsa_type != LSA_T_NSSA)) return 0; /* We do not store needed data 
in struct orta, we have to parse the LSA */ - lsa_parse_ext(en, ospf_is_v2(p), rt); + lsa_parse_ext(en, ospf_is_v2(p), ospf_get_af(p), rt); if (rt->pxopts & OPT_PX_NU) return 0; @@ -1092,7 +1095,7 @@ check_nssa_lsa(struct ospf_proto *p, ort *nf) /* Find that area network */ WALK_LIST(oa, p->area_list) { - anet = (struct area_net *) fib_find(&oa->enet_fib, &nf->fn.prefix, nf->fn.pxlen); + anet = fib_find(&oa->enet_fib, nf->fn.addr); if (anet) break; } @@ -1162,24 +1165,20 @@ static void ospf_rt_abr1(struct ospf_proto *p) { struct area_net *anet; - ort *nf, *default_nf; + ort *default_nf; + net_addr default_net; /* RFC 2328 G.3 - incomplete resolution of virtual next hops - routers */ - FIB_WALK(&p->backbone->rtr, nftmp) + FIB_WALK(&p->backbone->rtr, ort, nf) { - nf = (ort *) nftmp; - if (nf->n.type && unresolved_vlink(nf)) reset_ri(nf); } FIB_WALK_END; - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, nf) { - nf = (ort *) nftmp; - - /* RFC 2328 G.3 - incomplete resolution of virtual next hops - networks */ if (nf->n.type && unresolved_vlink(nf)) reset_ri(nf); @@ -1188,7 +1187,7 @@ ospf_rt_abr1(struct ospf_proto *p) /* Compute condensed area networks */ if (nf->n.type == RTS_OSPF) { - anet = (struct area_net *) fib_route(&nf->n.oa->net_fib, nf->fn.prefix, nf->fn.pxlen); + anet = (struct area_net *) fib_route(&nf->n.oa->net_fib, nf->fn.addr); if (anet) { if (!anet->active) @@ -1196,7 +1195,7 @@ ospf_rt_abr1(struct ospf_proto *p) anet->active = 1; /* Get a RT entry and mark it to know that it is an area network */ - ort *nfi = (ort *) fib_get(&p->rtf, &anet->fn.prefix, anet->fn.pxlen); + ort *nfi = fib_get(&p->rtf, anet->fn.addr); nfi->area_net = 1; /* 16.2. 
(3) */ @@ -1211,8 +1210,13 @@ ospf_rt_abr1(struct ospf_proto *p) } FIB_WALK_END; - ip_addr addr = IPA_NONE; - default_nf = (ort *) fib_get(&p->rtf, &addr, 0); + + if (ospf_is_v2(p)) + net_fill_ip4(&default_net, IP4_NONE, 0); + else + net_fill_ip6(&default_net, IP6_NONE, 0); + + default_nf = fib_get(&p->rtf, &default_net); default_nf->area_net = 1; struct ospf_area *oa; @@ -1239,11 +1243,10 @@ ospf_rt_abr1(struct ospf_proto *p) /* RFC 2328 16.4. (3) - precompute preferred ASBR entries */ if (oa_is_ext(oa)) { - FIB_WALK(&oa->rtr, nftmp) + FIB_WALK(&oa->rtr, ort, nf) { - nf = (ort *) nftmp; if (nf->n.options & ORTA_ASBR) - ri_install_asbr(p, &nf->fn.prefix, &nf->n); + ri_install_asbr(p, rid_from_net(nf->fn.addr), &nf->n); } FIB_WALK_END; } @@ -1251,9 +1254,9 @@ ospf_rt_abr1(struct ospf_proto *p) /* Originate or flush ASBR summary LSAs */ - FIB_WALK(&p->backbone->rtr, nftmp) + FIB_WALK(&p->backbone->rtr, ort, nf) { - check_sum_rt_lsa(p, (ort *) nftmp); + check_sum_rt_lsa(p, nf); } FIB_WALK_END; @@ -1280,8 +1283,6 @@ ospf_rt_abr2(struct ospf_proto *p) { struct ospf_area *oa; struct top_hash_entry *en; - ort *nf, *nf2; - /* RFC 3103 3.1 - type-7 translator election */ struct ospf_area *bb = p->backbone; @@ -1293,13 +1294,12 @@ ospf_rt_abr2(struct ospf_proto *p) if (oa->ac->translator) goto decided; - FIB_WALK(&oa->rtr, nftmp) + FIB_WALK(&oa->rtr, ort, nf) { - nf = (ort *) nftmp; if (!nf->n.type || !(nf->n.options & ORTA_ABR)) continue; - nf2 = fib_find(&bb->rtr, &nf->fn.prefix, MAX_PREFIX_LENGTH); + ort *nf2 = fib_find(&bb->rtr, nf->fn.addr); if (!nf2 || !nf2->n.type || !(nf2->n.options & ORTA_ABR)) continue; @@ -1329,23 +1329,21 @@ ospf_rt_abr2(struct ospf_proto *p) if (!translate && (oa->translate == TRANS_ON)) { if (oa->translator_timer == NULL) - oa->translator_timer = tm_new_set(p->p.pool, translator_timer_hook, oa, 0, 0); + oa->translator_timer = tm_new_init(p->p.pool, translator_timer_hook, oa, 0, 0); /* Schedule the end of translation */ - 
tm_start(oa->translator_timer, oa->ac->transint); + tm_start(oa->translator_timer, oa->ac->transint S); oa->translate = TRANS_WAIT; } } /* Compute condensed external networks */ - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, nf) { - nf = (ort *) nftmp; if (rt_is_nssa(nf) && (nf->n.options & ORTA_PROP)) { - struct area_net *anet = (struct area_net *) - fib_route(&nf->n.oa->enet_fib, nf->fn.prefix, nf->fn.pxlen); + struct area_net *anet = fib_route(&nf->n.oa->enet_fib, nf->fn.addr); if (anet) { @@ -1354,7 +1352,7 @@ ospf_rt_abr2(struct ospf_proto *p) anet->active = 1; /* Get a RT entry and mark it to know that it is an area network */ - nf2 = (ort *) fib_get(&p->rtf, &anet->fn.prefix, anet->fn.pxlen); + ort *nf2 = fib_get(&p->rtf, anet->fn.addr); nf2->area_net = 1; } @@ -1369,10 +1367,8 @@ ospf_rt_abr2(struct ospf_proto *p) FIB_WALK_END; - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, nf) { - nf = (ort *) nftmp; - check_sum_net_lsa(p, nf); check_nssa_lsa(p, nf); } @@ -1382,22 +1378,57 @@ ospf_rt_abr2(struct ospf_proto *p) /* Like fib_route(), but ignores dummy rt entries */ static void * -ospf_fib_route(struct fib *f, ip_addr a, int len) +ospf_fib_route_ip4(struct fib *f, ip4_addr a, int len) +{ + net_addr_ip4 net = NET_ADDR_IP4(a, len); + ort *nf; + +loop: + nf = fib_find(f, (net_addr *) &net); + if (nf && nf->n.type) + return nf; + + if (net.pxlen > 0) + { + net.pxlen--; + ip4_clrbit(&net.prefix, net.pxlen); + goto loop; + } + + return NULL; +} + +static void * +ospf_fib_route_ip6(struct fib *f, ip6_addr a, int len) { - ip_addr a0; + net_addr_ip6 net = NET_ADDR_IP6(a, len); ort *nf; - while (len >= 0) +loop: + nf = fib_find(f, (net_addr *) &net); + if (nf && nf->n.type) + return nf; + + if (net.pxlen > 0) { - a0 = ipa_and(a, ipa_mkmask(len)); - nf = fib_find(f, &a0, len); - if (nf && nf->n.type) - return nf; - len--; + net.pxlen--; + ip6_clrbit(&net.prefix, net.pxlen); + goto loop; } + return NULL; } +static void * +ospf_fib_route(struct fib *f, ip_addr a) 
+{ + if (f->addr_type == NET_IP4) + return ospf_fib_route_ip4(f, ipa_to_ip4(a), IP4_MAX_PREFIX_LENGTH); + else + return ospf_fib_route_ip6(f, ipa_to_ip6(a), IP6_MAX_PREFIX_LENGTH); +} + + /* RFC 2328 16.4. calculating external routes */ static void ospf_ext_spf(struct ospf_proto *p) @@ -1405,7 +1436,6 @@ ospf_ext_spf(struct ospf_proto *p) struct top_hash_entry *en; struct ospf_lsa_ext_local rt; ort *nf1, *nf2; - ip_addr rtid; u32 br_metric; struct ospf_area *atmp; @@ -1429,21 +1459,20 @@ ospf_ext_spf(struct ospf_proto *p) DBG("%s: Working on LSA. ID: %R, RT: %R, Type: %u\n", p->p.name, en->lsa.id, en->lsa.rt, en->lsa_type); - lsa_parse_ext(en, ospf_is_v2(p), &rt); - - if (rt.metric == LSINFINITY) - continue; + lsa_parse_ext(en, ospf_is_v2(p), ospf_get_af(p), &rt); - if (rt.pxopts & OPT_PX_NU) - continue; - - if (rt.pxlen < 0 || rt.pxlen > MAX_PREFIX_LENGTH) + if (!ospf_valid_prefix(&rt.net)) { log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)", p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt); continue; } + if (rt.metric == LSINFINITY) + continue; + + if (rt.pxopts & OPT_PX_NU) + continue; /* 16.4. (3) */ /* If there are more areas, we already precomputed preferred ASBR @@ -1457,8 +1486,8 @@ ospf_ext_spf(struct ospf_proto *p) if (!atmp) continue; /* Should not happen */ - rtid = ipa_from_rid(en->lsa.rt); - nf1 = fib_find(&atmp->rtr, &rtid, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(en->lsa.rt); + nf1 = fib_find(&atmp->rtr, (net_addr *) &nrid); if (!nf1 || !nf1->n.type) continue; /* No AS boundary router found */ @@ -1468,7 +1497,7 @@ ospf_ext_spf(struct ospf_proto *p) /* 16.4. 
(3) NSSA - special rule for default routes */ /* ABR should use default only if P-bit is set and summaries are active */ - if ((en->lsa_type == LSA_T_NSSA) && ipa_zero(rt.ip) && (rt.pxlen == 0) && + if ((en->lsa_type == LSA_T_NSSA) && (rt.net.pxlen == 0) && (p->areano > 1) && !(rt.propagate && atmp->ac->summary)) continue; @@ -1480,7 +1509,7 @@ ospf_ext_spf(struct ospf_proto *p) } else { - nf2 = ospf_fib_route(&p->rtf, rt.fwaddr, MAX_PREFIX_LENGTH); + nf2 = ospf_fib_route(&p->rtf, rt.fwaddr); if (!nf2) continue; @@ -1542,7 +1571,7 @@ ospf_ext_spf(struct ospf_proto *p) nfa.oa = atmp; /* undefined in RFC 2328 */ nfa.en = en; /* store LSA for later (NSSA processing) */ - ri_install_ext(p, rt.ip, rt.pxlen, &nfa); + ri_install_ext(p, &rt.net, &nfa); } } @@ -1552,13 +1581,10 @@ ospf_rt_reset(struct ospf_proto *p) { struct ospf_area *oa; struct top_hash_entry *en; - struct area_net *anet; - ort *ri; /* Reset old routing table */ - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, ri) { - ri = (ort *) nftmp; ri->area_net = 0; ri->keep = 0; reset_ri(ri); @@ -1580,9 +1606,8 @@ ospf_rt_reset(struct ospf_proto *p) WALK_LIST(oa, p->area_list) { /* Reset ASBR routing tables */ - FIB_WALK(&oa->rtr, nftmp) + FIB_WALK(&oa->rtr, ort, ri) { - ri = (ort *) nftmp; reset_ri(ri); } FIB_WALK_END; @@ -1590,17 +1615,15 @@ ospf_rt_reset(struct ospf_proto *p) /* Reset condensed area networks */ if (p->areano > 1) { - FIB_WALK(&oa->net_fib, nftmp) + FIB_WALK(&oa->net_fib, struct area_net, anet) { - anet = (struct area_net *) nftmp; anet->active = 0; anet->metric = 0; } FIB_WALK_END; - FIB_WALK(&oa->enet_fib, nftmp) + FIB_WALK(&oa->enet_fib, struct area_net, anet) { - anet = (struct area_net *) nftmp; anet->active = 0; anet->metric = 0; } @@ -1659,19 +1682,33 @@ ospf_rt_spf(struct ospf_proto *p) static inline int -inherit_nexthops(struct mpnh *pn) +inherit_nexthops(struct nexthop *pn) { /* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */ return pn && 
(ipa_nonzero(pn->gw) || !pn->iface); } -static struct mpnh * +static inline ip_addr +link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en) +{ + struct ospf_lsa_link *link_lsa = en->lsa_body; + ip6_addr ll = link_lsa->lladdr; + + if (ip6_zero(ll)) + return IPA_NONE; + + return ospf_is_ip4(p) ? ipa_from_ip4(ospf3_6to4(ll)) : ipa_from_ip6(ll); +} + +static struct nexthop * calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, - struct top_hash_entry *par, int pos) + struct top_hash_entry *par, int pos, uint lif, uint nif) { struct ospf_proto *p = oa->po; - struct mpnh *pn = par->nhs; - struct ospf_iface *ifa; + struct nexthop *pn = par->nhs; + struct top_hash_entry *link = NULL; + struct ospf_iface *ifa = NULL; + ip_addr nh = IPA_NONE; u32 rid = en->lsa.rt; /* 16.1.1. The next hop calculation */ @@ -1696,6 +1733,9 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (!ifa) return NULL; + if (ospf_is_v3(p) && (ifa->iface_id != lif)) + log(L_WARN "%s: Inconsistent interface ID %u/%u", p->p.name, ifa->iface_id, lif); + return new_nexthop(p, IPA_NONE, ifa->iface, ifa->ecmp_weight); } @@ -1706,14 +1746,44 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (!ifa) return NULL; + if (ospf_is_v3(p) && (ifa->iface_id != lif)) + log(L_WARN "%s: Inconsistent interface ID %u/%u", p->p.name, ifa->iface_id, lif); + if (ifa->type == OSPF_IT_VLINK) return new_nexthop(p, IPA_NONE, NULL, 0); - struct ospf_neighbor *m = find_neigh(ifa, rid); - if (!m || (m->state != NEIGHBOR_FULL)) - return NULL; + /* FIXME: On physical PtP links we may skip next-hop altogether */ + + if (ospf_is_v2(p) || ospf_is_ip6(p)) + { + /* + * In this case, next-hop is a source address from neighbor's packets. + * That is necessary for OSPFv2 and practical for OSPFv3 (as it works even + * if neighbor uses LinkLSASuppression), but does not work with OSPFv3-AF + * on IPv4 topology, where src is IPv6 but next-hop should be IPv4. 
+ */ + struct ospf_neighbor *m = find_neigh(ifa, rid); + if (!m || (m->state != NEIGHBOR_FULL)) + return NULL; + + nh = m->ip; + } + else + { + /* + * Next-hop is taken from lladdr field of Link-LSA, based on Neighbor + * Iface ID (nif) field in our Router-LSA, which is just nbr->iface_id. + */ + link = ospf_hash_find(p->gr, ifa->iface_id, nif, rid, LSA_T_LINK); + if (!link) + return NULL; + + nh = link_lsa_lladdr(p, link); + if (ipa_zero(nh)) + return NULL; + } - return new_nexthop(p, m->ip, ifa->iface, ifa->ecmp_weight); + return new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); } /* The third case - bcast or nbma neighbor */ @@ -1740,18 +1810,15 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, * Next-hop is taken from lladdr field of Link-LSA, en->lb_id * is computed in link_back(). */ - struct top_hash_entry *lhe; - lhe = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK); - - if (!lhe) + link = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK); + if (!link) return NULL; - struct ospf_lsa_link *llsa = lhe->lsa_body; - - if (ip6_zero(llsa->lladdr)) + nh = link_lsa_lladdr(p, link); + if (ipa_zero(nh)) return NULL; - return new_nexthop(p, ipa_from_ip6(llsa->lladdr), pn->iface, pn->weight); + return new_nexthop(p, nh, pn->iface, pn->weight); } } @@ -1764,8 +1831,8 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, /* Add LSA into list of candidates in Dijkstra's algorithm */ static void -add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, - u32 dist, struct ospf_area *oa, int pos) +add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, + u32 dist, int pos, uint lif, uint nif) { struct ospf_proto *p = oa->po; node *prev, *n; @@ -1778,9 +1845,9 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, if (en->lsa.age == LSA_MAXAGE) return; - if (ospf_is_v3(p) && (en->lsa_type == LSA_T_RT)) + if (ospf_is_v3(p) && (oa->options & 
OPT_V6) && (en->lsa_type == LSA_T_RT)) { - /* In OSPFv3, check V6 flag */ + /* In OSPFv3 IPv6 unicast, check V6 flag */ struct ospf_lsa_rt *rt = en->lsa_body; if (!(rt->options & OPT_V6)) return; @@ -1795,10 +1862,10 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, return; /* We should check whether there is a reverse link from en to par, */ - if (!link_back(oa, en, par)) + if (!link_back(oa, en, par, lif, nif)) return; - struct mpnh *nhs = calc_next_hop(oa, en, par, pos); + struct nexthop *nhs = calc_next_hop(oa, en, par, pos, lif, nif); if (!nhs) { log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", @@ -1836,7 +1903,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, /* Merge old and new */ int new_reuse = (par->nhs != nhs); - en->nhs = mpnh_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); + en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); en->nhs_reuse = 1; return; } @@ -1855,20 +1922,20 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, prev = NULL; - if (EMPTY_LIST(*l)) + if (EMPTY_LIST(oa->cand)) { - add_head(l, &en->cn); + add_head(&oa->cand, &en->cn); } else { - WALK_LIST(n, *l) + WALK_LIST(n, oa->cand) { act = SKIP_BACK(struct top_hash_entry, cn, n); if ((act->dist > dist) || ((act->dist == dist) && (act->lsa_type == LSA_T_RT))) { if (prev == NULL) - add_head(l, &en->cn); + add_head(&oa->cand, &en->cn); else insert_node(&en->cn, prev); added = 1; @@ -1879,7 +1946,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, if (!added) { - add_tail(l, &en->cn); + add_tail(&oa->cand, &en->cn); } } } @@ -1892,8 +1959,7 @@ ort_changed(ort *nf, rta *nr) (nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) || (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) || (nr->source != or->source) || (nr->dest != or->dest) || - (nr->iface != or->iface) || 
!ipa_equal(nr->gw, or->gw) || - !mpnh_same(nr->nexthops, or->nexthops); + !nexthop_same(&(nr->nh), &(or->nh)); } static void @@ -1902,25 +1968,22 @@ rt_sync(struct ospf_proto *p) struct top_hash_entry *en; struct fib_iterator fit; struct fib *fib = &p->rtf; - ort *nf; struct ospf_area *oa; /* This is used for forced reload of routes */ int reload = (p->calcrt == 2); - OSPF_TRACE(D_EVENTS, "Starting routing table synchronisation"); + OSPF_TRACE(D_EVENTS, "Starting routing table synchronization"); DBG("Now syncing my rt table with nest's\n"); FIB_ITERATE_INIT(&fit, fib); again1: - FIB_ITERATE_START(fib, &fit, nftmp) + FIB_ITERATE_START(fib, &fit, ort, nf) { - nf = (ort *) nftmp; - /* Sanity check of next-hop addresses, failure should not happen */ if (nf->n.type) { - struct mpnh *nh; + struct nexthop *nh; for (nh = nf->n.nhs; nh; nh = nh->next) if (ipa_nonzero(nh->gw)) { @@ -1943,29 +2006,12 @@ again1: .src = p->p.main_source, .source = nf->n.type, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST + .dest = RTD_UNICAST, + .nh = *(nf->n.nhs), }; - if (nf->n.nhs->next) - { - a0.dest = RTD_MULTIPATH; - a0.nexthops = nf->n.nhs; - } - else if (ipa_nonzero(nf->n.nhs->gw)) - { - a0.dest = RTD_ROUTER; - a0.iface = nf->n.nhs->iface; - a0.gw = nf->n.nhs->gw; - } - else - { - a0.dest = RTD_DEVICE; - a0.iface = nf->n.nhs->iface; - } - if (reload || ort_changed(nf, &a0)) { - net *ne = net_get(p->p.table, nf->fn.prefix, nf->fn.pxlen); rta *a = rta_lookup(&a0); rte *e = rte_get_temp(a); @@ -1976,12 +2022,10 @@ again1: e->u.ospf.tag = nf->old_tag = nf->n.tag; e->u.ospf.router_id = nf->old_rid = nf->n.rid; e->pflags = 0; - e->net = ne; - e->pref = p->p.preference; - DBG("Mod rte type %d - %I/%d via %I on iface %s, met %d\n", - a0.source, nf->fn.prefix, nf->fn.pxlen, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1); - rte_update(&p->p, ne, e); + DBG("Mod rte type %d - %N via %I on iface %s, met %d\n", + a0.source, nf->fn.addr, a0.gw, a0.iface ? 
a0.iface->name : "(none)", nf->n.metric1); + rte_update(&p->p, nf->fn.addr, e); } } else if (nf->old_rta) @@ -1990,19 +2034,21 @@ again1: rta_free(nf->old_rta); nf->old_rta = NULL; - net *ne = net_get(p->p.table, nf->fn.prefix, nf->fn.pxlen); - rte_update(&p->p, ne, NULL); + rte_update(&p->p, nf->fn.addr, NULL); } /* Remove unused rt entry, some special entries are persistent */ if (!nf->n.type && !nf->external_rte && !nf->area_net && !nf->keep) { - FIB_ITERATE_PUT(&fit, nftmp); - fib_delete(fib, nftmp); + if (nf->lsa_id) + idm_free(&p->idm, nf->lsa_id); + + FIB_ITERATE_PUT(&fit); + fib_delete(fib, nf); goto again1; } } - FIB_ITERATE_END(nftmp); + FIB_ITERATE_END; WALK_LIST(oa, p->area_list) @@ -2010,18 +2056,16 @@ again1: /* Cleanup ASBR hash tables */ FIB_ITERATE_INIT(&fit, &oa->rtr); again2: - FIB_ITERATE_START(&oa->rtr, &fit, nftmp) + FIB_ITERATE_START(&oa->rtr, &fit, ort, nf) { - nf = (ort *) nftmp; - if (!nf->n.type) { - FIB_ITERATE_PUT(&fit, nftmp); - fib_delete(&oa->rtr, nftmp); + FIB_ITERATE_PUT(&fit); + fib_delete(&oa->rtr, nf); goto again2; } } - FIB_ITERATE_END(nftmp); + FIB_ITERATE_END; } /* Cleanup stale LSAs */ diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 73b28375..589d2bc5 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -53,7 +53,7 @@ typedef struct orta struct ospf_area *oa; struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(), NULL otherwise */ - struct mpnh *nhs; /* Next hops computed during SPF */ + struct nexthop *nhs; /* Next hops computed during SPF */ struct top_hash_entry *en; /* LSA responsible for this orta */ } orta; @@ -78,13 +78,15 @@ typedef struct ort * route was not in the last update, in that case other old_* values are not * valid. 
*/ - struct fib_node fn; orta n; u32 old_metric1, old_metric2, old_tag, old_rid; rta *old_rta; + u32 lsa_id; u8 external_rte; u8 area_net; u8 keep; + + struct fib_node fn; } ort; diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 341eff87..717c8280 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -70,7 +70,7 @@ ospf_install_lsa(struct ospf_proto *p, struct ospf_lsa_header *lsa, u32 type, u3 en->lsa_body = body; en->lsa = *lsa; en->init_age = en->lsa.age; - en->inst_time = now; + en->inst_time = current_time(); /* * We do not set en->mode. It is either default LSA_M_BASIC, or in a special @@ -128,7 +128,7 @@ ospf_advance_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_ls en->lsa.sn = lsa->sn + 1; en->lsa.age = 0; en->init_age = 0; - en->inst_time = now; + en->inst_time = current_time(); lsa_generate_checksum(&en->lsa, en->lsa_body); OSPF_TRACE(D_EVENTS, "Advancing LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x", @@ -160,7 +160,7 @@ ospf_advance_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_ls en->lsa = *lsa; en->lsa.age = LSA_MAXAGE; en->init_age = lsa->age; - en->inst_time = now; + en->inst_time = current_time(); OSPF_TRACE(D_EVENTS, "Resetting LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x", en->lsa_type, en->lsa.id, en->lsa.rt, en->lsa.sn); @@ -196,7 +196,7 @@ static int ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa_body, u16 lsa_blen, u16 lsa_opts) { /* Enforce MinLSInterval */ - if ((en->init_age == 0) && en->inst_time && ((en->inst_time + MINLSINTERVAL) > now)) + if (!en->init_age && en->inst_time && (lsa_inst_age(en) < MINLSINTERVAL)) return 0; /* Handle wrapping sequence number */ @@ -237,7 +237,7 @@ ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa en->lsa.sn++; en->lsa.age = 0; en->init_age = 0; - en->inst_time = now; + en->inst_time = current_time(); lsa_generate_checksum(&en->lsa, en->lsa_body); OSPF_TRACE(D_EVENTS, "Originating 
LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x", @@ -283,8 +283,8 @@ ospf_originate_lsa(struct ospf_proto *p, struct ospf_new_lsa *lsa) if (en->nf != lsa->nf) { - log(L_ERR "%s: LSA ID collision for %I/%d", - p->p.name, lsa->nf->fn.prefix, lsa->nf->fn.pxlen); + log(L_ERR "%s: LSA ID collision for %N", + p->p.name, lsa->nf->fn.addr); en = NULL; goto drop; @@ -381,7 +381,7 @@ ospf_refresh_lsa(struct ospf_proto *p, struct top_hash_entry *en) en->lsa.sn++; en->lsa.age = 0; en->init_age = 0; - en->inst_time = now; + en->inst_time = current_time(); lsa_generate_checksum(&en->lsa, en->lsa_body); ospf_flood_lsa(p, en, NULL); } @@ -476,14 +476,15 @@ void ospf_update_lsadb(struct ospf_proto *p) { struct top_hash_entry *en, *nxt; - bird_clock_t real_age; + btime now_ = current_time(); + int real_age; WALK_SLIST_DELSAFE(en, nxt, p->lsal) { if (en->next_lsa_body) ospf_originate_next_lsa(p, en); - real_age = en->init_age + (now - en->inst_time); + real_age = en->init_age + (now_ - en->inst_time) TO_S; if (en->lsa.age == LSA_MAXAGE) { @@ -514,14 +515,14 @@ ospf_update_lsadb(struct ospf_proto *p) } -static inline u32 -ort_to_lsaid(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf) +static u32 +ort_to_lsaid(struct ospf_proto *p, ort *nf) { /* * In OSPFv2, We have to map IP prefixes to u32 in such manner that resulting * u32 interpreted as IP address is a member of given prefix. Therefore, /32 - * prefix have to be mapped on itself. All received prefixes have to be - * mapped on different u32s. + * prefix has to be mapped on itself. All received prefixes have to be mapped + * on different u32s. * * We have an assumption that if there is nontrivial (non-/32) network prefix, * then there is not /32 prefix for the first and the last IP address of the @@ -542,17 +543,21 @@ ort_to_lsaid(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf) * network appeared, we choose a different way. * * In OSPFv3, it is simpler. 
There is not a requirement for membership of the - * result in the input network, so we just use a hash-based unique ID of a - * routing table entry for a route that originated given LSA. For ext-LSA, it - * is an imported route in the nest's routing table (p->table). For summary-LSA, - * it is a 'source' route in the protocol internal routing table (p->rtf). + * result in the input network, so we just allocate a unique ID from ID map + * and store it in nf->lsa_id for further reference. */ if (ospf_is_v3(p)) - return nf->fn.uid; + { + if (!nf->lsa_id) + nf->lsa_id = idm_alloc(&p->idm); - u32 id = ipa_to_u32(nf->fn.prefix); - int pxlen = nf->fn.pxlen; + return nf->lsa_id; + } + + net_addr_ip4 *net = (void *) nf->fn.addr; + u32 id = ip4_to_u32(net->prefix); + int pxlen = net->pxlen; if ((pxlen == 0) || (pxlen == 32)) return id; @@ -628,12 +633,12 @@ configured_stubnet(struct ospf_area *oa, struct ifa *a) { if (sn->summary) { - if (ipa_in_net(a->prefix, sn->px.addr, sn->px.len) && (a->pxlen >= sn->px.len)) + if (net_in_netX(&a->prefix, &sn->prefix)) return 1; } else { - if (ipa_equal(a->prefix, sn->px.addr) && (a->pxlen == sn->px.len)) + if (net_equal(&a->prefix, &sn->prefix)) return 1; } } @@ -781,7 +786,8 @@ prepare_rt2_lsa_body(struct ospf_proto *p, struct ospf_area *oa) (ifa->type == OSPF_IT_PTMP)) add_rt2_lsa_link(p, LSART_STUB, ipa_to_u32(ifa->addr->ip), 0xffffffff, 0); else - add_rt2_lsa_link(p, LSART_STUB, ipa_to_u32(ifa->addr->prefix), u32_mkmask(ifa->addr->pxlen), ifa->cost); + add_rt2_lsa_link(p, LSART_STUB, ip4_to_u32(net4_prefix(&ifa->addr->prefix)), + u32_mkmask(net4_pxlen(&ifa->addr->prefix)), ifa->cost); i++; ifa->rt_pos_end = i; @@ -790,7 +796,8 @@ prepare_rt2_lsa_body(struct ospf_proto *p, struct ospf_area *oa) struct ospf_stubnet_config *sn; WALK_LIST(sn, oa->ac->stubnet_list) if (!sn->hidden) - add_rt2_lsa_link(p, LSART_STUB, ipa_to_u32(sn->px.addr), u32_mkmask(sn->px.len), sn->cost), i++; + add_rt2_lsa_link(p, LSART_STUB, 
ip4_to_u32(net4_prefix(&sn->prefix)), + u32_mkmask(net4_pxlen(&sn->prefix)), sn->cost), i++; struct ospf_lsa_rt *rt = p->lsab; /* Store number of links in lower half of options */ @@ -907,7 +914,7 @@ prepare_net2_lsa_body(struct ospf_proto *p, struct ospf_iface *ifa) ASSERT(p->lsab_used == 0); net = lsab_alloc(p, sizeof(struct ospf_lsa_net) + 4 * nodes); - net->optx = u32_mkmask(ifa->addr->pxlen); + net->optx = u32_mkmask(ifa->addr->prefix.pxlen); net->routers[0] = p->router_id; WALK_LIST(n, ifa->neigh_list) @@ -999,9 +1006,10 @@ prepare_sum3_net_lsa_body(struct ospf_proto *p, ort *nf, u32 metric) { struct ospf_lsa_sum3_net *sum; - sum = lsab_allocz(p, sizeof(struct ospf_lsa_sum3_net) + IPV6_PREFIX_SPACE(nf->fn.pxlen)); + sum = lsab_allocz(p, sizeof(struct ospf_lsa_sum3_net) + + IPV6_PREFIX_SPACE(nf->fn.addr->pxlen)); sum->metric = metric; - put_ipv6_prefix(sum->prefix, nf->fn.prefix, nf->fn.pxlen, 0, 0); + ospf3_put_prefix(sum->prefix, nf->fn.addr, 0, 0); } static inline void @@ -1028,7 +1036,7 @@ ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, }; if (ospf_is_v2(p)) - prepare_sum2_lsa_body(p, nf->fn.pxlen, metric); + prepare_sum2_lsa_body(p, nf->fn.addr->pxlen, metric); else prepare_sum3_net_lsa_body(p, nf, metric); @@ -1036,20 +1044,20 @@ ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, } void -ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric, u32 options) +ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 drid, int metric, u32 options) { struct ospf_new_lsa lsa = { .type = LSA_T_SUM_RT, .mode = LSA_M_RTCALC, .dom = oa->areaid, - .id = ipa_to_rid(nf->fn.prefix), /* Router ID of ASBR, irrelevant for OSPFv3 */ + .id = drid, /* Router ID of ASBR, irrelevant for OSPFv3 */ .opts = oa->options }; if (ospf_is_v2(p)) prepare_sum2_lsa_body(p, 0, metric); else - prepare_sum3_rt_lsa_body(p, lsa.id, metric, options & LSA_OPTIONS_MASK); + 
prepare_sum3_rt_lsa_body(p, drid, metric, options & LSA_OPTIONS_MASK); ospf_originate_lsa(p, &lsa); } @@ -1082,7 +1090,7 @@ prepare_ext3_lsa_body(struct ospf_proto *p, ort *nf, { struct ospf_lsa_ext3 *ext; int bsize = sizeof(struct ospf_lsa_ext3) - + IPV6_PREFIX_SPACE(nf->fn.pxlen) + + IPV6_PREFIX_SPACE(nf->fn.addr->pxlen) + (ipa_nonzero(fwaddr) ? 16 : 0) + (tag ? 4 : 0); @@ -1090,7 +1098,7 @@ prepare_ext3_lsa_body(struct ospf_proto *p, ort *nf, ext->metric = metric & LSA_METRIC_MASK; u32 *buf = ext->rest; - buf = put_ipv6_prefix(buf, nf->fn.prefix, nf->fn.pxlen, pbit ? OPT_PX_P : 0, 0); + buf = ospf3_put_prefix(buf, nf->fn.addr, pbit ? OPT_PX_P : 0, 0); if (ebit) ext->metric |= LSA_EXT3_EBIT; @@ -1098,7 +1106,7 @@ prepare_ext3_lsa_body(struct ospf_proto *p, ort *nf, if (ipa_nonzero(fwaddr)) { ext->metric |= LSA_EXT3_FBIT; - buf = put_ipv6_addr(buf, fwaddr); + buf = ospf3_put_addr(buf, fwaddr); } if (tag) @@ -1140,7 +1148,7 @@ ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 m }; if (ospf_is_v2(p)) - prepare_ext2_lsa_body(p, nf->fn.pxlen, metric, ebit, fwaddr, tag); + prepare_ext2_lsa_body(p, nf->fn.addr->pxlen, metric, ebit, fwaddr, tag); else prepare_ext3_lsa_body(p, nf, metric, ebit, fwaddr, tag, oa && pbit); @@ -1177,7 +1185,7 @@ use_gw_for_fwaddr(struct ospf_proto *p, ip_addr gw, struct iface *iface) WALK_LIST(ifa, p->iface_list) if ((ifa->iface == iface) && - (!ospf_is_v2(p) || ipa_in_net(gw, ifa->addr->prefix, ifa->addr->pxlen))) + (!ospf_is_v2(p) || ipa_in_netX(gw, &ifa->addr->prefix))) return 1; return 0; @@ -1215,7 +1223,8 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa) { WALK_LIST(a, ifa->iface->addrs) { - if ((a->flags & IA_SECONDARY) || + if ((a->prefix.type != ospf_get_af(p)) || + (a->flags & IA_SECONDARY) || (a->flags & IA_PEER) || (a->scope <= SCOPE_LINK)) continue; @@ -1234,7 +1243,7 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa) } void -ospf_rt_notify(struct proto *P, rtable 
*tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *ea) +ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *ea) { struct ospf_proto *p = (struct ospf_proto *) P; struct ospf_area *oa = NULL; /* non-NULL for NSSA-LSA */ @@ -1253,7 +1262,7 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U if (!new) { - nf = (ort *) fib_find(&p->rtf, &n->n.prefix, n->n.pxlen); + nf = fib_find(&p->rtf, n->n.addr); if (!nf || !nf->external_rte) return; @@ -1280,8 +1289,8 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U ip_addr fwd = IPA_NONE; - if ((a->dest == RTD_ROUTER) && use_gw_for_fwaddr(p, a->gw, a->iface)) - fwd = a->gw; + if ((a->dest == RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface)) + fwd = a->nh.gw; /* NSSA-LSA with P-bit set must have non-zero forwarding address */ if (oa && ipa_zero(fwd)) @@ -1290,13 +1299,13 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U if (ipa_zero(fwd)) { - log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %I/%d", - p->p.name, n->n.prefix, n->n.pxlen); + log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %N", + p->p.name, n->n.addr); return; } } - nf = (ort *) fib_get(&p->rtf, &n->n.prefix, n->n.pxlen); + nf = fib_get(&p->rtf, n->n.addr); ospf_originate_ext_lsa(p, oa, nf, LSA_M_EXPORT, metric, ebit, fwd, tag, 1); nf->external_rte = 1; } @@ -1308,38 +1317,47 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U */ static inline void -lsab_put_prefix(struct ospf_proto *p, ip_addr prefix, u32 pxlen, u32 cost) +lsab_put_prefix(struct ospf_proto *p, net_addr *n, u32 cost) { - void *buf = lsab_alloc(p, IPV6_PREFIX_SPACE(pxlen)); - u8 flags = (pxlen < MAX_PREFIX_LENGTH) ? 0 : OPT_PX_LA; - put_ipv6_prefix(buf, prefix, pxlen, flags, cost); + void *buf = lsab_alloc(p, IPV6_PREFIX_SPACE(net_pxlen(n))); + uint max = (n->type == NET_IP4) ? 
IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; + u8 flags = (net_pxlen(n) < max) ? 0 : OPT_PX_LA; + ospf3_put_prefix(buf, n, flags, cost); } static void prepare_link_lsa_body(struct ospf_proto *p, struct ospf_iface *ifa) { - struct ospf_lsa_link *ll; + ip_addr nh = ospf_is_ip4(p) ? IPA_NONE : ifa->addr->ip; int i = 0; + /* Preallocating space for header */ ASSERT(p->lsab_used == 0); - ll = lsab_allocz(p, sizeof(struct ospf_lsa_link)); - ll->options = ifa->oa->options | (ifa->priority << 24); - ll->lladdr = ipa_to_ip6(ifa->addr->ip); - ll = NULL; /* buffer might be reallocated later */ + lsab_allocz(p, sizeof(struct ospf_lsa_link)); struct ifa *a; WALK_LIST(a, ifa->iface->addrs) { - if ((a->flags & IA_SECONDARY) || - (a->scope < SCOPE_SITE)) + if ((a->prefix.type != ospf_get_af(p)) || + (a->flags & IA_SECONDARY) || + (a->scope <= SCOPE_LINK)) continue; - lsab_put_prefix(p, a->prefix, a->pxlen, 0); + if (ospf_is_ip4(p) && ipa_zero(nh)) + nh = a->ip; + + lsab_put_prefix(p, &a->prefix, 0); i++; } - ll = p->lsab; + /* Filling the preallocated header */ + struct ospf_lsa_link *ll = p->lsab; + ll->options = ifa->oa->options | (ifa->priority << 24); + ll->lladdr = ospf_is_ip4(p) ? 
ospf3_4to6(ipa_to_ip4(nh)) : ipa_to_ip6(nh); ll->pxcount = i; + + if (ipa_zero(nh)) + log(L_ERR "%s: Cannot find next hop address for %s", p->p.name, ifa->ifname); } static void @@ -1401,12 +1419,13 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) struct ifa *a; WALK_LIST(a, ifa->iface->addrs) { - if ((a->flags & IA_SECONDARY) || + if ((a->prefix.type != ospf_get_af(p)) || + (a->flags & IA_SECONDARY) || (a->flags & IA_PEER) || (a->scope <= SCOPE_LINK)) continue; - if (((a->pxlen < MAX_PREFIX_LENGTH) && net_lsa) || + if (((a->prefix.pxlen < IP6_MAX_PREFIX_LENGTH) && net_lsa) || configured_stubnet(oa, a)) continue; @@ -1414,11 +1433,12 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) (ifa->state == OSPF_IS_LOOP) || (ifa->type == OSPF_IT_PTMP)) { - lsab_put_prefix(p, a->ip, MAX_PREFIX_LENGTH, 0); + net_addr_ip6 net = NET_ADDR_IP6(a->ip, IP6_MAX_PREFIX_LENGTH); + lsab_put_prefix(p, (net_addr *) &net, 0); host_addr = 1; } else - lsab_put_prefix(p, a->prefix, a->pxlen, ifa->cost); + lsab_put_prefix(p, &a->prefix, ifa->cost); i++; } @@ -1429,15 +1449,15 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) WALK_LIST(sn, oa->ac->stubnet_list) if (!sn->hidden) { - lsab_put_prefix(p, sn->px.addr, sn->px.len, sn->cost); - if (sn->px.len == MAX_PREFIX_LENGTH) + lsab_put_prefix(p, &sn->prefix, sn->cost); + if (sn->prefix.pxlen == IP6_MAX_PREFIX_LENGTH) host_addr = 1; i++; } /* If there are some configured vlinks, find some global address (even from another area), which will be used as a vlink endpoint. 
*/ - if (!EMPTY_LIST(cf->vlink_list) && !host_addr) + if (!EMPTY_LIST(cf->vlink_list) && !host_addr && ospf_is_ip6(p)) { WALK_LIST(ifa, p->iface_list) { @@ -1447,11 +1467,14 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) struct ifa *a; WALK_LIST(a, ifa->iface->addrs) { - if ((a->flags & IA_SECONDARY) || (a->scope <= SCOPE_LINK)) + if ((a->prefix.type != NET_IP6) || + (a->flags & IA_SECONDARY) || + (a->scope <= SCOPE_LINK)) continue; /* Found some IP */ - lsab_put_prefix(p, a->ip, MAX_PREFIX_LENGTH, 0); + net_addr_ip6 net = NET_ADDR_IP6(a->ip, IP6_MAX_PREFIX_LENGTH); + lsab_put_prefix(p, (net_addr *) &net, 0); i++; goto done; } @@ -1557,7 +1580,7 @@ add_link_lsa(struct ospf_proto *p, struct ospf_lsa_link *ll, int offset, int *px continue; /* Skip link-local prefixes */ - if ((pxlen >= 10) && ((pxb[1] & 0xffc00000) == 0xfe800000)) + if (ospf_is_ip6(p) && (pxlen >= 10) && ((pxb[1] & 0xffc00000) == 0xfe800000)) continue; add_prefix(p, pxb, offset, pxc); @@ -1614,7 +1637,7 @@ ospf_originate_prefix_net_lsa(struct ospf_proto *p, struct ospf_iface *ifa) } static inline int breaks_minlsinterval(struct top_hash_entry *en) -{ return en && (en->lsa.age < LSA_MAXAGE) && ((en->inst_time + MINLSINTERVAL) > now); } +{ return en && (en->lsa.age < LSA_MAXAGE) && (lsa_inst_age(en) < MINLSINTERVAL); } void ospf_update_topology(struct ospf_proto *p) @@ -1748,7 +1771,7 @@ ospf_top_hash(struct top_graph *f, u32 domain, u32 lsaid, u32 rtrid, u32 type) * and request lists of OSPF neighbors. 
*/ struct top_graph * -ospf_top_new(struct ospf_proto *p UNUSED4 UNUSED6, pool *pool) +ospf_top_new(struct ospf_proto *p, pool *pool) { struct top_graph *f; diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index 5652ced0..ac87334b 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -26,9 +26,9 @@ struct top_hash_entry void *next_lsa_body; /* For postponed LSA origination */ u16 next_lsa_blen; /* For postponed LSA origination */ u16 next_lsa_opts; /* For postponed LSA origination */ - bird_clock_t inst_time; /* Time of installation into DB */ + btime inst_time; /* Time of installation into DB */ struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */ - struct mpnh *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ + struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */ u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 dist; /* Distance from the root */ @@ -185,10 +185,10 @@ static inline void ospf_flush2_lsa(struct ospf_proto *p, struct top_hash_entry * { if (*en) { ospf_flush_lsa(p, *en); *en = NULL; } } void ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric); -void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric, u32 options); +void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 drid, int metric, u32 options); void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit); -void ospf_rt_notify(struct proto *P, rtable *tbl, net *n, rte *new, rte *old, ea_list *attrs); +void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old, ea_list *attrs); void ospf_update_topology(struct ospf_proto *p); struct top_hash_entry 
*ospf_hash_find(struct top_graph *, u32 domain, u32 lsa, u32 rtr, u32 type); diff --git a/proto/pipe/Makefile b/proto/pipe/Makefile index 77de5b88..5093da98 100644 --- a/proto/pipe/Makefile +++ b/proto/pipe/Makefile @@ -1,6 +1,6 @@ -source=pipe.c -root-rel=../../ -dir-name=proto/pipe - -include ../../Rules +src := pipe.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index 8daf2e7c..b3c332be 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -16,28 +16,31 @@ CF_DEFINES CF_DECLS -CF_KEYWORDS(PIPE, PEER, TABLE, MODE, OPAQUE, TRANSPARENT) +CF_KEYWORDS(PIPE, PEER, TABLE) CF_GRAMMAR -CF_ADDTO(proto, pipe_proto '}') - -pipe_proto_start: proto_start PIPE { - this_proto = proto_config_new(&proto_pipe, $1); - PIPE_CFG->mode = PIPE_TRANSPARENT; +CF_ADDTO(proto, pipe_proto '}' { this_channel = NULL; } ) + +pipe_proto_start: proto_start PIPE +{ + this_proto = proto_config_new(&proto_pipe, $1); +} +proto_name +{ + this_channel = proto_cf_main_channel(this_proto); + if (!this_channel) { + this_channel = channel_config_new(NULL, NULL, 0, this_proto); + this_channel->in_filter = FILTER_ACCEPT; + this_channel->out_filter = FILTER_ACCEPT; } - ; +}; pipe_proto: - pipe_proto_start proto_name '{' + pipe_proto_start '{' | pipe_proto proto_item ';' - | pipe_proto PEER TABLE SYM ';' { - if ($4->class != SYM_TABLE) - cf_error("Routing table name expected"); - PIPE_CFG->peer = $4->def; - } - | pipe_proto MODE OPAQUE ';' { PIPE_CFG->mode = PIPE_OPAQUE; } - | pipe_proto MODE TRANSPARENT ';' { PIPE_CFG->mode = PIPE_TRANSPARENT; } + | pipe_proto channel_item ';' + | pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; } ; CF_CODE diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 6ef80322..49ff52e2 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -44,54 +44,42 @@ #include "pipe.h" static void -pipe_rt_notify(struct proto *P, rtable *src_table, net *n, rte *new, rte *old, ea_list *attrs) +pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *old, ea_list *attrs) { - struct pipe_proto *p = (struct pipe_proto *) P; - struct announce_hook *ah = (src_table == P->table) ? p->peer_ahook : P->main_ahook; - rtable *dst_table = ah->table; + struct pipe_proto *p = (void *) P; + struct channel *dst = (src_ch == p->pri) ? 
p->sec : p->pri; struct rte_src *src; - net *nn; rte *e; - rta a; + rta *a; if (!new && !old) return; - if (dst_table->pipe_busy) + if (dst->table->pipe_busy) { - log(L_ERR "Pipe loop detected when sending %I/%d to table %s", - n->n.prefix, n->n.pxlen, dst_table->name); + log(L_ERR "Pipe loop detected when sending %N to table %s", + n->n.addr, dst->table->name); return; } - nn = net_get(dst_table, n->n.prefix, n->n.pxlen); if (new) { - memcpy(&a, new->attrs, sizeof(rta)); - - if (p->mode == PIPE_OPAQUE) - { - a.src = P->main_source; - a.source = RTS_PIPE; - } - - a.aflags = 0; - a.eattrs = attrs; - a.hostentry = NULL; - e = rte_get_temp(&a); - e->net = nn; + a = alloca(rta_size(new->attrs)); + memcpy(a, new->attrs, rta_size(new->attrs)); + + a->aflags = 0; + a->eattrs = attrs; + a->hostentry = NULL; + e = rte_get_temp(a); e->pflags = 0; - if (p->mode == PIPE_TRANSPARENT) - { - /* Copy protocol specific embedded attributes. */ - memcpy(&(e->u), &(new->u), sizeof(e->u)); - e->pref = new->pref; - e->pflags = new->pflags; - } + /* Copy protocol specific embedded attributes. 
*/ + memcpy(&(e->u), &(new->u), sizeof(e->u)); + e->pref = new->pref; + e->pflags = new->pflags; - src = a.src; + src = a->src; } else { @@ -99,9 +87,9 @@ pipe_rt_notify(struct proto *P, rtable *src_table, net *n, rte *new, rte *old, e src = old->attrs->src; } - src_table->pipe_busy = 1; - rte_update2(ah, nn, e, src); - src_table->pipe_busy = 0; + src_ch->table->pipe_busy = 1; + rte_update2(dst, n->n.addr, e, src); + src_ch->table->pipe_busy = 0; } static int @@ -111,171 +99,117 @@ pipe_import_control(struct proto *P, rte **ee, ea_list **ea UNUSED, struct linpo if (pp == P) return -1; /* Avoid local loops automatically */ + return 0; } -static int -pipe_reload_routes(struct proto *P) +static void +pipe_reload_routes(struct channel *C) { - struct pipe_proto *p = (struct pipe_proto *) P; - - /* - * Because the pipe protocol feeds routes from both routing tables - * together, both directions are reloaded during refeed and 'reload - * out' command works like 'reload' command. For symmetry, we also - * request refeed when 'reload in' command is used. - */ - proto_request_feeding(P); + struct pipe_proto *p = (void *) C->proto; - proto_reset_limit(P->main_ahook->in_limit); - proto_reset_limit(p->peer_ahook->in_limit); - - return 1; + /* Route reload on one channel is just refeed on the other */ + channel_request_feeding((C == p->pri) ? p->sec : p->pri); } -static struct proto * -pipe_init(struct proto_config *C) -{ - struct pipe_config *c = (struct pipe_config *) C; - struct proto *P = proto_new(C, sizeof(struct pipe_proto)); - struct pipe_proto *p = (struct pipe_proto *) P; - - p->mode = c->mode; - p->peer_table = c->peer->table; - P->accept_ra_types = (p->mode == PIPE_OPAQUE) ? 
RA_OPTIMAL : RA_ANY; - P->rt_notify = pipe_rt_notify; - P->import_control = pipe_import_control; - P->reload_routes = pipe_reload_routes; - - return P; -} -static int -pipe_start(struct proto *P) +static void +pipe_postconfig(struct proto_config *CF) { - struct pipe_config *cf = (struct pipe_config *) P->cf; - struct pipe_proto *p = (struct pipe_proto *) P; + struct pipe_config *cf = (void *) CF; + struct channel_config *cc = proto_cf_main_channel(CF); - /* Lock both tables, unlock is handled in pipe_cleanup() */ - rt_lock_table(P->table); - rt_lock_table(p->peer_table); + if (!cc->table) + cf_error("Primary routing table not specified"); - /* Going directly to PS_UP - prepare for feeding, - connect the protocol to both routing tables */ + if (!cf->peer) + cf_error("Secondary routing table not specified"); - P->main_ahook = proto_add_announce_hook(P, P->table, &P->stats); - P->main_ahook->out_filter = cf->c.out_filter; - P->main_ahook->in_limit = cf->c.in_limit; - proto_reset_limit(P->main_ahook->in_limit); + if (cc->table == cf->peer) + cf_error("Primary table and peer table must be different"); - p->peer_ahook = proto_add_announce_hook(P, p->peer_table, &p->peer_stats); - p->peer_ahook->out_filter = cf->c.in_filter; - p->peer_ahook->in_limit = cf->c.out_limit; - proto_reset_limit(p->peer_ahook->in_limit); + if (cc->table->addr_type != cf->peer->addr_type) + cf_error("Primary table and peer table must have the same type"); - if (p->mode == PIPE_OPAQUE) - { - P->main_source = rt_get_source(P, 0); - rt_lock_source(P->main_source); - } + if (cc->rx_limit.action) + cf_error("Pipe protocol does not support receive limits"); - return PS_UP; + if (cc->in_keep_filtered) + cf_error("Pipe protocol prohibits keeping filtered routes"); } -static void -pipe_cleanup(struct proto *P) +static int +pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf) { - struct pipe_proto *p = (struct pipe_proto *) P; - - bzero(&P->stats, sizeof(struct proto_stats)); - 
bzero(&p->peer_stats, sizeof(struct proto_stats)); - - P->main_ahook = NULL; - p->peer_ahook = NULL; - - if (p->mode == PIPE_OPAQUE) - rt_unlock_source(P->main_source); - P->main_source = NULL; - - rt_unlock_table(P->table); - rt_unlock_table(p->peer_table); + struct channel_config *cc = proto_cf_main_channel(&cf->c); + + struct channel_config pri_cf = { + .name = "pri", + .channel = cc->channel, + .table = cc->table, + .out_filter = cc->out_filter, + .in_limit = cc->in_limit, + .ra_mode = RA_ANY + }; + + struct channel_config sec_cf = { + .name = "sec", + .channel = cc->channel, + .table = cf->peer, + .out_filter = cc->in_filter, + .in_limit = cc->out_limit, + .ra_mode = RA_ANY + }; + + return + proto_configure_channel(&p->p, &p->pri, &pri_cf) && + proto_configure_channel(&p->p, &p->sec, &sec_cf); } -static void -pipe_postconfig(struct proto_config *C) +static struct proto * +pipe_init(struct proto_config *CF) { - struct pipe_config *c = (struct pipe_config *) C; + struct proto *P = proto_new(CF); + struct pipe_proto *p = (void *) P; + struct pipe_config *cf = (void *) CF; - if (!c->peer) - cf_error("Name of peer routing table not specified"); - if (c->peer == C->table) - cf_error("Primary table and peer table must be different"); + P->rt_notify = pipe_rt_notify; + P->import_control = pipe_import_control; + P->reload_routes = pipe_reload_routes; - if (C->in_keep_filtered) - cf_error("Pipe protocol prohibits keeping filtered routes"); - if (C->rx_limit) - cf_error("Pipe protocol does not support receive limits"); -} + pipe_configure_channels(p, cf); -extern int proto_reconfig_type; + return P; +} static int -pipe_reconfigure(struct proto *P, struct proto_config *new) +pipe_reconfigure(struct proto *P, struct proto_config *CF) { - struct pipe_proto *p = (struct pipe_proto *)P; - struct proto_config *old = P->cf; - struct pipe_config *oc = (struct pipe_config *) old; - struct pipe_config *nc = (struct pipe_config *) new; - - if ((oc->peer->table != nc->peer->table) 
|| (oc->mode != nc->mode)) - return 0; - - /* Update output filters in ahooks */ - if (P->main_ahook) - { - P->main_ahook->out_filter = new->out_filter; - P->main_ahook->in_limit = new->in_limit; - proto_verify_limits(P->main_ahook); - } + struct pipe_proto *p = (void *) P; + struct pipe_config *cf = (void *) CF; - if (p->peer_ahook) - { - p->peer_ahook->out_filter = new->in_filter; - p->peer_ahook->in_limit = new->out_limit; - proto_verify_limits(p->peer_ahook); - } - - if ((P->proto_state != PS_UP) || (proto_reconfig_type == RECONFIG_SOFT)) - return 1; - - if ((new->preference != old->preference) - || ! filter_same(new->in_filter, old->in_filter) - || ! filter_same(new->out_filter, old->out_filter)) - proto_request_feeding(P); - - return 1; + return pipe_configure_channels(p, cf); } static void -pipe_copy_config(struct proto_config *dest, struct proto_config *src) +pipe_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) { /* Just a shallow copy, not many items here */ - proto_copy_rest(dest, src, sizeof(struct pipe_config)); } static void pipe_get_status(struct proto *P, byte *buf) { - struct pipe_proto *p = (struct pipe_proto *) P; + struct pipe_proto *p = (void *) P; - bsprintf(buf, "%c> %s", (p->mode == PIPE_OPAQUE) ? '-' : '=', p->peer_table->name); + bsprintf(buf, "%s <=> %s", p->pri->table->name, p->sec->table->name); } static void pipe_show_stats(struct pipe_proto *p) { - struct proto_stats *s1 = &p->p.stats; - struct proto_stats *s2 = &p->peer_stats; + struct proto_stats *s1 = &p->pri->stats; + struct proto_stats *s2 = &p->sec->stats; /* * Pipe stats (as anything related to pipes) are a bit tricky. 
There @@ -315,20 +249,23 @@ pipe_show_stats(struct pipe_proto *p) s2->imp_withdraws_ignored, s2->imp_withdraws_accepted); } +static const char *pipe_feed_state[] = { [ES_DOWN] = "down", [ES_FEEDING] = "feed", [ES_READY] = "up" }; + static void pipe_show_proto_info(struct proto *P) { - struct pipe_proto *p = (struct pipe_proto *) P; - struct pipe_config *cf = (struct pipe_config *) P->cf; + struct pipe_proto *p = (void *) P; - // cli_msg(-1006, " Table: %s", P->table->name); - // cli_msg(-1006, " Peer table: %s", p->peer_table->name); - cli_msg(-1006, " Preference: %d", P->preference); - cli_msg(-1006, " Input filter: %s", filter_name(cf->c.in_filter)); - cli_msg(-1006, " Output filter: %s", filter_name(cf->c.out_filter)); + cli_msg(-1006, " Channel %s", "main"); + cli_msg(-1006, " Table: %s", p->pri->table->name); + cli_msg(-1006, " Peer table: %s", p->sec->table->name); + cli_msg(-1006, " Import state: %s", pipe_feed_state[p->sec->export_state]); + cli_msg(-1006, " Export state: %s", pipe_feed_state[p->pri->export_state]); + cli_msg(-1006, " Import filter: %s", filter_name(p->sec->out_filter)); + cli_msg(-1006, " Export filter: %s", filter_name(p->pri->out_filter)); - proto_show_limit(cf->c.in_limit, "Import limit:"); - proto_show_limit(cf->c.out_limit, "Export limit:"); + channel_show_limit(&p->pri->in_limit, "Import limit:"); + channel_show_limit(&p->sec->in_limit, "Export limit:"); if (P->proto_state != PS_DOWN) pipe_show_stats(p); @@ -338,13 +275,10 @@ pipe_show_proto_info(struct proto *P) struct protocol proto_pipe = { .name = "Pipe", .template = "pipe%d", - .multitable = 1, - .preference = DEF_PREF_PIPE, + .proto_size = sizeof(struct pipe_proto), .config_size = sizeof(struct pipe_config), .postconfig = pipe_postconfig, .init = pipe_init, - .start = pipe_start, - .cleanup = pipe_cleanup, .reconfigure = pipe_reconfigure, .copy_config = pipe_copy_config, .get_status = pipe_get_status, diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h index 50b31698..038c6666 
100644 --- a/proto/pipe/pipe.h +++ b/proto/pipe/pipe.h @@ -9,27 +9,15 @@ #ifndef _BIRD_PIPE_H_ #define _BIRD_PIPE_H_ -#define PIPE_OPAQUE 0 -#define PIPE_TRANSPARENT 1 - struct pipe_config { struct proto_config c; struct rtable_config *peer; /* Table we're connected to */ - int mode; /* PIPE_OPAQUE or PIPE_TRANSPARENT */ }; struct pipe_proto { struct proto p; - struct rtable *peer_table; - struct announce_hook *peer_ahook; /* Announce hook for direction peer->primary */ - struct proto_stats peer_stats; /* Statistics for the direction peer->primary */ - int mode; /* PIPE_OPAQUE or PIPE_TRANSPARENT */ + struct channel *pri; + struct channel *sec; }; - -extern struct protocol proto_pipe; - -static inline int proto_is_pipe(struct proto *p) -{ return p->proto == &proto_pipe; } - #endif diff --git a/proto/radv/Makefile b/proto/radv/Makefile index efc4d4af..05317eff 100644 --- a/proto/radv/Makefile +++ b/proto/radv/Makefile @@ -1,5 +1,6 @@ -source=radv.c packets.c -root-rel=../../ -dir-name=proto/radv +src := packets.c radv.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/radv/config.Y b/proto/radv/config.Y index 84a2de0e..37815f0d 100644 --- a/proto/radv/config.Y +++ b/proto/radv/config.Y @@ -44,6 +44,7 @@ CF_ADDTO(proto, radv_proto) radv_proto_start: proto_start RADV { this_proto = proto_config_new(&proto_radv, $1); + init_list(&RADV_CFG->patt_list); init_list(&RADV_CFG->pref_list); init_list(&RADV_CFG->rdnss_list); @@ -52,15 +53,12 @@ radv_proto_start: proto_start RADV radv_proto_item: proto_item + | proto_channel | INTERFACE radv_iface | PREFIX radv_prefix { add_tail(&RADV_CFG->pref_list, NODE this_radv_prefix); } | RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_CFG->rdnss_list, &radv_dns_list); } | DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_CFG->dnssl_list, &radv_dns_list); } - | TRIGGER prefix { - RADV_CFG->trigger_prefix = $2.addr; - RADV_CFG->trigger_pxlen = $2.len; - RADV_CFG->trigger_valid = 1; - } + | TRIGGER net_ip6 { RADV_CFG->trigger = $2; } | PROPAGATE ROUTES bool { RADV_CFG->propagate_routes = $3; } ; @@ -82,16 +80,16 @@ radv_iface_start: init_list(&RADV_IFACE->rdnss_list); init_list(&RADV_IFACE->dnssl_list); - RADV_IFACE->min_ra_int = -1; /* undefined */ + RADV_IFACE->min_ra_int = (u32) -1; /* undefined */ RADV_IFACE->max_ra_int = DEFAULT_MAX_RA_INT; RADV_IFACE->min_delay = DEFAULT_MIN_DELAY; - RADV_IFACE->prefix_linger_time = -1; - RADV_IFACE->route_linger_time = -1; + RADV_IFACE->prefix_linger_time = (u32) -1; + RADV_IFACE->route_linger_time = (u32) -1; RADV_IFACE->current_hop_limit = DEFAULT_CURRENT_HOP_LIMIT; - RADV_IFACE->default_lifetime = -1; + RADV_IFACE->default_lifetime = (u32) -1; RADV_IFACE->default_lifetime_sensitive = 1; RADV_IFACE->default_preference = RA_PREF_MEDIUM; - RADV_IFACE->route_lifetime = -1; + RADV_IFACE->route_lifetime = (u32) -1; RADV_IFACE->route_lifetime_sensitive = 0; RADV_IFACE->route_preference = RA_PREF_MEDIUM; }; @@ -102,18 +100,18 @@ radv_iface_item: | MIN DELAY expr { 
RADV_IFACE->min_delay = $3; if ($3 <= 0) cf_error("Min delay must be positive"); } | MANAGED bool { RADV_IFACE->managed = $2; } | OTHER CONFIG bool { RADV_IFACE->other_config = $3; } - | LINK MTU expr { RADV_IFACE->link_mtu = $3; if ($3 < 0) cf_error("Link MTU must be 0 or positive"); } - | REACHABLE TIME expr { RADV_IFACE->reachable_time = $3; if (($3 < 0) || ($3 > 3600000)) cf_error("Reachable time must be in range 0-3600000"); } - | RETRANS TIMER expr { RADV_IFACE->retrans_timer = $3; if ($3 < 0) cf_error("Retrans timer must be 0 or positive"); } - | CURRENT HOP LIMIT expr { RADV_IFACE->current_hop_limit = $4; if (($4 < 0) || ($4 > 255)) cf_error("Current hop limit must be in range 0-255"); } + | LINK MTU expr { RADV_IFACE->link_mtu = $3; } + | REACHABLE TIME expr { RADV_IFACE->reachable_time = $3; if ($3 > 3600000) cf_error("Reachable time must be in range 0-3600000"); } + | RETRANS TIMER expr { RADV_IFACE->retrans_timer = $3; } + | CURRENT HOP LIMIT expr { RADV_IFACE->current_hop_limit = $4; if ($4 > 255) cf_error("Current hop limit must be in range 0-255"); } | DEFAULT LIFETIME expr radv_sensitive { RADV_IFACE->default_lifetime = $3; - if (($3 < 0) || ($3 > 9000)) cf_error("Default lifetime must be in range 0-9000"); - if ($4 != -1) RADV_IFACE->default_lifetime_sensitive = $4; + if ($3 > 9000) cf_error("Default lifetime must be in range 0-9000"); + if ($4 != (uint) -1) RADV_IFACE->default_lifetime_sensitive = $4; } | ROUTE LIFETIME expr radv_sensitive { RADV_IFACE->route_lifetime = $3; - if ($4 != -1) RADV_IFACE->route_lifetime_sensitive = $4; + if ($4 != (uint) -1) RADV_IFACE->route_lifetime_sensitive = $4; } | DEFAULT PREFERENCE radv_preference { RADV_IFACE->default_preference = $3; } | ROUTE PREFERENCE radv_preference { RADV_IFACE->route_preference = $3; } @@ -152,7 +150,7 @@ radv_iface_finish: if ((ic->min_ra_int > 3) && (ic->min_ra_int > (ic->max_ra_int * 3 / 4))) - cf_error("Min RA interval must be at most 3/4 * Max RA interval %d %d", ic->min_ra_int, 
ic->max_ra_int); + cf_error("Min RA interval must be at most 3/4 * Max RA interval"); if ((ic->default_lifetime > 0) && (ic->default_lifetime < ic->max_ra_int)) cf_error("Default lifetime must be either 0 or at least Max RA interval"); @@ -184,11 +182,10 @@ radv_iface: radv_iface_start iface_patt_list_nopx radv_iface_opt_list radv_iface_finish; -radv_prefix_start: prefix +radv_prefix_start: net_ip6 { this_radv_prefix = cfg_allocz(sizeof(struct radv_prefix_config)); - RADV_PREFIX->prefix = $1.addr; - RADV_PREFIX->pxlen = $1.len; + RADV_PREFIX->prefix = *(net_addr_ip6 *) &($1); RADV_PREFIX->onlink = 1; RADV_PREFIX->autonomous = 1; @@ -202,11 +199,11 @@ radv_prefix_item: | AUTONOMOUS bool { RADV_PREFIX->autonomous = $2; } | VALID LIFETIME expr radv_sensitive { RADV_PREFIX->valid_lifetime = $3; - if ($4 != -1) RADV_PREFIX->valid_lifetime_sensitive = $4; + if ($4 != (uint) -1) RADV_PREFIX->valid_lifetime_sensitive = $4; } | PREFERRED LIFETIME expr radv_sensitive { RADV_PREFIX->preferred_lifetime = $3; - if ($4 != -1) RADV_PREFIX->preferred_lifetime_sensitive = $4; + if ($4 != (uint) -1) RADV_PREFIX->preferred_lifetime_sensitive = $4; } ; @@ -331,7 +328,7 @@ radv_mult: ; radv_sensitive: - /* empty */ { $$ = -1; } + /* empty */ { $$ = (uint) -1; } | SENSITIVE bool { $$ = $2; } ; diff --git a/proto/radv/packets.c b/proto/radv/packets.c index 7d54a827..b12d3a12 100644 --- a/proto/radv/packets.c +++ b/proto/radv/packets.c @@ -39,7 +39,7 @@ struct radv_opt_prefix u32 valid_lifetime; u32 preferred_lifetime; u32 reserved; - ip_addr prefix; + ip6_addr prefix; }; #define OPT_PX_ONLINK 0x80 @@ -68,7 +68,7 @@ struct radv_opt_rdnss u8 length; u16 reserved; u32 lifetime; - ip_addr servers[]; + ip6_addr servers[]; }; struct radv_opt_dnssl @@ -85,7 +85,7 @@ radv_prepare_route(struct radv_iface *ifa, struct radv_route *rt, char **buf, char *bufend) { struct radv_proto *p = ifa->ra; - u8 px_blocks = (rt->n.pxlen + 63) / 64; + u8 px_blocks = (net6_pxlen(rt->n.addr) + 63) / 64; u8 opt_len 
= 8 * (1 + px_blocks); if (*buf + opt_len > bufend) @@ -103,17 +103,17 @@ radv_prepare_route(struct radv_iface *ifa, struct radv_route *rt, *buf += opt_len; opt->type = OPT_ROUTE; opt->length = 1 + px_blocks; - opt->pxlen = rt->n.pxlen; + opt->pxlen = net6_pxlen(rt->n.addr); opt->flags = preference; opt->lifetime = valid ? htonl(lifetime) : 0; /* Copy the relevant part of the prefix */ - ip6_addr px_addr = ip6_hton(rt->n.prefix); + ip6_addr px_addr = ip6_hton(net6_prefix(rt->n.addr)); memcpy(opt->prefix, &px_addr, 8 * px_blocks); /* Keeping track of first linger timeout */ if (!rt->valid) - ifa->valid_time = MIN(ifa->valid_time, rt->changed + ifa->cf->route_linger_time); + ifa->valid_time = MIN(ifa->valid_time, rt->changed + ifa->cf->route_linger_time S); return 0; } @@ -127,7 +127,7 @@ radv_prepare_rdnss(struct radv_iface *ifa, list *rdnss_list, char **buf, char *b { struct radv_rdnss_config *rcf_base = rcf; struct radv_opt_rdnss *op = (void *) *buf; - int max_i = (bufend - *buf - sizeof(struct radv_opt_rdnss)) / sizeof(ip_addr); + int max_i = (bufend - *buf - sizeof(struct radv_opt_rdnss)) / sizeof(ip6_addr); int i = 0; if (max_i < 1) @@ -148,8 +148,7 @@ radv_prepare_rdnss(struct radv_iface *ifa, list *rdnss_list, char **buf, char *b if (i >= max_i) goto too_much; - op->servers[i] = rcf->server; - ipa_hton(op->servers[i]); + op->servers[i] = ip6_hton(rcf->server); i++; rcf = NODE_NEXT(rcf); @@ -254,10 +253,10 @@ radv_prepare_dnssl(struct radv_iface *ifa, list *dnssl_list, char **buf, char *b } static int -radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *prefix, +radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *px, char **buf, char *bufend) { - struct radv_prefix_config *pc = prefix->cf; + struct radv_prefix_config *pc = px->cf; if (*buf + sizeof(struct radv_opt_prefix) > bufend) { @@ -269,7 +268,7 @@ radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *prefix, struct radv_opt_prefix *op = (void *) *buf; op->type = 
OPT_PREFIX; op->length = 4; - op->pxlen = prefix->len; + op->pxlen = px->prefix.pxlen; op->flags = (pc->onlink ? OPT_PX_ONLINK : 0) | (pc->autonomous ? OPT_PX_AUTONOMOUS : 0); op->valid_lifetime = (ifa->ra->active || !pc->valid_lifetime_sensitive) ? @@ -277,13 +276,12 @@ radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *prefix, op->preferred_lifetime = (ifa->ra->active || !pc->preferred_lifetime_sensitive) ? htonl(pc->preferred_lifetime) : 0; op->reserved = 0; - op->prefix = prefix->prefix; - ipa_hton(op->prefix); + op->prefix = ip6_hton(px->prefix.prefix); *buf += sizeof(*op); /* Keeping track of first linger timeout */ - if (!prefix->valid) - ifa->valid_time = MIN(ifa->valid_time, prefix->changed + ifa->cf->prefix_linger_time); + if (!px->valid) + ifa->valid_time = MIN(ifa->valid_time, px->changed + ifa->cf->prefix_linger_time S); return 0; } @@ -294,6 +292,7 @@ radv_prepare_ra(struct radv_iface *ifa) struct radv_proto *p = ifa->ra; struct radv_config *cf = (struct radv_config *) (p->p.cf); struct radv_iface_config *ic = ifa->cf; + btime now = current_time(); char *buf = ifa->sk->tbuf; char *bufstart = buf; @@ -330,7 +329,7 @@ radv_prepare_ra(struct radv_iface *ifa) WALK_LIST(px, ifa->prefixes) { /* Skip invalid prefixes that are past linger timeout but still not pruned */ - if (!px->valid && (px->changed + ic->prefix_linger_time <= now)) + if (!px->valid && ((px->changed + ic->prefix_linger_time S) <= now)) continue; if (radv_prepare_prefix(ifa, px, &buf, bufend) < 0) @@ -353,12 +352,10 @@ radv_prepare_ra(struct radv_iface *ifa) if (p->fib_up) { - FIB_WALK(&p->routes, n) + FIB_WALK(&p->routes, struct radv_route, rt) { - struct radv_route *rt = (void *) n; - /* Skip invalid routes that are past linger timeout but still not pruned */ - if (!rt->valid && (rt->changed + ic->route_linger_time <= now)) + if (!rt->valid && ((rt->changed + ic->route_linger_time S) <= now)) continue; if (radv_prepare_route(ifa, rt, &buf, bufend) < 0) @@ -396,7 +393,7 @@ 
radv_rx_hook(sock *sk, uint size) if (sk->lifindex != sk->iface->index) return 1; - if (ipa_equal(sk->faddr, ifa->addr->ip)) + if (ipa_equal(sk->faddr, sk->saddr)) return 1; if (size < 8) @@ -448,6 +445,7 @@ radv_sk_open(struct radv_iface *ifa) { sock *sk = sk_new(ifa->pool); sk->type = SK_IP; + sk->subtype = SK_IPV6; sk->dport = ICMPV6_PROTO; sk->saddr = ifa->addr->ip; sk->vrf = ifa->ra->p.vrf; diff --git a/proto/radv/radv.c b/proto/radv/radv.c index 7e8950c5..8a79dfaf 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -52,6 +52,7 @@ radv_timer(timer *tm) { struct radv_iface *ifa = tm->data; struct radv_proto *p = ifa->ra; + btime now = current_time(); RADV_TRACE(D_EVENTS, "Timer fired on %s", ifa->iface->name); @@ -68,16 +69,17 @@ radv_timer(timer *tm) /* Update timer */ ifa->last = now; - unsigned after = ifa->cf->min_ra_int; - after += random() % (ifa->cf->max_ra_int - ifa->cf->min_ra_int + 1); + btime t = ifa->cf->min_ra_int S; + btime r = (ifa->cf->max_ra_int - ifa->cf->min_ra_int) S; + t += random() % (r + 1); if (ifa->initial) + { + t = MIN(t, MAX_INITIAL_RTR_ADVERT_INTERVAL); ifa->initial--; + } - if (ifa->initial) - after = MIN(after, MAX_INITIAL_RTR_ADVERT_INTERVAL); - - tm_start(ifa->timer, after); + tm_start(ifa->timer, t); } static struct radv_prefix_config default_prefix = { @@ -92,21 +94,18 @@ static struct radv_prefix_config dead_prefix = { /* Find a corresponding config for the given prefix */ static struct radv_prefix_config * -radv_prefix_match(struct radv_iface *ifa, struct ifa *a) +radv_prefix_match(struct radv_iface *ifa, net_addr_ip6 *px) { struct radv_proto *p = ifa->ra; struct radv_config *cf = (struct radv_config *) (p->p.cf); struct radv_prefix_config *pc; - if (a->scope <= SCOPE_LINK) - return NULL; - WALK_LIST(pc, ifa->cf->pref_list) - if ((a->pxlen >= pc->pxlen) && ipa_in_net(a->prefix, pc->prefix, pc->pxlen)) + if (net_in_net_ip6(px, &pc->prefix)) return pc; WALK_LIST(pc, cf->pref_list) - if ((a->pxlen >= pc->pxlen) && 
ipa_in_net(a->prefix, pc->prefix, pc->pxlen)) + if (net_in_net_ip6(px, &pc->prefix)) return pc; return &default_prefix; @@ -121,6 +120,7 @@ radv_prepare_prefixes(struct radv_iface *ifa) { struct radv_proto *p = ifa->ra; struct radv_prefix *pfx, *next; + btime now = current_time(); /* First mark all the prefixes as unused */ WALK_LIST(pfx, ifa->prefixes) @@ -130,7 +130,12 @@ radv_prepare_prefixes(struct radv_iface *ifa) struct ifa *addr; WALK_LIST(addr, ifa->iface->addrs) { - struct radv_prefix_config *pc = radv_prefix_match(ifa, addr); + if ((addr->prefix.type != NET_IP6) || + (addr->scope <= SCOPE_LINK)) + continue; + + net_addr_ip6 *prefix = (void *) &addr->prefix; + struct radv_prefix_config *pc = radv_prefix_match(ifa, prefix); if (!pc || pc->skip) continue; @@ -138,7 +143,7 @@ radv_prepare_prefixes(struct radv_iface *ifa) /* Do we have it already? */ struct radv_prefix *existing = NULL; WALK_LIST(pfx, ifa->prefixes) - if ((pfx->len == addr->pxlen) && ipa_equal(pfx->prefix, addr->prefix)) + if (net_equal_ip6(&pfx->prefix, prefix)) { existing = pfx; break; @@ -146,12 +151,11 @@ radv_prepare_prefixes(struct radv_iface *ifa) if (!existing) { - RADV_TRACE(D_EVENTS, "Adding new prefix %I/%d on %s", - addr->prefix, addr->pxlen, ifa->iface->name); + RADV_TRACE(D_EVENTS, "Adding new prefix %N on %s", + prefix, ifa->iface->name); existing = mb_allocz(ifa->pool, sizeof *existing); - existing->prefix = addr->prefix; - existing->len = addr->pxlen; + net_copy_ip6(&existing->prefix, prefix); add_tail(&ifa->prefixes, NODE existing); } @@ -169,8 +173,8 @@ radv_prepare_prefixes(struct radv_iface *ifa) { if (pfx->valid && !pfx->mark) { - RADV_TRACE(D_EVENTS, "Invalidating prefix %I/%d on %s", - pfx->prefix, pfx->len, ifa->iface->name); + RADV_TRACE(D_EVENTS, "Invalidating prefix %N on %s", + pfx->prefix, ifa->iface->name); pfx->valid = 0; pfx->changed = now; @@ -183,20 +187,21 @@ static void radv_prune_prefixes(struct radv_iface *ifa) { struct radv_proto *p = ifa->ra; - 
bird_clock_t next = TIME_INFINITY; - bird_clock_t expires = 0; + btime now = current_time(); + btime next = TIME_INFINITY; + btime expires = 0; struct radv_prefix *px, *pxn; WALK_LIST_DELSAFE(px, pxn, ifa->prefixes) { if (!px->valid) { - expires = px->changed + ifa->cf->prefix_linger_time; + expires = px->changed + ifa->cf->prefix_linger_time S; if (expires <= now) { - RADV_TRACE(D_EVENTS, "Removing prefix %I/%d on %s", - px->prefix, px->len, ifa->iface->name); + RADV_TRACE(D_EVENTS, "Removing prefix %N on %s", + px->prefix, ifa->iface->name); rem_node(NODE px); mb_free(px); @@ -236,13 +241,8 @@ radv_iface_notify(struct radv_iface *ifa, int event) } /* Update timer */ - unsigned delta = now - ifa->last; - unsigned after = 0; - - if (delta < ifa->cf->min_delay) - after = ifa->cf->min_delay - delta; - - tm_start(ifa->timer, after); + btime t = ifa->last + ifa->cf->min_delay S - current_time(); + tm_start(ifa->timer, t); } static void @@ -281,17 +281,6 @@ radv_iface_add(struct object_lock *lock) radv_iface_notify(ifa, RA_EV_INIT); } -static inline struct ifa * -find_lladdr(struct iface *iface) -{ - struct ifa *a; - WALK_LIST(a, iface->addrs) - if (a->scope == SCOPE_LINK) - return a; - - return NULL; -} - static void radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_config *cf) { @@ -305,24 +294,13 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf ifa->ra = p; ifa->cf = cf; ifa->iface = iface; + ifa->addr = iface->llv6; init_list(&ifa->prefixes); ifa->prune_time = TIME_INFINITY; add_tail(&p->iface_list, NODE ifa); - ifa->addr = find_lladdr(iface); - if (!ifa->addr) - { - log(L_ERR "%s: Missing link-local address on interface %s", p->p.name, iface->name); - return; - } - - timer *tm = tm_new(pool); - tm->hook = radv_timer; - tm->data = ifa; - tm->randomize = 0; - tm->recurrent = 0; - ifa->timer = tm; + ifa->timer = tm_new_init(pool, radv_timer, ifa, 0, 0); struct object_lock *lock = olock_new(pool); lock->type 
= OBJLOCK_IP; @@ -357,8 +335,15 @@ radv_if_notify(struct proto *P, unsigned flags, struct iface *iface) if (flags & IF_CHANGE_UP) { - struct radv_iface_config *ic = (struct radv_iface_config *) - iface_patt_find(&cf->patt_list, iface, NULL); + struct radv_iface_config *ic = (void *) iface_patt_find(&cf->patt_list, iface, NULL); + + /* Ignore non-multicast ifaces */ + if (!(iface->flags & IF_MULTICAST)) + return; + + /* Ignore ifaces without link-local address */ + if (!iface->llv6) + return; if (ic) radv_iface_new(p, iface, ic); @@ -397,11 +382,16 @@ radv_ifa_notify(struct proto *P, unsigned flags UNUSED, struct ifa *a) radv_iface_notify(ifa, RA_EV_CHANGE); } -static inline int radv_net_match_trigger(struct radv_config *cf, net *n) +static inline int +radv_trigger_valid(struct radv_config *cf) +{ + return cf->trigger.type != 0; +} + +static inline int +radv_net_match_trigger(struct radv_config *cf, net *n) { - return cf->trigger_valid && - (n->n.pxlen == cf->trigger_pxlen) && - ipa_equal(n->n.prefix, cf->trigger_prefix); + return radv_trigger_valid(cf) && net_equal(n->n.addr, &cf->trigger); } int @@ -420,7 +410,7 @@ radv_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct l } static void -radv_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs) +radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs) { struct radv_proto *p = (struct radv_proto *) P; struct radv_config *cf = (struct radv_config *) (P->cf); @@ -470,15 +460,15 @@ radv_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U (preference != RA_PREF_MEDIUM) && (preference != RA_PREF_HIGH)) { - log(L_WARN "%s: Invalid ra_preference value %u on route %I/%d", - p->p.name, preference, n->n.prefix, n->n.pxlen); + log(L_WARN "%s: Invalid ra_preference value %u on route %N", + p->p.name, preference, n->n.addr); preference = RA_PREF_MEDIUM; preference_set = 1; lifetime = 0; 
lifetime_set = 1; } - rt = fib_get(&p->routes, &n->n.prefix, n->n.pxlen); + rt = fib_get(&p->routes, n->n.addr); /* Ignore update if nothing changed */ if (rt->valid && @@ -492,7 +482,7 @@ radv_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U log(L_WARN "%s: More than 17 routes exported to RAdv", p->p.name); rt->valid = 1; - rt->changed = now; + rt->changed = current_time(); rt->preference = preference; rt->preference_set = preference_set; rt->lifetime = lifetime; @@ -501,17 +491,17 @@ radv_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U else { /* Withdraw */ - rt = fib_find(&p->routes, &n->n.prefix, n->n.pxlen); + rt = fib_find(&p->routes, n->n.addr); if (!rt || !rt->valid) return; /* Invalidate the route */ rt->valid = 0; - rt->changed = now; + rt->changed = current_time(); /* Invalidated route will be pruned eventually */ - bird_clock_t expires = rt->changed + cf->max_linger_time; + btime expires = rt->changed + cf->max_linger_time S; p->prune_time = MIN(p->prune_time, expires); } @@ -526,8 +516,9 @@ static void radv_prune_routes(struct radv_proto *p) { struct radv_config *cf = (struct radv_config *) (p->p.cf); - bird_clock_t next = TIME_INFINITY; - bird_clock_t expires = 0; + btime now = current_time(); + btime next = TIME_INFINITY; + btime expires = 0; /* Should not happen */ if (!p->fib_up) @@ -537,26 +528,24 @@ radv_prune_routes(struct radv_proto *p) FIB_ITERATE_INIT(&fit, &p->routes); again: - FIB_ITERATE_START(&p->routes, &fit, node) + FIB_ITERATE_START(&p->routes, &fit, struct radv_route, rt) { - struct radv_route *rt = (void *) node; - if (!rt->valid) { - expires = rt->changed + cf->max_linger_time; + expires = rt->changed + cf->max_linger_time S; /* Delete expired nodes */ if (expires <= now) { - FIB_ITERATE_PUT(&fit, node); - fib_delete(&p->routes, node); + FIB_ITERATE_PUT(&fit); + fib_delete(&p->routes, rt); goto again; } else next = MIN(next, expires); } } - FIB_ITERATE_END(node); + 
FIB_ITERATE_END; p->prune_time = next; } @@ -566,19 +555,30 @@ radv_check_active(struct radv_proto *p) { struct radv_config *cf = (struct radv_config *) (p->p.cf); - if (! cf->trigger_valid) + if (!radv_trigger_valid(cf)) return 1; - return rt_examine(p->p.table, cf->trigger_prefix, cf->trigger_pxlen, - &(p->p), p->p.cf->out_filter); + struct channel *c = p->p.main_channel; + return rt_examine(c->table, &cf->trigger, &p->p, c->out_filter); +} + +static void +radv_postconfig(struct proto_config *CF) +{ + // struct radv_config *cf = (void *) CF; + + /* Define default channel */ + if (EMPTY_LIST(CF->channels)) + channel_config_new(NULL, net_label[NET_IP6], NET_IP6, CF); } static struct proto * -radv_init(struct proto_config *c) +radv_init(struct proto_config *CF) { - struct proto *P = proto_new(c, sizeof(struct radv_proto)); + struct proto *P = proto_new(CF); + + P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->accept_ra_types = RA_OPTIMAL; P->import_control = radv_import_control; P->rt_notify = radv_rt_notify; P->if_notify = radv_if_notify; @@ -594,7 +594,8 @@ radv_set_fib(struct radv_proto *p, int up) return; if (up) - fib_init(&p->routes, p->p.pool, sizeof(struct radv_route), 4, NULL); + fib_init(&p->routes, p->p.pool, NET_IP6, sizeof(struct radv_route), + OFFSETOF(struct radv_route, n), 4, NULL); else fib_free(&p->routes); @@ -610,7 +611,7 @@ radv_start(struct proto *P) init_list(&(p->iface_list)); p->valid = 1; - p->active = !cf->trigger_valid; + p->active = !radv_trigger_valid(cf); p->fib_up = 0; radv_set_fib(p, cf->propagate_routes); @@ -644,13 +645,16 @@ radv_shutdown(struct proto *P) } static int -radv_reconfigure(struct proto *P, struct proto_config *c) +radv_reconfigure(struct proto *P, struct proto_config *CF) { struct radv_proto *p = (struct radv_proto *) P; struct radv_config *old = (struct radv_config *) (P->cf); - struct radv_config *new = (struct radv_config *) c; + struct radv_config *new = (struct radv_config *) CF; - P->cf = 
c; /* radv_check_active() requires proper P->cf */ + if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF))) + return 0; + + P->cf = CF; /* radv_check_active() requires proper P->cf */ p->active = radv_check_active(p); /* Allocate or free FIB */ @@ -658,11 +662,22 @@ radv_reconfigure(struct proto *P, struct proto_config *c) /* We started to accept routes so we need to refeed them */ if (!old->propagate_routes && new->propagate_routes) - proto_request_feeding(&p->p); + channel_request_feeding(p->p.main_channel); struct iface *iface; WALK_LIST(iface, iface_list) { + if (!(iface->flags & IF_UP)) + continue; + + /* Ignore non-multicast ifaces */ + if (!(iface->flags & IF_MULTICAST)) + continue; + + /* Ignore ifaces without link-local address */ + if (!iface->llv6) + continue; + struct radv_iface *ifa = radv_iface_find(p, iface); struct radv_iface_config *ic = (struct radv_iface_config *) iface_patt_find(&new->patt_list, iface, NULL); @@ -748,7 +763,10 @@ struct protocol proto_radv = { .name = "RAdv", .template = "radv%d", .attr_class = EAP_RADV, + .channel_mask = NB_IP6, + .proto_size = sizeof(struct radv_proto), .config_size = sizeof(struct radv_config), + .postconfig = radv_postconfig, .init = radv_init, .start = radv_start, .shutdown = radv_shutdown, diff --git a/proto/radv/radv.h b/proto/radv/radv.h index ab081397..66f785a7 100644 --- a/proto/radv/radv.h +++ b/proto/radv/radv.h @@ -30,7 +30,7 @@ #define ICMPV6_RA 134 #define MAX_INITIAL_RTR_ADVERTISEMENTS 3 -#define MAX_INITIAL_RTR_ADVERT_INTERVAL 16 +#define MAX_INITIAL_RTR_ADVERT_INTERVAL (16 S_) #define DEFAULT_MAX_RA_INT 600 #define DEFAULT_MIN_DELAY 3 @@ -50,9 +50,7 @@ struct radv_config list rdnss_list; /* Global list of RDNSS configs (struct radv_rdnss_config) */ list dnssl_list; /* Global list of DNSSL configs (struct radv_dnssl_config) */ - ip_addr trigger_prefix; /* Prefix of a trigger route, if defined */ - u8 trigger_pxlen; /* Pxlen of a trigger route, if defined */ - u8 
trigger_valid; /* Whether a trigger route is defined */ + net_addr trigger; /* Prefix of a trigger route, if defined */ u8 propagate_routes; /* Do we propagate more specific routes (RFC 4191)? */ u32 max_linger_time; /* Maximum of interface route_linger_time */ }; @@ -91,8 +89,7 @@ struct radv_iface_config struct radv_prefix_config { node n; - ip_addr prefix; - uint pxlen; + net_addr_ip6 prefix; u8 skip; /* Do not include this prefix to RA */ u8 onlink; /* Standard options from RFC 4861 */ @@ -108,7 +105,7 @@ struct radv_rdnss_config node n; u32 lifetime; /* Valid if lifetime_mult is 0 */ u16 lifetime_mult; /* Lifetime specified as multiple of max_ra_int */ - ip_addr server; /* IP address of recursive DNS server */ + ip6_addr server; /* IP address of recursive DNS server */ }; struct radv_dnssl_config @@ -130,13 +127,14 @@ struct radv_dnssl_config */ struct radv_route { - struct fib_node n; u32 lifetime; /* Lifetime from an attribute */ u8 lifetime_set; /* Whether lifetime is defined */ u8 preference; /* Preference of the route, RA_PREF_* */ u8 preference_set; /* Whether preference is defined */ u8 valid; /* Whethe route is valid or withdrawn */ - bird_clock_t changed; /* Last time when the route changed */ + btime changed; /* Last time when the route changed */ + + struct fib_node n; }; struct radv_proto @@ -147,18 +145,18 @@ struct radv_proto u8 active; /* Whether radv is active w.r.t. triggers */ u8 fib_up; /* FIB table (routes) is initialized */ struct fib routes; /* FIB table of specific routes (struct radv_route) */ - bird_clock_t prune_time; /* Next time of route table pruning */ + btime prune_time; /* Next time of route table pruning */ }; struct radv_prefix /* One prefix we advertise */ { node n; - ip_addr prefix; - u8 len; + net_addr_ip6 prefix; + u8 valid; /* Is the prefix valid? 
If not, we advertise it with 0 lifetime, so clients stop using it */ u8 mark; /* A temporary mark for processing */ - bird_clock_t changed; /* Last time when the prefix changed */ + btime changed; /* Last time when the prefix changed */ struct radv_prefix_config *cf; /* The config tied to this prefix */ }; @@ -171,14 +169,14 @@ struct radv_iface struct ifa *addr; /* Link-local address of iface */ struct pool *pool; /* A pool for interface-specific things */ list prefixes; /* The prefixes we advertise (struct radv_prefix) */ - bird_clock_t prune_time; /* Next time of prefix list pruning */ - bird_clock_t valid_time; /* Cached packet is valid until first linger timeout */ + btime prune_time; /* Next time of prefix list pruning */ + btime valid_time; /* Cached packet is valid until first linger timeout */ timer *timer; struct object_lock *lock; sock *sk; - bird_clock_t last; /* Time of last sending of RA */ + btime last; /* Time of last sending of RA */ u16 plen; /* Length of prepared RA in tbuf, or 0 if not valid */ byte initial; /* How many RAs are still to be sent as initial */ }; diff --git a/proto/rip/Makefile b/proto/rip/Makefile index d2d3c987..7feabcd8 100644 --- a/proto/rip/Makefile +++ b/proto/rip/Makefile @@ -1,5 +1,6 @@ -source=rip.c packets.c -root-rel=../../ -dir-name=proto/rip +src := packets.c rip.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/rip/config.Y b/proto/rip/config.Y index 4ec45c7a..aff63f03 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -32,34 +32,41 @@ rip_check_auth(void) CF_DECLS -CF_KEYWORDS(RIP, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT, +CF_KEYWORDS(RIP, NG, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT, GARBAGE, PORT, ADDRESS, MODE, BROADCAST, MULTICAST, PASSIVE, VERSION, SPLIT, HORIZON, POISON, REVERSE, CHECK, ZERO, TIME, BFD, AUTHENTICATION, NONE, PLAINTEXT, CRYPTOGRAPHIC, MD5, TTL, SECURITY, RX, TX, BUFFER, LENGTH, PRIORITY, ONLY, LINK, RIP_METRIC, RIP_TAG) -%type <i> rip_auth +%type <i> rip_variant rip_auth CF_GRAMMAR CF_ADDTO(proto, rip_proto) -rip_proto_start: proto_start RIP +rip_variant: + RIP { $$ = 1; } + | RIP NG { $$ = 0; } + ; + +rip_proto_start: proto_start rip_variant { this_proto = proto_config_new(&proto_rip, $1); - init_list(&RIP_CFG->patt_list); + this_proto->net_type = $2 ? NET_IP4 : NET_IP6; - RIP_CFG->rip2 = RIP_IS_V2; + init_list(&RIP_CFG->patt_list); + RIP_CFG->rip2 = $2; + RIP_CFG->ecmp = rt_default_ecmp; RIP_CFG->infinity = RIP_DEFAULT_INFINITY; - - RIP_CFG->min_timeout_time = 60; - RIP_CFG->max_garbage_time = 60; + RIP_CFG->min_timeout_time = 60 S_; + RIP_CFG->max_garbage_time = 60 S_; }; rip_proto_item: proto_item + | proto_channel | ECMP bool { RIP_CFG->ecmp = $2 ? RIP_DEFAULT_ECMP_LIMIT : 0; } - | ECMP bool LIMIT expr { RIP_CFG->ecmp = $2 ? $4 : 0; if ($4 < 0) cf_error("ECMP limit cannot be negative"); } + | ECMP bool LIMIT expr { RIP_CFG->ecmp = $2 ? $4 : 0; } | INFINITY expr { RIP_CFG->infinity = $2; } | INTERFACE rip_iface ; @@ -86,6 +93,7 @@ rip_iface_start: RIP_IFACE->split_horizon = 1; RIP_IFACE->poison_reverse = 1; RIP_IFACE->check_zero = 1; + RIP_IFACE->check_link = 1; RIP_IFACE->ttl_security = rip_cfg_is_v2() ? 0 : 1; RIP_IFACE->rx_buffer = rip_cfg_is_v2() ? RIP_MAX_PKT_LENGTH : 0; RIP_IFACE->tx_length = rip_cfg_is_v2() ? 
RIP_MAX_PKT_LENGTH : 0; @@ -131,7 +139,7 @@ rip_iface_item: | MODE MULTICAST { RIP_IFACE->mode = RIP_IM_MULTICAST; } | MODE BROADCAST { RIP_IFACE->mode = RIP_IM_BROADCAST; if (rip_cfg_is_ng()) cf_error("Broadcast not supported in RIPng"); } | PASSIVE bool { RIP_IFACE->passive = $2; } - | ADDRESS ipa { RIP_IFACE->address = $2; } + | ADDRESS ipa { RIP_IFACE->address = $2; if (ipa_is_ip4($2) != rip_cfg_is_v2()) cf_error("IP address version mismatch"); } | PORT expr { RIP_IFACE->port = $2; if (($2<1) || ($2>65535)) cf_error("Invalid port number"); } | VERSION expr { RIP_IFACE->version = $2; if (rip_cfg_is_ng()) cf_error("Version not supported in RIPng"); @@ -141,9 +149,9 @@ rip_iface_item: | SPLIT HORIZON bool { RIP_IFACE->split_horizon = $3; } | POISON REVERSE bool { RIP_IFACE->poison_reverse = $3; } | CHECK ZERO bool { RIP_IFACE->check_zero = $3; } - | UPDATE TIME expr { RIP_IFACE->update_time = $3; if ($3<=0) cf_error("Update time must be positive"); } - | TIMEOUT TIME expr { RIP_IFACE->timeout_time = $3; if ($3<=0) cf_error("Timeout time must be positive"); } - | GARBAGE TIME expr { RIP_IFACE->garbage_time = $3; if ($3<=0) cf_error("Garbage time must be positive"); } + | UPDATE TIME expr { RIP_IFACE->update_time = $3 S_; if ($3<=0) cf_error("Update time must be positive"); } + | TIMEOUT TIME expr { RIP_IFACE->timeout_time = $3 S_; if ($3<=0) cf_error("Timeout time must be positive"); } + | GARBAGE TIME expr { RIP_IFACE->garbage_time = $3 S_; if ($3<=0) cf_error("Garbage time must be positive"); } | ECMP WEIGHT expr { RIP_IFACE->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); } | RX BUFFER expr { RIP_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX length must be in range 256-65535"); } | TX LENGTH expr { RIP_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } diff --git a/proto/rip/packets.c b/proto/rip/packets.c index 722a9012..891f454f 100644 --- 
a/proto/rip/packets.c +++ b/proto/rip/packets.c @@ -9,6 +9,8 @@ * Can be freely distributed and used under the terms of the GNU GPL. */ +#undef LOCAL_DEBUG + #include "rip.h" #include "lib/mac.h" @@ -76,8 +78,7 @@ struct rip_auth_tail /* Internal representation of RTE block data */ struct rip_block { - ip_addr prefix; - int pxlen; + net_addr net; u32 metric; u16 tag; u16 no_af; @@ -106,30 +107,30 @@ static inline uint rip_pkt_hdrlen(struct rip_iface *ifa) { return sizeof(struct rip_packet) + (ifa->cf->auth_type ? RIP_BLOCK_LENGTH : 0); } static inline void -rip_put_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block *rte) +rip_put_block(struct rip_proto *p, byte *pos, struct rip_block *rte) { if (rip_is_v2(p)) { struct rip_block_v2 *block = (void *) pos; block->family = rte->no_af ? 0 : htons(RIP_AF_IPV4); block->tag = htons(rte->tag); - block->network = ip4_hton(ipa_to_ip4(rte->prefix)); - block->netmask = ip4_hton(ip4_mkmask(rte->pxlen)); + block->network = ip4_hton(net4_prefix(&rte->net)); + block->netmask = ip4_hton(ip4_mkmask(net4_pxlen(&rte->net))); block->next_hop = ip4_hton(ipa_to_ip4(rte->next_hop)); block->metric = htonl(rte->metric); } else /* RIPng */ { struct rip_block_ng *block = (void *) pos; - block->prefix = ip6_hton(ipa_to_ip6(rte->prefix)); + block->prefix = ip6_hton(net6_prefix(&rte->net)); block->tag = htons(rte->tag); - block->pxlen = rte->pxlen; + block->pxlen = net6_pxlen(&rte->net); block->metric = rte->metric; } } static inline void -rip_put_next_hop(struct rip_proto *p UNUSED, byte *pos, struct rip_block *rte UNUSED4) +rip_put_next_hop(struct rip_proto *p UNUSED, byte *pos, struct rip_block *rte) { struct rip_block_ng *block = (void *) pos; block->prefix = ip6_hton(ipa_to_ip6(rte->next_hop)); @@ -139,7 +140,7 @@ rip_put_next_hop(struct rip_proto *p UNUSED, byte *pos, struct rip_block *rte UN } static inline int -rip_get_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block *rte) +rip_get_block(struct 
rip_proto *p, byte *pos, struct rip_block *rte) { if (rip_is_v2(p)) { @@ -149,8 +150,8 @@ rip_get_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block * if (block->family != (rte->no_af ? 0 : htons(RIP_AF_IPV4))) return 0; - rte->prefix = ipa_from_ip4(ip4_ntoh(block->network)); - rte->pxlen = ip4_masklen(ip4_ntoh(block->netmask)); + uint pxlen = ip4_masklen(ip4_ntoh(block->netmask)); + net_fill_ip4(&rte->net, ip4_ntoh(block->network), pxlen); rte->metric = ntohl(block->metric); rte->tag = ntohs(block->tag); rte->next_hop = ipa_from_ip4(ip4_ntoh(block->next_hop)); @@ -169,8 +170,8 @@ rip_get_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block * return 0; } - rte->prefix = ipa_from_ip6(ip6_ntoh(block->prefix)); - rte->pxlen = block->pxlen; + uint pxlen = (block->pxlen <= IP6_MAX_PREFIX_LENGTH) ? block->pxlen : 255; + net_fill_ip6(&rte->net, ip6_ntoh(block->prefix), pxlen); rte->metric = block->metric; rte->tag = ntohs(block->tag); /* rte->next_hop is deliberately kept unmodified */; @@ -188,7 +189,10 @@ rip_update_csn(struct rip_proto *p UNUSED, struct rip_iface *ifa) * have the same CSN. We are using real time, but enforcing monotonicity. */ if (ifa->cf->auth_type == RIP_AUTH_CRYPTO) - ifa->csn = (ifa->csn < (u32) now_real) ? (u32) now_real : ifa->csn + 1; + { + u32 now_real = (u32) (current_real_time() TO_S); + ifa->csn = (ifa->csn < now_real) ? 
now_real : ifa->csn + 1; + } } static void @@ -406,8 +410,9 @@ rip_receive_request(struct rip_proto *p, struct rip_iface *ifa, struct rip_packe if (!rip_get_block(p, pos, &b)) return; - /* Special case - zero prefix, infinity metric */ - if (ipa_nonzero(b.prefix) || b.pxlen || (b.metric != p->infinity)) + /* Special case - infinity metric, for RIPng also zero prefix */ + if ((b.metric != p->infinity) || + (rip_is_ng(p) && !net_zero_ip6((net_addr_ip6 *) &b.net))) return; /* We do nothing if TX is already active */ @@ -432,6 +437,7 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) byte *max = rip_tx_buffer(ifa) + ifa->tx_plen - (rip_is_v2(p) ? RIP_BLOCK_LENGTH : 2*RIP_BLOCK_LENGTH); ip_addr last_next_hop = IPA_NONE; + btime now_ = current_time(); int send = 0; struct rip_packet *pkt = (void *) pos; @@ -440,17 +446,15 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) pkt->unused = 0; pos += rip_pkt_hdrlen(ifa); - FIB_ITERATE_START(&p->rtable, &ifa->tx_fit, z) + FIB_ITERATE_START(&p->rtable, &ifa->tx_fit, struct rip_entry, en) { - struct rip_entry *en = (struct rip_entry *) z; - /* Dummy entries */ if (!en->valid) goto next_entry; /* Stale entries that should be removed */ if ((en->valid == RIP_ENTRY_STALE) && - ((en->changed + ifa->cf->garbage_time) <= now)) + ((en->changed + ifa->cf->garbage_time) <= now_)) goto next_entry; /* Triggered updates */ @@ -460,28 +464,28 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) /* Not enough space for current entry */ if (pos > max) { - FIB_ITERATE_PUT(&ifa->tx_fit, z); + FIB_ITERATE_PUT(&ifa->tx_fit); goto break_loop; } struct rip_block rte = { - .prefix = en->n.prefix, - .pxlen = en->n.pxlen, .metric = en->metric, .tag = en->tag }; + net_copy(&rte.net, en->n.addr); + if (en->iface == ifa->iface) rte.next_hop = en->next_hop; if (rip_is_v2(p) && (ifa->cf->version == RIP_V1)) { /* Skipping subnets (i.e. 
not hosts, classful networks or default route) */ - if (ip4_masklen(ip4_class_mask(ipa_to_ip4(en->n.prefix))) != en->n.pxlen) + if (ip4_masklen(ip4_class_mask(net4_prefix(&rte.net))) != rte.net.pxlen) goto next_entry; rte.tag = 0; - rte.pxlen = 0; + rte.net.pxlen = 0; rte.next_hop = IPA_NONE; } @@ -497,7 +501,7 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) goto next_entry; } - // TRACE(D_PACKETS, " %I/%d -> %I metric %d", rte.prefix, rte.pxlen, rte.next_hop, rte.metric); + // TRACE(D_PACKETS, " %N -> %I metric %d", &rte.net, rte.next_hop, rte.metric); /* RIPng next hop entry */ if (rip_is_ng(p) && !ipa_equal(rte.next_hop, last_next_hop)) @@ -513,7 +517,7 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) next_entry: ; } - FIB_ITERATE_END(z); + FIB_ITERATE_END; ifa->tx_active = 0; /* Do not send empty packet */ @@ -540,9 +544,9 @@ break_loop: * activating the new one. */ void -rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, bird_clock_t changed) +rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, btime changed) { - DBG("RIP: Opening TX session to %I on %s\n", dst, ifa->iface->name); + DBG("RIP: Opening TX session to %I on %s\n", addr, ifa->iface->name); rip_reset_tx_session(p, ifa); @@ -591,6 +595,7 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack byte *pos = (byte *) pkt + sizeof(struct rip_packet); byte *end = (byte *) pkt + plen; + btime now_ = current_time(); for (; pos < end; pos += RIP_BLOCK_LENGTH) { @@ -598,23 +603,25 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack if (!rip_get_block(p, pos, &rte)) continue; - int c = ipa_classify_net(rte.prefix); - if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) - SKIP("invalid prefix"); - if (rip_is_v2(p) && (pkt->version == RIP_V1)) { - if (ifa->cf->check_zero && (rte.tag || rte.pxlen || ipa_nonzero(rte.next_hop))) + if (ifa->cf->check_zero && 
(rte.tag || rte.net.pxlen || ipa_nonzero(rte.next_hop))) SKIP("RIPv1 reserved field is nonzero"); rte.tag = 0; - rte.pxlen = ip4_masklen(ip4_class_mask(ipa_to_ip4(rte.prefix))); + rte.net.pxlen = ip4_masklen(ip4_class_mask(net4_prefix(&rte.net))); rte.next_hop = IPA_NONE; } - if ((rte.pxlen < 0) || (rte.pxlen > MAX_PREFIX_LENGTH)) + if (rte.net.pxlen == 255) SKIP("invalid prefix length"); + net_normalize(&rte.net); + + int c = net_classify(&rte.net); + if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) + SKIP("invalid prefix"); + if (rte.metric > p->infinity) SKIP("invalid metric"); @@ -625,7 +632,7 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack rte.next_hop = IPA_NONE; } - // TRACE(D_PACKETS, " %I/%d -> %I metric %d", rte.prefix, rte.pxlen, rte.next_hop, rte.metric); + // TRACE(D_PACKETS, " %N -> %I metric %d", &rte.net.n, rte.next_hop, rte.metric); rte.metric += ifa->cf->metric; @@ -636,19 +643,19 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack .next_hop = ipa_nonzero(rte.next_hop) ? 
rte.next_hop : from->nbr->addr, .metric = rte.metric, .tag = rte.tag, - .expires = now + ifa->cf->timeout_time + .expires = now_ + ifa->cf->timeout_time }; - rip_update_rte(p, &rte.prefix, rte.pxlen, &new); + rip_update_rte(p, &rte.net, &new); } else - rip_withdraw_rte(p, &rte.prefix, rte.pxlen, from); + rip_withdraw_rte(p, &rte.net, from); continue; skip: - LOG_RTE("Ignoring route %I/%d received from %I - %s", - rte.prefix, rte.pxlen, from->nbr->addr, err_dsc); + LOG_RTE("Ignoring route %N received from %I - %s", + &rte.net, from->nbr->addr, err_dsc); } } @@ -667,8 +674,7 @@ rip_rx_hook(sock *sk, uint len) sk->iface->name, sk->faddr, sk->laddr); /* Silently ignore my own packets */ - /* FIXME: Better local address check */ - if (ipa_equal(ifa->iface->addr->ip, sk->faddr)) + if (ipa_equal(sk->faddr, sk->saddr)) return 1; if (rip_is_ng(p) && !ipa_is_link_local(sk->faddr)) @@ -704,7 +710,7 @@ rip_rx_hook(sock *sk, uint len) if ((plen - sizeof(struct rip_packet)) % RIP_BLOCK_LENGTH) DROP("invalid length", plen); - n->last_seen = now; + n->last_seen = current_time(); rip_update_bfd(p, n); switch (pkt->command) @@ -736,19 +742,13 @@ rip_open_socket(struct rip_iface *ifa) sock *sk = sk_new(p->p.pool); sk->type = SK_UDP; + sk->subtype = rip_is_v2(p) ? SK_IPV4 : SK_IPV6; sk->sport = ifa->cf->port; sk->dport = ifa->cf->port; sk->iface = ifa->iface; + sk->saddr = rip_is_v2(p) ? ifa->iface->addr4->ip : ifa->iface->llv6->ip; sk->vrf = p->p.vrf; - /* - * For RIPv2, we explicitly choose a primary address, mainly to ensure that - * RIP and BFD uses the same one. For RIPng, we left it to kernel, which - * should choose some link-local address based on the same scope rule. 
- */ - if (rip_is_v2(p)) - sk->saddr = ifa->iface->addr->ip; - sk->rx_hook = rip_rx_hook; sk->tx_hook = rip_tx_hook; sk->err_hook = rip_err_hook; diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 7b380097..85e37cea 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -92,15 +92,6 @@ static void rip_trigger_update(struct rip_proto *p); * RIP routes */ -static void -rip_init_entry(struct fib_node *fn) -{ - // struct rip_entry *en = (void) *fn; - - const uint offset = OFFSETOF(struct rip_entry, routes); - memset((byte *)fn + offset, 0, sizeof(struct rip_entry) - offset); -} - static struct rip_rte * rip_add_rte(struct rip_proto *p, struct rip_rte **rp, struct rip_rte *src) { @@ -152,27 +143,20 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) if (rt) { /* Update */ - net *n = net_get(p->p.table, en->n.prefix, en->n.pxlen); - rta a0 = { .src = p->p.main_source, .source = RTS_RIP, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST + .dest = RTD_UNICAST, }; u8 rt_metric = rt->metric; u16 rt_tag = rt->tag; - struct rip_rte *rt2 = rt->next; - /* Find second valid rte */ - while (rt2 && !rip_valid_rte(rt2)) - rt2 = rt2->next; - - if (p->ecmp && rt2) + if (p->ecmp) { /* ECMP route */ - struct mpnh *nhs = NULL; + struct nexthop *nhs = NULL; int num = 0; for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) @@ -180,54 +164,51 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) if (!rip_valid_rte(rt)) continue; - struct mpnh *nh = alloca(sizeof(struct mpnh)); + struct nexthop *nh = allocz(sizeof(struct nexthop)); + nh->gw = rt->next_hop; nh->iface = rt->from->nbr->iface; nh->weight = rt->from->ifa->cf->ecmp_weight; - mpnh_insert(&nhs, nh); + + nexthop_insert(&nhs, nh); num++; if (rt->tag != rt_tag) rt_tag = 0; } - a0.dest = RTD_MULTIPATH; - a0.nexthops = nhs; + a0.nh = *nhs; } else { /* Unipath route */ - a0.dest = RTD_ROUTER; - a0.gw = rt->next_hop; - a0.iface = rt->from->nbr->iface; a0.from = rt->from->nbr->addr; + a0.nh.gw = rt->next_hop; + 
a0.nh.iface = rt->from->nbr->iface; } rta *a = rta_lookup(&a0); rte *e = rte_get_temp(a); - e->u.rip.from = a0.iface; + e->u.rip.from = a0.nh.iface; e->u.rip.metric = rt_metric; e->u.rip.tag = rt_tag; - e->net = n; e->pflags = 0; - rte_update(&p->p, n, e); + rte_update(&p->p, en->n.addr, e); } else { /* Withdraw */ - net *n = net_find(p->p.table, en->n.prefix, en->n.pxlen); - rte_update(&p->p, n, NULL); + rte_update(&p->p, en->n.addr, NULL); } } /** * rip_update_rte - enter a route update to RIP routing table * @p: RIP instance - * @prefix: network prefix - * @pxlen: network prefix length + * @addr: network address * @new: a &rip_rte representing the new route * * The function is called by the RIP packet processing code whenever it receives @@ -237,9 +218,9 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) * rip_withdraw_rte() should be called instead of rip_update_rte(). */ void -rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte *new) +rip_update_rte(struct rip_proto *p, net_addr *n, struct rip_rte *new) { - struct rip_entry *en = fib_get(&p->rtable, prefix, pxlen); + struct rip_entry *en = fib_get(&p->rtable, n); struct rip_rte *rt, **rp; int changed = 0; @@ -279,8 +260,7 @@ rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte * /** * rip_withdraw_rte - enter a route withdraw to RIP routing table * @p: RIP instance - * @prefix: network prefix - * @pxlen: network prefix length + * @addr: network address * @from: a &rip_neighbor propagating the withdraw * * The function is called by the RIP packet processing code whenever it receives @@ -288,9 +268,9 @@ rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte * * removed. Eventually, the change is also propagated by rip_announce_rte(). 
*/ void -rip_withdraw_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_neighbor *from) +rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from) { - struct rip_entry *en = fib_find(&p->rtable, prefix, pxlen); + struct rip_entry *en = fib_find(&p->rtable, n); struct rip_rte *rt, **rp; if (!en) @@ -317,7 +297,7 @@ rip_withdraw_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_nei * it into our data structures. */ static void -rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, struct rte *new, +rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, struct rte *new, struct rte *old UNUSED, struct ea_list *attrs) { struct rip_proto *p = (struct rip_proto *) P; @@ -332,15 +312,15 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, if (rt_metric > p->infinity) { - log(L_WARN "%s: Invalid rip_metric value %u for route %I/%d", - p->p.name, rt_metric, net->n.prefix, net->n.pxlen); + log(L_WARN "%s: Invalid rip_metric value %u for route %N", + p->p.name, rt_metric, net->n.addr); rt_metric = p->infinity; } if (rt_tag > 0xffff) { - log(L_WARN "%s: Invalid rip_tag value %u for route %I/%d", - p->p.name, rt_tag, net->n.prefix, net->n.pxlen); + log(L_WARN "%s: Invalid rip_tag value %u for route %N", + p->p.name, rt_tag, net->n.addr); rt_metric = p->infinity; rt_tag = 0; } @@ -352,7 +332,7 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, * collection. */ - en = fib_get(&p->rtable, &net->n.prefix, net->n.pxlen); + en = fib_get(&p->rtable, net->n.addr); old_metric = en->valid ? en->metric : -1; @@ -360,13 +340,13 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, en->metric = rt_metric; en->tag = rt_tag; en->from = (new->attrs->src->proto == P) ? 
new->u.rip.from : NULL; - en->iface = new->attrs->iface; - en->next_hop = new->attrs->gw; + en->iface = new->attrs->nh.iface; + en->next_hop = new->attrs->nh.gw; } else { /* Withdraw */ - en = fib_find(&p->rtable, &net->n.prefix, net->n.pxlen); + en = fib_find(&p->rtable, net->n.addr); if (!en || en->valid != RIP_ENTRY_VALID) return; @@ -384,7 +364,7 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, /* Activate triggered updates */ if (en->metric != old_metric) { - en->changed = now; + en->changed = current_time(); rip_trigger_update(p); } } @@ -526,10 +506,10 @@ rip_iface_start(struct rip_iface *ifa) TRACE(D_EVENTS, "Starting interface %s", ifa->iface->name); - ifa->next_regular = now + (random() % ifa->cf->update_time) + 1; - ifa->next_triggered = now; /* Available immediately */ - ifa->want_triggered = 1; /* All routes in triggered update */ - tm_start(ifa->timer, 1); /* Or 100 ms */ + ifa->next_regular = current_time() + (random() % ifa->cf->update_time) + 100 MS; + ifa->next_triggered = current_time(); /* Available immediately */ + ifa->want_triggered = 1; /* All routes in triggered update */ + tm_start(ifa->timer, 100 MS); ifa->up = 1; if (!ifa->cf->passive) @@ -650,13 +630,19 @@ rip_add_iface(struct rip_proto *p, struct iface *iface, struct rip_iface_config else if (ic->mode == RIP_IM_MULTICAST) ifa->addr = rip_is_v2(p) ? IP4_RIP_ROUTERS : IP6_RIP_ROUTERS; else /* Broadcast */ - ifa->addr = iface->addr->brd; + ifa->addr = iface->addr4->brd; + /* + * The above is just a workaround for BSD as it can't send broadcasts + * to 255.255.255.255. BSD systems need the network broadcast address instead. 
+ * + * TODO: move this to sysdep code + */ init_list(&ifa->neigh_list); add_tail(&p->iface_list, NODE ifa); - ifa->timer = tm_new_set(p->p.pool, rip_iface_timer, ifa, 0, 0); + ifa->timer = tm_new_init(p->p.pool, rip_iface_timer, ifa, 0, 0); struct object_lock *lock = olock_new(p->p.pool); lock->type = OBJLOCK_UDP; @@ -704,8 +690,8 @@ rip_reconfigure_iface(struct rip_proto *p, struct rip_iface *ifa, struct rip_ifa rip_iface_update_buffers(ifa); - if (ifa->next_regular > (now + new->update_time)) - ifa->next_regular = now + (random() % new->update_time) + 1; + if (ifa->next_regular > (current_time() + new->update_time)) + ifa->next_regular = current_time() + (random() % new->update_time) + 100 MS; if (new->check_link != old->check_link) rip_iface_update_state(ifa); @@ -726,7 +712,11 @@ rip_reconfigure_ifaces(struct rip_proto *p, struct rip_config *cf) WALK_LIST(iface, iface_list) { - if (! (iface->flags & IF_UP)) + if (!(iface->flags & IF_UP)) + continue; + + /* Ignore ifaces without appropriate address */ + if (rip_is_v2(p) ? !iface->addr4 : !iface->llv6) continue; struct rip_iface *ifa = rip_find_iface(p, iface); @@ -764,6 +754,10 @@ rip_if_notify(struct proto *P, unsigned flags, struct iface *iface) { struct rip_iface_config *ic = (void *) iface_patt_find(&cf->patt_list, iface, NULL); + /* Ignore ifaces without appropriate address */ + if (rip_is_v2(p) ? 
!iface->addr4 : !iface->llv6) + return; + if (ic) rip_add_iface(p, iface, ic); @@ -822,24 +816,24 @@ rip_timer(timer *t) struct rip_iface *ifa; struct rip_neighbor *n, *nn; struct fib_iterator fit; - bird_clock_t next = now + MIN(cf->min_timeout_time, cf->max_garbage_time); - bird_clock_t expires = 0; + btime now_ = current_time(); + btime next = now_ + MIN(cf->min_timeout_time, cf->max_garbage_time); + btime expires = 0; TRACE(D_EVENTS, "Main timer fired"); FIB_ITERATE_INIT(&fit, &p->rtable); loop: - FIB_ITERATE_START(&p->rtable, &fit, node) + FIB_ITERATE_START(&p->rtable, &fit, struct rip_entry, en) { - struct rip_entry *en = (struct rip_entry *) node; struct rip_rte *rt, **rp; int changed = 0; /* Checking received routes for timeout and for dead neighbors */ for (rp = &en->routes; rt = *rp; /* rp = &rt->next */) { - if (!rip_valid_rte(rt) || (rt->expires <= now)) + if (!rip_valid_rte(rt) || (rt->expires <= now_)) { rip_remove_rte(p, rp); changed = 1; @@ -859,7 +853,7 @@ rip_timer(timer *t) * rip_rt_notify() -> p->rtable change, invalidating hidden variables. 
*/ - FIB_ITERATE_PUT_NEXT(&fit, &p->rtable, node); + FIB_ITERATE_PUT_NEXT(&fit, &p->rtable); rip_announce_rte(p, en); goto loop; } @@ -869,9 +863,9 @@ rip_timer(timer *t) { expires = en->changed + cf->max_garbage_time; - if (expires <= now) + if (expires <= now_) { - // TRACE(D_EVENTS, "entry is too old: %I/%d", en->n.prefix, en->n.pxlen); + // TRACE(D_EVENTS, "entry is too old: %N", en->n.addr); en->valid = 0; } else @@ -881,12 +875,12 @@ rip_timer(timer *t) /* Remove empty nodes */ if (!en->valid && !en->routes) { - FIB_ITERATE_PUT(&fit, node); - fib_delete(&p->rtable, node); + FIB_ITERATE_PUT(&fit); + fib_delete(&p->rtable, en); goto loop; } } - FIB_ITERATE_END(node); + FIB_ITERATE_END; p->rt_reload = 0; @@ -897,20 +891,20 @@ rip_timer(timer *t) { expires = n->last_seen + n->ifa->cf->timeout_time; - if (expires <= now) + if (expires <= now_) rip_remove_neighbor(p, n); else next = MIN(next, expires); } - tm_start(p->timer, MAX(next - now, 1)); + tm_start(p->timer, MAX(next - now_, 100 MS)); } static inline void rip_kick_timer(struct rip_proto *p) { - if (p->timer->expires > (now + 1)) - tm_start(p->timer, 1); /* Or 100 ms */ + if (p->timer->expires > (current_time() + 100 MS)) + tm_start(p->timer, 100 MS); } /** @@ -928,7 +922,8 @@ rip_iface_timer(timer *t) { struct rip_iface *ifa = t->data; struct rip_proto *p = ifa->rip; - bird_clock_t period = ifa->cf->update_time; + btime now_ = current_time(); + btime period = ifa->cf->update_time; if (ifa->cf->passive) return; @@ -937,40 +932,40 @@ rip_iface_timer(timer *t) if (ifa->tx_active) { - if (now < (ifa->next_regular + period)) - { tm_start(ifa->timer, 1); return; } + if (now_ < (ifa->next_regular + period)) + { tm_start(ifa->timer, 100 MS); return; } /* We are too late, reset is done by rip_send_table() */ log(L_WARN "%s: Too slow update on %s, resetting", p->p.name, ifa->iface->name); } - if (now >= ifa->next_regular) + if (now_ >= ifa->next_regular) { /* Send regular update, set timer for next period (or 
following one if necessay) */ TRACE(D_EVENTS, "Sending regular updates for %s", ifa->iface->name); rip_send_table(p, ifa, ifa->addr, 0); - ifa->next_regular += period * (1 + ((now - ifa->next_regular) / period)); + ifa->next_regular += period * (1 + ((now_ - ifa->next_regular) / period)); ifa->want_triggered = 0; p->triggered = 0; } - else if (ifa->want_triggered && (now >= ifa->next_triggered)) + else if (ifa->want_triggered && (now_ >= ifa->next_triggered)) { /* Send triggered update, enforce interval between triggered updates */ TRACE(D_EVENTS, "Sending triggered updates for %s", ifa->iface->name); rip_send_table(p, ifa, ifa->addr, ifa->want_triggered); - ifa->next_triggered = now + MIN(5, period / 2 + 1); + ifa->next_triggered = now_ + MIN(5 S, period / 2); ifa->want_triggered = 0; p->triggered = 0; } - tm_start(ifa->timer, ifa->want_triggered ? 1 : (ifa->next_regular - now)); + tm_start(ifa->timer, ifa->want_triggered ? (1 S) : (ifa->next_regular - now_)); } static inline void rip_iface_kick_timer(struct rip_iface *ifa) { - if (ifa->timer->expires > (now + 1)) - tm_start(ifa->timer, 1); /* Or 100 ms */ + if (ifa->timer->expires > (current_time() + 100 MS)) + tm_start(ifa->timer, 100 MS); } static void @@ -991,7 +986,7 @@ rip_trigger_update(struct rip_proto *p) continue; TRACE(D_EVENTS, "Scheduling triggered updates for %s", ifa->iface->name); - ifa->want_triggered = now; + ifa->want_triggered = current_time(); rip_iface_kick_timer(ifa); } @@ -1035,19 +1030,17 @@ rip_import_control(struct proto *P UNUSED, struct rte **rt, struct ea_list **att return 0; } -static int -rip_reload_routes(struct proto *P) +static void +rip_reload_routes(struct channel *C) { - struct rip_proto *p = (struct rip_proto *) P; + struct rip_proto *p = (struct rip_proto *) C->proto; if (p->rt_reload) - return 1; + return; TRACE(D_EVENTS, "Scheduling route reload"); p->rt_reload = 1; rip_kick_timer(p); - - return 1; } static struct ea_list * @@ -1078,12 +1071,23 @@ rip_rte_same(struct rte 
*new, struct rte *old) } +static void +rip_postconfig(struct proto_config *CF) +{ + // struct rip_config *cf = (void *) CF; + + /* Define default channel */ + if (EMPTY_LIST(CF->channels)) + channel_config_new(NULL, net_label[CF->net_type], CF->net_type, CF); +} + static struct proto * -rip_init(struct proto_config *cfg) +rip_init(struct proto_config *CF) { - struct proto *P = proto_new(cfg, sizeof(struct rip_proto)); + struct proto *P = proto_new(CF); + + P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->accept_ra_types = RA_OPTIMAL; P->if_notify = rip_if_notify; P->rt_notify = rip_rt_notify; P->neigh_notify = rip_neigh_notify; @@ -1104,10 +1108,12 @@ rip_start(struct proto *P) struct rip_config *cf = (void *) (P->cf); init_list(&p->iface_list); - fib_init(&p->rtable, P->pool, sizeof(struct rip_entry), 0, rip_init_entry); + fib_init(&p->rtable, P->pool, cf->rip2 ? NET_IP4 : NET_IP6, + sizeof(struct rip_entry), OFFSETOF(struct rip_entry, n), 0, NULL); p->rte_slab = sl_new(P->pool, sizeof(struct rip_rte)); - p->timer = tm_new_set(P->pool, rip_timer, p, 0, 0); + p->timer = tm_new_init(P->pool, rip_timer, p, 0, 0); + p->rip2 = cf->rip2; p->ecmp = cf->ecmp; p->infinity = cf->infinity; p->triggered = 0; @@ -1121,18 +1127,24 @@ rip_start(struct proto *P) } static int -rip_reconfigure(struct proto *P, struct proto_config *c) +rip_reconfigure(struct proto *P, struct proto_config *CF) { struct rip_proto *p = (void *) P; - struct rip_config *new = (void *) c; + struct rip_config *new = (void *) CF; // struct rip_config *old = (void *) (P->cf); + if (new->rip2 != p->rip2) + return 0; + if (new->infinity != p->infinity) return 0; + if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF))) + return 0; + TRACE(D_EVENTS, "Reconfiguring"); - p->p.cf = c; + p->p.cf = CF; p->ecmp = new->ecmp; rip_reconfigure_ifaces(p, new); @@ -1184,7 +1196,7 @@ rip_show_interfaces(struct proto *P, char *iff) } cli_msg(-1021, "%s:", p->p.name); - 
cli_msg(-1021, "%-10s %-6s %6s %6s %6s", + cli_msg(-1021, "%-10s %-6s %6s %6s %7s", "Interface", "State", "Metric", "Nbrs", "Timer"); WALK_LIST(ifa, p->iface_list) @@ -1197,8 +1209,9 @@ rip_show_interfaces(struct proto *P, char *iff) if (n->last_seen) nbrs++; - int timer = MAX(ifa->next_regular - now, 0); - cli_msg(-1021, "%-10s %-6s %6u %6u %6u", + btime now_ = current_time(); + btime timer = (ifa->next_regular > now_) ? (ifa->next_regular - now_) : 0; + cli_msg(-1021, "%-10s %-6s %6u %6u %7t", ifa->iface->name, (ifa->up ? "Up" : "Down"), ifa->cf->metric, nbrs, timer); } @@ -1220,7 +1233,7 @@ rip_show_neighbors(struct proto *P, char *iff) } cli_msg(-1022, "%s:", p->p.name); - cli_msg(-1022, "%-25s %-10s %6s %6s %6s", + cli_msg(-1022, "%-25s %-10s %6s %6s %7s", "IP address", "Interface", "Metric", "Routes", "Seen"); WALK_LIST(ifa, p->iface_list) @@ -1233,8 +1246,8 @@ rip_show_neighbors(struct proto *P, char *iff) if (!n->last_seen) continue; - int timer = now - n->last_seen; - cli_msg(-1022, "%-25I %-10s %6u %6u %6u", + btime timer = current_time() - n->last_seen; + cli_msg(-1022, "%-25I %-10s %6u %6u %7t", n->nbr->addr, ifa->iface->name, ifa->cf->metric, n->uc, timer); } } @@ -1250,12 +1263,11 @@ rip_dump(struct proto *P) int i; i = 0; - FIB_WALK(&p->rtable, e) + FIB_WALK(&p->rtable, struct rip_entry, en) { - struct rip_entry *en = (struct rip_entry *) e; - debug("RIP: entry #%d: %I/%d via %I dev %s valid %d metric %d age %d s\n", - i++, en->n.prefix, en->n.pxlen, en->next_hop, en->iface->name, - en->valid, en->metric, now - en->changed); + debug("RIP: entry #%d: %N via %I dev %s valid %d metric %d age %t\n", + i++, en->n.addr, en->next_hop, en->iface->name, + en->valid, en->metric, current_time() - en->changed); } FIB_WALK_END; @@ -1274,7 +1286,10 @@ struct protocol proto_rip = { .template = "rip%d", .attr_class = EAP_RIP, .preference = DEF_PREF_RIP, + .channel_mask = NB_IP, + .proto_size = sizeof(struct rip_proto), .config_size = sizeof(struct rip_config), + 
.postconfig = rip_postconfig, .init = rip_init, .dump = rip_dump, .start = rip_start, diff --git a/proto/rip/rip.h b/proto/rip/rip.h index b24d9536..55696333 100644 --- a/proto/rip/rip.h +++ b/proto/rip/rip.h @@ -27,12 +27,6 @@ #include "lib/timer.h" -#ifdef IPV6 -#define RIP_IS_V2 0 -#else -#define RIP_IS_V2 1 -#endif - #define RIP_V1 1 #define RIP_V2 2 @@ -44,9 +38,9 @@ #define RIP_DEFAULT_ECMP_LIMIT 16 #define RIP_DEFAULT_INFINITY 16 -#define RIP_DEFAULT_UPDATE_TIME 30 -#define RIP_DEFAULT_TIMEOUT_TIME 180 -#define RIP_DEFAULT_GARBAGE_TIME 120 +#define RIP_DEFAULT_UPDATE_TIME (30 S_) +#define RIP_DEFAULT_TIMEOUT_TIME (180 S_) +#define RIP_DEFAULT_GARBAGE_TIME (120 S_) struct rip_config @@ -58,8 +52,8 @@ struct rip_config u8 ecmp; /* Maximum number of nexthops in ECMP route, or 0 */ u8 infinity; /* Maximum metric value, representing infinity */ - u32 min_timeout_time; /* Minimum of interface timeout_time */ - u32 max_garbage_time; /* Maximum of interface garbage_time */ + btime min_timeout_time; /* Minimum of interface timeout_time */ + btime max_garbage_time; /* Maximum of interface garbage_time */ }; struct rip_iface_config @@ -84,9 +78,9 @@ struct rip_iface_config u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */ int tx_tos; int tx_priority; - u32 update_time; /* Periodic update interval */ - u32 timeout_time; /* Route expiration timeout */ - u32 garbage_time; /* Unreachable entry GC timeout */ + btime update_time; /* Periodic update interval */ + btime timeout_time; /* Route expiration timeout */ + btime garbage_time; /* Unreachable entry GC timeout */ list *passwords; /* Passwords for authentication */ }; @@ -98,6 +92,7 @@ struct rip_proto slab *rte_slab; /* Slab for internal routes (struct rip_rte) */ timer *timer; /* Main protocol timer */ + u8 rip2; /* RIPv2 (IPv4) or RIPng (IPv6) */ u8 ecmp; /* Maximum number of nexthops in ECMP route, or 0 */ u8 infinity; /* Maximum metric value, representing infinity */ u8 triggered; /* Logical 
AND of interface want_triggered values */ @@ -125,14 +120,14 @@ struct rip_iface list neigh_list; /* List of iface neighbors (struct rip_neighbor) */ /* Update scheduling */ - bird_clock_t next_regular; /* Next time when regular update should be called */ - bird_clock_t next_triggered; /* Next time when triggerd update may be called */ - bird_clock_t want_triggered; /* Nonzero if triggered update is scheduled */ + btime next_regular; /* Next time when regular update should be called */ + btime next_triggered; /* Next time when triggerd update may be called */ + btime want_triggered; /* Nonzero if triggered update is scheduled */ /* Active update */ int tx_active; /* Update session is active */ ip_addr tx_addr; /* Update session destination address */ - bird_clock_t tx_changed; /* Minimal changed time for triggered update */ + btime tx_changed; /* Minimal changed time for triggered update */ struct fib_iterator tx_fit; /* FIB iterator in RIP routing table (p.rtable) */ }; @@ -142,14 +137,13 @@ struct rip_neighbor struct rip_iface *ifa; /* Associated interface, may be NULL if stale */ struct neighbor *nbr; /* Associaded core neighbor, may be NULL if stale */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ - bird_clock_t last_seen; /* Time of last received and accepted message */ + btime last_seen; /* Time of last received and accepted message */ u32 uc; /* Use count, number of routes linking the neighbor */ u32 csn; /* Last received crypto sequence number */ }; struct rip_entry { - struct fib_node n; struct rip_rte *routes; /* List of incoming routes */ u8 valid; /* Entry validity state (RIP_ENTRY_*) */ @@ -159,7 +153,9 @@ struct rip_entry struct iface *iface; /* Outgoing route iface (for next hop) */ ip_addr next_hop; /* Outgoing route next hop */ - bird_clock_t changed; /* Last time when the outgoing route metric changed */ + btime changed; /* Last time when the outgoing route metric changed */ + + struct fib_node n; }; struct rip_rte @@ -171,7 
+167,7 @@ struct rip_rte u16 metric; /* Route metric (after increase) */ u16 tag; /* Route tag */ - bird_clock_t expires; /* Time of route expiration */ + btime expires; /* Time of route expiration */ }; @@ -189,16 +185,11 @@ struct rip_rte #define EA_RIP_METRIC EA_CODE(EAP_RIP, 0) #define EA_RIP_TAG EA_CODE(EAP_RIP, 1) -#define rip_is_v2(X) RIP_IS_V2 -#define rip_is_ng(X) (!RIP_IS_V2) - -/* static inline int rip_is_v2(struct rip_proto *p) { return p->rip2; } static inline int rip_is_ng(struct rip_proto *p) { return ! p->rip2; } -*/ static inline void rip_reset_tx_session(struct rip_proto *p, struct rip_iface *ifa) @@ -211,8 +202,8 @@ rip_reset_tx_session(struct rip_proto *p, struct rip_iface *ifa) } /* rip.c */ -void rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte *new); -void rip_withdraw_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_neighbor *from); +void rip_update_rte(struct rip_proto *p, net_addr *n, struct rip_rte *new); +void rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from); struct rip_neighbor * rip_get_neighbor(struct rip_proto *p, ip_addr *a, struct rip_iface *ifa); void rip_update_bfd(struct rip_proto *p, struct rip_neighbor *n); void rip_show_interfaces(struct proto *P, char *iff); @@ -220,7 +211,7 @@ void rip_show_neighbors(struct proto *P, char *iff); /* packets.c */ void rip_send_request(struct rip_proto *p, struct rip_iface *ifa); -void rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, bird_clock_t changed); +void rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, btime changed); int rip_open_socket(struct rip_iface *ifa); diff --git a/proto/rpki/Doc b/proto/rpki/Doc new file mode 100644 index 00000000..d1d1bf55 --- /dev/null +++ b/proto/rpki/Doc @@ -0,0 +1,5 @@ +S rpki.c +S packets.c +S transport.c +S tcp_transport.c +S ssh_transport.c diff --git a/proto/rpki/Makefile b/proto/rpki/Makefile new file mode 100644 index 
00000000..eb09b7df --- /dev/null +++ b/proto/rpki/Makefile @@ -0,0 +1,6 @@ +src := rpki.c packets.c tcp_transport.c ssh_transport.c transport.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) + +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/rpki/config.Y b/proto/rpki/config.Y new file mode 100644 index 00000000..39fdfd01 --- /dev/null +++ b/proto/rpki/config.Y @@ -0,0 +1,144 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +CF_HDR + +#include "proto/rpki/rpki.h" + +CF_DEFINES + +#define RPKI_CFG ((struct rpki_config *) this_proto) +#define RPKI_TR_SSH_CFG ((struct rpki_tr_ssh_config *) RPKI_CFG->tr_config.spec) + +static void +rpki_check_unused_hostname(void) +{ + if (RPKI_CFG->hostname != NULL) + cf_error("Only one cache server per protocol allowed"); +} + +static void +rpki_check_unused_transport(void) +{ + if (RPKI_CFG->tr_config.spec != NULL) + cf_error("At the most one transport per protocol allowed"); +} + +CF_DECLS + +CF_KEYWORDS(RPKI, REMOTE, BIRD, PRIVATE, PUBLIC, KEY, TCP, SSH, TRANSPORT, USER, + RETRY, REFRESH, EXPIRE, KEEP) + +%type <i> rpki_keep_interval + +CF_GRAMMAR + +CF_ADDTO(proto, rpki_proto) + +rpki_proto_start: proto_start RPKI { + this_proto = proto_config_new(&proto_rpki, $1); + RPKI_CFG->retry_interval = RPKI_RETRY_INTERVAL; + RPKI_CFG->refresh_interval = RPKI_REFRESH_INTERVAL; + RPKI_CFG->expire_interval = RPKI_EXPIRE_INTERVAL; +}; + +rpki_proto: rpki_proto_start proto_name '{' rpki_proto_opts '}' { rpki_check_config(RPKI_CFG); }; + +rpki_proto_opts: + /* empty */ + | rpki_proto_opts rpki_proto_item ';' + ; + +rpki_proto_item: + proto_item + | proto_channel + | REMOTE rpki_cache_addr + | REMOTE rpki_cache_addr rpki_proto_item_port + | rpki_proto_item_port + | TRANSPORT rpki_transport + | REFRESH rpki_keep_interval expr { + if (rpki_check_refresh_interval($3)) + cf_error(rpki_check_refresh_interval($3)); + RPKI_CFG->refresh_interval = $3; + RPKI_CFG->keep_refresh_interval = $2; + } + | RETRY rpki_keep_interval expr { + if (rpki_check_retry_interval($3)) + 
cf_error(rpki_check_retry_interval($3)); + RPKI_CFG->retry_interval = $3; + RPKI_CFG->keep_retry_interval = $2; + } + | EXPIRE rpki_keep_interval expr { + if (rpki_check_expire_interval($3)) + cf_error(rpki_check_expire_interval($3)); + RPKI_CFG->expire_interval = $3; + RPKI_CFG->keep_expire_interval = $2; + } + ; + +rpki_keep_interval: + /* empty */ { $$ = 0; } + | KEEP { $$ = 1; } + ; + +rpki_proto_item_port: PORT expr { check_u16($2); RPKI_CFG->port = $2; }; + +rpki_cache_addr: + text { + rpki_check_unused_hostname(); + RPKI_CFG->hostname = $1; + } + | ipa { + rpki_check_unused_hostname(); + RPKI_CFG->ip = $1; + /* Ensure hostname is filled */ + char *hostname = cfg_allocz(sizeof(INET6_ADDRSTRLEN + 1)); + bsnprintf(hostname, INET6_ADDRSTRLEN+1, "%I", RPKI_CFG->ip); + RPKI_CFG->hostname = hostname; + } + ; + +rpki_transport: + TCP rpki_transport_tcp_init + | SSH rpki_transport_ssh_init '{' rpki_transport_ssh_opts '}' rpki_transport_ssh_check + ; + +rpki_transport_tcp_init: +{ + rpki_check_unused_transport(); + RPKI_CFG->tr_config.spec = cfg_allocz(sizeof(struct rpki_tr_tcp_config)); + RPKI_CFG->tr_config.type = RPKI_TR_TCP; +}; + +rpki_transport_ssh_init: +{ + rpki_check_unused_transport(); + RPKI_CFG->tr_config.spec = cfg_allocz(sizeof(struct rpki_tr_ssh_config)); + RPKI_CFG->tr_config.type = RPKI_TR_SSH; +}; + +rpki_transport_ssh_opts: + /* empty */ + | rpki_transport_ssh_opts rpki_transport_ssh_item ';' + ; + +rpki_transport_ssh_item: + BIRD PRIVATE KEY text { RPKI_TR_SSH_CFG->bird_private_key = $4; } + | REMOTE PUBLIC KEY text { RPKI_TR_SSH_CFG->cache_public_key = $4; } + | USER text { RPKI_TR_SSH_CFG->user = $2; } + ; + +rpki_transport_ssh_check: +{ + if (RPKI_TR_SSH_CFG->user == NULL) + cf_error("User must be set"); +}; + +CF_CODE + +CF_END diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c new file mode 100644 index 00000000..59a5efaf --- /dev/null +++ b/proto/rpki/packets.c @@ -0,0 +1,1073 @@ +/* + * BIRD -- The Resource Public Key Infrastructure 
(RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#undef LOCAL_DEBUG + +#include "rpki.h" +#include "transport.h" +#include "packets.h" + +#define RPKI_ADD_FLAG 0b00000001 + +enum rpki_transmit_type { + RPKI_RECV = 0, + RPKI_SEND = 1, +}; + +enum pdu_error_type { + CORRUPT_DATA = 0, + INTERNAL_ERROR = 1, + NO_DATA_AVAIL = 2, + INVALID_REQUEST = 3, + UNSUPPORTED_PROTOCOL_VER = 4, + UNSUPPORTED_PDU_TYPE = 5, + WITHDRAWAL_OF_UNKNOWN_RECORD = 6, + DUPLICATE_ANNOUNCEMENT = 7, + PDU_TOO_BIG = 32 +}; + +static const char *str_pdu_error_type[] = { + [CORRUPT_DATA] = "Corrupt-Data", + [INTERNAL_ERROR] = "Internal-Error", + [NO_DATA_AVAIL] = "No-Data-Available", + [INVALID_REQUEST] = "Invalid-Request", + [UNSUPPORTED_PROTOCOL_VER] = "Unsupported-Protocol-Version", + [UNSUPPORTED_PDU_TYPE] = "Unsupported-PDU-Type", + [WITHDRAWAL_OF_UNKNOWN_RECORD]= "Withdrawal-Of-Unknown-Record", + [DUPLICATE_ANNOUNCEMENT] = "Duplicate-Announcement", + [PDU_TOO_BIG] = "PDU-Too-Big", +}; + +enum pdu_type { + SERIAL_NOTIFY = 0, + SERIAL_QUERY = 1, + RESET_QUERY = 2, + CACHE_RESPONSE = 3, + IPV4_PREFIX = 4, + RESERVED = 5, + IPV6_PREFIX = 6, + END_OF_DATA = 7, + CACHE_RESET = 8, + ROUTER_KEY = 9, + ERROR = 10, + PDU_TYPE_MAX +}; + +static const char *str_pdu_type_[] = { + [SERIAL_NOTIFY] = "Serial Notify", + [SERIAL_QUERY] = "Serial Query", + [RESET_QUERY] = "Reset Query", + [CACHE_RESPONSE] = "Cache Response", + [IPV4_PREFIX] = "IPv4 Prefix", + [RESERVED] = "Reserved", + [IPV6_PREFIX] = "IPv6 Prefix", + [END_OF_DATA] = "End of Data", + [CACHE_RESET] = "Cache Reset", + [ROUTER_KEY] = "Router Key", + [ERROR] = "Error" +}; + +static const char *str_pdu_type(uint type) { + if (type < PDU_TYPE_MAX) + return str_pdu_type_[type]; + else + 
return "Undefined packet type"; +} + +/* + * 0 8 16 24 31 + * .-------------------------------------------. + * | Protocol | PDU | | + * | Version | Type | reserved = zero | + * | 0 or 1 | 0 - 10 | | + * +-------------------------------------------+ + * | | + * | Length >= 8 | + * | | + * `-------------------------------------------' */ +struct pdu_header { + u8 ver; + u8 type; + u16 reserved; + u32 len; +} PACKED; + +struct pdu_cache_response { + u8 ver; + u8 type; + u16 session_id; + u32 len; +} PACKED; + +struct pdu_serial_notify { + u8 ver; + u8 type; + u16 session_id; + u32 len; + u32 serial_num; +} PACKED; + +struct pdu_serial_query { + u8 ver; + u8 type; + u16 session_id; + u32 len; + u32 serial_num; +} PACKED; + +struct pdu_ipv4 { + u8 ver; + u8 type; + u16 reserved; + u32 len; + u8 flags; + u8 prefix_len; + u8 max_prefix_len; + u8 zero; + ip4_addr prefix; + u32 asn; +} PACKED; + +struct pdu_ipv6 { + u8 ver; + u8 type; + u16 reserved; + u32 len; + u8 flags; + u8 prefix_len; + u8 max_prefix_len; + u8 zero; + ip6_addr prefix; + u32 asn; +} PACKED; + +/* + * 0 8 16 24 31 + * .-------------------------------------------. 
+ * | Protocol | PDU | | + * | Version | Type | Error Code | + * | 1 | 10 | | + * +-------------------------------------------+ + * | | + * | Length | + * | | + * +-------------------------------------------+ + * | | + * | Length of Encapsulated PDU | + * | | + * +-------------------------------------------+ + * | | + * ~ Copy of Erroneous PDU ~ + * | | + * +-------------------------------------------+ + * | | + * | Length of Error Text | + * | | + * +-------------------------------------------+ + * | | + * | Arbitrary Text | + * | of | + * ~ Error Diagnostic Message ~ + * | | + * `-------------------------------------------' */ +struct pdu_error { + u8 ver; + u8 type; + u16 error_code; + u32 len; + u32 len_enc_pdu; /* Length of Encapsulated PDU */ + byte rest[]; /* Copy of Erroneous PDU + * Length of Error Text + * Error Diagnostic Message */ +} PACKED; + +struct pdu_reset_query { + u8 ver; + u8 type; + u16 flags; + u32 len; +} PACKED; + +struct pdu_end_of_data_v0 { + u8 ver; + u8 type; + u16 session_id; + u32 len; + u32 serial_num; +} PACKED; + +struct pdu_end_of_data_v1 { + u8 ver; + u8 type; + u16 session_id; + u32 len; + u32 serial_num; + u32 refresh_interval; + u32 retry_interval; + u32 expire_interval; +} PACKED; + +static const size_t min_pdu_size[] = { + [SERIAL_NOTIFY] = sizeof(struct pdu_serial_notify), + [SERIAL_QUERY] = sizeof(struct pdu_serial_query), + [RESET_QUERY] = sizeof(struct pdu_reset_query), + [CACHE_RESPONSE] = sizeof(struct pdu_cache_response), + [IPV4_PREFIX] = sizeof(struct pdu_ipv4), + [RESERVED] = sizeof(struct pdu_header), + [IPV6_PREFIX] = sizeof(struct pdu_ipv6), + [END_OF_DATA] = sizeof(struct pdu_end_of_data_v0), + [CACHE_RESET] = sizeof(struct pdu_cache_response), + [ROUTER_KEY] = sizeof(struct pdu_header), /* FIXME */ + [ERROR] = 16, +}; + +static int rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...); + +static 
void +rpki_pdu_to_network_byte_order(struct pdu_header *pdu) +{ + pdu->reserved = htons(pdu->reserved); + pdu->len = htonl(pdu->len); + + switch (pdu->type) + { + case SERIAL_QUERY: + { + /* Note that a session_id is converted using converting header->reserved */ + struct pdu_serial_query *sq_pdu = (void *) pdu; + sq_pdu->serial_num = htonl(sq_pdu->serial_num); + break; + } + + case ERROR: + { + struct pdu_error *err = (void *) pdu; + u32 *err_text_len = (u32 *)(err->rest + err->len_enc_pdu); + *err_text_len = htonl(*err_text_len); + err->len_enc_pdu = htonl(err->len_enc_pdu); + break; + } + + case RESET_QUERY: + break; + + default: + bug("PDU type %s should not be sent by us", str_pdu_type(pdu->type)); + } +} + +static void +rpki_pdu_to_host_byte_order(struct pdu_header *pdu) +{ + /* The Router Key PDU has two one-byte fields instead of one two-bytes field. */ + if (pdu->type != ROUTER_KEY) + pdu->reserved = ntohs(pdu->reserved); + + pdu->len = ntohl(pdu->len); + + switch (pdu->type) + { + case SERIAL_NOTIFY: + { + /* Note that a session_id is converted using converting header->reserved */ + struct pdu_serial_notify *sn_pdu = (void *) pdu; + sn_pdu->serial_num = ntohl(sn_pdu->serial_num); + break; + } + + case END_OF_DATA: + { + /* Note that a session_id is converted using converting header->reserved */ + struct pdu_end_of_data_v0 *eod0 = (void *) pdu; + eod0->serial_num = ntohl(eod0->serial_num); /* Same either for version 1 */ + + if (pdu->ver == RPKI_VERSION_1) + { + struct pdu_end_of_data_v1 *eod1 = (void *) pdu; + eod1->expire_interval = ntohl(eod1->expire_interval); + eod1->refresh_interval = ntohl(eod1->refresh_interval); + eod1->retry_interval = ntohl(eod1->retry_interval); + } + break; + } + + case IPV4_PREFIX: + { + struct pdu_ipv4 *ipv4 = (void *) pdu; + ipv4->prefix = ip4_ntoh(ipv4->prefix); + ipv4->asn = ntohl(ipv4->asn); + break; + } + + case IPV6_PREFIX: + { + struct pdu_ipv6 *ipv6 = (void *) pdu; + ipv6->prefix = ip6_ntoh(ipv6->prefix); + ipv6->asn 
= ntohl(ipv6->asn); + break; + } + + case ERROR: + { + /* Note that a error_code is converted using converting header->reserved */ + struct pdu_error *err = (void *) pdu; + err->len_enc_pdu = ntohl(err->len_enc_pdu); + u32 *err_text_len = (u32 *)(err->rest + err->len_enc_pdu); + *err_text_len = htonl(*err_text_len); + break; + } + + case ROUTER_KEY: + /* Router Key PDU is not supported yet */ + + case SERIAL_QUERY: + case RESET_QUERY: + /* Serial/Reset Query are sent only in direction router to cache. + * We don't care here. */ + + case CACHE_RESPONSE: + case CACHE_RESET: + /* Converted with pdu->reserved */ + break; + } +} + +/** + * rpki_convert_pdu_back_to_network_byte_order - convert host-byte order PDU back to network-byte order + * @out: allocated memory for writing a converted PDU of size @in->len + * @in: host-byte order PDU + * + * Assumed: |A == ntoh(ntoh(A))| + */ +static struct pdu_header * +rpki_pdu_back_to_network_byte_order(struct pdu_header *out, const struct pdu_header *in) +{ + memcpy(out, in, in->len); + rpki_pdu_to_host_byte_order(out); + return out; +} + +static void +rpki_log_packet(struct rpki_cache *cache, const struct pdu_header *pdu, const enum rpki_transmit_type action) +{ + if (!(cache->p->p.debug & D_PACKETS)) + return; + + const char *str_type = str_pdu_type(pdu->type); + char detail[256]; + +#define SAVE(fn) \ + do { \ + if (fn < 0) \ + { \ + bsnprintf(detail + sizeof(detail) - 16, 16, "... 
<too long>)"); \ + goto detail_finished; \ + } \ + } while(0) \ + + switch (pdu->type) + { + case SERIAL_NOTIFY: + case SERIAL_QUERY: + SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u, serial number: %u)", pdu->reserved, ((struct pdu_serial_notify *) pdu)->serial_num)); + break; + + case END_OF_DATA: + { + const struct pdu_end_of_data_v1 *eod = (void *) pdu; + if (eod->ver == RPKI_VERSION_1) + SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u, serial number: %u, refresh: %us, retry: %us, expire: %us)", eod->session_id, eod->serial_num, eod->refresh_interval, eod->retry_interval, eod->expire_interval)); + else + SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u, serial number: %u)", eod->session_id, eod->serial_num)); + break; + } + + case CACHE_RESPONSE: + SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u)", pdu->reserved)); + break; + + case IPV4_PREFIX: + { + const struct pdu_ipv4 *ipv4 = (void *) pdu; + SAVE(bsnprintf(detail, sizeof(detail), "(%I4/%u-%u AS%u)", ipv4->prefix, ipv4->prefix_len, ipv4->max_prefix_len, ipv4->asn)); + break; + } + + case IPV6_PREFIX: + { + const struct pdu_ipv6 *ipv6 = (void *) pdu; + SAVE(bsnprintf(detail, sizeof(detail), "(%I6/%u-%u AS%u)", ipv6->prefix, ipv6->prefix_len, ipv6->max_prefix_len, ipv6->asn)); + break; + } + + case ROUTER_KEY: + /* We don't support saving Router Key PDUs yet */ + SAVE(bsnprintf(detail, sizeof(detail), "(ignored)")); + break; + + case ERROR: + { + const struct pdu_error *err = (void *) pdu; + SAVE(bsnprintf(detail, sizeof(detail), "(%s", str_pdu_error_type[err->error_code])); + + /* Optional description of error */ + const u32 len_err_txt = *((u32 *) (err->rest + err->len_enc_pdu)); + if (len_err_txt > 0) + { + size_t expected_len = err->len_enc_pdu + len_err_txt + 16; + if (expected_len == err->len) + { + char txt[len_err_txt + 1]; + char *pdu_txt = (char *) err->rest + err->len_enc_pdu + 4; + bsnprintf(txt, sizeof(txt), "%s", pdu_txt); /* it's ensured that txt is ended with a 
null byte */ + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ": '%s'", txt)); + } + else + { + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ", malformed size")); + } + } + + /* Optional encapsulated erroneous packet */ + if (err->len_enc_pdu) + { + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ", %s packet:", str_pdu_type(((struct pdu_header *) err->rest)->type))); + if (err->rest + err->len_enc_pdu <= (byte *)err + err->len) + { + for (const byte *c = err->rest; c != err->rest + err->len_enc_pdu; c++) + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), " %02X", *c)); + } + } + + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ")")); + break; + } + + default: + *detail = '\0'; + } +#undef SAVE + + detail_finished: + + if (action == RPKI_RECV) + { + CACHE_TRACE(D_PACKETS, cache, "Received %s packet %s", str_type, detail); + } + else + { + CACHE_TRACE(D_PACKETS, cache, "Sending %s packet %s", str_type, detail); + } + +#if defined(LOCAL_DEBUG) || defined(GLOBAL_DEBUG) + int seq = 0; + for(const byte *c = pdu; c != pdu + pdu->len; c++) + { + if ((seq % 4) == 0) + DBG("%2d: ", seq); + + DBG(" 0x%02X %-3u", *c, *c); + + if ((++seq % 4) == 0) + DBG("\n"); + } + if ((seq % 4) != 0) + DBG("\n"); +#endif +} + +static int +rpki_send_pdu(struct rpki_cache *cache, const void *pdu, const uint len) +{ + struct rpki_proto *p = cache->p; + sock *sk = cache->tr_sock->sk; + + rpki_log_packet(cache, pdu, RPKI_SEND); + + if (sk->tbuf != sk->tpos) + { + RPKI_WARN(p, "Old packet overwritten in TX buffer"); + } + + if (len > sk->tbsize) + { + RPKI_WARN(p, "%u bytes is too much for send", len); + ASSERT(0); + return RPKI_ERROR; + } + + memcpy(sk->tbuf, pdu, len); + rpki_pdu_to_network_byte_order((void *) sk->tbuf); + + if (!sk_send(sk, len)) + { + DBG("Cannot send just the whole data. 
It will be sent using a call of tx_hook()"); + } + + return RPKI_SUCCESS; +} + +/** + * rpki_check_receive_packet - make a basic validation of received RPKI PDU header + * @cache: cache connection instance + * @pdu: RPKI PDU in network byte order + * + * This function checks protocol version, PDU type, version and size. If all is all right then + * function returns |RPKI_SUCCESS| otherwise sends Error PDU and returns + * |RPKI_ERROR|. + */ +static int +rpki_check_receive_packet(struct rpki_cache *cache, const struct pdu_header *pdu) +{ + u32 pdu_len = ntohl(pdu->len); + + /* + * Minimal and maximal allowed PDU size is treated in rpki_rx_hook() function. + * @header.len corresponds to number of bytes of @pdu and + * it is in range from RPKI_PDU_HEADER_LEN to RPKI_PDU_MAX_LEN bytes. + */ + + /* Do not handle error PDUs here, leave this task to rpki_handle_error_pdu() */ + if (pdu->ver != cache->version && pdu->type != ERROR) + { + /* If this is the first PDU we have received */ + if (cache->request_session_id) + { + if (pdu->type == SERIAL_NOTIFY) + { + /* + * The router MUST ignore any Serial Notify PDUs it might receive from + * the cache during this initial start-up period, regardless of the + * Protocol Version field in the Serial Notify PDU. 
+ * (https://tools.ietf.org/html/draft-ietf-sidr-rpki-rtr-rfc6810-bis-07#section-7) + */ + } + else if (!cache->last_update && + (pdu->ver <= RPKI_MAX_VERSION) && + (pdu->ver < cache->version)) + { + CACHE_TRACE(D_EVENTS, cache, "Downgrade session to %s from %u to %u version", rpki_get_cache_ident(cache), cache->version, pdu->ver); + cache->version = pdu->ver; + } + else + { + /* If this is not the first PDU we have received, something is wrong with + * the server implementation -> Error */ + rpki_send_error_pdu(cache, UNSUPPORTED_PROTOCOL_VER, pdu_len, pdu, "PDU with unsupported Protocol version received"); + return RPKI_ERROR; + } + } + } + + if ((pdu->type >= PDU_TYPE_MAX) || (pdu->ver == RPKI_VERSION_0 && pdu->type == ROUTER_KEY)) + { + rpki_send_error_pdu(cache, UNSUPPORTED_PDU_TYPE, pdu_len, pdu, "Unsupported PDU type %u received", pdu->type); + return RPKI_ERROR; + } + + if (pdu_len < min_pdu_size[pdu->type]) + { + rpki_send_error_pdu(cache, CORRUPT_DATA, pdu_len, pdu, "Received %s packet with %d bytes, but expected at least %d bytes", str_pdu_type(pdu->type), pdu_len, min_pdu_size[pdu->type]); + return RPKI_ERROR; + } + + return RPKI_SUCCESS; +} + +static int +rpki_handle_error_pdu(struct rpki_cache *cache, const struct pdu_error *pdu) +{ + switch (pdu->error_code) + { + case CORRUPT_DATA: + case INTERNAL_ERROR: + case INVALID_REQUEST: + case UNSUPPORTED_PDU_TYPE: + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + break; + + case NO_DATA_AVAIL: + rpki_cache_change_state(cache, RPKI_CS_ERROR_NO_DATA_AVAIL); + break; + + case UNSUPPORTED_PROTOCOL_VER: + CACHE_TRACE(D_PACKETS, cache, "Client uses unsupported protocol version"); + if (pdu->ver <= RPKI_MAX_VERSION && + pdu->ver < cache->version) + { + CACHE_TRACE(D_EVENTS, cache, "Downgrading from protocol version %d to version %d", cache->version, pdu->ver); + cache->version = pdu->ver; + rpki_cache_change_state(cache, RPKI_CS_FAST_RECONNECT); + } + else + { + CACHE_TRACE(D_PACKETS, cache, "Got 
UNSUPPORTED_PROTOCOL_VER error PDU with invalid values, " \ + "current version: %d, PDU version: %d", cache->version, pdu->ver); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + } + break; + + default: + CACHE_TRACE(D_PACKETS, cache, "Error unknown, server sent unsupported error code %u", pdu->error_code); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + break; + } + + return RPKI_SUCCESS; +} + +static void +rpki_handle_serial_notify_pdu(struct rpki_cache *cache, const struct pdu_serial_notify *pdu) +{ + /* The router MUST ignore any Serial Notify PDUs it might receive from + * the cache during this initial start-up period, regardless of the + * Protocol Version field in the Serial Notify PDU. + * (https://tools.ietf.org/html/draft-ietf-sidr-rpki-rtr-rfc6810-bis-07#section-7) + */ + if (cache->request_session_id) + { + CACHE_TRACE(D_PACKETS, cache, "Ignore a Serial Notify packet during initial start-up period"); + return; + } + + /* XXX Serial number should be compared using method RFC 1982 (3.2) */ + if (cache->serial_num != pdu->serial_num) + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); +} + +static int +rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_response *pdu) +{ + if (cache->request_session_id) + { + if (cache->last_update) + { + /* + * This isn't the first sync and we already received records. This point + * is after Reset Query and before importing new records from cache + * server. We need to load new ones and kick out missing ones. So start + * a refresh cycle. 
+ */ + if (cache->p->roa4_channel) + rt_refresh_begin(cache->p->roa4_channel->table, cache->p->roa4_channel); + if (cache->p->roa6_channel) + rt_refresh_begin(cache->p->roa6_channel->table, cache->p->roa6_channel); + + cache->p->refresh_channels = 1; + } + cache->session_id = pdu->session_id; + cache->request_session_id = 0; + } + else + { + if (cache->session_id != pdu->session_id) + { + byte tmp[pdu->len]; + const struct pdu_header *hton_pdu = rpki_pdu_back_to_network_byte_order((void *) tmp, (const void *) pdu); + rpki_send_error_pdu(cache, CORRUPT_DATA, pdu->len, hton_pdu, "Wrong session_id %u in Cache Response PDU", pdu->session_id); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + return RPKI_ERROR; + } + } + + rpki_cache_change_state(cache, RPKI_CS_SYNC_RUNNING); + return RPKI_SUCCESS; +} + +/** + * rpki_prefix_pdu_2_net_addr - convert IPv4/IPv6 Prefix PDU into net_addr_union + * @pdu: host byte order IPv4/IPv6 Prefix PDU + * @n: allocated net_addr_union for save ROA + * + * This function reads ROA data from IPv4/IPv6 Prefix PDU and + * write them into net_addr_roa4 or net_addr_roa6 data structure. 
+ */ +static net_addr_union * +rpki_prefix_pdu_2_net_addr(const struct pdu_header *pdu, net_addr_union *n) +{ + /* + * Note that sizeof(net_addr_roa6) > sizeof(net_addr) + * and thence we must use net_addr_union and not only net_addr + */ + + if (pdu->type == IPV4_PREFIX) + { + const struct pdu_ipv4 *ipv4 = (void *) pdu; + n->roa4.type = NET_ROA4; + n->roa4.length = sizeof(net_addr_roa4); + n->roa4.prefix = ipv4->prefix; + n->roa4.asn = ipv4->asn; + n->roa4.pxlen = ipv4->prefix_len; + n->roa4.max_pxlen = ipv4->max_prefix_len; + } + else + { + const struct pdu_ipv6 *ipv6 = (void *) pdu; + n->roa6.type = NET_ROA6; + n->roa6.length = sizeof(net_addr_roa6); + n->roa6.prefix = ipv6->prefix; + n->roa6.asn = ipv6->asn; + n->roa6.pxlen = ipv6->prefix_len; + n->roa6.max_pxlen = ipv6->max_prefix_len; + } + + return n; +} + +static int +rpki_handle_prefix_pdu(struct rpki_cache *cache, const struct pdu_header *pdu) +{ + const enum pdu_type type = pdu->type; + ASSERT(type == IPV4_PREFIX || type == IPV6_PREFIX); + + net_addr_union addr = {}; + rpki_prefix_pdu_2_net_addr(pdu, &addr); + + struct channel *channel = NULL; + + if (type == IPV4_PREFIX) + channel = cache->p->roa4_channel; + if (type == IPV6_PREFIX) + channel = cache->p->roa6_channel; + + if (!channel) + { + CACHE_TRACE(D_ROUTES, cache, "Skip %N, missing %s channel", &addr, (type == IPV4_PREFIX ? 
"roa4" : "roa6"), addr); + return RPKI_ERROR; + } + + cache->last_rx_prefix = current_time(); + + /* A place for 'flags' is same for both data structures pdu_ipv4 or pdu_ipv6 */ + struct pdu_ipv4 *pfx = (void *) pdu; + if (pfx->flags & RPKI_ADD_FLAG) + rpki_table_add_roa(cache, channel, &addr); + else + rpki_table_remove_roa(cache, channel, &addr); + + return RPKI_SUCCESS; +} + +static uint +rpki_check_interval(struct rpki_cache *cache, const char *(check_fn)(uint), uint interval) +{ + if (check_fn(interval)) + { + RPKI_WARN(cache->p, "%s, received %u seconds", check_fn(interval), interval); + return 0; + } + return 1; +} + +static void +rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_data_v1 *pdu) +{ + const struct rpki_config *cf = (void *) cache->p->p.cf; + + if (pdu->session_id != cache->session_id) + { + byte tmp[pdu->len]; + const struct pdu_header *hton_pdu = rpki_pdu_back_to_network_byte_order((void *) tmp, (const void *) pdu); + rpki_send_error_pdu(cache, CORRUPT_DATA, pdu->len, hton_pdu, "Received Session ID %u, but expected %u", pdu->session_id, cache->session_id); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + return; + } + + if (pdu->ver == RPKI_VERSION_1) + { + if (!cf->keep_refresh_interval && rpki_check_interval(cache, rpki_check_refresh_interval, pdu->refresh_interval)) + cache->refresh_interval = pdu->refresh_interval; + + if (!cf->keep_retry_interval && rpki_check_interval(cache, rpki_check_retry_interval, pdu->retry_interval)) + cache->retry_interval = pdu->retry_interval; + + if (!cf->keep_expire_interval && rpki_check_interval(cache, rpki_check_expire_interval, pdu->expire_interval)) + cache->expire_interval = pdu->expire_interval; + + CACHE_TRACE(D_EVENTS, cache, "New interval values: " + "refresh: %s%us, " + "retry: %s%us, " + "expire: %s%us", + (cf->keep_refresh_interval ? "keeps " : ""), cache->refresh_interval, + (cf->keep_retry_interval ? 
"keeps " : ""), cache->retry_interval, + (cf->keep_expire_interval ? "keeps " : ""), cache->expire_interval); + } + + if (cache->p->refresh_channels) + { + cache->p->refresh_channels = 0; + if (cache->p->roa4_channel) + rt_refresh_end(cache->p->roa4_channel->table, cache->p->roa4_channel); + if (cache->p->roa6_channel) + rt_refresh_end(cache->p->roa6_channel->table, cache->p->roa6_channel); + } + + cache->last_update = current_time(); + cache->serial_num = pdu->serial_num; + rpki_cache_change_state(cache, RPKI_CS_ESTABLISHED); +} + +/** + * rpki_rx_packet - process a received RPKI PDU + * @cache: RPKI connection instance + * @pdu: a RPKI PDU in network byte order + */ +static void +rpki_rx_packet(struct rpki_cache *cache, struct pdu_header *pdu) +{ + struct rpki_proto *p = cache->p; + + if (rpki_check_receive_packet(cache, pdu) == RPKI_ERROR) + { + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + return; + } + + rpki_pdu_to_host_byte_order(pdu); + rpki_log_packet(cache, pdu, RPKI_RECV); + + switch (pdu->type) + { + case RESET_QUERY: + case SERIAL_QUERY: + RPKI_WARN(p, "Received a %s packet that is destined for cache server", str_pdu_type(pdu->type)); + break; + + case SERIAL_NOTIFY: + /* This is a signal to synchronize with the cache server just now */ + rpki_handle_serial_notify_pdu(cache, (void *) pdu); + break; + + case CACHE_RESPONSE: + rpki_handle_cache_response_pdu(cache, (void *) pdu); + break; + + case IPV4_PREFIX: + case IPV6_PREFIX: + rpki_handle_prefix_pdu(cache, pdu); + break; + + case END_OF_DATA: + rpki_handle_end_of_data_pdu(cache, (void *) pdu); + break; + + case CACHE_RESET: + /* Cache cannot provide an incremental update. 
*/ + rpki_cache_change_state(cache, RPKI_CS_NO_INCR_UPDATE_AVAIL); + break; + + case ERROR: + rpki_handle_error_pdu(cache, (void *) pdu); + break; + + case ROUTER_KEY: + /* TODO: Implement Router Key PDU handling */ + break; + + default: + CACHE_TRACE(D_PACKETS, cache, "Received unsupported type (%u)", pdu->type); + }; +} + +int +rpki_rx_hook(struct birdsock *sk, uint size) +{ + struct rpki_cache *cache = sk->data; + struct rpki_proto *p = cache->p; + + byte *pkt_start = sk->rbuf; + byte *end = pkt_start + size; + + DBG("rx hook got %u bytes \n", size); + + while (end >= pkt_start + RPKI_PDU_HEADER_LEN) + { + struct pdu_header *pdu = (void *) pkt_start; + u32 pdu_size = ntohl(pdu->len); + + if (pdu_size < RPKI_PDU_HEADER_LEN || pdu_size > RPKI_PDU_MAX_LEN) + { + RPKI_WARN(p, "Received invalid packet length %u, purge the whole receiving buffer", pdu_size); + return 1; /* Purge recv buffer */ + } + + if (end < pkt_start + pdu_size) + break; + + rpki_rx_packet(cache, pdu); + + /* It is possible that bird socket was freed/closed */ + if (p->p.proto_state == PS_DOWN || sk != cache->tr_sock->sk) + return 0; + + pkt_start += pdu_size; + } + + if (pkt_start != sk->rbuf) + { + CACHE_DBG(cache, "Move %u bytes of a memory at the start of buffer", end - pkt_start); + memmove(sk->rbuf, pkt_start, end - pkt_start); + sk->rpos = sk->rbuf + (end - pkt_start); + } + + return 0; /* Not purge sk->rbuf */ +} + +void +rpki_err_hook(struct birdsock *sk, int error_num) +{ + struct rpki_cache *cache = sk->data; + + if (error_num) + { + /* sk->err may contains a SSH error description */ + if (sk->err) + CACHE_TRACE(D_EVENTS, cache, "Lost connection: %s", sk->err); + else + CACHE_TRACE(D_EVENTS, cache, "Lost connection: %M", error_num); + } + else + { + CACHE_TRACE(D_EVENTS, cache, "The other side closed a connection"); + } + + + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); +} + +static int +rpki_fire_tx(struct rpki_cache *cache) +{ + sock *sk = cache->tr_sock->sk; + + uint 
bytes_to_send = sk->tpos - sk->tbuf; + DBG("Sending %u bytes", bytes_to_send); + return sk_send(sk, bytes_to_send); +} + +void +rpki_tx_hook(sock *sk) +{ + struct rpki_cache *cache = sk->data; + + while (rpki_fire_tx(cache) > 0) + ; +} + +void +rpki_connected_hook(sock *sk) +{ + struct rpki_cache *cache = sk->data; + + CACHE_TRACE(D_EVENTS, cache, "Connected"); + proto_notify_state(&cache->p->p, PS_UP); + + sk->rx_hook = rpki_rx_hook; + sk->tx_hook = rpki_tx_hook; + + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); +} + +/** + * rpki_send_error_pdu - send RPKI Error PDU + * @cache: RPKI connection instance + * @error_code: PDU Error type + * @err_pdu_len: length of @erroneous_pdu + * @erroneous_pdu: optional network byte-order PDU that invokes Error by us or NULL + * @fmt: optional description text of error or NULL + * @args: optional arguments for @fmt + * + * This function prepares Error PDU and sends it to a cache server. + */ +static int +rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...) 
+{ + va_list args; + char msg[128]; + + /* Size including the terminating null byte ('\0') */ + int msg_len = 0; + + /* Don't send errors for erroneous error PDUs */ + if (err_pdu_len >= 2) + { + if (erroneous_pdu->type == ERROR) + return RPKI_SUCCESS; + } + + if (fmt) + { + va_start(args, fmt); + msg_len = bvsnprintf(msg, sizeof(msg), fmt, args) + 1; + } + + u32 pdu_size = 16 + err_pdu_len + msg_len; + byte pdu[pdu_size]; + memset(pdu, 0, sizeof(pdu)); + + struct pdu_error *e = (void *) pdu; + e->ver = cache->version; + e->type = ERROR; + e->error_code = error_code; + e->len = pdu_size; + + e->len_enc_pdu = err_pdu_len; + if (err_pdu_len > 0) + memcpy(e->rest, erroneous_pdu, err_pdu_len); + + *((u32 *)(e->rest + err_pdu_len)) = msg_len; + if (msg_len > 0) + memcpy(e->rest + err_pdu_len + 4, msg, msg_len); + + return rpki_send_pdu(cache, pdu, pdu_size); +} + +int +rpki_send_serial_query(struct rpki_cache *cache) +{ + struct pdu_serial_query pdu = { + .ver = cache->version, + .type = SERIAL_QUERY, + .session_id = cache->session_id, + .len = sizeof(pdu), + .serial_num = cache->serial_num + }; + + if (rpki_send_pdu(cache, &pdu, sizeof(pdu)) != RPKI_SUCCESS) + { + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + return RPKI_ERROR; + } + + return RPKI_SUCCESS; +} + +int +rpki_send_reset_query(struct rpki_cache *cache) +{ + struct pdu_reset_query pdu = { + .ver = cache->version, + .type = RESET_QUERY, + .len = sizeof(pdu), + }; + + if (rpki_send_pdu(cache, &pdu, sizeof(pdu)) != RPKI_SUCCESS) + { + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + return RPKI_ERROR; + } + + return RPKI_SUCCESS; +} diff --git a/proto/rpki/packets.h b/proto/rpki/packets.h new file mode 100644 index 00000000..d6f8a249 --- /dev/null +++ b/proto/rpki/packets.h @@ -0,0 +1,45 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: 
http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_RPKI_PACKETS_H_ +#define _BIRD_RPKI_PACKETS_H_ + +#include <arpa/inet.h> + +#define RPKI_PDU_HEADER_LEN 8 + +/* A Error PDU size is the biggest (has encapsulate PDU inside): + * +8 bytes (Header size) + * +4 bytes (Length of Encapsulated PDU) + * +32 bytes (Encapsulated PDU IPv6 32) + * +4 bytes (Length of inserted text) + * +800 bytes (UTF-8 text 400*2 bytes) + * ------------ + * = 848 bytes (Maximal expected PDU size) */ +#define RPKI_PDU_MAX_LEN 848 + +/* RX buffer size has a great impact to scheduler granularity */ +#define RPKI_RX_BUFFER_SIZE 4096 +#define RPKI_TX_BUFFER_SIZE RPKI_PDU_MAX_LEN + +/* Return values */ +enum rpki_rtvals { + RPKI_SUCCESS = 0, + RPKI_ERROR = -1 +}; + +int rpki_send_serial_query(struct rpki_cache *cache); +int rpki_send_reset_query(struct rpki_cache *cache); +int rpki_rx_hook(sock *sk, uint size); +void rpki_connected_hook(sock *sk); +void rpki_err_hook(sock *sk, int size); + +#endif diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c new file mode 100644 index 00000000..74860071 --- /dev/null +++ b/proto/rpki/rpki.c @@ -0,0 +1,928 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * Using RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/** + * DOC: RPKI To Router (RPKI-RTR) + * + * The RPKI-RTR protocol is implemented in several files: |rpki.c| containing + * the routes handling, protocol logic, timer events, cache connection, + * reconfiguration, configuration and protocol glue with BIRD core, |packets.c| + * containing the RPKI packets handling and finally all transports files: + * |transport.c|, |tcp_transport.c| and |ssh_transport.c|. + * + * The |transport.c| is a middle layer and interface for each specific + * transport. 
Transport is a way how to wrap a communication with a cache + * server. There is supported an unprotected TCP transport and an encrypted + * SSHv2 transport. The SSH transport requires LibSSH library. LibSSH is + * loading dynamically using |dlopen()| function. SSH support is integrated in + * |sysdep/unix/io.c|. Each transport must implement an initialization + * function, an open function and a socket identification function. That's all. + * + * This implementation is based on the RTRlib (http://rpki.realmv6.org/). The + * BIRD takes over files |packets.c|, |rtr.c| (inside |rpki.c|), |transport.c|, + * |tcp_transport.c| and |ssh_transport.c| from RTRlib. + * + * A RPKI-RTR connection is described by a structure &rpki_cache. The main + * logic is located in |rpki_cache_change_state()| function. There is a state + * machine. The standard starting state flow looks like |Down| ~> |Connecting| + * ~> |Sync-Start| ~> |Sync-Running| ~> |Established| and then the last three + * states are periodically repeated. + * + * |Connecting| state establishes the transport connection. The state from a + * call |rpki_cache_change_state(CONNECTING)| to a call |rpki_connected_hook()| + * + * |Sync-Start| state starts with sending |Reset Query| or |Serial Query| and + * then waits for |Cache Response|. The state from |rpki_connected_hook()| to + * |rpki_handle_cache_response_pdu()| + * + * During |Sync-Running| BIRD receives data with IPv4/IPv6 Prefixes from cache + * server. The state starts from |rpki_handle_cache_response_pdu()| and ends + * in |rpki_handle_end_of_data_pdu()|. + * + * |Established| state means that BIRD has synced all data with cache server. + * Schedules a refresh timer event that invokes |Sync-Start|. Schedules Expire + * timer event and stops a Retry timer event. + * + * |Transport Error| state means that we have some troubles with a network + * connection. 
We cannot connect to a cache server or we wait too long for some + * expected PDU for received - |Cache Response| or |End of Data|. It closes + * current connection and schedules a Retry timer event. + * + * |Fatal Protocol Error| is occurred e.g. by received a bad Session ID. We + * restart a protocol, so all ROAs are flushed immediately. + * + * The RPKI-RTR protocol (RFC 6810 bis) defines configurable refresh, retry and + * expire intervals. For maintaining a connection are used timer events that + * are scheduled by |rpki_schedule_next_refresh()|, + * |rpki_schedule_next_retry()| and |rpki_schedule_next_expire()| functions. + * + * A Refresh timer event performs a sync of |Established| connection. So it + * shifts state to |Sync-Start|. If at the beginning of second call of a + * refresh event is connection in |Sync-Start| state then we didn't receive a + * |Cache Response| from a cache server and we invoke |Transport Error| state. + * + * A Retry timer event attempts to connect cache server. It is activated after + * |Transport Error| state and terminated by reaching |Established| state. + * If cache connection is still connecting to the cache server at the beginning + * of an event call then the Retry timer event invokes |Transport Error| state. + * + * An Expire timer event checks expiration of ROAs. If a last successful sync + * was more ago than the expire interval then the Expire timer event invokes a + * protocol restart thereby removes all ROAs learned from that cache server and + * continue trying to connect to cache server. The Expire event is activated + * by initial successful loading of ROAs, receiving End of Data PDU. + * + * A reconfiguration of cache connection works well without restarting when we + * change only intervals values. 
+ * + * Supported standards: + * - RFC 6810 - main RPKI-RTR standard + * - RFC 6810 bis - an explicit timing parameters and protocol version number negotiation + */ + +#include <stdlib.h> +#include <netdb.h> + +#undef LOCAL_DEBUG + +#include "rpki.h" +#include "lib/string.h" +#include "nest/cli.h" + +/* Return values for reconfiguration functions */ +#define NEED_RESTART 0 +#define SUCCESSFUL_RECONF 1 + +static int rpki_open_connection(struct rpki_cache *cache); +static void rpki_close_connection(struct rpki_cache *cache); +static void rpki_schedule_next_refresh(struct rpki_cache *cache); +static void rpki_schedule_next_retry(struct rpki_cache *cache); +static void rpki_schedule_next_expire_check(struct rpki_cache *cache); +static void rpki_stop_refresh_timer_event(struct rpki_cache *cache); +static void rpki_stop_retry_timer_event(struct rpki_cache *cache); +static void rpki_stop_expire_timer_event(struct rpki_cache *cache); + + +/* + * Routes handling + */ + +void +rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr) +{ + struct rpki_proto *p = cache->p; + + rta a0 = { + .src = p->p.main_source, + .source = RTS_RPKI, + .scope = SCOPE_UNIVERSE, + .dest = RTD_NONE, + }; + + rta *a = rta_lookup(&a0); + rte *e = rte_get_temp(a); + + e->pflags = 0; + + rte_update2(channel, &pfxr->n, e, a0.src); +} + +void +rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr) +{ + struct rpki_proto *p = cache->p; + rte_update2(channel, &pfxr->n, NULL, p->p.main_source); +} + + +/* + * RPKI Protocol Logic + */ + +static const char *str_cache_states[] = { + [RPKI_CS_CONNECTING] = "Connecting", + [RPKI_CS_ESTABLISHED] = "Established", + [RPKI_CS_RESET] = "Reseting", + [RPKI_CS_SYNC_START] = "Sync-Start", + [RPKI_CS_SYNC_RUNNING] = "Sync-Running", + [RPKI_CS_FAST_RECONNECT] = "Fast-Reconnect", + [RPKI_CS_NO_INCR_UPDATE_AVAIL]= "No-Increment-Update-Available", + [RPKI_CS_ERROR_NO_DATA_AVAIL] = 
"Cache-Error-No-Data-Available", + [RPKI_CS_ERROR_FATAL] = "Fatal-Protocol-Error", + [RPKI_CS_ERROR_TRANSPORT] = "Transport-Error", + [RPKI_CS_SHUTDOWN] = "Down" +}; + +/** + * rpki_cache_state_to_str - give a text representation of cache state + * @state: A cache state + * + * The function converts logic cache state into string. + */ +const char * +rpki_cache_state_to_str(enum rpki_cache_state state) +{ + return str_cache_states[state]; +} + +/** + * rpki_start_cache - connect to a cache server + * @cache: RPKI connection instance + * + * This function is a high level method to kick up a connection to a cache server. + */ +static void +rpki_start_cache(struct rpki_cache *cache) +{ + rpki_cache_change_state(cache, RPKI_CS_CONNECTING); +} + +/** + * rpki_force_restart_proto - force shutdown and start protocol again + * @p: RPKI protocol instance + * + * This function calls shutdown and frees all protocol resources as well. + * After calling this function should be no operations with protocol data, + * they could be freed already. + */ +static void +rpki_force_restart_proto(struct rpki_proto *p) +{ + if (p->cache) + { + CACHE_DBG(p->cache, "Connection object destroying"); + } + + /* Sign as freed */ + p->cache = NULL; + + proto_notify_state(&p->p, PS_DOWN); +} + +/** + * rpki_cache_change_state - check and change cache state + * @cache: RPKI cache instance + * @new_state: suggested new state + * + * This function makes transitions between internal states. + * It represents the core of logic management of RPKI protocol. + * Cannot transit into the same state as cache is in already. 
+ */ +void +rpki_cache_change_state(struct rpki_cache *cache, const enum rpki_cache_state new_state) +{ + const enum rpki_cache_state old_state = cache->state; + + if (old_state == new_state) + return; + + cache->state = new_state; + CACHE_TRACE(D_EVENTS, cache, "Changing from %s to %s state", rpki_cache_state_to_str(old_state), rpki_cache_state_to_str(new_state)); + + switch (new_state) + { + case RPKI_CS_CONNECTING: + { + sock *sk = cache->tr_sock->sk; + + if (sk == NULL || sk->fd < 0) + rpki_open_connection(cache); + else + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); + + rpki_schedule_next_retry(cache); + break; + } + + case RPKI_CS_ESTABLISHED: + rpki_schedule_next_refresh(cache); + rpki_schedule_next_expire_check(cache); + rpki_stop_retry_timer_event(cache); + break; + + case RPKI_CS_RESET: + /* Resetting cache connection. */ + cache->request_session_id = 1; + cache->serial_num = 0; + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); + break; + + case RPKI_CS_SYNC_START: + /* Requesting for receive ROAs from a cache server. */ + if (cache->request_session_id) + { + /* Send request for Session ID */ + if (rpki_send_reset_query(cache) != RPKI_SUCCESS) + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + } + else + { + /* We have already a session_id. So send a Serial Query and start an incremental sync */ + if (rpki_send_serial_query(cache) != RPKI_SUCCESS) + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + } + break; + + case RPKI_CS_SYNC_RUNNING: + /* The state between Cache Response and End of Data. Only waiting for + * receiving all IP Prefix PDUs and finally a End of Data PDU. */ + break; + + case RPKI_CS_NO_INCR_UPDATE_AVAIL: + /* Server was unable to answer the last Serial Query and sent Cache Reset. */ + rpki_cache_change_state(cache, RPKI_CS_RESET); + break; + + case RPKI_CS_ERROR_NO_DATA_AVAIL: + /* No validation records are available on the cache server. 
*/ + rpki_cache_change_state(cache, RPKI_CS_RESET); + break; + + case RPKI_CS_ERROR_FATAL: + /* Fatal protocol error occurred. */ + rpki_force_restart_proto(cache->p); + break; + + case RPKI_CS_ERROR_TRANSPORT: + /* Error on the transport socket occurred. */ + rpki_close_connection(cache); + rpki_schedule_next_retry(cache); + rpki_stop_refresh_timer_event(cache); + break; + + case RPKI_CS_FAST_RECONNECT: + /* Reconnect without any waiting period */ + rpki_close_connection(cache); + rpki_cache_change_state(cache, RPKI_CS_CONNECTING); + break; + + case RPKI_CS_SHUTDOWN: + bug("This isn't never really called."); + break; + }; +} + + +/* + * RPKI Timer Events + */ + +static void +rpki_schedule_next_refresh(struct rpki_cache *cache) +{ + btime t = cache->refresh_interval S; + + CACHE_DBG(cache, "after %t s", t); + tm_start(cache->refresh_timer, t); +} + +static void +rpki_schedule_next_retry(struct rpki_cache *cache) +{ + btime t = cache->retry_interval S; + + CACHE_DBG(cache, "after %t s", t); + tm_start(cache->retry_timer, t); +} + +static void +rpki_schedule_next_expire_check(struct rpki_cache *cache) +{ + /* A minimum time to wait is 1 second */ + btime t = cache->last_update + cache->expire_interval S - current_time(); + t = MAX(t, 1 S); + + CACHE_DBG(cache, "after %t s", t); + tm_start(cache->expire_timer, t); +} + +static void +rpki_stop_refresh_timer_event(struct rpki_cache *cache) +{ + CACHE_DBG(cache, "Stop"); + tm_stop(cache->refresh_timer); +} + +static void +rpki_stop_retry_timer_event(struct rpki_cache *cache) +{ + CACHE_DBG(cache, "Stop"); + tm_stop(cache->retry_timer); +} + +static void UNUSED +rpki_stop_expire_timer_event(struct rpki_cache *cache) +{ + CACHE_DBG(cache, "Stop"); + tm_stop(cache->expire_timer); +} + +static int +rpki_do_we_recv_prefix_pdu_in_last_seconds(struct rpki_cache *cache) +{ + if (!cache->last_rx_prefix) + return 0; + + return ((current_time() - cache->last_rx_prefix) <= 2 S); +} + +/** + * rpki_refresh_hook - control a scheduling 
of downloading data from cache server + * @tm: refresh timer with cache connection instance in data + * + * This function is periodically called during &ESTABLISHED or &SYNC* state + * cache connection. The first refresh schedule is invoked after receiving a + * |End of Data| PDU and has run by some &ERROR is occurred. + */ +static void +rpki_refresh_hook(timer *tm) +{ + struct rpki_cache *cache = tm->data; + + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); + + switch (cache->state) + { + case RPKI_CS_ESTABLISHED: + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); + break; + + case RPKI_CS_SYNC_START: + /* We sent Serial/Reset Query in last refresh hook call + * and didn't receive Cache Response yet. It is probably + * troubles with network. */ + case RPKI_CS_SYNC_RUNNING: + /* We sent Serial/Reset Query in last refresh hook call + * and we got Cache Response but didn't get End-Of-Data yet. + * It could be a trouble with network or only too long synchronization. */ + if (!rpki_do_we_recv_prefix_pdu_in_last_seconds(cache)) + { + CACHE_TRACE(D_EVENTS, cache, "Sync takes more time than refresh interval %us, resetting connection", cache->refresh_interval); + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + } + break; + + default: + break; + } + + if (cache->state != RPKI_CS_SHUTDOWN && cache->state != RPKI_CS_ERROR_TRANSPORT) + rpki_schedule_next_refresh(cache); + else + rpki_stop_refresh_timer_event(cache); +} + +/** + * rpki_retry_hook - control a scheduling of retrying connection to cache server + * @tm: retry timer with cache connection instance in data + * + * This function is periodically called during &ERROR* state cache connection. + * The first retry schedule is invoked after any &ERROR* state occurred and + * ends by reaching of &ESTABLISHED state again. 
+ */ +static void +rpki_retry_hook(timer *tm) +{ + struct rpki_cache *cache = tm->data; + + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); + + switch (cache->state) + { + case RPKI_CS_ESTABLISHED: + case RPKI_CS_SHUTDOWN: + break; + + case RPKI_CS_CONNECTING: + case RPKI_CS_SYNC_START: + case RPKI_CS_SYNC_RUNNING: + if (!rpki_do_we_recv_prefix_pdu_in_last_seconds(cache)) + { + /* We tried to establish a connection in last retry hook call and haven't done + * yet. It looks like troubles with network. We are aggressive here. */ + CACHE_TRACE(D_EVENTS, cache, "Sync takes more time than retry interval %us, resetting connection.", cache->retry_interval); + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + } + break; + + default: + rpki_cache_change_state(cache, RPKI_CS_CONNECTING); + break; + } + + if (cache->state != RPKI_CS_ESTABLISHED) + rpki_schedule_next_retry(cache); + else + rpki_stop_retry_timer_event(cache); +} + +/** + * rpki_expire_hook - control a expiration of ROA entries + * @tm: expire timer with cache connection instance in data + * + * This function is scheduled after received a |End of Data| PDU. + * A waiting interval is calculated dynamically by last update. + * If we reach an expiration time then we invoke a restarting + * of the protocol. + */ +static void +rpki_expire_hook(timer *tm) +{ + struct rpki_cache *cache = tm->data; + + if (!cache->last_update) + return; + + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); + + btime t = cache->last_update + cache->expire_interval S - current_time(); + if (t <= 0) + { + CACHE_TRACE(D_EVENTS, cache, "All ROAs expired"); + rpki_force_restart_proto(cache->p); + } + else + { + CACHE_DBG(cache, "Remains %t seconds to become ROAs obsolete", t); + rpki_schedule_next_expire_check(cache); + } +} + +/** + * rpki_check_refresh_interval - check validity of refresh interval value + * @seconds: suggested value + * + * This function validates value and should return |NULL|. 
+ * If the check doesn't pass then returns error message. + */ +const char * +rpki_check_refresh_interval(uint seconds) +{ + if (seconds < 1) + return "Minimum allowed refresh interval is 1 second"; + if (seconds > 86400) + return "Maximum allowed refresh interval is 86400 seconds"; + return NULL; +} + +/** + * rpki_check_retry_interval - check validity of retry interval value + * @seconds: suggested value + * + * This function validates value and should return |NULL|. + * If the check doesn't pass then returns error message. + */ +const char * +rpki_check_retry_interval(uint seconds) +{ + if (seconds < 1) + return "Minimum allowed retry interval is 1 second"; + if (seconds > 7200) + return "Maximum allowed retry interval is 7200 seconds"; + return NULL; +} + +/** + * rpki_check_expire_interval - check validity of expire interval value + * @seconds: suggested value + * + * This function validates value and should return |NULL|. + * If the check doesn't pass then returns error message. + */ +const char * +rpki_check_expire_interval(uint seconds) +{ + if (seconds < 600) + return "Minimum allowed expire interval is 600 seconds"; + if (seconds > 172800) + return "Maximum allowed expire interval is 172800 seconds"; + return NULL; +} + + +/* + * RPKI Cache + */ + +static struct rpki_cache * +rpki_init_cache(struct rpki_proto *p, struct rpki_config *cf) +{ + pool *pool = rp_new(p->p.pool, cf->hostname); + + struct rpki_cache *cache = mb_allocz(pool, sizeof(struct rpki_cache)); + + cache->pool = pool; + cache->p = p; + + cache->state = RPKI_CS_SHUTDOWN; + cache->request_session_id = 1; + cache->version = RPKI_MAX_VERSION; + + cache->refresh_interval = cf->refresh_interval; + cache->retry_interval = cf->retry_interval; + cache->expire_interval = cf->expire_interval; + cache->refresh_timer = tm_new_init(pool, &rpki_refresh_hook, cache, 0, 0); + cache->retry_timer = tm_new_init(pool, &rpki_retry_hook, cache, 0, 0); + cache->expire_timer = tm_new_init(pool, &rpki_expire_hook, 
cache, 0, 0); + + cache->tr_sock = mb_allocz(pool, sizeof(struct rpki_tr_sock)); + cache->tr_sock->cache = cache; + + switch (cf->tr_config.type) + { + case RPKI_TR_TCP: rpki_tr_tcp_init(cache->tr_sock); break; + case RPKI_TR_SSH: rpki_tr_ssh_init(cache->tr_sock); break; + }; + + CACHE_DBG(cache, "Connection object created"); + + return cache; +} + +/** + * rpki_get_cache_ident - give a text representation of cache server name + * @cache: RPKI connection instance + * + * The function converts cache connection into string. + */ +const char * +rpki_get_cache_ident(struct rpki_cache *cache) +{ + return rpki_tr_ident(cache->tr_sock); +} + +static int +rpki_open_connection(struct rpki_cache *cache) +{ + CACHE_TRACE(D_EVENTS, cache, "Opening a connection"); + + if (rpki_tr_open(cache->tr_sock) == RPKI_TR_ERROR) + { + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + return RPKI_TR_ERROR; + } + + return RPKI_TR_SUCCESS; +} + +static void +rpki_close_connection(struct rpki_cache *cache) +{ + CACHE_TRACE(D_EVENTS, cache, "Closing a connection"); + rpki_tr_close(cache->tr_sock); + proto_notify_state(&cache->p->p, PS_START); +} + +static int +rpki_shutdown(struct proto *P) +{ + struct rpki_proto *p = (void *) P; + + rpki_force_restart_proto(p); + + /* Protocol memory pool will be automatically freed */ + return PS_DOWN; +} + + +/* + * RPKI Reconfiguration + */ + +static int +rpki_try_fast_reconnect(struct rpki_cache *cache) +{ + if (cache->state == RPKI_CS_ESTABLISHED) + { + rpki_cache_change_state(cache, RPKI_CS_FAST_RECONNECT); + return SUCCESSFUL_RECONF; + } + + return NEED_RESTART; +} + +/** + * rpki_reconfigure_cache - a cache reconfiguration + * @p: RPKI protocol instance + * @cache: a cache connection + * @new: new RPKI configuration + * @old: old RPKI configuration + * + * This function reconfigures existing single cache server connection with new + * existing configuration. 
Generally, a change of time intervals could be + * reconfigured without restarting and all others changes requires a restart of + * protocol. Returns |NEED_TO_RESTART| or |SUCCESSFUL_RECONF|. + */ +static int +rpki_reconfigure_cache(struct rpki_proto *p UNUSED, struct rpki_cache *cache, struct rpki_config *new, struct rpki_config *old) +{ + u8 try_fast_reconnect = 0; + + if (strcmp(old->hostname, new->hostname) != 0) + { + CACHE_TRACE(D_EVENTS, cache, "Cache server address changed to %s", new->hostname); + return NEED_RESTART; + } + + if (old->port != new->port) + { + CACHE_TRACE(D_EVENTS, cache, "Cache server port changed to %u", new->port); + return NEED_RESTART; + } + + if (old->tr_config.type != new->tr_config.type) + { + CACHE_TRACE(D_EVENTS, cache, "Transport type changed"); + return NEED_RESTART; + } + else if (new->tr_config.type == RPKI_TR_SSH) + { + struct rpki_tr_ssh_config *ssh_old = (void *) old->tr_config.spec; + struct rpki_tr_ssh_config *ssh_new = (void *) new->tr_config.spec; + if ((strcmp(ssh_old->bird_private_key, ssh_new->bird_private_key) != 0) || + (strcmp(ssh_old->cache_public_key, ssh_new->cache_public_key) != 0) || + (strcmp(ssh_old->user, ssh_new->user) != 0)) + { + CACHE_TRACE(D_EVENTS, cache, "Settings of SSH transport configuration changed"); + try_fast_reconnect = 1; + } + } + +#define TEST_INTERVAL(name, Name) \ + if (cache->name##_interval != new->name##_interval || \ + old->keep_##name##_interval != new->keep_##name##_interval) \ + { \ + cache->name##_interval = new->name##_interval; \ + CACHE_TRACE(D_EVENTS, cache, #Name " interval changed to %u seconds %s", cache->name##_interval, (new->keep_##name##_interval ? 
"and keep it" : "")); \ + try_fast_reconnect = 1; \ + } + TEST_INTERVAL(refresh, Refresh); + TEST_INTERVAL(retry, Retry); + TEST_INTERVAL(expire, Expire); +#undef TEST_INTERVAL + + if (try_fast_reconnect) + return rpki_try_fast_reconnect(cache); + + return SUCCESSFUL_RECONF; +} + +/** + * rpki_reconfigure - a protocol reconfiguration hook + * @P: a protocol instance + * @CF: a new protocol configuration + * + * This function reconfigures whole protocol. + * It sets new protocol configuration into a protocol structure. + * Returns |NEED_TO_RESTART| or |SUCCESSFUL_RECONF|. + */ +static int +rpki_reconfigure(struct proto *P, struct proto_config *CF) +{ + struct rpki_proto *p = (void *) P; + struct rpki_config *new = (void *) CF; + struct rpki_config *old = (void *) p->p.cf; + struct rpki_cache *cache = p->cache; + + if (!proto_configure_channel(&p->p, &p->roa4_channel, proto_cf_find_channel(CF, NET_ROA4)) || + !proto_configure_channel(&p->p, &p->roa6_channel, proto_cf_find_channel(CF, NET_ROA6))) + return NEED_RESTART; + + if (rpki_reconfigure_cache(p, cache, new, old) != SUCCESSFUL_RECONF) + return NEED_RESTART; + + return SUCCESSFUL_RECONF; +} + + +/* + * RPKI Protocol Glue + */ + +static struct proto * +rpki_init(struct proto_config *CF) +{ + struct proto *P = proto_new(CF); + struct rpki_proto *p = (void *) P; + + proto_configure_channel(&p->p, &p->roa4_channel, proto_cf_find_channel(CF, NET_ROA4)); + proto_configure_channel(&p->p, &p->roa6_channel, proto_cf_find_channel(CF, NET_ROA6)); + + return P; +} + +static int +rpki_start(struct proto *P) +{ + struct rpki_proto *p = (void *) P; + struct rpki_config *cf = (void *) P->cf; + + p->cache = rpki_init_cache(p, cf); + rpki_start_cache(p->cache); + + return PS_START; +} + +static void +rpki_get_status(struct proto *P, byte *buf) +{ + struct rpki_proto *p = (struct rpki_proto *) P; + + if (P->proto_state == PS_DOWN) + { + *buf = 0; + return; + } + + if (p->cache) + bsprintf(buf, "%s", 
rpki_cache_state_to_str(p->cache->state)); + else + bsprintf(buf, "No cache server configured"); +} + +static void +rpki_show_proto_info_timer(const char *name, uint num, timer *t) +{ + if (tm_active(t)) + cli_msg(-1006, " %-16s: %t/%u", name, tm_remains(t), num); + else + cli_msg(-1006, " %-16s: ---", name); +} + +static void +rpki_show_proto_info(struct proto *P) +{ + struct rpki_proto *p = (struct rpki_proto *) P; + struct rpki_config *cf = (void *) p->p.cf; + struct rpki_cache *cache = p->cache; + + if (P->proto_state == PS_DOWN) + return; + + if (cache) + { + const char *transport_name = "---"; + + switch (cf->tr_config.type) + { + case RPKI_TR_SSH: transport_name = "SSHv2"; break; + case RPKI_TR_TCP: transport_name = "Unprotected over TCP"; break; + }; + + cli_msg(-1006, " Cache server: %s", rpki_get_cache_ident(cache)); + cli_msg(-1006, " Status: %s", rpki_cache_state_to_str(cache->state)); + cli_msg(-1006, " Transport: %s", transport_name); + cli_msg(-1006, " Protocol version: %u", cache->version); + + if (cache->request_session_id) + cli_msg(-1006, " Session ID: ---"); + else + cli_msg(-1006, " Session ID: %u", cache->session_id); + + if (cache->last_update) + { + cli_msg(-1006, " Serial number: %u", cache->serial_num); + cli_msg(-1006, " Last update: before %t s", current_time() - cache->last_update); + } + else + { + cli_msg(-1006, " Serial number: ---"); + cli_msg(-1006, " Last update: ---"); + } + + rpki_show_proto_info_timer("Refresh timer", cache->refresh_interval, cache->refresh_timer); + rpki_show_proto_info_timer("Retry timer", cache->retry_interval, cache->retry_timer); + rpki_show_proto_info_timer("Expire timer", cache->expire_interval, cache->expire_timer); + + if (p->roa4_channel) + channel_show_info(p->roa4_channel); + else + cli_msg(-1006, " No roa4 channel"); + + if (p->roa6_channel) + channel_show_info(p->roa6_channel); + else + cli_msg(-1006, " No roa6 channel"); + } +} + + +/* + * RPKI Protocol Configuration + */ + +/** + * 
rpki_check_config - check and complete configuration of RPKI protocol + * @cf: RPKI configuration + * + * This function is called at the end of parsing RPKI protocol configuration. + */ +void +rpki_check_config(struct rpki_config *cf) +{ + /* Do not check templates at all */ + if (cf->c.class == SYM_TEMPLATE) + return; + + if (ipa_zero(cf->ip) && cf->hostname == NULL) + cf_error("IP address or hostname of cache server must be set"); + + /* Set default transport type */ + if (cf->tr_config.spec == NULL) + { + cf->tr_config.spec = cfg_allocz(sizeof(struct rpki_tr_tcp_config)); + cf->tr_config.type = RPKI_TR_TCP; + } + + if (cf->port == 0) + { + /* Set default port numbers */ + switch (cf->tr_config.type) + { + case RPKI_TR_SSH: + cf->port = RPKI_SSH_PORT; + break; + default: + cf->port = RPKI_TCP_PORT; + } + } +} + +static void +rpki_postconfig(struct proto_config *CF) +{ + /* Define default channel */ + if (EMPTY_LIST(CF->channels)) + channel_config_new(NULL, net_label[CF->net_type], CF->net_type, CF); +} + +static void +rpki_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) +{ + /* FIXME: Should copy transport */ +} + +struct protocol proto_rpki = { + .name = "RPKI", + .template = "rpki%d", + .preference = DEF_PREF_RPKI, + .proto_size = sizeof(struct rpki_proto), + .config_size = sizeof(struct rpki_config), + .init = rpki_init, + .start = rpki_start, + .postconfig = rpki_postconfig, + .channel_mask = (NB_ROA4 | NB_ROA6), + .show_proto_info = rpki_show_proto_info, + .shutdown = rpki_shutdown, + .copy_config = rpki_copy_config, + .reconfigure = rpki_reconfigure, + .get_status = rpki_get_status, +}; diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h new file mode 100644 index 00000000..8972b33a --- /dev/null +++ b/proto/rpki/rpki.h @@ -0,0 +1,165 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * Using RTRlib: 
http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_RPKI_H_ +#define _BIRD_RPKI_H_ + +#include "nest/bird.h" +#include "nest/route.h" +#include "nest/protocol.h" +#include "lib/socket.h" +#include "lib/ip.h" + +#include "transport.h" +#include "packets.h" + +#define RPKI_TCP_PORT 323 +#define RPKI_SSH_PORT 22 +#define RPKI_RETRY_INTERVAL 600 +#define RPKI_REFRESH_INTERVAL 3600 +#define RPKI_EXPIRE_INTERVAL 7200 + +#define RPKI_VERSION_0 0 +#define RPKI_VERSION_1 1 +#define RPKI_MAX_VERSION RPKI_VERSION_1 + + +/* + * RPKI Cache + */ + +enum rpki_cache_state { + RPKI_CS_CONNECTING, /* Socket is establishing the transport connection. */ + RPKI_CS_ESTABLISHED, /* Connection is established, socket is waiting for a Serial Notify or expiration of the refresh_interval timer */ + RPKI_CS_RESET, /* Resetting RTR connection. */ + RPKI_CS_SYNC_START, /* Sending a Serial/Reset Query PDU and expecting a Cache Response PDU */ + RPKI_CS_SYNC_RUNNING, /* Receiving validation records from the RTR server. A state between Cache Response PDU and End of Data PDU */ + RPKI_CS_FAST_RECONNECT, /* Reconnect without any waiting period */ + RPKI_CS_NO_INCR_UPDATE_AVAIL, /* Server is unable to answer the last Serial Query and sent Cache Reset. */ + RPKI_CS_ERROR_NO_DATA_AVAIL, /* Server is unable to answer either a Serial Query or a Reset Query because it has no useful data available at this time. */ + RPKI_CS_ERROR_FATAL, /* Fatal protocol error occurred. */ + RPKI_CS_ERROR_TRANSPORT, /* Error on the transport socket occurred. */ + RPKI_CS_SHUTDOWN, /* RTR Socket is stopped. 
*/ +}; + +struct rpki_cache { + pool *pool; /* Pool containing cache objects */ + struct rpki_proto *p; + + struct rpki_tr_sock *tr_sock; /* Transport specific socket */ + enum rpki_cache_state state; /* RPKI_CS_* */ + u32 session_id; + u8 request_session_id; /* 1: have to request new session id; 0: we have already received session id */ + u32 serial_num; /* Serial number denotes the logical version of data from cache server */ + u8 version; /* Protocol version */ + btime last_update; /* Last successful synchronization with cache server */ + btime last_rx_prefix; /* Last received prefix PDU */ + + /* Intervals can be changed by cache server on the fly */ + u32 refresh_interval; /* Actual refresh interval (in seconds) */ + u32 retry_interval; + u32 expire_interval; + timer *retry_timer; /* Retry timer event */ + timer *refresh_timer; /* Refresh timer event */ + timer *expire_timer; /* Expire timer event */ +}; + +const char *rpki_get_cache_ident(struct rpki_cache *cache); +const char *rpki_cache_state_to_str(enum rpki_cache_state state); + + +/* + * Routes handling + */ + +void rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); +void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); + + +/* + * RPKI Protocol Logic + */ + +void rpki_cache_change_state(struct rpki_cache *cache, const enum rpki_cache_state new_state); + + +/* + * RPKI Timer Events + */ + +const char *rpki_check_refresh_interval(uint seconds); +const char *rpki_check_retry_interval(uint seconds); +const char *rpki_check_expire_interval(uint seconds); + + +/* + * RPKI Protocol Configuration + */ + +struct rpki_proto { + struct proto p; + struct rpki_cache *cache; + + struct channel *roa4_channel; + struct channel *roa6_channel; + u8 refresh_channels; /* For non-incremental updates using rt_refresh_begin(), rt_refresh_end() */ +}; + +struct rpki_config { + struct proto_config c; + const char *hostname; /* 
Full domain name or stringified IP address of cache server */ + ip_addr ip; /* IP address of cache server or IPA_NONE */ + u16 port; /* Port number of cache server */ + struct rpki_tr_config tr_config; /* Specific transport configuration structure */ + u32 refresh_interval; /* Time interval (in seconds) for periodical downloading data from cache server */ + u32 retry_interval; /* Time interval (in seconds) for an unreachable server */ + u32 expire_interval; /* Maximal lifetime (in seconds) of ROAs without any successful refreshment */ + u8 keep_refresh_interval:1; /* Do not overwrite refresh interval by cache server update */ + u8 keep_retry_interval:1; /* Do not overwrite retry interval by cache server update */ + u8 keep_expire_interval:1; /* Do not overwrite expire interval by cache server update */ +}; + +void rpki_check_config(struct rpki_config *cf); + + +/* + * Logger + */ + +#define RPKI_LOG(log_level, rpki, msg, args...) \ + do { \ + log(log_level "%s: " msg, (rpki)->p.name , ## args); \ + } while(0) + +#if defined(LOCAL_DEBUG) || defined(GLOBAL_DEBUG) +#define CACHE_DBG(cache,msg,args...) \ + do { \ + RPKI_LOG(L_DEBUG, (cache)->p, "%s [%s] %s " msg, rpki_get_cache_ident(cache), rpki_cache_state_to_str((cache)->state), __func__, ## args); \ + } while(0) +#else +#define CACHE_DBG(cache,msg,args...) do { } while(0) +#endif + +#define RPKI_TRACE(level,rpki,msg,args...) \ + do { \ + if ((rpki)->p.debug & level) \ + RPKI_LOG(L_TRACE, rpki, msg, ## args); \ + } while(0) + +#define CACHE_TRACE(level,cache,msg,args...) \ + do { \ + if ((cache)->p->p.debug & level) \ + RPKI_LOG(L_TRACE, (cache)->p, msg, ## args); \ + } while(0) + +#define RPKI_WARN(p, msg, args...) 
RPKI_LOG(L_WARN, p, msg, ## args); + +#endif /* _BIRD_RPKI_H_ */ diff --git a/proto/rpki/ssh_transport.c b/proto/rpki/ssh_transport.c new file mode 100644 index 00000000..13e061fc --- /dev/null +++ b/proto/rpki/ssh_transport.c @@ -0,0 +1,73 @@ +/* + * BIRD -- An implementation of the SSH protocol for the RPKI transport + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * This transport implementation uses libssh (http://www.libssh.org/) + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> + +#include "rpki.h" + +static int +rpki_tr_ssh_open(struct rpki_tr_sock *tr) +{ + struct rpki_cache *cache = tr->cache; + struct rpki_config *cf = (void *) cache->p->p.cf; + struct rpki_tr_ssh_config *ssh_cf = (void *) cf->tr_config.spec; + sock *sk = tr->sk; + + sk->type = SK_SSH_ACTIVE; + sk->ssh = mb_allocz(sk->pool, sizeof(struct ssh_sock)); + sk->ssh->username = ssh_cf->user; + sk->ssh->client_privkey_path = ssh_cf->bird_private_key; + sk->ssh->server_hostkey_path = ssh_cf->cache_public_key; + sk->ssh->subsystem = "rpki-rtr"; + sk->ssh->state = SK_SSH_CONNECT; + + if (sk_open(sk) != 0) + return RPKI_TR_ERROR; + + return RPKI_TR_SUCCESS; +} + +static const char * +rpki_tr_ssh_ident(struct rpki_tr_sock *tr) +{ + struct rpki_cache *cache = tr->cache; + struct rpki_config *cf = (void *) cache->p->p.cf; + struct rpki_tr_ssh_config *ssh_cf = (void *) cf->tr_config.spec; + + if (tr->ident != NULL) + return tr->ident; + + const char *username = ssh_cf->user; + const char *host = cf->hostname; + u16 port = cf->port; + + size_t len = strlen(username) + 1 + strlen(host) + 1 + 5 + 1; /* <user> + '@' + <host> + ':' + <port> + '\0' */ + char *ident = mb_alloc(cache->pool, len); + bsnprintf(ident, len, "%s@%s:%u", username, host, port); + tr->ident = ident; + + return tr->ident; +} + +/** 
+ * rpki_tr_ssh_init - initializes the RPKI transport structure for a SSH connection + * @tr: allocated RPKI transport structure + */ +void +rpki_tr_ssh_init(struct rpki_tr_sock *tr) +{ + tr->open_fp = &rpki_tr_ssh_open; + tr->ident_fp = &rpki_tr_ssh_ident; +} diff --git a/proto/rpki/tcp_transport.c b/proto/rpki/tcp_transport.c new file mode 100644 index 00000000..0d3c7e99 --- /dev/null +++ b/proto/rpki/tcp_transport.c @@ -0,0 +1,76 @@ +/* + * BIRD -- An implementation of the TCP protocol for the RPKI protocol transport + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <errno.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "rpki.h" +#include "sysdep/unix/unix.h" + +static int +rpki_tr_tcp_open(struct rpki_tr_sock *tr) +{ + sock *sk = tr->sk; + + sk->type = SK_TCP_ACTIVE; + + if (sk_open(sk) != 0) + return RPKI_TR_ERROR; + + return RPKI_TR_SUCCESS; +} + +static const char * +rpki_tr_tcp_ident(struct rpki_tr_sock *tr) +{ + struct rpki_cache *cache = tr->cache; + struct rpki_config *cf = (void *) cache->p->p.cf; + + if (tr->ident != NULL) + return tr->ident; + + const char *host = cf->hostname; + ip_addr ip = cf->ip; + u16 port = cf->port; + + size_t colon_and_port_len = 6; /* max ":65535" */ + size_t ident_len; + if (host) + ident_len = strlen(host) + colon_and_port_len + 1; + else + ident_len = IPA_MAX_TEXT_LENGTH + colon_and_port_len + 1; + + char *ident = mb_alloc(cache->pool, ident_len); + if (host) + bsnprintf(ident, ident_len, "%s:%u", host, port); + else + bsnprintf(ident, ident_len, "%I:%u", ip, port); + + tr->ident = ident; + return tr->ident; +} + +/** + * rpki_tr_tcp_init - initializes the RPKI transport structure for a TCP connection + * @tr: allocated RPKI 
transport structure + */ +void +rpki_tr_tcp_init(struct rpki_tr_sock *tr) +{ + tr->open_fp = &rpki_tr_tcp_open; + tr->ident_fp = &rpki_tr_tcp_ident; +} diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c new file mode 100644 index 00000000..182667be --- /dev/null +++ b/proto/rpki/transport.c @@ -0,0 +1,135 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <sys/socket.h> +#include <netdb.h> + +#include "rpki.h" +#include "transport.h" +#include "sysdep/unix/unix.h" + +/** + * rpki_hostname_autoresolv - auto-resolve an IP address from a hostname + * @host: domain name of host, e.g. "rpki-validator.realmv6.org" + * + * This function resolves an IP address from a hostname. + * Returns &ip_addr structure with IP address or |IPA_NONE|. + */ +static ip_addr +rpki_hostname_autoresolv(const char *host) +{ + ip_addr addr = {}; + struct addrinfo *res; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_ADDRCONFIG, + }; + + if (!host) + return IPA_NONE; + + int err_code = getaddrinfo(host, NULL, &hints, &res); + if (err_code != 0) + { + log(L_DEBUG "getaddrinfo failed: %s", gai_strerror(err_code)); + return IPA_NONE; + } + + sockaddr sa = { + .sa = *res->ai_addr, + }; + + uint unused; + sockaddr_read(&sa, res->ai_family, &addr, NULL, &unused); + + freeaddrinfo(res); + return addr; +} + +/** + * rpki_tr_open - prepare and open a socket connection + * @tr: initialized transport socket + * + * Prepare and open a socket connection specified by @tr that must be initialized before. + * This function ends with a calling the sk_open() function. + * Returns RPKI_TR_SUCCESS or RPKI_TR_ERROR. 
+ */ +int +rpki_tr_open(struct rpki_tr_sock *tr) +{ + struct rpki_cache *cache = tr->cache; + struct rpki_config *cf = (void *) cache->p->p.cf; + + ASSERT(tr->sk == NULL); + tr->sk = sk_new(cache->pool); + sock *sk = tr->sk; + + /* sk->type -1 is invalid value, a correct value MUST be set in the specific transport layer in open_fp() hook */ + sk->type = -1; + + sk->tx_hook = rpki_connected_hook; + sk->err_hook = rpki_err_hook; + sk->data = cache; + sk->daddr = cf->ip; + sk->dport = cf->port; + sk->host = cf->hostname; + sk->rbsize = RPKI_RX_BUFFER_SIZE; + sk->tbsize = RPKI_TX_BUFFER_SIZE; + sk->tos = IP_PREC_INTERNET_CONTROL; + + if (ipa_zero2(sk->daddr) && sk->host) + { + sk->daddr = rpki_hostname_autoresolv(sk->host); + if (ipa_zero(sk->daddr)) + { + CACHE_TRACE(D_EVENTS, cache, "Cannot resolve the hostname '%s'", sk->host); + return RPKI_TR_ERROR; + } + } + + return tr->open_fp(tr); +} + +/** + * rpki_tr_close - close socket and prepare it for possible next open + * @tr: successfully opened transport socket + * + * Close socket and free resources. + */ +void +rpki_tr_close(struct rpki_tr_sock *tr) +{ + if (tr->ident) + { + mb_free((char *) tr->ident); + tr->ident = NULL; + } + + if (tr->sk) + { + rfree(tr->sk); + tr->sk = NULL; + } +} + +/** + * rpki_tr_ident - Returns a string identifier for the rpki transport socket + * @tr: successfully opened transport socket + * + * Returns a \0 terminated string identifier for the socket endpoint, e.g. "<host>:<port>". + * Memory is allocated inside @tr structure. 
+ */ +inline const char * +rpki_tr_ident(struct rpki_tr_sock *tr) +{ + return tr->ident_fp(tr); +} diff --git a/proto/rpki/transport.h b/proto/rpki/transport.h new file mode 100644 index 00000000..f90b7e42 --- /dev/null +++ b/proto/rpki/transport.h @@ -0,0 +1,79 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/* + * The RPKI transport sockets implement the communication channel + * (e.g., SSH, TCP, TCP-AO) between an RPKI server and client. + * + * Before using the transport socket, a tr_socket must be + * initialized based on a protocol-dependent init function (e.g., + * rpki_tr_tcp_init()). + * + * The rpki_tr_* functions call the corresponding function pointers, which are + * passed in the rpki_tr_sock structure, and forward the remaining arguments. + */ + +#ifndef _BIRD_RPKI_TRANSPORT_H_ +#define _BIRD_RPKI_TRANSPORT_H_ + +#include <time.h> + +/* The return values for rpki_tr_ functions */ +enum rpki_tr_rtvals { + RPKI_TR_SUCCESS = 0, /* Operation was successful */ + RPKI_TR_ERROR = -1, /* Error occurred */ + RPKI_TR_WOULDBLOCK = -2, /* No data is available on the socket */ + RPKI_TR_INTR = -3, /* Call was interrupted from a signal */ + RPKI_TR_CLOSED = -4 /* Connection closed */ +}; + +/* A transport socket structure */ +struct rpki_tr_sock { + sock *sk; /* Standard BIRD socket */ + struct rpki_cache *cache; /* Cache server */ + int (*open_fp)(struct rpki_tr_sock *); /* Function that establishes the socket connection */ + const char *(*ident_fp)(struct rpki_tr_sock *); /* Function that returns an identifier for the socket endpoint */ + const char *ident; /* Internal. 
Use ident_fp() hook instead of this pointer */ +}; + +int rpki_tr_open(struct rpki_tr_sock *tr); +void rpki_tr_close(struct rpki_tr_sock *tr); +const char *rpki_tr_ident(struct rpki_tr_sock *tr); + +/* Types of supported transports */ +enum rpki_tr_type { + RPKI_TR_TCP, /* Unprotected transport over TCP */ + RPKI_TR_SSH, /* Protected transport by SSHv2 connection */ +}; + +/* Common configure structure for transports */ +struct rpki_tr_config { + enum rpki_tr_type type; /* RPKI_TR_TCP or RPKI_TR_SSH */ + const void *spec; /* Specific transport configuration, i.e. rpki_tr_tcp_config or rpki_tr_ssh_config */ +}; + +struct rpki_tr_tcp_config { + /* No internal configuration data */ +}; + +struct rpki_tr_ssh_config { + const char *bird_private_key; /* Filepath to the BIRD server private key */ + const char *cache_public_key; /* Filepath to the public key of cache server, can be file known_hosts */ + const char *user; /* Username for SSH connection */ +}; + +/* ssh_transport.c */ +void rpki_tr_ssh_init(struct rpki_tr_sock *tr); + +/* tcp_transport.c */ +void rpki_tr_tcp_init(struct rpki_tr_sock *tr); + +#endif /* _BIRD_RPKI_TRANSPORT_H_ */ diff --git a/proto/static/Makefile b/proto/static/Makefile index 61fadbea..e38f9b74 100644 --- a/proto/static/Makefile +++ b/proto/static/Makefile @@ -1,6 +1,6 @@ -source=static.c -root-rel=../../ -dir-name=proto/static - -include ../../Rules +src := static.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/static/config.Y b/proto/static/config.Y index 182721b3..66e5ea4c 100644 --- a/proto/static/config.Y +++ b/proto/static/config.Y @@ -13,98 +13,119 @@ CF_HDR CF_DEFINES #define STATIC_CFG ((struct static_config *) this_proto) -static struct static_route *this_srt, *this_srt_nh, *last_srt_nh; +static struct static_route *this_srt, *this_snh; static struct f_inst **this_srt_last_cmd; +static struct static_route * +static_nexthop_new(void) +{ + struct static_route *nh = this_srt; + + if (this_snh) + { + /* Additional next hop */ + nh = cfg_allocz(sizeof(struct static_route)); + nh->net = this_srt->net; + this_snh->mp_next = nh; + } + + nh->dest = RTD_UNICAST; + nh->mp_head = this_srt; + return nh; +}; + static void static_route_finish(void) { - struct static_route *r; - - /* Update undefined use_bfd entries in multipath nexthops */ - if (this_srt->dest == RTD_MULTIPATH) - for (r = this_srt->mp_next; r; r = r->mp_next) - if (r->use_bfd < 0) - r->use_bfd = this_srt->use_bfd; + if (net_type_match(this_srt->net, NB_DEST) == !this_srt->dest) + cf_error("Unexpected or missing nexthop/type"); } CF_DECLS CF_KEYWORDS(STATIC, ROUTE, VIA, DROP, REJECT, PROHIBIT, PREFERENCE, CHECK, LINK) -CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE, BFD) +CF_KEYWORDS(ONLINK, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE, BFD, MPLS) CF_GRAMMAR CF_ADDTO(proto, static_proto '}') -static_proto_start: proto_start STATIC { - this_proto = proto_config_new(&proto_static, $1); - static_init_config((struct static_config *) this_proto); - } - ; +static_proto_start: proto_start STATIC +{ + this_proto = proto_config_new(&proto_static, $1); + init_list(&STATIC_CFG->routes); +}; static_proto: static_proto_start proto_name '{' | static_proto proto_item ';' + | static_proto proto_channel ';' { this_proto->net_type = $2->net_type; } | static_proto CHECK LINK bool ';' { STATIC_CFG->check_link = $4; } - | static_proto IGP TABLE 
rtable ';' { STATIC_CFG->igp_table = $4; } + | static_proto IGP TABLE rtable ';' { + if ($4->addr_type == NET_IP4) + STATIC_CFG->igp_table_ip4 = $4; + else if ($4->addr_type == NET_IP6) + STATIC_CFG->igp_table_ip6 = $4; + else + cf_error("Incompatible IGP table type"); + } | static_proto stat_route stat_route_opt_list ';' { static_route_finish(); } ; -stat_route0: ROUTE prefix { - this_srt = cfg_allocz(sizeof(struct static_route)); - add_tail(&STATIC_CFG->other_routes, &this_srt->n); - this_srt->net = $2.addr; - this_srt->masklen = $2.len; - this_srt_last_cmd = &(this_srt->cmds); +stat_nexthop: + VIA ipa ipa_scope { + this_snh = static_nexthop_new(); + this_snh->via = $2; + this_snh->iface = $3; + } + | VIA TEXT { + this_snh = static_nexthop_new(); + this_snh->via = IPA_NONE; + this_snh->iface = if_get_by_name($2); + } + | stat_nexthop MPLS label_stack { + this_snh->mls = $3; } - ; + | stat_nexthop ONLINK bool { + this_snh->onlink = $3; + } + | stat_nexthop WEIGHT expr { + this_snh->weight = $3 - 1; + if (($3<1) || ($3>256)) cf_error("Weight must be in range 1-256"); + } + | stat_nexthop BFD bool { + this_snh->use_bfd = $3; cf_check_bfd($3); + } +; -stat_multipath1: - VIA ipa ipa_scope { - last_srt_nh = this_srt_nh; - this_srt_nh = cfg_allocz(sizeof(struct static_route)); - this_srt_nh->dest = RTD_NONE; - this_srt_nh->via = $2; - this_srt_nh->via_if = $3; - this_srt_nh->if_name = (void *) this_srt; /* really */ - this_srt_nh->use_bfd = -1; /* undefined */ - } - | stat_multipath1 WEIGHT expr { - this_srt_nh->masklen = $3 - 1; /* really */ - if (($3<1) || ($3>256)) cf_error("Weight must be in range 1-256"); - } - | stat_multipath1 BFD bool { - this_srt_nh->use_bfd = $3; cf_check_bfd($3); - } - ; +stat_nexthops: + stat_nexthop + | stat_nexthops stat_nexthop +; -stat_multipath: - stat_multipath1 { this_srt->mp_next = this_srt_nh; } - | stat_multipath stat_multipath1 { last_srt_nh->mp_next = this_srt_nh; } +stat_route0: ROUTE net_any { + this_srt = 
cfg_allocz(sizeof(struct static_route)); + add_tail(&STATIC_CFG->routes, &this_srt->n); + this_srt->net = $2; + this_srt_last_cmd = &(this_srt->cmds); + this_srt->mp_next = NULL; + this_snh = NULL; + } ; stat_route: - stat_route0 VIA ipa ipa_scope { - this_srt->dest = RTD_ROUTER; + stat_route0 stat_nexthops + | stat_route0 RECURSIVE ipa { + this_srt->dest = RTDX_RECURSIVE; this_srt->via = $3; - this_srt->via_if = $4; - } - | stat_route0 VIA TEXT { - this_srt->dest = RTD_DEVICE; - this_srt->if_name = $3; - rem_node(&this_srt->n); - add_tail(&STATIC_CFG->iface_routes, &this_srt->n); } - | stat_route0 MULTIPATH stat_multipath { - this_srt->dest = RTD_MULTIPATH; - } - | stat_route0 RECURSIVE ipa { + | stat_route0 RECURSIVE ipa MPLS label_stack { this_srt->dest = RTDX_RECURSIVE; this_srt->via = $3; + this_srt->mls = $5; } - + | stat_route0 { this_srt->dest = RTD_NONE; } | stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; } | stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; } | stat_route0 BLACKHOLE { this_srt->dest = RTD_BLACKHOLE; } @@ -114,7 +135,6 @@ stat_route: stat_route_item: cmd { *this_srt_last_cmd = $1; this_srt_last_cmd = &($1->next); } - | BFD bool ';' { this_srt->use_bfd = $2; cf_check_bfd($2); } ; stat_route_opts: diff --git a/proto/static/static.c b/proto/static/static.c index 849067b9..ede4c734 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -9,33 +9,32 @@ /** * DOC: Static * - * The Static protocol is implemented in a straightforward way. It keeps - * two lists of static routes: one containing interface routes and one - * holding the remaining ones. Interface routes are inserted and removed according - * to interface events received from the core via the if_notify() hook. Routes - * pointing to a neighboring router use a sticky node in the neighbor cache - * to be notified about gaining or losing the neighbor. Special - * routes like black holes or rejects are inserted all the time. 
+ * The Static protocol is implemented in a straightforward way. It keeps a list + * of static routes. Routes of dest RTD_UNICAST have associated sticky node in + * the neighbor cache to be notified about gaining or losing the neighbor and + * about interface-related events (e.g. link down). They may also have a BFD + * request if associated with a BFD session. When a route is notified, + * static_decide() is used to see whether the route activeness is changed. In + * such case, the route is marked as dirty and scheduled to be announced or + * withdrawn, which is done asynchronously from event hook. Routes of other + * types (e.g. black holes) are announced all the time. * - * Multipath routes are tricky. Because these routes depends on - * several neighbors we need to integrate that to the neighbor - * notification handling, we use dummy static_route nodes, one for - * each nexthop. Therefore, a multipath route consists of a master - * static_route node (of dest RTD_MULTIPATH), which specifies prefix - * and is used in most circumstances, and a list of dummy static_route - * nodes (of dest RTD_NONE), which stores info about nexthops and are - * connected to neighbor entries and neighbor notifications. Dummy - * nodes are chained using mp_next, they aren't in other_routes list, - * and abuse some fields (masklen, if_name) for other purposes. + * Multipath routes are a bit tricky. To represent additional next hops, dummy + * static_route nodes are used, which are chained using @mp_next field and link + * to the master node by @mp_head field. Each next hop has a separate neighbor + * entry and an activeness state, but the master node is used for most purposes. + * Note that most functions DO NOT accept dummy nodes as arguments. 
* * The only other thing worth mentioning is that when asked for reconfiguration, * Static not only compares the two configurations, but it also calculates - * difference between the lists of static routes and it just inserts the - * newly added routes and removes the obsolete ones. + * difference between the lists of static routes and it just inserts the newly + * added routes, removes the obsolete ones and reannounces changed ones. */ #undef LOCAL_DEBUG +#include <stdlib.h> + #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" @@ -50,107 +49,117 @@ static linpool *static_lp; -static inline rtable * -p_igp_table(struct proto *p) -{ - struct static_config *cf = (void *) p->cf; - return cf->igp_table ? cf->igp_table->table : p->table; -} - static void -static_install(struct proto *p, struct static_route *r, struct iface *ifa) +static_announce_rte(struct static_proto *p, struct static_route *r) { - net *n; - rta a; - rte *e; + rta *a = allocz(RTA_MAX_SIZE); + a->src = p->p.main_source; + a->source = RTS_STATIC; + a->scope = SCOPE_UNIVERSE; + a->dest = r->dest; - if (r->installed > 0) - return; + if (r->dest == RTD_UNICAST) + { + struct static_route *r2; + struct nexthop *nhs = NULL; - DBG("Installing static route %I/%d, rtd=%d\n", r->net, r->masklen, r->dest); - bzero(&a, sizeof(a)); - a.src = p->main_source; - a.source = (r->dest == RTD_DEVICE) ? 
RTS_STATIC_DEVICE : RTS_STATIC; - a.scope = SCOPE_UNIVERSE; - a.cast = RTC_UNICAST; - a.dest = r->dest; - a.gw = r->via; - a.iface = ifa; - - if (r->dest == RTD_MULTIPATH) + for (r2 = r; r2; r2 = r2->mp_next) { - struct static_route *r2; - struct mpnh *nhs = NULL; - - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - if (r2->installed) - { - struct mpnh *nh = alloca(sizeof(struct mpnh)); - nh->gw = r2->via; - nh->iface = r2->neigh->iface; - nh->weight = r2->masklen; /* really */ - mpnh_insert(&nhs, nh); - } - - /* There is at least one nexthop */ - if (!nhs->next) - { - /* Fallback to unipath route for exactly one nexthop */ - a.dest = RTD_ROUTER; - a.gw = nhs->gw; - a.iface = nhs->iface; - } - else - a.nexthops = nhs; + if (!r2->active) + continue; + + struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE); + nh->gw = r2->via; + nh->iface = r2->neigh->iface; + nh->flags = r2->onlink ? RNF_ONLINK : 0; + nh->weight = r2->weight; + if (r2->mls) + { + nh->labels = r2->mls->len; + memcpy(nh->label, r2->mls->stack, r2->mls->len * sizeof(u32)); + } + + nexthop_insert(&nhs, nh); } + if (!nhs) + goto withdraw; + + nexthop_link(a, nhs); + } + if (r->dest == RTDX_RECURSIVE) - rta_set_recursive_next_hop(p->table, &a, p_igp_table(p), &r->via, &r->via); + { + rtable *tab = ipa_is_ip4(r->via) ? 
p->igp_table_ip4 : p->igp_table_ip6; + rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls); + } - /* We skip rta_lookup() here */ + /* Already announced */ + if (r->state == SRS_CLEAN) + return; - n = net_get(p->table, r->net, r->masklen); - e = rte_get_temp(&a); - e->net = n; + /* We skip rta_lookup() here */ + rte *e = rte_get_temp(a); e->pflags = 0; if (r->cmds) f_eval_rte(r->cmds, &e, static_lp); - rte_update(p, n, e); - r->installed = 1; + rte_update(&p->p, r->net, e); + r->state = SRS_CLEAN; if (r->cmds) lp_flush(static_lp); + + return; + +withdraw: + if (r->state == SRS_DOWN) + return; + + rte_update(&p->p, r->net, NULL); + r->state = SRS_DOWN; } static void -static_remove(struct proto *p, struct static_route *r) +static_mark_rte(struct static_proto *p, struct static_route *r) { - net *n; - - if (!r->installed) + if (r->state == SRS_DIRTY) return; - DBG("Removing static route %I/%d via %I\n", r->net, r->masklen, r->via); - n = net_find(p->table, r->net, r->masklen); - rte_update(p, n, NULL); - r->installed = 0; + r->state = SRS_DIRTY; + BUFFER_PUSH(p->marked) = r; + + if (!ev_active(p->event)) + ev_schedule(p->event); +} + +static void +static_announce_marked(void *P) +{ + struct static_proto *p = P; + + BUFFER_WALK(p->marked, r) + static_announce_rte(P, r); + + BUFFER_FLUSH(p->marked); } static void static_bfd_notify(struct bfd_request *req); static void -static_update_bfd(struct proto *p, struct static_route *r) +static_update_bfd(struct static_proto *p, struct static_route *r) { + /* The @r is a RTD_UNICAST next hop, may be a dummy node */ + struct neighbor *nb = r->neigh; int bfd_up = (nb->scope > 0) && r->use_bfd; if (bfd_up && !r->bfd_req) { // ip_addr local = ipa_nonzero(r->local) ? 
r->local : nb->ifa->ip; - r->bfd_req = bfd_request_session(p->pool, r->via, nb->ifa->ip, nb->iface, + r->bfd_req = bfd_request_session(p->p.pool, r->via, nb->ifa->ip, nb->iface, static_bfd_notify, r); } @@ -162,212 +171,173 @@ static_update_bfd(struct proto *p, struct static_route *r) } static int -static_decide(struct static_config *cf, struct static_route *r) +static_decide(struct static_proto *p, struct static_route *r) { - /* r->dest != RTD_MULTIPATH, but may be RTD_NONE (part of multipath route) - the route also have to be valid (r->neigh != NULL) */ + /* The @r is a RTD_UNICAST next hop, may be a dummy node */ + + struct static_config *cf = (void *) p->p.cf; + uint old_active = r->active; if (r->neigh->scope < 0) - return 0; + goto fail; if (cf->check_link && !(r->neigh->iface->flags & IF_LINK_UP)) - return 0; + goto fail; - if (r->bfd_req && r->bfd_req->state != BFD_STATE_UP) - return 0; + if (r->bfd_req && (r->bfd_req->state != BFD_STATE_UP)) + goto fail; - return 1; -} + r->active = 1; + return !old_active; +fail: + r->active = 0; + return old_active; +} static void -static_add(struct proto *p, struct static_config *cf, struct static_route *r) +static_add_rte(struct static_proto *p, struct static_route *r) { - DBG("static_add(%I/%d,%d)\n", r->net, r->masklen, r->dest); - switch (r->dest) - { - case RTD_ROUTER: - { - struct neighbor *n = neigh_find2(p, &r->via, r->via_if, NEF_STICKY); - if (n) - { - r->chain = n->data; - n->data = r; - r->neigh = n; - - static_update_bfd(p, r); - if (static_decide(cf, r)) - static_install(p, r, n->iface); - else - static_remove(p, r); - } - else - { - log(L_ERR "Static route destination %I is invalid. Ignoring.", r->via); - static_remove(p, r); - } - break; - } + if (r->dest == RTD_UNICAST) + { + struct static_route *r2; + struct neighbor *n; - case RTD_DEVICE: - break; + for (r2 = r; r2; r2 = r2->mp_next) + { + n = ipa_nonzero(r2->via) ? + neigh_find2(&p->p, &r2->via, r2->iface, + NEF_STICKY | (r2->onlink ? 
NEF_ONLINK : 0)) : + neigh_find_iface(&p->p, r2->iface); - case RTD_MULTIPATH: + if (!n) { - int count = 0; - struct static_route *r2; - - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - { - struct neighbor *n = neigh_find2(p, &r2->via, r2->via_if, NEF_STICKY); - if (n) - { - r2->chain = n->data; - n->data = r2; - r2->neigh = n; - - static_update_bfd(p, r2); - r2->installed = static_decide(cf, r2); - count += r2->installed; - } - else - { - log(L_ERR "Static route destination %I is invalid. Ignoring.", r2->via); - r2->installed = 0; - } - } - - if (count) - static_install(p, r, NULL); - else - static_remove(p, r); - break; + log(L_WARN "Invalid next hop %I of static route %N", r2->via, r2->net); + continue; } - default: - static_install(p, r, NULL); + r2->neigh = n; + r2->chain = n->data; + n->data = r2; + + static_update_bfd(p, r2); + static_decide(p, r2); } + } + + static_announce_rte(p, r); } static void -static_rte_cleanup(struct proto *p UNUSED, struct static_route *r) +static_reset_rte(struct static_proto *p UNUSED, struct static_route *r) { struct static_route *r2; - if (r->bfd_req) + for (r2 = r; r2; r2 = r2->mp_next) { - rfree(r->bfd_req); - r->bfd_req = NULL; - } + r2->neigh = NULL; + r2->chain = NULL; - if (r->dest == RTD_MULTIPATH) - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - if (r2->bfd_req) - { - rfree(r2->bfd_req); - r2->bfd_req = NULL; - } + r2->state = 0; + r2->active = 0; + + rfree(r2->bfd_req); + r2->bfd_req = NULL; + } } -static int -static_start(struct proto *p) +static void +static_remove_rte(struct static_proto *p, struct static_route *r) { - struct static_config *cf = (void *) p->cf; - struct static_route *r; + if (r->state) + rte_update(&p->p, r->net, NULL); - DBG("Static: take off!\n"); + static_reset_rte(p, r); +} - if (!static_lp) - static_lp = lp_new(&root_pool, 1008); - if (cf->igp_table) - rt_lock_table(cf->igp_table->table); +static inline int +static_same_dest(struct static_route *x, struct static_route *y) +{ + if (x->dest != 
y->dest) + return 0; + + switch (x->dest) + { + case RTD_UNICAST: + for (; x && y; x = x->mp_next, y = y->mp_next) + { + if (!ipa_equal(x->via, y->via) || + (x->iface != y->iface) || + (x->onlink != y->onlink) || + (x->weight != y->weight) || + (x->use_bfd != y->use_bfd) || + (!x->mls != !y->mls) || + ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + return 0; + + if (!x->mls) + continue; + + for (uint i = 0; i < x->mls->len; i++) + if (x->mls->stack[i] != y->mls->stack[i]) + return 0; + } + return !x && !y; - /* We have to go UP before routes could be installed */ - proto_notify_state(p, PS_UP); + case RTDX_RECURSIVE: + if (!ipa_equal(x->via, y->via) || + (!x->mls != !y->mls) || + ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + return 0; - WALK_LIST(r, cf->other_routes) - static_add(p, cf, r); - return PS_UP; -} + if (!x->mls) + return 1; -static int -static_shutdown(struct proto *p) -{ - struct static_config *cf = (void *) p->cf; - struct static_route *r; + for (uint i = 0; i < x->mls->len; i++) + if (x->mls->stack[i] != y->mls->stack[i]) + return 0; - /* Just reset the flag, the routes will be flushed by the nest */ - WALK_LIST(r, cf->iface_routes) - r->installed = 0; - WALK_LIST(r, cf->other_routes) - { - static_rte_cleanup(p, r); - r->installed = 0; - } + return 1; - return PS_DOWN; + default: + return 1; + } } -static void -static_cleanup(struct proto *p) +static inline int +static_same_rte(struct static_route *or, struct static_route *nr) { - struct static_config *cf = (void *) p->cf; - - if (cf->igp_table) - rt_unlock_table(cf->igp_table->table); + /* Note that i_same() requires arguments in (new, old) order */ + return static_same_dest(or, nr) && i_same(nr->cmds, or->cmds); } static void -static_update_rte(struct proto *p, struct static_route *r) +static_reconfigure_rte(struct static_proto *p, struct static_route *or, struct static_route *nr) { - switch (r->dest) - { - case RTD_ROUTER: - if (static_decide((struct static_config *) p->cf, 
r)) - static_install(p, r, r->neigh->iface); - else - static_remove(p, r); - break; - - case RTD_NONE: /* a part of multipath route */ - { - int decision = static_decide((struct static_config *) p->cf, r); - if (decision == r->installed) - break; /* no change */ - r->installed = decision; - - struct static_route *r1, *r2; - int count = 0; - r1 = (void *) r->if_name; /* really */ - for (r2 = r1->mp_next; r2; r2 = r2->mp_next) - count += r2->installed; - - if (count) - { - /* Set of nexthops changed - force reinstall */ - r1->installed = 0; - static_install(p, r1, NULL); - } - else - static_remove(p, r1); + if ((or->state == SRS_CLEAN) && !static_same_rte(or, nr)) + nr->state = SRS_DIRTY; + else + nr->state = or->state; - break; - } - } + static_add_rte(p, nr); + static_reset_rte(p, or); } + static void static_neigh_notify(struct neighbor *n) { - struct proto *p = n->proto; + struct static_proto *p = (void *) n->proto; struct static_route *r; DBG("Static: neighbor notify for %I: iface %p\n", n->addr, n->iface); - for(r=n->data; r; r=r->chain) + for (r = n->data; r; r = r->chain) { static_update_bfd(p, r); - static_update_rte(p, r); + + if (static_decide(p, r)) + static_mark_rte(p, r->mp_head); } } @@ -375,241 +345,232 @@ static void static_bfd_notify(struct bfd_request *req) { struct static_route *r = req->data; - struct proto *p = r->neigh->proto; + struct static_proto *p = (void *) r->neigh->proto; // if (req->down) TRACE(D_EVENTS, "BFD session down for nbr %I on %s", XXXX); - static_update_rte(p, r); + if (static_decide(p, r)) + static_mark_rte(p, r->mp_head); } -static void -static_dump_rt(struct static_route *r) +static int +static_rte_mergable(rte *pri UNUSED, rte *sec UNUSED) { - debug("%-1I/%2d: ", r->net, r->masklen); - switch (r->dest) - { - case RTD_ROUTER: - debug("via %I\n", r->via); - break; - case RTD_DEVICE: - debug("dev %s\n", r->if_name); - break; - default: - debug("rtd %d\n", r->dest); - break; - } + return 1; } + static void -static_dump(struct 
proto *p) +static_postconfig(struct proto_config *CF) { - struct static_config *c = (void *) p->cf; + struct static_config *cf = (void *) CF; struct static_route *r; - debug("Independent static routes:\n"); - WALK_LIST(r, c->other_routes) - static_dump_rt(r); - debug("Device static routes:\n"); - WALK_LIST(r, c->iface_routes) - static_dump_rt(r); -} + if (EMPTY_LIST(CF->channels)) + cf_error("Channel not specified"); -static void -static_if_notify(struct proto *p, unsigned flags, struct iface *i) -{ - struct static_route *r; - struct static_config *c = (void *) p->cf; + struct channel_config *cc = proto_cf_main_channel(CF); - if (flags & IF_CHANGE_UP) - { - WALK_LIST(r, c->iface_routes) - if (!strcmp(r->if_name, i->name)) - static_install(p, r, i); - } - else if (flags & IF_CHANGE_DOWN) - { - WALK_LIST(r, c->iface_routes) - if (!strcmp(r->if_name, i->name)) - static_remove(p, r); - } -} + if (!cf->igp_table_ip4) + cf->igp_table_ip4 = (cc->table->addr_type == NET_IP4) ? + cc->table : cf->c.global->def_tables[NET_IP4]; -int -static_rte_mergable(rte *pri UNUSED, rte *sec UNUSED) -{ - return 1; -} + if (!cf->igp_table_ip6) + cf->igp_table_ip6 = (cc->table->addr_type == NET_IP6) ? 
+ cc->table : cf->c.global->def_tables[NET_IP6]; -void -static_init_config(struct static_config *c) -{ - init_list(&c->iface_routes); - init_list(&c->other_routes); + WALK_LIST(r, cf->routes) + if (r->net && (r->net->type != CF->net_type)) + cf_error("Route %N incompatible with channel type", r->net); } static struct proto * -static_init(struct proto_config *c) +static_init(struct proto_config *CF) { - struct proto *p = proto_new(c, sizeof(struct proto)); + struct proto *P = proto_new(CF); + struct static_proto *p = (void *) P; + struct static_config *cf = (void *) CF; - p->neigh_notify = static_neigh_notify; - p->if_notify = static_if_notify; - p->rte_mergable = static_rte_mergable; + P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - return p; -} + P->neigh_notify = static_neigh_notify; + P->rte_mergable = static_rte_mergable; -static inline int -static_same_net(struct static_route *x, struct static_route *y) -{ - return ipa_equal(x->net, y->net) && (x->masklen == y->masklen); + if (cf->igp_table_ip4) + p->igp_table_ip4 = cf->igp_table_ip4->table; + + if (cf->igp_table_ip6) + p->igp_table_ip6 = cf->igp_table_ip6->table; + + return P; } -static inline int -static_same_dest(struct static_route *x, struct static_route *y) +static int +static_start(struct proto *P) { - if (x->dest != y->dest) - return 0; + struct static_proto *p = (void *) P; + struct static_config *cf = (void *) P->cf; + struct static_route *r; - switch (x->dest) - { - case RTD_ROUTER: - return ipa_equal(x->via, y->via) && (x->via_if == y->via_if); + if (!static_lp) + static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024)); - case RTD_DEVICE: - return !strcmp(x->if_name, y->if_name); + if (p->igp_table_ip4) + rt_lock_table(p->igp_table_ip4); - case RTD_MULTIPATH: - for (x = x->mp_next, y = y->mp_next; - x && y; - x = x->mp_next, y = y->mp_next) - if (!ipa_equal(x->via, y->via) || (x->via_if != y->via_if) || (x->use_bfd != y->use_bfd)) - return 0; - return !x && !y; + if 
(p->igp_table_ip6) + rt_lock_table(p->igp_table_ip6); - case RTDX_RECURSIVE: - return ipa_equal(x->via, y->via); + p->event = ev_new(p->p.pool); + p->event->hook = static_announce_marked; + p->event->data = p; - default: - return 1; - } + BUFFER_INIT(p->marked, p->p.pool, 4); + + /* We have to go UP before routes could be installed */ + proto_notify_state(P, PS_UP); + + WALK_LIST(r, cf->routes) + static_add_rte(p, r); + + return PS_UP; } -static inline int -static_same_rte(struct static_route *x, struct static_route *y) +static int +static_shutdown(struct proto *P) { - /* Note that i_same() requires arguments in (new, old) order */ - return static_same_dest(x, y) && i_same(y->cmds, x->cmds); -} + struct static_proto *p = (void *) P; + struct static_config *cf = (void *) P->cf; + struct static_route *r; + /* Just reset the flag, the routes will be flushed by the nest */ + WALK_LIST(r, cf->routes) + static_reset_rte(p, r); + + return PS_DOWN; +} static void -static_match(struct proto *p, struct static_route *r, struct static_config *n) +static_cleanup(struct proto *P) { - struct static_route *t; - - /* - * For given old route *r we find whether a route to the same - * network is also in the new route list. In that case, we keep the - * route and possibly update the route later if destination changed. - * Otherwise, we remove the route. 
- */ + struct static_proto *p = (void *) P; - if (r->neigh) - r->neigh->data = NULL; + if (p->igp_table_ip4) + rt_unlock_table(p->igp_table_ip4); - WALK_LIST(t, n->iface_routes) - if (static_same_net(r, t)) - goto found; + if (p->igp_table_ip6) + rt_unlock_table(p->igp_table_ip6); +} - WALK_LIST(t, n->other_routes) - if (static_same_net(r, t)) - goto found; +static void +static_dump_rte(struct static_route *r) +{ + debug("%-1N: ", r->net); + if (r->dest == RTD_UNICAST) + if (r->iface && ipa_zero(r->via)) + debug("dev %s\n", r->iface->name); + else + debug("via %I%J\n", r->via, r->iface); + else + debug("rtd %d\n", r->dest); +} - static_remove(p, r); - return; +static void +static_dump(struct proto *P) +{ + struct static_config *c = (void *) P->cf; + struct static_route *r; - found: - /* If destination is different, force reinstall */ - if ((r->installed > 0) && !static_same_rte(r, t)) - t->installed = -1; - else - t->installed = r->installed; + debug("Static routes:\n"); + WALK_LIST(r, c->routes) + static_dump_rte(r); } -static inline rtable * -cf_igp_table(struct static_config *cf) +#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL ) + +static inline int +static_cmp_rte(const void *X, const void *Y) { - return cf->igp_table ? 
cf->igp_table->table : NULL; + struct static_route *x = *(void **)X, *y = *(void **)Y; + return net_compare(x->net, y->net); } static int -static_reconfigure(struct proto *p, struct proto_config *new) +static_reconfigure(struct proto *P, struct proto_config *CF) { - struct static_config *o = (void *) p->cf; - struct static_config *n = (void *) new; - struct static_route *r; + struct static_proto *p = (void *) P; + struct static_config *o = (void *) P->cf; + struct static_config *n = (void *) CF; + struct static_route *r, *r2, *or, *nr; + + /* Check change in IGP tables */ + if ((IGP_TABLE(o, ip4) != IGP_TABLE(n, ip4)) || + (IGP_TABLE(o, ip6) != IGP_TABLE(n, ip6))) + return 0; - if (cf_igp_table(o) != cf_igp_table(n)) + if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF))) return 0; - /* Delete all obsolete routes and reset neighbor entries */ - WALK_LIST(r, o->iface_routes) - static_match(p, r, n); - WALK_LIST(r, o->other_routes) - static_match(p, r, n); + p->p.cf = CF; - /* Now add all new routes, those not changed will be ignored by static_install() */ - WALK_LIST(r, n->iface_routes) - { - struct iface *ifa; - if ((ifa = if_find_by_name(r->if_name)) && (ifa->flags & IF_UP)) - static_install(p, r, ifa); - } - WALK_LIST(r, n->other_routes) - static_add(p, n, r); + /* Reset route lists in neighbor entries */ + WALK_LIST(r, o->routes) + for (r2 = r; r2; r2 = r2->mp_next) + if (r2->neigh) + r2->neigh->data = NULL; - WALK_LIST(r, o->other_routes) - static_rte_cleanup(p, r); + /* Reconfigure initial matching sequence */ + for (or = HEAD(o->routes), nr = HEAD(n->routes); + NODE_VALID(or) && NODE_VALID(nr) && net_equal(or->net, nr->net); + or = NODE_NEXT(or), nr = NODE_NEXT(nr)) + static_reconfigure_rte(p, or, nr); - return 1; -} + if (!NODE_VALID(or) && !NODE_VALID(nr)) + return 1; -static void -static_copy_routes(list *dlst, list *slst) -{ - struct static_route *dr, *sr; + /* Reconfigure remaining routes, sort them to find matching pairs */ + 
struct static_route *or2, *nr2, **orbuf, **nrbuf; + uint ornum = 0, nrnum = 0, orpos = 0, nrpos = 0, i; - init_list(dlst); - WALK_LIST(sr, *slst) - { - /* copy one route */ - dr = cfg_alloc(sizeof(struct static_route)); - memcpy(dr, sr, sizeof(struct static_route)); - - /* This fn is supposed to be called on fresh src routes, which have 'live' - fields (like .chain, .neigh or .installed) zero, so no need to zero them */ - - /* We need to copy multipath chain, because there are backptrs in 'if_name' */ - if (dr->dest == RTD_MULTIPATH) - { - struct static_route *md, *ms, **mp_last; - - mp_last = &(dr->mp_next); - for (ms = sr->mp_next; ms; ms = ms->mp_next) - { - md = cfg_alloc(sizeof(struct static_route)); - memcpy(md, ms, sizeof(struct static_route)); - md->if_name = (void *) dr; /* really */ - - *mp_last = md; - mp_last = &(md->mp_next); - } - *mp_last = NULL; - } - - add_tail(dlst, (node *) dr); - } + for (or2 = or; NODE_VALID(or2); or2 = NODE_NEXT(or2)) + ornum++; + + for (nr2 = nr; NODE_VALID(nr2); nr2 = NODE_NEXT(nr2)) + nrnum++; + + orbuf = xmalloc(ornum * sizeof(void *)); + nrbuf = xmalloc(nrnum * sizeof(void *)); + + for (i = 0, or2 = or; i < ornum; i++, or2 = NODE_NEXT(or2)) + orbuf[i] = or2; + + for (i = 0, nr2 = nr; i < nrnum; i++, nr2 = NODE_NEXT(nr2)) + nrbuf[i] = nr2; + + qsort(orbuf, ornum, sizeof(struct static_route *), static_cmp_rte); + qsort(nrbuf, nrnum, sizeof(struct static_route *), static_cmp_rte); + + while ((orpos < ornum) && (nrpos < nrnum)) + { + int x = net_compare(orbuf[orpos]->net, nrbuf[nrpos]->net); + if (x < 0) + static_remove_rte(p, orbuf[orpos++]); + else if (x > 0) + static_add_rte(p, nrbuf[nrpos++]); + else + static_reconfigure_rte(p, orbuf[orpos++], nrbuf[nrpos++]); + } + + while (orpos < ornum) + static_remove_rte(p, orbuf[orpos++]); + + while (nrpos < nrnum) + static_add_rte(p, nrbuf[nrpos++]); + + xfree(orbuf); + xfree(nrbuf); + + return 1; } static void @@ -618,53 +579,66 @@ static_copy_config(struct proto_config *dest, 
struct proto_config *src) struct static_config *d = (struct static_config *) dest; struct static_config *s = (struct static_config *) src; - /* Shallow copy of everything */ - proto_copy_rest(dest, src, sizeof(struct static_config)); + struct static_route *srt, *snh; - /* Copy route lists */ - static_copy_routes(&d->iface_routes, &s->iface_routes); - static_copy_routes(&d->other_routes, &s->other_routes); -} + /* Copy route list */ + init_list(&d->routes); + WALK_LIST(srt, s->routes) + { + struct static_route *drt = NULL, *dnh = NULL, **dnp = &drt; + for (snh = srt; snh; snh = snh->mp_next) + { + dnh = cfg_alloc(sizeof(struct static_route)); + memcpy(dnh, snh, sizeof(struct static_route)); -struct protocol proto_static = { - .name = "Static", - .template = "static%d", - .preference = DEF_PREF_STATIC, - .config_size = sizeof(struct static_config), - .init = static_init, - .dump = static_dump, - .start = static_start, - .shutdown = static_shutdown, - .cleanup = static_cleanup, - .reconfigure = static_reconfigure, - .copy_config = static_copy_config -}; + if (!drt) + add_tail(&d->routes, &(dnh->n)); + + *dnp = dnh; + dnp = &(dnh->mp_next); + + if (snh->mp_head) + dnh->mp_head = drt; + } + } +} static void static_show_rt(struct static_route *r) { - byte via[STD_ADDRESS_P_LENGTH + 16]; - switch (r->dest) + { + case RTD_UNICAST: + { + struct static_route *r2; + + cli_msg(-1009, "%N", r->net); + for (r2 = r; r2; r2 = r2->mp_next) { - case RTD_ROUTER: bsprintf(via, "via %I%J", r->via, r->via_if); break; - case RTD_DEVICE: bsprintf(via, "dev %s", r->if_name); break; - case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break; - case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break; - case RTD_PROHIBIT: bsprintf(via, "prohibited"); break; - case RTD_MULTIPATH: bsprintf(via, "multipath"); break; - case RTDX_RECURSIVE: bsprintf(via, "recursive %I", r->via); break; - default: bsprintf(via, "???"); + if (r2->iface && ipa_zero(r2->via)) + cli_msg(-1009, "\tdev %s%s", 
r2->iface->name, + r2->active ? "" : " (dormant)"); + else + cli_msg(-1009, "\tvia %I%J%s%s%s", r2->via, r2->iface, + r2->onlink ? " onlink" : "", + r2->bfd_req ? " (bfd)" : "", + r2->active ? "" : " (dormant)"); } - cli_msg(-1009, "%I/%d %s%s%s", r->net, r->masklen, via, - r->bfd_req ? " (bfd)" : "", r->installed ? "" : " (dormant)"); + break; + } - struct static_route *r2; - if (r->dest == RTD_MULTIPATH) - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - cli_msg(-1009, "\tvia %I%J weight %d%s%s", r2->via, r2->via_if, r2->masklen + 1, /* really */ - r2->bfd_req ? " (bfd)" : "", r2->installed ? "" : " (dormant)"); + case RTD_NONE: + case RTD_BLACKHOLE: + case RTD_UNREACHABLE: + case RTD_PROHIBIT: + cli_msg(-1009, "%N\t%s", r->net, rta_dest_names[r->dest]); + break; + + case RTDX_RECURSIVE: + cli_msg(-1009, "%N\trecursive %I", r->net, r->via); + break; + } } void @@ -673,9 +647,25 @@ static_show(struct proto *P) struct static_config *c = (void *) P->cf; struct static_route *r; - WALK_LIST(r, c->other_routes) - static_show_rt(r); - WALK_LIST(r, c->iface_routes) + WALK_LIST(r, c->routes) static_show_rt(r); cli_msg(0, ""); } + + +struct protocol proto_static = { + .name = "Static", + .template = "static%d", + .preference = DEF_PREF_STATIC, + .channel_mask = NB_ANY, + .proto_size = sizeof(struct static_proto), + .config_size = sizeof(struct static_config), + .postconfig = static_postconfig, + .init = static_init, + .dump = static_dump, + .start = static_start, + .shutdown = static_shutdown, + .cleanup = static_cleanup, + .reconfigure = static_reconfigure, + .copy_config = static_copy_config +}; diff --git a/proto/static/static.h b/proto/static/static.h index 6b047234..a3c30b87 100644 --- a/proto/static/static.h +++ b/proto/static/static.h @@ -11,41 +11,61 @@ #include "nest/route.h" #include "nest/bfd.h" +#include "lib/buffer.h" struct static_config { struct proto_config c; - list iface_routes; /* Routes to search on interface events */ - list other_routes; /* Routes hooked 
to neighbor cache and reject routes */ + list routes; /* List of static routes (struct static_route) */ int check_link; /* Whether iface link state is used */ - struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ + struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ + struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ }; +struct static_proto { + struct proto p; -void static_init_config(struct static_config *); + struct event *event; /* Event for announcing updated routes */ + BUFFER_(struct static_route *) marked; /* Routes marked for reannouncement */ + rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ + rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ +}; struct static_route { node n; - struct static_route *chain; /* Next for the same neighbor */ - ip_addr net; /* Network we route */ - int masklen; /* Mask length */ - int dest; /* Destination type (RTD_*) */ + net_addr *net; /* Network we route */ ip_addr via; /* Destination router */ - struct iface *via_if; /* Destination iface, for link-local vias */ - struct neighbor *neigh; - byte *if_name; /* Name for RTD_DEVICE routes */ - struct static_route *mp_next; /* Nexthops for RTD_MULTIPATH routes */ + struct iface *iface; /* Destination iface, for link-local vias or device routes */ + struct neighbor *neigh; /* Associated neighbor entry */ + struct static_route *chain; /* Next for the same neighbor */ + struct static_route *mp_head; /* First nexthop of this route */ + struct static_route *mp_next; /* Nexthops for multipath routes */ struct f_inst *cmds; /* List of commands for setting attributes */ - int installed; /* Installed in rt table, -1 for reinstall */ - int use_bfd; /* Configured to use BFD */ + byte dest; /* Destination type (RTD_*) */ + byte state; /* State of route announcement (SRS_*) */ + byte active; /* Next hop is active (nbr/iface/BFD available) */ + byte 
onlink; /* Gateway is onlink regardless of IP ranges */ + byte weight; /* Multipath next hop weight */ + byte use_bfd; /* Configured to use BFD */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ + mpls_label_stack *mls; /* MPLS label stack; may be NULL */ }; -/* Dummy nodes (parts of multipath route) abuses masklen field for weight - and if_name field for a ptr to the master (RTD_MULTIPATH) node. */ - +/* + * Note that data fields neigh, chain, state, active and bfd_req are runtime + * data, not real configuration data. Must be handled carefully. + * + * Regular (i.e. dest == RTD_UNICAST) routes use static_route structure for + * additional next hops (fields mp_head, mp_next). Note that 'state' is for + * whole route, while 'active' is for each next hop. Also note that fields + * mp_head, mp_next, active are zero for other kinds of routes. + */ #define RTDX_RECURSIVE 0x7f /* Phony dest value for recursive routes */ +#define SRS_DOWN 0 /* Route is not announced */ +#define SRS_CLEAN 1 /* Route is active and announced */ +#define SRS_DIRTY 2 /* Route changed since announcement */ + void static_show(struct proto *); #endif diff --git a/sysdep/bsd/Makefile b/sysdep/bsd/Makefile new file mode 100644 index 00000000..dfa32747 --- /dev/null +++ b/sysdep/bsd/Makefile @@ -0,0 +1,6 @@ +src := krt-sock.c +obj := $(src-o-files) +$(all-daemon) +$(conf-y-targets): $(s)krt-sock.Y + +tests_objs := $(tests_objs) $(src-o-files) diff --git a/sysdep/bsd/Modules b/sysdep/bsd/Modules deleted file mode 100644 index 39db88e9..00000000 --- a/sysdep/bsd/Modules +++ /dev/null @@ -1,5 +0,0 @@ -krt-sock.c -krt-sock.Y -krt-sys.h -sysio.h -setkey.h diff --git a/sysdep/bsd/krt-sock.Y b/sysdep/bsd/krt-sock.Y index 0218f188..81422c79 100644 --- a/sysdep/bsd/krt-sock.Y +++ b/sysdep/bsd/krt-sock.Y @@ -20,8 +20,8 @@ kern_sys_item: KERNEL TABLE expr { if ($3 && (krt_max_tables == 1)) cf_error("Multiple kernel routing tables not supported"); - if ($3 < 0 || $3 >= krt_max_tables) - 
cf_error("Kernel table id must be in range 0-%d", krt_max_tables - 1); + if ($3 >= krt_max_tables) + cf_error("Kernel table id must be in range 0-%u", krt_max_tables - 1); THIS_KRT->sys.table_id = $3; } diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index f0cebd11..e56dd616 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -28,12 +28,12 @@ #include "nest/route.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "lib/timer.h" -#include "lib/unix.h" -#include "lib/krt.h" +#include "sysdep/unix/unix.h" +#include "sysdep/unix/krt.h" #include "lib/string.h" #include "lib/socket.h" +const int rt_default_ecmp = 0; /* * There are significant differences in multiple tables support between BSD variants. @@ -72,14 +72,13 @@ #endif - /* Dynamic max number of tables */ -int krt_max_tables; +uint krt_max_tables; #ifdef KRT_USE_SYSCTL_NET_FIBS -static int +static uint krt_get_max_tables(void) { int fibs; @@ -91,7 +90,11 @@ krt_get_max_tables(void) return 1; } - return MIN(fibs, KRT_MAX_TABLES); + /* Should not happen */ + if (fibs < 1) + return 1; + + return (uint) MIN(fibs, KRT_MAX_TABLES); } #else @@ -136,7 +139,7 @@ extern int setfib(int fib); /* table_id -> krt_proto map */ #ifdef KRT_SHARED_SOCKET -static struct krt_proto *krt_table_map[KRT_MAX_TABLES]; +static struct krt_proto *krt_table_map[KRT_MAX_TABLES][2]; #endif @@ -148,9 +151,7 @@ krt_capable(rte *e) rta *a = e->attrs; return - a->cast == RTC_UNICAST && - (a->dest == RTD_ROUTER - || a->dest == RTD_DEVICE + ((a->dest == RTD_UNICAST && !a->nh.next) /* No multipath support */ #ifdef RTF_REJECT || a->dest == RTD_UNREACHABLE #endif @@ -185,18 +186,27 @@ struct ks_msg memcpy(p, body, (l > sizeof(*p) ? 
sizeof(*p) : l));\ body += l;} +static inline void +sockaddr_fill_dl(struct sockaddr_dl *sa, struct iface *ifa) +{ + uint len = OFFSETOF(struct sockaddr_dl, sdl_data); + memset(sa, 0, len); + sa->sdl_len = len; + sa->sdl_family = AF_LINK; + sa->sdl_index = ifa->index; +} + static int krt_send_route(struct krt_proto *p, int cmd, rte *e) { net *net = e->net; rta *a = e->attrs; static int msg_seq; - struct iface *j, *i = a->iface; + struct iface *j, *i = a->nh.iface; int l; struct ks_msg msg; char *body = (char *)msg.buf; sockaddr gate, mask, dst; - ip_addr gw; DBG("krt-sock: send %I/%d via %I\n", net->n.prefix, net->n.pxlen, a->gw); @@ -207,7 +217,8 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) msg.rtm.rtm_addrs = RTA_DST; msg.rtm.rtm_flags = RTF_UP | RTF_PROTO1; - if (net->n.pxlen == MAX_PREFIX_LENGTH) + /* XXXX */ + if (net_pxlen(net->n.addr) == net_max_prefix_length[net->n.addr->type]) msg.rtm.rtm_flags |= RTF_HOST; else msg.rtm.rtm_addrs |= RTA_NETMASK; @@ -225,14 +236,12 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) msg.rtm.rtm_flags |= RTF_BLACKHOLE; #endif - /* This is really very nasty, but I'm not able - * to add "(reject|blackhole)" route without - * gateway set + /* + * This is really very nasty, but I'm not able to add reject/blackhole route + * without gateway address. 
*/ - if(!i) + if (!i) { - i = HEAD(iface_list); - WALK_LIST(j, iface_list) { if (j->flags & IF_LOOPBACK) @@ -241,52 +250,83 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) break; } } + + if (!i) + { + log(L_ERR "KRT: Cannot find loopback iface"); + return -1; + } } - gw = a->gw; + int af = AF_UNSPEC; -#ifdef IPV6 - /* Embed interface ID to link-local address */ - if (ipa_is_link_local(gw)) - _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff); -#endif + switch (net->n.addr->type) { + case NET_IP4: + af = AF_INET; + break; + case NET_IP6: + af = AF_INET6; + break; + default: + log(L_ERR "KRT: Not sending route %N to kernel", net->n.addr); + return -1; + } - sockaddr_fill(&dst, BIRD_AF, net->n.prefix, NULL, 0); - sockaddr_fill(&mask, BIRD_AF, ipa_mkmask(net->n.pxlen), NULL, 0); - sockaddr_fill(&gate, BIRD_AF, gw, NULL, 0); + sockaddr_fill(&dst, af, net_prefix(net->n.addr), NULL, 0); + sockaddr_fill(&mask, af, net_pxmask(net->n.addr), NULL, 0); switch (a->dest) { - case RTD_ROUTER: + case RTD_UNICAST: + if (ipa_nonzero(a->nh.gw)) + { + ip_addr gw = a->nh.gw; + + /* Embed interface ID to link-local address */ + if (ipa_is_link_local(gw)) + _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff); + + sockaddr_fill(&gate, af, gw, NULL, 0); msg.rtm.rtm_flags |= RTF_GATEWAY; msg.rtm.rtm_addrs |= RTA_GATEWAY; break; + } #ifdef RTF_REJECT - case RTD_UNREACHABLE: + case RTD_UNREACHABLE: #endif #ifdef RTF_BLACKHOLE - case RTD_BLACKHOLE: + case RTD_BLACKHOLE: #endif - case RTD_DEVICE: - if(i) - { -#ifdef RTF_CLONING - if (cmd == RTM_ADD && (i->flags & IF_MULTIACCESS) != IF_MULTIACCESS) /* PTP */ - msg.rtm.rtm_flags |= RTF_CLONING; + { + /* Fallback for all other valid cases */ + +#if __OpenBSD__ + /* Keeping temporarily old code for OpenBSD */ + struct ifa *addr = (net->n.addr->type == NET_IP4) ? 
i->addr4 : (i->addr6 ?: i->llv6); + + if (!addr) + { + log(L_ERR "KRT: interface %s has no IP addess", i->name); + return -1; + } + + /* Embed interface ID to link-local address */ + ip_addr gw = addr->ip; + if (ipa_is_link_local(gw)) + _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff); + + sockaddr_fill(&gate, af, gw, i, 0); +#else + sockaddr_fill_dl(&gate, i); #endif - if(!i->addr) { - log(L_ERR "KRT: interface %s has no IP addess", i->name); - return -1; - } + msg.rtm.rtm_addrs |= RTA_GATEWAY; + break; + } - sockaddr_fill(&gate, BIRD_AF, i->addr->ip, NULL, 0); - msg.rtm.rtm_addrs |= RTA_GATEWAY; - } - break; - default: - bug("krt-sock: unknown flags, but not filtered"); + default: + bug("krt-sock: unknown flags, but not filtered"); } msg.rtm.rtm_index = i->index; @@ -299,7 +339,7 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) msg.rtm.rtm_msglen = l; if ((l = write(p->sys.sk->fd, (char *)&msg, l)) < 0) { - log(L_ERR "KRT: Error sending route %I/%d to kernel: %m", net->n.prefix, net->n.pxlen); + log(L_ERR "KRT: Error sending route %N to kernel: %m", net->n.addr); return -1; } @@ -331,10 +371,12 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) { /* p is NULL iff KRT_SHARED_SOCKET and !scan */ + int ipv6; rte *e; net *net; sockaddr dst, gate, mask; ip_addr idst, igate, imask; + net_addr ndst; void *body = (char *)msg->buf; int new = (msg->rtm.rtm_type != RTM_DELETE); char *errmsg = "KRT: Invalid route received"; @@ -352,42 +394,64 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) if (flags & RTF_LLINFO) SKIP("link-local\n"); -#ifdef KRT_SHARED_SOCKET - if (!scan) - { - int table_id = msg->rtm.rtm_tableid; - p = (table_id < KRT_MAX_TABLES) ? 
krt_table_map[table_id] : NULL; - - if (!p) - SKIP("unknown table id %d\n", table_id); - } -#endif - GETADDR(&dst, RTA_DST); GETADDR(&gate, RTA_GATEWAY); GETADDR(&mask, RTA_NETMASK); - if (dst.sa.sa_family != BIRD_AF) - SKIP("invalid DST"); + switch (dst.sa.sa_family) { + case AF_INET: + ipv6 = 0; + break; + case AF_INET6: + ipv6 = 1; + break; + default: + SKIP("invalid DST"); + } + + /* We do not test family for RTA_NETMASK, because BSD sends us + some strange values, but interpreting them as IPv4/IPv6 works */ + mask.sa.sa_family = dst.sa.sa_family; idst = ipa_from_sa(&dst); imask = ipa_from_sa(&mask); - igate = (gate.sa.sa_family == BIRD_AF) ? ipa_from_sa(&gate) : IPA_NONE; + igate = (gate.sa.sa_family == dst.sa.sa_family) ? ipa_from_sa(&gate) : IPA_NONE; - /* We do not test family for RTA_NETMASK, because BSD sends us - some strange values, but interpreting them as IPv4/IPv6 works */ +#ifdef KRT_SHARED_SOCKET + if (!scan) + { + int table_id = msg->rtm.rtm_tableid; + p = (table_id < KRT_MAX_TABLES) ? krt_table_map[table_id][ipv6] : NULL; + if (!p) + SKIP("unknown table id %d\n", table_id); + } +#endif + if ((!ipv6) && (p->p.main_channel->table->addr_type != NET_IP4)) + SKIP("reading only IPv4 routes"); + if ( ipv6 && (p->p.main_channel->table->addr_type != NET_IP6)) + SKIP("reading only IPv6 routes"); int c = ipa_classify_net(idst); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) SKIP("strange class/scope\n"); - int pxlen = (flags & RTF_HOST) ? MAX_PREFIX_LENGTH : ipa_masklen(imask); + int pxlen; + if (ipv6) + pxlen = (flags & RTF_HOST) ? IP6_MAX_PREFIX_LENGTH : ip6_masklen(&ipa_to_ip6(imask)); + else + pxlen = (flags & RTF_HOST) ? 
IP4_MAX_PREFIX_LENGTH : ip4_masklen(ipa_to_ip4(imask)); + if (pxlen < 0) { log(L_ERR "%s (%I) - netmask %I", errmsg, idst, imask); return; } + if (ipv6) + net_fill_ip6(&ndst, ipa_to_ip6(idst), pxlen); + else + net_fill_ip4(&ndst, ipa_to_ip4(idst), pxlen); + if ((flags & RTF_GATEWAY) && ipa_zero(igate)) - { log(L_ERR "%s (%I/%d) - missing gateway", errmsg, idst, pxlen); return; } + { log(L_ERR "%s (%N) - missing gateway", errmsg, ndst); return; } u32 self_mask = RTF_PROTO1; u32 alien_mask = RTF_STATIC | RTF_PROTO1 | RTF_GATEWAY; @@ -426,13 +490,12 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) else src = KRT_SRC_KERNEL; - net = net_get(p->p.table, idst, pxlen); + net = net_get(p->p.main_channel->table, &ndst); rta a = { .src = p->p.main_source, .source = RTS_INHERIT, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST }; /* reject/blackhole routes have also set RTF_GATEWAY, @@ -452,41 +515,37 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) } #endif - a.iface = if_find_by_index(msg->rtm.rtm_index); - if (!a.iface) + a.nh.iface = if_find_by_index(msg->rtm.rtm_index); + if (!a.nh.iface) { - log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u", - net->n.prefix, net->n.pxlen, msg->rtm.rtm_index); + log(L_ERR "KRT: Received route %N with unknown ifindex %u", + net->n.addr, msg->rtm.rtm_index); return; } + a.dest = RTD_UNICAST; if (flags & RTF_GATEWAY) { neighbor *ng; - a.dest = RTD_ROUTER; - a.gw = igate; + a.nh.gw = igate; -#ifdef IPV6 /* Clean up embedded interface ID returned in link-local address */ - if (ipa_is_link_local(a.gw)) - _I0(a.gw) = 0xfe800000; -#endif + if (ipa_is_link_local(a.nh.gw)) + _I0(a.nh.gw) = 0xfe800000; - ng = neigh_find2(&p->p, &a.gw, a.iface, 0); + ng = neigh_find2(&p->p, &a.nh.gw, a.nh.iface, 0); if (!ng || (ng->scope == SCOPE_HOST)) { /* Ignore routes with next-hop 127.0.0.1, host routes with such next-hop appear on OpenBSD for address aliases. 
*/ - if (ipa_classify(a.gw) == (IADDR_HOST | SCOPE_HOST)) + if (ipa_classify(a.nh.gw) == (IADDR_HOST | SCOPE_HOST)) return; - log(L_ERR "KRT: Received route %I/%d with strange next-hop %I", - net->n.prefix, net->n.pxlen, a.gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", + net->n.addr, a.nh.gw); return; } } - else - a.dest = RTD_DEVICE; done: e = rte_get_temp(&a); @@ -643,22 +702,28 @@ krt_read_addr(struct ks_msg *msg, int scan) GETADDR (&null, RTA_AUTHOR); GETADDR (&brd, RTA_BRD); - /* Some other family address */ - if (addr.sa.sa_family != BIRD_AF) - return; + /* Is addr family IP4 or IP6? */ + int ipv6; + switch (addr.sa.sa_family) { + case AF_INET: ipv6 = 0; break; + case AF_INET6: ipv6 = 1; break; + default: return; + } + + /* We do not test family for RTA_NETMASK, because BSD sends us + some strange values, but interpreting them as IPv4/IPv6 works */ + mask.sa.sa_family = addr.sa.sa_family; iaddr = ipa_from_sa(&addr); imask = ipa_from_sa(&mask); ibrd = ipa_from_sa(&brd); - - if ((masklen = ipa_masklen(imask)) < 0) + if ((ipv6 ? (masklen = ip6_masklen(&ipa_to_ip6(imask))) : (masklen = ip4_masklen(ipa_to_ip4(imask)))) < 0) { - log(L_ERR "KIF: Invalid masklen %I for %s", imask, iface->name); + log(L_ERR "KIF: Invalid mask %I for %s", imask, iface->name); return; } -#ifdef IPV6 /* Clean up embedded interface ID returned in link-local address */ if (ipa_is_link_local(iaddr)) @@ -666,13 +731,11 @@ krt_read_addr(struct ks_msg *msg, int scan) if (ipa_is_link_local(ibrd)) _I0(ibrd) = 0xfe800000; -#endif bzero(&ifa, sizeof(ifa)); ifa.iface = iface; ifa.ip = iaddr; - ifa.pxlen = masklen; scope = ipa_classify(ifa.ip); if (scope < 0) @@ -682,17 +745,16 @@ krt_read_addr(struct ks_msg *msg, int scan) } ifa.scope = scope & IADDR_SCOPE_MASK; - if (masklen < BITS_PER_IP_ADDRESS) + if (masklen < (ipv6 ? 
IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH)) { - ifa.prefix = ipa_and(ifa.ip, ipa_mkmask(masklen)); + net_fill_ipa(&ifa.prefix, ifa.ip, masklen); + net_normalize(&ifa.prefix); - if (masklen == (BITS_PER_IP_ADDRESS - 1)) + if (masklen == ((ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH) - 1)) ifa.opposite = ipa_opposite_m1(ifa.ip); -#ifndef IPV6 - if (masklen == (BITS_PER_IP_ADDRESS - 2)) + if ((!ipv6) && (masklen == IP4_MAX_PREFIX_LENGTH - 2)) ifa.opposite = ipa_opposite_m2(ifa.ip); -#endif if (iface->flags & IF_BROADCAST) ifa.brd = ibrd; @@ -702,12 +764,13 @@ krt_read_addr(struct ks_msg *msg, int scan) } else if (!(iface->flags & IF_MULTIACCESS) && ipa_nonzero(ibrd)) { - ifa.prefix = ifa.opposite = ibrd; + net_fill_ipa(&ifa.prefix, ibrd, (ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH)); + ifa.opposite = ibrd; ifa.flags |= IA_PEER; } else { - ifa.prefix = ifa.ip; + net_fill_ipa(&ifa.prefix, ifa.ip, (ipv6 ? IP6_MAX_PREFIX_LENGTH : IP4_MAX_PREFIX_LENGTH)); ifa.flags |= IA_HOST; } @@ -804,7 +867,7 @@ krt_sysctl_scan(struct proto *p, int cmd, int table_id) mib[0] = CTL_NET; mib[1] = PF_ROUTE; mib[2] = 0; - mib[3] = BIRD_AF; + mib[3] = 0; // Set AF to 0 for all available families mib[4] = cmd; mib[5] = 0; mcnt = 6; @@ -948,6 +1011,7 @@ krt_sock_open(pool *pool, void *data, int table_id UNUSED) return sk; } +static u32 krt_table_cf[(KRT_MAX_TABLES+31) / 32][2]; #ifdef KRT_SHARED_SOCKET @@ -979,7 +1043,17 @@ krt_sock_close_shared(void) int krt_sys_start(struct krt_proto *p) { - krt_table_map[KRT_CF->sys.table_id] = p; + int id = KRT_CF->sys.table_id; + + if (krt_table_cf[id/32][!!(p->af == AF_INET6)] & (1 << (id%32))) + { + log(L_ERR "%s: Multiple kernel syncers defined for table #%d", p->p.name, id); + return 0; + } + + krt_table_cf[id/32][!!(p->af == AF_INET6)] |= (1 << (id%32)); + + krt_table_map[KRT_CF->sys.table_id][!!(p->af == AF_INET6)] = p; krt_sock_open_shared(); p->sys.sk = krt_sock; @@ -990,10 +1064,12 @@ krt_sys_start(struct krt_proto *p) void 
krt_sys_shutdown(struct krt_proto *p) { + krt_table_cf[(KRT_CF->sys.table_id)/32][!!(p->af == AF_INET6)] &= ~(1 << ((KRT_CF->sys.table_id)%32)); + krt_sock_close_shared(); p->sys.sk = NULL; - krt_table_map[KRT_CF->sys.table_id] = NULL; + krt_table_map[KRT_CF->sys.table_id][!!(p->af == AF_INET6)] = NULL; krt_buffer_release(&p->p); } @@ -1003,6 +1079,16 @@ krt_sys_shutdown(struct krt_proto *p) int krt_sys_start(struct krt_proto *p) { + int id = KRT_CF->sys.table_id; + + if (krt_table_cf[id/32][!!(p->af == AF_INET6)] & (1 << (id%32))) + { + log(L_ERR "%s: Multiple kernel syncers defined for table #%d", p->p.name, id); + return 0; + } + + krt_table_cf[id/32][!!(p->af == AF_INET6)] |= (1 << (id%32)); + p->sys.sk = krt_sock_open(p->p.pool, p, KRT_CF->sys.table_id); return 1; } @@ -1010,6 +1096,8 @@ krt_sys_start(struct krt_proto *p) void krt_sys_shutdown(struct krt_proto *p) { + krt_table_cf[(KRT_CF->sys.table_id)/32][!!(p->af == AF_INET6)] &= ~(1 << ((KRT_CF->sys.table_id)%32)); + rfree(p->sys.sk); p->sys.sk = NULL; @@ -1021,8 +1109,6 @@ krt_sys_shutdown(struct krt_proto *p) /* KRT configuration callbacks */ -static u32 krt_table_cf[(KRT_MAX_TABLES+31) / 32]; - int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o) { @@ -1036,18 +1122,6 @@ krt_sys_preconfig(struct config *c UNUSED) bzero(&krt_table_cf, sizeof(krt_table_cf)); } -void -krt_sys_postconfig(struct krt_config *x) -{ - u32 *tbl = krt_table_cf; - int id = x->sys.table_id; - - if (tbl[id/32] & (1 << (id%32))) - cf_error("Multiple kernel syncers defined for table #%d", id); - - tbl[id/32] |= (1 << (id%32)); -} - void krt_sys_init_config(struct krt_config *c) { c->sys.table_id = 0; /* Default table */ @@ -1072,13 +1146,11 @@ kif_sys_shutdown(struct kif_proto *p) krt_buffer_release(&p->p); } - -struct ifa * -kif_get_primary_ip(struct iface *i UNUSED6) +int +kif_update_sysdep_addr(struct iface *i) { -#ifndef IPV6 static int fd = -1; - + if (fd < 0) fd = socket(AF_INET, 
SOCK_DGRAM, 0); @@ -1088,20 +1160,10 @@ kif_get_primary_ip(struct iface *i UNUSED6) int rv = ioctl(fd, SIOCGIFADDR, (char *) &ifr); if (rv < 0) - return NULL; - - ip_addr addr; - struct sockaddr_in *sin = (struct sockaddr_in *) &ifr.ifr_addr; - memcpy(&addr, &sin->sin_addr.s_addr, sizeof(ip_addr)); - ipa_ntoh(addr); + return 0; - struct ifa *a; - WALK_LIST(a, i->addrs) - { - if (ipa_equal(a->ip, addr)) - return a; - } -#endif + ip4_addr old = i->sysdep; + i->sysdep = ipa_to_ip4(ipa_from_sa4(&ifr.ifr_addr)); - return NULL; + return !ip4_equal(i->sysdep, old); } diff --git a/sysdep/bsd/krt-sys.h b/sysdep/bsd/krt-sys.h index 353ffcec..aa6cc72e 100644 --- a/sysdep/bsd/krt-sys.h +++ b/sysdep/bsd/krt-sys.h @@ -31,7 +31,7 @@ static inline void kif_sys_copy_config(struct kif_config *d UNUSED, struct kif_c /* Kernel routes */ -extern int krt_max_tables; +extern uint krt_max_tables; struct krt_params { int table_id; /* Kernel table ID we sync with */ @@ -44,8 +44,9 @@ struct krt_state { static inline void krt_sys_io_init(void) { } static inline void krt_sys_init(struct krt_proto *p UNUSED) { } +static inline void krt_sys_postconfig(struct krt_config *x UNUSED) { } -static inline int krt_sys_get_attr(eattr *a UNUSED, byte *buf UNUSED, int buflen UNUSED) { return 0; } +static inline int krt_sys_get_attr(eattr *a UNUSED, byte *buf UNUSED, int buflen UNUSED) { return GA_UNKNOWN; } #endif diff --git a/sysdep/bsd/setkey.h b/sysdep/bsd/setkey.h index b417faca..3bcd8623 100644 --- a/sysdep/bsd/setkey.h +++ b/sysdep/bsd/setkey.h @@ -11,7 +11,7 @@ #include <netipsec/ipsec.h> #include "nest/bird.h" -#include "lib/unix.h" +#include "sysdep/unix/unix.h" /* @@ -63,7 +63,7 @@ setkey_send(struct sadb_msg *msg, uint len) * operations to implement replace. */ static int -setkey_md5(sockaddr *src, sockaddr *dst, char *passwd, uint type) +setkey_md5(sockaddr *src, sockaddr *dst, uint pxlen, char *passwd, uint type) { uint passwd_len = passwd ? 
strlen(passwd) : 0; @@ -122,7 +122,7 @@ setkey_md5(sockaddr *src, sockaddr *dst, char *passwd, uint type) saddr->sadb_address_len = PFKEY_UNIT64(len); saddr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; saddr->sadb_address_proto = IPSEC_ULPROTO_ANY; - saddr->sadb_address_prefixlen = MAX_PREFIX_LENGTH; + saddr->sadb_address_prefixlen = pxlen; memcpy(pos + sizeof(struct sadb_address), &src->sa, src->sa.sa_len); pos += len; @@ -132,7 +132,7 @@ setkey_md5(sockaddr *src, sockaddr *dst, char *passwd, uint type) daddr->sadb_address_len = PFKEY_UNIT64(len); daddr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; daddr->sadb_address_proto = IPSEC_ULPROTO_ANY; - daddr->sadb_address_prefixlen = MAX_PREFIX_LENGTH; + daddr->sadb_address_prefixlen = pxlen; memcpy(pos + sizeof(struct sadb_address), &dst->sa, dst->sa.sa_len); pos += len; @@ -152,18 +152,20 @@ sk_set_md5_in_sasp_db(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, sockaddr_fill(&src, s->af, local, ifa, 0); sockaddr_fill(&dst, s->af, remote, ifa, 0); + uint pxlen = (s->af == AF_INET) ? 
IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; + if (passwd && *passwd) { int len = strlen(passwd); if (len > TCP_KEYLEN_MAX) ERR_MSG("The password for TCP MD5 Signature is too long"); - if (setkey_md5(&src, &dst, passwd, SADB_ADD) < 0) + if (setkey_md5(&src, &dst, pxlen, passwd, SADB_ADD) < 0) ERR_MSG("Cannot add TCP-MD5 password into the IPsec SA/SP database"); } else { - if (setkey_md5(&src, &dst, NULL, SADB_DELETE) < 0) + if (setkey_md5(&src, &dst, pxlen, NULL, SADB_DELETE) < 0) ERR_MSG("Cannot delete TCP-MD5 password from the IPsec SA/SP database"); } return 0; diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h index 9b10e6e8..68296e65 100644 --- a/sysdep/bsd/sysio.h +++ b/sysdep/bsd/sysio.h @@ -38,12 +38,12 @@ */ #define INIT_MREQ4(maddr,ifa) \ - { .imr_multiaddr = ipa_to_in4(maddr), .imr_interface = ipa_to_in4(ifa->addr->ip) } + { .imr_multiaddr = ipa_to_in4(maddr), .imr_interface = ip4_to_in4(ifa->sysdep) } static inline int sk_setup_multicast4(sock *s) { - struct in_addr ifa = ipa_to_in4(s->iface->addr->ip); + struct in_addr ifa = ip4_to_in4(s->iface->sysdep); u8 ttl = s->ttl; u8 n = 0; @@ -201,7 +201,7 @@ sk_prepare_ip_header(sock *s, void *hdr, int dlen) #if defined(__FreeBSD__) #define USE_MD5SIG_SETKEY -#include "lib/setkey.h" +#include "sysdep/bsd/setkey.h" #endif int diff --git a/sysdep/cf/bsd-v6.h b/sysdep/cf/bsd-v6.h deleted file mode 100644 index 745dfba3..00000000 --- a/sysdep/cf/bsd-v6.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Configuration for *BSD based systems (tested on FreeBSD and NetBSD) - * - * (c) 2004 Ondrej Filip <feela@network.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. 
- */ - -#define IPV6 - -#define CONFIG_AUTO_ROUTES -#define CONFIG_SELF_CONSCIOUS -#define CONFIG_MULTIPLE_TABLES -#define CONFIG_SINGLE_ROUTE - -#define CONFIG_SKIP_MC_BIND -#define CONFIG_NO_IFACE_BIND - -/* -Link: sysdep/unix -Link: sysdep/bsd - */ diff --git a/sysdep/cf/bsd.h b/sysdep/cf/bsd.h index 51beb42b..22c54277 100644 --- a/sysdep/cf/bsd.h +++ b/sysdep/cf/bsd.h @@ -15,6 +15,9 @@ #define CONFIG_NO_IFACE_BIND #define CONFIG_USE_HDRINCL +#define CONFIG_INCLUDE_SYSIO_H "sysdep/bsd/sysio.h" +#define CONFIG_INCLUDE_KRTSYS_H "sysdep/bsd/krt-sys.h" + /* Link: sysdep/unix Link: sysdep/bsd diff --git a/sysdep/cf/linux-v6.h b/sysdep/cf/linux-v6.h deleted file mode 100644 index 09f60377..00000000 --- a/sysdep/cf/linux-v6.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Configuration for Linux based systems running IPv6 - * - * (c) 1998--1999 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#define IPV6 - -#define CONFIG_AUTO_ROUTES -#define CONFIG_SELF_CONSCIOUS -#define CONFIG_MULTIPLE_TABLES -#define CONFIG_ALL_TABLES_AT_ONCE - -#define CONFIG_RESTRICTED_PRIVILEGES - -/* -Link: sysdep/linux -Link: sysdep/unix - */ diff --git a/sysdep/cf/linux.h b/sysdep/cf/linux.h index 9e34f869..047d3764 100644 --- a/sysdep/cf/linux.h +++ b/sysdep/cf/linux.h @@ -10,11 +10,21 @@ #define CONFIG_SELF_CONSCIOUS #define CONFIG_MULTIPLE_TABLES #define CONFIG_ALL_TABLES_AT_ONCE +#define CONFIG_IP6_SADR_KERNEL #define CONFIG_MC_PROPER_SRC #define CONFIG_UNIX_DONTROUTE +#define CONFIG_INCLUDE_SYSIO_H "sysdep/linux/sysio.h" +#define CONFIG_INCLUDE_KRTSYS_H "sysdep/linux/krt-sys.h" + #define CONFIG_RESTRICTED_PRIVILEGES +#define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h" + + +#ifndef AF_MPLS +#define AF_MPLS 28 +#endif /* Link: sysdep/linux diff --git a/sysdep/config.h b/sysdep/config.h index e529cd86..a552e6b6 100644 --- a/sysdep/config.h +++ b/sysdep/config.h @@ -6,8 +6,15 @@ #ifndef _BIRD_CONFIG_H_ #define _BIRD_CONFIG_H_ +#define 
XSTR2(X) #X +#define XSTR1(X) XSTR2(X) + /* BIRD version */ -#define BIRD_VERSION "1.6.3" +#ifdef GIT_LABEL +#define BIRD_VERSION XSTR1(GIT_LABEL) +#else +#define BIRD_VERSION "2.0.1" +#endif /* Include parameters determined by configure script */ #include "sysdep/autoconf.h" diff --git a/sysdep/linux/Makefile b/sysdep/linux/Makefile new file mode 100644 index 00000000..188ac8de --- /dev/null +++ b/sysdep/linux/Makefile @@ -0,0 +1,6 @@ +src := netlink.c +obj := $(src-o-files) +$(all-daemon) +$(conf-y-targets): $(s)netlink.Y + +tests_objs := $(tests_objs) $(src-o-files) diff --git a/sysdep/linux/Modules b/sysdep/linux/Modules deleted file mode 100644 index 940660b6..00000000 --- a/sysdep/linux/Modules +++ /dev/null @@ -1,5 +0,0 @@ -krt-sys.h -netlink.c -netlink.Y -sysio.h -syspriv.h diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 4802897b..84591eb2 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -6,6 +6,7 @@ * Can be freely distributed and used under the terms of the GNU GPL. 
*/ +#include <alloca.h> #include <stdio.h> #include <unistd.h> #include <fcntl.h> @@ -19,9 +20,9 @@ #include "nest/route.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "lib/timer.h" -#include "lib/unix.h" -#include "lib/krt.h" +#include "lib/alloca.h" +#include "sysdep/unix/unix.h" +#include "sysdep/unix/krt.h" #include "lib/socket.h" #include "lib/string.h" #include "lib/hash.h" @@ -32,6 +33,9 @@ #include <linux/netlink.h> #include <linux/rtnetlink.h> +#ifdef HAVE_MPLS_KERNEL +#include <linux/lwtunnel.h> +#endif #ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */ #define MSG_TRUNC 0x20 @@ -49,13 +53,26 @@ #define RTA_TABLE 15 #endif +#ifndef RTA_VIA +#define RTA_VIA 18 +#endif -#ifdef IPV6 -#define krt_ecmp6(X) 1 -#else -#define krt_ecmp6(X) 0 +#ifndef RTA_NEWDST +#define RTA_NEWDST 19 +#endif + +#ifndef RTA_ENCAP_TYPE +#define RTA_ENCAP_TYPE 21 +#endif + +#ifndef RTA_ENCAP +#define RTA_ENCAP 22 #endif +#define krt_ecmp6(p) ((p)->af == AF_INET6) + +const int rt_default_ecmp = 16; + /* * Structure nl_parse_state keeps state of received route processing. Ideally, * we could just independently parse received Netlink messages and immediately @@ -130,7 +147,7 @@ nl_open_sock(struct nl_sock *nl) nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (nl->fd < 0) die("Unable to open rtnetlink socket: %m"); - nl->seq = now; + nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? 
*/ nl->rx_buffer = xmalloc(NL_RX_SIZE); nl->last_hdr = NULL; nl->last_size = 0; @@ -307,35 +324,40 @@ static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = { #define BIRD_IFA_MAX (IFA_FLAGS+1) -#ifndef IPV6 static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = { [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) }, [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) }, [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) }, - [IFA_FLAGS] = { 1, 1, sizeof(u32) }, + [IFA_FLAGS] = { 1, 1, sizeof(u32) }, }; -#else + static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = { [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) }, [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) }, [IFA_FLAGS] = { 1, 1, sizeof(u32) }, }; -#endif -#define BIRD_RTA_MAX (RTA_TABLE+1) +#define BIRD_RTA_MAX (RTA_ENCAP+1) -#ifndef IPV6 -static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = { +static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = { [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, }; -#else -static struct nl_want_attrs mpnh_attr_want6[BIRD_RTA_MAX] = { + +static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = { [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, +}; + +#ifdef HAVE_MPLS_KERNEL +static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = { + [RTA_DST] = { 1, 0, 0 }, }; #endif -#ifndef IPV6 static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = { [RTA_DST] = { 1, 1, sizeof(ip4_addr) }, [RTA_OIF] = { 1, 1, sizeof(u32) }, @@ -346,10 +368,13 @@ static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = { [RTA_MULTIPATH] = { 1, 0, 0 }, [RTA_FLOW] = { 1, 1, sizeof(u32) }, [RTA_TABLE] = { 1, 1, sizeof(u32) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, }; -#else + static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = { [RTA_DST] = { 1, 1, sizeof(ip6_addr) }, + [RTA_SRC] = { 1, 1, sizeof(ip6_addr) }, [RTA_IIF] = { 1, 1, 
sizeof(u32) }, [RTA_OIF] = { 1, 1, sizeof(u32) }, [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) }, @@ -359,6 +384,21 @@ static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = { [RTA_MULTIPATH] = { 1, 0, 0 }, [RTA_FLOW] = { 1, 1, sizeof(u32) }, [RTA_TABLE] = { 1, 1, sizeof(u32) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, +}; + +#ifdef HAVE_MPLS_KERNEL +static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = { + [RTA_DST] = { 1, 1, sizeof(u32) }, + [RTA_IIF] = { 1, 1, sizeof(u32) }, + [RTA_OIF] = { 1, 1, sizeof(u32) }, + [RTA_PRIORITY] = { 1, 1, sizeof(u32) }, + [RTA_METRICS] = { 1, 0, 0 }, + [RTA_FLOW] = { 1, 1, sizeof(u32) }, + [RTA_TABLE] = { 1, 1, sizeof(u32) }, + [RTA_VIA] = { 1, 0, 0 }, + [RTA_NEWDST] = { 1, 0, 0 }, }; #endif @@ -376,7 +416,7 @@ nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size)) { - log(L_ERR "nl_parse_attrs: Malformed message received"); + log(L_ERR "nl_parse_attrs: Malformed attribute received"); return 0; } @@ -392,6 +432,9 @@ nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, return 1; } +static inline u16 rta_get_u16(struct rtattr *a) +{ return *(u16 *) RTA_DATA(a); } + static inline u32 rta_get_u32(struct rtattr *a) { return *(u32 *) RTA_DATA(a); } @@ -401,6 +444,34 @@ static inline ip4_addr rta_get_ip4(struct rtattr *a) static inline ip6_addr rta_get_ip6(struct rtattr *a) { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); } +static inline ip_addr rta_get_ipa(struct rtattr *a) +{ + if (RTA_PAYLOAD(a) == sizeof(ip4_addr)) + return ipa_from_ip4(rta_get_ip4(a)); + else + return ipa_from_ip6(rta_get_ip6(a)); +} + +#ifdef HAVE_MPLS_KERNEL +static inline ip_addr rta_get_via(struct rtattr *a) +{ + struct rtvia *v = RTA_DATA(a); + switch(v->rtvia_family) { + case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr)); + case AF_INET6: return 
ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr)); + } + return IPA_NONE; +} + +static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK]; +static inline int rta_get_mpls(struct rtattr *a, u32 *stack) +{ + if (RTA_PAYLOAD(a) % 4) + log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a)); + + return mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack); +} +#endif struct rtattr * nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen) @@ -422,31 +493,92 @@ nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint return a; } +static inline struct rtattr * +nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code) +{ + return nl_add_attr(h, bufsize, code, NULL, 0); +} + static inline void -nl_add_attr_u32(struct nlmsghdr *h, unsigned bufsize, int code, u32 data) +nl_close_attr(struct nlmsghdr *h, struct rtattr *a) +{ + a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a; +} + +static inline void +nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data) +{ + nl_add_attr(h, bufsize, code, &data, 2); +} + +static inline void +nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data) { nl_add_attr(h, bufsize, code, &data, 4); } static inline void -nl_add_attr_ipa(struct nlmsghdr *h, unsigned bufsize, int code, ip_addr ipa) +nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4) { - ipa_hton(ipa); - nl_add_attr(h, bufsize, code, &ipa, sizeof(ipa)); + ip4 = ip4_hton(ip4); + nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4)); } -static inline struct rtattr * -nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code) +static inline void +nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6) { - return nl_add_attr(h, bufsize, code, NULL, 0); + ip6 = ip6_hton(ip6); + nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6)); } static inline void -nl_close_attr(struct nlmsghdr *h, struct rtattr *a) +nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int 
code, ip_addr ipa) { - a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a; + if (ipa_is_ip4(ipa)) + nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa)); + else + nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa)); +} + +#ifdef HAVE_MPLS_KERNEL +static inline void +nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack) +{ + char buf[len*4]; + mpls_put(buf, len, stack); + nl_add_attr(h, bufsize, code, buf, len*4); +} + +static inline void +nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack) +{ + nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS); + + struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP); + nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack); + nl_close_attr(h, nest); } +static inline void +nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa) +{ + struct rtvia *via = alloca(sizeof(struct rtvia) + 16); + + if (ipa_is_ip4(ipa)) + { + via->rtvia_family = AF_INET; + put_ip4(via->rtvia_addr, ipa_to_ip4(ipa)); + nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4); + } + else + { + via->rtvia_family = AF_INET6; + put_ip6(via->rtvia_addr, ipa_to_ip6(ipa)); + nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16); + } +} +#endif + static inline struct rtnexthop * nl_open_nexthop(struct nlmsghdr *h, uint bufsize) { @@ -467,8 +599,30 @@ nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh) nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh; } +static inline void +nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED) +{ +#ifdef HAVE_MPLS_KERNEL + if (nh->labels > 0) + if (af == AF_MPLS) + nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label); + else + nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label); + + if (ipa_nonzero(nh->gw)) + if (af == AF_MPLS) + nl_add_attr_via(h, bufsize, nh->gw); + else + nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); +#else + + if (ipa_nonzero(nh->gw)) + 
nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); +#endif +} + static void -nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh) +nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af) { struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH); @@ -480,7 +634,10 @@ nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh) rtnh->rtnh_hops = nh->weight; rtnh->rtnh_ifindex = nh->iface->index; - nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); + nl_add_nexthop(h, bufsize, nh, af); + + if (nh->flags & RNF_ONLINK) + rtnh->rtnh_flags |= RTNH_F_ONLINK; nl_close_nexthop(h, rtnh); } @@ -488,22 +645,16 @@ nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh) nl_close_attr(h, a); } -static struct mpnh * -nl_parse_multipath(struct krt_proto *p, struct rtattr *ra, int af) +static struct nexthop * +nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af) { - /* Temporary buffer for multicast nexthops */ - static struct mpnh *nh_buffer; - static int nh_buf_size; /* in number of structures */ - static int nh_buf_used; - struct rtattr *a[BIRD_RTA_MAX]; struct rtnexthop *nh = RTA_DATA(ra); - struct mpnh *rv, *first, **last; + struct nexthop *rv, *first, **last; unsigned len = RTA_PAYLOAD(ra); first = NULL; last = &first; - nh_buf_used = 0; while (len) { @@ -511,13 +662,7 @@ nl_parse_multipath(struct krt_proto *p, struct rtattr *ra, int af) if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) return NULL; - if (nh_buf_used == nh_buf_size) - { - nh_buf_size = nh_buf_size ? 
(nh_buf_size * 2) : 4; - nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh)); - } - *last = rv = nh_buffer + nh_buf_used++; - rv->next = NULL; + *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE); last = &(rv->next); rv->weight = nh->rtnh_hops; @@ -529,33 +674,52 @@ nl_parse_multipath(struct krt_proto *p, struct rtattr *ra, int af) nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0); switch (af) { -#ifndef IPV6 case AF_INET: - if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a))) + if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a))) return NULL; break; -#else + case AF_INET6: - if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want6, a, sizeof(a))) + if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a))) return NULL; break; -#endif + default: return NULL; } if (a[RTA_GATEWAY]) { - memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(rv->gw)); - ipa_ntoh(rv->gw); + rv->gw = rta_get_ipa(a[RTA_GATEWAY]); + + if (nh->rtnh_flags & RTNH_F_ONLINK) + rv->flags |= RNF_ONLINK; - neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface, - (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0); - if (!ng || (ng->scope == SCOPE_HOST)) + neighbor *nbr; + nbr = neigh_find2(&p->p, &rv->gw, rv->iface, + (rv->flags & RNF_ONLINK) ? 
NEF_ONLINK : 0); + if (!nbr || (nbr->scope == SCOPE_HOST)) return NULL; } else - return NULL; + rv->gw = IPA_NONE; + +#ifdef HAVE_MPLS_KERNEL + if (a[RTA_ENCAP_TYPE]) + { + if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) { + log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE])); + return NULL; + } + + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + rv->labels = rta_get_mpls(enca[RTA_DST], rv->label); + break; + } +#endif + len -= NLMSG_ALIGN(nh->rtnh_len); nh = RTNH_NEXT(nh); @@ -692,52 +856,131 @@ nl_parse_link(struct nlmsghdr *h, int scan) } static void -nl_parse_addr(struct nlmsghdr *h, int scan) +nl_parse_addr4(struct ifaddrmsg *i, int scan, int new) { - struct ifaddrmsg *i; struct rtattr *a[BIRD_IFA_MAX]; - int new = h->nlmsg_type == RTM_NEWADDR; - struct ifa ifa; struct iface *ifi; - int scope; u32 ifa_flags; + int scope; - if (!(i = nl_checkin(h, sizeof(*i)))) + if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a))) return; - switch (i->ifa_family) + if (!a[IFA_LOCAL]) { -#ifndef IPV6 - case AF_INET: - if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a))) - return; - if (!a[IFA_LOCAL]) - { - log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)"); - return; - } - break; -#else - case AF_INET6: - if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a))) - return; - break; -#endif - default: - return; + log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)"); + return; } - if (!a[IFA_ADDRESS]) { log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)"); return; } + ifi = if_find_by_index(i->ifa_index); + if (!ifi) + { + log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index); + return; + } + if (a[IFA_FLAGS]) ifa_flags = rta_get_u32(a[IFA_FLAGS]); else ifa_flags = i->ifa_flags; + struct ifa ifa; + bzero(&ifa, sizeof(ifa)); + 
ifa.iface = ifi; + if (ifa_flags & IFA_F_SECONDARY) + ifa.flags |= IA_SECONDARY; + + ifa.ip = rta_get_ipa(a[IFA_LOCAL]); + + if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH) + { + log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen); + new = 0; + } + if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH) + { + ifa.brd = rta_get_ipa(a[IFA_ADDRESS]); + net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen); + + /* It is either a host address or a peer address */ + if (ipa_equal(ifa.ip, ifa.brd)) + ifa.flags |= IA_HOST; + else + { + ifa.flags |= IA_PEER; + ifa.opposite = ifa.brd; + } + } + else + { + net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen); + net_normalize(&ifa.prefix); + + if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1) + ifa.opposite = ipa_opposite_m1(ifa.ip); + + if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2) + ifa.opposite = ipa_opposite_m2(ifa.ip); + + if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST]) + { + ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]); + ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen))); + + if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd)) + ifa.brd = ipa_from_ip4(xbrd); + else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */ + { + log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name); + ifa.brd = ipa_from_ip4(ybrd); + } + } + } + + scope = ipa_classify(ifa.ip); + if (scope < 0) + { + log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name); + return; + } + ifa.scope = scope & IADDR_SCOPE_MASK; + + DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n", + ifi->index, ifi->name, + new ? 
"added" : "removed", + ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite); + + if (new) + ifa_update(&ifa); + else + ifa_delete(&ifa); + + if (!scan) + if_end_partial_update(ifi); +} + +static void +nl_parse_addr6(struct ifaddrmsg *i, int scan, int new) +{ + struct rtattr *a[BIRD_IFA_MAX]; + struct iface *ifi; + u32 ifa_flags; + int scope; + + if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a))) + return; + + if (!a[IFA_ADDRESS]) + { + log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)"); + return; + } + ifi = if_find_by_index(i->ifa_index); if (!ifi) { @@ -745,65 +988,50 @@ nl_parse_addr(struct nlmsghdr *h, int scan) return; } + if (a[IFA_FLAGS]) + ifa_flags = rta_get_u32(a[IFA_FLAGS]); + else + ifa_flags = i->ifa_flags; + + struct ifa ifa; bzero(&ifa, sizeof(ifa)); ifa.iface = ifi; if (ifa_flags & IFA_F_SECONDARY) ifa.flags |= IA_SECONDARY; -#ifdef IPV6 /* Ignore tentative addresses silently */ if (ifa_flags & IFA_F_TENTATIVE) return; -#endif /* IFA_LOCAL can be unset for IPv6 interfaces */ - memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip)); - ipa_ntoh(ifa.ip); - ifa.pxlen = i->ifa_prefixlen; - if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS) + ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? 
: a[IFA_ADDRESS]); + + if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH) { log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen); new = 0; } - if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS) + if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH) { - ip_addr addr; - memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr)); - ipa_ntoh(addr); - ifa.prefix = ifa.brd = addr; + ifa.brd = rta_get_ipa(a[IFA_ADDRESS]); + net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen); /* It is either a host address or a peer address */ - if (ipa_equal(ifa.ip, addr)) + if (ipa_equal(ifa.ip, ifa.brd)) ifa.flags |= IA_HOST; else { ifa.flags |= IA_PEER; - ifa.opposite = addr; + ifa.opposite = ifa.brd; } } else { - ip_addr netmask = ipa_mkmask(ifa.pxlen); - ifa.prefix = ipa_and(ifa.ip, netmask); - ifa.brd = ipa_or(ifa.ip, ipa_not(netmask)); - if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1) - ifa.opposite = ipa_opposite_m1(ifa.ip); + net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen); + net_normalize(&ifa.prefix); -#ifndef IPV6 - if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2) - ifa.opposite = ipa_opposite_m2(ifa.ip); - - if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST]) - { - ip_addr xbrd; - memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd)); - ipa_ntoh(xbrd); - if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd)) - ifa.brd = xbrd; - else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */ - log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name); - } -#endif + if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1) + ifa.opposite = ipa_opposite_m1(ifa.ip); } scope = ipa_classify(ifa.ip); @@ -814,10 +1042,10 @@ nl_parse_addr(struct nlmsghdr *h, int scan) } ifa.scope = scope & IADDR_SCOPE_MASK; - DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n", + DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n", ifi->index, ifi->name, new ? 
"added" : "removed", - ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite); + ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite); if (new) ifa_update(&ifa); @@ -828,6 +1056,26 @@ nl_parse_addr(struct nlmsghdr *h, int scan) if_end_partial_update(ifi); } +static void +nl_parse_addr(struct nlmsghdr *h, int scan) +{ + struct ifaddrmsg *i; + + if (!(i = nl_checkin(h, sizeof(*i)))) + return; + + int new = (h->nlmsg_type == RTM_NEWADDR); + + switch (i->ifa_family) + { + case AF_INET: + return nl_parse_addr4(i, scan, new); + + case AF_INET6: + return nl_parse_addr6(i, scan, new); + } +} + void kif_do_scan(struct kif_proto *p UNUSED) { @@ -862,7 +1110,14 @@ kif_do_scan(struct kif_proto *p UNUSED) } } - nl_request_dump(BIRD_AF, RTM_GETADDR); + nl_request_dump(AF_INET, RTM_GETADDR); + while (h = nl_get_scan()) + if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) + nl_parse_addr(h, 1); + else + log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type); + + nl_request_dump(AF_INET6, RTM_GETADDR); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) nl_parse_addr(h, 1); @@ -884,10 +1139,10 @@ krt_table_id(struct krt_proto *p) static HASH(struct krt_proto) nl_table_map; -#define RTH_FN(k) u32_hash(k) -#define RTH_EQ(k1,k2) k1 == k2 -#define RTH_KEY(p) krt_table_id(p) -#define RTH_NEXT(p) p->sys.hash_next +#define RTH_KEY(p) p->af, krt_table_id(p) +#define RTH_NEXT(p) p->sys.hash_next +#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2 +#define RTH_FN(a,i) a ^ u32_hash(i) #define RTH_REHASH rth_rehash #define RTH_PARAMS /8, *2, 2, 2, 6, 20 @@ -899,28 +1154,21 @@ krt_capable(rte *e) { rta *a = e->attrs; - if (a->cast != RTC_UNICAST) - return 0; - switch (a->dest) - { - case RTD_ROUTER: - case RTD_DEVICE: - if (a->iface == NULL) - return 0; + { + case RTD_UNICAST: case RTD_BLACKHOLE: case RTD_UNREACHABLE: case RTD_PROHIBIT: - case RTD_MULTIPATH: - break; + return 1; + default: return 0; - } - 
return 1; + } } static inline int -nh_bufsize(struct mpnh *nh) +nh_bufsize(struct nexthop *nh) { int rv = 0; for (; nh != NULL; nh = nh->next) @@ -929,32 +1177,62 @@ nh_bufsize(struct mpnh *nh) } static int -nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface) +nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, struct nexthop *nh) { eattr *ea; net *net = e->net; rta *a = e->attrs; + int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh)); u32 priority = 0; struct { struct nlmsghdr h; struct rtmsg r; - char buf[128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops)]; - } r; - - DBG("nl_send_route(%I/%d,op=%x)\n", net->n.prefix, net->n.pxlen, op); - - bzero(&r.h, sizeof(r.h)); - bzero(&r.r, sizeof(r.r)); - r.h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE; - r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - r.h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; + char buf[0]; + } *r; + + int rsize = sizeof(*r) + bufsize; + r = alloca(rsize); + + DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op); + + bzero(&r->h, sizeof(r->h)); + bzero(&r->r, sizeof(r->r)); + r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE; + r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); + r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; + + r->r.rtm_family = p->af; + r->r.rtm_dst_len = net_pxlen(net->n.addr); + r->r.rtm_protocol = RTPROT_BIRD; + r->r.rtm_scope = RT_SCOPE_NOWHERE; +#ifdef HAVE_MPLS_KERNEL + if (p->af == AF_MPLS) + { + /* + * Kernel MPLS code is a bit picky. 
We must: + * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE) + * 2) Never use RTA_PRIORITY + */ + + u32 label = net_mpls(net->n.addr); + nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label); + r->r.rtm_scope = RT_SCOPE_UNIVERSE; + r->r.rtm_type = RTN_UNICAST; + } + else +#endif + { + nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); - r.r.rtm_family = BIRD_AF; - r.r.rtm_dst_len = net->n.pxlen; - r.r.rtm_protocol = RTPROT_BIRD; - r.r.rtm_scope = RT_SCOPE_NOWHERE; - nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix); + /* Add source address for IPv6 SADR routes */ + if (net->n.addr->type == NET_IP6_SADR) + { + net_addr_ip6_sadr *a = (void *) &net->n.addr; + nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix); + r->r.rtm_src_len = a->src_pxlen; + } + } /* * Strange behavior for RTM_DELROUTE: @@ -964,11 +1242,13 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d */ if (krt_table_id(p) < 256) - r.r.rtm_table = krt_table_id(p); + r->r.rtm_table = krt_table_id(p); else - nl_add_attr_u32(&r.h, sizeof(r), RTA_TABLE, krt_table_id(p)); + nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p)); - if (a->source == RTS_DUMMY) + if (p->af == AF_MPLS) + priority = 0; + else if (a->source == RTS_DUMMY) priority = e->u.krt.metric; else if (KRT_CF->sys.metric) priority = KRT_CF->sys.metric; @@ -976,23 +1256,25 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d priority = ea->u.data; if (priority) - nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, priority); + nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority); /* For route delete, we do not specify remaining route attributes */ if (op == NL_OP_DELETE) goto dest; /* Default scope is LINK for device routes, UNIVERSE otherwise */ - if (ea = ea_find(eattrs, EA_KRT_SCOPE)) - r.r.rtm_scope = ea->u.data; + if (p->af == AF_MPLS) + r->r.rtm_scope = RT_SCOPE_UNIVERSE; + else if (ea = ea_find(eattrs, EA_KRT_SCOPE)) + r->r.rtm_scope 
= ea->u.data; else - r.r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) - nl_add_attr_ipa(&r.h, sizeof(r), RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); + nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); if (ea = ea_find(eattrs, EA_KRT_REALM)) - nl_add_attr_u32(&r.h, sizeof(r), RTA_FLOW, ea->u.data); + nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data); u32 metrics[KRT_METRICS_MAX]; @@ -1007,34 +1289,33 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d } if (metrics[0]) - nl_add_metrics(&r.h, sizeof(r), metrics, KRT_METRICS_MAX); + nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX); dest: - /* a->iface != NULL checked in krt_capable() for router and device routes */ switch (dest) { - case RTD_ROUTER: - r.r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index); - nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, gw); - break; - case RTD_DEVICE: - r.r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index); + case RTD_UNICAST: + r->r.rtm_type = RTN_UNICAST; + if (nh->next && !krt_ecmp6(p)) + nl_add_multipath(&r->h, rsize, nh, p->af); + else + { + nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index); + nl_add_nexthop(&r->h, rsize, nh, p->af); + + if (nh->flags & RNF_ONLINK) + r->r.rtm_flags |= RTNH_F_ONLINK; + } break; case RTD_BLACKHOLE: - r.r.rtm_type = RTN_BLACKHOLE; + r->r.rtm_type = RTN_BLACKHOLE; break; case RTD_UNREACHABLE: - r.r.rtm_type = RTN_UNREACHABLE; + r->r.rtm_type = RTN_UNREACHABLE; break; case RTD_PROHIBIT: - r.r.rtm_type = RTN_PROHIBIT; - break; - case RTD_MULTIPATH: - r.r.rtm_type = RTN_UNICAST; - nl_add_multipath(&r.h, sizeof(r), a->nexthops); + r->r.rtm_type = RTN_PROHIBIT; break; case RTD_NONE: break; @@ -1043,7 +1324,7 @@ dest: } /* Ignore missing for DELETE */ - return 
nl_exchange(&r.h, (op == NL_OP_DELETE)); + return nl_exchange(&r->h, (op == NL_OP_DELETE)); } static inline int @@ -1052,21 +1333,21 @@ nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs) rta *a = e->attrs; int err = 0; - if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH)) + if (krt_ecmp6(p) && a->nh.next) { - struct mpnh *nh = a->nexthops; + struct nexthop *nh = &(a->nh); - err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface); + err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_UNICAST, nh); if (err < 0) return err; for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface); + err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_UNICAST, nh); return err; } - return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface); + return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, &(a->nh)); } static inline int @@ -1076,7 +1357,7 @@ nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs) /* For IPv6, we just repeatedly request DELETE until we get error */ do - err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL); + err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, NULL); while (krt_ecmp6(p) && !err); return err; @@ -1110,20 +1391,6 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list n->n.flags &= ~KRF_SYNC_ERROR; } - -static inline struct mpnh * -nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight) -{ - struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh)); - - nh->gw = gw; - nh->iface = iface; - nh->next = NULL; - nh->weight = weight; - - return nh; -} - static int nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type) { @@ -1191,59 +1458,83 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) struct rtattr *a[BIRD_RTA_MAX]; int new = h->nlmsg_type == RTM_NEWROUTE; - ip_addr dst = IPA_NONE; + net_addr 
dst, src = {}; u32 oif = ~0; - u32 table; + u32 table_id; u32 priority = 0; u32 def_scope = RT_SCOPE_UNIVERSE; - int src; + int krt_src; if (!(i = nl_checkin(h, sizeof(*i)))) return; switch (i->rtm_family) { -#ifndef IPV6 - case AF_INET: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a))) - return; - break; -#else - case AF_INET6: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) - return; - break; -#endif - default: + case AF_INET: + if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a))) return; - } - if (a[RTA_DST]) - { - memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst)); - ipa_ntoh(dst); + if (a[RTA_DST]) + net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len); + else + net_fill_ip4(&dst, IP4_NONE, 0); + break; + + case AF_INET6: + if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) + return; + + if (a[RTA_DST]) + net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len); + else + net_fill_ip6(&dst, IP6_NONE, 0); + + if (a[RTA_SRC]) + net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len); + else + net_fill_ip6(&src, IP6_NONE, 0); + break; + +#ifdef HAVE_MPLS_KERNEL + case AF_MPLS: + if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a))) + return; + + if (!a[RTA_DST]) + SKIP("MPLS route without RTA_DST"); + + if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1) + SKIP("MPLS route with multi-label RTA_DST"); + + net_fill_mpls(&dst, rta_mpls_stack[0]); + break; +#endif + + default: + return; } if (a[RTA_OIF]) oif = rta_get_u32(a[RTA_OIF]); if (a[RTA_TABLE]) - table = rta_get_u32(a[RTA_TABLE]); + table_id = rta_get_u32(a[RTA_TABLE]); else - table = i->rtm_table; + table_id = i->rtm_table; - p = HASH_FIND(nl_table_map, RTH, table); /* Do we know this table? */ - DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, table, i->rtm_protocol, p ? p->p.name : "(none)"); + /* Do we know this table? 
*/ + p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id); if (!p) SKIP("unknown table %d\n", table); -#ifdef IPV6 + if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR)) + SKIP("src prefix for non-SADR channel\n"); + if (a[RTA_IIF]) SKIP("IIF set\n"); -#else + if (i->rtm_tos != 0) /* We don't support TOS */ SKIP("TOS %02x\n", i->rtm_tos); -#endif if (s->scan && !new) SKIP("RTM_DELROUTE in scan\n"); @@ -1251,7 +1542,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (a[RTA_PRIORITY]) priority = rta_get_u32(a[RTA_PRIORITY]); - int c = ipa_classify_net(dst); + int c = net_classify(&dst); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) SKIP("strange class/scope\n"); @@ -1261,88 +1552,98 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) SKIP("proto unspec\n"); case RTPROT_REDIRECT: - src = KRT_SRC_REDIRECT; + krt_src = KRT_SRC_REDIRECT; break; case RTPROT_KERNEL: - src = KRT_SRC_KERNEL; + krt_src = KRT_SRC_KERNEL; return; case RTPROT_BIRD: if (!s->scan) SKIP("echo\n"); - src = KRT_SRC_BIRD; + krt_src = KRT_SRC_BIRD; break; case RTPROT_BOOT: default: - src = KRT_SRC_ALIEN; + krt_src = KRT_SRC_ALIEN; } - net *net = net_get(p->p.table, dst, i->rtm_dst_len); + net_addr *n = &dst; + if (p->p.net_type == NET_IP6_SADR) + { + n = alloca(sizeof(net_addr_ip6_sadr)); + net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst), + net6_prefix(&src), net6_pxlen(&src)); + } + + net *net = net_get(p->p.main_channel->table, n); if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type)) nl_announce_route(s); - rta *ra = lp_allocz(s->pool, sizeof(rta)); + rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE); ra->src = p->p.main_source; ra->source = RTS_INHERIT; ra->scope = SCOPE_UNIVERSE; - ra->cast = RTC_UNICAST; switch (i->rtm_type) { case RTN_UNICAST: + ra->dest = RTD_UNICAST; if (a[RTA_MULTIPATH]) - { - ra->dest = RTD_MULTIPATH; - ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH], i->rtm_family); - if (!ra->nexthops) + { 
+ struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family); + if (!nh) { - log(L_ERR "KRT: Received strange multipath route %I/%d", - net->n.prefix, net->n.pxlen); + log(L_ERR "KRT: Received strange multipath route %N", net->n.addr); return; } + ra->nh = *nh; break; } - ra->iface = if_find_by_index(oif); - if (!ra->iface) + ra->nh.iface = if_find_by_index(oif); + if (!ra->nh.iface) { - log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u", - net->n.prefix, net->n.pxlen, oif); + log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); return; } - if (a[RTA_GATEWAY]) + if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY] +#ifdef HAVE_MPLS_KERNEL + || (i->rtm_family == AF_MPLS) && a[RTA_VIA] +#endif + ) { - neighbor *ng; - ra->dest = RTD_ROUTER; - memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw)); - ipa_ntoh(ra->gw); +#ifdef HAVE_MPLS_KERNEL + if (i->rtm_family == AF_MPLS) + ra->nh.gw = rta_get_via(a[RTA_VIA]); + else +#endif + ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]); -#ifdef IPV6 /* Silently skip strange 6to4 routes */ - if (ipa_in_net(ra->gw, IPA_NONE, 96)) + const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); + if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit)) return; -#endif - ng = neigh_find2(&p->p, &ra->gw, ra->iface, - (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0); - if (!ng || (ng->scope == SCOPE_HOST)) + if (i->rtm_flags & RTNH_F_ONLINK) + ra->nh.flags |= RNF_ONLINK; + + neighbor *nbr; + nbr = neigh_find2(&p->p, &(ra->nh.gw), ra->nh.iface, + (ra->nh.flags & RNF_ONLINK) ? 
NEF_ONLINK : 0); + if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %I/%d with strange next-hop %I", - net->n.prefix, net->n.pxlen, ra->gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, + ra->nh.gw); return; } } - else - { - ra->dest = RTD_DEVICE; - def_scope = RT_SCOPE_LINK; - } break; case RTN_BLACKHOLE: @@ -1360,6 +1661,38 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; } +#ifdef HAVE_MPLS_KERNEL + int labels = 0; + if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next) + labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label); + + if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next) + { + switch (rta_get_u16(a[RTA_ENCAP_TYPE])) + { + case LWTUNNEL_ENCAP_MPLS: + { + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + labels = rta_get_mpls(enca[RTA_DST], ra->nh.label); + break; + } + default: + SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); + break; + } + } + + if (labels < 0) + { + log(L_WARN "KRT: Too long MPLS stack received, ignoring."); + ra->nh.labels = 0; + } + else + ra->nh.labels = labels; +#endif + if (i->rtm_scope != def_scope) { ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); @@ -1375,9 +1708,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (a[RTA_PREFSRC]) { - ip_addr ps; - memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps)); - ipa_ntoh(ps); + ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); ea->next = ra->eattrs; @@ -1413,8 +1744,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) { - log(L_ERR "KRT: Received route %I/%d with strange RTA_METRICS attribute", - net->n.prefix, net->n.pxlen); + log(L_ERR "KRT: Received route %N with strange RTA_METRICS 
attribute", net->n.addr); return; } @@ -1441,8 +1771,8 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) * Ideally, now we would send the received route to the rest of kernel code. * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we * postpone it and merge next hops until the end of the sequence. Note that - * proper multipath updates are rejected by nl_mergable_route(), so it is - * always the first case for them. + * when doing merging of next hops, we expect the new route to be unipath. + * Otherwise, we ignore additional next hops in nexthop_insert(). */ if (!s->net) @@ -1452,7 +1782,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) s->attrs = ra; s->proto = p; s->new = new; - s->krt_src = src; + s->krt_src = krt_src; s->krt_type = i->rtm_type; s->krt_proto = i->rtm_protocol; s->krt_metric = priority; @@ -1460,15 +1790,20 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else { /* Merge next hops with the stored route */ - rta *a = s->attrs; + rta *oa = s->attrs; + + struct nexthop *nhs = &oa->nh; + nexthop_insert(&nhs, &ra->nh); - if (a->dest != RTD_MULTIPATH) + /* Perhaps new nexthop is inserted at the first position */ + if (nhs == &ra->nh) { - a->dest = RTD_MULTIPATH; - a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0); - } + /* Swap rtas */ + s->attrs = ra; - mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0)); + /* Keep old eattrs */ + ra->eattrs = oa->eattrs; + } } } @@ -1478,16 +1813,34 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL struct nlmsghdr *h; struct nl_parse_state s; - nl_parse_begin(&s, 1, krt_ecmp6(p)); + nl_parse_begin(&s, 1, 0); + nl_request_dump(AF_INET, RTM_GETROUTE); + while (h = nl_get_scan()) + if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) + nl_parse_route(&s, h); + else + log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); + nl_parse_end(&s); - nl_request_dump(BIRD_AF, 
RTM_GETROUTE); + nl_parse_begin(&s, 1, 1); + nl_request_dump(AF_INET6, RTM_GETROUTE); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) nl_parse_route(&s, h); else log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); + nl_parse_end(&s); +#ifdef HAVE_MPLS_KERNEL + nl_parse_begin(&s, 1, 1); + nl_request_dump(AF_MPLS, RTM_GETROUTE); + while (h = nl_get_scan()) + if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) + nl_parse_route(&s, h); + else + log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); nl_parse_end(&s); +#endif } /* @@ -1609,11 +1962,10 @@ nl_open_async(void) bzero(&sa, sizeof(sa)); sa.nl_family = AF_NETLINK; -#ifdef IPV6 - sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE; -#else - sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE; -#endif + sa.nl_groups = RTMGRP_LINK | + RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE | + RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE; + if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) { log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m"); @@ -1640,14 +1992,14 @@ nl_open_async(void) void krt_sys_io_init(void) { - nl_linpool = lp_new(krt_pool, 4080); + nl_linpool = lp_new_default(krt_pool); HASH_INIT(nl_table_map, krt_pool, 6); } int krt_sys_start(struct krt_proto *p) { - struct krt_proto *old = HASH_FIND(nl_table_map, RTH, krt_table_id(p)); + struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p)); if (old) { @@ -1680,7 +2032,7 @@ void krt_sys_init_config(struct krt_config *cf) { cf->sys.table_id = RT_TABLE_MAIN; - cf->sys.metric = 0; + cf->sys.metric = 32; } void @@ -1751,3 +2103,9 @@ void kif_sys_shutdown(struct kif_proto *p UNUSED) { } + +int +kif_update_sysdep_addr(struct iface *i UNUSED) +{ + return 0; +} diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile new file mode 100644 index 00000000..f592399c --- /dev/null +++ b/sysdep/unix/Makefile @@ 
-0,0 +1,8 @@ +src := io.c krt.c log.c main.c random.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) +$(conf-y-targets): $(s)krt.Y + +src := $(filter-out main.c, $(src)) +tests_objs := $(tests_objs) $(src-o-files) diff --git a/sysdep/unix/Modules b/sysdep/unix/Modules deleted file mode 100644 index 2c6514df..00000000 --- a/sysdep/unix/Modules +++ /dev/null @@ -1,12 +0,0 @@ -log.c -main.c -timer.h -io.c -unix.h -endian.h -config.Y -random.c - -krt.c -krt.h -krt.Y diff --git a/sysdep/unix/config.Y b/sysdep/unix/config.Y index d6ab8cab..ccca4a62 100644 --- a/sysdep/unix/config.Y +++ b/sysdep/unix/config.Y @@ -8,14 +8,13 @@ CF_HDR -#include "lib/unix.h" +#include "sysdep/unix/unix.h" #include <stdio.h> CF_DECLS CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR, SOFT) -CF_KEYWORDS(TIMEFORMAT, ISO, OLD, SHORT, LONG, BASE, NAME, CONFIRM, UNDO, CHECK, TIMEOUT) -CF_KEYWORDS(DEBUG, LATENCY, LIMIT, WATCHDOG, WARNING, TIMEOUT) +CF_KEYWORDS(NAME, CONFIRM, UNDO, CHECK, TIMEOUT, DEBUG, LATENCY, LIMIT, WATCHDOG, WARNING) %type <i> log_mask log_mask_list log_cat cfg_timeout %type <g> log_file @@ -85,28 +84,6 @@ mrtdump_base: ; -CF_ADDTO(conf, timeformat_base) - -timeformat_which: - ROUTE { $$ = &new_config->tf_route; } - | PROTOCOL { $$ = &new_config->tf_proto; } - | BASE { $$ = &new_config->tf_base; } - | LOG { $$ = &new_config->tf_log; } - -timeformat_spec: - timeformat_which TEXT { *$1 = (struct timeformat){$2, NULL, 0}; } - | timeformat_which TEXT expr TEXT { *$1 = (struct timeformat){$2, $4, $3}; } - | timeformat_which ISO SHORT { *$1 = (struct timeformat){"%T", "%F", 20*3600}; } - | timeformat_which ISO LONG { *$1 = (struct timeformat){"%F %T", NULL, 0}; } - | timeformat_which OLD SHORT { *$1 = (struct timeformat){NULL, NULL, 0}; } - | timeformat_which OLD LONG { *$1 = (struct timeformat){"%d-%m-%Y %T", NULL, 0}; } - ; - -timeformat_base: - TIMEFORMAT timeformat_spec ';' - ; - - CF_ADDTO(conf, debug_unix) debug_unix: diff 
--git a/sysdep/unix/io.c b/sysdep/unix/io.c index 53a37a50..012deaf0 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -34,14 +34,15 @@ #include "nest/bird.h" #include "lib/lists.h" #include "lib/resource.h" -#include "lib/timer.h" #include "lib/socket.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/string.h" #include "nest/iface.h" +#include "conf/conf.h" -#include "lib/unix.h" -#include "lib/sysio.h" +#include "sysdep/unix/unix.h" +#include CONFIG_INCLUDE_SYSIO_H /* Maximum number of calls of tx handler for one socket in one * poll iteration. Should be small enough to not monopolize CPU by @@ -101,381 +102,60 @@ tracked_fopen(pool *p, char *name, char *mode) return f; } -/** - * DOC: Timers - * - * Timers are resources which represent a wish of a module to call - * a function at the specified time. The platform dependent code - * doesn't guarantee exact timing, only that a timer function - * won't be called before the requested time. - * - * In BIRD, time is represented by values of the &bird_clock_t type - * which are integral numbers interpreted as a relative number of seconds since - * some fixed time point in past. The current time can be read - * from variable @now with reasonable accuracy and is monotonic. There is also - * a current 'absolute' time in variable @now_real reported by OS. - * - * Each timer is described by a &timer structure containing a pointer - * to the handler function (@hook), data private to this function (@data), - * time the function should be called at (@expires, 0 for inactive timers), - * for the other fields see |timer.h|. 
- */ - -#define NEAR_TIMER_LIMIT 4 -static list near_timers, far_timers; -static bird_clock_t first_far_timer = TIME_INFINITY; - -/* now must be different from 0, because 0 is a special value in timer->expires */ -bird_clock_t now = 1, now_real, boot_time; - -static void -update_times_plain(void) -{ - bird_clock_t new_time = time(NULL); - int delta = new_time - now_real; - - if ((delta >= 0) && (delta < 60)) - now += delta; - else if (now_real != 0) - log(L_WARN "Time jump, delta %d s", delta); +/* + * Time clock + */ - now_real = new_time; -} +btime boot_time; -static void -update_times_gettime(void) +void +times_init(struct timeloop *loop) { struct timespec ts; int rv; rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv != 0) - die("clock_gettime: %m"); - - if (ts.tv_sec != now) { - if (ts.tv_sec < now) - log(L_ERR "Monotonic timer is broken"); - - now = ts.tv_sec; - now_real = time(NULL); - } -} - -static int clock_monotonic_available; - -static inline void -update_times(void) -{ - if (clock_monotonic_available) - update_times_gettime(); - else - update_times_plain(); -} - -static inline void -init_times(void) -{ - struct timespec ts; - clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0); - if (!clock_monotonic_available) - log(L_WARN "Monotonic timer is missing"); -} - - -static void -tm_free(resource *r) -{ - timer *t = (timer *) r; - - tm_stop(t); -} - -static void -tm_dump(resource *r) -{ - timer *t = (timer *) r; - - debug("(code %p, data %p, ", t->hook, t->data); - if (t->randomize) - debug("rand %d, ", t->randomize); - if (t->recurrent) - debug("recur %d, ", t->recurrent); - if (t->expires) - debug("expires in %d sec)\n", t->expires - now); - else - debug("inactive)\n"); -} - -static struct resclass tm_class = { - "Timer", - sizeof(timer), - tm_free, - tm_dump, - NULL, - NULL -}; - -/** - * tm_new - create a timer - * @p: pool - * - * This function creates a new timer resource and returns - * a pointer to it. 
To use the timer, you need to fill in - * the structure fields and call tm_start() to start timing. - */ -timer * -tm_new(pool *p) -{ - timer *t = ralloc(p, &tm_class); - return t; -} - -static inline void -tm_insert_near(timer *t) -{ - node *n = HEAD(near_timers); - - while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires)) - n = n->next; - insert_node(&t->n, n->prev); -} - -/** - * tm_start - start a timer - * @t: timer - * @after: number of seconds the timer should be run after - * - * This function schedules the hook function of the timer to - * be called after @after seconds. If the timer has been already - * started, it's @expire time is replaced by the new value. - * - * You can have set the @randomize field of @t, the timeout - * will be increased by a random number of seconds chosen - * uniformly from range 0 .. @randomize. - * - * You can call tm_start() from the handler function of the timer - * to request another run of the timer. Also, you can set the @recurrent - * field to have the timer re-added automatically with the same timeout. - */ -void -tm_start(timer *t, unsigned after) -{ - bird_clock_t when; - - if (t->randomize) - after += random() % (t->randomize + 1); - when = now + after; - if (t->expires == when) - return; - if (t->expires) - rem_node(&t->n); - t->expires = when; - if (after <= NEAR_TIMER_LIMIT) - tm_insert_near(t); - else - { - if (!first_far_timer || first_far_timer > when) - first_far_timer = when; - add_tail(&far_timers, &t->n); - } -} - -/** - * tm_stop - stop a timer - * @t: timer - * - * This function stops a timer. If the timer is already stopped, - * nothing happens. 
- */ -void -tm_stop(timer *t) -{ - if (t->expires) - { - rem_node(&t->n); - t->expires = 0; - } -} + if (rv < 0) + die("Monotonic clock is missing"); -static void -tm_dump_them(char *name, list *l) -{ - node *n; - timer *t; + if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40))) + log(L_WARN "Monotonic clock is crazy"); - debug("%s timers:\n", name); - WALK_LIST(n, *l) - { - t = SKIP_BACK(timer, n, n); - debug("%p ", t); - tm_dump(&t->r); - } - debug("\n"); + loop->last_time = ts.tv_sec S + ts.tv_nsec NS; + loop->real_time = 0; } void -tm_dump_all(void) -{ - tm_dump_them("Near", &near_timers); - tm_dump_them("Far", &far_timers); -} - -static inline time_t -tm_first_shot(void) +times_update(struct timeloop *loop) { - time_t x = first_far_timer; + struct timespec ts; + int rv; - if (!EMPTY_LIST(near_timers)) - { - timer *t = SKIP_BACK(timer, n, HEAD(near_timers)); - if (t->expires < x) - x = t->expires; - } - return x; -} + rv = clock_gettime(CLOCK_MONOTONIC, &ts); + if (rv < 0) + die("clock_gettime: %m"); -void io_log_event(void *hook, void *data); + btime new_time = ts.tv_sec S + ts.tv_nsec NS; -static void -tm_shot(void) -{ - timer *t; - node *n, *m; + if (new_time < loop->last_time) + log(L_ERR "Monotonic clock is broken"); - if (first_far_timer <= now) - { - bird_clock_t limit = now + NEAR_TIMER_LIMIT; - first_far_timer = TIME_INFINITY; - n = HEAD(far_timers); - while (m = n->next) - { - t = SKIP_BACK(timer, n, n); - if (t->expires <= limit) - { - rem_node(n); - tm_insert_near(t); - } - else if (t->expires < first_far_timer) - first_far_timer = t->expires; - n = m; - } - } - while ((n = HEAD(near_timers)) -> next) - { - int delay; - t = SKIP_BACK(timer, n, n); - if (t->expires > now) - break; - rem_node(n); - delay = t->expires - now; - t->expires = 0; - if (t->recurrent) - { - int i = t->recurrent - delay; - if (i < 0) - i = 0; - tm_start(t, i); - } - io_log_event(t->hook, t->data); - t->hook(t); - } -} - -/** - * tm_parse_datetime - parse a date and 
time - * @x: datetime string - * - * tm_parse_datetime() takes a textual representation of - * a date and time (dd-mm-yyyy hh:mm:ss) - * and converts it to the corresponding value of type &bird_clock_t. - */ -bird_clock_t -tm_parse_datetime(char *x) -{ - struct tm tm; - int n; - time_t t; - - if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n]) - return tm_parse_date(x); - tm.tm_mon--; - tm.tm_year -= 1900; - t = mktime(&tm); - if (t == (time_t) -1) - return 0; - return t; + loop->last_time = new_time; + loop->real_time = 0; } -/** - * tm_parse_date - parse a date - * @x: date string - * - * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy) - * and converts it to the corresponding value of type &bird_clock_t. - */ -bird_clock_t -tm_parse_date(char *x) -{ - struct tm tm; - int n; - time_t t; - if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n]) - return 0; - tm.tm_mon--; - tm.tm_year -= 1900; - tm.tm_hour = tm.tm_min = tm.tm_sec = 0; - t = mktime(&tm); - if (t == (time_t) -1) - return 0; - return t; -} - -static void -tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta) -{ - static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; - - if (delta < 20*3600) - bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min); - else if (delta < 360*86400) - bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday); - else - bsprintf(x, "%d", tm->tm_year+1900); -} - -#include "conf/conf.h" - -/** - * tm_format_datetime - convert date and time to textual representation - * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE - * @fmt_spec: specification of resulting textual representation of the time - * @t: time - * - * This function formats the given relative time value @t to a textual - * date/time representation (dd-mm-yyyy hh:mm:ss) in real time. 
- */ void -tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t) +times_update_real_time(struct timeloop *loop) { - const char *fmt_used; - struct tm *tm; - bird_clock_t delta = now - t; - t = now_real - delta; - tm = localtime(&t); - - if (fmt_spec->fmt1 == NULL) - return tm_format_reltime(x, tm, delta); + struct timespec ts; + int rv; - if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit)) - fmt_used = fmt_spec->fmt1; - else - fmt_used = fmt_spec->fmt2; + rv = clock_gettime(CLOCK_REALTIME, &ts); + if (rv < 0) + die("clock_gettime: %m"); - int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm); - if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE)) - strcpy(x, "<too-long>"); + loop->real_time = ts.tv_sec S + ts.tv_nsec NS; } @@ -1071,26 +751,63 @@ sk_free_bufs(sock *s) } } +#ifdef HAVE_LIBSSH +static void +sk_ssh_free(sock *s) +{ + struct ssh_sock *ssh = s->ssh; + + if (s->ssh == NULL) + return; + + s->ssh = NULL; + + if (ssh->channel) + { + if (ssh_channel_is_open(ssh->channel)) + ssh_channel_close(ssh->channel); + ssh_channel_free(ssh->channel); + ssh->channel = NULL; + } + + if (ssh->session) + { + ssh_disconnect(ssh->session); + ssh_free(ssh->session); + ssh->session = NULL; + } +} +#endif + static void sk_free(resource *r) { sock *s = (sock *) r; sk_free_bufs(s); - if (s->fd >= 0) - { - close(s->fd); - /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ - if (s->flags & SKF_THREAD) - return; +#ifdef HAVE_LIBSSH + if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE) + sk_ssh_free(s); +#endif + + if (s->fd < 0) + return; + /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ + if (!(s->flags & SKF_THREAD)) + { if (s == current_sock) current_sock = sk_next(s); if (s == stored_sock) stored_sock = sk_next(s); rem_node(&s->n); } + + if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE) + close(s->fd); + + s->fd = -1; } void @@ -1141,7 +858,7 @@ static void sk_dump(resource *r) { sock *s = (sock *) r; - static 
char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" }; + static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" }; debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n", sk_type_names[s->type], @@ -1192,6 +909,9 @@ sk_setup(sock *s) int y = 1; int fd = s->fd; + if (s->type == SK_SSH_ACTIVE) + return 0; + if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) ERR("O_NONBLOCK"); @@ -1263,7 +983,7 @@ sk_setup(sock *s) if (sk_is_ipv6(s)) { - if (s->flags & SKF_V6ONLY) + if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP)) if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0) ERR("IPV6_V6ONLY"); @@ -1317,6 +1037,16 @@ sk_tcp_connected(sock *s) s->tx_hook(s); } +#ifdef HAVE_LIBSSH +static void +sk_ssh_connected(sock *s) +{ + sk_alloc_bufs(s); + s->type = SK_SSH; + s->tx_hook(s); +} +#endif + static int sk_passive_connected(sock *s, int type) { @@ -1334,8 +1064,8 @@ sk_passive_connected(sock *s, int type) sock *t = sk_new(s->pool); t->type = type; - t->fd = fd; t->af = s->af; + t->fd = fd; t->ttl = s->ttl; t->tos = s->tos; t->rbsize = s->rbsize; @@ -1369,6 +1099,201 @@ sk_passive_connected(sock *s, int type) return 1; } +#ifdef HAVE_LIBSSH +/* + * Return SSH_OK or SSH_AGAIN or SSH_ERROR + */ +static int +sk_ssh_connect(sock *s) +{ + s->fd = ssh_get_fd(s->ssh->session); + + /* Big fall thru automata */ + switch (s->ssh->state) + { + case SK_SSH_CONNECT: + { + switch (ssh_connect(s->ssh->session)) + { + case SSH_AGAIN: + /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1) + * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere + * documented but our code relies on that. 
+ */ + return SSH_AGAIN; + + case SSH_OK: + break; + + default: + return SSH_ERROR; + } + } + + case SK_SSH_SERVER_KNOWN: + { + s->ssh->state = SK_SSH_SERVER_KNOWN; + + if (s->ssh->server_hostkey_path) + { + int server_identity_is_ok = 1; + + /* Check server identity */ + switch (ssh_is_server_known(s->ssh->session)) + { +#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args); + case SSH_SERVER_KNOWN_OK: + /* The server is known and has not changed. */ + break; + + case SSH_SERVER_NOT_KNOWN: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path); + break; + + case SSH_SERVER_KNOWN_CHANGED: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key."); + server_identity_is_ok = 0; + break; + + case SSH_SERVER_FILE_NOT_FOUND: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path); + server_identity_is_ok = 0; + break; + + case SSH_SERVER_ERROR: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened"); + server_identity_is_ok = 0; + break; + + case SSH_SERVER_FOUND_OTHER: + LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. 
" \ + "It is a possible attack."); + server_identity_is_ok = 0; + break; + } + + if (!server_identity_is_ok) + return SSH_ERROR; + } + } + + case SK_SSH_USERAUTH: + { + s->ssh->state = SK_SSH_USERAUTH; + switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL)) + { + case SSH_AUTH_AGAIN: + return SSH_AGAIN; + + case SSH_AUTH_SUCCESS: + break; + + default: + return SSH_ERROR; + } + } + + case SK_SSH_CHANNEL: + { + s->ssh->state = SK_SSH_CHANNEL; + s->ssh->channel = ssh_channel_new(s->ssh->session); + if (s->ssh->channel == NULL) + return SSH_ERROR; + } + + case SK_SSH_SESSION: + { + s->ssh->state = SK_SSH_SESSION; + switch (ssh_channel_open_session(s->ssh->channel)) + { + case SSH_AGAIN: + return SSH_AGAIN; + + case SSH_OK: + break; + + default: + return SSH_ERROR; + } + } + + case SK_SSH_SUBSYSTEM: + { + s->ssh->state = SK_SSH_SUBSYSTEM; + if (s->ssh->subsystem) + { + switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem)) + { + case SSH_AGAIN: + return SSH_AGAIN; + + case SSH_OK: + break; + + default: + return SSH_ERROR; + } + } + } + + case SK_SSH_ESTABLISHED: + s->ssh->state = SK_SSH_ESTABLISHED; + } + + return SSH_OK; +} + +/* + * Return file descriptor number if success + * Return -1 if failed + */ +static int +sk_open_ssh(sock *s) +{ + if (!s->ssh) + bug("sk_open() sock->ssh is not allocated"); + + ssh_session sess = ssh_new(); + if (sess == NULL) + ERR2("Cannot create a ssh session"); + s->ssh->session = sess; + + const int verbosity = SSH_LOG_NOLOG; + ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity); + ssh_options_set(sess, SSH_OPTIONS_HOST, s->host); + ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport)); + /* TODO: Add SSH_OPTIONS_BINDADDR */ + ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username); + + if (s->ssh->server_hostkey_path) + ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path); + + if (s->ssh->client_privkey_path) + ssh_options_set(sess, SSH_OPTIONS_IDENTITY, 
s->ssh->client_privkey_path); + + ssh_set_blocking(sess, 0); + + switch (sk_ssh_connect(s)) + { + case SSH_AGAIN: + break; + + case SSH_OK: + sk_ssh_connected(s); + break; + + case SSH_ERROR: + ERR2(ssh_get_error(sess)); + break; + } + + return ssh_get_fd(sess); + + err: + return -1; +} +#endif + /** * sk_open - open a socket * @s: socket @@ -1382,13 +1307,46 @@ sk_passive_connected(sock *s, int type) int sk_open(sock *s) { - int af = BIRD_AF; + int af = AF_UNSPEC; int fd = -1; int do_bind = 0; int bind_port = 0; ip_addr bind_addr = IPA_NONE; sockaddr sa; + if (s->type <= SK_IP) + { + /* + * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either + * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr). + * But the specifications have to be consistent. + */ + + switch (s->subtype) + { + case 0: + ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) || + (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr))); + af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6; + break; + + case SK_IPV4: + ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr)); + ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr)); + af = AF_INET; + break; + + case SK_IPV6: + ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr)); + ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr)); + af = AF_INET6; + break; + + default: + bug("Invalid subtype %d", s->subtype); + } + } + switch (s->type) { case SK_TCP_ACTIVE: @@ -1401,6 +1359,13 @@ sk_open(sock *s) do_bind = bind_port || ipa_nonzero(bind_addr); break; +#ifdef HAVE_LIBSSH + case SK_SSH_ACTIVE: + s->ttx = ""; /* Force s->ttx != s->tpos */ + fd = sk_open_ssh(s); + break; +#endif + case SK_UDP: fd = socket(af, SOCK_DGRAM, IPPROTO_UDP); bind_port = s->sport; @@ -1456,7 +1421,7 @@ sk_open(sock *s) if (sk_set_high_port(s) < 0) log(L_WARN "Socket error: %s%#m", s->err); - sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port); + sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port); if (bind(fd, &sa.sa, 
SA_LEN(sa)) < 0) ERR2("bind"); } @@ -1468,7 +1433,7 @@ sk_open(sock *s) switch (s->type) { case SK_TCP_ACTIVE: - sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport); + sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport); if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0) sk_tcp_connected(s); else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && @@ -1481,6 +1446,7 @@ sk_open(sock *s) ERR2("listen"); break; + case SK_SSH_ACTIVE: case SK_MAGIC: break; @@ -1490,6 +1456,7 @@ sk_open(sock *s) if (!(s->flags & SKF_THREAD)) sk_insert(s); + return 0; err: @@ -1672,6 +1639,28 @@ sk_maybe_write(sock *s) reset_tx_buffer(s); return 1; +#ifdef HAVE_LIBSSH + case SK_SSH: + while (s->ttx != s->tpos) + { + e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx); + + if (e < 0) + { + s->err = ssh_get_error(s->ssh->session); + s->err_hook(s, ssh_get_error_code(s->ssh->session)); + + reset_tx_buffer(s); + /* EPIPE is just a connection close notification during TX */ + s->err_hook(s, (errno != EPIPE) ? 
errno : 0); + return -1; + } + s->ttx += e; + } + reset_tx_buffer(s); + return 1; +#endif + case SK_UDP: case SK_IP: { @@ -1696,6 +1685,7 @@ sk_maybe_write(sock *s) reset_tx_buffer(s); return 1; } + default: bug("sk_maybe_write: unknown socket type %d", s->type); } @@ -1775,6 +1765,64 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa, } */ +static void +call_rx_hook(sock *s, int size) +{ + if (s->rx_hook(s, size)) + { + /* We need to be careful since the socket could have been deleted by the hook */ + if (current_sock == s) + s->rpos = s->rbuf; + } +} + +#ifdef HAVE_LIBSSH +static int +sk_read_ssh(sock *s) +{ + ssh_channel rchans[2] = { s->ssh->channel, NULL }; + struct timeval timev = { 1, 0 }; + + if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR) + return 1; /* Try again */ + + if (ssh_channel_is_eof(s->ssh->channel) != 0) + { + /* The remote side is closing the connection */ + s->err_hook(s, 0); + return 0; + } + + if (rchans[0] == NULL) + return 0; /* No data is available on the socket */ + + const uint used_bytes = s->rpos - s->rbuf; + const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0); + if (read_bytes > 0) + { + /* Received data */ + s->rpos += read_bytes; + call_rx_hook(s, used_bytes + read_bytes); + return 1; + } + else if (read_bytes == 0) + { + if (ssh_channel_is_eof(s->ssh->channel) != 0) + { + /* The remote side is closing the connection */ + s->err_hook(s, 0); + } + } + else + { + s->err = ssh_get_error(s->ssh->session); + s->err_hook(s, ssh_get_error_code(s->ssh->session)); + } + + return 0; /* No data is available on the socket */ +} +#endif + /* sk_read() and sk_write() are called from BFD's event loop */ int @@ -1808,17 +1856,17 @@ sk_read(sock *s, int revents) else { s->rpos += c; - if (s->rx_hook(s, s->rpos - s->rbuf)) - { - /* We need to be careful since the socket could have been deleted by the hook */ - if (current_sock == s) - s->rpos = s->rbuf; - } + call_rx_hook(s, 
s->rpos - s->rbuf); return 1; } return 0; } +#ifdef HAVE_LIBSSH + case SK_SSH: + return sk_read_ssh(s); +#endif + case SK_MAGIC: return s->rx_hook(s, 0); @@ -1857,6 +1905,27 @@ sk_write(sock *s) return 0; } +#ifdef HAVE_LIBSSH + case SK_SSH_ACTIVE: + { + switch (sk_ssh_connect(s)) + { + case SSH_OK: + sk_ssh_connected(s); + break; + + case SSH_AGAIN: + return 1; + + case SSH_ERROR: + s->err = ssh_get_error(s->ssh->session); + s->err_hook(s, ssh_get_error_code(s->ssh->session)); + break; + } + return 0; + } +#endif + default: if (s->ttx != s->tpos && sk_maybe_write(s) > 0) { @@ -1868,6 +1937,12 @@ sk_write(sock *s) } } +int sk_is_ipv4(sock *s) +{ return s->af == AF_INET; } + +int sk_is_ipv6(sock *s) +{ return s->af == AF_INET6; } + void sk_err(sock *s, int revents) { @@ -1925,9 +2000,6 @@ io_update_time(void) struct timespec ts; int rv; - if (!clock_monotonic_available) - return; - /* * This is third time-tracking procedure (after update_times() above and * times_update() in BFD), dedicated to internal event log and latency @@ -1938,7 +2010,7 @@ io_update_time(void) if (rv < 0) die("clock_gettime: %m"); - last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000); + last_time = ts.tv_sec S + ts.tv_nsec NS; if (event_open) { @@ -2066,15 +2138,13 @@ volatile int async_shutdown_flag; void io_init(void) { - init_list(&near_timers); - init_list(&far_timers); init_list(&sock_list); init_list(&global_event_list); krt_io_init(); - init_times(); - update_times(); - boot_time = now; - srandom((int) now_real); + // XXX init_times(); + // XXX update_times(); + boot_time = current_time(); + srandom((uint) (current_real_time() TO_S)); } static int short_loops = 0; @@ -2083,9 +2153,9 @@ static int short_loops = 0; void io_loop(void) { - int poll_tout; - time_t tout; + int poll_tout, timeout; int nfds, events, pout; + timer *t; sock *s; node *n; int fdmax = 256; @@ -2094,19 +2164,20 @@ io_loop(void) watchdog_start1(); for(;;) { + times_update(&main_timeloop); events = 
ev_run_list(&global_event_list); - timers: - update_times(); - tout = tm_first_shot(); - if (tout <= now) - { - tm_shot(); - goto timers; - } - poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */ - + timers_fire(&main_timeloop); io_close_event(); + // FIXME + poll_tout = (events ? 0 : 3000); /* Time in milliseconds */ + if (t = timers_first(&main_timeloop)) + { + times_update(&main_timeloop); + timeout = (tm_remains(t) TO_MS) + 1; + poll_tout = MIN(poll_tout, timeout); + } + nfds = 0; WALK_LIST(n, sock_list) { @@ -2177,6 +2248,8 @@ io_loop(void) } if (pout) { + times_update(&main_timeloop); + /* guaranteed to be non-empty */ current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 6fe39fa9..9aac8668 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -8,16 +8,29 @@ CF_HDR -#include "lib/krt.h" +#include "sysdep/unix/krt.h" CF_DEFINES #define THIS_KRT ((struct krt_config *) this_proto) #define THIS_KIF ((struct kif_config *) this_proto) +#define KIF_IFACE ((struct kif_iface_config *) this_ipatt) + +static void +kif_set_preferred(ip_addr ip) +{ + if (ipa_is_ip4(ip)) + KIF_IFACE->pref_v4 = ip; + else if (!ipa_is_link_local(ip)) + KIF_IFACE->pref_v6 = ip; + else + KIF_IFACE->pref_ll = ip; +} CF_DECLS CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS) +CF_KEYWORDS(INTERFACE, PREFERRED) %type <i> kern_mp_limit @@ -27,11 +40,12 @@ CF_GRAMMAR CF_ADDTO(proto, kern_proto '}') -kern_proto_start: proto_start KERNEL { this_proto = krt_init_config($1); } +kern_proto_start: proto_start KERNEL { + this_proto = krt_init_config($1); +} ; CF_ADDTO(kern_proto, kern_proto_start proto_name '{') -CF_ADDTO(kern_proto, kern_proto proto_item ';') CF_ADDTO(kern_proto, kern_proto kern_item ';') kern_mp_limit: @@ -40,10 +54,12 @@ kern_mp_limit: ; kern_item: - PERSIST bool { THIS_KRT->persist = $2; } + proto_item + | proto_channel { 
this_proto->net_type = $1->net_type; } + | PERSIST bool { THIS_KRT->persist = $2; } | SCAN TIME expr { /* Scan time of 0 means scan on startup only */ - THIS_KRT->scan_time = $3; + THIS_KRT->scan_time = $3 S_; } | LEARN bool { THIS_KRT->learn = $2; @@ -52,7 +68,6 @@ kern_item: cf_error("Learning of kernel routes not supported on this platform"); #endif } - | DEVICE ROUTES bool { THIS_KRT->devroutes = $3; } | GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; } | MERGE PATHS bool kern_mp_limit { THIS_KRT->merge_paths = $3 ? $4 : 0; @@ -71,23 +86,42 @@ kif_proto_start: proto_start DEVICE { this_proto = kif_init_config($1); } ; CF_ADDTO(kif_proto, kif_proto_start proto_name '{') -CF_ADDTO(kif_proto, kif_proto proto_item ';') CF_ADDTO(kif_proto, kif_proto kif_item ';') kif_item: - SCAN TIME expr { + proto_item + | INTERFACE kif_iface + | SCAN TIME expr { /* Scan time of 0 means scan on startup only */ - THIS_KIF->scan_time = $3; - } - | PRIMARY text_or_none prefix_or_ipa { - struct kif_primary_item *kpi = cfg_alloc(sizeof (struct kif_primary_item)); - kpi->pattern = $2; - kpi->prefix = $3.addr; - kpi->pxlen = $3.len; - add_tail(&THIS_KIF->primary, &kpi->n); + THIS_KIF->scan_time = $3 S_; } ; +kif_iface_start: +{ + this_ipatt = cfg_allocz(sizeof(struct kif_iface_config)); + add_tail(&THIS_KIF->iface_list, NODE this_ipatt); + init_list(&this_ipatt->ipn_list); +} + +kif_iface_item: + PREFERRED ipa { kif_set_preferred($2); } + ; + +kif_iface_opts: + /* empty */ + | kif_iface_opts kif_iface_item ';' + ; + +kif_iface_opt_list: + /* empty */ + | '{' kif_iface_opts '}' + ; + +kif_iface: + kif_iface_start iface_patt_list_nopx kif_iface_opt_list; + + CF_ADDTO(dynamic_attr, KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_KRT_SOURCE); }) CF_ADDTO(dynamic_attr, KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_KRT_METRIC); }) diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 526c0cab..b4fb1967 100644 --- a/sysdep/unix/krt.c 
+++ b/sysdep/unix/krt.c @@ -56,9 +56,9 @@ #include "nest/route.h" #include "nest/protocol.h" #include "filter/filter.h" -#include "lib/timer.h" #include "conf/conf.h" #include "lib/string.h" +#include "lib/timer.h" #include "unix.h" #include "krt.h" @@ -75,7 +75,7 @@ void krt_io_init(void) { krt_pool = rp_new(&root_pool, "Kernel Syncer"); - krt_filter_lp = lp_new(krt_pool, 4080); + krt_filter_lp = lp_new_default(krt_pool); init_list(&krt_proto_list); krt_sys_io_init(); } @@ -87,7 +87,17 @@ krt_io_init(void) struct kif_proto *kif_proto; static struct kif_config *kif_cf; static timer *kif_scan_timer; -static bird_clock_t kif_last_shot; +static btime kif_last_shot; + +static struct kif_iface_config kif_default_iface = {}; + +struct kif_iface_config * +kif_get_iface_config(struct iface *iface) +{ + struct kif_config *cf = (void *) (kif_proto->p.cf); + struct kif_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL); + return ic ?: &kif_default_iface; +} static void kif_scan(timer *t) @@ -95,14 +105,14 @@ kif_scan(timer *t) struct kif_proto *p = t->data; KRT_TRACE(p, D_EVENTS, "Scanning interfaces"); - kif_last_shot = now; + kif_last_shot = current_time(); kif_do_scan(p); } static void kif_force_scan(void) { - if (kif_proto && kif_last_shot + 2 < now) + if (kif_proto && ((kif_last_shot + 2 S) < current_time())) { kif_scan(kif_scan_timer); tm_start(kif_scan_timer, ((struct kif_config *) kif_proto->p.cf)->scan_time); @@ -112,65 +122,14 @@ kif_force_scan(void) void kif_request_scan(void) { - if (kif_proto && kif_scan_timer->expires > now) - tm_start(kif_scan_timer, 1); -} - -static inline int -prefer_addr(struct ifa *a, struct ifa *b) -{ - int sa = a->scope > SCOPE_LINK; - int sb = b->scope > SCOPE_LINK; - - if (sa < sb) - return 0; - else if (sa > sb) - return 1; - else - return ipa_compare(a->ip, b->ip) < 0; -} - -static inline struct ifa * -find_preferred_ifa(struct iface *i, ip_addr prefix, ip_addr mask) -{ - struct ifa *a, *b = NULL; - - 
WALK_LIST(a, i->addrs) - { - if (!(a->flags & IA_SECONDARY) && - ipa_equal(ipa_and(a->ip, mask), prefix) && - (!b || prefer_addr(a, b))) - b = a; - } - - return b; -} - -struct ifa * -kif_choose_primary(struct iface *i) -{ - struct kif_config *cf = (struct kif_config *) (kif_proto->p.cf); - struct kif_primary_item *it; - struct ifa *a; - - WALK_LIST(it, cf->primary) - { - if (!it->pattern || patmatch(it->pattern, i->name)) - if (a = find_preferred_ifa(i, it->prefix, ipa_mkmask(it->pxlen))) - return a; - } - - if (a = kif_get_primary_ip(i)) - return a; - - return find_preferred_ifa(i, IPA_NONE, IPA_NONE); + if (kif_proto && (kif_scan_timer->expires > (current_time() + 1 S))) + tm_start(kif_scan_timer, 1 S); } - static struct proto * kif_init(struct proto_config *c) { - struct kif_proto *p = proto_new(c, sizeof(struct kif_proto)); + struct kif_proto *p = proto_new(c); kif_sys_init(p); return &p->p; @@ -185,10 +144,7 @@ kif_start(struct proto *P) kif_sys_start(p); /* Start periodic interface scanning */ - kif_scan_timer = tm_new(P->pool); - kif_scan_timer->hook = kif_scan; - kif_scan_timer->data = p; - kif_scan_timer->recurrent = KIF_CF->scan_time; + kif_scan_timer = tm_new_init(P->pool, kif_scan, p, KIF_CF->scan_time, 0); kif_scan(kif_scan_timer); tm_start(kif_scan_timer, KIF_CF->scan_time); @@ -224,15 +180,15 @@ kif_reconfigure(struct proto *p, struct proto_config *new) tm_start(kif_scan_timer, n->scan_time); } - if (!EMPTY_LIST(o->primary) || !EMPTY_LIST(n->primary)) + if (!EMPTY_LIST(o->iface_list) || !EMPTY_LIST(n->iface_list)) { /* This is hack, we have to update a configuration * to the new value just now, because it is used - * for recalculation of primary addresses. + * for recalculation of preferred addresses. 
*/ p->cf = new; - ifa_recalc_all_primary_addresses(); + if_recalc_all_preferred_addresses(); } return 1; @@ -253,8 +209,8 @@ kif_init_config(int class) cf_error("Kernel device protocol already defined"); kif_cf = (struct kif_config *) proto_config_new(&proto_unix_iface, class); - kif_cf->scan_time = 60; - init_list(&kif_cf->primary); + kif_cf->scan_time = 60 S; + init_list(&kif_cf->iface_list); kif_sys_init_config(kif_cf); return (struct proto_config *) kif_cf; @@ -266,21 +222,17 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src) struct kif_config *d = (struct kif_config *) dest; struct kif_config *s = (struct kif_config *) src; - /* Shallow copy of everything (just scan_time currently) */ - proto_copy_rest(dest, src, sizeof(struct kif_config)); - - /* Copy primary addr list */ - cfg_copy_list(&d->primary, &s->primary, sizeof(struct kif_primary_item)); + /* Copy interface config list */ + cfg_copy_list(&d->iface_list, &s->iface_list, sizeof(struct kif_iface_config)); /* Fix sysdep parts */ kif_sys_copy_config(d, s); } - struct protocol proto_unix_iface = { .name = "Device", .template = "device%d", - .preference = DEF_PREF_DIRECT, + .proto_size = sizeof(struct kif_proto), .config_size = sizeof(struct kif_config), .preconfig = kif_preconfig, .init = kif_init, @@ -298,14 +250,14 @@ static inline void krt_trace_in(struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) - log(L_TRACE "%s: %I/%d: %s", p->p.name, e->net->n.prefix, e->net->n.pxlen, msg); + log(L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg); } static inline void krt_trace_in_rl(struct tbf *f, struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) - log_rl(f, L_TRACE "%s: %I/%d: %s", p->p.name, e->net->n.prefix, e->net->n.pxlen, msg); + log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg); } /* @@ -348,19 +300,15 @@ krt_learn_announce_update(struct krt_proto *p, rte *e) net *n = e->net; rta *aa = rta_clone(e->attrs); rte *ee = 
rte_get_temp(aa); - net *nn = net_get(p->p.table, n->n.prefix, n->n.pxlen); - ee->net = nn; ee->pflags = 0; - ee->pref = p->p.preference; ee->u.krt = e->u.krt; - rte_update(&p->p, nn, ee); + rte_update(&p->p, n->n.addr, ee); } static void krt_learn_announce_delete(struct krt_proto *p, net *n) { - n = net_find(p->p.table, n->n.prefix, n->n.pxlen); - rte_update(&p->p, n, NULL); + rte_update(&p->p, n->n.addr, NULL); } /* Called when alien route is discovered during scan */ @@ -368,7 +316,7 @@ static void krt_learn_scan(struct krt_proto *p, rte *e) { net *n0 = e->net; - net *n = net_get(&p->krt_table, n0->n.prefix, n0->n.pxlen); + net *n = net_get(&p->krt_table, n0->n.addr); rte *m, **mm; e->attrs = rta_lookup(e->attrs); @@ -412,9 +360,8 @@ krt_learn_prune(struct krt_proto *p) FIB_ITERATE_INIT(&fit, fib); again: - FIB_ITERATE_START(fib, &fit, f) + FIB_ITERATE_START(fib, &fit, net, n) { - net *n = (net *) f; rte *e, **ee, *best, **pbest, *old_best; /* @@ -455,8 +402,8 @@ again: if (old_best) krt_learn_announce_delete(p, n); - FIB_ITERATE_PUT(&fit, f); - fib_delete(fib, f); + FIB_ITERATE_PUT(&fit); + fib_delete(fib, n); goto again; } @@ -473,7 +420,7 @@ again: else DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); } - FIB_ITERATE_END(f); + FIB_ITERATE_END; p->reload = 0; } @@ -482,7 +429,7 @@ static void krt_learn_async(struct krt_proto *p, rte *e, int new) { net *n0 = e->net; - net *n = net_get(&p->krt_table, n0->n.prefix, n0->n.pxlen); + net *n = net_get(&p->krt_table, n0->n.addr); rte *g, **gg, *best, **bestp, *old_best; e->attrs = rta_lookup(e->attrs); @@ -559,7 +506,13 @@ static void krt_learn_init(struct krt_proto *p) { if (KRT_CF->learn) - rt_setup(p->p.pool, &p->krt_table, "Inherited", NULL); + { + struct rtable_config *cf = mb_allocz(p->p.pool, sizeof(struct rtable_config)); + cf->name = "Inherited"; + cf->addr_type = p->p.net_type; + + rt_setup(p->p.pool, &p->krt_table, cf); + } } static void @@ -588,12 +541,11 @@ 
krt_dump_attrs(rte *e) static void krt_flush_routes(struct krt_proto *p) { - struct rtable *t = p->p.table; + struct rtable *t = p->p.main_channel->table; KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); - FIB_WALK(&t->fib, f) + FIB_WALK(&t->fib, net, n) { - net *n = (net *) f; rte *e = n->routes; if (rte_is_valid(e) && (n->n.flags & KRF_INSTALLED)) { @@ -608,12 +560,12 @@ krt_flush_routes(struct krt_proto *p) static struct rte * krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa) { - struct announce_hook *ah = p->p.main_ahook; - struct filter *filter = ah->out_filter; + struct channel *c = p->p.main_channel; + struct filter *filter = c->out_filter; rte *rt; - if (p->p.accept_ra_types == RA_MERGED) - return rt_export_merged(ah, net, rt_free, tmpa, krt_filter_lp, 1); + if (c->ra_mode == RA_MERGED) + return rt_export_merged(c, net, rt_free, tmpa, krt_filter_lp, 1); rt = net->routes; *rt_free = NULL; @@ -654,17 +606,11 @@ krt_same_dest(rte *k, rte *e) if (ka->dest != ea->dest) return 0; - switch (ka->dest) - { - case RTD_ROUTER: - return ipa_equal(ka->gw, ea->gw); - case RTD_DEVICE: - return !strcmp(ka->iface->name, ea->iface->name); - case RTD_MULTIPATH: - return mpnh_same(ka->nexthops, ea->nexthops); - default: - return 1; - } + + if (ka->dest == RTD_UNICAST) + return nexthop_same(&(ka->nh), &(ea->nh)); + + return 1; } /* @@ -760,13 +706,12 @@ krt_got_route(struct krt_proto *p, rte *e) static void krt_prune(struct krt_proto *p) { - struct rtable *t = p->p.table; + struct rtable *t = p->p.main_channel->table; KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name); - FIB_WALK(&t->fib, f) + FIB_WALK(&t->fib, net, n) { - net *n = (net *) f; - int verdict = f->flags & KRF_VERDICT_MASK; + int verdict = n->n.flags & KRF_VERDICT_MASK; rte *new, *old, *rt_free = NULL; ea_list *tmpa = NULL; @@ -795,7 +740,7 @@ krt_prune(struct krt_proto *p) switch (verdict) { case KRF_CREATE: - if (new && (f->flags & KRF_INSTALLED)) + if (new && (n->n.flags & 
KRF_INSTALLED)) { krt_trace_in(p, new, "reinstalling"); krt_replace_rte(p, n, new, NULL, tmpa); @@ -822,7 +767,7 @@ krt_prune(struct krt_proto *p) if (rt_free) rte_free(rt_free); lp_flush(krt_filter_lp); - f->flags &= ~KRF_VERDICT_MASK; + n->n.flags &= ~KRF_VERDICT_MASK; } FIB_WALK_END; @@ -901,11 +846,11 @@ static void krt_scan_timer_start(struct krt_proto *p) { if (!krt_scan_count) - krt_scan_timer = tm_new_set(krt_pool, krt_scan, NULL, 0, KRT_CF->scan_time); + krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0); krt_scan_count++; - tm_start(krt_scan_timer, 1); + tm_start(krt_scan_timer, 1 S); } static void @@ -943,8 +888,8 @@ krt_scan(timer *t) static void krt_scan_timer_start(struct krt_proto *p) { - p->scan_timer = tm_new_set(p->p.pool, krt_scan, p, 0, KRT_CF->scan_time); - tm_start(p->scan_timer, 1); + p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0); + tm_start(p->scan_timer, 1 S); } static void @@ -1000,7 +945,7 @@ krt_store_tmp_attrs(rte *rt, struct ea_list *attrs) static int krt_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED) { - struct krt_proto *p = (struct krt_proto *) P; + // struct krt_proto *p = (struct krt_proto *) P; rte *e = *new; if (e->attrs->src->proto == P) @@ -1021,11 +966,6 @@ krt_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li return -1; } - if (!KRT_CF->devroutes && - (e->attrs->dest == RTD_DEVICE) && - (e->attrs->source != RTS_STATIC_DEVICE)) - return -1; - if (!krt_capable(e)) return -1; @@ -1033,7 +973,7 @@ krt_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li } static void -krt_rt_notify(struct proto *P, struct rtable *table UNUSED, net *net, +krt_rt_notify(struct proto *P, struct channel *ch UNUSED, net *net, rte *new, rte *old, struct ea_list *eattrs) { struct krt_proto *p = (struct krt_proto *) P; @@ -1067,10 +1007,10 @@ krt_if_notify(struct proto *P, uint flags, struct iface 
*iface UNUSED) krt_scan_timer_kick(p); } -static int -krt_reload_routes(struct proto *P) +static void +krt_reload_routes(struct channel *C) { - struct krt_proto *p = (struct krt_proto *) P; + struct krt_proto *p = (void *) C->proto; /* Although we keep learned routes in krt_table, we rather schedule a scan */ @@ -1079,14 +1019,12 @@ krt_reload_routes(struct proto *P) p->reload = 1; krt_scan_timer_kick(p); } - - return 1; } static void -krt_feed_end(struct proto *P) +krt_feed_end(struct channel *C) { - struct krt_proto *p = (struct krt_proto *) P; + struct krt_proto *p = (void *) C->proto; p->ready = 1; krt_scan_timer_kick(p); @@ -1107,14 +1045,49 @@ krt_rte_same(rte *a, rte *b) struct krt_config *krt_cf; +static void +krt_preconfig(struct protocol *P UNUSED, struct config *c) +{ + krt_cf = NULL; + krt_sys_preconfig(c); +} + +static void +krt_postconfig(struct proto_config *CF) +{ + struct krt_config *cf = (void *) CF; + + if (EMPTY_LIST(CF->channels)) + cf_error("Channel not specified"); + +#ifdef CONFIG_ALL_TABLES_AT_ONCE + if (krt_cf->scan_time != cf->scan_time) + cf_error("All kernel syncers must use the same table scan interval"); +#endif + + struct channel_config *cc = proto_cf_main_channel(CF); + struct rtable_config *tab = cc->table; + if (tab->krt_attached) + cf_error("Kernel syncer (%s) already attached to table %s", tab->krt_attached->name, tab->name); + tab->krt_attached = CF; + + if (cf->merge_paths) + { + cc->ra_mode = RA_MERGED; + cc->merge_limit = cf->merge_paths; + } + + krt_sys_postconfig(cf); +} + static struct proto * -krt_init(struct proto_config *C) +krt_init(struct proto_config *CF) { - struct krt_proto *p = proto_new(C, sizeof(struct krt_proto)); - struct krt_config *c = (struct krt_config *) C; + struct krt_proto *p = proto_new(CF); + // struct krt_config *cf = (void *) CF; + + p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(CF)); - p->p.accept_ra_types = c->merge_paths ? 
RA_MERGED : RA_OPTIMAL; - p->p.merge_limit = c->merge_paths; p->p.import_control = krt_import_control; p->p.rt_notify = krt_rt_notify; p->p.if_notify = krt_if_notify; @@ -1133,6 +1106,17 @@ krt_start(struct proto *P) { struct krt_proto *p = (struct krt_proto *) P; + switch (p->p.net_type) + { + case NET_IP4: p->af = AF_INET; break; + case NET_IP6: p->af = AF_INET6; break; + case NET_IP6_SADR: p->af = AF_INET6; break; +#ifdef AF_MPLS + case NET_MPLS: p->af = AF_MPLS; break; +#endif + default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break; + } + add_tail(&krt_proto_list, &p->krt_node); #ifdef KRT_ALLOW_LEARN @@ -1147,8 +1131,8 @@ krt_start(struct proto *P) krt_scan_timer_start(p); - if (P->gr_recovery && KRT_CF->graceful_restart) - P->gr_wait = 1; + if (p->p.gr_recovery && KRT_CF->graceful_restart) + p->p.main_channel->gr_wait = 1; return PS_UP; } @@ -1177,40 +1161,19 @@ krt_shutdown(struct proto *P) } static int -krt_reconfigure(struct proto *p, struct proto_config *new) +krt_reconfigure(struct proto *p, struct proto_config *CF) { - struct krt_config *o = (struct krt_config *) p->cf; - struct krt_config *n = (struct krt_config *) new; + struct krt_config *o = (void *) p->cf; + struct krt_config *n = (void *) CF; + + if (!proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF))) + return 0; if (!krt_sys_reconfigure((struct krt_proto *) p, n, o)) return 0; /* persist, graceful restart need not be the same */ - return o->scan_time == n->scan_time && o->learn == n->learn && - o->devroutes == n->devroutes && o->merge_paths == n->merge_paths; -} - -static void -krt_preconfig(struct protocol *P UNUSED, struct config *c) -{ - krt_cf = NULL; - krt_sys_preconfig(c); -} - -static void -krt_postconfig(struct proto_config *C) -{ - struct krt_config *c = (struct krt_config *) C; - -#ifdef CONFIG_ALL_TABLES_AT_ONCE - if (krt_cf->scan_time != c->scan_time) - cf_error("All kernel syncers must use the same table scan 
interval"); -#endif - - if (C->table->krt_attached) - cf_error("Kernel syncer (%s) already attached to table %s", C->table->krt_attached->name, C->table->name); - C->table->krt_attached = C; - krt_sys_postconfig(c); + return o->scan_time == n->scan_time && o->learn == n->learn; } struct proto_config * @@ -1222,7 +1185,7 @@ krt_init_config(int class) #endif krt_cf = (struct krt_config *) proto_config_new(&proto_unix_kernel, class); - krt_cf->scan_time = 60; + krt_cf->scan_time = 60 S; krt_sys_init_config(krt_cf); return (struct proto_config *) krt_cf; @@ -1234,9 +1197,6 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src) struct krt_config *d = (struct krt_config *) dest; struct krt_config *s = (struct krt_config *) src; - /* Shallow copy of everything */ - proto_copy_rest(dest, src, sizeof(struct krt_config)); - /* Fix sysdep parts */ krt_sys_copy_config(d, s); } @@ -1260,11 +1220,25 @@ krt_get_attr(eattr *a, byte *buf, int buflen) } +#ifdef CONFIG_IP6_SADR_KERNEL +#define MAYBE_IP6_SADR NB_IP6_SADR +#else +#define MAYBE_IP6_SADR 0 +#endif + +#ifdef HAVE_MPLS_KERNEL +#define MAYBE_MPLS NB_MPLS +#else +#define MAYBE_MPLS 0 +#endif + struct protocol proto_unix_kernel = { .name = "Kernel", .template = "kernel%d", .attr_class = EAP_KRT, .preference = DEF_PREF_INHERITED, + .channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS, + .proto_size = sizeof(struct krt_proto), .config_size = sizeof(struct krt_config), .preconfig = krt_preconfig, .postconfig = krt_postconfig, diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index d4a8717e..b627882d 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -15,7 +15,9 @@ struct krt_proto; struct kif_config; struct kif_proto; -#include "lib/krt-sys.h" +#include "nest/iface.h" +#include "sysdep/config.h" +#include CONFIG_INCLUDE_KRTSYS_H /* Flags stored in net->n.flags, rest are in nest/route.h */ @@ -44,10 +46,9 @@ extern struct protocol proto_unix_kernel; struct krt_config { struct proto_config c; struct 
krt_params sys; /* Sysdep params */ + btime scan_time; /* How often we re-scan routes */ int persist; /* Keep routes when we exit */ - int scan_time; /* How often we re-scan routes */ int learn; /* Learn routes from other sources */ - int devroutes; /* Allow export of device routes */ int graceful_restart; /* Regard graceful restart recovery */ int merge_paths; /* Exported routes are merged for ECMP */ }; @@ -65,6 +66,7 @@ struct krt_proto { #endif node krt_node; /* Node in krt_proto_list */ + byte af; /* Kernel address family (AF_*) */ byte ready; /* Initial feed has been finished */ byte initialized; /* First scan has been finished */ byte reload; /* Next scan is doing reload */ @@ -93,18 +95,20 @@ void krt_got_route_async(struct krt_proto *p, struct rte *e, int new); extern struct protocol proto_unix_iface; -struct kif_primary_item { - node n; - byte *pattern; - ip_addr prefix; - int pxlen; -}; - struct kif_config { struct proto_config c; struct kif_params sys; /* Sysdep params */ - int scan_time; /* How often we re-scan interfaces */ - list primary; /* Preferences for primary addresses (struct kif_primary_item) */ + + list iface_list; /* List of iface configs (struct kif_iface_config) */ + btime scan_time; /* How often we re-scan interfaces */ +}; + +struct kif_iface_config { + struct iface_patt i; + + ip_addr pref_v4; + ip_addr pref_v6; + ip_addr pref_ll; }; struct kif_proto { @@ -112,10 +116,11 @@ struct kif_proto { struct kif_state sys; /* Sysdep state */ }; -struct kif_proto *kif_proto; +extern struct kif_proto *kif_proto; #define KIF_CF ((struct kif_config *)p->p.cf) +struct kif_iface_config * kif_get_iface_config(struct iface *iface); struct proto_config * krt_init_config(int class); @@ -150,6 +155,6 @@ void kif_sys_copy_config(struct kif_config *, struct kif_config *); void kif_do_scan(struct kif_proto *); -struct ifa *kif_get_primary_ip(struct iface *i); +int kif_update_sysdep_addr(struct iface *i); #endif diff --git a/sysdep/unix/log.c 
b/sysdep/unix/log.c index 88a7188c..f9dccc39 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -27,7 +27,7 @@ #include "nest/mrtdump.h" #include "lib/string.h" #include "lib/lists.h" -#include "lib/unix.h" +#include "sysdep/unix/unix.h" static FILE *dbgf; static list *current_log_list; @@ -120,7 +120,7 @@ log_commit(int class, buffer *buf) else { byte tbuf[TM_DATETIME_BUFFER_SIZE]; - tm_format_datetime(tbuf, &config->tf_log, now); + tm_format_real_time(tbuf, config->tf_log.fmt1, current_real_time()); fprintf(l->fh, "%s <%s> ", tbuf, class_names[class]); } fputs(buf->start, l->fh); @@ -180,19 +180,18 @@ log_msg(const char *msg, ...) void log_rl(struct tbf *f, const char *msg, ...) { - int last_hit = f->mark; int class = 1; va_list args; /* Rate limiting is a bit tricky here as it also logs '...' during the first hit */ - if (tbf_limit(f) && last_hit) + if (tbf_limit(f) && (f->drop > 1)) return; if (*msg >= 1 && *msg <= 8) class = *msg++; va_start(args, msg); - vlog(class, (f->mark ? "..." : msg), args); + vlog(class, (f->drop ? "..." 
: msg), args); va_end(args); } @@ -332,7 +331,7 @@ void mrt_dump_message(struct proto *p, u16 type, u16 subtype, byte *buf, u32 len) { /* Prepare header */ - put_u32(buf+0, now_real); + put_u32(buf+0, current_real_time() TO_S); put_u16(buf+4, type); put_u16(buf+6, subtype); put_u32(buf+8, len - MRTDUMP_HDR_LENGTH); diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 8aa19fce..2251d3fb 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -27,6 +27,7 @@ #include "lib/resource.h" #include "lib/socket.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/string.h" #include "nest/route.h" #include "nest/protocol.h" @@ -56,7 +57,7 @@ async_dump(void) rdump(&root_pool); sk_dump_all(); - tm_dump_all(); + // XXXX tm_dump_all(); if_dump_all(); neigh_dump_all(); rta_dump_all(); @@ -71,7 +72,7 @@ async_dump(void) */ #ifdef CONFIG_RESTRICTED_PRIVILEGES -#include "lib/syspriv.h" +#include CONFIG_INCLUDE_SYSPRIV_H #else static inline void @@ -302,7 +303,7 @@ cmd_reconfig_undo_notify(void) } void -cmd_reconfig(char *name, int type, int timeout) +cmd_reconfig(char *name, int type, uint timeout) { if (cli_access_restricted()) return; @@ -571,6 +572,10 @@ sysdep_shutdown_done(void) * Signals */ +volatile int async_config_flag; +volatile int async_dump_flag; +volatile int async_shutdown_flag; + static void handle_sighup(int sig UNUSED) { @@ -815,12 +820,14 @@ main(int argc, char **argv) log_init_debug(""); log_switch(debug_flag, NULL, NULL); + net_init(); resource_init(); + timer_init(); olock_init(); io_init(); rt_init(); if_init(); - roa_init(); +// roa_init(); config_init(); uid_t use_uid = get_uid(use_user); diff --git a/sysdep/unix/timer.h b/sysdep/unix/timer.h deleted file mode 100644 index aa3ed143..00000000 --- a/sysdep/unix/timer.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * BIRD -- Unix Timers - * - * (c) 1998 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. 
- */ - -#ifndef _BIRD_TIMER_H_ -#define _BIRD_TIMER_H_ - -#include <time.h> - -#include "lib/resource.h" - -typedef time_t bird_clock_t; /* Use instead of time_t */ - -typedef struct timer { - resource r; - void (*hook)(struct timer *); - void *data; - uint randomize; /* Amount of randomization */ - uint recurrent; /* Timer recurrence */ - node n; /* Internal link */ - bird_clock_t expires; /* 0=inactive */ -} timer; - -timer *tm_new(pool *); -void tm_start(timer *, uint after); -void tm_stop(timer *); -void tm_dump_all(void); - -extern bird_clock_t now; /* Relative, monotonic time in seconds */ -extern bird_clock_t now_real; /* Time in seconds since fixed known epoch */ -extern bird_clock_t boot_time; - -static inline int -tm_active(timer *t) -{ - return t->expires != 0; -} - -static inline bird_clock_t -tm_remains(timer *t) -{ - return t->expires ? t->expires - now : 0; -} - -static inline void -tm_start_max(timer *t, bird_clock_t after) -{ - bird_clock_t rem = tm_remains(t); - tm_start(t, (rem > after) ? rem : after); -} - -static inline timer * -tm_new_set(pool *p, void (*hook)(struct timer *), void *data, uint rand, uint rec) -{ - timer *t = tm_new(p); - t->hook = hook; - t->data = data; - t->randomize = rand; - t->recurrent = rec; - return t; -} - - -struct timeformat { - char *fmt1, *fmt2; - bird_clock_t limit; -}; - -bird_clock_t tm_parse_date(char *); /* Convert date to bird_clock_t */ -bird_clock_t tm_parse_datetime(char *); /* Convert date to bird_clock_t */ - -#define TM_DATETIME_BUFFER_SIZE 32 /* Buffer size required by tm_format_datetime */ -void -tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t); - -#define TIME_T_IS_64BIT (sizeof(time_t) == 8) -#define TIME_T_IS_SIGNED ((time_t) -1 < 0) - -#define TIME_INFINITY \ - ((time_t) (TIME_T_IS_SIGNED ? \ - (TIME_T_IS_64BIT ? 0x7fffffffffffffff : 0x7fffffff): \ - (TIME_T_IS_64BIT ? 
0xffffffffffffffff : 0xffffffff))) - -#endif diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h index 3ef2e3ef..cb12fad8 100644 --- a/sysdep/unix/unix.h +++ b/sysdep/unix/unix.h @@ -22,7 +22,7 @@ void async_config(void); void async_dump(void); void async_shutdown(void); void cmd_check_config(char *name); -void cmd_reconfig(char *name, int type, int timeout); +void cmd_reconfig(char *name, int type, uint timeout); void cmd_reconfig_confirm(void); void cmd_reconfig_undo(void); void cmd_shutdown(void); @@ -47,14 +47,6 @@ typedef struct sockaddr_bird { } sockaddr; -#ifdef IPV6 -#define BIRD_AF AF_INET6 -#define ipa_from_sa(x) ipa_from_sa6(x) -#else -#define BIRD_AF AF_INET -#define ipa_from_sa(x) ipa_from_sa4(x) -#endif - /* This is sloppy hack, it should be detected by configure script */ /* Linux systems have it defined so this is definition for BSD systems */ @@ -63,29 +55,36 @@ typedef struct sockaddr_bird { #endif -static inline ip_addr ipa_from_in4(struct in_addr a UNUSED6) +static inline ip_addr ipa_from_in4(struct in_addr a) { return ipa_from_u32(ntohl(a.s_addr)); } -static inline ip_addr ipa_from_in6(struct in6_addr a UNUSED4) +static inline ip_addr ipa_from_in6(struct in6_addr a) { return ipa_build6(ntohl(a.s6_addr32[0]), ntohl(a.s6_addr32[1]), ntohl(a.s6_addr32[2]), ntohl(a.s6_addr32[3])); } -static inline ip_addr ipa_from_sa4(sockaddr *sa UNUSED6) +static inline ip_addr ipa_from_sa4(sockaddr *sa) { return ipa_from_in4(((struct sockaddr_in *) sa)->sin_addr); } -static inline ip_addr ipa_from_sa6(sockaddr *sa UNUSED4) +static inline ip_addr ipa_from_sa6(sockaddr *sa) { return ipa_from_in6(((struct sockaddr_in6 *) sa)->sin6_addr); } +static inline ip_addr ipa_from_sa(sockaddr *sa) +{ + switch (sa->sa.sa_family) + { + case AF_INET: return ipa_from_sa4(sa); + case AF_INET6: return ipa_from_sa6(sa); + default: return IPA_NONE; + } +} + static inline struct in_addr ipa_to_in4(ip_addr a) { return (struct in_addr) { htonl(ipa_to_u32(a)) }; } -#ifdef IPV6 +static 
inline struct in_addr ip4_to_in4(ip4_addr a) +{ return (struct in_addr) { htonl(ip4_to_u32(a)) }; } + static inline struct in6_addr ipa_to_in6(ip_addr a) { return (struct in6_addr) { .s6_addr32 = { htonl(_I0(a)), htonl(_I1(a)), htonl(_I2(a)), htonl(_I3(a)) } }; } -#else -/* Temporary dummy */ -static inline struct in6_addr ipa_to_in6(ip_addr a UNUSED) -{ return (struct in6_addr) { .s6_addr32 = { 0, 0, 0, 0 } }; } -#endif void sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port); int sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port); @@ -95,9 +94,9 @@ int sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *po #define SUN_LEN(ptr) ((size_t) (((struct sockaddr_un *) 0)->sun_path) + strlen ((ptr)->sun_path)) #endif -volatile int async_config_flag; -volatile int async_dump_flag; -volatile int async_shutdown_flag; +extern volatile int async_config_flag; +extern volatile int async_dump_flag; +extern volatile int async_shutdown_flag; void io_init(void); void io_loop(void); @@ -106,7 +105,6 @@ int sk_open_unix(struct birdsock *s, char *name); void *tracked_fopen(struct pool *, char *name, char *mode); void test_old_bird(char *path); - /* krt.c bits */ void krt_io_init(void); diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 00000000..2cee9234 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,3 @@ +src := birdtest.c bt-utils.c +obj := $(src-o-files) +tests_objs := $(tests_objs) $(src-o-files) diff --git a/test/birdtest.c b/test/birdtest.c new file mode 100644 index 00000000..a4312e9b --- /dev/null +++ b/test/birdtest.c @@ -0,0 +1,502 @@ +/* + * BIRD -- Unit Test Framework (BIRD Test) + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> + +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/wait.h> + +#include "test/birdtest.h" +#include "lib/string.h" + +#ifdef HAVE_EXECINFO_H +#include <execinfo.h> +#endif + +#define BACKTRACE_MAX_LINES 100 + +#define sprintf_concat(s, format, ...) \ + snprintf(s + strlen(s), sizeof(s) - strlen(s), format, ##__VA_ARGS__) + +static const char *request; +static int list_tests; +static int do_core; +static int no_fork; +static int no_timeout; +static int is_terminal; /* Whether stdout is a live terminal or pipe redirect */ + +uint bt_verbose; +const char *bt_filename; +const char *bt_test_id; + +int bt_result; /* Overall program run result */ +int bt_suite_result; /* One suit result */ +char bt_out_fmt_buf[1024]; /* Temporary memory buffer for output of testing function */ + +long int +bt_random(void) +{ + /* Seeded in bt_init() */ + long int rand_low, rand_high; + + rand_low = random(); + rand_high = random(); + return (rand_low & 0xffff) | ((rand_high & 0xffff) << 16); +} + +void +bt_init(int argc, char *argv[]) +{ + int c; + + srandom(BT_RANDOM_SEED); + + bt_verbose = 0; + bt_filename = argv[0]; + bt_result = 1; + bt_test_id = NULL; + is_terminal = isatty(fileno(stdout)); + + while ((c = getopt(argc, argv, "lcftv")) >= 0) + switch (c) + { + case 'l': + list_tests = 1; + break; + + case 'c': + do_core = 1; + break; + + case 'f': + no_fork = 1; + break; + + case 't': + no_timeout = 1; + break; + + case 'v': + bt_verbose++; + break; + + default: + goto usage; + } + + /* Optional requested test_id */ + if ((optind + 1) == argc) + request = argv[optind++]; + + if (optind != argc) + goto usage; + + if (do_core) + { + struct rlimit rl = {RLIM_INFINITY, RLIM_INFINITY}; + int rv = setrlimit(RLIMIT_CORE, &rl); + bt_syscall(rv < 0, "setrlimit RLIMIT_CORE"); + } + + return; + + usage: + printf("Usage: %s [-l] 
[-c] [-f] [-t] [-vvv] [<test_suit_name>]\n", argv[0]); + printf("Options: \n"); + printf(" -l List all test suite names and descriptions \n"); + printf(" -c Force unlimit core dumps (needs root privileges) \n"); + printf(" -f No forking \n"); + printf(" -t No timeout limit \n"); + printf(" -v More verbosity, maximum is 3 -vvv \n"); + exit(3); +} + +static void +bt_dump_backtrace(void) +{ +#ifdef HAVE_EXECINFO_H + void *buf[BACKTRACE_MAX_LINES]; + char **pp_backtrace; + int lines, j; + + if (!bt_verbose) + return; + + lines = backtrace(buf, BACKTRACE_MAX_LINES); + bt_log("backtrace() returned %d addresses", lines); + + pp_backtrace = backtrace_symbols(buf, lines); + if (pp_backtrace == NULL) + { + perror("backtrace_symbols"); + exit(EXIT_FAILURE); + } + + for (j = 0; j < lines; j++) + bt_log("%s", pp_backtrace[j]); + + free(pp_backtrace); +#endif /* HAVE_EXECINFO_H */ +} + +static +int bt_run_test_fn(int (*fn)(const void *), const void *fn_arg, int timeout) +{ + int result; + alarm(timeout); + + if (fn_arg) + result = fn(fn_arg); + else + result = ((int (*)(void))fn)(); + + if (!bt_suite_result) + result = 0; + + return result; +} + +static uint +get_num_terminal_cols(void) +{ + struct winsize w = {}; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + uint cols = w.ws_col; + return (cols > 0 ? cols : 80); +} + +/** + * bt_log_result - pretty print of test result + * @result: 1 or 0 + * @fmt: a description message (could be long, over more lines) + * @argptr: variable argument list + * + * This function is used for pretty printing of test results on all verbose + * levels. + */ +static void +bt_log_result(int result, const char *fmt, va_list argptr) +{ + static char msg_buf[BT_BUFFER_SIZE]; + char *pos; + + snprintf(msg_buf, sizeof(msg_buf), "%s%s%s%s", + bt_filename, + bt_test_id ? ": " : "", + bt_test_id ? bt_test_id : "", + (fmt && strlen(fmt) > 0) ? 
": " : ""); + pos = msg_buf + strlen(msg_buf); + + vsnprintf(pos, sizeof(msg_buf) - (pos - msg_buf), fmt, argptr); + + int chrs = 0; + for (uint i = 0; i < strlen(msg_buf); i += get_num_terminal_cols()) + { + if (i) + printf("\n"); + char *stop = msg_buf + i + get_num_terminal_cols(); + char backup = *stop; + *stop = 0; + chrs = printf("%s", msg_buf + i); + *stop = backup; + } + + int offset = get_num_terminal_cols() - chrs - BT_PROMPT_OK_FAIL_STRLEN; + if (offset < 0) + { + printf("\n"); + offset = get_num_terminal_cols() - BT_PROMPT_OK_FAIL_STRLEN; + } + + for (int i = 0; i < offset; i++) + putchar(' '); + + const char *result_str = is_terminal ? BT_PROMPT_OK : BT_PROMPT_OK_NO_COLOR; + if (!result) + result_str = is_terminal ? BT_PROMPT_FAIL : BT_PROMPT_FAIL_NO_COLOR; + + printf("%s\n", result_str); +} + +/** + * bt_log_overall_result - pretty print of suite case result + * @result: 1 or 0 + * @fmt: a description message (could be long, over more lines) + * ...: variable argument list + * + * This function is used for pretty printing of test suite case result. + */ +static void +bt_log_overall_result(int result, const char *fmt, ...) +{ + va_list argptr; + va_start(argptr, fmt); + bt_log_result(result, fmt, argptr); + va_end(argptr); +} + +/** + * bt_log_suite_result - pretty print of suite case result + * @result: 1 or 0 + * @fmt: a description message (could be long, over more lines) + * ...: variable argument list + * + * This function is used for pretty printing of test suite case result. + */ +void +bt_log_suite_result(int result, const char *fmt, ...) 
+{ + if(bt_verbose >= BT_VERBOSE_SUITE || !result) + { + va_list argptr; + va_start(argptr, fmt); + bt_log_result(result, fmt, argptr); + va_end(argptr); + } +} + +/** + * bt_log_suite_case_result - pretty print of suite result + * @result: 1 or 0 + * @fmt: a description message (could be long, over more lines) + * ...: variable argument list + * + * This function is used for pretty printing of test suite result. + */ +void +bt_log_suite_case_result(int result, const char *fmt, ...) +{ + if(bt_verbose >= BT_VERBOSE_SUITE_CASE) + { + va_list argptr; + va_start(argptr, fmt); + bt_log_result(result, fmt, argptr); + va_end(argptr); + } +} + +int +bt_test_suite_base(int (*fn)(const void *), const char *id, const void *fn_arg, int forked, int timeout, const char *dsc, ...) +{ + if (list_tests) + { + printf("%28s - ", id); + va_list args; + va_start(args, dsc); + vprintf(dsc, args); + va_end(args); + printf("\n"); + return 1; + } + + if (no_fork) + forked = 0; + + if (no_timeout) + timeout = 0; + + if (request && strcmp(id, request)) + return 1; + + bt_suite_result = 1; + bt_test_id = id; + + if (bt_verbose >= BT_VERBOSE_ABSOLUTELY_ALL) + bt_log("Starting"); + + if (!forked) + { + bt_suite_result = bt_run_test_fn(fn, fn_arg, timeout); + } + else + { + pid_t pid = fork(); + bt_syscall(pid < 0, "fork"); + + if (pid == 0) + { + /* child of fork */ + _exit(bt_run_test_fn(fn, fn_arg, timeout)); + } + + int s; + int rv = waitpid(pid, &s, 0); + bt_syscall(rv < 0, "waitpid"); + + if (WIFEXITED(s)) + { + /* Normal exit */ + bt_suite_result = WEXITSTATUS(s); + } + else if (WIFSIGNALED(s)) + { + /* Stopped by signal */ + bt_suite_result = 0; + + int sn = WTERMSIG(s); + if (sn == SIGALRM) + { + bt_log("Timeout expired"); + } + else if (sn == SIGSEGV) + { + bt_log("Segmentation fault"); + bt_dump_backtrace(); + } + else if (sn != SIGABRT) + bt_log("Signal %d received", sn); + } + + if (WCOREDUMP(s) && bt_verbose) + bt_log("Core dumped"); + } + + if (!bt_suite_result) + bt_result = 0; 
+ + bt_log_suite_result(bt_suite_result, NULL); + bt_test_id = NULL; + + return bt_suite_result; +} + +int +bt_exit_value(void) +{ + if (!list_tests || (list_tests && !bt_result)) + bt_log_overall_result(bt_result, ""); + return bt_result ? EXIT_SUCCESS : EXIT_FAILURE; +} + +/** + * bt_assert_batch__ - test a batch of inputs/outputs tests + * @opts: includes all necessary data + * + * Should be called using macro bt_assert_batch(). + * Returns 1 or 0. + */ +int +bt_assert_batch__(struct bt_batch *opts) +{ + int i; + for (i = 0; i < opts->ndata; i++) + { + int bt_suit_case_result = opts->test_fn(opts->out_buf, opts->data[i].in, opts->data[i].out); + + if (bt_suit_case_result == 0) + bt_suite_result = 0; + + char b[BT_BUFFER_SIZE]; + snprintf(b, sizeof(b), "%s(", opts->test_fn_name); + + opts->in_fmt(b+strlen(b), sizeof(b)-strlen(b), opts->data[i].in); + sprintf_concat(b, ") gives "); + opts->out_fmt(b+strlen(b), sizeof(b)-strlen(b), opts->out_buf); + + if (bt_suit_case_result == 0) + { + sprintf_concat(b, ", but expecting is "); + opts->out_fmt(b+strlen(b), sizeof(b)-strlen(b), opts->data[i].out); + } + + bt_log_suite_case_result(bt_suit_case_result, "%s", b); + } + + return bt_suite_result; +} + +/** + * bt_fmt_str - formating string into output buffer + * @buf: buffer for write + * @size: empty size in @buf + * @data: null-byte terminated string + * + * This function can be used with bt_assert_batch() function. + * Input @data should be const char * string. + */ +void +bt_fmt_str(char *buf, size_t size, const void *data) +{ + const byte *s = data; + + snprintf(buf, size, "\""); + while (*s) + { + snprintf(buf+strlen(buf), size-strlen(buf), bt_is_char(*s) ? "%c" : "\\%03u", *s); + s++; + } + snprintf(buf+strlen(buf), size-strlen(buf), "\""); +} + +/** + * bt_fmt_unsigned - formating unsigned int into output buffer + * @buf: buffer for write + * @size: empty size in @buf + * @data: unsigned number + * + * This function can be used with bt_assert_batch() function. 
+ */ +void +bt_fmt_unsigned(char *buf, size_t size, const void *data) +{ + const uint *n = data; + snprintf(buf, size, "0x%x (%u)", *n, *n); +} + +/** + * bt_fmt_ipa - formating ip_addr into output buffer + * @buf: buffer for write + * @size: empty size in @buf + * @data: should be struct ip_addr * + * + * This function can be used with bt_assert_batch() function. + */ +void +bt_fmt_ipa(char *buf, size_t size, const void *data) +{ + const ip_addr *ip = data; + bsnprintf(buf, size, "%I", *ip); +} + +int +bt_is_char(byte c) +{ + return (c >= (byte) 32 && c <= (byte) 126); +} + +/* + * Mock-ups of all necessary public functions in main.c + */ + +char *bird_name; +void async_config(void) {} +void async_dump(void) {} +void async_shutdown(void) {} +void cmd_check_config(char *name UNUSED) {} +void cmd_reconfig(char *name UNUSED, int type UNUSED, int timeout UNUSED) {} +void cmd_reconfig_confirm(void) {} +void cmd_reconfig_undo(void) {} +void cmd_shutdown(void) {} +void cmd_reconfig_undo_notify(void) {} + +#include "nest/bird.h" +#include "lib/net.h" +#include "conf/conf.h" +void sysdep_preconfig(struct config *c UNUSED) {} +int sysdep_commit(struct config *new UNUSED, struct config *old UNUSED) { return 0; } +void sysdep_shutdown_done(void) {} + +#include "nest/cli.h" +int cli_get_command(cli *c UNUSED) { return 0; } +void cli_write_trigger(cli *c UNUSED) {} +cli *cmd_reconfig_stored_cli; diff --git a/test/birdtest.h b/test/birdtest.h new file mode 100644 index 00000000..4443bfc1 --- /dev/null +++ b/test/birdtest.h @@ -0,0 +1,179 @@ +/* + * BIRD -- Unit Test Framework (BIRD Test) + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#ifndef _BIRDTEST_H_ +#define _BIRDTEST_H_ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <errno.h> +#include <sys/types.h> + +#include "nest/bird.h" + + +extern int bt_result; +extern int bt_suite_result; +extern char bt_out_fmt_buf[1024]; + +extern uint bt_verbose; +#define BT_VERBOSE_NO 0 +#define BT_VERBOSE_SUITE 1 +#define BT_VERBOSE_SUITE_CASE 2 +#define BT_VERBOSE_ABSOLUTELY_ALL 3 + +extern const char *bt_filename; +extern const char *bt_test_id; + +void bt_init(int argc, char *argv[]); +int bt_exit_value(void); +int bt_test_suite_base(int (*test_fn)(const void *), const char *test_id, const void *test_fn_argument, int forked, int timeout, const char *dsc, ...); +long int bt_random(void); + +void bt_log_suite_result(int result, const char *fmt, ...); +void bt_log_suite_case_result(int result, const char *fmt, ...); + +#define BT_TIMEOUT 5 /* Default timeout in seconds */ +#define BT_FORKING 1 /* Forking is enabled in default */ + +#define BT_RANDOM_SEED 982451653 + +#define BT_BUFFER_SIZE 10000 + +#define BT_PROMPT_GREEN "\e[1;32m" +#define BT_PROMPT_RED "\e[1;31m" +#define BT_PROMPT_NORMAL "\e[0m" +#define BT_PROMPT_OK " [" BT_PROMPT_GREEN " OK " BT_PROMPT_NORMAL "] " +#define BT_PROMPT_OK_NO_COLOR " [" " OK " "] " +#define BT_PROMPT_FAIL " [" BT_PROMPT_RED "FAIL" BT_PROMPT_NORMAL "] " +#define BT_PROMPT_FAIL_NO_COLOR " [" "FAIL" "] " +#define BT_PROMPT_OK_FAIL_STRLEN 8 /* strlen ' [FAIL] ' */ + +#define bt_test_suite(fn, dsc, ...) \ + bt_test_suite_extra(fn, BT_FORKING, BT_TIMEOUT, dsc, ##__VA_ARGS__) + +#define bt_test_suite_extra(fn, f, t, dsc, ...) \ + bt_test_suite_base((int (*)(const void *))fn, #fn, NULL, f, t, dsc, ##__VA_ARGS__) + +#define bt_test_suite_arg(fn, arg, dsc, ...) \ + bt_test_suite_arg_extra(fn, arg, BT_FORKING, BT_TIMEOUT, dsc, ##__VA_ARGS__) + +#define bt_test_suite_arg_extra(fn, arg, f, t, dsc, ...) 
\ + bt_test_suite_base(fn, #fn, arg, f, t, dsc, ##__VA_ARGS__) + +#define bt_abort() \ + bt_abort_msg("Aborted at %s:%d", __FILE__, __LINE__) + +#define bt_abort_msg(format, ...) \ + do \ + { \ + bt_log(format, ##__VA_ARGS__); \ + abort(); \ + } while (0) + +#define bt_log(format, ...) \ + do \ + { \ + if (bt_test_id) \ + printf("%s: %s: " format "\n", bt_filename, bt_test_id, ##__VA_ARGS__); \ + else \ + printf("%s: " format "\n", bt_filename, ##__VA_ARGS__); \ + } while(0) + +#define bt_debug(format, ...) \ + do \ + { \ + if (bt_verbose >= BT_VERBOSE_ABSOLUTELY_ALL) \ + printf(format, ##__VA_ARGS__); \ + } while (0) + +#define bt_assert(test) \ + bt_assert_msg(test, "Assertion (%s) at %s:%d", #test, __FILE__, __LINE__) + +#define bt_assert_msg(test, format, ...) \ + do \ + { \ + int bt_suit_case_result = 1; \ + if ((test) == 0) \ + { \ + bt_result = 0; \ + bt_suite_result = 0; \ + bt_suit_case_result = 0; \ + } \ + bt_log_suite_case_result(bt_suit_case_result, format, ##__VA_ARGS__); \ + } while (0) + +#define bt_syscall(test, format, ...) \ + do \ + { \ + if (test) \ + { \ + bt_log(format ": %s", ##__VA_ARGS__, strerror(errno)); \ + exit(3); \ + } \ + } while (0) + +#define bt_sprintf_concat(s, format, ...) 
\ + snprintf(s + strlen(s), sizeof(s) - strlen(s), format, ##__VA_ARGS__) + +struct bt_pair { + const void *in; + const void *out; +}; + +/* Data structure used by bt_assert_batch() function */ +struct bt_batch { + /* in_fmt / out_fmt - formating data + * @buf: buffer for write stringified @data + * @size: empty size in @buf + * @data: data for stringify + * + * There are some build-in functions, see bt_fmt_* functions */ + void (*in_fmt)(char *buf, size_t size, const void *data); + void (*out_fmt)(char *buf, size_t size, const void *data); + + /* Temporary output buffer */ + void *out_buf; + + /* test_fn - testing function + * @out: output data from tested function + * @in: data for input + * @expected_out: expected data from tested function + * + * Input arguments should not be stringified using in_fmt() or out_fmt() + * function already. This function should return only 0 or 1 */ + int (*test_fn)(void *out, const void *in, const void *expected_out); + + /* Name of testing function @test_fn */ + const char *test_fn_name; + + /* Number of items in data */ + int ndata; + + /* Array of input and expected output pairs */ + struct bt_pair *data; +}; + +void bt_fmt_str(char *buf, size_t size, const void *data); +void bt_fmt_unsigned(char *buf, size_t size, const void *data); +void bt_fmt_ipa(char *buf, size_t size, const void *data); +int bt_assert_batch__(struct bt_batch *opts); +int bt_is_char(byte c); + +#define bt_assert_batch(data__, fn__, in_fmt__, out_fmt__) \ + bt_assert_batch__(& (struct bt_batch) { \ + .data = data__, \ + .ndata = ARRAY_SIZE(data__), \ + .test_fn = fn__, \ + .test_fn_name = #fn__, \ + .in_fmt = in_fmt__, \ + .out_fmt = out_fmt__, \ + .out_buf = bt_out_fmt_buf, /* Global memory for this usage */ \ + }) + +#endif /* _BIRDTEST_H_ */ diff --git a/test/bt-utils.c b/test/bt-utils.c new file mode 100644 index 00000000..571ef2fa --- /dev/null +++ b/test/bt-utils.c @@ -0,0 +1,224 @@ +/* + * BIRD Test -- Utils for testing parsing configuration file + * 
+ * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> + +#include "test/birdtest.h" +#include "test/bt-utils.h" + +#include "nest/bird.h" +#include "nest/route.h" +#include "nest/protocol.h" + +#include "sysdep/unix/unix.h" +#include "sysdep/unix/krt.h" + +#include "nest/iface.h" +#include "nest/locks.h" + +#include "filter/filter.h" + +#define BETWEEN(a, b, c) (((a) >= (b)) && ((a) <= (c))) + +static const byte *bt_config_parse_pos; +static uint bt_config_parse_remain_len; + +/* This is cf_read_hook for hard-coded text configuration */ +static int +cf_static_read(byte *dest, uint max_len, int fd UNUSED) +{ + if (max_len > bt_config_parse_remain_len) + max_len = bt_config_parse_remain_len; + memcpy(dest, bt_config_parse_pos, max_len); + bt_config_parse_pos += max_len; + bt_config_parse_remain_len -= max_len; + return max_len; +} + +/* This is cf_read_hook for reading configuration files, + * function is copied from main.c, cf_read() */ +static int +cf_file_read(byte *dest, uint max_len, int fd) +{ + int l = read(fd, dest, max_len); + if (l < 0) + cf_error("Read error"); + return l; +} + +void +bt_bird_init(void) +{ + if(bt_verbose) + log_init_debug(""); + log_switch(bt_verbose != 0, NULL, NULL); + + resource_init(); + olock_init(); + timer_init(); + io_init(); + rt_init(); + if_init(); + config_init(); + + protos_build(); + proto_build(&proto_unix_kernel); + proto_build(&proto_unix_iface); +} + +void bt_bird_cleanup(void) +{ + for (int i = 0; i < EAP_MAX; i++) + attr_class_to_protocol[i] = NULL; + + config = new_config = NULL; +} + +static char * +bt_load_file(const char *filename, int quiet) +{ + FILE *f = fopen(filename, "rb"); + if (!quiet) + bt_assert_msg(f != NULL, "Open %s", filename); + + if (f == NULL) + return NULL; + + fseek(f, 0, SEEK_END); + long file_size_ = ftell(f); + fseek(f, 0, SEEK_SET); + + if (file_size_ < 0) + return NULL; + + 
size_t file_size = file_size_; + size_t read_size = 0; + + char *file_body = mb_allocz(&root_pool, file_size+1); + + /* XXX: copied from cf-lex.c */ + errno=0; + while ((read_size += fread(file_body+read_size, 1, file_size-read_size, f)) != file_size && ferror(f)) + { + bt_debug("iteration \n"); + if(errno != EINTR) + { + bt_abort_msg("errno: %d", errno); + break; + } + errno=0; + clearerr(f); + } + fclose(f); + + if (!quiet) + bt_assert_msg(read_size == file_size, "Read %s", filename); + + return file_body; +} + +static void +bt_show_cfg_error(const struct config *cfg) +{ + int lino = 0; + int lino_delta = 5; + int lino_err = cfg->err_lino; + + const char *str = bt_load_file(cfg->err_file_name, 1); + + while (str && *str) + { + lino++; + if (BETWEEN(lino, lino_err - lino_delta, lino_err + lino_delta)) + bt_debug("%4u%s", lino, (lino_err == lino ? " >> " : " ")); + do + { + if (BETWEEN(lino, lino_err - lino_delta, lino_err + lino_delta)) + bt_debug("%c", *str); + } while (*str && *(str++) != '\n'); + } + bt_debug("\n"); +} + +static struct config * +bt_config_parse__(struct config *cfg) +{ + bt_assert_msg(config_parse(cfg) == 1, "Parse %s", cfg->file_name); + + if (cfg->err_msg) + { + bt_debug("Parse error %s, line %d: %s\n", cfg->err_file_name, cfg->err_lino, cfg->err_msg); + bt_show_cfg_error(cfg); + return NULL; + } + + config_commit(cfg, RECONFIG_HARD, 0); + new_config = cfg; + + return cfg; +} + +struct config * +bt_config_parse(const char *cfg_str) +{ + struct config *cfg = config_alloc("configuration"); + + bt_config_parse_pos = cfg_str; + bt_config_parse_remain_len = strlen(cfg_str); + cf_read_hook = cf_static_read; + + return bt_config_parse__(cfg); +} + +struct config * +bt_config_file_parse(const char *filepath) +{ + struct config *cfg = config_alloc(filepath); + + cfg->file_fd = open(filepath, O_RDONLY); + bt_assert_msg(cfg->file_fd > 0, "Open %s", filepath); + if (cfg->file_fd < 0) + return NULL; + + cf_read_hook = cf_file_read; + + return 
bt_config_parse__(cfg); +} + +/* + * Returns @base raised to the power of @power. + */ +uint +bt_naive_pow(uint base, uint power) +{ + uint result = 1; + uint i; + for (i = 0; i < power; i++) + result *= base; + return result; +} + +/** + * bytes_to_hex - transform data into hexadecimal representation + * @buf: preallocated string buffer + * @in_data: data for transformation + * @size: the length of @in_data + * + * This function transforms @in_data of length @size into hexadecimal + * representation and writes it into @buf. + */ +void +bt_bytes_to_hex(char *buf, const byte *in_data, size_t size) +{ + size_t i; + for(i = 0; i < size; i++) + sprintf(buf + i*2, "%02x", in_data[i]); +} + diff --git a/test/bt-utils.h b/test/bt-utils.h new file mode 100644 index 00000000..13d267cc --- /dev/null +++ b/test/bt-utils.h @@ -0,0 +1,35 @@ +/* + * BIRD Test -- Utils for testing parsing configuration file + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#ifndef _BIRDTEST_UTILS_H_ +#define _BIRDTEST_UTILS_H_ + +#include "sysdep/config.h" + +#define PRIip4 "%d.%d.%d.%d" +#define ARGip4(x) (_I(x) >> 24) & 0xff, (_I(x) >> 16) & 0xff, (_I(x) >> 8) & 0xff, _I(x) & 0xff + +#define PRIip6 "%04X:%04X:%04X:%04X:%04X:%04X:%04X:%04X" +#define ARGip6_HIGH(x,i) (((x).addr[(i)] >> 16) & 0xffff) +#define ARGip6_LOW(x,i) ((x).addr[(i)] & 0xffff) +#define ARGip6_BOTH(x,i) ARGip6_HIGH(x,i), ARGip6_LOW(x,i) +#define ARGip6(x) ARGip6_BOTH((x), 0), ARGip6_BOTH((x), 1), ARGip6_BOTH((x), 2), ARGip6_BOTH((x), 3) + +#define BT_CONFIG_PARSE_ROUTER_ID "router id 1.1.1.1; \n" +#define BT_CONFIG_PARSE_STATIC_PROTO "protocol static { ipv4; } \n" +#define BT_CONFIG_SIMPLE BT_CONFIG_PARSE_ROUTER_ID BT_CONFIG_PARSE_STATIC_PROTO + +uint bt_naive_pow(uint base, uint power); +void bt_bytes_to_hex(char *buf, const byte *in_data, size_t size); + +void bt_bird_init(void); +void bt_bird_cleanup(void); +struct config *bt_config_parse(const char *cfg); +struct config *bt_config_file_parse(const char *filepath); + +#endif /* _BIRDTEST_UTILS_H_ */ diff --git a/tools/Makefile-top.in b/tools/Makefile-top.in deleted file mode 100644 index fa02b5e6..00000000 --- a/tools/Makefile-top.in +++ /dev/null @@ -1,20 +0,0 @@ -# Makefile for in place build of BIRD -# (c) 1999--2000 Martin Mares <mj@ucw.cz> - -objdir=@objdir@ - -all depend tags install install-docs: - $(MAKE) -C $(objdir) $@ - -docs userdocs progdocs: - $(MAKE) -C doc $@ - -clean: - $(MAKE) -C $(objdir) clean - find . 
-name "*~" -or -name "*.[oa]" -or -name "\#*\#" -or -name TAGS -or -name core -or -name depend -or -name ".#*" | xargs rm -f - -distclean: clean - $(MAKE) -C doc distclean - rm -rf $(objdir) autom4te.cache - rm -f config.* configure sysdep/autoconf.h.in sysdep/paths.h Makefile - diff --git a/tools/Makefile.in b/tools/Makefile.in deleted file mode 100644 index 01bb7a7c..00000000 --- a/tools/Makefile.in +++ /dev/null @@ -1,97 +0,0 @@ -# Makefile for the BIRD Internet Routing Daemon -# (c) 1999--2000 Martin Mares <mj@ucw.cz> - -include Rules - -.PHONY: all daemon birdc birdcl subdir depend clean distclean tags docs userdocs progdocs - -all: sysdep/paths.h .dep-stamp subdir daemon birdcl @CLIENT@ - -daemon: $(exedir)/bird - -birdc: $(exedir)/birdc - -birdcl: $(exedir)/birdcl - -bird-dep := $(addsuffix /all.o, $(static-dirs)) conf/all.o lib/birdlib.a - -$(bird-dep): sysdep/paths.h .dep-stamp subdir - -birdc-dep := client/birdc.o client/all.o lib/birdlib.a - -$(birdc-dep): sysdep/paths.h .dep-stamp subdir - -birdcl-dep := client/birdcl.o client/all.o lib/birdlib.a - -$(birdcl-dep): sysdep/paths.h .dep-stamp subdir - - -export client := @CLIENT@ - -depend: sysdep/paths.h .dir-stamp - set -e ; for a in $(dynamic-dirs) ; do $(MAKE) -C $$a $@ ; done - set -e ; for a in $(static-dirs) $(client-dirs) ; do $(MAKE) -C $$a -f $(srcdir_abs)/$$a/Makefile $@ ; done - -subdir: sysdep/paths.h .dir-stamp .dep-stamp - set -e ; for a in $(dynamic-dirs) ; do $(MAKE) -C $$a $@ ; done - set -e ; for a in $(static-dirs) $(client-dirs) ; do $(MAKE) -C $$a -f $(srcdir_abs)/$$a/Makefile $@ ; done - -$(exedir)/bird: $(bird-dep) - @echo LD $(LDFLAGS) -o $@ $^ $(LIBS) - @$(CC) $(LDFLAGS) -o $@ $^ $(LIBS) - -$(exedir)/birdc: $(birdc-dep) - @echo LD $(LDFLAGS) -o $@ $^ $(LIBS) $(CLIENT_LIBS) - @$(CC) $(LDFLAGS) -o $@ $^ $(LIBS) $(CLIENT_LIBS) - -$(exedir)/birdcl: $(birdcl-dep) - @echo LD $(LDFLAGS) -o $@ $^ $(LIBS) - @$(CC) $(LDFLAGS) -o $@ $^ $(LIBS) - -.dir-stamp: sysdep/paths.h - mkdir -p 
$(static-dirs) $(client-dirs) $(doc-dirs) - touch .dir-stamp - -.dep-stamp: - $(MAKE) depend - touch .dep-stamp - -docs: userdocs progdocs - -userdocs progdocs: .dir-stamp - $(MAKE) -C doc -f $(srcdir_abs)/doc/Makefile $@ - -sysdep/paths.h: - echo >sysdep/paths.h "/* Generated by Makefile, don't edit manually! */" - echo >>sysdep/paths.h "#define PATH_CONFIG_FILE \"@CONFIG_FILE@\"" - echo >>sysdep/paths.h "#define PATH_CONTROL_SOCKET \"@CONTROL_SOCKET@\"" - if test -n "@iproutedir@" ; then echo >>sysdep/paths.h "#define PATH_IPROUTE_DIR \"@iproutedir@\"" ; fi - -tags: - cd $(srcdir) ; etags -lc `find $(static-dirs) $(addprefix $(objdir)/,$(dynamic-dirs)) $(client-dirs) -name *.[chY]` - -install: all - $(INSTALL) -d $(DESTDIR)/$(sbindir) $(DESTDIR)/$(sysconfdir) $(DESTDIR)/@runtimedir@ - $(INSTALL_PROGRAM) $(exedir)/bird $(DESTDIR)/$(sbindir)/bird@SUFFIX@ - $(INSTALL_PROGRAM) $(exedir)/birdcl $(DESTDIR)/$(sbindir)/birdcl@SUFFIX@ - if test -n "@CLIENT@" ; then \ - $(INSTALL_PROGRAM) $(exedir)/birdc $(DESTDIR)/$(sbindir)/birdc@SUFFIX@ ; \ - fi - if ! test -f $(DESTDIR)/@CONFIG_FILE@ ; then \ - $(INSTALL_DATA) $(srcdir)/doc/bird.conf.example $(DESTDIR)/@CONFIG_FILE@ ; \ - else \ - echo "Not overwriting old bird@SUFFIX@.conf" ; \ - fi - -install-docs: - $(INSTALL) -d $(DESTDIR)/$(docdir) - $(INSTALL_DATA) $(srcdir)/doc/{bird,prog}{,-*}.html $(DESTDIR)/$(docdir)/ - -clean: - find . 
-name "*.[oa]" -o -name core -o -name depend -o -name "*.html" | xargs rm -f - rm -f conf/cf-lex.c conf/cf-parse.* conf/commands.h conf/keywords.h - rm -f $(exedir)/bird $(exedir)/birdcl $(exedir)/birdc $(exedir)/bird.ctl $(exedir)/bird6.ctl .dep-stamp - -distclean: clean - rm -f config.* configure sysdep/autoconf.h sysdep/paths.h Makefile Rules - rm -rf .dir-stamp $(clean-dirs) diff --git a/tools/Rules.in b/tools/Rules.in deleted file mode 100644 index f00c85d1..00000000 --- a/tools/Rules.in +++ /dev/null @@ -1,91 +0,0 @@ -# Makefile fragments for the BIRD Internet Routing Daemon -# (c) 1999--2000 Martin Mares <mj@ucw.cz> - -srcdir=@srcdir_rel_mf@ -srcdir_abs := $(shell cd $(srcdir) ; pwd) -objdir=@objdir@ -exedir=@exedir@ - -protocols=@protocols@ -static-dirs := nest filter $(addprefix proto/,$(protocols)) -static-dir-paths := $(addprefix $(srcdir)/,$(static-dirs)) -dynamic-dirs := lib conf -dynamic-dir-paths := $(dynamic-dirs) -client-dirs := client -client-dir-paths := $(client-dirs) -doc-dirs := doc -doc-dir-paths := $(doc-dirs) - -all-dirs:=$(static-dirs) $(dynamic-dirs) $(client-dirs) $(doc-dirs) -clean-dirs:=$(all-dirs) proto sysdep - -CPPFLAGS=-I$(root-rel) -I$(srcdir) @CPPFLAGS@ -CFLAGS=$(CPPFLAGS) @CFLAGS@ -LDFLAGS=@LDFLAGS@ -LIBS=@LIBS@ -CLIENT_LIBS=@CLIENT_LIBS@ -CC=@CC@ -M4=@M4@ -BISON=@BISON@ -FLEX=@FLEX@ -RANLIB=@RANLIB@ -INSTALL=@INSTALL@ -INSTALL_PROGRAM=@INSTALL_PROGRAM@ -INSTALL_DATA=@INSTALL_DATA@ - -prefix=@prefix@ -exec_prefix=@exec_prefix@ -bindir=@bindir@ -sbindir=@sbindir@ -sysconfdir=@sysconfdir@ -localstatedir=@localstatedir@ -docdir=@prefix@/doc - -ifdef source - -objs := $(subst .c,.o,$(source)) - -ifdef dir-name -src-path := $(srcdir)/$(dir-name)/ -endif - -all: - cd $(root-rel) && make - -ifdef lib-dest - -subdir: $(lib-dest) - -$(lib-dest): $(objs) - rm -f $@ - ar rcs $@ $^ - $(RANLIB) $@ - -else - -subdir: all.o - -all.o: $(objs) -# $(LD) -r -o $@ $^ -# Changed to $(CC) because $(LD) has problems with crosscompiling - @echo LD -r 
-o $@ $^ - @$(CC) -nostdlib -r -o $@ $^ - -endif - -%.o: $(src-path)%.c - @echo CC -o $@ -c $< - @$(CC) $(CFLAGS) -o $@ -c $< - -ifndef source-dep -source-dep := $(source) -endif - -depend: - $(CC) $(CPPFLAGS) -MM $(addprefix $(src-path),$(source-dep)) >depend - -ifneq ($(wildcard depend),) -include depend -endif - -endif diff --git a/tools/gendist b/tools/gendist index d9b34941..0275db1a 100755 --- a/tools/gendist +++ b/tools/gendist @@ -4,28 +4,30 @@ # (c) 2000--2004 Martin Mares <mj@ucw.cz> # +VERSION=`sed <sysdep/config.h '/BIRD_VERSION/!d;s/^.*"\(.*\)"$/\1/'` +REL=bird-$VERSION +DREL=bird-doc-$VERSION +T=/tmp/bird set -e AC=`if [ -x /usr/bin/autoconf2.50 ] ; then echo autoconf2.50 ; else echo autoconf ; fi` $AC ./configure +make docs +rm -rf $T/$REL $T/$DREL +mkdir -p $T/$REL $T/$DREL $T/$DREL/doc +mv obj/doc/*.pdf $T/$DREL/doc make distclean +find . -name "*~" -exec rm -f '{}' '+' + $AC rm -rf autom4te*cache -( cd doc ; make docs ; make clean ) -VERSION=`sed <sysdep/config.h '/BIRD_VERSION/!d;s/^.*"\(.*\)"$/\1/'` -REL=bird-$VERSION -DREL=bird-doc-$VERSION -T=/tmp/bird echo Building $REL -rm -rf $T/$REL $T/$DREL -mkdir -p $T/$REL $T/$DREL $T/$DREL/doc cp -a . 
$T/$REL echo Generating ChangeLog git log >$T/$REL/ChangeLog -mv $T/$REL/doc/*.ps $T/$DREL/doc rm -f $T/$REL/bird.conf* rm -rf $T/$REL/.git/ -rm -rf `find $T/$REL -name CVS -o -name tmp` $T/$REL/{misc,rfc,doc/slides} +rm -rf `find $T/$REL -name CVS -o -name tmp` $T/$REL/{misc,rfc,doc/slides,doc/slt2001,doc/old,doc/*.out} ( cd $T ; tar czvvf $REL.tar.gz $REL ) ( cd $T ; tar czvvf $DREL.tar.gz $DREL ) rm -rf $T/$REL $T/$DREL diff --git a/tools/mergedirs b/tools/mergedirs deleted file mode 100755 index fb48c6c7..00000000 --- a/tools/mergedirs +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/sh - -if [ -z "$4" ] ; then - echo "Usage: mergedirs <src-dir> <src-dir-rel> <obj-dir> <list-of-dirs>" - exit 1 - fi -cpp=${CPP:-cc -E} -SRCDIR=$1 -shift -SRCREL=$1 -case $SRCDIR in - /*) ;; - *) SRCREL="../$SRCREL" ;; - esac -shift -OBJDIR=$1 -LIBDIR=$OBJDIR/lib -CONFDIR=$OBJDIR/conf -shift - -echo "Merging system-dependent modules ($@)" -MODULES=`for a in $@ ; do - cat $SRCDIR/sysdep/config.h $SRCDIR/$a/Modules | - $cpp -U unix -D MACROS_ONLY -I $OBJDIR - | - sed "/^[ ]*\$/d;/^#/d;s@\\(.*\\)@\\1 $a/\\1@" - done | - sort -k1,1 -u | - cut -d ' ' -f 2` -rm -rf $LIBDIR $CONFDIR -mkdir -p $LIBDIR $CONFDIR -for a in $MODULES ; do - b=`basename $a` - case $b in - *.h) ln -s $SRCREL/$a $LIBDIR/$b - ;; - *.c) OBJ=`echo $b | sed 's/\.c$/\.o/'` - OBJS="$OBJS $OBJ" - SRCS="$SRCS \\ - $b" - ln -s $SRCREL/$a $LIBDIR/$b - ;; - *.Y) CONFS="$CONFS\$(srcdir)/$a " - ln -s $SRCREL/$a $CONFDIR/$b - ;; - *) echo "$b: Unknown file type" - exit 1 - ;; - esac - done - -cat >$LIBDIR/Makefile <<EOF -source=$SRCS -lib-dest=birdlib.a -root-rel=../ - -include ../Rules -EOF - -sed <$SRCDIR/conf/Makefile >$CONFDIR/Makefile "s|@CONFS@|$CONFS|" -CONFS=`cd $SRCDIR ; ls conf/*.[chl]` -for a in $CONFS ; do - ln -s $SRCREL/$a $CONFDIR/ -done diff --git a/tools/progdoc b/tools/progdoc index ef44d3aa..fc4024bf 100755 --- a/tools/progdoc +++ b/tools/progdoc @@ -1,17 +1,18 @@ #!/usr/bin/perl $srcdir = $ARGV[0]; +$out = $ARGV[1]; 
-open(OUT, ">prog.sgml") || die "Cannot create output file"; -include("doc/prog-head.sgml"); -process(""); -include("doc/prog-foot.sgml"); +open(OUT, ">", $out) || die "Cannot create output file"; +process($srcdir); close OUT; +gen_deps(); exit 0; sub include { my $f = shift @_; - open(IN, "$srcdir/$f") || die "Unable to find $f"; + open(IN, "$f") || die "Unable to find $f"; + push(@deps, "$f"); while (<IN>) { print OUT; } @@ -21,7 +22,8 @@ sub include { sub process { my $dir = shift @_; print "$dir/Doc\n"; - open(IN, "$srcdir/$dir/Doc") || die "Unable to read $dir/Doc"; + open(IN, "$dir/Doc") || die "Unable to read $dir/Doc"; + push(@deps, "$dir/Doc"); my @docfile = <IN>; close IN; foreach $_ (@docfile) { @@ -36,7 +38,10 @@ sub process { print OUT "<chapt>$arg\n"; } elsif ($cmd eq "S") { print " $arg\n"; - open(DOC, "cd $srcdir/$dir ; $srcdir/doc/kernel-doc -bird $arg |") || die "Unable to start kernel-doc"; + my @files = map("$dir/$_", split(' ', $arg)); + my $fargs = join(' ', @files); + open(DOC, "$srcdir/doc/kernel-doc -bird $fargs |") || die "Unable to start kernel-doc"; + push(@deps, @files); while (<DOC>) { print OUT; } close DOC; } elsif ($cmd eq "D") { @@ -45,3 +50,17 @@ sub process { } else { die "Unknown command: $cmd"; } } } + +sub gen_deps { + open(DEP, ">", "$out.d"); + print DEP "$out:"; + foreach $f (@deps) { + print DEP " \\\n $f"; + } + print DEP "\n\n"; + + foreach $f (@deps) { + print DEP "$f:\n\n"; + } + close DEP; +} |