summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2013-11-23 11:50:34 +0100
committerOndrej Zajicek <santiago@crfreenet.org>2013-11-23 11:50:34 +0100
commit736e143fa50607fcd88132291e96089b899af979 (patch)
treec0fcd5fb3174bae8a39b3a32dfe582b2ccb6df17
parent094d2bdb79e1ffa0a02761fd651aa0f0b6b0c585 (diff)
parent2b3d52aa421ae1c31e30107beefd82fddbb42854 (diff)
Merge branch 'master' into add-path
Conflicts: filter/filter.c nest/proto.c nest/rt-table.c proto/bgp/bgp.h proto/bgp/config.Y
-rw-r--r--NEWS43
-rw-r--r--README2
-rw-r--r--aclocal.m442
-rw-r--r--bird.conf10
-rw-r--r--client/Makefile8
-rw-r--r--client/birdc.c223
-rw-r--r--client/birdcl.c160
-rw-r--r--client/client.c333
-rw-r--r--client/client.h20
-rw-r--r--conf/cf-lex.l21
-rw-r--r--conf/conf.c269
-rw-r--r--conf/conf.h40
-rw-r--r--conf/confbase.Y39
-rw-r--r--conf/gen_commands.m43
-rw-r--r--conf/gen_parser.m41
-rw-r--r--configure.in55
-rw-r--r--doc/bird.conf.example4
-rw-r--r--doc/bird.sgml685
-rw-r--r--doc/reply_codes8
-rw-r--r--filter/config.Y73
-rw-r--r--filter/filter.c510
-rw-r--r--filter/filter.h22
-rw-r--r--filter/test.conf56
-rw-r--r--filter/tree.c39
-rw-r--r--filter/trie.c38
-rw-r--r--lib/birdlib.h64
-rw-r--r--lib/buffer.h35
-rw-r--r--lib/hash.h123
-rw-r--r--lib/heap.h156
-rw-r--r--lib/ipv6.h7
-rw-r--r--lib/lists.c40
-rw-r--r--lib/lists.h1
-rw-r--r--lib/printf.c39
-rw-r--r--lib/resource.c32
-rw-r--r--lib/resource.h6
-rw-r--r--lib/socket.h7
-rw-r--r--lib/string.h4
-rw-r--r--misc/bird.spec2
-rw-r--r--nest/a-path.c90
-rw-r--r--nest/attrs.h10
-rw-r--r--nest/bfd.h51
-rw-r--r--nest/cli.c20
-rw-r--r--nest/cli.h2
-rw-r--r--nest/cmds.c24
-rw-r--r--nest/cmds.h3
-rw-r--r--nest/config.Y47
-rw-r--r--nest/iface.c70
-rw-r--r--nest/iface.h4
-rw-r--r--nest/neighbor.c20
-rw-r--r--nest/proto.c101
-rw-r--r--nest/protocol.h28
-rw-r--r--nest/route.h24
-rw-r--r--nest/rt-attr.c5
-rw-r--r--nest/rt-dev.c3
-rw-r--r--nest/rt-table.c326
-rw-r--r--proto/Doc1
-rw-r--r--proto/bfd/Doc1
-rw-r--r--proto/bfd/Makefile5
-rw-r--r--proto/bfd/bfd.c1114
-rw-r--r--proto/bfd/bfd.h191
-rw-r--r--proto/bfd/config.Y138
-rw-r--r--proto/bfd/io.c768
-rw-r--r--proto/bfd/io.h99
-rw-r--r--proto/bfd/packets.c248
-rw-r--r--proto/bgp/attrs.c15
-rw-r--r--proto/bgp/bgp.c70
-rw-r--r--proto/bgp/bgp.h9
-rw-r--r--proto/bgp/config.Y8
-rw-r--r--proto/bgp/packets.c24
-rw-r--r--proto/ospf/config.Y49
-rw-r--r--proto/ospf/hello.c29
-rw-r--r--proto/ospf/iface.c68
-rw-r--r--proto/ospf/lsupd.c22
-rw-r--r--proto/ospf/neighbor.c32
-rw-r--r--proto/ospf/neighbor.h1
-rw-r--r--proto/ospf/ospf.c63
-rw-r--r--proto/ospf/ospf.h16
-rw-r--r--proto/ospf/packet.c6
-rw-r--r--proto/ospf/rt.c6
-rw-r--r--proto/ospf/topology.c114
-rw-r--r--proto/pipe/pipe.c5
-rw-r--r--proto/radv/config.Y38
-rw-r--r--proto/radv/packets.c34
-rw-r--r--proto/radv/radv.c93
-rw-r--r--proto/radv/radv.h8
-rw-r--r--proto/rip/config.Y17
-rw-r--r--proto/rip/rip.c88
-rw-r--r--proto/rip/rip.h5
-rw-r--r--proto/static/config.Y11
-rw-r--r--proto/static/static.c2
-rw-r--r--sysdep/autoconf.h.in4
-rw-r--r--sysdep/bsd/Modules1
-rw-r--r--sysdep/bsd/krt-sock.Y32
-rw-r--r--sysdep/bsd/krt-sock.c510
-rw-r--r--sysdep/bsd/krt-sys.h15
-rw-r--r--sysdep/bsd/sysio.h94
-rw-r--r--sysdep/cf/bsd-v6.h2
-rw-r--r--sysdep/cf/bsd.h2
-rw-r--r--sysdep/config.h3
-rw-r--r--sysdep/linux/krt-sys.h4
-rw-r--r--sysdep/linux/netlink.Y6
-rw-r--r--sysdep/linux/netlink.c26
-rw-r--r--sysdep/linux/sysio.h57
-rw-r--r--sysdep/unix/config.Y33
-rw-r--r--sysdep/unix/io.c94
-rw-r--r--sysdep/unix/krt.c174
-rw-r--r--sysdep/unix/krt.h16
-rw-r--r--sysdep/unix/log.c103
-rw-r--r--sysdep/unix/main.c198
-rw-r--r--sysdep/unix/timer.h1
-rw-r--r--sysdep/unix/unix.h7
-rw-r--r--tools/Makefile.in27
-rw-r--r--tools/Rules.in8
113 files changed, 7293 insertions, 1570 deletions
diff --git a/NEWS b/NEWS
index 3aaa4dd6..1341fcd4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,44 @@
+Version 1.3.12 (2013-11-23)
+ o BFD protocol (RFC 5880).
+ o BFD support for OSPF and BGP.
+ o New 'allow local as' option for BGP.
+ o Filters allow setting gw, ifname and ifindex.
+ o Filter operator 'delete/filter' extended to bgp_paths.
+ o Filter operator 'len' extended to [e]clists.
+ o PID file support.
+ o Several bugfixes and minor improvements.
+
+Version 1.3.11 (2013-07-27)
+ o OSPF stub router option (RFC 3137).
+ o TTL security for OSPF and RIP.
+ o Protocol packet priority and traffic class handling.
+ o Multiple routing tables support for FreeBSD and OpenBSD.
+ o Extends constants to all filter data types.
+ o Implements eval command.
+ o 'bgppath ~ int set' filter operation.
+ o Several bugfixes.
+
+Version 1.3.10 (2013-04-30)
+ o Lightweight BIRD client for embedded environments.
+ o Dynamic IPv6 router advertisements.
+ o New 'next hop keep' option for BGP.
+ o Smart default routing table for 'show route export/preexport/protocol'.
+ o Automatic router ID selection could be configured to use address of loopback.
+ o Allows configured global addresses of NBMA neighbors in OSPFv3.
+ o Allows BIRD commands from UNIX shell even in restricted mode.
+ o Route limits inherited from templates can be disabled.
+ o Symbol names enclosed by apostrophes can contain dots.
+ o Several bugfixes.
+
+Version 1.3.9 (2013-01-11)
+ o BIRD can be configured to keep and show filtered routes.
+ o Separate receive and import limits.
+ o Several new reconfiguration cmd options (undo, timeout, check).
+ o Configurable automatic router ID selection.
+ o Dragonfly BSD support.
+ o Fixed OSPFv3 vlinks.
+ o Several minor bugfixes.
+
Version 1.3.8 (2012-08-07)
o Generalized import and export route limits.
o RDNSS and DNSSL support for RAdv.
@@ -11,7 +52,7 @@ Version 1.3.8 (2012-08-07)
Version 1.3.7 (2012-03-22)
o Route Origin Authorization basics.
o RIPng working again.
- o Extended clist operations in filters.
+ o Extended clist operations in filters.
o Fixes several bugs in BSD iface handling.
o Several minor bugfixes and enhancements.
diff --git a/README b/README
index 5c2ef076..daeb18bd 100644
--- a/README
+++ b/README
@@ -3,7 +3,7 @@
(c) 1998--2008 Martin Mares <mj@ucw.cz>
(c) 1998--2000 Pavel Machek <pavel@ucw.cz>
(c) 1998--2008 Ondrej Filip <feela@network.cz>
- (c) 2009--2011 CZ.NIC z.s.p.o.
+ (c) 2009--2013 CZ.NIC z.s.p.o.
================================================================================
diff --git a/aclocal.m4 b/aclocal.m4
index 75b3f92a..02c0f76b 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -133,19 +133,35 @@ if test "$bird_cv_struct_ip_mreqn" = yes ; then
fi
])
-AC_DEFUN(BIRD_CHECK_GCC_OPTIONS,
-[AC_CACHE_VAL(bird_cv_c_option_no_pointer_sign, [
-cat >conftest.c <<EOF
-int main(void)
-{ return 0; }
-EOF
-if $CC -Wall -Wno-pointer-sign conftest.c >&AS_MESSAGE_LOG_FD 2>&1 ; then
- bird_cv_c_option_no_pointer_sign=yes
-else
- bird_cv_c_option_no_pointer_sign=no
-fi
-rm -rf conftest* a.out
-])])
+# BIRD_CHECK_PTHREADS
+# Sets the cache variable bird_cv_lib_pthreads to yes or no depending on
+# whether a test program calling pthread_create() and pthread_spin_lock()
+# links when -pthread is appended to CFLAGS. CFLAGS is restored to its
+# previous value afterwards.
+AC_DEFUN(BIRD_CHECK_PTHREADS,
+[
+ bird_tmp_cflags="$CFLAGS"
+
+ CFLAGS="$CFLAGS -pthread"
+ AC_CACHE_CHECK([whether POSIX threads are available], bird_cv_lib_pthreads,
+ [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]], [[pthread_t pt; pthread_create(&pt, NULL, NULL, NULL); pthread_spinlock_t lock; pthread_spin_lock(&lock); ]])],
+ [bird_cv_lib_pthreads=yes], [bird_cv_lib_pthreads=no])])
+
+ CFLAGS="$bird_tmp_cflags"
+])
+
+# BIRD_CHECK_GCC_OPTION(CACHE-VAR, OPTION, EXTRA-FLAGS)
+# Compiles an empty program with CFLAGS temporarily replaced by
+# "EXTRA-FLAGS OPTION" and stores yes or no in CACHE-VAR. CFLAGS is
+# restored to its previous value afterwards.
+AC_DEFUN(BIRD_CHECK_GCC_OPTION,
+[
+ bird_tmp_cflags="$CFLAGS"
+
+ CFLAGS="$3 $2"
+ AC_CACHE_CHECK([whether CC supports $2], $1,
+ [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([])], [$1=yes], [$1=no])])
+
+ CFLAGS="$bird_tmp_cflags"
+])
+
+# BIRD_ADD_GCC_OPTION(CACHE-VAR, OPTION)
+# Appends OPTION to CFLAGS when the shell variable CACHE-VAR holds "yes".
+AC_DEFUN(BIRD_ADD_GCC_OPTION,
+[
+ if test "$$1" = yes ; then
+ CFLAGS="$CFLAGS $2"
+ fi
+])
# BIRD_CHECK_PROG_FLAVOR_GNU(PROGRAM-PATH, IF-SUCCESS, [IF-FAILURE])
# copied autoconf internal _AC_PATH_PROG_FLAVOR_GNU
diff --git a/bird.conf b/bird.conf
index 2d10ef4b..bafd6ea1 100644
--- a/bird.conf
+++ b/bird.conf
@@ -25,14 +25,14 @@ protocol kernel {
protocol static {
# disabled;
- route fec0:2::/64 reject;
- route fec0:3::/64 reject;
- route fec0:4::/64 reject;
+ route fec0:2::/64 blackhole;
+ route fec0:3::/64 unreachable;
+ route fec0:4::/64 prohibit;
# route 0.0.0.0/0 via 195.113.31.113;
-# route 62.168.0.0/25 reject;
+# route 62.168.0.0/25 unreachable;
# route 1.2.3.4/32 via 195.113.31.124;
-# route 10.0.0.0/8 reject;
+# route 10.0.0.0/8 unreachable;
# route 10.1.1.0:255.255.255.0 via 62.168.0.3;
# route 10.1.2.0:255.255.255.0 via 62.168.0.3;
# route 10.1.3.0:255.255.255.0 via 62.168.0.4;
diff --git a/client/Makefile b/client/Makefile
index 867476cc..a1578766 100644
--- a/client/Makefile
+++ b/client/Makefile
@@ -1,5 +1,11 @@
-source=client.c commands.c util.c
+source=commands.c util.c client.c
root-rel=../
dir-name=client
+clients := $(client) birdcl
+
+source-dep := $(source) $(addsuffix .c,$(clients))
+
+subdir: $(addsuffix .o,$(clients))
+
include ../Rules
diff --git a/client/birdc.c b/client/birdc.c
new file mode 100644
index 00000000..bbe18331
--- /dev/null
+++ b/client/birdc.c
@@ -0,0 +1,223 @@
+/*
+ * BIRD Client - Readline variant I/O
+ *
+ * (c) 1999--2004 Martin Mares <mj@ucw.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <termios.h>
+
+#include <readline/readline.h>
+#include <readline/history.h>
+#include <curses.h>
+
+#include "nest/bird.h"
+#include "lib/resource.h"
+#include "lib/string.h"
+#include "client/client.h"
+#include "sysdep/unix/unix.h"
+
+static int input_hidden_end; /* rl_end saved by input_hide(), restored by input_reveal() */
+static int prompt_active; /* Set by input_init()/input_reveal(), cleared by input_hide() */
+
+/*** Input ***/
+
+/* HACK: libreadline internals we need to access */
+extern int _rl_vis_botlin;
+extern void _rl_move_vert(int);
+extern Function *rl_last_func;
+
+static void
+add_history_dedup(char *cmd)
+{
+  /* Skip the line when it repeats the most recent history entry */
+  HIST_ENTRY *last = history_get(history_length);
+  if (last && !strcmp(last->line, cmd))
+    return;
+
+  add_history(cmd);
+}
+
+/*
+ * Line callback installed by input_init() through
+ * rl_callback_handler_install(). A NULL @cmd_buffer makes the client
+ * clean up and exit with status 0; a non-empty line is added to the
+ * history and passed to submit_command(). The buffer is freed here.
+ */
+static void
+input_got_line(char *cmd_buffer)
+{
+ if (!cmd_buffer)
+ {
+ cleanup();
+ exit(0);
+ }
+
+ if (cmd_buffer[0])
+ {
+ add_history_dedup(cmd_buffer);
+ submit_command(cmd_buffer);
+ }
+
+ free(cmd_buffer);
+}
+
+/*
+ * Prepare the terminal for printing a listing (used by input_help()
+ * before it calls cmd_help()). Relies on the readline internals
+ * _rl_move_vert() and _rl_vis_botlin declared above.
+ */
+void
+input_start_list(void)
+{
+ /* Leave the currently edited line and make space for listing */
+ _rl_move_vert(_rl_vis_botlin);
+#ifdef HAVE_RL_CRLF
+ rl_crlf();
+#endif
+}
+
+/* Counterpart of input_start_list(), called once the listing is printed. */
+void
+input_stop_list(void)
+{
+ /* Reprint the currently edited line after listing */
+ rl_on_new_line();
+ rl_redisplay();
+}
+
+/*
+ * Readline command bound to the TAB key in input_init(). It passes the
+ * current line and cursor position to cmd_complete() and inserts the
+ * text returned in buf when the result is 1. complete_flag is handed to
+ * cmd_complete() as its 'again' argument: it is cleared whenever the
+ * previously executed readline command was not this one, and set after
+ * any result other than 1. For results other than 0 and 1 the bell is
+ * rung as well, provided HAVE_RL_DING is defined. Always returns 0.
+ */
+static int
+input_complete(int arg UNUSED, int key UNUSED)
+{
+ static int complete_flag;
+ char buf[256];
+
+ if (rl_last_func != input_complete)
+ complete_flag = 0;
+ switch (cmd_complete(rl_line_buffer, rl_point, buf, complete_flag))
+ {
+ case 0:
+ complete_flag = 1;
+ break;
+ case 1:
+ rl_insert_text(buf);
+ break;
+ default:
+ complete_flag = 1;
+#ifdef HAVE_RL_DING
+ rl_ding();
+#endif
+ }
+ return 0;
+}
+
+/*
+ * Readline command bound to the '?' key in input_init(). With a numeric
+ * argument other than 1, or when the cursor is inside a double-quoted
+ * string or inside square brackets, a literal '?' is inserted instead.
+ * Otherwise the '?' is displayed temporarily, cmd_help() prints help for
+ * the text before the cursor, and the insertion is undone afterwards.
+ */
+static int
+input_help(int arg, int key UNUSED)
+{
+ int i, in_string, in_bracket;
+
+ if (arg != 1)
+ return rl_insert(arg, '?');
+
+ in_string = in_bracket = 0;
+ for (i = 0; i < rl_point; i++)
+ {
+
+ if (rl_line_buffer[i] == '"')
+ in_string = ! in_string;
+ else if (! in_string)
+ {
+ if (rl_line_buffer[i] == '[')
+ in_bracket++;
+ else if (rl_line_buffer[i] == ']')
+ in_bracket--;
+ }
+ }
+
+ /* NOTE(review): an unmatched ']' makes in_bracket negative, which the
+    test below also treats as "inside brackets" -- confirm this is intended */
+ /* `?' inside string or path -> insert */
+ if (in_string || in_bracket)
+ return rl_insert(1, '?');
+
+ rl_begin_undo_group(); /* HACK: We want to display `?' at point position */
+ rl_insert_text("?");
+ rl_redisplay();
+ rl_end_undo_group();
+ input_start_list();
+ cmd_help(rl_line_buffer, rl_point);
+ rl_undo_command(1, 0);
+ input_stop_list();
+ return 0;
+}
+
+/*
+ * Set up readline for the regular client: register the completion and
+ * help commands on TAB and '?', install input_got_line() as the line
+ * callback with the "bird> " prompt, and copy the curses LINES/COLS
+ * values into term_lns/term_cls. The caller in client.c replaces
+ * non-positive sizes with 25x80.
+ */
+void
+input_init(void)
+{
+ rl_readline_name = "birdc";
+ rl_add_defun("bird-complete", input_complete, '\t');
+ rl_add_defun("bird-help", input_help, '?');
+ rl_callback_handler_install("bird> ", input_got_line);
+
+ // rl_get_screen_size();
+ term_lns = LINES;
+ term_cls = COLS;
+
+ prompt_active = 1;
+
+ // readline library does strange things when stdin is nonblocking.
+ // if (fcntl(0, F_SETFL, O_NONBLOCK) < 0)
+ // die("fcntl: %m");
+}
+
+/*
+ * Show the prompt again: flush pending output, restore the line length
+ * saved by input_hide(), re-set the "bird> " prompt and force a
+ * redisplay. Marks the prompt as active.
+ */
+static void
+input_reveal(void)
+{
+ /* need this, otherwise some lib seems to eat pending output when
+ the prompt is displayed */
+ fflush(stdout);
+ tcdrain(STDOUT_FILENO);
+
+ rl_end = input_hidden_end;
+ rl_expand_prompt("bird> ");
+ rl_forced_update_display();
+
+ prompt_active = 1;
+}
+
+/*
+ * Hide the prompt and the line being edited: remember rl_end in
+ * input_hidden_end, set the visible line length to zero, use an empty
+ * prompt and redisplay. Marks the prompt as inactive.
+ */
+static void
+input_hide(void)
+{
+ input_hidden_end = rl_end;
+ rl_end = 0;
+ rl_expand_prompt("");
+ rl_redisplay();
+
+ prompt_active = 0;
+}
+
+/* Bring the displayed prompt in line with the requested state @prompt. */
+void
+input_notify(int prompt)
+{
+  /* Nothing to do when the prompt already is in the requested state */
+  if (prompt != prompt_active)
+  {
+    if (prompt)
+      input_reveal();
+    else
+      input_hide();
+  }
+}
+
+/* Hand pending input to readline; called from select_loop() when stdin is readable. */
+void
+input_read(void)
+{
+ rl_callback_read_char();
+}
+
+/* Hook called by more() before the pager prompt; nothing to do in the readline client. */
+void
+more_begin(void)
+{
+}
+
+/* Hook called by more() after the pager prompt; nothing to do in the readline client. */
+void
+more_end(void)
+{
+}
+
+/*
+ * Undo the readline setup before the client exits. Does nothing while
+ * 'init' is still set; client.c clears 'init' only after input_init()
+ * has been called, so before that there is no handler to remove.
+ */
+void
+cleanup(void)
+{
+ if (init)
+ return;
+
+ input_hide();
+ rl_callback_handler_remove();
+}
diff --git a/client/birdcl.c b/client/birdcl.c
new file mode 100644
index 00000000..2d5e1067
--- /dev/null
+++ b/client/birdcl.c
@@ -0,0 +1,160 @@
+/*
+ * BIRD Client - Light variant I/O
+ *
+ * (c) 1999--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2013 Tomas Hlavacek <tomas.hlavacek@nic.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <termios.h>
+
+#include <sys/ioctl.h>
+#include <signal.h>
+
+#include "nest/bird.h"
+#include "lib/resource.h"
+#include "lib/string.h"
+#include "client/client.h"
+#include "sysdep/unix/unix.h"
+
+#define INPUT_BUF_LEN 2048 /* Size of the line buffer used by input_read() */
+
+struct termios tty_save; /* NOTE(review): not referenced in this file (stored_tty is used instead) -- confirm it is still needed */
+
+void
+input_start_list(void)
+{
+ /* Empty in the light (non-readline, non-ncurses) version. */
+}
+
+void
+input_stop_list(void)
+{
+ /* Empty in the light (non-readline, non-ncurses) version. */
+}
+
+/* Print the "bird> " prompt when @prompt is nonzero; otherwise do nothing. */
+void
+input_notify(int prompt)
+{
+  /* The light client keeps no prompt state, so the prompt is printed by hand. */
+  if (prompt)
+  {
+    printf("bird> ");
+    fflush(stdout);
+  }
+}
+
+
+/*
+ * Return the last character among the first @i characters of @str that
+ * is neither a space nor a tab, or 0 when there is none.
+ */
+static int
+lastnb(char *str, int i)
+{
+  int pos = i;
+
+  /* Scan backwards from the end of the examined prefix */
+  while (pos != 0)
+  {
+    char c = str[--pos];
+
+    if ((c == ' ') || (c == '\t'))
+      continue;
+
+    return c;
+  }
+
+  return 0;
+}
+
+/*
+ * Read one line from stdin and act on it. End of input, a read error or
+ * a line starting with a NUL byte terminates the client. A line that
+ * does not fit into the buffer is rejected as a whole. Otherwise the
+ * trailing newline is stripped, the line is echoed when stdin is not a
+ * terminal, and it is passed to cmd_help() when its last non-blank
+ * character is '?' or to submit_command() in all other cases.
+ */
+void
+input_read(void)
+{
+  char buf[INPUT_BUF_LEN];
+
+  if ((fgets(buf, INPUT_BUF_LEN, stdin) == NULL) || (buf[0] == 0))
+  {
+    putchar('\n');
+    cleanup();
+    exit(0);
+  }
+
+  int l = strlen(buf);
+
+  if (buf[l-1] == '\n')
+    buf[--l] = '\0';
+  else if ((l+1) == INPUT_BUF_LEN)
+  {
+    /*
+     * The buffer is full and no newline was seen, so the line was
+     * truncated. Discard the rest of it; otherwise the tail would be
+     * read and executed as a separate command on the next call.
+     */
+    int c;
+    while (((c = getchar()) != EOF) && (c != '\n'))
+      ;
+
+    printf("Input too long.\n");
+    return;
+  }
+
+  if (!interactive)
+    printf("%s\n", buf);
+
+  if (l == 0)
+    return;
+
+  if (lastnb(buf, l) == '?')
+  {
+    /* l is already the length of the stripped line */
+    cmd_help(buf, l);
+    return;
+  }
+
+  submit_command(buf);
+}
+
+static struct termios stored_tty; /* Terminal settings read by input_init(), restored by more_end() */
+static int more_active = 0; /* Nonzero between more_begin() and more_end() */
+
+/*
+ * Called by more() before it waits for a key: switch the terminal to
+ * non-canonical mode without echo and remember that this mode is on.
+ */
+void
+more_begin(void)
+{
+  /* Work on a copy so that stored_tty keeps the original settings */
+  struct termios raw = stored_tty;
+  raw.c_lflag &= ~(ECHO | ICANON);
+
+  if (tcsetattr (0, TCSANOW, &raw) < 0)
+    die("tcsetattr: %m");
+
+  more_active = 1;
+}
+
+/*
+ * Called by more() after the key was read (and by cleanup()): put back
+ * the terminal settings saved in stored_tty.
+ */
+void
+more_end(void)
+{
+ more_active = 0;
+
+ if (tcsetattr (0, TCSANOW, &stored_tty) < 0)
+ die("tcsetattr: %m");
+}
+
+/*
+ * Handler installed for SIGINT and SIGTERM by input_init(): restores the
+ * terminal through cleanup() and terminates the client with status 0.
+ *
+ * NOTE(review): exit() is not async-signal-safe and cleanup() may end up
+ * in die(); consider only setting a flag here and leaving from the main
+ * loop instead.
+ */
+static void
+sig_handler(int sig UNUSED)
+{
+  cleanup();
+  exit(0);
+}
+
+/*
+ * Set up the light client for interactive use; does nothing when stdin
+ * is not a terminal. Saves the current terminal settings in stored_tty,
+ * installs sig_handler() for SIGINT and SIGTERM, and reads the window
+ * size into term_lns/term_cls when the TIOCGWINSZ ioctl succeeds.
+ */
+void
+input_init(void)
+{
+ if (!interactive)
+ return;
+
+ if (tcgetattr(0, &stored_tty) < 0)
+ die("tcgetattr: %m");
+
+ /* Keep a signal ignored if it was ignored before we installed the handler */
+ if (signal(SIGINT, sig_handler) == SIG_IGN)
+ signal(SIGINT, SIG_IGN);
+ if (signal(SIGTERM, sig_handler) == SIG_IGN)
+ signal(SIGTERM, SIG_IGN);
+
+ struct winsize tws;
+ if (ioctl(0, TIOCGWINSZ, &tws) == 0)
+ {
+ term_lns = tws.ws_row;
+ term_cls = tws.ws_col;
+ }
+}
+
+/* Restore the terminal settings if the client exits while the pager mode set by more_begin() is active. */
+void
+cleanup(void)
+{
+ if (more_active)
+ more_end();
+}
diff --git a/client/client.c b/client/client.c
index 8711cf0a..a9d0096d 100644
--- a/client/client.c
+++ b/client/client.c
@@ -2,22 +2,32 @@
* BIRD Client
*
* (c) 1999--2004 Martin Mares <mj@ucw.cz>
+ * (c) 2013 Tomas Hlavacek <tmshlvck@gmail.com>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
+/**
+ * DOC: BIRD client
+ *
+ * There are two variants of BIRD client: regular and light. The regular
+ * variant depends on readline and ncurses libraries, while light
+ * variant uses just libc. Most of the code and the main() is common
+ * for both variants (in client.c file) and just a few functions are
+ * different (in birdc.c for regular and birdcl.c for light). Two
+ * binaries are generated by linking common object files like client.o
+ * (which is compiled from client.c just once) with either birdc.o or
+ * birdcl.o for each variant.
+ */
+
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
-#include <termios.h>
#include <errno.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/types.h>
-#include <readline/readline.h>
-#include <readline/history.h>
-#include <curses.h>
#include "nest/bird.h"
#include "lib/resource.h"
@@ -25,33 +35,31 @@
#include "client/client.h"
#include "sysdep/unix/unix.h"
+#define SERVER_READ_BUF_LEN 4096
+
static char *opt_list = "s:vr";
-static int verbose;
+static int verbose, restricted, once;
static char *init_cmd;
-static int once;
static char *server_path = PATH_CONTROL_SOCKET;
static int server_fd;
-static byte server_read_buf[4096];
+static byte server_read_buf[SERVER_READ_BUF_LEN];
static byte *server_read_pos = server_read_buf;
-#define STATE_PROMPT 0
-#define STATE_CMD_SERVER 1
-#define STATE_CMD_USER 2
+int init = 1; /* During initial sequence */
+int busy = 1; /* Executing BIRD command */
+int interactive; /* Whether stdin is a terminal */
-static int input_initialized;
-static int input_hidden_end;
-static int cstate = STATE_CMD_SERVER;
-static int nstate = STATE_CMD_SERVER;
+static int num_lines, skip_input;
+int term_lns, term_cls;
-static int num_lines, skip_input, interactive;
/*** Parsing of arguments ***/
static void
-usage(void)
+usage(char *name)
{
- fprintf(stderr, "Usage: birdc [-s <control-socket>] [-v] [-r]\n");
+ fprintf(stderr, "Usage: %s [-s <control-socket>] [-v] [-r]\n", name);
exit(1);
}
@@ -70,10 +78,10 @@ parse_args(int argc, char **argv)
verbose++;
break;
case 'r':
- init_cmd = "restrict";
+ restricted = 1;
break;
default:
- usage();
+ usage(argv[0]);
}
/* If some arguments are not options, we take it as commands */
@@ -83,9 +91,6 @@ parse_args(int argc, char **argv)
int i;
int len = 0;
- if (init_cmd)
- usage();
-
for (i = optind; i < argc; i++)
len += strlen(argv[i]) + 1;
@@ -99,17 +104,14 @@ parse_args(int argc, char **argv)
tmp[-1] = 0;
once = 1;
+ interactive = 0;
}
}
-/*** Input ***/
-static void server_send(char *);
+/*** Input ***/
-/* HACK: libreadline internals we need to access */
-extern int _rl_vis_botlin;
-extern void _rl_move_vert(int);
-extern Function *rl_last_func;
+static void server_send(char *cmd);
static int
handle_internal_command(char *cmd)
@@ -127,181 +129,37 @@ handle_internal_command(char *cmd)
return 0;
}
-void
+static void
submit_server_command(char *cmd)
{
- server_send(cmd);
- nstate = STATE_CMD_SERVER;
+ busy = 1;
num_lines = 2;
-}
-
-static void
-add_history_dedup(char *cmd)
-{
- /* Add history line if it differs from the last one */
- HIST_ENTRY *he = history_get(history_length);
- if (!he || strcmp(he->line, cmd))
- add_history(cmd);
-}
-
-static void
-got_line(char *cmd_buffer)
-{
- char *cmd;
-
- if (!cmd_buffer)
- {
- cleanup();
- exit(0);
- }
- if (cmd_buffer[0])
- {
- cmd = cmd_expand(cmd_buffer);
- if (cmd)
- {
- add_history_dedup(cmd);
-
- if (!handle_internal_command(cmd))
- submit_server_command(cmd);
-
- free(cmd);
- }
- else
- add_history_dedup(cmd_buffer);
- }
- free(cmd_buffer);
-}
-
-void
-input_start_list(void) /* Leave the currently edited line and make space for listing */
-{
- _rl_move_vert(_rl_vis_botlin);
-#ifdef HAVE_RL_CRLF
- rl_crlf();
-#endif
+ server_send(cmd);
}
void
-input_stop_list(void) /* Reprint the currently edited line after listing */
-{
- rl_on_new_line();
- rl_redisplay();
-}
-
-static int
-input_complete(int arg UNUSED, int key UNUSED)
+submit_command(char *cmd_raw)
{
- static int complete_flag;
- char buf[256];
+ char *cmd = cmd_expand(cmd_raw);
- if (rl_last_func != input_complete)
- complete_flag = 0;
- switch (cmd_complete(rl_line_buffer, rl_point, buf, complete_flag))
- {
- case 0:
- complete_flag = 1;
- break;
- case 1:
- rl_insert_text(buf);
- break;
- default:
- complete_flag = 1;
-#ifdef HAVE_RL_DING
- rl_ding();
-#endif
- }
- return 0;
-}
-
-static int
-input_help(int arg, int key UNUSED)
-{
- int i, in_string, in_bracket;
-
- if (arg != 1)
- return rl_insert(arg, '?');
-
- in_string = in_bracket = 0;
- for (i = 0; i < rl_point; i++)
- {
-
- if (rl_line_buffer[i] == '"')
- in_string = ! in_string;
- else if (! in_string)
- {
- if (rl_line_buffer[i] == '[')
- in_bracket++;
- else if (rl_line_buffer[i] == ']')
- in_bracket--;
- }
- }
+ if (!cmd)
+ return;
- /* `?' inside string or path -> insert */
- if (in_string || in_bracket)
- return rl_insert(1, '?');
-
- rl_begin_undo_group(); /* HACK: We want to display `?' at point position */
- rl_insert_text("?");
- rl_redisplay();
- rl_end_undo_group();
- input_start_list();
- cmd_help(rl_line_buffer, rl_point);
- rl_undo_command(1, 0);
- input_stop_list();
- return 0;
-}
+ if (!handle_internal_command(cmd))
+ submit_server_command(cmd);
-static void
-input_init(void)
-{
- rl_readline_name = "birdc";
- rl_add_defun("bird-complete", input_complete, '\t');
- rl_add_defun("bird-help", input_help, '?');
- rl_callback_handler_install("bird> ", got_line);
- input_initialized = 1;
-// readline library does strange things when stdin is nonblocking.
-// if (fcntl(0, F_SETFL, O_NONBLOCK) < 0)
-// die("fcntl: %m");
-}
-
-static void
-input_hide(void)
-{
- input_hidden_end = rl_end;
- rl_end = 0;
- rl_expand_prompt("");
- rl_redisplay();
+ free(cmd);
}
static void
-input_reveal(void)
-{
- /* need this, otherwise some lib seems to eat pending output when
- the prompt is displayed */
- fflush(stdout);
- tcdrain(fileno(stdout));
-
- rl_end = input_hidden_end;
- rl_expand_prompt("bird> ");
- rl_forced_update_display();
-}
-
-void
-cleanup(void)
+init_commands(void)
{
- if (input_initialized)
+ if (restricted)
{
- input_initialized = 0;
- input_hide();
- rl_callback_handler_remove();
+ submit_server_command("restrict");
+ restricted = 0;
+ return;
}
-}
-
-void
-update_state(void)
-{
- if (nstate == cstate)
- return;
if (init_cmd)
{
@@ -312,40 +170,39 @@ update_state(void)
return;
}
- if (!init_cmd && once)
+ if (once)
{
/* Initial command is finished and we want to exit */
cleanup();
exit(0);
}
- if (nstate == STATE_PROMPT)
- {
- if (input_initialized)
- input_reveal();
- else
- input_init();
- }
+ input_init();
- if (nstate != STATE_PROMPT)
- input_hide();
+ term_lns = (term_lns > 0) ? term_lns : 25;
+ term_cls = (term_cls > 0) ? term_cls : 80;
- cstate = nstate;
+ init = 0;
}
+
+/*** Output ***/
+
void
more(void)
{
+ more_begin();
printf("--More--\015");
fflush(stdout);
redo:
switch (getchar())
{
- case 32:
+ case ' ':
num_lines = 2;
break;
- case 13:
+ case '\n':
+ case '\r':
num_lines--;
break;
case 'q':
@@ -357,6 +214,7 @@ more(void)
printf(" \015");
fflush(stdout);
+ more_end();
}
@@ -383,6 +241,7 @@ server_connect(void)
die("fcntl: %m");
}
+
#define PRINTF(LEN, PARGS...) do { if (!skip_input) len = printf(PARGS); } while(0)
static void
@@ -391,36 +250,32 @@ server_got_reply(char *x)
int code;
int len = 0;
- if (*x == '+') /* Async reply */
+ if (*x == '+') /* Async reply */
PRINTF(len, ">>> %s\n", x+1);
- else if (x[0] == ' ') /* Continuation */
+ else if (x[0] == ' ') /* Continuation */
PRINTF(len, "%s%s\n", verbose ? " " : "", x+1);
else if (strlen(x) > 4 &&
- sscanf(x, "%d", &code) == 1 && code >= 0 && code < 10000 &&
- (x[4] == ' ' || x[4] == '-'))
+ sscanf(x, "%d", &code) == 1 && code >= 0 && code < 10000 &&
+ (x[4] == ' ' || x[4] == '-'))
{
if (code)
- PRINTF(len, "%s\n", verbose ? x : x+5);
+ PRINTF(len, "%s\n", verbose ? x : x+5);
+
if (x[4] == ' ')
{
- nstate = STATE_PROMPT;
- skip_input = 0;
- return;
+ busy = 0;
+ skip_input = 0;
+ return;
}
}
else
PRINTF(len, "??? <%s>\n", x);
- if (skip_input)
- return;
-
- if (interactive && input_initialized && (len > 0))
+ if (interactive && busy && !skip_input && !init && (len > 0))
{
- int lns = LINES ? LINES : 25;
- int cls = COLS ? COLS : 80;
- num_lines += (len + cls - 1) / cls; /* Divide and round up */
- if ((num_lines >= lns) && (cstate == STATE_CMD_SERVER))
- more();
+ num_lines += (len + term_cls - 1) / term_cls; /* Divide and round up */
+ if (num_lines >= term_lns)
+ more();
}
}
@@ -465,19 +320,23 @@ server_read(void)
}
}
-static fd_set select_fds;
-
static void
select_loop(void)
{
int rv;
while (1)
{
+ if (init && !busy)
+ init_commands();
+
+ if (!init)
+ input_notify(!busy);
+
+ fd_set select_fds;
FD_ZERO(&select_fds);
- if (cstate != STATE_CMD_USER)
- FD_SET(server_fd, &select_fds);
- if (cstate != STATE_CMD_SERVER)
+ FD_SET(server_fd, &select_fds);
+ if (!busy)
FD_SET(0, &select_fds);
rv = select(server_fd+1, &select_fds, NULL, NULL, NULL);
@@ -489,16 +348,16 @@ select_loop(void)
die("select: %m");
}
- if (FD_ISSET(server_fd, &select_fds))
+ if (FD_ISSET(0, &select_fds))
{
- server_read();
- update_state();
+ input_read();
+ continue;
}
- if (FD_ISSET(0, &select_fds))
+ if (FD_ISSET(server_fd, &select_fds))
{
- rl_callback_read_char();
- update_state();
+ server_read();
+ continue;
}
}
}
@@ -556,14 +415,22 @@ server_send(char *cmd)
}
}
+
+/* XXXX
+
+ get_term_size();
+
+ if (tcgetattr(0, &tty_save) != 0)
+ {
+ perror("tcgetattr error");
+ return(EXIT_FAILURE);
+ }
+ }
+
+ */
int
main(int argc, char **argv)
{
-#ifdef HAVE_LIBDMALLOC
- if (!getenv("DMALLOC_OPTIONS"))
- dmalloc_debug(0x2f03d00);
-#endif
-
interactive = isatty(0);
parse_args(argc, argv);
cmd_build_tree();
diff --git a/client/client.h b/client/client.h
index 64de97ec..b194a772 100644
--- a/client/client.h
+++ b/client/client.h
@@ -6,15 +6,31 @@
* Can be freely distributed and used under the terms of the GNU GPL.
*/
-/* client.c */
-void cleanup(void);
+extern int init, busy, interactive;
+extern int term_lns, term_cls;
+
+/* birdc.c / birdcl.c */
+
void input_start_list(void);
void input_stop_list(void);
+void input_init(void);
+void input_notify(int prompt);
+void input_read(void);
+
+void more_begin(void);
+void more_end(void);
+
+void cleanup(void);
+
/* commands.c */
void cmd_build_tree(void);
void cmd_help(char *cmd, int len);
int cmd_complete(char *cmd, int len, char *buf, int again);
char *cmd_expand(char *cmd);
+
+/* client.c */
+
+void submit_command(char *cmd_raw);
diff --git a/conf/cf-lex.l b/conf/cf-lex.l
index c8eae0e8..b1bbeae2 100644
--- a/conf/cf-lex.l
+++ b/conf/cf-lex.l
@@ -15,10 +15,10 @@
* symbols and keywords.
*
* Each symbol is represented by a &symbol structure containing name
- * of the symbol, its lexical scope, symbol class (%SYM_PROTO for a name of a protocol,
- * %SYM_NUMBER for a numeric constant etc.) and class dependent data.
- * When an unknown symbol is encountered, it's automatically added to the
- * symbol table with class %SYM_VOID.
+ * of the symbol, its lexical scope, symbol class (%SYM_PROTO for a
+ * name of a protocol, %SYM_CONSTANT for a constant etc.) and class
+ * dependent data. When an unknown symbol is encountered, it's
+ * automatically added to the symbol table with class %SYM_VOID.
*
* The keyword tables are generated from the grammar templates
* using the |gen_keywords.m4| script.
@@ -172,7 +172,7 @@ else: {
return ELSECOL;
}
-({ALPHA}{ALNUM}*|[']({ALNUM}|[-])*[']) {
+({ALPHA}{ALNUM}*|[']({ALNUM}|[-]|[\.]|[:])*[']) {
if(*yytext == '\'') {
yytext[yyleng-1] = 0;
yytext++;
@@ -623,24 +623,23 @@ cf_walk_symbols(struct config *cf, struct symbol *sym, int *pos)
char *
cf_symbol_class_name(struct symbol *sym)
{
+ if ((sym->class & 0xff00) == SYM_CONSTANT)
+ return "constant";
+
switch (sym->class)
{
case SYM_VOID:
return "undefined";
case SYM_PROTO:
return "protocol";
- case SYM_NUMBER:
- return "numeric constant";
+ case SYM_TEMPLATE:
+ return "protocol template";
case SYM_FUNCTION:
return "function";
case SYM_FILTER:
return "filter";
case SYM_TABLE:
return "routing table";
- case SYM_IPA:
- return "network address";
- case SYM_TEMPLATE:
- return "protocol template";
case SYM_ROA:
return "ROA table";
default:
diff --git a/conf/conf.c b/conf/conf.c
index 9375861f..14225d3b 100644
--- a/conf/conf.c
+++ b/conf/conf.c
@@ -21,9 +21,12 @@
* There can exist up to four different configurations at one time: an active
* one (pointed to by @config), configuration we are just switching from
* (@old_config), one queued for the next reconfiguration (@future_config;
- * if it's non-%NULL and the user wants to reconfigure once again, we just
+ * if there is one and the user wants to reconfigure once again, we just
* free the previous queued config and replace it with the new one) and
- * finally a config being parsed (@new_config).
+ * finally a config being parsed (@new_config). The stored @old_config
+ * is also used for undo reconfiguration, which works in a similar way.
+ * Reconfiguration can also have a timeout (using @config_timer); undo
+ * is then called automatically if the new configuration is not confirmed later.
*
* Loading of new configuration is very simple: just call config_alloc()
* to get a new &config structure, then use config_parse() to parse a
@@ -55,10 +58,23 @@
static jmp_buf conf_jmpbuf;
-struct config *config, *new_config, *old_config, *future_config;
-static event *config_event;
-int shutting_down, future_type;
-bird_clock_t boot_time;
+struct config *config, *new_config;
+
+static struct config *old_config; /* Old configuration */
+static struct config *future_config; /* New config held here if recon requested during recon */
+static int old_cftype; /* Type of transition old_config -> config (RECONFIG_SOFT/HARD) */
+static int future_cftype; /* Type of scheduled transition, may also be RECONFIG_UNDO */
+/* Note that when future_cftype is RECONFIG_UNDO, then future_config is NULL,
+ therefore proper check for future scheduled config checks future_cftype */
+
+static event *config_event; /* Event for finalizing reconfiguration */
+static timer *config_timer; /* Timer for scheduled configuration rollback */
+
+/* These are public just for cmd_show_status(), should not be accessed elsewhere */
+int shutting_down; /* Shutdown requested, do not accept new config changes */
+int configuring; /* Reconfiguration is running */
+int undo_available; /* Undo was not requested from last reconfiguration */
+/* Note that both shutting_down and undo_available are related to requests, not processing */
/**
* config_alloc - allocate a new configuration
@@ -82,8 +98,6 @@ config_alloc(byte *name)
c->load_time = now;
c->tf_base.fmt1 = c->tf_log.fmt1 = "%d-%m-%Y %T";
- if (!boot_time)
- boot_time = now;
return c;
}
@@ -154,7 +168,8 @@ cli_parse(struct config *c)
void
config_free(struct config *c)
{
- rfree(c->pool);
+ if (c)
+ rfree(c->pool);
}
void
@@ -170,10 +185,7 @@ config_del_obstacle(struct config *c)
DBG("+++ deleting obstacle %d\n", c->obstacle_count);
c->obstacle_count--;
if (!c->obstacle_count)
- {
- ASSERT(config_event);
- ev_schedule(config_event);
- }
+ ev_schedule(config_event);
}
static int
@@ -188,25 +200,50 @@ global_commit(struct config *new, struct config *old)
log(L_WARN "Reconfiguration of BGP listening socket not implemented, please restart BIRD.");
if (!new->router_id)
- new->router_id = old->router_id;
- if (new->router_id != old->router_id)
- return 1;
+ {
+ new->router_id = old->router_id;
+
+ if (new->router_id_from)
+ {
+ u32 id = if_choose_router_id(new->router_id_from, old->router_id);
+ if (!id)
+ log(L_WARN "Cannot determine router ID, using old one");
+ else
+ new->router_id = id;
+ }
+ }
+
return 0;
}
static int
config_do_commit(struct config *c, int type)
{
- int force_restart, nobs;
+ if (type == RECONFIG_UNDO)
+ {
+ c = old_config;
+ type = old_cftype;
+ }
+ else
+ config_free(old_config);
- DBG("do_commit\n");
old_config = config;
- config = new_config = c;
+ old_cftype = type;
+ config = c;
+
+ configuring = 1;
+ if (old_config && !config->shutdown)
+ log(L_INFO "Reconfiguring");
+
+ /* This should not be necessary, but it seems there are some
+ functions that access new_config instead of config */
+ new_config = config;
+
if (old_config)
old_config->obstacle_count++;
DBG("sysdep_commit\n");
- force_restart = sysdep_commit(c, old_config);
+ int force_restart = sysdep_commit(c, old_config);
DBG("global_commit\n");
force_restart |= global_commit(c, old_config);
DBG("rt_commit\n");
@@ -214,38 +251,38 @@ config_do_commit(struct config *c, int type)
roa_commit(c, old_config);
DBG("protos_commit\n");
protos_commit(c, old_config, force_restart, type);
- new_config = NULL; /* Just to be sure nobody uses that now */
+
+ /* Just to be sure nobody uses that now */
+ new_config = NULL;
+
+ int obs = 0;
if (old_config)
- nobs = --old_config->obstacle_count;
- else
- nobs = 0;
- DBG("do_commit finished with %d obstacles remaining\n", nobs);
- return !nobs;
+ obs = --old_config->obstacle_count;
+
+ DBG("do_commit finished with %d obstacles remaining\n", obs);
+ return !obs;
}
static void
config_done(void *unused UNUSED)
{
- struct config *c;
+ if (config->shutdown)
+ sysdep_shutdown_done();
- DBG("config_done\n");
- for(;;)
+ configuring = 0;
+ if (old_config)
+ log(L_INFO "Reconfigured");
+
+ if (future_cftype)
{
- if (config->shutdown)
- sysdep_shutdown_done();
- log(L_INFO "Reconfigured");
- if (old_config)
- {
- config_free(old_config);
- old_config = NULL;
- }
- if (!future_config)
- break;
- c = future_config;
+ int type = future_cftype;
+ struct config *conf = future_config;
+ future_cftype = RECONFIG_NONE;
future_config = NULL;
+
log(L_INFO "Reconfiguring to queued configuration");
- if (!config_do_commit(c, future_type))
- break;
+ if (config_do_commit(conf, type))
+ config_done(NULL);
}
}
@@ -253,6 +290,7 @@ config_done(void *unused UNUSED)
* config_commit - commit a configuration
* @c: new configuration
* @type: type of reconfiguration (RECONFIG_SOFT or RECONFIG_HARD)
+ * @timeout: timeout for undo (or 0 for no timeout)
*
* When a configuration is parsed and prepared for use, the
* config_commit() function starts the process of reconfiguration.
@@ -265,6 +303,10 @@ config_done(void *unused UNUSED)
* using config_del_obstacle(), the old configuration is freed and
* everything runs according to the new one.
*
+ * When @timeout is nonzero, the undo timer is activated with given
+ * timeout. The timer is deactivated when config_commit(),
+ * config_confirm() or config_undo() is called.
+ *
* Result: %CONF_DONE if the configuration has been accepted immediately,
* %CONF_PROGRESS if it will take some time to switch to it, %CONF_QUEUED
* if it's been queued due to another reconfiguration being in progress now
@@ -272,49 +314,147 @@ config_done(void *unused UNUSED)
* are accepted.
*/
int
-config_commit(struct config *c, int type)
+config_commit(struct config *c, int type, int timeout)
{
- if (!config) /* First-time configuration */
+ if (shutting_down)
{
- config_do_commit(c, RECONFIG_HARD);
- return CONF_DONE;
+ config_free(c);
+ return CONF_SHUTDOWN;
}
- if (old_config) /* Reconfiguration already in progress */
+
+ undo_available = 1;
+ if (timeout > 0)
+ tm_start(config_timer, timeout);
+ else
+ tm_stop(config_timer);
+
+ if (configuring)
{
- if (shutting_down == 2)
- {
- log(L_INFO "New configuration discarded due to shutdown");
- config_free(c);
- return CONF_SHUTDOWN;
- }
- if (future_config)
+ if (future_cftype)
{
log(L_INFO "Queueing new configuration, ignoring the one already queued");
config_free(future_config);
}
else
- log(L_INFO "Queued new configuration");
+ log(L_INFO "Queueing new configuration");
+
+ future_cftype = type;
future_config = c;
- future_type = type;
return CONF_QUEUED;
}
- if (!shutting_down)
- log(L_INFO "Reconfiguring");
-
if (config_do_commit(c, type))
{
config_done(NULL);
return CONF_DONE;
}
- if (!config_event)
+ return CONF_PROGRESS;
+}
+
+/**
+ * config_confirm - confirm a committed configuration
+ *
+ * When the undo timer is activated by config_commit() with nonzero timeout,
+ * this function can be used to deactivate it and therefore confirm
+ * the current configuration.
+ *
+ * Result: %CONF_CONFIRM when the current configuration is confirmed,
+ * %CONF_NOTHING when there is nothing to confirm (i.e. undo timer is not active).
+ */
+int
+config_confirm(void)
+{
+ if (config_timer->expires == 0)
+ return CONF_NOTHING;
+
+ tm_stop(config_timer);
+
+ return CONF_CONFIRM;
+}
+
+/**
+ * config_undo - undo a configuration
+ *
+ * Function config_undo() can be used to change the current
+ * configuration back to stored %old_config. If no reconfiguration is
+ * running, this stored configuration is committed in the same way as a
+ * new configuration in config_commit(). If there is already a
+ * reconfiguration in progress and no next reconfiguration is
+ * scheduled, then the undo is scheduled for later processing as
+ * usual, but if another reconfiguration is already scheduled, then
+ * such reconfiguration is removed instead (i.e. undo is applied on
+ * the last commit that scheduled it).
+ *
+ * Result: %CONF_DONE if the configuration has been accepted immediately,
+ * %CONF_PROGRESS if it will take some time to switch to it, %CONF_QUEUED
+ * if it's been queued due to another reconfiguration being in progress now,
+ * %CONF_UNQUEUED if a scheduled reconfiguration is removed, %CONF_NOTHING
+ * if there is no relevant configuration to undo (the previous config request
+ * was config_undo() too) or %CONF_SHUTDOWN if BIRD is in shutdown mode and
+ * no new configuration changes are accepted.
+ */
+int
+config_undo(void)
+{
+ if (shutting_down)
+ return CONF_SHUTDOWN;
+
+ if (!undo_available || !old_config)
+ return CONF_NOTHING;
+
+ undo_available = 0;
+ tm_stop(config_timer);
+
+ if (configuring)
+ {
+ if (future_cftype)
+ {
+ config_free(future_config);
+ future_config = NULL;
+
+ log(L_INFO "Removing queued configuration");
+ future_cftype = RECONFIG_NONE;
+ return CONF_UNQUEUED;
+ }
+ else
+ {
+ log(L_INFO "Queueing undo configuration");
+ future_cftype = RECONFIG_UNDO;
+ return CONF_QUEUED;
+ }
+ }
+
+ if (config_do_commit(NULL, RECONFIG_UNDO))
{
- config_event = ev_new(&root_pool);
- config_event->hook = config_done;
+ config_done(NULL);
+ return CONF_DONE;
}
return CONF_PROGRESS;
}
+extern void cmd_reconfig_undo_notify(void);
+
+static void
+config_timeout(struct timer *t)
+{
+ log(L_INFO "Config timeout expired, starting undo");
+ cmd_reconfig_undo_notify();
+
+ int r = config_undo();
+ if (r < 0)
+ log(L_ERR "Undo request failed");
+}
+
+void
+config_init(void)
+{
+ config_event = ev_new(&root_pool);
+ config_event->hook = config_done;
+
+ config_timer = tm_new(&root_pool);
+ config_timer->hook = config_timeout;
+}
+
/**
* order_shutdown - order BIRD shutdown
*
@@ -328,15 +468,16 @@ order_shutdown(void)
if (shutting_down)
return;
+
log(L_INFO "Shutting down");
c = lp_alloc(config->mem, sizeof(struct config));
memcpy(c, config, sizeof(struct config));
init_list(&c->protos);
init_list(&c->tables);
c->shutdown = 1;
+
+ config_commit(c, RECONFIG_HARD, 0);
shutting_down = 1;
- config_commit(c, RECONFIG_HARD);
- shutting_down = 2;
}
/**
diff --git a/conf/conf.h b/conf/conf.h
index c76832b6..28624294 100644
--- a/conf/conf.h
+++ b/conf/conf.h
@@ -26,6 +26,7 @@ struct config {
int mrtdump_file; /* Configured MRTDump file (sysdep, fd in unix) */
char *syslog_name; /* Name used for syslog (NULL -> no syslog) */
struct rtable_config *master_rtc; /* Configuration of master routing table */
+ struct iface_patt *router_id_from; /* Configured list of router ID iface patterns */
u32 router_id; /* Our Router ID */
ip_addr listen_bgp_addr; /* Listening BGP socket should use this address */
@@ -54,28 +55,33 @@ struct config {
/* Please don't use these variables in protocols. Use proto_config->global instead. */
extern struct config *config; /* Currently active configuration */
extern struct config *new_config; /* Configuration being parsed */
-extern struct config *old_config; /* Old configuration when reconfiguration is in progress */
-extern struct config *future_config; /* New config held here if recon requested during recon */
-
-extern int shutting_down;
-extern bird_clock_t boot_time;
struct config *config_alloc(byte *name);
int config_parse(struct config *);
int cli_parse(struct config *);
void config_free(struct config *);
-int config_commit(struct config *, int type);
-#define RECONFIG_HARD 0
-#define RECONFIG_SOFT 1
+int config_commit(struct config *, int type, int timeout);
+int config_confirm(void);
+int config_undo(void);
+void config_init(void);
void cf_error(char *msg, ...) NORET;
void config_add_obstacle(struct config *);
void config_del_obstacle(struct config *);
void order_shutdown(void);
-#define CONF_DONE 0
-#define CONF_PROGRESS 1
-#define CONF_QUEUED 2
-#define CONF_SHUTDOWN 3
+#define RECONFIG_NONE 0
+#define RECONFIG_HARD 1
+#define RECONFIG_SOFT 2
+#define RECONFIG_UNDO 3
+
+#define CONF_DONE 0
+#define CONF_PROGRESS 1
+#define CONF_QUEUED 2
+#define CONF_UNQUEUED 3
+#define CONF_CONFIRM 4
+#define CONF_SHUTDOWN -1
+#define CONF_NOTHING -2
+
/* Pools */
@@ -104,15 +110,17 @@ struct symbol {
/* Remember to update cf_symbol_class_name() */
#define SYM_VOID 0
#define SYM_PROTO 1
-#define SYM_NUMBER 2
+#define SYM_TEMPLATE 2
#define SYM_FUNCTION 3
#define SYM_FILTER 4
#define SYM_TABLE 5
-#define SYM_IPA 6
-#define SYM_TEMPLATE 7
-#define SYM_ROA 8
+#define SYM_ROA 6
#define SYM_VARIABLE 0x100 /* 0x100-0x1ff are variable types */
+#define SYM_CONSTANT 0x200 /* 0x200-0x2ff are constant types */
+
+#define SYM_TYPE(s) (((struct f_val *) (s)->def)->type)
+#define SYM_VAL(s) (((struct f_val *) (s)->def)->val)
struct include_file_stack {
void *buffer; /* Internal lexer state */
diff --git a/conf/confbase.Y b/conf/confbase.Y
index dcb0719f..8b9f206a 100644
--- a/conf/confbase.Y
+++ b/conf/confbase.Y
@@ -73,6 +73,7 @@ CF_DECLS
%type <iface> ipa_scope
%type <i> expr bool pxlen
+%type <i32> expr_us
%type <time> datetime
%type <a> ipa
%type <px> prefix prefix_or_ipa
@@ -86,7 +87,7 @@ CF_DECLS
%left '!'
%nonassoc '.'
-CF_KEYWORDS(DEFINE, ON, OFF, YES, NO)
+CF_KEYWORDS(DEFINE, ON, OFF, YES, NO, S, MS, US)
CF_GRAMMAR
@@ -103,28 +104,36 @@ conf_entries:
CF_ADDTO(conf, ';')
+
/* Constant expressions */
+CF_ADDTO(conf, definition)
+definition:
+ DEFINE SYM '=' term ';' {
+ struct f_val *val = cfg_alloc(sizeof(struct f_val));
+ *val = f_eval($4, cfg_mem);
+ if (val->type == T_RETURN) cf_error("Runtime error");
+ cf_define_symbol($2, SYM_CONSTANT | val->type, val);
+ }
+ ;
+
expr:
NUM
| '(' term ')' { $$ = f_eval_int($2); }
- | SYM { if ($1->class != SYM_NUMBER) cf_error("Number expected"); else $$ = $1->aux; }
+ | SYM {
+ if ($1->class != (SYM_CONSTANT | T_INT)) cf_error("Number expected");
+ $$ = SYM_VAL($1).i; }
;
-/* expr_u16: expr { check_u16($1); $$ = $1; }; */
-CF_ADDTO(conf, definition)
-definition:
- DEFINE SYM '=' expr ';' {
- cf_define_symbol($2, SYM_NUMBER, NULL);
- $2->aux = $4;
- }
- | DEFINE SYM '=' IPA ';' {
- cf_define_symbol($2, SYM_IPA, cfg_alloc(sizeof(ip_addr)));
- *(ip_addr *)$2->def = $4;
- }
+expr_us:
+ expr S { $$ = (u32) $1 * 1000000; }
+ | expr MS { $$ = (u32) $1 * 1000; }
+ | expr US { $$ = (u32) $1 * 1; }
;
+/* expr_u16: expr { check_u16($1); $$ = $1; }; */
+
/* Switches */
bool:
@@ -141,8 +150,8 @@ bool:
ipa:
IPA
| SYM {
- if ($1->class != SYM_IPA) cf_error("IP address expected");
- $$ = *(ip_addr *)$1->def;
+ if ($1->class != (SYM_CONSTANT | T_IP)) cf_error("IP address expected");
+ $$ = SYM_VAL($1).px.ip;
}
;
diff --git a/conf/gen_commands.m4 b/conf/gen_commands.m4
index a88ba014..3ed21f13 100644
--- a/conf/gen_commands.m4
+++ b/conf/gen_commands.m4
@@ -10,6 +10,9 @@ m4_divert(-1)m4_dnl
m4_define(CF_CLI, `m4_divert(0){ "m4_translit($1,A-Z,a-z)", "$3", "$4", 1 },
m4_divert(-1)')
+m4_define(CF_CLI_CMD, `m4_divert(0){ "m4_translit($1,A-Z,a-z)", "$2", "$3", 1 },
+m4_divert(-1)')
+
m4_define(CF_CLI_HELP, `m4_divert(0){ "m4_translit($1,A-Z,a-z)", "$2", "$3", 0 },
m4_divert(-1)')
diff --git a/conf/gen_parser.m4 b/conf/gen_parser.m4
index 74385f32..00b55023 100644
--- a/conf/gen_parser.m4
+++ b/conf/gen_parser.m4
@@ -44,6 +44,7 @@ m4_define(CF_CLI, `m4_define([[CF_cmd]], cmd_[[]]m4_translit($1, [[ ]], _))DNL
m4_divert(2)CF_KEYWORDS(m4_translit($1, [[ ]], [[,]]))
m4_divert(3)CF_ADDTO(cli_cmd, CF_cmd)
CF_cmd: $1 $2 END')
+m4_define(CF_CLI_CMD, `')
m4_define(CF_CLI_HELP, `')
# ENUM declarations are ignored
diff --git a/configure.in b/configure.in
index 54993dfc..5af574a5 100644
--- a/configure.in
+++ b/configure.in
@@ -10,6 +10,7 @@ AC_ARG_ENABLE(debug, [ --enable-debug enable internal debugging routin
AC_ARG_ENABLE(memcheck, [ --enable-memcheck check memory allocations when debugging (default: enabled)],,enable_memcheck=yes)
AC_ARG_ENABLE(client, [ --enable-client enable building of BIRD client (default: enabled)],,enable_client=yes)
AC_ARG_ENABLE(ipv6, [ --enable-ipv6 enable building of IPv6 version (default: disabled)],,enable_ipv6=no)
+AC_ARG_ENABLE(pthreads, [ --enable-pthreads enable POSIX threads support (default: detect)],,enable_pthreads=try)
AC_ARG_WITH(suffix, [ --with-suffix=STRING use specified suffix for BIRD files (default: 6 for IPv6 version)],[given_suffix="yes"])
AC_ARG_WITH(sysconfig, [ --with-sysconfig=FILE use specified BIRD system configuration file])
AC_ARG_WITH(protocols, [ --with-protocols=LIST include specified routing protocols (default: all)],,[with_protocols="all"])
@@ -47,11 +48,10 @@ AC_SUBST(runtimedir)
if test "$enable_ipv6" = yes ; then
ip=ipv6
SUFFIX=6
- all_protocols=bgp,ospf,pipe,radv,rip,static
+ proto_radv=radv
else
ip=ipv4
SUFFIX=""
- all_protocols=bgp,ospf,pipe,rip,static
fi
if test "$given_suffix" = yes ; then
@@ -59,10 +59,6 @@ if test "$given_suffix" = yes ; then
fi
AC_SUBST(SUFFIX)
-if test "$with_protocols" = all ; then
- with_protocols="$all_protocols"
-fi
-
if test "$enable_debug" = yes ; then
CONFIG_FILE="bird$SUFFIX.conf"
CONTROL_SOCKET="bird$SUFFIX.ctl"
@@ -87,15 +83,42 @@ if test -z "$GCC" ; then
AC_MSG_ERROR([This program requires the GNU C Compiler.])
fi
-AC_MSG_CHECKING([what CFLAGS should we use])
+# Enable threads by default just in Linux and FreeBSD
+if test "$enable_pthreads" = try ; then
+ case "$host_os" in
+ (linux* | freebsd*) enable_pthreads=try ;;
+ (*) enable_pthreads=no ;;
+ esac
+fi
+
+if test "$enable_pthreads" != no ; then
+ BIRD_CHECK_PTHREADS
+
+ if test "$bird_cv_lib_pthreads" = yes ; then
+ AC_DEFINE(USE_PTHREADS)
+ CFLAGS="$CFLAGS -pthread"
+ LDFLAGS="$LDFLAGS -pthread"
+ proto_bfd=bfd
+ elif test "$enable_pthreads" = yes ; then
+ AC_MSG_ERROR([POSIX threads not available.])
+ fi
+
+ if test "$enable_pthreads" = try ; then
+ enable_pthreads="$bird_cv_lib_pthreads"
+ fi
+fi
+
if test "$bird_cflags_default" = yes ; then
- BIRD_CHECK_GCC_OPTIONS
+ BIRD_CHECK_GCC_OPTION(bird_cv_c_option_wno_pointer_sign, -Wno-pointer-sign, -Wall)
+ BIRD_CHECK_GCC_OPTION(bird_cv_c_option_fno_strict_aliasing, -fno-strict-aliasing)
+ BIRD_CHECK_GCC_OPTION(bird_cv_c_option_fno_strict_overflow, -fno-strict-overflow)
CFLAGS="$CFLAGS -Wall -Wstrict-prototypes -Wno-parentheses"
- if test "$bird_cv_c_option_no_pointer_sign" = yes ; then
- CFLAGS="$CFLAGS -Wno-pointer-sign"
- fi
+ BIRD_ADD_GCC_OPTION(bird_cv_c_option_wno_pointer_sign, -Wno-pointer-sign)
+ BIRD_ADD_GCC_OPTION(bird_cv_c_option_fno_strict_aliasing, -fno-strict-aliasing)
+ BIRD_ADD_GCC_OPTION(bird_cv_c_option_fno_strict_overflow, -fno-strict-overflow)
fi
+AC_MSG_CHECKING([CFLAGS])
AC_MSG_RESULT($CFLAGS)
@@ -181,6 +204,13 @@ fi
AC_SUBST(iproutedir)
+all_protocols="$proto_bfd bgp ospf pipe $proto_radv rip static"
+all_protocols=`echo $all_protocols | sed 's/ /,/g'`
+
+if test "$with_protocols" = all ; then
+ with_protocols="$all_protocols"
+fi
+
AC_MSG_CHECKING([protocols])
protocols=`echo "$with_protocols" | sed 's/,/ /g'`
if test "$protocols" = no ; then protocols= ; fi
@@ -234,7 +264,7 @@ fi
CLIENT=
CLIENT_LIBS=
if test "$enable_client" = yes ; then
- CLIENT=client
+ CLIENT=birdc
AC_CHECK_LIB(history, add_history, CLIENT_LIBS="-lhistory")
AC_CHECK_LIB(ncurses, tgetent, USE_TERMCAP_LIB=-lncurses,
AC_CHECK_LIB(curses, tgetent, USE_TERMCAP_LIB=-lcurses,
@@ -270,6 +300,7 @@ BIRD was configured with the following options:
Iproute2 directory: $iproutedir
System configuration: $sysdesc
Debugging: $enable_debug
+ POSIX threads: $enable_pthreads
Routing protocols: $protocols
Client: $enable_client
EOF
diff --git a/doc/bird.conf.example b/doc/bird.conf.example
index 5e07ab5a..dcc62e29 100644
--- a/doc/bird.conf.example
+++ b/doc/bird.conf.example
@@ -67,8 +67,8 @@ protocol static {
# debug { states, routes, filters, interfaces, events, packets };
# debug all;
# route 0.0.0.0/0 via 198.51.100.13;
-# route 198.51.100.0/25 reject;
-# route 10.0.0.0/8 reject;
+# route 198.51.100.0/25 unreachable;
+# route 10.0.0.0/8 unreachable;
# route 10.1.1.0:255.255.255.0 via 198.51.100.3;
# route 10.1.2.0:255.255.255.0 via 198.51.100.3;
# route 10.1.3.0:255.255.255.0 via 198.51.100.4;
diff --git a/doc/bird.sgml b/doc/bird.sgml
index 24bc3026..46d2e026 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -144,13 +144,19 @@ options. The most important ones are:
nonzero if there are some errors.
<tag>-s <m/name of communication socket/</tag>
- use given filename for a socket for communications with the client, default is <it/prefix/<file>/var/run/bird.ctl</file>.
+ use given filename for a socket for communications with the client, default is <it/prefix/<file>/var/run/bird.ctl</file>.
+
+ <tag>-P <m/name of PID file/</tag>
+ create a PID file with given filename.
<tag>-u <m/user/</tag>
drop privileges and use that user ID, see the next section for details.
<tag>-g <m/group/</tag>
use that group ID, see the next section for details.
+
+ <tag>-f</tag>
+ run bird in foreground.
</descrip>
<p>BIRD writes messages about its work to log files or syslog (according to config).
@@ -282,7 +288,7 @@ protocol rip {
<tag>include "<m/filename/"</tag>
This statement causes inclusion of a new file. The maximal depth is set to 5.
- <tag>log "<m/filename/"|syslog [name <m/name/]|stderr all|{ <m/list of classes/ }</tag>
+ <tag><label id="dsc-log">log "<m/filename/"|syslog [name <m/name/]|stderr all|{ <m/list of classes/ }</tag>
Set logging of messages having the given class (either <cf/all/ or <cf/{
error, trace }/ etc.) into selected destination (a file specified as a filename string,
syslog with optional name argument, or the stderr output). Classes are:
@@ -332,12 +338,23 @@ protocol rip {
expression and the name of the template. At the moment templates (and <cf/from/ expression)
are not implemented for OSPF protocol.
- <tag>define <m/constant/ = (<m/expression/)|<m/number/|<m/IP address/</tag>
- Define a constant. You can use it later in every place you could use a simple integer or an IP address.
+ <tag>define <m/constant/ = <m/expression/</tag>
+ Define a constant. You can use it later in every place you could use a value of the same type.
Besides, there are some predefined numeric constants based on /etc/iproute2/rt_* files.
A list of defined constants can be seen (together with other symbols) using 'show symbols' command.
- <tag>router id <m/IPv4 address/</tag> Set BIRD's router ID. It's a world-wide unique identification of your router, usually one of router's IPv4 addresses. Default: in IPv4 version, the lowest IP address of a non-loopback interface. In IPv6 version, this option is mandatory.
+ <tag>router id <m/IPv4 address/</tag>
+ Set BIRD's router ID. It's a world-wide unique identification
+ of your router, usually one of router's IPv4 addresses.
+ Default: in IPv4 version, the lowest IP address of a
+ non-loopback interface. In IPv6 version, this option is
+ mandatory.
+
+ <tag>router id from [-] [ "<m/mask/" ] [ <m/prefix/ ] [, ...]</tag>
+ Set BIRD's router ID based on an IP address of an interface
+ specified by an interface pattern. The option is applicable
+ for IPv4 version only. See <ref id="dsc-iface" name="interface">
+ section for detailed description of interface patterns.
<tag>listen bgp [address <m/address/] [port <m/port/] [dual]</tag>
This option allows to specify address and port where BGP
@@ -397,8 +414,8 @@ protocol rip {
entries. The option may be used multiple times. Other entries
can be added dynamically by <cf/add roa/ command.
- <tag>eval <m/expr/</tag> Evaluates given filter expression. It
- is used by us for testing of filters.
+ <tag>eval <m/expr/</tag>
+ Evaluates given filter expression. It is used by us for testing of filters.
</descrip>
<sect>Protocol options
@@ -432,7 +449,6 @@ to zero to disable it. An empty <cf><m/switch/</cf> is equivalent to <cf/on/
<cf/packets/ for packets sent and received by the protocol. Default: off.
<tag>mrtdump all|off|{ states, messages }</tag>
-
Set protocol MRTdump flags. MRTdump is a standard binary
format for logging information from routing protocols and
daemons. These flags control what kind of information is
@@ -444,9 +460,9 @@ to zero to disable it. An empty <cf><m/switch/</cf> is equivalent to <cf/on/
state changes and <cf/messages/ logs received BGP messages.
Other protocols does not support MRTdump yet.
- <tag>router id <m/IPv4 address/</tag> This option can be used
- to override global router id for a given protocol. Default:
- uses global router id.
+ <tag>router id <m/IPv4 address/</tag>
+ This option can be used to override global router id for a
+ given protocol. Default: uses global router id.
<tag>import all | none | filter <m/name/ | filter { <m/filter commands/ } | where <m/filter expression/</tag>
Specify a filter to be used for filtering routes coming from
@@ -459,18 +475,37 @@ to zero to disable it. An empty <cf><m/switch/</cf> is equivalent to <cf/on/
works in the direction from the routing table to the protocol.
Default: <cf/none/.
- <tag>import limit <m/number/ [action warn | block | restart | disable]</tag>
+ <tag>import keep filtered <m/switch/</tag>
+ Usually, if an import filter rejects a route, the route is
+ forgotten. When this option is active, these routes are
+ kept in the routing table, but they are hidden and not
+ propagated to other protocols. But it is possible to show them
+ using <cf/show route filtered/. Note that this option does not
+ work for the pipe protocol. Default: off.
+
+ <tag><label id="import-limit">import limit [<m/number/ | off ] [action warn | block | restart | disable]</tag>
Specify an import route limit (a maximum number of routes
imported from the protocol) and optionally the action to be
taken when the limit is hit. Warn action just prints warning
- log message. Block action ignores new routes coming from the
+ log message. Block action discards new routes coming from the
protocol. Restart and disable actions shut the protocol down
like appropriate commands. Disable is the default action if an
action is not explicitly specified. Note that limits are reset
- during protocol reconfigure, reload or restart.
- Default: <cf/none/.
-
- <tag>export limit <m/number/ [action warn | block | restart | disable]</tag>
+ during protocol reconfigure, reload or restart. Default: <cf/off/.
+
+ <tag>receive limit [<m/number/ | off ] [action warn | block | restart | disable]</tag>
+ Specify a receive route limit (a maximum number of routes
+ received from the protocol and remembered). It works almost
+ identically to <cf>import limit</cf> option, the only
+ difference is that if <cf/import keep filtered/ option is
+ active, filtered routes are counted towards the limit and
+ blocked routes are forgotten, as the main purpose of the
+ receive limit is to protect routing tables from
+ overflow. Import limit, on the contrary, counts accepted
+ routes only and routes blocked by the limit are handled like
+ filtered routes. Default: <cf/off/.
+
+ <tag>export limit [ <m/number/ | off ] [action warn | block | restart | disable]</tag>
Specify an export route limit, works similarly to
the <cf>import limit</cf> option, but for the routes exported
to the protocol. This option is experimental, there are some
@@ -479,13 +514,14 @@ to zero to disable it. An empty <cf><m/switch/</cf> is equivalent to <cf/on/
during protocol reload, exported routes counter ignores route
blocking and block action also blocks route updates of already
accepted routes -- and these details will probably change in
- the future. Default: <cf/none/.
+ the future. Default: <cf/off/.
- <tag>description "<m/text/"</tag> This is an optional
- description of the protocol. It is displayed as a part of the
- output of 'show route all' command.
+ <tag>description "<m/text/"</tag>
+ This is an optional description of the protocol. It is
+ displayed as a part of the output of 'show route all' command.
- <tag>table <m/name/</tag> Connect this protocol to a non-default routing table.
+ <tag>table <m/name/</tag>
+ Connect this protocol to a non-default routing table.
</descrip>
<p>There are several options that give sense only with certain protocols:
@@ -537,6 +573,22 @@ to zero to disable it. An empty <cf><m/switch/</cf> is equivalent to <cf/on/
<cf>interface "eth*" 192.168.1.0/24;</cf> - start the protocol on all
ethernet interfaces that have address from 192.168.1.0/24.
+ <tag><label id="dsc-prio">tx class|dscp <m/num/</tag>
+ This option specifies the value of ToS/DS/Class field in IP
+ headers of the outgoing protocol packets. This may affect how the
+ protocol packets are processed by the network relative to the
+ other network traffic. With <cf/class/ keyword, the value
+ (0-255) is used for the whole ToS/Class octet (but two bits
+ reserved for ECN are ignored). With <cf/dscp/ keyword, the
+ value (0-63) is used just for the DS field in the
+ octet. Default value is 0xc0 (DSCP 0x30 - CS6).
+
+ <tag>tx priority <m/num/</tag>
+ This option specifies the local packet priority. This may
+ affect how the protocol packets are processed in the local TX
+ queues. This option is Linux specific. Default value is 7
+ (highest priority, privileged traffic).
+
<tag><label id="dsc-pass">password "<m/password/" [ { id <m/num/; generate from <m/time/; generate to <m/time/; accept from <m/time/; accept to <m/time/; } ]</tag>
Specifies a password that can be used by the protocol. Password option can
be used more times to specify more passwords. If more passwords are
@@ -593,15 +645,18 @@ codes along with the messages. You do not necessarily need to use
-- the format of communication between BIRD and <file/birdc/ is stable
(see the programmer's documentation).
-Many commands have the <m/name/ of the protocol instance as an argument.
+<p>There is also a lightweight variant of BIRD client called
+<file/birdcl/, which does not support command line editing and history
+and has minimal dependencies. This is useful for running BIRD in
+resource constrained environments, where Readline library (required
+for regular BIRD client) is not available.
+
+<p>Many commands have the <m/name/ of the protocol instance as an argument.
This argument can be omitted if there exists only a single instance.
<p>Here is a brief list of supported functions:
<descrip>
- <tag>dump resources|sockets|interfaces|neighbors|attributes|routes|protocols</tag>
- Dump contents of internal data structures to the debugging output.
-
<tag>show status</tag>
Show router status, that is BIRD version, uptime and time from last reconfiguration.
@@ -640,7 +695,8 @@ This argument can be omitted if there exists only a single instance.
Show the list of symbols defined in the configuration (names of protocols, routing tables etc.).
<tag>show route [[for] <m/prefix/|<m/IP/] [table <m/sym/] [filter <m/f/|where <m/c/] [(export|preexport) <m/p/] [protocol <m/p/] [<m/options/]</tag>
- Show contents of a routing table (by default of the main one),
+ Show contents of a routing table (by default of the main one or
+ the table attached to a respective protocol),
that is routes, their metrics and (in case the <cf/all/ switch is given)
all their attributes.
@@ -661,6 +717,9 @@ This argument can be omitted if there exists only a single instance.
<p>You can also select just routes added by a specific protocol.
<cf>protocol <m/p/</cf>.
+ <p>If BIRD is configured to keep filtered routes (see <cf/import keep filtered/
+ option), you can show them instead of routes by using <cf/filtered/ switch.
+
<p>The <cf/stats/ switch requests showing of route statistics (the
number of networks, number of routes before and after filtering). If
you use <cf/count/ instead, only the statistics will be printed.
@@ -688,19 +747,48 @@ This argument can be omitted if there exists only a single instance.
<tag>flush roa [table <m/t/>]</tag>
Remove all dynamic ROA entries from a ROA table.
- <tag>configure [soft] ["<m/config file/"]</tag>
+ <tag>configure [soft] ["<m/config file/"] [timeout [<m/num/]]</tag>
Reload configuration from a given file. BIRD will smoothly
switch itself to the new configuration, protocols are
reconfigured if possible, restarted otherwise. Changes in
- filters usually lead to restart of affected protocols. If
- <cf/soft/ option is used, changes in filters does not cause
+ filters usually lead to restart of affected protocols.
+
+ If <cf/soft/ option is used, changes in filters do not cause
BIRD to restart affected protocols, therefore already accepted
routes (according to old filters) would be still propagated,
but new routes would be processed according to the new
filters.
+ If <cf/timeout/ option is used, config timer is activated. The
+ new configuration could be either confirmed using
+ <cf/configure confirm/ command, or it will be reverted to the
+ old one when the config timer expires. This is useful for cases
+ when reconfiguration breaks current routing and a router becomes
+ inaccessible for an administrator. The config timeout expiration is
+ equivalent to <cf/configure undo/ command. The timeout duration
+ could be specified, default is 300 s.
+
+ <tag>configure confirm</tag>
+ Deactivate the config undo timer and therefore confirm the current
+ configuration.
+
+ <tag>configure undo</tag>
+ Undo the last configuration change and smoothly switch back to
+ the previous (stored) configuration. If the last configuration
+ change was soft, the undo change is also soft. There is only
+ one level of undo, but in some specific cases when several
+ reconfiguration requests are given immediately in a row and
+ the intermediate ones are skipped then the undo also skips them back.
+
+ <tag>configure check ["<m/config file/"]</tag>
+ Read and parse given config file, but do not use it. Useful
+ for checking syntactic and some semantic validity of a config
+ file.
+
<tag>enable|disable|restart <m/name/|"<m/pattern/"|all</tag>
- Enable, disable or restart a given protocol instance, instances matching the <cf><m/pattern/</cf> or <cf/all/ instances.
+ Enable, disable or restart a given protocol instance,
+ instances matching the <cf><m/pattern/</cf> or
+ <cf/all/ instances.
<tag>reload [in|out] <m/name/|"<m/pattern/"|all</tag>
@@ -727,6 +815,17 @@ This argument can be omitted if there exists only a single instance.
<tag>debug <m/protocol/|<m/pattern/|all all|off|{ states | routes | filters | events | packets }</tag>
Control protocol debugging.
+
+ <tag>dump resources|sockets|interfaces|neighbors|attributes|routes|protocols</tag>
+ Dump contents of internal data structures to the debugging output.
+
+ <tag>echo all|off|{ <m/list of log classes/ } [ <m/buffer-size/ ]</tag>
+ Control echoing of log messages to the command-line output.
+ See <ref id="dsc-log" name="log option"> for a list of log classes.
+
+ <tag>eval <m/expr/</tag>
+ Evaluate given expression.
+
</descrip>
<chapt>Filters
@@ -819,56 +918,63 @@ bird>
incompatible with each other (that is to prevent you from shooting in the foot).
<descrip>
- <tag/bool/ This is a boolean type, it can have only two values, <cf/true/ and
- <cf/false/. Boolean is the only type you can use in <cf/if/
- statements.
-
- <tag/int/ This is a general integer type, you can expect it to store signed values from -2000000000
- to +2000000000. Overflows are not checked. You can use <cf/0x1234/ syntax to write hexadecimal values.
-
- <tag/pair/ This is a pair of two short integers. Each component can have values from 0 to
- 65535. Literals of this type are written as <cf/(1234,5678)/. The same syntax can also be
- used to construct a pair from two arbitrary integer expressions (for example <cf/(1+2,a)/).
-
- <tag/quad/ This is a dotted quad of numbers used to represent
- router IDs (and others). Each component can have a value
- from 0 to 255. Literals of this type are written like IPv4
- addresses.
-
- <tag/string/ This is a string of characters. There are no ways to modify strings in
- filters. You can pass them between functions, assign them to variables of type <cf/string/, print
- such variables, but you can't concatenate two strings. String literals
- are written as <cf/"This is a string constant"/.
-
- <tag/ip/ This type can hold a single IP address. Depending on the compile-time configuration of BIRD you are using, it
- is either an IPv4 or IPv6 address. IP addresses are written in the standard notation (<cf/10.20.30.40/ or <cf/fec0:3:4::1/). You can apply special operator <cf>.mask(<M>num</M>)</cf>
- on values of type ip. It masks out all but first <cf><M>num</M></cf> bits from the IP
- address. So <cf/1.2.3.4.mask(8) = 1.0.0.0/ is true.
-
- <tag/prefix/ This type can hold a network prefix consisting of IP address and prefix length. Prefix literals are written as
- <cf><M>ipaddress</M>/<M>pxlen</M></cf>, or
+ <tag/bool/ This is a boolean type, it can have only two values,
+ <cf/true/ and <cf/false/. Boolean is the only type you can use in
+ <cf/if/ statements.
+
+ <tag/int/ This is a general integer type, you can expect it to store
+ signed values from -2000000000 to +2000000000. Overflows are not
+ checked. You can use <cf/0x1234/ syntax to write hexadecimal values.
+
+ <tag/pair/ This is a pair of two short integers. Each component can have
+ values from 0 to 65535. Literals of this type are written as
+ <cf/(1234,5678)/. The same syntax can also be used to construct a pair
+ from two arbitrary integer expressions (for example <cf/(1+2,a)/).
+
+ <tag/quad/ This is a dotted quad of numbers used to represent router IDs
+ (and others). Each component can have a value from 0 to 255. Literals
+ of this type are written like IPv4 addresses.
+
+ <tag/string/ This is a string of characters. There are no ways to modify
+ strings in filters. You can pass them between functions, assign them
+ to variables of type <cf/string/, print such variables, use standard
+ string comparison operations (e.g. <cf/=, !=, &lt;, &gt;, &lt;=,
+ &gt;=/), but you can't concatenate two strings. String literals are
+ written as <cf/"This is a string constant"/. Additionally matching
+ <cf/&tilde;/ operator could be used to match a string value against a
+ shell pattern (represented also as a string).
+
+ <tag/ip/ This type can hold a single IP address. Depending on the
+ compile-time configuration of BIRD you are using, it is either an IPv4
+ or IPv6 address. IP addresses are written in the standard notation
+ (<cf/10.20.30.40/ or <cf/fec0:3:4::1/). You can apply special
+ operator <cf>.mask(<M>num</M>)</cf> on values of type ip. It masks out
+ all but first <cf><M>num</M></cf> bits from the IP address. So
+ <cf/1.2.3.4.mask(8) = 1.0.0.0/ is true.
+
+ <tag/prefix/ This type can hold a network prefix consisting of IP
+ address and prefix length. Prefix literals are written
+ as <cf><M>ipaddress</M>/<M>pxlen</M></cf>, or
<cf><m>ipaddress</m>/<m>netmask</m></cf>. There are two special
- operators on prefixes:
- <cf/.ip/ which extracts the IP address from the pair, and <cf/.len/, which separates prefix
- length from the pair. So <cf>1.2.0.0/16.pxlen = 16</cf> is true.
-
- <tag/ec/ This is a specialized type used to represent BGP
- extended community values. It is essentially a 64bit value,
- literals of this type are usually written as <cf>(<m/kind/,
- <m/key/, <m/value/)</cf>, where <cf/kind/ is a kind of
- extended community (e.g. <cf/rt/ / <cf/ro/ for a route
- target / route origin communities), the format and possible
- values of <cf/key/ and <cf/value/ are usually integers, but
+ operators on prefixes: <cf/.ip/ which extracts the IP address from the
+ pair, and <cf/.len/, which separates prefix length from the
+ pair. So <cf>1.2.0.0/16.len = 16</cf> is true.
+
+ <tag/ec/ This is a specialized type used to represent BGP extended
+ community values. It is essentially a 64bit value, literals of this
+ type are usually written as <cf>(<m/kind/, <m/key/, <m/value/)</cf>,
+ where <cf/kind/ is a kind of extended community (e.g. <cf/rt/ /
+ <cf/ro/ for a route target / route origin communities), the format and
+ possible values of <cf/key/ and <cf/value/ are usually integers, but
it depends on the used kind. Similarly to pairs, ECs can be
- constructed using expressions for <cf/key/ and
- <cf/value/ parts, (e.g. <cf/(ro, myas, 3*10)/, where
- <cf/myas/ is an integer variable).
+ constructed using expressions for <cf/key/ and <cf/value/ parts,
+ (e.g. <cf/(ro, myas, 3*10)/, where <cf/myas/ is an integer variable).
- <tag/int|pair|quad|ip|prefix|ec|enum set/
- Filters recognize four types of sets. Sets are similar to strings: you can pass them around
- but you can't modify them. Literals of type <cf>int set</cf> look like <cf>
- [ 1, 2, 5..7 ]</cf>. As you can see, both simple values and ranges are permitted in
- sets.
+ <tag/int|pair|quad|ip|prefix|ec|enum set/ Filters recognize four types
+ of sets. Sets are similar to strings: you can pass them around but you
+ can't modify them. Literals of type <cf>int set</cf> look like <cf> [
+ 1, 2, 5..7 ]</cf>. As you can see, both simple values and ranges are
+ permitted in sets.
For pair sets, expressions like <cf/(123,*)/ can be used to denote ranges (in
that case <cf/(123,0)..(123,65535)/). You can also use <cf/(123,5..100)/ for range
@@ -940,10 +1046,23 @@ incompatible with each other (that is to prevent you from shooting in the foot).
<cf><m/P/.len</cf> returns the length of path <m/P/.
- <cf>prepend(<m/P/,<m/A/)</cf> prepends ASN <m/A/ to path <m/P/ and returns the result.
+ <cf>prepend(<m/P/,<m/A/)</cf> prepends ASN <m/A/ to path
+ <m/P/ and returns the result.
+
+ <cf>delete(<m/P/,<m/A/)</cf> deletes all instances of ASN
+ <m/A/ from path <m/P/ and returns the result.
+ <m/A/ may also be an integer set, in that case the
+ operator deletes all ASNs from path <m/P/ that are also
+ members of set <m/A/.
+
+ <cf>filter(<m/P/,<m/A/)</cf> deletes all ASNs from path
+ <m/P/ that are not members of integer set <m/A/.
+ I.e., <cf/filter/ does the same as <cf/delete/ with inverted
+ set <m/A/.
+
Statement <cf><m/P/ = prepend(<m/P/, <m/A/);</cf> can be shortened to
<cf><m/P/.prepend(<m/A/);</cf> if <m/P/ is appropriate route attribute
- (for example <cf/bgp_path/).
+ (for example <cf/bgp_path/). Similarly for <cf/delete/ and <cf/filter/.
<tag/bgpmask/
BGP masks are patterns used for BGP path matching
@@ -965,6 +1084,8 @@ incompatible with each other (that is to prevent you from shooting in the foot).
no literals of this type. There are three special operators on
clists:
+ <cf><m/C/.len</cf> returns the length of clist <m/C/.
+
<cf>add(<m/C/,<m/P/)</cf> adds pair (or quad) <m/P/ to clist
<m/C/ and returns the result. If item <m/P/ is already in
clist <m/C/, it does nothing. <m/P/ may also be a clist,
@@ -1005,7 +1126,7 @@ incompatible with each other (that is to prevent you from shooting in the foot).
Special operators include <cf/&tilde;/ for "is element of a set" operation - it can be
used on element and set of elements of the same type (returning true if element is contained in the given set), or
on two strings (returning true if first string matches a shell-like pattern stored in second string) or on IP and prefix (returning true if IP is within the range defined by that prefix), or on
-prefix and prefix (returning true if first prefix is more specific than second one) or on bgppath and bgpmask (returning true if the path matches the mask) or on number and bgppath (returning true if the number is in the path) or on pair/quad and clist (returning true if the pair/quad is element of the clist) or on clist and pair/quad set (returning true if there is an element of the clist that is also a member of the pair/quad set).
+prefix and prefix (returning true if first prefix is more specific than second one) or on bgppath and bgpmask (returning true if the path matches the mask) or on number and bgppath (returning true if the number is in the path) or on bgppath and int (number) set (returning true if any ASN from the path is in the set) or on pair/quad and clist (returning true if the pair/quad is element of the clist) or on clist and pair/quad set (returning true if there is an element of the clist that is also a member of the pair/quad set).
<p>There is one operator related to ROA infrastructure -
<cf/roa_check()/. It examines a ROA table and does RFC 6483 route
@@ -1076,7 +1197,7 @@ undefined value is regarded as empty clist for most purposes.
Preference of the route. Valid values are 0-65535. (See the chapter about routing tables.)
<tag><m/ip/ from</tag>
- The router which the route has originated from. Read-only.
+ The router which the route has originated from.
<tag><m/ip/ gw</tag>
Next hop packets routed using this route should be forwarded to.
@@ -1088,7 +1209,6 @@ undefined value is regarded as empty clist for most purposes.
what protocol has told me about this route. Possible values: <cf/RTS_DUMMY/, <cf/RTS_STATIC/, <cf/RTS_INHERIT/, <cf/RTS_DEVICE/, <cf/RTS_STATIC_DEVICE/, <cf/RTS_REDIRECT/, <cf/RTS_RIP/, <cf/RTS_OSPF/, <cf/RTS_OSPF_IA/, <cf/RTS_OSPF_EXT1/, <cf/RTS_OSPF_EXT2/, <cf/RTS_BGP/, <cf/RTS_PIPE/.
<tag><m/enum/ cast</tag>
-
Route type (Currently <cf/RTC_UNICAST/ for normal routes,
<cf/RTC_BROADCAST/, <cf/RTC_MULTICAST/, <cf/RTC_ANYCAST/ will
be used in the future for broadcast, multicast and anycast
@@ -1106,6 +1226,19 @@ undefined value is regarded as empty clist for most purposes.
only to <cf/RTD_BLACKHOLE/, <cf/RTD_UNREACHABLE/ or
<cf/RTD_PROHIBIT/.
+ <tag><m/string/ ifname</tag>
+ Name of the outgoing interface. Sink routes (like blackhole,
+ unreachable or prohibit) and multipath routes have no interface
+ associated with them, so <cf/ifname/ returns an empty string for
+ such routes. Read-only.
+
+ <tag><m/int/ ifindex</tag>
+ Index of the outgoing interface. System wide index of the
+ interface. May be used for interface matching, however
+ indexes might change on interface creation/removal. Zero is
+ returned for routes with undefined outgoing
+ interfaces. Read-only.
+
<tag><m/int/ igp_metric</tag>
The optional attribute that can be used to specify a distance
to the network for routes that do not have a native protocol
@@ -1138,6 +1271,178 @@ undefined value is regarded as empty clist for most purposes.
<chapt>Protocols
+<sect><label id="sect-bfd">BFD
+
+<sect1>Introduction
+
+<p>Bidirectional Forwarding Detection (BFD) is not a routing protocol itself, it
+is an independent tool providing liveness and failure detection. Routing
+protocols like OSPF and BGP use integrated periodic "hello" messages to monitor
+liveness of neighbors, but detection times of these mechanisms are high (e.g. 40
+seconds by default in OSPF, could be set down to several seconds). BFD offers
+universal, fast and low-overhead mechanism for failure detection, which could be
+attached to any routing protocol in an advisory role.
+
+<p>BFD consists of mostly independent BFD sessions. Each session monitors a
+unicast bidirectional path between two BFD-enabled routers. This is done by
+periodically sending control packets in both directions. BFD does not handle
+neighbor discovery, BFD sessions are created on demand by request of other
+protocols (like OSPF or BGP), which supply appropriate information like IP
+addresses and associated interfaces. When a session changes its state, these
+protocols are notified and act accordingly (e.g. break an OSPF adjacency when
+the BFD session went down).
+
+<p>BIRD implements basic BFD behavior as defined in
+RFC 5880<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc5880.txt">
+(some advanced features like the echo mode or authentication are not implemented),
+IP transport for BFD as defined in
+RFC 5881<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc5881.txt"> and
+RFC 5883<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc5883.txt">
+and interaction with client protocols as defined in
+RFC 5882<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc5882.txt">.
+
+<p>Note that BFD implementation in BIRD is currently a new feature in
+development, expect some rough edges and possible UI and configuration changes
+in the future. Also note that we currently support at most one protocol instance.
+
+<sect1>Configuration
+
+<p>BFD configuration consists mainly of multiple definitions of interfaces.
+Most BFD config options are session specific. When a new session is requested
+and dynamically created, it is configured from one of these definitions. For
+sessions to directly connected neighbors, <cf/interface/ definitions are chosen
+based on the interface associated with the session, while <cf/multihop/
+definition is used for multihop sessions. If no definition is relevant, the
+session is just created with the default configuration. Therefore, an empty BFD
+configuration is often sufficient.
+
+<p>Note that to use BFD for other protocols like OSPF or BGP, these protocols
+also have to be configured to request BFD sessions, usually by <cf/bfd/ option.
+
+<p>Some of BFD session options require <m/time/ value, which has to be specified
+with the appropriate unit: <m/num/ <cf/s/|<cf/ms/|<cf/us/. Although microseconds
+are allowed as units, practical minimum values are usually in order of tens of
+milliseconds.
+
+<code>
+protocol bfd [&lt;name&gt;] {
+ interface &lt;interface pattern&gt; {
+ interval &lt;time&gt;;
+ min rx interval &lt;time&gt;;
+ min tx interval &lt;time&gt;;
+ idle tx interval &lt;time&gt;;
+ multiplier &lt;num&gt;;
+ passive &lt;switch&gt;;
+ };
+ multihop {
+ interval &lt;time&gt;;
+ min rx interval &lt;time&gt;;
+ min tx interval &lt;time&gt;;
+ idle tx interval &lt;time&gt;;
+ multiplier &lt;num&gt;;
+ passive &lt;switch&gt;;
+ };
+ neighbor &lt;ip&gt; [dev "&lt;interface&gt;"] [local &lt;ip&gt;] [multihop &lt;switch&gt;];
+}
+</code>
+
+<descrip>
+ <tag>interface <m/pattern [, ...]/ { <m/options/ }</tag>
+ Interface definitions allow to specify options for sessions associated
+ with such interfaces and also may contain interface specific options.
+ See <ref id="dsc-iface" name="interface"> common option for a detailed
+ description of interface patterns. Note that contrary to the behavior of
+ <cf/interface/ definitions of other protocols, BFD protocol would accept
+ sessions (in default configuration) even on interfaces not covered by
+ such definitions.
+
+ <tag>multihop { <m/options/ }</tag>
+ Multihop definitions allow to specify options for multihop BFD sessions,
+ in the same manner as <cf/interface/ definitions are used for directly
+ connected sessions. Currently only one such definition (for all multihop
+ sessions) could be used.
+
+ <tag>neighbor <m/ip/ [dev "<m/interface/"] [local <m/ip/] [multihop <m/switch/]</tag>
+ BFD sessions are usually created on demand as requested by other
+ protocols (like OSPF or BGP). This option allows to explicitly add
+ a BFD session to the specified neighbor regardless of such requests.
+
+ The session is identified by the IP address of the neighbor, with
+ optional specification of used interface and local IP. By default
+ the neighbor must be directly connected, unless the session is
+ configured as multihop. Note that local IP must be specified for
+ multihop sessions.
+</descrip>
+
+<p>Session specific options (part of <cf/interface/ and <cf/multihop/ definitions):
+
+<descrip>
+ <tag>interval <m/time/</tag>
+ BFD ensures availability of the forwarding path associated with the
+ session by periodically sending BFD control packets in both
+ directions. The rate of such packets is controlled by two options,
+ <cf/min rx interval/ and <cf/min tx interval/ (see below). This option
+ is just a shorthand to set both of these options together.
+
+ <tag>min rx interval <m/time/</tag>
+ This option specifies the minimum RX interval, which is announced to the
+ neighbor and used there to limit the neighbor's rate of generated BFD
+ control packets. Default: 10 ms.
+
+ <tag>min tx interval <m/time/</tag>
+ This option specifies the desired TX interval, which controls the rate
+ of generated BFD control packets (together with <cf/min rx interval/
+ announced by the neighbor). Note that this value is used only if the BFD
+ session is up, otherwise the value of <cf/idle tx interval/ is used
+ instead. Default: 100 ms.
+
+ <tag>idle tx interval <m/time/</tag>
+ In order to limit unnecessary traffic in cases where a neighbor is not
+ available or not running BFD, the rate of generated BFD control packets
+ is lower when the BFD session is not up. This option specifies the
+ desired TX interval in such cases instead of <cf/min tx interval/.
+ Default: 1 s.
+
+ <tag>multiplier <m/num/</tag>
+ Failure detection time for BFD sessions is based on established rate of
+ BFD control packets (<cf>min rx/tx interval</cf>) multiplied by this
+ multiplier, which is essentially (ignoring jitter) a number of missed
+ packets after which the session is declared down. Note that rates and
+ multipliers could be different in each direction of a BFD session.
+ Default: 5.
+
+ <tag>passive <m/switch/</tag>
+ Generally, both BFD session endpoints try to establish the session by
+ sending control packets to the other side. This option allows to enable
+ passive mode, which means that the router does not send BFD packets
+ until it has received one from the other side. Default: disabled.
+</descrip>
+
+<sect1>Example
+
+<p><code>
+protocol bfd {
+ interface "eth*" {
+ min rx interval 20 ms;
+ min tx interval 50 ms;
+ idle tx interval 300 ms;
+ };
+ interface "gre*" {
+ interval 200 ms;
+ multiplier 10;
+ passive;
+ };
+ multihop {
+ interval 200 ms;
+ multiplier 10;
+ };
+
+ neighbor 192.168.1.10;
+ neighbor 192.168.2.2 dev "eth2";
+ neighbor 192.168.10.1 local 192.168.1.1 multihop;
+}
+</code>
+
<sect>BGP
<p>The Border Gateway Protocol is the routing protocol used for backbone
@@ -1152,8 +1457,8 @@ AS). Each AS is a part of the network with common management and
common routing policy. It is identified by a unique 16-bit number
(ASN). Routers within each AS usually exchange AS-internal routing
information with each other using an interior gateway protocol (IGP,
-such as OSPF or RIP). Boundary routers at the border of
-the AS communicate global (inter-AS) network reachability information with
+such as OSPF or RIP). Boundary routers at the border of the AS
+communicate global (inter-AS) network reachability information with
their neighbors in the neighboring AS'es via exterior BGP (eBGP) and
redistribute received information to other routers in the AS via
interior BGP (iBGP).
@@ -1266,6 +1571,11 @@ for each neighbor using the following configuration parameters:
circumvent misconfigurations of other routers. Default:
disabled.
+ <tag>next hop keep</tag> Forward the received Next Hop
+ attribute even in situations where the local address should be
+ used instead, like when the route is sent to an interface with
+ a different subnet. Default: disabled.
+
<tag>missing lladdr self|drop|ignore</tag>Next Hop attribute
in BGP-IPv6 sometimes contains just the global IPv6 address,
but sometimes it has to contain both global and link-local
@@ -1301,7 +1611,15 @@ for each neighbor using the following configuration parameters:
<tag>igp table <m/name/</tag> Specifies a table that is used
as an IGP routing table. Default: the same as the table BGP is
connected to.
-
+
+ <tag>bfd <M>switch</M></tag>
+ BGP could use BFD protocol as an advisory mechanism for neighbor
+ liveness and failure detection. If enabled, BIRD sets up a BFD session
+ for the BGP neighbor and tracks its liveness by it. This has an
+ advantage of an order of magnitude lower detection times in case of
+ failure. Note that BFD protocol also has to be configured, see
+ <ref id="sect-bfd" name="BFD"> section for details. Default: disabled.
+
<tag>ttl security <m/switch/</tag> Use GTSM (RFC 5082 - the
generalized TTL security mechanism). GTSM protects against
spoofed packets by ignoring received packets with a smaller
@@ -1352,6 +1670,16 @@ for each neighbor using the following configuration parameters:
This option requires that the connected routing table is
<ref id="dsc-sorted" name="sorted">. Default: off.
+ <tag>allow local as [<m/number/]</tag>
+ BGP prevents routing loops by rejecting received routes with
+ the local AS number in the AS path. This option allows to
+ loosen or disable the check. Optional <cf/number/ argument can
+ be used to specify the maximum number of local ASNs in the AS
+ path that is allowed for received routes. When the option is
+ used without the argument, the check is completely disabled
+ and you should ensure loop-free behavior by some other means.
+ Default: 0 (no local AS number allowed).
+
<tag>enable route refresh <m/switch/</tag> When BGP speaker
changes its import filter, it has to re-examine all routes
received from its neighbor against the new filter. As these
@@ -1398,8 +1726,9 @@ for each neighbor using the following configuration parameters:
<tag>route limit <m/number/</tag> The maximal number of routes
that may be imported from the protocol. If the route limit is
- exceeded, the connection is closed with error. Limit is currently implemented as
- <cf/import limit number exceed restart/. Default: no limit.
+ exceeded, the connection is closed with an error. Limit is currently implemented as
+ <cf/import limit <m/number/ action restart/. This option is obsolete and it is
+ replaced by <ref id="import-limit" name="import limit option">. Default: no limit.
<tag>disable after error <m/switch/</tag> When an error is encountered (either
locally or by the other side), disable the instance automatically
@@ -1640,6 +1969,15 @@ use cases that use the direct protocol (like abusing eBGP as an IGP
routing protocol), in most cases it is not needed to have these device
routes in BIRD routing table and to use the direct protocol.
+<p>There is one notable case when you definitely want to use the
+direct protocol -- running BIRD on BSD systems. Having high priority
+device routes for directly connected networks from the direct protocol
+protects kernel device routes from being overwritten or removed by IGP
+routes during some transient network conditions, because a lower
+priority IGP route for the same network is not exported to the kernel
+routing table. This is an issue on BSD systems only, as on Linux
+systems BIRD cannot change non-BIRD route in the kernel routing table.
+
<p>The only configurable thing about direct is what interfaces it watches:
<p><descrip>
@@ -1821,6 +2159,7 @@ on nonbroadcast networks.
<code>
protocol ospf &lt;name&gt; {
rfc1583compat &lt;switch&gt;;
+ stub router &lt;switch&gt;;
tick &lt;num&gt;;
ecmp &lt;switch&gt; [limit &lt;num&gt;];
area &lt;id&gt; {
@@ -1863,8 +2202,13 @@ protocol ospf &lt;name&gt; {
nonbroadcast|nbma|pointomultipoint|ptmp];
strict nonbroadcast &lt;switch&gt;;
real broadcast &lt;switch&gt;;
+ ptp netmask &lt;switch&gt;;
check link &lt;switch&gt;;
+ bfd &lt;switch&gt;;
ecmp weight &lt;num&gt;;
+ ttl security [&lt;switch&gt; | tx only];
+ tx class|dscp &lt;num&gt;;
+ tx priority &lt;num&gt;;
authentication [none|simple|cryptographic];
password "&lt;text&gt;";
password "&lt;text&gt;" {
@@ -1899,6 +2243,15 @@ protocol ospf &lt;name&gt; {
url="ftp://ftp.rfc-editor.org/in-notes/rfc1583.txt">. Default
value is no.
+ <tag>stub router <M>switch</M></tag>
+ This option configures the router to be a stub router, i.e.,
+ a router that participates in the OSPF topology but does not
+ allow transit traffic. In OSPFv2, this is implemented by
+ advertising maximum metric for outgoing links, as suggested
+ by RFC 3137<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc3137.txt">.
+ In OSPFv3, the stub router behavior is announced by clearing
+ the R-bit in the router LSA. Default value is no.
+
<tag>tick <M>num</M></tag>
The routing table calculation and clean-up of areas' databases
is not performed when a single link state
@@ -2106,6 +2459,18 @@ protocol ospf &lt;name&gt; {
probably is not interoperable with other OSPF
implementations. Default value is no.
+ <tag>ptp netmask <m/switch/</tag>
+ In <cf/type ptp/ network configurations, OSPFv2
+ implementations should ignore received netmask field in hello
+ packets and should send hello packets with zero netmask field
+ on unnumbered PtP links. But some OSPFv2 implementations
+ perform netmask checking even for PtP links. This option
+ specifies whether real netmask will be used in hello packets
+ on <cf/type ptp/ interfaces. You should ignore this option
+ unless you meet some compatibility problems related to this
+ issue. Default value is no for unnumbered PtP links, yes
+ otherwise.
+
<tag>check link <M>switch</M></tag>
If set, a hardware link state (reported by OS) is taken into
consideration. When a link disappears (e.g. an ethernet cable is
@@ -2114,6 +2479,33 @@ protocol ospf &lt;name&gt; {
prefix) is propagated. It is possible that some hardware
drivers or platforms do not implement this feature. Default value is no.
+ <tag>bfd <M>switch</M></tag>
+ OSPF could use BFD protocol as an advisory mechanism for neighbor
+ liveness and failure detection. If enabled, BIRD sets up a BFD session
+ for each OSPF neighbor and tracks its liveness by it. This has an
+ advantage of an order of magnitude lower detection times in case of
+ failure. Note that BFD protocol also has to be configured, see
+ <ref id="sect-bfd" name="BFD"> section for details. Default value is no.
+
+ <tag>ttl security [<m/switch/ | tx only]</tag>
+ TTL security is a feature that protects routing protocols
+ from remote spoofed packets by using TTL 255 instead of TTL 1
+ for protocol packets destined to neighbors. Because TTL is
+ decremented when packets are forwarded, it is non-trivial to
+ spoof packets with TTL 255 from remote locations. Note that
+ this option would interfere with OSPF virtual links.
+
+ If this option is enabled, the router will send OSPF packets
+ with TTL 255 and drop received packets with TTL less than
+ 255. If this option is set to <cf/tx only/, TTL 255 is used
+ for sent packets, but is not checked for received
+ packets. Default value is no.
+
+ <tag>tx class|dscp|priority <m/num/</tag>
+ These options specify the ToS/DiffServ/Traffic class/Priority
+ of the outgoing OSPF packets. See <ref id="dsc-prio" name="tx
+ class"> common option for detailed description.
+
<tag>ecmp weight <M>num</M></tag>
When ECMP (multipath) routes are allowed, this value specifies
a relative weight used for nexthops going through the iface.
@@ -2140,7 +2532,10 @@ protocol ospf &lt;name&gt; {
<tag>neighbors { <m/set/ } </tag>
A set of neighbors to which Hello messages on NBMA or PtMP
networks are to be sent. For NBMA networks, some of them
- could be marked as eligible.
+ could be marked as eligible. In OSPFv3, link-local addresses
+ should be used, using global ones is possible, but it is
+ nonstandard and might be problematic. And definitely,
+ link-local and global addresses should not be mixed.
</descrip>
@@ -2272,7 +2667,7 @@ another one.
<tag>peer table <m/table/</tag> Defines secondary routing table to connect to. The
primary one is selected by the <cf/table/ keyword.
- <tag>mode opaque|transparent</tag> Specifies the mode for the pipe to work in. Default is opaque.
+ <tag>mode opaque|transparent</tag> Specifies the mode for the pipe to work in. Default is transparent.
</descrip>
<sect1>Attributes
@@ -2408,6 +2803,26 @@ interface definitions, prefix definitions and DNS definitions:
also as interface-specific options and there is a short
variant <cf>dnssl <m/domain/</cf> that just specifies one DNS
search domain.
+
+ <label id="dsc-trigger"> <tag>trigger <m/prefix/</tag>
+ RAdv protocol could be configured to change its behavior based
+ on availability of routes. When this option is used, the
+ protocol waits in suppressed state until a <it/trigger route/
+ (for the specified network) is exported to the protocol, the
+ protocol also returns to suppressed state if the
+ <it/trigger route/ disappears. Note that route export depends
+ on specified export filter, as usual. This option could be
+ used, e.g., for handling failover in multihoming scenarios.
+
+ During suppressed state, router advertisements are generated,
+ but with some fields zeroed. Exact behavior depends on which
+ fields are zeroed, this can be configured by
+ <cf/sensitive/ option for appropriate fields. By default, just
+ <cf/default lifetime/ (also called <cf/router lifetime/) is
+ zeroed, which means hosts cannot use the router as a default
+ router. <cf/preferred lifetime/ and <cf/valid lifetime/ could
+ also be configured as <cf/sensitive/ for a prefix, which would
+ cause autoconfigured IPs to be deprecated or even removed.
</descrip>
<p>Interface specific options:
@@ -2454,19 +2869,20 @@ interface definitions, prefix definitions and DNS definitions:
This option specifies which value of Hop Limit should be used
by hosts. Valid values are 0-255, 0 means unspecified. Default: 64
- <tag>default lifetime <m/expr/</tag>
+ <tag>default lifetime <m/expr/ [sensitive <m/switch/]</tag>
This option specifies the time (in seconds) how long (after
the receipt of RA) hosts may use the router as a default
- router. 0 means do not use as a default router. Default: 3 *
- <cf/max ra interval/.
+ router. 0 means do not use as a default router. For
+ <cf/sensitive/ option, see <ref id="dsc-trigger" name="trigger">.
+ Default: 3 * <cf/max ra interval/, <cf/sensitive/ yes.
- <tag>rdnss local <m/bool/</tag>
+ <tag>rdnss local <m/switch/</tag>
Use only local (interface-specific) RDNSS definitions for this
interface. Otherwise, both global and local definitions are
used. Could also be used to disable RDNSS for given interface
if no local definitions are specified. Default: no.
- <tag>dnssl local <m/bool/</tag>
+ <tag>dnssl local <m/switch/</tag>
Use only local DNSSL definitions for this interface. See
<cf/rdnss local/ option above. Default: no.
</descrip>
@@ -2475,6 +2891,13 @@ interface definitions, prefix definitions and DNS definitions:
<p>Prefix specific options:
<descrip>
+ <tag>skip <m/switch/</tag>
+ This option allows to specify that given prefix should not be
+ advertised. This is useful for making exceptions from a
+ default policy of advertising all prefixes. Note that for
+ withdrawing an already advertised prefix it is more useful to
+ advertise it with zero valid lifetime. Default: no
+
<tag>onlink <m/switch/</tag>
This option specifies whether hosts may use the advertised
prefix for onlink determination. Default: yes
@@ -2483,18 +2906,20 @@ interface definitions, prefix definitions and DNS definitions:
This option specifies whether hosts may use the advertised
prefix for stateless autoconfiguration. Default: yes
- <tag>valid lifetime <m/expr/</tag>
+ <tag>valid lifetime <m/expr/ [sensitive <m/switch/]</tag>
This option specifies the time (in seconds) how long (after
the receipt of RA) the prefix information is valid, i.e.,
autoconfigured IP addresses can be assigned and hosts with
that IP addresses are considered directly reachable. 0 means
- the prefix is no longer valid. Default: 86400 (1 day)
+ the prefix is no longer valid. For <cf/sensitive/ option, see
+ <ref id="dsc-trigger" name="trigger">. Default: 86400 (1 day), <cf/sensitive/ no.
- <tag>preferred lifetime <m/expr/</tag>
+ <tag>preferred lifetime <m/expr/ [sensitive <m/switch/]</tag>
This option specifies the time (in seconds) how long (after
the receipt of RA) IP addresses generated from the prefix
- using stateless autoconfiguration remain preferred. Default:
- 14400 (4 hours)
+ using stateless autoconfiguration remain preferred. For
+ <cf/sensitive/ option, see <ref id="dsc-trigger" name="trigger">.
+ Default: 14400 (4 hours), <cf/sensitive/ no.
</descrip>
@@ -2609,13 +3034,46 @@ makes it pretty much obsolete. (It is still usable on very small networks.)
neighbors, that is not configurable. Default: never.
</descrip>
-<p>There are two options that can be specified per-interface. First is <cf>metric</cf>, with
-default one. Second is <cf>mode multicast|broadcast|quiet|nolisten|version1</cf>, it selects mode for
-rip to work in. If nothing is specified, rip runs in multicast mode. <cf>version1</cf> is
-currently equivalent to <cf>broadcast</cf>, and it makes RIP talk to a broadcast address even
-through multicast mode is possible. <cf>quiet</cf> option means that RIP will not transmit
-any periodic messages to this interface and <cf>nolisten</cf> means that RIP will send to this
-interface but not listen to it.
+<p>There are some options that can be specified per-interface:
+
+<descrip>
+ <tag>metric <m/num/</tag>
+ This option specifies the metric of the interface. Default: 1.
+
+ <tag>mode multicast|broadcast|quiet|nolisten|version1</tag>
+ This option selects the mode for RIP to work in. If nothing is
+ specified, RIP runs in multicast mode. <cf/version1/ is
+ currently equivalent to <cf/broadcast/, and it makes RIP talk
+ to a broadcast address even though multicast mode is
+ possible. <cf/quiet/ option means that RIP will not transmit
+ any periodic messages to this interface and <cf/nolisten/
+ means that RIP will send to this interface but not listen to it.
+
+ <tag>ttl security [<m/switch/ | tx only]</tag>
+ TTL security is a feature that protects routing protocols
+ from remote spoofed packets by using TTL 255 instead of TTL 1
+ for protocol packets destined to neighbors. Because TTL is
+ decremented when packets are forwarded, it is non-trivial to
+ spoof packets with TTL 255 from remote locations.
+
+ If this option is enabled, the router will send RIP packets
+ with TTL 255 and drop received packets with TTL less than
+ 255. If this option is set to <cf/tx only/, TTL 255 is used
+ for sent packets, but is not checked for received
+ packets. Such setting does not offer protection, but offers
+ compatibility with neighbors regardless of whether they use
+ ttl security.
+
+ Note that for RIPng, TTL security is a standard behavior
+ (required by RFC 2080), but BIRD uses <cf/tx only/ by
+ default, for compatibility with older versions. For IPv4 RIP,
+ default value is no.
+
+ <tag>tx class|dscp|priority <m/num/</tag>
+ These options specify the ToS/DiffServ/Traffic class/Priority
+ of the outgoing RIP packets. See <ref id="dsc-prio" name="tx
+ class"> common option for detailed description.
+</descrip>
<p>The following options generally override behavior specified in RFC. If you use any of these
options, BIRD will no longer be RFC-compliant, which means it will not be able to talk to anything
@@ -2633,7 +3091,7 @@ other than equally configured BIRD. I have warned you.
<tag>period <M>number</M>
</tag>specifies the number of seconds between periodic updates. Default is 30 seconds. A lower
number will mean faster convergence but bigger network
- load. Do not use values lower than 10.
+ load. Do not use values lower than 12.
<tag>timeout time <M>number</M>
</tag>specifies how old route has to be to be considered unreachable. Default is 4*<cf/period/.
@@ -2663,7 +3121,7 @@ other than equally configured BIRD. I have warned you.
protocol rip MyRIP_test {
debug all;
port 1520;
- period 10;
+ period 12;
garbage time 60;
interface "eth0" { metric 3; mode multicast; };
interface "eth*" { metric 2; mode broadcast; };
@@ -2710,9 +3168,10 @@ definition of the protocol contains mainly a list of static routes:
route through an interface to hosts on a directly connected network.
<tag>route <m/prefix/ recursive <m/ip/</tag> Static recursive route,
its nexthop depends on a route table lookup for given IP address.
- <tag>route <m/prefix/ drop|reject|prohibit</tag> Special routes
- specifying to drop the packet, return it as unreachable or return
- it as administratively prohibited.
+ <tag>route <m/prefix/ blackhole|unreachable|prohibit</tag> Special routes
+ specifying to silently drop the packet, return it as unreachable or return
+ it as administratively prohibited. First two targets are also known
+ as <cf/drop/ and <cf/reject/.
<tag>check link <m/switch/</tag>
If set, hardware link states of network interfaces are taken
@@ -2738,7 +3197,7 @@ protocol static {
via 198.51.100.10 weight 2
via 198.51.100.20
via 192.0.2.1;
- route 203.0.113.0/24 reject; # Sink route
+ route 203.0.113.0/24 unreachable; # Sink route
route 10.2.0.0/24 via "arc0"; # Secondary network
}
</code>
diff --git a/doc/reply_codes b/doc/reply_codes
index 7ec2e27d..e9996eef 100644
--- a/doc/reply_codes
+++ b/doc/reply_codes
@@ -25,6 +25,13 @@ Reply codes of BIRD command-line interface
0014 Route count
0015 Reloading
0016 Access restricted
+0017 Reconfiguration already in progress, removing queued config
+0018 Reconfiguration confirmed
+0019 Nothing to do (configure undo/confirm)
+0020 Configuration OK
+0021 Undo requested
+0022 Undo scheduled
+0023 Evaluation of expression
1000 BIRD version
1001 Interface list
@@ -55,6 +62,7 @@ Reply codes of BIRD command-line interface
8005 Protocol is down => cannot dump
8006 Reload failed
8007 Access denied
+8008 Evaluation runtime error
9000 Command too long
9001 Parse error
diff --git a/filter/config.Y b/filter/config.Y
index 0eeb2ce1..04acfbab 100644
--- a/filter/config.Y
+++ b/filter/config.Y
@@ -193,7 +193,14 @@ f_generate_ec(u16 kind, struct f_inst *tk, struct f_inst *tv)
else if (tk->code == 'C') {
c1 = 1;
struct f_val *val = tk->a1.p;
- if (val->type == T_IP) {
+
+ if (val->type == T_INT) {
+ ipv4_used = 0; key = val->val.i;
+ }
+ else if (val->type == T_QUAD) {
+ ipv4_used = 1; key = val->val.i;
+ }
+ else if (val->type == T_IP) {
ipv4_used = 1; key = ipa_to_u32(val->val.px.ip);
}
else
@@ -254,7 +261,8 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN,
SET, STRING, BGPMASK, BGPPATH, CLIST, ECLIST,
IF, THEN, ELSE, CASE,
TRUE, FALSE, RT, RO, UNKNOWN, GENERIC,
- FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, CAST, DEST, PREFERENCE,
+ FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, CAST, DEST, IFNAME, IFINDEX,
+ PREFERENCE,
LEN,
DEFINED,
ADD, DELETE, CONTAINS, RESET,
@@ -329,8 +337,8 @@ type:
one_decl:
type SYM {
- struct f_val * val = cfg_alloc(sizeof(struct f_val));
- val->type = $1;
+ struct f_val * val = cfg_alloc(sizeof(struct f_val));
+ val->type = T_VOID;
$2 = cf_define_symbol($2, SYM_VARIABLE | $1, val);
DBG( "New variable %s type %x\n", $2->name, $1 );
$2->aux2 = NULL;
@@ -661,49 +669,28 @@ function_call:
symbol:
SYM {
$$ = f_new_inst();
- switch ($1->class) {
- case SYM_NUMBER:
- $$ = f_new_inst();
- $$->code = 'c';
- $$->aux = T_INT;
- $$->a2.i = $1->aux;
- break;
- case SYM_IPA:
- { NEW_F_VAL; $$ = f_new_inst(); $$->code = 'C'; $$->a1.p = val; val->type = T_IP; val->val.px.ip = * (ip_addr *) ($1->def); }
- break;
- case SYM_VARIABLE | T_BOOL:
- case SYM_VARIABLE | T_INT:
- case SYM_VARIABLE | T_PAIR:
- case SYM_VARIABLE | T_QUAD:
- case SYM_VARIABLE | T_EC:
- case SYM_VARIABLE | T_STRING:
- case SYM_VARIABLE | T_IP:
- case SYM_VARIABLE | T_PREFIX:
- case SYM_VARIABLE | T_PREFIX_SET:
- case SYM_VARIABLE | T_SET:
- case SYM_VARIABLE | T_PATH:
- case SYM_VARIABLE | T_PATH_MASK:
- case SYM_VARIABLE | T_CLIST:
- case SYM_VARIABLE | T_ECLIST:
- $$->code = 'V';
- $$->a1.p = $1->def;
- $$->a2.p = $1->name;
- break;
- default:
- cf_error("%s: variable expected.", $1->name );
+
+ switch ($1->class & 0xff00) {
+ case SYM_CONSTANT: $$->code = 'C'; break;
+ case SYM_VARIABLE: $$->code = 'V'; break;
+ default: cf_error("%s: variable expected.", $1->name);
}
+
+ $$->a1.p = $1->def;
+ $$->a2.p = $1->name;
}
static_attr:
- FROM { $$ = f_new_inst(); $$->aux = T_IP; $$->a2.i = OFFSETOF(struct rta, from); $$->a1.i = 1; }
-
- | GW { $$ = f_new_inst(); $$->aux = T_IP; $$->a2.i = OFFSETOF(struct rta, gw); $$->a1.i = 1; }
- | NET { $$ = f_new_inst(); $$->aux = T_PREFIX; $$->a2.i = 0x12345678; /* This is actually ok - T_PREFIX is special-cased. */ }
- | PROTO { $$ = f_new_inst(); $$->aux = T_STRING; $$->a2.i = 0x12345678; /* T_STRING is also special-cased. */ }
- | SOURCE { $$ = f_new_inst(); $$->aux = T_ENUM_RTS; $$->a2.i = OFFSETOF(struct rta, source); }
- | SCOPE { $$ = f_new_inst(); $$->aux = T_ENUM_SCOPE; $$->a2.i = OFFSETOF(struct rta, scope); $$->a1.i = 1; }
- | CAST { $$ = f_new_inst(); $$->aux = T_ENUM_RTC; $$->a2.i = OFFSETOF(struct rta, cast); }
- | DEST { $$ = f_new_inst(); $$->aux = T_ENUM_RTD; $$->a2.i = OFFSETOF(struct rta, dest); $$->a1.i = 1; }
+ FROM { $$ = f_new_inst(); $$->aux = T_IP; $$->a2.i = SA_FROM; $$->a1.i = 1; }
+ | GW { $$ = f_new_inst(); $$->aux = T_IP; $$->a2.i = SA_GW; $$->a1.i = 1; }
+ | NET { $$ = f_new_inst(); $$->aux = T_PREFIX; $$->a2.i = SA_NET; }
+ | PROTO { $$ = f_new_inst(); $$->aux = T_STRING; $$->a2.i = SA_PROTO; }
+ | SOURCE { $$ = f_new_inst(); $$->aux = T_ENUM_RTS; $$->a2.i = SA_SOURCE; }
+ | SCOPE { $$ = f_new_inst(); $$->aux = T_ENUM_SCOPE; $$->a2.i = SA_SCOPE; $$->a1.i = 1; }
+ | CAST { $$ = f_new_inst(); $$->aux = T_ENUM_RTC; $$->a2.i = SA_CAST; }
+ | DEST { $$ = f_new_inst(); $$->aux = T_ENUM_RTD; $$->a2.i = SA_DEST; $$->a1.i = 1; }
+ | IFNAME { $$ = f_new_inst(); $$->aux = T_STRING; $$->a2.i = SA_IFNAME; }
+ | IFINDEX { $$ = f_new_inst(); $$->aux = T_INT; $$->a2.i = SA_IFINDEX; }
;
term:
diff --git a/filter/filter.c b/filter/filter.c
index 7c883fff..ed8efd54 100644
--- a/filter/filter.c
+++ b/filter/filter.c
@@ -58,79 +58,54 @@ adata_empty(struct linpool *pool, int l)
return res;
}
-static int
-pm_path_compare(struct f_path_mask *m1, struct f_path_mask *m2)
-{
- while (1) {
- if ((!m1) || (!m2))
- return !((!m1) && (!m2));
-
- /* FIXME: buggy, should return -1, 0, 1; but it doesn't matter */
- if ((m1->kind != m2->kind) || (m1->val != m2->val)) return 1;
- m1 = m1->next;
- m2 = m2->next;
- }
-}
-
-u32 f_eval_asn(struct f_inst *expr);
-
static void
-pm_format(struct f_path_mask *p, byte *buf, unsigned int size)
+pm_format(struct f_path_mask *p, buffer *buf)
{
- byte *end = buf + size - 16;
+ buffer_puts(buf, "[= ");
while (p)
+ {
+ switch(p->kind)
{
- if (buf > end)
- {
- strcpy(buf, " ...");
- return;
- }
-
- switch(p->kind)
- {
- case PM_ASN:
- buf += bsprintf(buf, " %u", p->val);
- break;
-
- case PM_QUESTION:
- buf += bsprintf(buf, " ?");
- break;
+ case PM_ASN:
+ buffer_print(buf, "%u ", p->val);
+ break;
- case PM_ASTERISK:
- buf += bsprintf(buf, " *");
- break;
+ case PM_QUESTION:
+ buffer_puts(buf, "? ");
+ break;
- case PM_ASN_EXPR:
- buf += bsprintf(buf, " %u", f_eval_asn((struct f_inst *) p->val));
- break;
- }
+ case PM_ASTERISK:
+ buffer_puts(buf, "* ");
+ break;
- p = p->next;
+ case PM_ASN_EXPR:
+ buffer_print(buf, "%u ", f_eval_asn((struct f_inst *) p->val));
+ break;
}
- *buf = 0;
+ p = p->next;
+ }
+
+ buffer_puts(buf, "=]");
}
-static inline int int_cmp(int i1, int i2)
+static inline int
+int_cmp(int i1, int i2)
{
- if (i1 == i2) return 0;
- if (i1 < i2) return -1;
- else return 1;
+ return (i1 > i2) - (i1 < i2);
}
-static inline int uint_cmp(unsigned int i1, unsigned int i2)
+static inline int
+uint_cmp(uint i1, uint i2)
{
- if (i1 == i2) return 0;
- if (i1 < i2) return -1;
- else return 1;
+ return (int)(i1 > i2) - (int)(i1 < i2);
}
-static inline int u64_cmp(u64 i1, u64 i2)
+static inline int
+u64_cmp(u64 i1, u64 i2)
{
- if (i1 == i2) return 0;
- if (i1 < i2) return -1;
- else return 1;
+ return (int)(i1 > i2) - (int)(i1 < i2);
}
/**
@@ -138,23 +113,21 @@ static inline int u64_cmp(u64 i1, u64 i2)
* @v1: first value
* @v2: second value
*
- * Compares two values and returns -1, 0, 1 on <, =, > or 999 on error.
- * Tree module relies on this giving consistent results so that it can
- * build balanced trees.
+ * Compares two values and returns -1, 0, 1 on <, =, > or CMP_ERROR on
+ * error. Tree module relies on this giving consistent results so
+ * that it can be used for building balanced trees.
*/
int
val_compare(struct f_val v1, struct f_val v2)
{
int rc;
- if ((v1.type == T_VOID) && (v2.type == T_VOID))
- return 0;
- if (v1.type == T_VOID) /* Hack for else */
- return -1;
- if (v2.type == T_VOID)
- return 1;
-
if (v1.type != v2.type) {
+ if (v1.type == T_VOID) /* Hack for else */
+ return -1;
+ if (v2.type == T_VOID)
+ return 1;
+
#ifndef IPV6
/* IP->Quad implicit conversion */
if ((v1.type == T_QUAD) && (v2.type == T_IP))
@@ -166,7 +139,10 @@ val_compare(struct f_val v1, struct f_val v2)
debug( "Types do not match in val_compare\n" );
return CMP_ERROR;
}
+
switch (v1.type) {
+ case T_VOID:
+ return 0;
case T_ENUM:
case T_INT:
case T_BOOL:
@@ -181,25 +157,63 @@ val_compare(struct f_val v1, struct f_val v2)
case T_PREFIX:
if (rc = ipa_compare(v1.val.px.ip, v2.val.px.ip))
return rc;
- if (v1.val.px.len < v2.val.px.len)
- return -1;
- if (v1.val.px.len > v2.val.px.len)
- return 1;
- return 0;
- case T_PATH_MASK:
- return pm_path_compare(v1.val.path_mask, v2.val.path_mask);
+ return int_cmp(v1.val.px.len, v2.val.px.len);
case T_STRING:
return strcmp(v1.val.s, v2.val.s);
default:
- debug( "Compare of unknown entities: %x\n", v1.type );
return CMP_ERROR;
}
}
-int
-tree_compare(const void *p1, const void *p2)
+static int
+pm_path_same(struct f_path_mask *m1, struct f_path_mask *m2)
{
- return val_compare((* (struct f_tree **) p1)->from, (* (struct f_tree **) p2)->from);
+ while (m1 && m2)
+ {
+ if ((m1->kind != m2->kind) || (m1->val != m2->val))
+ return 0;
+
+ m1 = m1->next;
+ m2 = m2->next;
+ }
+
+ return !m1 && !m2;
+}
+
+/**
+ * val_same - compare two values
+ * @v1: first value
+ * @v2: second value
+ *
+ * Compares two values and returns 1 if they are same and 0 if not.
+ * Comparison of values of different types is valid and returns 0.
+ */
+int
+val_same(struct f_val v1, struct f_val v2)
+{
+ int rc;
+
+ rc = val_compare(v1, v2);
+ if (rc != CMP_ERROR)
+ return !rc;
+
+ if (v1.type != v2.type)
+ return 0;
+
+ switch (v1.type) {
+ case T_PATH_MASK:
+ return pm_path_same(v1.val.path_mask, v2.val.path_mask);
+ case T_PATH:
+ case T_CLIST:
+ case T_ECLIST:
+ return adata_same(v1.val.ad, v2.val.ad);
+ case T_SET:
+ return same_tree(v1.val.t, v2.val.t);
+ case T_PREFIX_SET:
+ return trie_same(v1.val.ti, v2.val.ti);
+ default:
+ bug("Invalid type in val_same(): %x", v1.type);
+ }
}
void
@@ -220,39 +234,6 @@ fprefix_get_bounds(struct f_prefix *px, int *l, int *h)
}
}
-/*
- * val_simple_in_range - check if @v1 ~ @v2 for everything except sets
- */
-static int
-val_simple_in_range(struct f_val v1, struct f_val v2)
-{
- if ((v1.type == T_PATH) && (v2.type == T_PATH_MASK))
- return as_path_match(v1.val.ad, v2.val.path_mask);
- if ((v1.type == T_INT) && (v2.type == T_PATH))
- return as_path_is_member(v2.val.ad, v1.val.i);
-
- if (((v1.type == T_PAIR) || (v1.type == T_QUAD)) && (v2.type == T_CLIST))
- return int_set_contains(v2.val.ad, v1.val.i);
-#ifndef IPV6
- /* IP->Quad implicit conversion */
- if ((v1.type == T_IP) && (v2.type == T_CLIST))
- return int_set_contains(v2.val.ad, ipa_to_u32(v1.val.px.ip));
-#endif
- if ((v1.type == T_EC) && (v2.type == T_ECLIST))
- return ec_set_contains(v2.val.ad, v1.val.ec);
-
- if ((v1.type == T_STRING) && (v2.type == T_STRING))
- return patmatch(v2.val.s, v1.val.s);
-
- if ((v1.type == T_IP) && (v2.type == T_PREFIX))
- return ipa_in_net(v1.val.px.ip, v2.val.px.ip, v2.val.px.len);
-
- if ((v1.type == T_PREFIX) && (v2.type == T_PREFIX))
- return net_in_net(v1.val.px.ip, v1.val.px.len, v2.val.px.ip, v2.val.px.len);
-
- return CMP_ERROR;
-}
-
static int
clist_set_type(struct f_tree *set, struct f_val *v)
{
@@ -396,103 +377,86 @@ eclist_filter(struct linpool *pool, struct adata *list, struct f_val set, int po
* @v1: element
* @v2: set
*
- * Checks if @v1 is element (|~| operator) of @v2. Sets are internally represented as balanced trees, see
- * |tree.c| module (this is not limited to sets, but for non-set cases, val_simple_in_range() is called early).
+ * Checks if @v1 is element (|~| operator) of @v2.
*/
static int
val_in_range(struct f_val v1, struct f_val v2)
{
- int res;
+ if ((v1.type == T_PATH) && (v2.type == T_PATH_MASK))
+ return as_path_match(v1.val.ad, v2.val.path_mask);
- res = val_simple_in_range(v1, v2);
+ if ((v1.type == T_INT) && (v2.type == T_PATH))
+ return as_path_contains(v2.val.ad, v1.val.i, 1);
- if (res != CMP_ERROR)
- return res;
-
- if ((v1.type == T_PREFIX) && (v2.type == T_PREFIX_SET))
- return trie_match_fprefix(v2.val.ti, &v1.val.px);
+ if (((v1.type == T_PAIR) || (v1.type == T_QUAD)) && (v2.type == T_CLIST))
+ return int_set_contains(v2.val.ad, v1.val.i);
+#ifndef IPV6
+ /* IP->Quad implicit conversion */
+ if ((v1.type == T_IP) && (v2.type == T_CLIST))
+ return int_set_contains(v2.val.ad, ipa_to_u32(v1.val.px.ip));
+#endif
- if ((v1.type == T_CLIST) && (v2.type == T_SET))
- return clist_match_set(v1.val.ad, v2.val.t);
+ if ((v1.type == T_EC) && (v2.type == T_ECLIST))
+ return ec_set_contains(v2.val.ad, v1.val.ec);
- if ((v1.type == T_ECLIST) && (v2.type == T_SET))
- return eclist_match_set(v1.val.ad, v2.val.t);
+ if ((v1.type == T_STRING) && (v2.type == T_STRING))
+ return patmatch(v2.val.s, v1.val.s);
- if (v2.type == T_SET)
- switch (v1.type) {
- case T_ENUM:
- case T_INT:
- case T_PAIR:
- case T_QUAD:
- case T_IP:
- case T_EC:
- {
- struct f_tree *n;
- n = find_tree(v2.val.t, v1);
- if (!n)
- return 0;
- return !! (val_simple_in_range(v1, n->from)); /* We turn CMP_ERROR into compared ok, and that's fine */
- }
- }
- return CMP_ERROR;
-}
+ if ((v1.type == T_IP) && (v2.type == T_PREFIX))
+ return ipa_in_net(v1.val.px.ip, v2.val.px.ip, v2.val.px.len);
-static void val_print(struct f_val v);
+ if ((v1.type == T_PREFIX) && (v2.type == T_PREFIX))
+ return net_in_net(v1.val.px.ip, v1.val.px.len, v2.val.px.ip, v2.val.px.len);
-static void
-tree_node_print(struct f_tree *t, char **sep)
-{
- if (t == NULL)
- return;
+ if ((v1.type == T_PREFIX) && (v2.type == T_PREFIX_SET))
+ return trie_match_fprefix(v2.val.ti, &v1.val.px);
- tree_node_print(t->left, sep);
+ if (v2.type != T_SET)
+ return CMP_ERROR;
- logn(*sep);
- val_print(t->from);
- if (val_compare(t->from, t->to) != 0)
- {
- logn( ".." );
- val_print(t->to);
- }
- *sep = ", ";
+ /* With integrated Quad<->IP implicit conversion */
+ if ((v1.type == v2.val.t->from.type) ||
+ ((IP_VERSION == 4) && (v1.type == T_QUAD) && (v2.val.t->from.type == T_IP)))
+ return !!find_tree(v2.val.t, v1);
- tree_node_print(t->right, sep);
-}
+ if (v1.type == T_CLIST)
+ return clist_match_set(v1.val.ad, v2.val.t);
-static void
-tree_print(struct f_tree *t)
-{
- char *sep = "";
- logn( "[" );
- tree_node_print(t, &sep);
- logn( "] " );
+ if (v1.type == T_ECLIST)
+ return eclist_match_set(v1.val.ad, v2.val.t);
+
+ if (v1.type == T_PATH)
+ return as_path_match_set(v1.val.ad, v2.val.t);
+
+ return CMP_ERROR;
}
/*
- * val_print - format filter value
+ * val_format - format filter value
*/
-static void
-val_print(struct f_val v)
+void
+val_format(struct f_val v, buffer *buf)
{
char buf2[1024];
- switch (v.type) {
- case T_VOID: logn("(void)"); return;
- case T_BOOL: logn(v.val.i ? "TRUE" : "FALSE"); return;
- case T_INT: logn("%d", v.val.i); return;
- case T_STRING: logn("%s", v.val.s); return;
- case T_IP: logn("%I", v.val.px.ip); return;
- case T_PREFIX: logn("%I/%d", v.val.px.ip, v.val.px.len); return;
- case T_PAIR: logn("(%d,%d)", v.val.i >> 16, v.val.i & 0xffff); return;
- case T_QUAD: logn("%R", v.val.i); return;
- case T_EC: ec_format(buf2, v.val.ec); logn("%s", buf2); return;
- case T_PREFIX_SET: trie_print(v.val.ti); return;
- case T_SET: tree_print(v.val.t); return;
- case T_ENUM: logn("(enum %x)%d", v.type, v.val.i); return;
- case T_PATH: as_path_format(v.val.ad, buf2, 1000); logn("(path %s)", buf2); return;
- case T_CLIST: int_set_format(v.val.ad, 1, -1, buf2, 1000); logn("(clist %s)", buf2); return;
- case T_ECLIST: ec_set_format(v.val.ad, -1, buf2, 1000); logn("(eclist %s)", buf2); return;
- case T_PATH_MASK: pm_format(v.val.path_mask, buf2, 1000); logn("(pathmask%s)", buf2); return;
- default: logn( "[unknown type %x]", v.type ); return;
+ switch (v.type)
+ {
+ case T_VOID: buffer_puts(buf, "(void)"); return;
+ case T_BOOL: buffer_puts(buf, v.val.i ? "TRUE" : "FALSE"); return;
+ case T_INT: buffer_print(buf, "%d", v.val.i); return;
+ case T_STRING: buffer_print(buf, "%s", v.val.s); return;
+ case T_IP: buffer_print(buf, "%I", v.val.px.ip); return;
+ case T_PREFIX: buffer_print(buf, "%I/%d", v.val.px.ip, v.val.px.len); return;
+ case T_PAIR: buffer_print(buf, "(%d,%d)", v.val.i >> 16, v.val.i & 0xffff); return;
+ case T_QUAD: buffer_print(buf, "%R", v.val.i); return;
+ case T_EC: ec_format(buf2, v.val.ec); buffer_print(buf, "%s", buf2); return;
+ case T_PREFIX_SET: trie_format(v.val.ti, buf); return;
+ case T_SET: tree_format(v.val.t, buf); return;
+ case T_ENUM: buffer_print(buf, "(enum %x)%d", v.type, v.val.i); return;
+ case T_PATH: as_path_format(v.val.ad, buf2, 1000); buffer_print(buf, "(path %s)", buf2); return;
+ case T_CLIST: int_set_format(v.val.ad, 1, -1, buf2, 1000); buffer_print(buf, "(clist %s)", buf2); return;
+ case T_ECLIST: ec_set_format(v.val.ad, -1, buf2, 1000); buffer_print(buf, "(eclist %s)", buf2); return;
+ case T_PATH_MASK: pm_format(v.val.path_mask, buf); return;
+ default: buffer_print(buf, "[unknown type %x]", v.type); return;
}
}
@@ -500,6 +464,7 @@ static struct rte **f_rte;
static struct rta *f_old_rta;
static struct ea_list **f_tmp_attrs;
static struct linpool *f_pool;
+static struct buffer f_buf;
static int f_flags;
static inline void f_rte_cow(void)
@@ -555,6 +520,8 @@ static struct rate_limit rl_runtime_err;
#define TWOARGS_C TWOARGS \
if (v1.type != v2.type) \
runtime( "Can't operate with values of incompatible types" );
+#define ACCESS_RTE \
+ do { if (!f_rte) runtime("No route to access"); } while (0)
/**
* interpret
@@ -622,9 +589,6 @@ interpret(struct f_inst *what)
case T_VOID: runtime( "Can't operate with values of type void" );
case T_INT: if (v2.val.i == 0) runtime( "Mother told me not to divide by 0" );
res.val.i = v1.val.i / v2.val.i; break;
- case T_IP: if (v2.type != T_INT)
- runtime( "Incompatible types in / operator" );
- break;
default: runtime( "Usage of unknown type" );
}
break;
@@ -717,8 +681,15 @@ interpret(struct f_inst *what)
res.val.i = (x); \
break;
- case P('!','='): COMPARE(i!=0);
- case P('=','='): COMPARE(i==0);
+#define SAME(x) \
+ TWOARGS; \
+ i = val_same(v1, v2); \
+ res.type = T_BOOL; \
+ res.val.i = (x); \
+ break;
+
+ case P('!','='): SAME(!i);
+ case P('=','='): SAME(i);
case '<': COMPARE(i==-1);
case P('<','='): COMPARE(i!=1);
@@ -782,7 +753,7 @@ interpret(struct f_inst *what)
break;
case 'p':
ONEARG;
- val_print(v1);
+ val_format(v1, &f_buf);
break;
case '?': /* ? has really strange error value, so we can implement if ... else nicely :-) */
ONEARG;
@@ -800,7 +771,7 @@ interpret(struct f_inst *what)
case P('p',','):
ONEARG;
if (what->a2.i == F_NOP || (what->a2.i != F_NONL && what->a1.p))
- log_commit(*L_INFO);
+ log_commit(*L_INFO, &f_buf);
switch (what->a2.i) {
case F_QUITBIRD:
@@ -821,62 +792,83 @@ interpret(struct f_inst *what)
break;
case 'a': /* rta access */
{
+ ACCESS_RTE;
struct rta *rta = (*f_rte)->attrs;
res.type = what->aux;
- switch(res.type) {
- case T_IP:
- res.val.px.ip = * (ip_addr *) ((char *) rta + what->a2.i);
- break;
- case T_ENUM:
- res.val.i = * ((char *) rta + what->a2.i);
- break;
- case T_STRING: /* Warning: this is a special case for proto attribute */
- res.val.s = rta->src->proto->name;
- break;
- case T_PREFIX: /* Warning: this works only for prefix of network */
- {
- res.val.px.ip = (*f_rte)->net->n.prefix;
- res.val.px.len = (*f_rte)->net->n.pxlen;
- break;
- }
+
+ switch (what->a2.i)
+ {
+ case SA_FROM: res.val.px.ip = rta->from; break;
+ case SA_GW: res.val.px.ip = rta->gw; break;
+ case SA_NET: res.val.px.ip = (*f_rte)->net->n.prefix;
+ res.val.px.len = (*f_rte)->net->n.pxlen; break;
+ case SA_PROTO: res.val.s = rta->src->proto->name; break;
+ case SA_SOURCE: res.val.i = rta->source; break;
+ case SA_SCOPE: res.val.i = rta->scope; break;
+ case SA_CAST: res.val.i = rta->cast; break;
+ case SA_DEST: res.val.i = rta->dest; break;
+ case SA_IFNAME: res.val.s = rta->iface ? rta->iface->name : ""; break;
+ case SA_IFINDEX: res.val.i = rta->iface ? rta->iface->index : 0; break;
+
default:
- bug( "Invalid type for rta access (%x)", res.type );
+ bug("Invalid static attribute access (%x)", res.type);
}
}
break;
case P('a','S'):
+ ACCESS_RTE;
ONEARG;
if (what->aux != v1.type)
runtime( "Attempt to set static attribute to incompatible type" );
+
f_rta_cow();
{
struct rta *rta = (*f_rte)->attrs;
- switch (what->aux) {
- case T_IP:
- * (ip_addr *) ((char *) rta + what->a2.i) = v1.val.px.ip;
+ switch (what->a2.i)
+ {
+ case SA_FROM:
+ rta->from = v1.val.px.ip;
break;
- case T_ENUM_SCOPE:
+ case SA_GW:
+ {
+ ip_addr ip = v1.val.px.ip;
+ neighbor *n = neigh_find(rta->src->proto, &ip, 0);
+ if (!n || (n->scope == SCOPE_HOST))
+ runtime( "Invalid gw address" );
+
+ rta->dest = RTD_ROUTER;
+ rta->gw = ip;
+ rta->iface = n->iface;
+ rta->nexthops = NULL;
+ rta->hostentry = NULL;
+ }
+ break;
+
+ case SA_SCOPE:
rta->scope = v1.val.i;
break;
- case T_ENUM_RTD:
+ case SA_DEST:
i = v1.val.i;
if ((i != RTD_BLACKHOLE) && (i != RTD_UNREACHABLE) && (i != RTD_PROHIBIT))
runtime( "Destination can be changed only to blackhole, unreachable or prohibit" );
+
rta->dest = i;
rta->gw = IPA_NONE;
rta->iface = NULL;
rta->nexthops = NULL;
+ rta->hostentry = NULL;
break;
default:
- bug( "Unknown type in set of static attribute" );
+ bug("Invalid static attribute access (%x)", res.type);
}
}
break;
case P('e','a'): /* Access to extended attributes */
+ ACCESS_RTE;
{
eattr *e = NULL;
if (!(f_flags & FF_FORCE_TMPATTR))
@@ -944,6 +936,7 @@ interpret(struct f_inst *what)
}
break;
case P('e','S'):
+ ACCESS_RTE;
ONEARG;
{
struct ea_list *l = lp_alloc(f_pool, sizeof(struct ea_list) + sizeof(eattr));
@@ -956,11 +949,25 @@ interpret(struct f_inst *what)
l->attrs[0].type = what->aux | EAF_ORIGINATED;
switch (what->aux & EAF_TYPE_MASK) {
case EAF_TYPE_INT:
- case EAF_TYPE_ROUTER_ID:
if (v1.type != T_INT)
runtime( "Setting int attribute to non-int value" );
l->attrs[0].u.data = v1.val.i;
break;
+
+ case EAF_TYPE_ROUTER_ID:
+#ifndef IPV6
+ /* IP->Quad implicit conversion */
+ if (v1.type == T_IP) {
+ l->attrs[0].u.data = ipa_to_u32(v1.val.px.ip);
+ break;
+ }
+#endif
+ /* T_INT for backward compatibility */
+ if ((v1.type != T_QUAD) && (v1.type != T_INT))
+ runtime( "Setting quad attribute to non-quad value" );
+ l->attrs[0].u.data = v1.val.i;
+ break;
+
case EAF_TYPE_OPAQUE:
runtime( "Setting opaque attribute is not allowed" );
break;
@@ -1007,10 +1014,12 @@ interpret(struct f_inst *what)
}
break;
case 'P':
+ ACCESS_RTE;
res.type = T_INT;
res.val.i = (*f_rte)->pref;
break;
case P('P','S'):
+ ACCESS_RTE;
ONEARG;
if (v1.type != T_INT)
runtime( "Can't set preference to non-integer" );
@@ -1025,7 +1034,9 @@ interpret(struct f_inst *what)
switch(v1.type) {
case T_PREFIX: res.val.i = v1.val.px.len; break;
case T_PATH: res.val.i = as_path_getlen(v1.val.ad); break;
- default: runtime( "Prefix or path expected" );
+ case T_CLIST: res.val.i = int_set_get_size(v1.val.ad); break;
+ case T_ECLIST: res.val.i = ec_set_get_size(v1.val.ad); break;
+ default: runtime( "Prefix, path, clist or eclist expected" );
}
break;
case P('c','p'): /* Convert prefix to ... */
@@ -1124,7 +1135,34 @@ interpret(struct f_inst *what)
case P('C','a'): /* (Extended) Community list add or delete */
TWOARGS;
- if (v1.type == T_CLIST)
+ if (v1.type == T_PATH)
+ {
+ struct f_tree *set = NULL;
+ u32 key = 0;
+ int pos;
+
+ if (v2.type == T_INT)
+ key = v2.val.i;
+ else if ((v2.type == T_SET) && (v2.val.t->from.type == T_INT))
+ set = v2.val.t;
+ else
+ runtime("Can't delete non-integer (set)");
+
+ switch (what->aux)
+ {
+ case 'a': runtime("Can't add to path");
+ case 'd': pos = 0; break;
+ case 'f': pos = 1; break;
+ default: bug("unknown Ca operation");
+ }
+
+ if (pos && !set)
+ runtime("Can't filter integer");
+
+ res.type = T_PATH;
+ res.val.ad = as_path_filter(f_pool, v1.val.ad, set, key, pos);
+ }
+ else if (v1.type == T_CLIST)
{
/* Community (or cluster) list */
struct f_val dummy;
@@ -1232,6 +1270,7 @@ interpret(struct f_inst *what)
}
else
{
+ ACCESS_RTE;
v1.val.px.ip = (*f_rte)->net->n.prefix;
v1.val.px.len = (*f_rte)->net->n.pxlen;
@@ -1343,10 +1382,12 @@ i_same(struct f_inst *f1, struct f_inst *f2)
A2_SAME;
}
break;
- case 'C':
- if (val_compare(* (struct f_val *) f1->a1.p, * (struct f_val *) f2->a1.p))
+
+ case 'C':
+ if (!val_same(* (struct f_val *) f1->a1.p, * (struct f_val *) f2->a1.p))
return 0;
break;
+
case 'V':
if (strcmp((char *) f1->a2.p, (char *) f2->a2.p))
return 0;
@@ -1418,6 +1459,12 @@ i_same(struct f_inst *f1, struct f_inst *f2)
int
f_run(struct filter *filter, struct rte **rte, struct ea_list **tmp_attrs, struct linpool *tmp_pool, int flags)
{
+ if (filter == FILTER_ACCEPT)
+ return F_ACCEPT;
+
+ if (filter == FILTER_REJECT)
+ return F_REJECT;
+
int rte_cow = ((*rte)->flags & REF_COW);
DBG( "Running filter `%s'...", filter->name );
@@ -1427,7 +1474,8 @@ f_run(struct filter *filter, struct rte **rte, struct ea_list **tmp_attrs, struc
f_pool = tmp_pool;
f_flags = flags;
- log_reset();
+ LOG_BUFFER_INIT(f_buf);
+
struct f_val res = interpret(filter->root);
if (f_old_rta) {
@@ -1458,22 +1506,28 @@ f_run(struct filter *filter, struct rte **rte, struct ea_list **tmp_attrs, struc
return res.val.i;
}
-int
-f_eval_int(struct f_inst *expr)
+struct f_val
+f_eval(struct f_inst *expr, struct linpool *tmp_pool)
{
- /* Called independently in parse-time to eval expressions */
- struct f_val res;
-
f_flags = 0;
f_tmp_attrs = NULL;
f_rte = NULL;
- f_pool = cfg_mem;
+ f_pool = tmp_pool;
+
+ LOG_BUFFER_INIT(f_buf);
+
+ return interpret(expr);
+}
- log_reset();
- res = interpret(expr);
+int
+f_eval_int(struct f_inst *expr)
+{
+ /* Called independently in parse-time to eval expressions */
+ struct f_val res = f_eval(expr, cfg_mem);
if (res.type != T_INT)
cf_error("Integer expression expected");
+
return res.val.i;
}
diff --git a/filter/filter.h b/filter/filter.h
index 2386fc95..07a4c9e4 100644
--- a/filter/filter.h
+++ b/filter/filter.h
@@ -78,12 +78,13 @@ struct f_inst *f_generate_roa_check(struct symbol *sym, struct f_inst *prefix, s
struct f_tree *build_tree(struct f_tree *);
struct f_tree *find_tree(struct f_tree *t, struct f_val val);
int same_tree(struct f_tree *t1, struct f_tree *t2);
+void tree_format(struct f_tree *t, buffer *buf);
struct f_trie *f_new_trie(linpool *lp);
void trie_add_prefix(struct f_trie *t, ip_addr px, int plen, int l, int h);
int trie_match_prefix(struct f_trie *t, ip_addr px, int plen);
int trie_same(struct f_trie *t1, struct f_trie *t2);
-void trie_print(struct f_trie *t);
+void trie_format(struct f_trie *t, buffer *buf);
void fprefix_get_bounds(struct f_prefix *px, int *l, int *h);
@@ -106,6 +107,7 @@ struct ea_list;
struct rte;
int f_run(struct filter *filter, struct rte **rte, struct ea_list **tmp_attrs, struct linpool *tmp_pool, int flags);
+struct f_val f_eval(struct f_inst *expr, struct linpool *tmp_pool);
int f_eval_int(struct f_inst *expr);
u32 f_eval_asn(struct f_inst *expr);
@@ -115,7 +117,10 @@ int filter_same(struct filter *new, struct filter *old);
int i_same(struct f_inst *f1, struct f_inst *f2);
int val_compare(struct f_val v1, struct f_val v2);
-int tree_compare(const void *p1, const void *p2);
+int val_same(struct f_val v1, struct f_val v2);
+
+void val_format(struct f_val v, buffer *buf);
+
#define F_NOP 0
#define F_NONL 1
@@ -169,6 +174,19 @@ int tree_compare(const void *p1, const void *p2);
#define T_SET 0x80
#define T_PREFIX_SET 0x81
+
+#define SA_FROM 1
+#define SA_GW 2
+#define SA_NET 3
+#define SA_PROTO 4
+#define SA_SOURCE 5
+#define SA_SCOPE 6
+#define SA_CAST 7
+#define SA_DEST 8
+#define SA_IFNAME 9
+#define SA_IFINDEX 10
+
+
struct f_tree {
struct f_tree *left, *right;
struct f_val from, to;
diff --git a/filter/test.conf b/filter/test.conf
index 64e6d91b..ae8a95a6 100644
--- a/filter/test.conf
+++ b/filter/test.conf
@@ -95,16 +95,18 @@ eclist el2;
p2 = prepend( p2, 3 );
p2 = prepend( p2, 4 );
print "Testing paths: ", p2;
- print "Should be true: ", p2 ~ pm1, " ", p2 ~ pm2, " ", 3 ~ p2;
+ print "Should be true: ", p2 ~ pm1, " ", p2 ~ pm2, " ", 3 ~ p2, " ", p2 ~ [2, 10..20], " ", p2 ~ [4, 10..20];
print "4 = ", p2.len;
p2 = prepend( p2, 5 );
- print "Should be false: ", p2 ~ pm1, " ", p2 ~ pm2, " ", 10 ~ p2;
+ print "Should be false: ", p2 ~ pm1, " ", p2 ~ pm2, " ", 10 ~ p2, " ", p2 ~ [8, 10..20],;
print "Should be true: ", p2 ~ / ? 4 3 2 1 /, " ", p2, " ", / ? 4 3 2 1 /;
print "Should be true: ", p2 ~ [= * 4 3 * 1 =], " ", p2, " ", [= * 4 3 * 1 =];
print "Should be true: ", p2 ~ [= (3+2) (2*2) 3 2 1 =], " ", p2 ~ mkpath(5, 4);
print "Should be true: ", p2.len = 5, " ", p2.first = 5, " ", p2.last = 1;
print "5 = ", p2.len;
-
+ print "Delete 3: ", delete(p2, 3);
+ print "Filter 1-3: ", filter(p2, [1..3]);
+
pm1 = [= 1 2 * 3 4 5 =];
p2 = prepend( + empty +, 5 );
p2 = prepend( p2, 4 );
@@ -113,6 +115,8 @@ eclist el2;
p2 = prepend( p2, 2 );
p2 = prepend( p2, 1 );
print "Should be true: ", p2 ~ pm1, " ", p2, " ", pm1;
+ print "Delete 3: ", delete(p2, 3);
+ print "Delete 4-5: ", delete(p2, [4..5]);
l = - empty -;
print "Should be false in this special case: ", l ~ [(*,*)];
@@ -138,10 +142,10 @@ eclist el2;
l = add( l, (3,5) );
l2 = filter( l, [(3,*)] );
l = delete( l, [(3,2..4)] );
- print "Community list (1,2) (3,1) (3,5) ", l;
+ print "Community list (1,2) (3,1) (3,5) ", l, " len: ", l.len;
l = add( l, (3,2) );
l = add( l, (4,5) );
- print "Community list (1,2) (3,1) (3,2) (3,5) (4,5) ", l;
+ print "Community list (1,2) (3,1) (3,2) (3,5) (4,5) ", l, " len: ", l.len;
print "Should be true: ", l ~ [(*,2)], " ", l ~ [(*,5)], " ", l ~ [(*, one)];
print "Should be false: ", l ~ [(*,3)], " ", l ~ [(*,(one+6))], " ", l ~ [(*, (one+one+one))];
l = delete( l, [(*,(one+onef(3)))] );
@@ -164,6 +168,7 @@ eclist el2;
el = add(el, (ro, 11.21.31.41.mask(16), 200));
print "EC list (rt, 10, 20) (ro, 10.20.30.40, 100) (ro, 11.21.0.0, 200):";
print el;
+ print "EC len: ", el.len;
el = delete(el, (rt, 10, 20));
el = delete(el, (rt, 10, 30));
el = add(el, (unknown 2, ten, 1));
@@ -240,6 +245,15 @@ int b;
print "Defined: ", a, " ", b, " ", defined(b);
}
+define is1 = [ one, (2+1), (6-one), 8, 11, 15, 17, 19];
+define is2 = [(17+2), 17, 15, 11, 8, 5, 3, 2];
+define is3 = [5, 17, 2, 11, 8, 15, 3, 19];
+
+define pxs2 = [ 10.0.0.0/16{8,12}, 20.0.0.0/16{24,28} ];
+
+define ecs2 = [(rt, ten, (one+onef(0))*10), (ro, 100000, 100..200), (rt, 12345, *)];
+
+
function __startup()
int i;
bool b;
@@ -249,13 +263,11 @@ pair pp;
quad qq;
ec cc;
int set is;
-int set is1;
-int set is2;
-int set is3;
pair set ps;
ec set ecs;
+ip set ips;
prefix set pxs;
-string s;
+string st;
{
print "1a-a1 = 30: ", '1a-a1';
print "Testing filter language:";
@@ -272,6 +284,12 @@ string s;
# if 1 <= 1 then printn "."; else { print "*** FAIL: test 3"; }
if 1234 < 1234 then { print "*** FAIL: test 4"; quitbird; } else print "ok";
is = [ 2, 3, 4, 7..11 ];
+
+ print "must be true: ", 1 = 1, " ", 1 != (0,1), " ", 1 != "a", " ", +empty+ = +empty+, " ", -empty- = -empty-, " ", --empty-- = --empty-- ,
+ " ", [1,4..10,20] = [1,4..10,20] , " ", [ 10.0.0.0/8{ 15 , 17 } ] = [ 10.0.0.0/8{ 15 , 17 } ];
+ print "must be false: ", 1 != 1, " ", 1 = (0,1), " ", 1 = "a", " ", +empty+ = -empty-, " ", -empty- = --empty--, " ", --empty-- = +empty+ ,
+ " ", [1,2] = [1,3], " ", [ 10.0.0.0/8{ 15 , 17 } ] = [ 11.0.0.0/8{ 15 , 17 } ];
+
print " must be true: ", 1.2.0.0/16 ~ [ 1.0.0.0/8{ 15 , 17 } ];
print " data types; must be true: ", 1.2.3.4 = 1.2.3.4, ",", 1 ~ [1,2,3], ",", 5 ~ [1..20], ",", 10 ~ is, ",", 2 ~ [ 1, 2, 3 ], ",", 5 ~ [ 4 .. 7 ], ",", 1.2.3.4 ~ [ 1.2.3.3..1.2.3.5 ], ",", 1.2.3.4 ~ 1.0.0.0/8, ",", 1.0.0.0/8 ~ 1.0.0.0/8, ",", 1.0.0.0/8 ~ [ 1.0.0.0/8+ ];
print " must be true: ", true && true, ",", true || false, ",", ! false && ! false && true, ",", 1 < 2 && 1 != 3, ",", true && true && ! false, ",", true || 1+"a", ",", !(false && 1+"a");
@@ -279,11 +297,6 @@ string s;
print " must be true: ", defined(1), ",", defined(1.2.3.4), ",", 1 != 2, ",", 1 <= 2;
print " data types: must be false: ", 1 ~ [ 2, 3, 4 ], ",", 5 ~ is, ",", 1.2.3.4 ~ [ 1.2.3.3, 1.2.3.5 ], ",", (1,2) > (2,2), ",", (1,1) > (1,1), ",", 1.0.0.0/9 ~ [ 1.0.0.0/8- ], ",", 1.2.0.0/17 ~ [ 1.0.0.0/8{ 15 , 16 } ], ",", true && false;
- is1 = [ 1, 5, 8, 11, 15, 17, 19];
-
- is1 = [ one, (2+1), (6-one), 8, 11, 15, 17, 19];
- is2 = [(17+2), 17, 15, 11, 8, 5, 3, 2];
- is3 = [5, 17, 2, 11, 8, 15, 3, 19];
print " must be true: ", 1 ~ is1, " ", 3 ~ is1, " ", 5 ~ is1;
print " must be true: ", (one+2) ~ is1, " ", 2 ~ is2, " ", 2 ~ is3;
@@ -333,13 +346,14 @@ string s;
ecs = [(rt, ten, (one+onef(0))*10), (ro, 100000, 100..200), (rt, 12345, *)];
print "EC set: ", ecs;
+ print "EC set: ", ecs2;
print "Testing EC set, true: ", (rt, 10, 20) ~ ecs, " ", (ro, 100000, 100) ~ ecs, " ", (ro, 100000, 200) ~ ecs,
" ", (rt, 12345, 0) ~ ecs, " ", cc ~ ecs, " ", (rt, 12345, 4000000) ~ ecs;
print "Testing EC set, false: ", (ro, 10, 20) ~ ecs, " ", (rt, 10, 21) ~ ecs, " ", (ro, 100000, 99) ~ ecs,
" ", (ro, 12345, 10) ~ ecs, " ", (rt, 12346, 0) ~ ecs, " ", (ro, 0.1.134.160, 150) ~ ecs;
- s = "Hello";
- print "Testing string: ", s, " true: ", s ~ "Hell*", " false: ", s ~ "ell*";
+ st = "Hello";
+ print "Testing string: ", st, " true: ", st ~ "Hell*", " false: ", st ~ "ell*";
b = true;
print "Testing bool: ", b, ", ", !b;
@@ -347,6 +361,12 @@ string s;
if ( b = true ) then print "Testing bool comparison b = true: ", b;
else { print "*** FAIL: TRUE test failed" ; quitbird; }
+ ips = [ 1.1.1.0 .. 1.1.1.255, 1.2.2.2];
+ print "Testing IP sets: ";
+ print ips;
+ print " must be true: ", 1.1.1.0 ~ ips, ",", 1.1.1.100 ~ ips, ",", 1.2.2.2 ~ ips;
+ print " must be false: ", 1.1.0.255 ~ ips, ",", 1.1.2.0 ~ ips, ",", 1.2.2.3 ~ ips, ",", 192.168.1.1 ~ ips;
+
pxs = [ 1.2.0.0/16, 1.4.0.0/16+];
print "Testing prefix sets: ";
print pxs;
@@ -354,6 +374,7 @@ string s;
print " must be false: ", 1.1.0.0/16 ~ pxs, ",", 1.3.0.0/16 ~ pxs, ",", 1.2.0.0/15 ~ pxs, ",", 1.2.0.0/17 ~ pxs, ",",
1.2.0.0/32 ~ pxs, ",", 1.4.0.0/15 ~ pxs;
+ test_pxset(pxs2);
test_pxset([ 10.0.0.0/16{8,12}, 20.0.0.0/16{24,28} ]);
print "What will this do? ", [ 1, 2, 1, 1, 1, 3, 4, 1, 1, 1, 5 ];
@@ -372,6 +393,9 @@ string s;
print "1.2.3.4 = ", onetwo;
+ i = 4200000000;
+ print "4200000000 = ", i, " false: ", i = 4200000000, " ", i > 4100000000, " false: ", i > 4250000000;
+
test_undef(2);
test_undef(3);
test_undef(2);
diff --git a/filter/tree.c b/filter/tree.c
index f6ab75b4..ee9f448a 100644
--- a/filter/tree.c
+++ b/filter/tree.c
@@ -53,6 +53,11 @@ build_tree_rec(struct f_tree **buf, int l, int h)
return n;
}
+/*
+ * Comparator over two pointers to f_tree node pointers; orders the nodes
+ * by their 'from' value using val_compare().
+ */
+static int
+tree_compare(const void *p1, const void *p2)
+{
+  struct f_tree *lhs = * (struct f_tree **) p1;
+  struct f_tree *rhs = * (struct f_tree **) p2;
+
+  return val_compare(lhs->from, rhs->from);
+}
/**
* build_tree
@@ -132,3 +137,37 @@ same_tree(struct f_tree *t1, struct f_tree *t2)
return 0;
return 1;
}
+
+
+/*
+ * In-order walk of the subtree rooted at @t. Each node is appended to @buf
+ * as "from, " or, when from and to differ, as "from..to, ".
+ */
+static void
+tree_node_format(struct f_tree *t, buffer *buf)
+{
+  if (!t)
+    return;
+
+  /* Left subtree first */
+  tree_node_format(t->left, buf);
+
+  val_format(t->from, buf);
+
+  int is_range = (val_compare(t->from, t->to) != 0);
+  if (is_range)
+  {
+    buffer_puts(buf, "..");
+    val_format(t->to, buf);
+  }
+
+  /* Every entry is followed by a separator; the caller strips the last one */
+  buffer_puts(buf, ", ");
+
+  tree_node_format(t->right, buf);
+}
+
+/**
+ * tree_format
+ * @t: tree to be formatted
+ * @buf: destination buffer
+ *
+ * Prints the content of the tree to the supplied buffer as a bracketed,
+ * comma-separated list. If the buffer fills up, the output is left
+ * truncated without the closing bracket.
+ */
+void
+tree_format(struct f_tree *t, buffer *buf)
+{
+  buffer_puts(buf, "[");
+
+  tree_node_format(t, buf);
+
+  /*
+   * Buffer is full: the output was truncated, so the last two bytes need
+   * not be a separator and there is no room for the closing bracket.
+   * This also avoids reading pos[-1] when the buffer has zero size.
+   */
+  if (buf->pos == buf->end)
+    return;
+
+  /* Undo last separator */
+  if (buf->pos[-1] != '[')
+    buf->pos -= 2;
+
+  buffer_puts(buf, "]");
+}
diff --git a/filter/trie.c b/filter/trie.c
index 581332c6..217d72c3 100644
--- a/filter/trie.c
+++ b/filter/trie.c
@@ -265,37 +265,37 @@ trie_same(struct f_trie *t1, struct f_trie *t2)
}
static void
-trie_node_print(struct f_trie_node *t, char **sep)
+trie_node_format(struct f_trie_node *t, buffer *buf)
{
if (t == NULL)
return;
if (ipa_nonzero(t->accept))
- {
- logn("%s%I/%d{%I}", *sep, t->addr, t->plen, t->accept);
- *sep = ", ";
- }
+ buffer_print(buf, "%I/%d{%I}, ", t->addr, t->plen, t->accept);
- trie_node_print(t->c[0], sep);
- trie_node_print(t->c[1], sep);
+ trie_node_format(t->c[0], buf);
+ trie_node_format(t->c[1], buf);
}
/**
- * trie_print
- * @t: trie to be printed
+ * trie_format
+ * @t: trie to be formatted
+ * @buf: destination buffer
*
- * Prints the trie to the log buffer.
+ * Prints the trie to the supplied buffer.
*/
void
-trie_print(struct f_trie *t)
+trie_format(struct f_trie *t, buffer *buf)
{
- char *sep = "";
- logn("[");
+ buffer_puts(buf, "[");
+
+ /* Every entry, including the zero prefix, is written with a trailing ", " */
if (t->zero)
- {
- logn("0.0.0.0/0");
- sep = ", ";
- }
- trie_node_print(&t->root, &sep);
- logn("]");
+ buffer_print(buf, "%I/%d, ", IPA_NONE, 0);
+ trie_node_format(&t->root, buf);
+
+ /* Buffer is full: output was truncated, the tail need not be a separator */
+ if (buf->pos == buf->end)
+ return;
+
+ /* Undo last separator */
+ if (buf->pos[-1] != '[')
+ buf->pos -= 2;
+
+ buffer_puts(buf, "]");
}
diff --git a/lib/birdlib.h b/lib/birdlib.h
index 479f3d5c..04fb7fed 100644
--- a/lib/birdlib.h
+++ b/lib/birdlib.h
@@ -10,6 +10,7 @@
#define _BIRD_BIRDLIB_H_
#include "timer.h"
+#include "alloca.h"
/* Ugly structure offset handling macros */
@@ -19,12 +20,14 @@
/* Utility macros */
-#ifdef PARSER
-#define _MIN(a,b) (((a)<(b))?(a):(b))
-#define _MAX(a,b) (((a)>(b))?(a):(b))
-#else
-#define MIN(a,b) (((a)<(b))?(a):(b))
-#define MAX(a,b) (((a)>(b))?(a):(b))
+#define MIN_(a,b) (((a)<(b))?(a):(b))
+#define MAX_(a,b) (((a)>(b))?(a):(b))
+
+#ifndef PARSER
+#undef MIN
+#undef MAX
+#define MIN(a,b) MIN_(a,b)
+#define MAX(a,b) MAX_(a,b)
#endif
#define ABS(a) ((a)>=0 ? (a) : -(a))
@@ -34,24 +37,67 @@
#define NULL ((void *) 0)
#endif
+#ifndef IPV6
+#define IP_VERSION 4
+#else
+#define IP_VERSION 6
+#endif
+
+
/* Macros for gcc attributes */
#define NORET __attribute__((noreturn))
#define UNUSED __attribute__((unused))
+
+/* Microsecond time */
+
+typedef s64 btime;
+
+#define S_ *1000000
+#define MS_ *1000
+#define US_ *1
+#define TO_S /1000000
+#define TO_MS /1000
+#define TO_US /1
+
+#ifndef PARSER
+#define S S_
+#define MS MS_
+#define US US_
+#endif
+
+
/* Logging and dying */
+typedef struct buffer {
+ byte *start;
+ byte *pos;
+ byte *end;
+} buffer;
+
+/*
+ * Set up @buf as an empty buffer of @size bytes obtained from alloca().
+ * Arguments are parenthesized so that expressions may be passed safely;
+ * note that @size is still evaluated twice.
+ */
+#define STACK_BUFFER_INIT(buf,size) \
+  do { \
+    (buf).start = alloca(size); \
+    (buf).pos = (buf).start; \
+    (buf).end = (buf).start + (size); \
+  } while(0)
+
+#define LOG_BUFFER_INIT(buf) \
+ STACK_BUFFER_INIT(buf, LOG_BUFFER_SIZE)
+
+#define LOG_BUFFER_SIZE 1024
+
+
struct rate_limit {
bird_clock_t timestamp;
int count;
};
#define log log_msg
-void log_reset(void);
-void log_commit(int class);
+void log_commit(int class, buffer *buf);
void log_msg(char *msg, ...);
void log_rl(struct rate_limit *rl, char *msg, ...);
-void logn(char *msg, ...);
void die(char *msg, ...) NORET;
void bug(char *msg, ...) NORET;
diff --git a/lib/buffer.h b/lib/buffer.h
new file mode 100644
index 00000000..cf073e88
--- /dev/null
+++ b/lib/buffer.h
@@ -0,0 +1,35 @@
+
+#define BUFFER(type) struct { type *data; uint used, size; } /* Typed growable array: item storage, items in use, allocated capacity in items */
+/* Allocated capacity of the array in bytes */
+#define BUFFER_SIZE(v) ((v).size * sizeof(* (v).data))
+/* Start with an empty array with room for isize items, allocated by mb_alloc() from pool */
+#define BUFFER_INIT(v,pool,isize) \
+ ({ \
+ (v).used = 0; \
+ (v).size = (isize); \
+ (v).data = mb_alloc(pool, BUFFER_SIZE(v)); \
+ })
+/* Set the number of used items to nsize; calls buffer_realloc() when that exceeds the capacity */
+#define BUFFER_SET(v,nsize) \
+ ({ \
+ (v).used = (nsize); \
+ if ((v).used > (v).size) \
+ buffer_realloc((void **) &((v).data), &((v).size), (v).used, sizeof(* (v).data)); \
+ })
+/* Add step items to the used count; evaluates to a pointer to the first newly added item */
+#define BUFFER_INC(v,step) \
+ ({ \
+ uint _o = (v).used; \
+ BUFFER_SET(v, (v).used + (step)); \
+ (v).data + _o; \
+ })
+/* Drop the last step items; there is no underflow check */
+#define BUFFER_DEC(v,step) ({ (v).used -= (step); })
+/* Append one item and yield it as an lvalue */
+#define BUFFER_PUSH(v) (*BUFFER_INC(v,1))
+/* Drop the last item */
+#define BUFFER_POP(v) BUFFER_DEC(v,1)
+/* Mark the array as empty; data and size are left untouched */
+#define BUFFER_FLUSH(v) ({ (v).used = 0; })
+
+
diff --git a/lib/hash.h b/lib/hash.h
new file mode 100644
index 00000000..3ac9eebd
--- /dev/null
+++ b/lib/hash.h
@@ -0,0 +1,123 @@
+
+
+#define HASH(type) struct { type **data; uint count, order; }
+#define HASH_TYPE(v) typeof(** (v).data)
+#define HASH_SIZE(v) (1 << (v).order)
+#define HASH_MASK(v) ((1 << (v).order)-1)
+
+
+#define HASH_INIT(v,pool,init_order) \
+ ({ \
+ (v).count = 0; \
+ (v).order = (init_order); \
+ (v).data = mb_allocz(pool, HASH_SIZE(v) * sizeof(* (v).data)); \
+ })
+
+#define HASH_FIND(v,id,key...) \
+ ({ \
+ uint _h = id##_FN((key)) & HASH_MASK(v); \
+ HASH_TYPE(v) *_n = (v).data[_h]; \
+ while (_n && !id##_EQ(id##_KEY(_n), (key))) \
+ _n = id##_NEXT(_n); \
+ _n; \
+ })
+
+#define HASH_INSERT(v,id,node) \
+ ({ \
+ uint _h = id##_FN(id##_KEY((node))) & HASH_MASK(v); \
+ HASH_TYPE(v) **_nn = (v).data + _h; \
+ id##_NEXT(node) = *_nn; \
+ *_nn = node; \
+ (v).count++; \
+ })
+
+#define HASH_DO_REMOVE(v,id,_nn) \
+ ({ \
+ HASH_TYPE(v) *_n = *_nn; \
+ if (_n) \
+ { \
+ *_nn = id##_NEXT(_n); \
+ (v).count--; \
+ } \
+ _n; \
+ })
+
+#define HASH_DELETE(v,id,key...) \
+ ({ \
+ uint _h = id##_FN((key)) & HASH_MASK(v); \
+ HASH_TYPE(v) **_nn = (v).data + _h; \
+ \
+ while ((*_nn) && !id##_EQ(id##_KEY((*_nn)), (key))) \
+ _nn = &(id##_NEXT((*_nn))); \
+ \
+ HASH_DO_REMOVE(v,id,_nn); \
+ })
+
+#define HASH_REMOVE(v,id,node) \
+ ({ \
+ uint _h = id##_FN(id##_KEY((node))) & HASH_MASK(v); \
+ HASH_TYPE(v) **_nn = (v).data + _h; \
+ \
+ while ((*_nn) && (*_nn != (node))) \
+ _nn = &(id##_NEXT((*_nn))); \
+ \
+ HASH_DO_REMOVE(v,id,_nn); \
+ })
+
+
+#define HASH_REHASH(v,id,pool,step) \
+ ({ \
+ HASH_TYPE(v) *_n, *_n2, **_od; \
+ uint _i, _s; \
+ \
+ _s = HASH_SIZE(v); \
+ _od = (v).data; \
+ (v).count = 0; \
+ (v).order += (step); \
+ (v).data = mb_allocz(pool, HASH_SIZE(v) * sizeof(* (v).data)); \
+ \
+ for (_i = 0; _i < _s; _i++) \
+ for (_n = _od[_i]; _n && (_n2 = id##_NEXT(_n), 1); _n = _n2) \
+ HASH_INSERT(v, id, _n); \
+ \
+ mb_free(_od); \
+ })
+
+#define HASH_DEFINE_REHASH_FN(id, type) \
+ static void id##_REHASH_FN(void *v, pool *p, int step) \
+ { HASH_REHASH(* (HASH(type) *) v, id, p, step); }
+
+#define HASH_TRY_REHASH_UP(v,id,pool) \
+ ({ \
+ if (((v).order < id##_REHASH_MAX) && ((v).count > HASH_SIZE(v))) \
+ id##_REHASH_FN(&v, pool, 1); \
+ })
+
+#define HASH_TRY_REHASH_DOWN(v,id,pool) \
+ ({ \
+ if (((v).order > id##_REHASH_MIN) && ((v).count < HASH_SIZE(v)/2)) \
+ id##_REHASH_FN(&v, pool, -1); \
+ })
+
+#define HASH_WALK(v,next,n) \
+ do { \
+ HASH_TYPE(v) *n; \
+ uint _i; \
+ uint _s = HASH_SIZE(v); \
+ for (_i = 0; _i < _s; _i++) \
+ for (n = (v).data[_i]; n; n = n->next)
+
+#define HASH_WALK_END } while (0)
+
+
+#define HASH_WALK_DELSAFE(v,next,n) \
+ do { \
+ HASH_TYPE(v) *n, *_next; \
+ uint _i; \
+ uint _s = HASH_SIZE(v); \
+ for (_i = 0; _i < _s; _i++) \
+ for (n = (v).data[_i]; n && (_next = n->next, 1); n = _next)
+
+#define HASH_WALK_DELSAFE_END } while (0)
+
+
diff --git a/lib/heap.h b/lib/heap.h
new file mode 100644
index 00000000..c8c3d348
--- /dev/null
+++ b/lib/heap.h
@@ -0,0 +1,156 @@
+/*
+ * UCW Library -- Universal Heap Macros
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/**
+ * [[intro]]
+ * Introduction
+ * ------------
+ *
+ * Binary heap is a simple data structure, which for example supports efficient insertions, deletions
+ * and access to the minimal inserted item. We define several macros for such operations.
+ * Note that because of simplicity of heaps, we have decided to define direct macros instead
+ * of a <<generic:,macro generator>> as for several other data structures in the Libucw.
+ *
+ * A heap is represented by a number of elements and by an array of values. Beware that we
+ * index this array from one, not from zero as do the standard C arrays.
+ *
+ * Most macros use these parameters:
+ *
+ * - @type - the type of elements
+ * - @num - a variable (signed or unsigned integer) with the number of elements
+ * - @heap - a C array of type @type; the heap is stored in `heap[1] .. heap[num]`; `heap[0]` is unused
+ * - @less - a callback to compare two element values; `less(x, y)` shall return a non-zero value iff @x is lower than @y
+ * - @swap - a callback to swap two array elements; `swap(heap, i, j, t)` must swap `heap[i]` with `heap[j]` with possible help of temporary variable @t (type @type).
+ *
+ * A valid heap must follow these rules:
+ *
+ * - `num >= 0`
+ * - `heap[i] >= heap[i / 2]` for each `i` in `[2, num]`
+ *
+ * The first element `heap[1]` is always lower or equal to all other elements.
+ *
+ * [[macros]]
+ * Macros
+ * ------
+ */
+
+/* For internal usage. */
+#define HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
+ for (;;) \
+ { \
+ _l = 2*_j; \
+ if (_l > num) \
+ break; \
+ if (less(heap[_j],heap[_l]) && (_l == num || less(heap[_j],heap[_l+1]))) \
+ break; \
+ if (_l != num && less(heap[_l+1],heap[_l])) \
+ _l++; \
+ swap(heap,_j,_l,x); \
+ _j = _l; \
+ }
+
+/* For internal usage. */
+#define HEAP_BUBBLE_UP_J(heap,num,less,swap) \
+ while (_j > 1) \
+ { \
+ _u = _j/2; \
+ if (less(heap[_u], heap[_j])) \
+ break; \
+ swap(heap,_u,_j,x); \
+ _j = _u; \
+ }
+
+/**
+ * Shuffle the unordered array @heap of @num elements to become a valid heap. The time complexity is linear.
+ **/
+#define HEAP_INIT(heap,num,type,less,swap) \
+ do { \
+ uint _i = num; \
+ uint _j, _l; \
+ type x; \
+ while (_i >= 1) \
+ { \
+ _j = _i; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
+ _i--; \
+ } \
+ } while(0)
+
+/**
+ * Delete the minimum element `heap[1]` in `O(log(n))` time.
+ * The removed value is moved just after the resulting heap (`heap[num + 1]`).
+ **/
+#define HEAP_DELMIN(heap,num,type,less,swap) \
+ do { \
+ uint _j, _l; \
+ type x; \
+ swap(heap,1,num,x); \
+ num--; \
+ _j = 1; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * Insert `heap[num]` in `O(log(n))` time. The value of @num must be increased before.
+ **/
+#define HEAP_INSERT(heap,num,type,less,swap) \
+ do { \
+ uint _j, _u; \
+ type x; \
+ _j = num; \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * If you need to increase the value of `heap[pos]`, just do it and then call this macro to rebuild the heap.
+ * Only `heap[pos]` can be changed, the rest of the array must form a valid heap.
+ * The time complexity is `O(log(n))`.
+ **/
+#define HEAP_INCREASE(heap,num,type,less,swap,pos) \
+ do { \
+ uint _j, _l; \
+ type x; \
+ _j = pos; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * If you need to decrease the value of `heap[pos]`, just do it and then call this macro to rebuild the heap.
+ * Only `heap[pos]` can be changed, the rest of the array must form a valid heap.
+ * The time complexity is `O(log(n))`.
+ **/
+#define HEAP_DECREASE(heap,num,type,less,swap,pos) \
+ do { \
+ uint _j, _u; \
+ type x; \
+ _j = pos; \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * Delete `heap[pos]` in `O(log(n))` time.
+ **/
+#define HEAP_DELETE(heap,num,type,less,swap,pos) \
+ do { \
+ uint _j, _l, _u; \
+ type x; \
+ _j = pos; \
+ swap(heap,_j,num,x); \
+ num--; \
+ if (less(heap[_j], heap[num+1])) \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap) \
+ else \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * Default swapping macro.
+ **/
+#define HEAP_SWAP(heap,a,b,t) (t=heap[a], heap[a]=heap[b], heap[b]=t)
diff --git a/lib/ipv6.h b/lib/ipv6.h
index 6f8e7b3c..2247d3fd 100644
--- a/lib/ipv6.h
+++ b/lib/ipv6.h
@@ -128,11 +128,6 @@ static inline byte * ipv6_put_addr(byte *buf, ip_addr a)
return buf+16;
}
-/*
- * RFC 1883 defines packet precendece, but RFC 2460 replaces it
- * by generic Traffic Class ID with no defined semantics. Better
- * not use it yet.
- */
-#define IP_PREC_INTERNET_CONTROL -1
+#define IP_PREC_INTERNET_CONTROL 0xc0
#endif
diff --git a/lib/lists.c b/lib/lists.c
index 6d97ff50..d323a4b6 100644
--- a/lib/lists.c
+++ b/lib/lists.c
@@ -101,6 +101,46 @@ rem_node(node *n)
}
/**
+ * rem2_node - remove a node from a list, with cleanup
+ * @n: node to be removed
+ *
+ * Removes a node @n from the list it's linked in and resets its pointers to NULL.
+ * Useful if you want to distinguish between linked and unlinked nodes.
+ */
+LIST_INLINE void
+rem2_node(node *n)
+{
+  node *before = n->prev;
+  node *after = n->next;
+
+  /* Make the neighbours bypass n in both directions */
+  before->next = after;
+  after->prev = before;
+
+  /* Clear the links so that n is recognizable as unlinked */
+  n->prev = n->next = NULL;
+}
+
+/**
+ * replace_node - replace a node in a list with another one
+ * @old: node to be removed
+ * @new: node to be inserted
+ *
+ * Replaces node @old in the list it's linked in with node @new. Node
+ * @old may be a copy of the original node, which is not accessed
+ * through the list. The function could be called with @old == @new,
+ * which just fixes neighbors' pointers in the case that the node
+ * was reallocated.
+ */
+LIST_INLINE void
+replace_node(node *old, node *new)
+{
+  node *before = old->prev;
+  node *after = old->next;
+
+  /* Point both neighbours at the new node */
+  after->prev = new;
+  before->next = new;
+
+  /* Hand the old node's links over; a no-op when old == new */
+  new->prev = before;
+  new->next = after;
+}
+
+/**
* init_list - create an empty list
* @l: list
*
diff --git a/lib/lists.h b/lib/lists.h
index 0b0fdbe3..9153029c 100644
--- a/lib/lists.h
+++ b/lib/lists.h
@@ -51,6 +51,7 @@ typedef struct list { /* In fact two overlayed nodes */
void add_tail(list *, node *);
void add_head(list *, node *);
void rem_node(node *);
+void rem2_node(node *);
void add_tail_list(list *, list *);
void init_list(list *);
void insert_node(node *, node *);
diff --git a/lib/printf.c b/lib/printf.c
index 14af1062..41e1cc0d 100644
--- a/lib/printf.c
+++ b/lib/printf.c
@@ -276,7 +276,7 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args)
ip_ntox(va_arg(args, ip_addr), ipbuf);
else {
ip_ntop(va_arg(args, ip_addr), ipbuf);
- if (field_width > 0)
+ if (field_width == 1)
field_width = STD_ADDRESS_P_LENGTH;
}
s = ipbuf;
@@ -410,3 +410,40 @@ int bsnprintf(char * buf, int size, const char *fmt, ...)
va_end(args);
return i;
}
+
+/*
+ * Format into the free space of @buf using bvsnprintf() and advance the
+ * write position. Returns the bvsnprintf() result; when it is negative,
+ * the buffer is marked as full.
+ */
+int
+buffer_vprint(buffer *buf, const char *fmt, va_list args)
+{
+  int len = bvsnprintf((char *) buf->pos, buf->end - buf->pos, fmt, args);
+
+  if (len < 0)
+    buf->pos = buf->end;
+  else
+    buf->pos = buf->pos + len;
+
+  return len;
+}
+
+/*
+ * Variadic front end of buffer_vprint(): formats into the free space of
+ * @buf, advances the write position and returns the formatter's result.
+ */
+int
+buffer_print(buffer *buf, const char *fmt, ...)
+{
+  va_list ap;
+  int len;
+
+  va_start(ap, fmt);
+  len = buffer_vprint(buf, fmt, ap);
+  va_end(ap);
+
+  return len;
+}
+
+/*
+ * Append @str to @buf, silently truncating at the end of the buffer.
+ * A terminating zero is stored only if there is room for it and is not
+ * counted in the new write position.
+ */
+void
+buffer_puts(buffer *buf, const char *str)
+{
+  byte *dst = buf->pos;
+  byte *limit = buf->end;
+
+  for (; dst < limit && *str; dst++, str++)
+    *dst = *str;
+
+  if (dst < limit)
+    *dst = 0;
+
+  buf->pos = dst;
+}
diff --git a/lib/resource.c b/lib/resource.c
index 42243aa2..bf4b3ae9 100644
--- a/lib/resource.c
+++ b/lib/resource.c
@@ -220,7 +220,8 @@ ralloc(pool *p, struct resclass *c)
bzero(r, c->size);
r->class = c;
- add_tail(&p->inside, &r->n);
+ if (p)
+ add_tail(&p->inside, &r->n);
return r;
}
@@ -366,21 +367,21 @@ mb_allocz(pool *p, unsigned size)
/**
* mb_realloc - reallocate a memory block
- * @p: pool
* @m: memory block
* @size: new size of the block
*
* mb_realloc() changes the size of the memory block @m to a given size.
* The contents will be unchanged to the minimum of the old and new sizes;
- * newly allocated memory will be uninitialized. If @m is NULL, the call
- * is equivalent to mb_alloc(@p, @size).
+ * newly allocated memory will be uninitialized. Contrary to realloc()
+ * behavior, @m must be non-NULL, because the resource pool is inherited
+ * from it.
*
* Like mb_alloc(), mb_realloc() also returns a pointer to the memory
- * chunk , not to the resource, hence you have to free it using
+ * chunk, not to the resource, hence you have to free it using
* mb_free(), not rfree().
*/
void *
-mb_realloc(pool *p, void *m, unsigned size)
+mb_realloc(void *m, unsigned size)
{
struct mblock *ob = NULL;
@@ -392,9 +393,7 @@ mb_realloc(pool *p, void *m, unsigned size)
}
struct mblock *b = xrealloc(ob, sizeof(struct mblock) + size);
-
- b->r.class = &mb_class;
- add_tail(&p->inside, &b->r.n);
+ replace_node(&b->r.n, &b->r.n);
b->size = size;
return b->data;
}
@@ -413,3 +412,18 @@ mb_free(void *m)
rfree(b);
}
+
+
+#define STEP_UP(x) ((x) + (x)/2 + 4)
+
+/*
+ * Resize the array *buf (currently *size items of item_size bytes each)
+ * so that it holds at least need items. The new item count is found by
+ * applying STEP_UP() repeatedly and is stored back to *size; the block
+ * itself is resized by mb_realloc().
+ */
+void
+buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size)
+{
+  unsigned items;
+
+  for (items = MIN(*size, need); items < need; items = STEP_UP(items))
+    ;
+
+  *buf = mb_realloc(*buf, items * item_size);
+  *size = items;
+}
diff --git a/lib/resource.h b/lib/resource.h
index 5cb5e274..1a62d389 100644
--- a/lib/resource.h
+++ b/lib/resource.h
@@ -52,7 +52,7 @@ extern pool root_pool;
void *mb_alloc(pool *, unsigned size);
void *mb_allocz(pool *, unsigned size);
-void *mb_realloc(pool *p, void *m, unsigned size);
+void *mb_realloc(void *m, unsigned size);
void mb_free(void *);
/* Memory pools with linear allocation */
@@ -78,6 +78,9 @@ void sl_free(slab *, void *);
* outside resource manager and possibly sysdep code.
*/
+void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size);
+
+
#ifdef HAVE_LIBDMALLOC
/*
* The standard dmalloc macros tend to produce lots of namespace
@@ -103,3 +106,4 @@ void *xrealloc(void *, unsigned);
#endif
#endif
+
diff --git a/lib/socket.h b/lib/socket.h
index 0ee43b52..780d596b 100644
--- a/lib/socket.h
+++ b/lib/socket.h
@@ -20,7 +20,8 @@ typedef struct birdsock {
void *data; /* User data */
ip_addr saddr, daddr; /* IPA_NONE = unspecified */
unsigned sport, dport; /* 0 = unspecified (for IP: protocol type) */
- int tos; /* TOS and priority, -1 = default */
+ int tos; /* TOS / traffic class, -1 = default */
+ int priority; /* Local socket priority, -1 = default */
int ttl; /* Time To Live, -1 = default */
u32 flags;
struct iface *iface; /* Interface; specify this for broad/multicast sockets */
@@ -43,6 +44,7 @@ typedef struct birdsock {
/* laddr and lifindex are valid only if SKF_LADDR_RX flag is set to request it */
int fd; /* System-dependent data */
+ int index; /* Index in poll buffer */
node n;
void *rbuf_alloc, *tbuf_alloc;
char *password; /* Password for MD5 authentication */
@@ -81,13 +83,16 @@ sk_send_buffer_empty(sock *sk)
return sk->tbuf == sk->tpos;
}
+extern int sk_priority_control; /* Suggested priority for control traffic, should be sysdep define */
/* Socket flags */
#define SKF_V6ONLY 1 /* Use IPV6_V6ONLY socket option */
#define SKF_LADDR_RX 2 /* Report local address for RX packets */
#define SKF_LADDR_TX 4 /* Allow to specify local address for TX packets */
+#define SKF_TTL_RX 8 /* Report TTL / Hop Limit for RX packets */
+#define SKF_THREAD 0x100 /* Socket used in thread, do not add to main loop */
/*
* Socket types SA SP DA DP IF TTL SendTo (?=may, -=must not, *=must)
diff --git a/lib/string.h b/lib/string.h
index 7432d9a4..528a1a19 100644
--- a/lib/string.h
+++ b/lib/string.h
@@ -18,6 +18,10 @@ int bvsprintf(char *str, const char *fmt, va_list args);
int bsnprintf(char *str, int size, const char *fmt, ...);
int bvsnprintf(char *str, int size, const char *fmt, va_list args);
+int buffer_vprint(buffer *buf, const char *fmt, va_list args);
+int buffer_print(buffer *buf, const char *fmt, ...);
+void buffer_puts(buffer *buf, const char *str);
+
int patmatch(byte *pat, byte *str);
#endif
diff --git a/misc/bird.spec b/misc/bird.spec
index de63a6a0..c27eabbf 100644
--- a/misc/bird.spec
+++ b/misc/bird.spec
@@ -1,6 +1,6 @@
Summary: BIRD Internet Routing Daemon
Name: bird
-Version: 1.3.8
+Version: 1.3.12
Release: 1
Copyright: GPL
Group: Networking/Daemons
diff --git a/nest/a-path.c b/nest/a-path.c
index 63ac402e..dc36e653 100644
--- a/nest/a-path.c
+++ b/nest/a-path.c
@@ -244,10 +244,11 @@ as_path_get_first(struct adata *path, u32 *last_as)
}
int
-as_path_is_member(struct adata *path, u32 as)
+as_path_contains(struct adata *path, u32 as, int min)
{
u8 *p = path->data;
u8 *q = p+path->length;
+ int num = 0;
int i, n;
while (p<q)
@@ -257,13 +258,100 @@ as_path_is_member(struct adata *path, u32 as)
for(i=0; i<n; i++)
{
if (get_as(p) == as)
+ if (++num == min)
+ return 1;
+ p += BS;
+ }
+ }
+ return 0;
+}
+
+int
+as_path_match_set(struct adata *path, struct f_tree *set)
+{
+ u8 *p = path->data;
+ u8 *q = p+path->length;
+ int i, n;
+
+ while (p<q)
+ {
+ n = p[1];
+ p += 2;
+ for (i=0; i<n; i++)
+ {
+ struct f_val v = {T_INT, .val.i = get_as(p)};
+ if (find_tree(set, v))
return 1;
p += BS;
}
}
+
return 0;
}
+/*
+ * as_path_filter - keep only selected ASNs of an AS path
+ * @pool: linpool the result is allocated from
+ * @path: AS path to process, may be NULL
+ * @set: set the ASNs are looked up in, or NULL to compare with @key only
+ * @key: ASN compared against when @set is NULL
+ * @pos: an ASN is kept iff its match result (0 or 1) equals @pos
+ *
+ * Path blocks that end up with no ASN are dropped. Returns NULL for a NULL
+ * @path, @path itself when the resulting length is unchanged, and a newly
+ * allocated path otherwise.
+ */
+struct adata *
+as_path_filter(struct linpool *pool, struct adata *path, struct f_tree *set, u32 key, int pos)
+{
+  if (!path)
+    return NULL;
+
+  int len = path->length;
+
+  /*
+   * An empty path has nothing to filter. Returning early gives the same
+   * result as the code below and avoids declaring a zero-length VLA,
+   * which is undefined behavior in C99.
+   */
+  if (len == 0)
+    return path;
+
+  u8 *p = path->data;
+  u8 *q = path->data + len;
+  u8 *d, *d2;
+  int i, bt, sn, dn;
+  u8 buf[len];		/* The result never exceeds the input length */
+
+  d = buf;
+  while (p<q)
+    {
+      /* Read block header (type and length) */
+      bt = p[0];
+      sn = p[1];
+      dn = 0;
+      p += 2;
+      d2 = d + 2;		/* Leave room for the output block header */
+
+      for (i = 0; i < sn; i++)
+	{
+	  u32 as = get_as(p);
+	  int match;
+
+	  if (set)
+	    match = !!find_tree(set, (struct f_val){T_INT, .val.i = as});
+	  else
+	    match = (as == key);
+
+	  if (match == pos)
+	    {
+	      put_as(d2, as);
+	      d2 += BS;
+	      dn++;
+	    }
+
+	  p += BS;
+	}
+
+      if (dn > 0)
+	{
+	  /* Nonempty block, set block header and advance */
+	  d[0] = bt;
+	  d[1] = dn;
+	  d = d2;
+	}
+    }
+
+  /* Same length means nothing was removed, so the original can be reused */
+  int nl = d - buf;
+  if (nl == path->length)
+    return path;
+
+  struct adata *res = lp_alloc(pool, sizeof(struct adata) + nl);
+  res->length = nl;
+  memcpy(res->data, buf, nl);
+
+  return res;
+}
+
struct pm_pos
{
diff --git a/nest/attrs.h b/nest/attrs.h
index 42f81a10..b6e067cb 100644
--- a/nest/attrs.h
+++ b/nest/attrs.h
@@ -25,6 +25,8 @@
* to 16bit slot (like in 16bit AS_PATH). See RFC 4893 for details
*/
+struct f_tree;
+
struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, u32 as);
int as_path_convert_to_old(struct adata *path, byte *dst, int *new_used);
int as_path_convert_to_new(struct adata *path, byte *dst, int req_as);
@@ -33,7 +35,10 @@ int as_path_getlen(struct adata *path);
int as_path_getlen_int(struct adata *path, int bs);
int as_path_get_first(struct adata *path, u32 *orig_as);
int as_path_get_last(struct adata *path, u32 *last_as);
-int as_path_is_member(struct adata *path, u32 as);
+int as_path_contains(struct adata *path, u32 as, int min);
+int as_path_match_set(struct adata *path, struct f_tree *set);
+struct adata *as_path_filter(struct linpool *pool, struct adata *path, struct f_tree *set, u32 key, int pos);
+
#define PM_ASN 0
#define PM_QUESTION 1
@@ -64,6 +69,9 @@ int as_path_match(struct adata *path, struct f_path_mask *mask);
static inline int int_set_get_size(struct adata *list)
{ return list->length / 4; }
+static inline int ec_set_get_size(struct adata *list)
+{ return list->length / 8; }
+
static inline u32 *int_set_get_data(struct adata *list)
{ return (u32 *) list->data; }
diff --git a/nest/bfd.h b/nest/bfd.h
new file mode 100644
index 00000000..79c3c921
--- /dev/null
+++ b/nest/bfd.h
@@ -0,0 +1,51 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD)
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_NBFD_H_
+#define _BIRD_NBFD_H_
+
+#include "lib/lists.h"
+#include "lib/resource.h"
+
+struct bfd_session; /* Only declared here; this header uses it through a pointer */
+
+struct bfd_request { /* Object type returned by bfd_request_session() */
+ resource r;
+ node n;
+
+ ip_addr addr; /* NOTE(review): addr, local, iface, hook and data share names with the bfd_request_session() arguments - presumably stored from them, confirm in the implementation */
+ ip_addr local;
+ struct iface *iface;
+
+ void (*hook)(struct bfd_request *); /* Callback receiving the request itself; when it is called is not visible in this header */
+ void *data; /* Untyped pointer supplied together with hook */
+
+ struct bfd_session *session;
+
+ u8 state; /* NOTE(review): values of state, diag, old_state and down are defined outside this header */
+ u8 diag;
+ u8 old_state;
+ u8 down;
+};
+
+
+#ifdef CONFIG_BFD
+
+struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, void (*hook)(struct bfd_request *), void *data);
+
+static inline void cf_check_bfd(int use) { } /* CONFIG_BFD defined: nothing to check */
+
+#else
+
+static inline struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, void (*hook)(struct bfd_request *), void *data) { return NULL; } /* Stub without CONFIG_BFD: always returns NULL */
+
+static inline void cf_check_bfd(int use) { if (use) cf_error("BFD not available"); } /* Without CONFIG_BFD a nonzero use raises a configuration error */
+
+#endif /* CONFIG_BFD */
+
+
+
+#endif /* _BIRD_NBFD_H_ */
diff --git a/nest/cli.c b/nest/cli.c
index d245790b..11f98794 100644
--- a/nest/cli.c
+++ b/nest/cli.c
@@ -122,6 +122,7 @@ cli_printf(cli *c, int code, char *msg, ...)
va_list args;
byte buf[CLI_LINE_SIZE];
int cd = code;
+ int errcode;
int size, cnt;
if (cd < 0)
@@ -131,16 +132,26 @@ cli_printf(cli *c, int code, char *msg, ...)
size = bsprintf(buf, " ");
else
size = bsprintf(buf, "%04d-", cd);
+ errcode = -8000;
+ }
+ else if (cd == CLI_ASYNC_CODE)
+ {
+ size = 1; buf[0] = '+';
+ errcode = cd;
}
else
- size = bsprintf(buf, "%04d ", cd);
+ {
+ size = bsprintf(buf, "%04d ", cd);
+ errcode = 8000;
+ }
+
c->last_reply = cd;
va_start(args, msg);
cnt = bvsnprintf(buf+size, sizeof(buf)-size-1, msg, args);
va_end(args);
if (cnt < 0)
{
- cli_printf(c, code < 0 ? -8000 : 8000, "<line overflow>");
+ cli_printf(c, errcode, "<line overflow>");
return;
}
size += cnt;
@@ -385,12 +396,17 @@ cli_echo(unsigned int class, byte *msg)
}
}
+/* Hack for scheduled undo notification */
+extern cli *cmd_reconfig_stored_cli;
+
void
cli_free(cli *c)
{
cli_set_log_echo(c, 0, 0);
if (c->cleanup)
c->cleanup(c);
+ if (c == cmd_reconfig_stored_cli)
+ cmd_reconfig_stored_cli = NULL; /* Do not leave the stored pointer referring to a cli that is being freed */
rfree(c->pool);
}
diff --git a/nest/cli.h b/nest/cli.h
index ea64680a..396656e8 100644
--- a/nest/cli.h
+++ b/nest/cli.h
@@ -49,6 +49,8 @@ typedef struct cli {
extern pool *cli_pool;
extern struct cli *this_cli; /* Used during parsing */
+#define CLI_ASYNC_CODE 10000
+
/* Functions to be called by command handlers */
void cli_printf(cli *, int, char *, ...);
diff --git a/nest/cmds.c b/nest/cmds.c
index 2a803930..ec6bc762 100644
--- a/nest/cmds.c
+++ b/nest/cmds.c
@@ -13,6 +13,10 @@
#include "nest/cmds.h"
#include "lib/string.h"
#include "lib/resource.h"
+#include "filter/filter.h"
+
+extern int shutting_down;
+extern int configuring;
void
cmd_show_status(void)
@@ -27,9 +31,10 @@ cmd_show_status(void)
cli_msg(-1011, "Last reboot on %s", tim);
tm_format_datetime(tim, &config->tf_base, config->load_time);
cli_msg(-1011, "Last reconfiguration on %s", tim);
+
if (shutting_down)
cli_msg(13, "Shutdown in progress");
- else if (old_config)
+ else if (configuring)
cli_msg(13, "Reconfiguration in progress");
else
cli_msg(13, "Daemon is up and running");
@@ -86,3 +91,20 @@ cmd_show_memory(void)
print_size("Total:", rmemsize(&root_pool));
cli_msg(0, "");
}
+
+void
+cmd_eval(struct f_inst *expr)
+{
+ struct f_val v = f_eval(expr, this_cli->parser_pool); /* Evaluate expr, passing the current CLI's parser_pool */
+
+ if (v.type == T_RETURN) /* A T_RETURN result is reported to the client as a runtime error */
+ {
+ cli_msg(8008, "runtime error");
+ return;
+ }
+
+ buffer buf;
+ LOG_BUFFER_INIT(buf);
+ val_format(v, &buf); /* Render v into buf; the text is then read from buf.start */
+ cli_msg(23, "%s", buf.start); /* Send the rendered value with reply code 23 */
+}
diff --git a/nest/cmds.h b/nest/cmds.h
index 8b0bff7e..4cf8fb1b 100644
--- a/nest/cmds.h
+++ b/nest/cmds.h
@@ -11,6 +11,9 @@ struct sym_show_data {
struct symbol *sym;
};
+struct f_inst;
+
void cmd_show_status(void);
void cmd_show_symbols(struct sym_show_data *sym);
void cmd_show_memory(void);
+void cmd_eval(struct f_inst *expr);
diff --git a/nest/config.Y b/nest/config.Y
index a75dd0c3..e9b8a21b 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -44,11 +44,11 @@ CF_DECLS
CF_KEYWORDS(ROUTER, ID, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT)
CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, TABLE, STATES, ROUTES, FILTERS)
-CF_KEYWORDS(LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE)
+CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED)
CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES)
-CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH)
+CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH, AS)
CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED)
-CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC)
+CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT,
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
@@ -65,7 +65,7 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID)
%type <ro> roa_args
%type <rot> roa_table_arg
%type <sd> sym_args
-%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted
+%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted tos
%type <ps> proto_patt proto_patt2
%type <g> limit_spec
@@ -75,9 +75,9 @@ CF_GRAMMAR
CF_ADDTO(conf, rtrid)
-rtrid: ROUTER ID idval ';' {
- new_config->router_id = $3;
- }
+rtrid:
+ ROUTER ID idval ';' { new_config->router_id = $3; }
+ | ROUTER ID FROM iface_patt ';' { new_config->router_id_from = this_ipatt; }
;
idval:
@@ -185,8 +185,10 @@ proto_item:
| MRTDUMP mrtdump_mask { this_proto->mrtdump = $2; }
| IMPORT imexport { this_proto->in_filter = $2; }
| EXPORT imexport { this_proto->out_filter = $2; }
+ | RECEIVE LIMIT limit_spec { this_proto->rx_limit = $3; }
| IMPORT LIMIT limit_spec { this_proto->in_limit = $3; }
| EXPORT LIMIT limit_spec { this_proto->out_limit = $3; }
+ | IMPORT KEEP FILTERED bool { this_proto->in_keep_filtered = $4; }
| TABLE rtable { this_proto->table = $2; }
| ROUTER ID idval { this_proto->router_id = $3; }
| DESCRIPTION TEXT { this_proto->dsc = $2; }
@@ -214,6 +216,7 @@ limit_spec:
l->action = $2;
$$ = l;
}
+ | OFF { $$ = NULL; }
;
rtable:
@@ -263,6 +266,21 @@ iface_patt_list:
| iface_patt_list ',' iface_patt_node
;
+iface_patt_init: {
+ /* Generic this_ipatt init */
+ this_ipatt = cfg_allocz(sizeof(struct iface_patt));
+ init_list(&this_ipatt->ipn_list);
+ }
+ ;
+
+iface_patt:
+ iface_patt_init iface_patt_list /* The init rule is reduced first, so this_ipatt is freshly allocated before the list is parsed */
+ ;
+
+tos:
+ CLASS expr { $$ = $2 & 0xfc; if (($2 < 0) || ($2 > 255)) cf_error("TX class must be in range 0-255"); } /* Result is the class value with its two lowest bits cleared */
+ | DSCP expr { $$ = ($2 & 0x3f) << 2; if (($2 < 0) || ($2 > 63)) cf_error("TX DSCP must be in range 0-63"); } /* Result is the 6-bit DSCP value shifted left by two bits */
+ ;
/* Direct device route protocol */
@@ -405,7 +423,7 @@ CF_CLI(SHOW INTERFACES SUMMARY,,, [[Show summary of network interfaces]])
{ if_show_summary(); } ;
CF_CLI_HELP(SHOW ROUTE, ..., [[Show routing table]])
-CF_CLI(SHOW ROUTE, r_args, [[[<prefix>|for <prefix>|for <ip>] [table <t>] [filter <f>|where <cond>] [all] [primary] [(export|preexport) <p>] [protocol <p>] [stats|count]]], [[Show routing table]])
+CF_CLI(SHOW ROUTE, r_args, [[[<prefix>|for <prefix>|for <ip>] [table <t>] [filter <f>|where <cond>] [all] [primary] [filtered] [(export|preexport) <p>] [protocol <p>] [stats|count]]], [[Show routing table]])
{ rt_show($3); } ;
r_args:
@@ -413,7 +431,6 @@ r_args:
$$ = cfg_allocz(sizeof(struct rt_show_data));
$$->pxlen = 256;
$$->filter = FILTER_ACCEPT;
- $$->table = config->master_rtc->table;
}
| r_args prefix {
$$ = $1;
@@ -451,6 +468,10 @@ r_args:
$$ = $1;
$$->primary_only = 1;
}
+ | r_args FILTERED {
+ $$ = $1;
+ $$->filtered = 1;
+ }
| r_args export_or_preexport SYM {
struct proto_config *c = (struct proto_config *) $3->def;
$$ = $1;
@@ -591,7 +612,11 @@ CF_CLI(DUMP ROUTES,,, [[Dump routing table]])
CF_CLI(DUMP PROTOCOLS,,, [[Dump protocol information]])
{ protos_dump_all(); cli_msg(0, ""); } ;
-CF_CLI(ECHO, echo_mask echo_size, [all | off | <mask>] [<buffer-size>], [[Configure echoing of log messages]]) {
+CF_CLI(EVAL, term, <expr>, [[Evaluate an expression]])
+{ cmd_eval($2); } ;
+
+CF_CLI_HELP(ECHO, ..., [[Control echoing of log messages]])
+CF_CLI(ECHO, echo_mask echo_size, (all | off | { debug | trace | info | remote | warning | error | auth }) [<buffer-size>], [[Control echoing of log messages]]) {
cli_set_log_echo(this_cli, $2, $3);
cli_msg(0, "");
} ;
@@ -599,7 +624,7 @@ CF_CLI(ECHO, echo_mask echo_size, [all | off | <mask>] [<buffer-size>], [[Config
echo_mask:
ALL { $$ = ~0; }
| OFF { $$ = 0; }
- | NUM
+ | '{' log_mask_list '}' { $$ = $2; }
;
echo_size:
diff --git a/nest/iface.c b/nest/iface.c
index eea3d3b1..b4ab70c3 100644
--- a/nest/iface.c
+++ b/nest/iface.c
@@ -35,8 +35,6 @@
static pool *if_pool;
-static void auto_router_id(void);
-
list iface_list;
/**
@@ -354,9 +352,6 @@ if_end_update(void)
struct iface *i;
struct ifa *a, *b;
- if (!config->router_id)
- auto_router_id();
-
WALK_LIST(i, iface_list)
{
if (!(i->flags & IF_UPDATED))
@@ -583,24 +578,61 @@ ifa_delete(struct ifa *a)
}
}
-static void
-auto_router_id(void)
+u32
+if_choose_router_id(struct iface_patt *mask, u32 old_id) /* Returns the chosen router ID, or 0 when none can be chosen; mask may be NULL */
{
#ifndef IPV6
- struct iface *i, *j;
+ struct iface *i;
+ struct ifa *a, *b;
- j = NULL;
+ b = NULL; /* Best candidate address found so far */
WALK_LIST(i, iface_list)
- if ((i->flags & IF_ADMIN_UP) &&
- !(i->flags & (IF_IGNORE | IF_SHUTDOWN)) &&
- i->addr &&
- !(i->addr->flags & IA_PEER) &&
- (!j || ipa_to_u32(i->addr->ip) < ipa_to_u32(j->addr->ip)))
- j = i;
- if (!j)
- die("Cannot determine router ID (no suitable network interface found), please configure it manually");
- log(L_INFO "Guessed router ID %I according to interface %s", j->addr->ip, j->name);
- config->router_id = ipa_to_u32(j->addr->ip);
+ {
+ if (!(i->flags & IF_ADMIN_UP) ||
+ (i->flags & IF_SHUTDOWN))
+ continue; /* Skip interfaces lacking IF_ADMIN_UP or having IF_SHUTDOWN set */
+
+ WALK_LIST(a, i->addrs)
+ {
+ if (a->flags & IA_SECONDARY)
+ continue;
+
+ if (a->scope <= SCOPE_LINK)
+ continue;
+
+ /* FIXME: This should go away */
+ if (a->flags & IA_PEER)
+ continue;
+
+ /* FIXME: This should go away too */
+ if (!mask && (a != i->addr))
+ continue; /* Without a pattern only the address in i->addr is considered */
+
+ /* Check pattern if specified */
+ if (mask && !iface_patt_match(mask, i, a))
+ continue;
+
+ /* FIXME: This should go away too */
+ if ((i->flags & IF_IGNORE) && !mask)
+ continue; /* IF_IGNORE interfaces are skipped only when no pattern is given */
+
+ /* No pattern or pattern matched */
+ if (!b || ipa_to_u32(a->ip) < ipa_to_u32(b->ip)) /* Keep the address with the numerically lowest u32 value */
+ b = a;
+ }
+ }
+
+ if (!b)
+ return 0; /* No address passed the checks above */
+
+ u32 id = ipa_to_u32(b->ip);
+ if (id != old_id) /* Log only when the result differs from the caller's old_id */
+ log(L_INFO "Chosen router ID %R according to interface %s", id, b->iface->name);
+
+ return id;
+
+#else
+ return 0; /* With IPV6 defined no ID is chosen here */
#endif
}
diff --git a/nest/iface.h b/nest/iface.h
index 2416f82f..697ea543 100644
--- a/nest/iface.h
+++ b/nest/iface.h
@@ -101,6 +101,7 @@ struct iface *if_find_by_name(char *);
struct iface *if_get_by_name(char *);
void ifa_recalc_all_primary_addresses(void);
+
/* The Neighbor Cache */
typedef struct neighbor {
@@ -161,4 +162,7 @@ int iface_patt_match(struct iface_patt *ifp, struct iface *i, struct ifa *a);
struct iface_patt *iface_patt_find(list *l, struct iface *i, struct ifa *a);
int iface_patts_equal(list *, list *, int (*)(struct iface_patt *, struct iface_patt *));
+
+u32 if_choose_router_id(struct iface_patt *mask, u32 old_id);
+
#endif
diff --git a/nest/neighbor.c b/nest/neighbor.c
index 506d9bde..11a980b2 100644
--- a/nest/neighbor.c
+++ b/nest/neighbor.c
@@ -114,7 +114,7 @@ neighbor *
neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags)
{
neighbor *n;
- int class, scope = -1; ;
+ int class, scope = -1;
unsigned int h = neigh_hash(p, a);
struct iface *i;
@@ -231,7 +231,7 @@ neigh_up(neighbor *n, struct iface *i, int scope)
static void
neigh_down(neighbor *n)
{
- DBG("Flushing neighbor %I on %s\n", n->addr, i->name);
+ DBG("Flushing neighbor %I on %s\n", n->addr, n->iface->name);
rem_node(&n->if_n);
if (! (n->flags & NEF_BIND))
n->iface = NULL;
@@ -240,7 +240,21 @@ neigh_down(neighbor *n)
n->proto->neigh_notify(n);
rem_node(&n->n);
if (n->flags & NEF_STICKY)
- add_tail(&sticky_neigh_list, &n->n);
+ {
+ add_tail(&sticky_neigh_list, &n->n);
+
+ /* Respawn neighbor if there is another matching prefix */
+ struct iface *i;
+ int scope;
+
+ if (!n->iface)
+ WALK_LIST(i, iface_list)
+ if ((scope = if_connected(&n->addr, i)) >= 0)
+ {
+ neigh_up(n, i, scope);
+ return;
+ }
+ }
else
sl_free(neigh_slab, n);
}
diff --git a/nest/proto.c b/nest/proto.c
index 399c02e3..019b846e 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -345,6 +345,7 @@ protos_postconfig(struct config *c)
WALK_LIST(x, c->protos)
{
DBG(" %s", x->name);
+
p = x->protocol;
if (p->postconfig)
p->postconfig(x);
@@ -376,6 +377,7 @@ int proto_reconfig_type; /* Hack to propagate type info to pipe reconfigure hoo
static int
proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config *nc, int type)
{
+ struct announce_hook *ah = p->main_ahook;
/* If the protocol is DOWN, we just restart it */
if (p->proto_state == PS_DOWN)
return 0;
@@ -383,11 +385,9 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config
/* If there is a too big change in core attributes, ... */
if ((nc->protocol != oc->protocol) ||
(nc->disabled != p->disabled) ||
- (nc->table->table != oc->table->table) ||
- (proto_get_router_id(nc) != proto_get_router_id(oc)))
+ (nc->table->table != oc->table->table))
return 0;
-
p->debug = nc->debug;
p->mrtdump = nc->mrtdump;
proto_reconfig_type = type;
@@ -409,12 +409,31 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config
/* Update filters and limits in the main announce hook
Note that this also resets limit state */
- if (p->main_ahook)
+ if (ah)
{
- p->main_ahook->in_filter = nc->in_filter;
- p->main_ahook->out_filter = nc->out_filter;
- p->main_ahook->in_limit = nc->in_limit;
- p->main_ahook->out_limit = nc->out_limit;
+ ah->in_filter = nc->in_filter;
+ ah->out_filter = nc->out_filter;
+ ah->rx_limit = nc->rx_limit;
+ ah->in_limit = nc->in_limit;
+ ah->out_limit = nc->out_limit;
+ ah->in_keep_filtered = nc->in_keep_filtered;
+
+ if (p->proto_state == PS_UP) /* Recheck export/import/receive limit */
+ {
+ struct proto_stats *stats = ah->stats;
+ struct proto_limit *l = ah->in_limit;
+ u32 all_routes = stats->imp_routes + stats->filt_routes;
+
+ if (l && (stats->imp_routes >= l->limit)) proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);
+
+ l = ah->rx_limit;
+
+ if (l && ( all_routes >= l->limit)) proto_notify_limit(ah, l, PLD_RX, all_routes );
+
+ l = ah->out_limit;
+
+ if (l && ( stats->exp_routes >= l->limit)) proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);
+ }
}
/* Update routes when filters changed. If the protocol in not UP,
@@ -516,7 +535,7 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty
p->down_code = nc->disabled ? PDC_CF_DISABLE : PDC_CF_RESTART;
p->cf_new = nc;
}
- else if (!shutting_down)
+ else if (!new->shutdown)
{
log(L_INFO "Removing protocol %s", p->name);
p->down_code = PDC_CF_REMOVE;
@@ -537,7 +556,7 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty
WALK_LIST(nc, new->protos)
if (!nc->proto)
{
- if (old_config) /* Not a first-time configuration */
+ if (old) /* Not a first-time configuration */
log(L_INFO "Adding protocol %s", nc->name);
proto_init(nc);
}
@@ -552,6 +571,16 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty
initial_device_proto = NULL;
}
+ /* Determine router ID for the first time - it has to be here and not in
+ global_commit() because it is postponed after start of device protocol */
+ if (!config->router_id)
+ {
+ config->router_id = if_choose_router_id(config->router_id_from, 0);
+ if (!config->router_id)
+ die("Cannot determine router ID, please configure it manually");
+ }
+
+ /* Start all other protocols */
WALK_LIST_DELSAFE(p, n, initial_proto_list)
proto_rethink_goal(p);
}
@@ -671,6 +700,9 @@ proto_build(struct protocol *p)
}
}
+/* FIXME: convert this call to some protocol hook */
+extern void bfd_init_all(void);
+
/**
* protos_build - build a protocol list
*
@@ -708,6 +740,11 @@ protos_build(void)
#ifdef CONFIG_BGP
proto_build(&proto_bgp);
#endif
+#ifdef CONFIG_BFD
+ proto_build(&proto_bfd);
+ bfd_init_all();
+#endif
+
proto_pool = rp_new(&root_pool, "Protocols");
proto_flush_event = ev_new(proto_pool);
proto_flush_event->hook = proto_flush_loop;
@@ -720,8 +757,9 @@ proto_fell_down(struct proto *p)
{
DBG("Protocol %s down\n", p->name);
- if (p->stats.imp_routes != 0)
- log(L_ERR "Protocol %s is down but still has %d routes", p->name, p->stats.imp_routes);
+ u32 all_routes = p->stats.imp_routes + p->stats.filt_routes;
+ if (all_routes != 0)
+ log(L_ERR "Protocol %s is down but still has %d routes", p->name, all_routes);
bzero(&p->stats, sizeof(struct proto_stats));
proto_free_ahooks(p);
@@ -798,9 +836,12 @@ proto_schedule_feed(struct proto *p, int initial)
p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats);
p->main_ahook->in_filter = p->cf->in_filter;
p->main_ahook->out_filter = p->cf->out_filter;
+ p->main_ahook->rx_limit = p->cf->rx_limit;
p->main_ahook->in_limit = p->cf->in_limit;
p->main_ahook->out_limit = p->cf->out_limit;
+ p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered;
+ proto_reset_limit(p->main_ahook->rx_limit);
proto_reset_limit(p->main_ahook->in_limit);
proto_reset_limit(p->main_ahook->out_limit);
}
@@ -825,14 +866,18 @@ static void
proto_schedule_flush_loop(void)
{
struct proto *p;
+ struct announce_hook *h;
if (flush_loop_state)
return;
flush_loop_state = 1;
- rt_schedule_prune_all();
WALK_LIST(p, flush_proto_list)
+ {
p->flushing = 1;
+ for (h=p->ahooks; h; h=h->next)
+ h->table->prune_state = 1;
+ }
ev_schedule(proto_flush_event);
}
@@ -974,6 +1019,7 @@ proto_limit_name(struct proto_limit *l)
* proto_notify_limit: notify about limit hit and take appropriate action
* @ah: announce hook
* @l: limit being hit
+ * @dir: limit direction (PLD_*)
* @rt_count: the number of routes
*
* The function is called by the route processing core when limit @l
@@ -981,10 +1027,11 @@ proto_limit_name(struct proto_limit *l)
* according to @l->action.
*/
void
-proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, u32 rt_count)
+proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 rt_count)
{
+ const char *dir_name[PLD_MAX] = { "receive", "import" , "export" };
+ const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT };
struct proto *p = ah->proto;
- int dir = (ah->in_limit == l);
if (l->state == PLS_BLOCKED)
return;
@@ -992,7 +1039,7 @@ proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, u32 rt_count
/* For warning action, we want the log message every time we hit the limit */
if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit)))
log(L_WARN "Protocol %s hits route %s limit (%d), action: %s",
- p->name, dir ? "import" : "export", l->limit, proto_limit_name(l));
+ p->name, dir_name[dir], l->limit, proto_limit_name(l));
switch (l->action)
{
@@ -1007,8 +1054,7 @@ proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, u32 rt_count
case PLA_RESTART:
case PLA_DISABLE:
l->state = PLS_BLOCKED;
- proto_schedule_down(p, l->action == PLA_RESTART,
- dir ? PDC_IN_LIMIT_HIT : PDC_OUT_LIMIT_HIT);
+ proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]);
break;
}
}
@@ -1106,10 +1152,15 @@ proto_state_name(struct proto *p)
}
static void
-proto_show_stats(struct proto_stats *s)
+proto_show_stats(struct proto_stats *s, int in_keep_filtered)
{
- cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred",
- s->imp_routes, s->exp_routes, s->pref_routes);
+ if (in_keep_filtered)
+ cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred",
+ s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes);
+ else
+ cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred",
+ s->imp_routes, s->exp_routes, s->pref_routes);
+
cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted");
cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u",
s->imp_updates_received, s->imp_updates_invalid,
@@ -1143,11 +1194,12 @@ proto_show_basic_info(struct proto *p)
cli_msg(-1006, " Input filter: %s", filter_name(p->cf->in_filter));
cli_msg(-1006, " Output filter: %s", filter_name(p->cf->out_filter));
+ proto_show_limit(p->cf->rx_limit, "Receive limit:");
proto_show_limit(p->cf->in_limit, "Import limit:");
proto_show_limit(p->cf->out_limit, "Export limit:");
if (p->proto_state != PS_DOWN)
- proto_show_stats(&p->stats);
+ proto_show_stats(&p->stats, p->cf->in_keep_filtered);
}
void
@@ -1264,7 +1316,10 @@ proto_cmd_reload(struct proto *p, unsigned int dir, int cnt UNUSED)
* Perhaps, but these hooks work asynchronously.
*/
if (!p->proto->multitable)
- proto_reset_limit(p->main_ahook->in_limit);
+ {
+ proto_reset_limit(p->main_ahook->rx_limit);
+ proto_reset_limit(p->main_ahook->in_limit);
+ }
}
/* re-exporting routes */
diff --git a/nest/protocol.h b/nest/protocol.h
index d80201f3..b58f9e67 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -75,7 +75,7 @@ void protos_dump_all(void);
extern struct protocol
proto_device, proto_radv, proto_rip, proto_static,
- proto_ospf, proto_pipe, proto_bgp;
+ proto_ospf, proto_pipe, proto_bgp, proto_bfd;
/*
* Routing Protocol Instance
@@ -91,9 +91,12 @@ struct proto_config {
int class; /* SYM_PROTO or SYM_TEMPLATE */
u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */
unsigned preference, disabled; /* Generic parameters */
+ int in_keep_filtered; /* Routes rejected in import filter are kept */
u32 router_id; /* Protocol specific router ID */
struct rtable_config *table; /* Table we're attached to */
struct filter *in_filter, *out_filter; /* Attached filters */
+ struct proto_limit *rx_limit; /* Limit for receiving routes from protocol
+ (relevant when in_keep_filtered is active) */
struct proto_limit *in_limit; /* Limit for importing routes from protocol */
struct proto_limit *out_limit; /* Limit for exporting routes to protocol */
@@ -106,7 +109,8 @@ struct proto_config {
struct proto_stats {
/* Import - from protocol to core */
u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */
- u32 pref_routes; /* Number of routes that are preferred, sum over all routing table */
+ u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */
+ u32 pref_routes; /* Number of routes that are preferred, sum over all routing tables */
u32 imp_updates_received; /* Number of route updates received */
u32 imp_updates_invalid; /* Number of route updates rejected as invalid */
u32 imp_updates_filtered; /* Number of route updates rejected by filters */
@@ -224,8 +228,9 @@ struct proto_spec {
#define PDC_CMD_DISABLE 0x11 /* Result of disable command */
#define PDC_CMD_RESTART 0x12 /* Result of restart command */
#define PDC_CMD_SHUTDOWN 0x13 /* Result of global shutdown */
-#define PDC_IN_LIMIT_HIT 0x21 /* Route import limit reached */
-#define PDC_OUT_LIMIT_HIT 0x22 /* Route export limit reached */
+#define PDC_RX_LIMIT_HIT 0x21 /* Route receive limit reached */
+#define PDC_IN_LIMIT_HIT 0x22 /* Route import limit reached */
+#define PDC_OUT_LIMIT_HIT 0x23 /* Route export limit reached */
void *proto_new(struct proto_config *, unsigned size);
@@ -354,6 +359,12 @@ void proto_notify_state(struct proto *p, unsigned state);
#define D_EVENTS 16 /* Protocol events */
#define D_PACKETS 32 /* Packets sent/received */
+#ifndef PARSER
+#define TRACE(flags, msg, args...) \
+ do { if (p->p.debug & (flags)) log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0) /* Needs a variable p with p->p.debug and p->p.name in scope; flags is parenthesized so a compound mask such as A | B is tested as a whole */
+#endif
+
+
/*
* MRTDump flags
*/
@@ -372,6 +383,11 @@ extern struct proto_config *cf_dev_proto;
* Protocol limits
*/
+#define PLD_RX 0 /* Receive limit */
+#define PLD_IN 1 /* Import limit */
+#define PLD_OUT 2 /* Export limit */
+#define PLD_MAX 3
+
#define PLA_WARN 1 /* Issue log warning */
#define PLA_BLOCK 2 /* Block new routes */
#define PLA_RESTART 4 /* Force protocol restart */
@@ -387,7 +403,7 @@ struct proto_limit {
byte state; /* State of limit (PLS_*) */
};
-void proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, u32 rt_count);
+void proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 rt_count);
static inline void
proto_reset_limit(struct proto_limit *l)
@@ -407,10 +423,12 @@ struct announce_hook {
struct proto *proto;
struct filter *in_filter; /* Input filter */
struct filter *out_filter; /* Output filter */
+ struct proto_limit *rx_limit; /* Receive limit (for in_keep_filtered) */
struct proto_limit *in_limit; /* Input limit */
struct proto_limit *out_limit; /* Output limit */
struct proto_stats *stats; /* Per-table protocol statistics */
struct announce_hook *next; /* Next hook for the same protocol */
+ int in_keep_filtered; /* Routes rejected in import filter are kept */
};
struct announce_hook *proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *stats);
diff --git a/nest/route.h b/nest/route.h
index 3b65a855..f00f8b2b 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -141,7 +141,7 @@ typedef struct rtable {
int gc_counter; /* Number of operations since last GC */
bird_clock_t gc_time; /* Time of last GC */
byte gc_scheduled; /* GC is scheduled */
- byte prune_state; /* Table prune state, 1 -> prune is running */
+ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */
byte hcu_scheduled; /* Hostcache update is scheduled */
byte nhu_state; /* Next Hop Update state */
struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
@@ -221,12 +221,26 @@ typedef struct rte {
} rte;
#define REF_COW 1 /* Copy this rte on write */
+#define REF_FILTERED 2 /* Route is rejected by import filter */
+
+/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
+static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); }
+
+/* Route has the REF_FILTERED flag set; r is dereferenced without a NULL check */
+static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); }
+
/* Types of route announcement, also used as flags */
#define RA_OPTIMAL 1 /* Announcement of optimal route change */
#define RA_ACCEPTED 2 /* Announcement of first accepted route */
#define RA_ANY 3 /* Announcement of any route change */
+/* Return value of import_control() callback */
+#define RIC_ACCEPT 1 /* Accepted by protocol */
+#define RIC_PROCESS 0 /* Process it through import filter */
+#define RIC_REJECT -1 /* Rejected by protocol */
+#define RIC_DROP -2 /* Silently dropped by protocol */
+
struct config;
void rt_init(void);
@@ -242,6 +256,7 @@ rte *rte_get_temp(struct rta *);
void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src);
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
void rte_discard(rtable *tab, rte *old);
+int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
void rte_dump(rte *);
void rte_free(rte *);
rte *rte_do_cow(rte *);
@@ -250,7 +265,6 @@ void rt_dump(rtable *);
void rt_dump_all(void);
int rt_feed_baby(struct proto *p);
void rt_feed_baby_abort(struct proto *p);
-void rt_schedule_prune_all(void);
int rt_prune_loop(void);
struct rtable_config *rt_new_table(struct symbol *s);
@@ -263,7 +277,7 @@ struct rt_show_data {
struct fib_iterator fit;
struct proto *show_protocol;
struct proto *export_protocol;
- int export_mode, primary_only;
+ int export_mode, primary_only, filtered;
struct config *running_on_config;
int net_counter, rt_counter, show_counter;
int stats, show_for;
@@ -400,6 +414,10 @@ struct adata {
byte data[0];
};
+static inline int adata_same(struct adata *a, struct adata *b)
+{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } /* Nonzero iff both have equal length and identical data bytes; neither pointer is checked for NULL */
+
+
typedef struct ea_list {
struct ea_list *next; /* In case we have an override list */
byte flags; /* Flags: EALF_... */
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index b2bb152f..0fb7c820 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -108,7 +108,7 @@ rte_src_alloc_id(void)
if (src_id_used > (src_id_size * 28))
{
src_id_size *= 2;
- src_ids = mb_realloc(rta_pool, src_ids, src_id_size * sizeof(u32));
+ src_ids = mb_realloc(src_ids, src_id_size * sizeof(u32));
bzero(src_ids + i, (src_id_size - i) * sizeof(u32));
goto found;
}
@@ -551,8 +551,7 @@ ea_same(ea_list *x, ea_list *y)
if (a->id != b->id ||
a->flags != b->flags ||
a->type != b->type ||
- ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data :
- (a->u.ptr->length != b->u.ptr->length || memcmp(a->u.ptr->data, b->u.ptr->data, a->u.ptr->length))))
+ ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr)))
return 0;
}
return 1;
diff --git a/nest/rt-dev.c b/nest/rt-dev.c
index 7319018f..1a859dac 100644
--- a/nest/rt-dev.c
+++ b/nest/rt-dev.c
@@ -34,6 +34,9 @@ dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad)
/* Empty list is automagically treated as "*" */
return;
+ if (ad->flags & IA_SECONDARY)
+ return;
+
if (ad->scope <= SCOPE_LINK)
return;
diff --git a/nest/rt-table.c b/nest/rt-table.c
index ecd6e324..8c91ea0a 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -77,7 +77,7 @@ net_route(rtable *tab, ip_addr a, int len)
{
a0 = ipa_and(a, ipa_mkmask(len));
n = fib_find(&tab->fib, &a0, len);
- if (n && n->routes)
+ if (n && rte_is_valid(n->routes))
return n;
len--;
}
@@ -147,8 +147,11 @@ rte_better(rte *new, rte *old)
{
int (*better)(rte *, rte *);
- if (!old)
+ if (!rte_is_valid(old))
return 1;
+ if (!rte_is_valid(new))
+ return 0;
+
if (new->pref > old->pref)
return 1;
if (new->pref < old->pref)
@@ -217,7 +220,8 @@ export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa,
goto reject;
stats->exp_updates_rejected++;
- rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
+ if (v == RIC_REJECT)
+ rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
goto reject;
}
if (v > 0)
@@ -289,7 +293,7 @@ do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tm
if (l && new)
{
if ((!old || refeed) && (stats->exp_routes >= l->limit))
- proto_notify_limit(ah, l, stats->exp_routes);
+ proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);
if (l->state == PLS_BLOCKED)
{
@@ -406,9 +410,13 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
rte *old_free = NULL;
rte *r;
- /* Used to track whether we met old_changed position. If it is NULL
- it was the first and met it implicitly before current best route. */
- int old_meet = (old_changed && !before_old) ? 1 : 0;
+ /* Used to track whether we met old_changed position. If before_old is NULL
+ old_changed was the first and we met it implicitly before current best route. */
+ int old_meet = old_changed && !before_old;
+
+ /* Note that before_old is either NULL or a valid (not rejected) route.
+ If old_changed is valid, before_old has to be valid too. If the old changed route
+ was not valid, the caller must use NULL for both old_changed and before_old. */
if (new_changed)
stats->exp_updates_received++;
@@ -416,7 +424,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
stats->exp_withdraws_received++;
/* First, find the new_best route - first accepted by filters */
- for (r=net->routes; r; r=r->next)
+ for (r=net->routes; rte_is_valid(r); r=r->next)
{
if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
break;
@@ -435,7 +443,8 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
if (feed)
{
if (feed == 2) /* refeed */
- old_best = new_best ? new_best : net->routes;
+ old_best = new_best ? new_best :
+ (rte_is_valid(net->routes) ? net->routes : NULL);
else
old_best = NULL;
@@ -484,7 +493,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
}
/* Fourth case */
- for (r=r->next; r; r=r->next)
+ for (r=r->next; rte_is_valid(r); r=r->next)
{
if (old_best = export_filter(ah, r, &old_free, NULL, 1))
goto found;
@@ -538,7 +547,14 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old, ea_list *tmpa)
{
- struct announce_hook *a;
+ if (!rte_is_valid(old))
+ old = before_old = NULL;
+
+ if (!rte_is_valid(new))
+ new = NULL;
+
+ if (!old && !new)
+ return;
if (type == RA_OPTIMAL)
{
@@ -551,6 +567,7 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
rt_notify_hostcache(tab, net);
}
+ struct announce_hook *a;
WALK_LIST(a, tab->hooks)
{
ASSERT(a->proto->core_state == FS_HAPPY || a->proto->core_state == FS_FEEDING);
@@ -618,12 +635,15 @@ rte_same(rte *x, rte *y)
(!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
}
+/* Returns 1 when @e is non-NULL and rte_is_filtered() does not flag it, 0 otherwise. */
+static inline int
+rte_is_ok(rte *e)
+{
+  if (!e)
+    return 0;
+
+  return !rte_is_filtered(e);
+}
+
static void
rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, struct rte_src *src)
{
struct proto *p = ah->proto;
struct rtable *table = ah->table;
struct proto_stats *stats = ah->stats;
+ static struct rate_limit rl_pipe;
rte *before_old = NULL;
rte *old_best = net->routes;
rte *old = NULL;
@@ -647,7 +667,7 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str
{
if (new)
{
- log(L_ERR "Pipe collision detected when sending %I/%d to table %s",
+ log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %I/%d to table %s",
net->n.prefix, net->n.pxlen, table->name);
rte_free_quick(new);
}
@@ -657,8 +677,13 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str
if (new && rte_same(old, new))
{
/* No changes, ignore the new route */
- stats->imp_updates_ignored++;
- rte_trace_in(D_ROUTES, p, new, "ignored");
+
+ if (!rte_is_filtered(new))
+ {
+ stats->imp_updates_ignored++;
+ rte_trace_in(D_ROUTES, p, new, "ignored");
+ }
+
rte_free_quick(new);
#ifdef CONFIG_RIP
/* lastmod is used internally by RIP as the last time
@@ -684,14 +709,22 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str
return;
}
- struct proto_limit *l = ah->in_limit;
+ int new_ok = rte_is_ok(new);
+ int old_ok = rte_is_ok(old);
+
+ struct proto_limit *l = ah->rx_limit;
if (l && !old && new)
{
- if (stats->imp_routes >= l->limit)
- proto_notify_limit(ah, l, stats->imp_routes);
+ u32 all_routes = stats->imp_routes + stats->filt_routes;
+
+ if (all_routes >= l->limit)
+ proto_notify_limit(ah, l, PLD_RX, all_routes);
if (l->state == PLS_BLOCKED)
{
+ /* In receive limit the situation is simple, old is NULL so
+ we just free new and exit like nothing happened */
+
stats->imp_updates_ignored++;
rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
rte_free_quick(new);
@@ -699,15 +732,53 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str
}
}
- if (new)
+ l = ah->in_limit;
+ if (l && !old_ok && new_ok)
+ {
+ if (stats->imp_routes >= l->limit)
+ proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);
+
+ if (l->state == PLS_BLOCKED)
+ {
+ /* In import limit the situation is more complicated. We
+ shouldn't just drop the route, we should handle it like
+ it was filtered. We also have to continue the route
+ processing if old or new is non-NULL, but we should exit
+ if both are NULL as this case is probably assumed to be
+ already handled. */
+
+ stats->imp_updates_ignored++;
+ rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
+
+ if (ah->in_keep_filtered)
+ new->flags |= REF_FILTERED;
+ else
+ { rte_free_quick(new); new = NULL; }
+
+ /* Note that old && !new could be possible when
+ ah->in_keep_filtered changed in the recent past. */
+
+ if (!old && !new)
+ return;
+
+ new_ok = 0;
+ goto skip_stats1;
+ }
+ }
+
+ if (new_ok)
stats->imp_updates_accepted++;
- else
+ else if (old_ok)
stats->imp_withdraws_accepted++;
+ else
+ stats->imp_withdraws_ignored++;
+
+ skip_stats1:
if (new)
- stats->imp_routes++;
+ rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
if (old)
- stats->imp_routes--;
+ rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
if (table->config->sorted)
{
@@ -792,17 +863,19 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str
new->lastmod = now;
/* Log the route change */
- if (new)
- rte_trace_in(D_ROUTES, p, new, net->routes == new ? "added [best]" : "added");
-
- if (!new && (p->debug & D_ROUTES))
+ if (p->debug & D_ROUTES)
{
- if (old != old_best)
- rte_trace_in(D_ROUTES, p, old, "removed");
- else if (net->routes)
- rte_trace_in(D_ROUTES, p, old, "removed [replaced]");
- else
- rte_trace_in(D_ROUTES, p, old, "removed [sole]");
+ if (new_ok)
+ rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
+ else if (old_ok)
+ {
+ if (old != old_best)
+ rte_trace(p, old, '>', "removed");
+ else if (rte_is_ok(net->routes))
+ rte_trace(p, old, '>', "removed [replaced]");
+ else
+ rte_trace(p, old, '>', "removed [sole]");
+ }
}
/* Propagate the route change */
@@ -817,17 +890,13 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str
(table->gc_time + table->config->gc_min_time <= now))
rt_schedule_gc(table);
+ if (old_ok && p->rte_remove)
+ p->rte_remove(net, old);
+ if (new_ok && p->rte_insert)
+ p->rte_insert(net, new);
+
if (old)
- {
- if (p->rte_remove)
- p->rte_remove(net, old);
- rte_free_quick(old);
- }
- if (new)
- {
- if (p->rte_insert)
- p->rte_insert(net, new);
- }
+ rte_free_quick(old);
}
static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */
@@ -845,6 +914,26 @@ rte_update_unlock(void)
lp_flush(rte_update_pool);
}
+/*
+ * If the first route of @net has source RTS_DUMMY, unlink it from the head of
+ * the route list and store it in *@dummy. Otherwise both @net and *@dummy are
+ * left untouched.
+ */
+static inline void
+rte_hide_dummy_routes(net *net, rte **dummy)
+{
+  rte *head = net->routes;
+
+  if (!head || (head->attrs->source != RTS_DUMMY))
+    return;
+
+  *dummy = head;
+  net->routes = head->next;
+}
+
+/*
+ * Put the route stored in *@dummy (if any) back at the head of the route list
+ * of @net. Does nothing when *@dummy is NULL.
+ */
+static inline void
+rte_unhide_dummy_routes(net *net, rte **dummy)
+{
+  rte *hidden = *dummy;
+
+  if (!hidden)
+    return;
+
+  hidden->next = net->routes;
+  net->routes = hidden;
+}
+
/**
* rte_update - enter a new update to a routing table
* @table: table to be updated
@@ -894,6 +983,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
struct proto_stats *stats = ah->stats;
struct filter *filter = ah->in_filter;
ea_list *tmpa = NULL;
+ rte *dummy = NULL;
rte_update_lock();
if (new)
@@ -907,28 +997,39 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
stats->imp_updates_invalid++;
goto drop;
}
+
if (filter == FILTER_REJECT)
{
stats->imp_updates_filtered++;
rte_trace_in(D_FILTERS, p, new, "filtered out");
- goto drop;
- }
- tmpa = make_tmp_attrs(new, rte_update_pool);
- if (filter)
+ if (! ah->in_keep_filtered)
+ goto drop;
+
+ /* new is a private copy, i could modify it */
+ new->flags |= REF_FILTERED;
+ }
+ else
{
- ea_list *old_tmpa = tmpa;
- int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
- if (fr > F_ACCEPT)
+ tmpa = make_tmp_attrs(new, rte_update_pool);
+ if (filter && (filter != FILTER_REJECT))
{
- stats->imp_updates_filtered++;
- rte_trace_in(D_FILTERS, p, new, "filtered out");
- goto drop;
+ ea_list *old_tmpa = tmpa;
+ int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
+ if (fr > F_ACCEPT)
+ {
+ stats->imp_updates_filtered++;
+ rte_trace_in(D_FILTERS, p, new, "filtered out");
+
+ if (! ah->in_keep_filtered)
+ goto drop;
+
+ new->flags |= REF_FILTERED;
+ }
+ if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
+ src->proto->store_tmp_attrs(new, tmpa);
}
- if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
- src->proto->store_tmp_attrs(new, tmpa);
}
-
if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
new->attrs = rta_lookup(new->attrs);
new->flags |= REF_COW;
@@ -945,14 +1046,18 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
}
}
+ recalc:
+ rte_hide_dummy_routes(net, &dummy);
rte_recalculate(ah, net, new, tmpa, src);
+ rte_unhide_dummy_routes(net, &dummy);
rte_update_unlock();
return;
-drop:
+ drop:
rte_free(new);
- rte_recalculate(ah, net, NULL, NULL, src);
- rte_update_unlock();
+ new = NULL;
+ tmpa = NULL;
+ goto recalc;
}
/* Independent call to rte_announce(), used from next hop
@@ -976,6 +1081,33 @@ rte_discard(rtable *t, rte *old) /* Non-filtered route deletion, used during gar
rte_update_unlock();
}
+/*
+ * Check whether the best route for the given network in table @t would be
+ * exported to protocol @p through @filter.
+ *
+ * Returns 0 when the network is not found or its first route is not valid.
+ * Otherwise the import_control hook of @p (if any) is consulted and, when it
+ * returns RIC_PROCESS (or there is no hook), @filter is run. The result is 1
+ * when the final verdict is positive and 0 otherwise.
+ */
+int
+rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter)
+{
+  net *n = net_find(t, prefix, pxlen);
+  rte *best = n ? n->routes : NULL;
+  int v = 0;
+
+  if (!rte_is_valid(best))
+    return 0;
+
+  rte_update_lock();
+
+  /* What follows is a stripped down export_filter() */
+  ea_list *tmpa = make_tmp_attrs(best, rte_update_pool);
+
+  if (p->import_control)
+    v = p->import_control(p, &best, &tmpa, rte_update_pool);
+
+  if (v == RIC_PROCESS)
+    v = (f_run(filter, &best, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
+
+  /* The hook or the filter may have replaced the rte by a temporary one */
+  if (best != n->routes)
+    rte_free(best);
+
+  rte_update_unlock();
+
+  return v > 0;
+}
+
/**
* rte_dump - dump a route
* @e: &rte to be dumped
@@ -1151,20 +1283,10 @@ rt_init(void)
}
-/* Called from proto_schedule_flush_loop() only,
- ensuring that all prune states are zero */
-void
-rt_schedule_prune_all(void)
-{
- rtable *t;
-
- WALK_LIST(t, routing_tables)
- t->prune_state = 1;
-}
-
static inline int
-rt_prune_step(rtable *tab, int *max_feed)
+rt_prune_step(rtable *tab, int step, int *max_feed)
{
+ static struct rate_limit rl_flush;
struct fib_iterator *fit = &tab->prune_fit;
DBG("Pruning route table %s\n", tab->name);
@@ -1189,8 +1311,8 @@ again:
rescan:
for (e=n->routes; e; e=e->next)
- if (e->sender->proto->core_state != FS_HAPPY &&
- e->sender->proto->core_state != FS_FEEDING)
+ if (e->sender->proto->flushing ||
+ (step && e->attrs->src->proto->flushing))
{
if (*max_feed <= 0)
{
@@ -1198,6 +1320,10 @@ again:
return 0;
}
+ if (step)
+ log_rl(&rl_flush, L_WARN "Route %I/%d from %s still in %s after flush",
+ n->n.prefix, n->n.pxlen, e->attrs->src->proto->name, tab->name);
+
rte_discard(tab, e);
(*max_feed)--;
@@ -1222,23 +1348,42 @@ again:
/**
* rt_prune_loop - prune routing tables
- * @tab: routing table to be pruned
*
* The prune loop scans routing tables and removes routes belonging to
- * inactive protocols and also stale network entries. Returns 1 when
+ * flushing protocols and also stale network entries. Returns 1 when
* all such routes are pruned. It is a part of the protocol flushing
* loop.
+ *
+ * The prune loop runs in two steps. In the first step it prunes just
+ * the routes with flushing senders (in explicitly marked tables) so
+ * the route removal is propagated as usual. In the second step, all
+ * remaining relevant routes are removed. Ideally, there shouldn't be
+ * any, but it happens when pipe filters are changed.
*/
int
rt_prune_loop(void)
{
- rtable *t;
+ static int step = 0;
int max_feed = 512;
+ rtable *t;
+ again:
WALK_LIST(t, routing_tables)
- if (! rt_prune_step(t, &max_feed))
+ if (! rt_prune_step(t, step, &max_feed))
return 0;
+ if (step == 0)
+ {
+ /* Prepare for the second step */
+ WALK_LIST(t, routing_tables)
+ t->prune_state = 1;
+
+ step = 1;
+ goto again;
+ }
+
+ /* Done */
+ step = 0;
return 1;
}
@@ -1570,9 +1715,11 @@ again:
return 0;
}
+ /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */
+
if ((p->accept_ra_types == RA_OPTIMAL) ||
(p->accept_ra_types == RA_ACCEPTED))
- if (e)
+ if (rte_is_valid(e))
{
if (p->core_state != FS_FEEDING)
return 1; /* In the meantime, the protocol fell down. */
@@ -1581,7 +1728,7 @@ again:
}
if (p->accept_ra_types == RA_ANY)
- for(e = n->routes; e != NULL; e = e->next)
+ for(e = n->routes; rte_is_valid(e); e = e->next)
{
if (p->core_state != FS_FEEDING)
return 1; /* In the meantime, the protocol fell down. */
@@ -1834,7 +1981,8 @@ rt_update_hostentry(rtable *tab, struct hostentry *he)
net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
if (n)
{
- rta *a = n->routes->attrs;
+ rte *e = n->routes;
+ rta *a = e->attrs;
pxlen = n->n.pxlen;
if (a->hostentry)
@@ -1867,7 +2015,7 @@ rt_update_hostentry(rtable *tab, struct hostentry *he)
}
he->src = rta_clone(a);
- he->igp_metric = rt_get_igp_metric(n->routes);
+ he->igp_metric = rt_get_igp_metric(e);
}
done:
@@ -2001,19 +2149,24 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
int ok;
bsprintf(ia, "%I/%d", n->n.prefix, n->n.pxlen);
- if (n->routes)
- d->net_counter++;
+
for(e=n->routes; e; e=e->next)
{
+ if (rte_is_filtered(e) != d->filtered)
+ continue;
+
struct ea_list *tmpa;
struct rte_src *src = e->attrs->src;
struct proto *p1 = d->export_protocol;
struct proto *p2 = d->show_protocol;
+
+ if (ia[0])
+ d->net_counter++;
d->rt_counter++;
ee = e;
rte_update_lock(); /* We use the update buffer for filtering */
tmpa = make_tmp_attrs(e, rte_update_pool);
- ok = (d->filter == FILTER_ACCEPT || f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
+ ok = f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT;
if (p2 && p2 != src->proto) ok = 0;
if (ok && d->export_mode)
{
@@ -2027,8 +2180,8 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
'configure soft' command may change the export filter
and do not update routes */
- if ((a = proto_find_announce_hook(p1, d->table)) && ((a->out_filter == FILTER_REJECT) ||
- (a->out_filter && f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)))
+ if ((a = proto_find_announce_hook(p1, d->table)) &&
+ (f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT))
ok = 0;
}
}
@@ -2107,6 +2260,11 @@ rt_show(struct rt_show_data *d)
{
net *n;
+ /* Default is either a master table or a table related to a respective protocol */
+ if ((!d->table) && d->export_protocol) d->table = d->export_protocol->table;
+ if ((!d->table) && d->show_protocol) d->table = d->show_protocol->table;
+ if (!d->table) d->table = config->master_rtc->table;
+
if (d->pxlen == 256)
{
FIB_ITERATE_INIT(&d->fit, &d->table->fib);
diff --git a/proto/Doc b/proto/Doc
index 16b084fb..7863472f 100644
--- a/proto/Doc
+++ b/proto/Doc
@@ -1,4 +1,5 @@
H Protocols
+C bfd
C bgp
C ospf
C pipe
diff --git a/proto/bfd/Doc b/proto/bfd/Doc
new file mode 100644
index 00000000..7ee5d3ef
--- /dev/null
+++ b/proto/bfd/Doc
@@ -0,0 +1 @@
+S bfd.c
diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile
new file mode 100644
index 00000000..c28cedec
--- /dev/null
+++ b/proto/bfd/Makefile
@@ -0,0 +1,5 @@
+source=bfd.c packets.c io.c
+root-rel=../../
+dir-name=proto/bfd
+
+include ../../Rules
diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c
new file mode 100644
index 00000000..5ebfadc1
--- /dev/null
+++ b/proto/bfd/bfd.c
@@ -0,0 +1,1114 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD)
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/**
+ * DOC: Bidirectional Forwarding Detection
+ *
+ * The BFD protocol is implemented in three files: |bfd.c| containing the
+ * protocol logic and the protocol glue with BIRD core, |packets.c| handling BFD
+ * packet processing, RX, TX and protocol sockets. |io.c| then contains generic
+ * code for the event loop, threads and event sources (sockets, microsecond
+ * timers). This generic code will be merged to the main BIRD I/O code in the
+ * future.
+ *
+ * The BFD implementation uses a separate thread with an internal event loop for
+ * handling the protocol logic, which requires high-res and low-latency timing,
+ * so it is not affected by the rest of BIRD, which has several low-granularity
+ * hooks in the main loop, uses second-based timers and cannot offer good
+ * latency. The core of BFD protocol (the code related to BFD sessions,
+ * interfaces and packets) runs in the BFD thread, while the rest (the code
+ * related to BFD requests, BFD neighbors and the protocol glue) runs in the
+ * main thread.
+ *
+ * BFD sessions are represented by structure &bfd_session that contains a state
+ * related to the session and two timers (TX timer for periodic packets and hold
+ * timer for session timeout). These sessions are allocated from @session_slab
+ * and are accessible by two hash tables, @session_hash_id (by session ID) and
+ * @session_hash_ip (by IP addresses of neighbors). Slab and both hashes are in
+ * the main protocol structure &bfd_proto. The protocol logic related to BFD
+ * sessions is implemented in internal functions bfd_session_*(), which are
+ * expected to be called from the context of BFD thread, and external functions
+ * bfd_add_session(), bfd_remove_session() and bfd_reconfigure_session(), which
+ * form an interface to the BFD core for the rest and are expected to be called
+ * from the context of main thread.
+ *
+ * Each BFD session has an associated BFD interface, represented by structure
+ * &bfd_iface. A BFD interface contains a socket used for TX (the one for RX is
+ * shared in &bfd_proto), an interface configuration and reference counter.
+ * Compared to interface structures of other protocols, these structures are not
+ * created and removed based on interface notification events, but according to
+ * the needs of BFD sessions. When a new session is created, it requests a
+ * proper BFD interface by function bfd_get_iface(), which either finds an
+ * existing one in &iface_list (from &bfd_proto) or allocates a new one. When a
+ * session is removed, an associated iface is discharged by bfd_free_iface().
+ *
+ * BFD requests are the external API for the other protocols. When a protocol
+ * wants a BFD session, it calls bfd_request_session(), which creates a
+ * structure &bfd_request containing appropriate information and a notify hook.
+ * This structure is a resource associated with the caller's resource pool. When
+ * a BFD protocol is available, a BFD request is submitted to the protocol, an
+ * appropriate BFD session is found or created and the request is attached to
+ * the session. When a session changes state, all attached requests (and related
+ * protocols) are notified. Note that BFD requests do not depend on BFD protocol
+ * running. When the BFD protocol is stopped or removed (or not available from
+ * beginning), related BFD requests are stored in @bfd_wait_list, where they
+ * wait for a new protocol.
+ *
+ * BFD neighbors are just a way to statically configure BFD sessions without
+ * requests from other protocol. Structures &bfd_neighbor are part of BFD
+ * configuration (like static routes in the static protocol). BFD neighbors are
+ * handled by BFD protocol like it is a BFD client -- when a BFD neighbor is
+ * ready, the protocol just creates a BFD request like any other protocol.
+ *
+ * The protocol uses a new generic event loop (structure &birdloop) from |io.c|,
+ * which supports sockets, timers and events like the main loop. Timers
+ * (structure &timer2) are new microsecond based timers, while sockets and
+ * events are the same. A birdloop is associated with a thread (field @thread)
+ * in which event hooks are executed. Most functions for setting event sources
+ * (like sk_start() or tm2_start()) must be called from the context of that
+ * thread. Birdloop allows to temporarily acquire the context of that thread for
+ * the main thread by calling birdloop_enter() and then birdloop_leave(), which
+ * also ensures mutual exclusion with all event hooks. Note that resources
+ * associated with a birdloop (like timers) should be attached to the
+ * independent resource pool, detached from the main resource tree.
+ *
+ * There are two kinds of interaction between the BFD core (running in the BFD
+ * thread) and the rest of BFD (running in the main thread). The first kind are
+ * configuration calls from main thread to the BFD thread (like bfd_add_session()).
+ * These calls are synchronous and use birdloop_enter() mechanism for mutual
+ * exclusion. The second kind is a notification about session changes from the
+ * BFD thread to the main thread. This is done in an asynchronous way, sessions
+ * with pending notifications are linked (in the BFD thread) to @notify_list in
+ * &bfd_proto, and then bfd_notify_hook() in the main thread is activated using
+ * bfd_notify_kick() and a pipe. The hook then processes scheduled sessions and
+ * calls hooks from associated BFD requests. This @notify_list (and state fields
+ * in structure &bfd_session) is protected by a spinlock in &bfd_proto and
+ * functions bfd_lock_sessions() / bfd_unlock_sessions().
+ *
+ * There are a few data races (accessing @p->p.debug from TRACE() from the BFD
+ * thread and accessing some private fields of %bfd_session from
+ * bfd_show_sessions() from the main thread), but these are harmless (I hope).
+ *
+ * TODO: document functions and access restrictions for fields in BFD structures.
+ *
+ * Supported standards:
+ * - RFC 5880 - main BFD standard
+ * - RFC 5881 - BFD for IP links
+ * - RFC 5882 - generic application of BFD
+ * - RFC 5883 - BFD for multihop paths
+ */
+
+#include "bfd.h"
+
+
+/* Hash parameters used with p->session_hash_id: sessions keyed by loc_id, chained via next_id */
+#define HASH_ID_KEY(n) n->loc_id
+#define HASH_ID_NEXT(n) n->next_id
+#define HASH_ID_EQ(a,b) (a == b)
+#define HASH_ID_FN(k) (k)
+
+/* Hash parameters used with p->session_hash_ip: sessions keyed by addr, chained via next_ip */
+#define HASH_IP_KEY(n) n->addr
+#define HASH_IP_NEXT(n) n->next_ip
+#define HASH_IP_EQ(a,b) ipa_equal(a,b)
+#define HASH_IP_FN(k) ipa_hash(k)
+
+/* List of BFD protocol instances (linked by bfd_node), walked when a request is submitted */
+static list bfd_proto_list;
+/* Requests that are currently not attached to any BFD protocol instance */
+static list bfd_wait_list;
+
+/* Printable names of session states, indexed by the state value */
+const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" };
+
+/* Forward declarations of functions used before their definition */
+static void bfd_session_set_min_tx(struct bfd_session *s, u32 val);
+static struct bfd_iface *bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface);
+static void bfd_free_iface(struct bfd_iface *ifa);
+static inline void bfd_notify_kick(struct bfd_proto *p);
+
+
+/*
+ * BFD sessions
+ */
+
+/*
+ * Move session @s to local state @state with diagnostic code @diag.
+ *
+ * Nothing is done when the state does not change. Otherwise the new state is
+ * stored under the session lock, the session is queued in the notify list of
+ * the protocol (unless it is already queued), the desired TX interval is
+ * switched between the configured min_tx_int (entering Up) and idle_tx_int
+ * (leaving Up), and finally the notify hook is kicked if the session was
+ * newly queued.
+ */
+static void
+bfd_session_update_state(struct bfd_session *s, uint state, uint diag)
+{
+  struct bfd_proto *p = s->ifa->bfd;
+  uint prev = s->loc_state;
+  int kick = 0;
+
+  if (prev == state)
+    return;
+
+  TRACE(D_EVENTS, "Session to %I changed state from %s to %s",
+        s->addr, bfd_state_names[prev], bfd_state_names[state]);
+
+  bfd_lock_sessions(p);
+  s->loc_state = state;
+  s->loc_diag = diag;
+
+  /* Queue the session for notification unless it is already linked */
+  if (!NODE_VALID(&s->n))
+  {
+    add_tail(&p->notify_list, &s->n);
+    kick = 1;
+  }
+  bfd_unlock_sessions(p);
+
+  /* state != prev at this point, so at most one branch applies */
+  if (state == BFD_STATE_UP)
+    bfd_session_set_min_tx(s, s->ifa->cf->min_tx_int);
+  else if (prev == BFD_STATE_UP)
+    bfd_session_set_min_tx(s, s->ifa->cf->idle_tx_int);
+
+  if (kick)
+    bfd_notify_kick(p);
+}
+
+/*
+ * Recompute the period and jitter of the TX timer of session @s.
+ *
+ * The base interval is the larger of des_min_tx_int and rem_min_rx_int; the
+ * timer period is 75 % of it with a random addition of up to 15 % (so the
+ * result lies between 75 % and 90 % of the base). If a packet was already
+ * sent (last_tx is set), the timer is also re-armed relative to that time.
+ */
+static void
+bfd_session_update_tx_interval(struct bfd_session *s)
+{
+  u32 base = MAX(s->des_min_tx_int, s->rem_min_rx_int);
+  u32 lo = base - (base / 4);    /* 75 % */
+  u32 hi = base - (base / 10);   /* 90 % */
+
+  s->tx_timer->recurrent = lo;
+  s->tx_timer->randomize = hi - lo;
+
+  /* Re-arm only when there was a previous TX event to relate to */
+  if (s->last_tx)
+    tm2_set(s->tx_timer, s->last_tx + lo);
+}
+
+/*
+ * Re-arm the hold timer of session @s.
+ *
+ * The timeout is the larger of req_min_rx_int and rem_min_tx_int multiplied
+ * by rem_detect_mult, counted from last_rx. When @kick is non-zero, last_rx
+ * is first set to the current time. Nothing is armed while last_rx is zero.
+ */
+static void
+bfd_session_update_detection_time(struct bfd_session *s, int kick)
+{
+  if (kick)
+    s->last_rx = current_time();
+
+  if (s->last_rx)
+  {
+    btime timeout = (btime) MAX(s->req_min_rx_int, s->rem_min_tx_int) * s->rem_detect_mult;
+
+    tm2_set(s->hold_timer, s->last_rx + timeout);
+  }
+}
+
+/*
+ * Decide whether the TX timer of session @s should run and start or stop it
+ * accordingly.
+ *
+ * The timer is stopped (and last_tx cleared) when any of these holds:
+ *  - the session is passive and no remote ID is known,
+ *  - the remote side is in demand mode, no poll is active and both local and
+ *    remote states are Up,
+ *  - the remote min RX interval is zero.
+ * Otherwise the timer is (re)started with zero delay if @reset is non-zero or
+ * the timer is not active; last_tx is cleared in that case too.
+ */
+static void
+bfd_session_control_tx_timer(struct bfd_session *s, int reset)
+{
+  /* Note: a check of s->opened was once considered here but is disabled */
+
+  int silent =
+    (s->passive && (s->rem_id == 0)) ||
+    (s->rem_demand_mode &&
+     !s->poll_active &&
+     (s->loc_state == BFD_STATE_UP) &&
+     (s->rem_state == BFD_STATE_UP)) ||
+    (s->rem_min_rx_int == 0);
+
+  if (silent)
+  {
+    tm2_stop(s->tx_timer);
+    s->last_tx = 0;
+    return;
+  }
+
+  /* So the TX timer should run */
+  if (reset || !tm2_active(s->tx_timer))
+  {
+    s->last_tx = 0;
+    tm2_start(s->tx_timer, 0);
+  }
+}
+
+/*
+ * Schedule a poll sequence for the reasons given in bit mask @request.
+ *
+ * Ignored while the remote ID is unknown. The request is merged into
+ * poll_scheduled; if no poll is currently active, the scheduled set becomes
+ * the active one and the TX timer is reset.
+ */
+static void
+bfd_session_request_poll(struct bfd_session *s, u8 request)
+{
+  /* Not sure about this, but doing poll in this case does not make sense */
+  if (s->rem_id == 0)
+    return;
+
+  s->poll_scheduled |= request;
+
+  if (!s->poll_active)
+  {
+    s->poll_active = s->poll_scheduled;
+    s->poll_scheduled = 0;
+
+    bfd_session_control_tx_timer(s, 1);
+  }
+}
+
+/*
+ * Finish the active poll sequence of session @s.
+ *
+ * Poll reasons that are active and not scheduled again are considered done:
+ * for BFD_POLL_TX the pending des_min_tx_new is committed, for BFD_POLL_RX
+ * the pending req_min_rx_new is committed. The scheduled set then becomes
+ * the active one. Timers are not touched here; that is left to the caller,
+ * bfd_session_process_ctl().
+ */
+static void
+bfd_session_terminate_poll(struct bfd_session *s)
+{
+  u8 finished = s->poll_active & ~s->poll_scheduled;
+
+  if (finished & BFD_POLL_RX)
+    s->req_min_rx_int = s->req_min_rx_new;
+
+  if (finished & BFD_POLL_TX)
+    s->des_min_tx_int = s->des_min_tx_new;
+
+  s->poll_active = s->poll_scheduled;
+  s->poll_scheduled = 0;
+}
+
+/*
+ * Process a control packet received for session @s.
+ *
+ * @flags are the flags of the packet; @old_tx_int and @old_rx_int are
+ * compared against the current des_min_tx_int and rem_min_rx_int to detect
+ * whether the TX interval has to be recomputed.
+ *
+ * The function terminates an active poll on a Final flag, refreshes the TX
+ * interval and the detection time, runs the state machine based on local and
+ * remote state, updates the TX timer and answers a Poll flag by sending a
+ * control packet with the third argument of bfd_send_ctl() set to 1.
+ * In local state AdminDown the function returns right after the timer
+ * updates.
+ */
+void
+bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old_rx_int)
+{
+  if (s->poll_active && (flags & BFD_FLAG_FINAL))
+    bfd_session_terminate_poll(s);
+
+  if ((s->des_min_tx_int != old_tx_int) || (s->rem_min_rx_int != old_rx_int))
+    bfd_session_update_tx_interval(s);
+
+  bfd_session_update_detection_time(s, 1);
+
+  /* State machine; next_state == 0 means no transition */
+  int next_state = 0;
+  int diag = BFD_DIAG_NOTHING;
+
+  switch (s->loc_state)
+  {
+  case BFD_STATE_ADMIN_DOWN:
+    return;
+
+  case BFD_STATE_DOWN:
+    if (s->rem_state == BFD_STATE_DOWN)
+      next_state = BFD_STATE_INIT;
+    else if (s->rem_state == BFD_STATE_INIT)
+      next_state = BFD_STATE_UP;
+    break;
+
+  case BFD_STATE_INIT:
+    if (s->rem_state == BFD_STATE_ADMIN_DOWN)
+    {
+      next_state = BFD_STATE_DOWN;
+      diag = BFD_DIAG_NEIGHBOR_DOWN;
+    }
+    else if (s->rem_state >= BFD_STATE_INIT)
+      next_state = BFD_STATE_UP;
+    break;
+
+  case BFD_STATE_UP:
+    if (s->rem_state <= BFD_STATE_DOWN)
+    {
+      next_state = BFD_STATE_DOWN;
+      diag = BFD_DIAG_NEIGHBOR_DOWN;
+    }
+    break;
+  }
+
+  if (next_state)
+    bfd_session_update_state(s, next_state, diag);
+
+  bfd_session_control_tx_timer(s, 0);
+
+  if (flags & BFD_FLAG_POLL)
+    bfd_send_ctl(s->ifa->bfd, s, 1);
+}
+
+/*
+ * Handle expiration of session @s: forget everything learned about the
+ * remote side, cancel any poll sequence, move the session to Down with
+ * diagnostic BFD_DIAG_TIMEOUT and reset the TX timer.
+ */
+static void
+bfd_session_timeout(struct bfd_session *s)
+{
+  struct bfd_proto *p = s->ifa->bfd;
+
+  TRACE(D_EVENTS, "Session to %I expired", s->addr);
+
+  /* Remote state variables back to their initial values */
+  s->rem_id = 0;
+  s->rem_state = BFD_STATE_DOWN;
+  s->rem_min_rx_int = 1;
+  s->rem_min_tx_int = 0;
+  s->rem_detect_mult = 0;
+  s->rem_demand_mode = 0;
+
+  /* No poll sequence can continue without the remote side */
+  s->poll_scheduled = 0;
+  s->poll_active = 0;
+
+  bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT);
+
+  bfd_session_control_tx_timer(s, 1);
+}
+
+/*
+ * Request a new desired min TX interval @val for session @s.
+ *
+ * Nothing is done when @val equals the already pending des_min_tx_new.
+ * The value is applied to des_min_tx_int immediately unless the session is
+ * Up and the interval would not decrease; in that case it stays pending. In
+ * both cases a TX poll is requested.
+ */
+static void
+bfd_session_set_min_tx(struct bfd_session *s, u32 val)
+{
+  /* Note that des_min_tx_int <= des_min_tx_new */
+
+  if (s->des_min_tx_new == val)
+    return;
+
+  s->des_min_tx_new = val;
+
+  /* Postpone timer update if des_min_tx_int increases and the session is up */
+  int postpone = (s->loc_state == BFD_STATE_UP) && (val >= s->des_min_tx_int);
+
+  if (!postpone)
+  {
+    s->des_min_tx_int = val;
+    bfd_session_update_tx_interval(s);
+  }
+
+  bfd_session_request_poll(s, BFD_POLL_TX);
+}
+
+/*
+ * Request a new required min RX interval @val for session @s.
+ *
+ * Nothing is done when @val equals the already pending req_min_rx_new.
+ * The value is applied to req_min_rx_int immediately unless the session is
+ * Up and the interval would not increase; in that case it stays pending. In
+ * both cases an RX poll is requested.
+ */
+static void
+bfd_session_set_min_rx(struct bfd_session *s, u32 val)
+{
+  /* Note that req_min_rx_int >= req_min_rx_new */
+
+  if (s->req_min_rx_new == val)
+    return;
+
+  s->req_min_rx_new = val;
+
+  /* Postpone timer update if req_min_rx_int decreases and the session is up */
+  int postpone = (s->loc_state == BFD_STATE_UP) && (val <= s->req_min_rx_int);
+
+  if (!postpone)
+  {
+    s->req_min_rx_int = val;
+    bfd_session_update_detection_time(s, 0);
+  }
+
+  bfd_session_request_poll(s, BFD_POLL_RX);
+}
+
+/* Look up a session of @p by its local ID @id in session_hash_id. */
+struct bfd_session *
+bfd_find_session_by_id(struct bfd_proto *p, u32 id)
+{
+  struct bfd_session *found = HASH_FIND(p->session_hash_id, HASH_ID, id);
+
+  return found;
+}
+
+/* Look up a session of @p by the neighbor address @addr in session_hash_ip. */
+struct bfd_session *
+bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr)
+{
+  struct bfd_session *found = HASH_FIND(p->session_hash_ip, HASH_IP, addr);
+
+  return found;
+}
+
+/*
+ * TX timer hook: record the time of transmission in last_tx and send a
+ * control packet for the session stored in the timer data (third argument
+ * of bfd_send_ctl() is 0).
+ */
+static void
+bfd_tx_timer_hook(timer2 *t)
+{
+  struct bfd_session *sess = t->data;
+
+  sess->last_tx = current_time();
+
+  bfd_send_ctl(sess->ifa->bfd, sess, 0);
+}
+
+/* Hold timer hook: the session stored in the timer data has timed out. */
+static void
+bfd_hold_timer_hook(timer2 *t)
+{
+  struct bfd_session *sess = t->data;
+
+  bfd_session_timeout(sess);
+}
+
+/*
+ * Pick a local session ID for protocol @p: start from a random value and
+ * advance until the value is non-zero and not used by any existing session.
+ */
+static u32
+bfd_get_free_id(struct bfd_proto *p)
+{
+  u32 id = random_u32();
+
+  /* Zero is never returned; occupied IDs are skipped */
+  while (!id || bfd_find_session_by_id(p, id))
+    id++;
+
+  return id;
+}
+
+/*
+ * Create a new session of protocol @p to neighbor @addr, using local address
+ * @local and interface @iface to obtain the BFD interface.
+ *
+ * The whole body runs between birdloop_enter() and birdloop_leave() on
+ * p->loop. The session is allocated from p->session_slab, zeroed, given a
+ * free local ID, inserted into both session hashes, initialized from the
+ * interface configuration, and its TX and hold timers are created.
+ * Returns the new session.
+ */
+static struct bfd_session *
+bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface)
+{
+ birdloop_enter(p->loop);
+
+ /* Finds an existing BFD interface (taking a reference) or creates a new one */
+ struct bfd_iface *ifa = bfd_get_iface(p, local, iface);
+
+ struct bfd_session *s = sl_alloc(p->session_slab);
+ bzero(s, sizeof(struct bfd_session));
+
+ s->addr = addr;
+ s->ifa = ifa;
+ s->loc_id = bfd_get_free_id(p);
+
+ /* Both keys (loc_id and addr) are set above, so the session can be hashed */
+ HASH_INSERT(p->session_hash_id, HASH_ID, s);
+ HASH_INSERT(p->session_hash_ip, HASH_IP, s);
+
+
+ /* Initialization of state variables - see RFC 5880 6.8.1 */
+ s->loc_state = BFD_STATE_DOWN;
+ s->rem_state = BFD_STATE_DOWN;
+ s->des_min_tx_int = s->des_min_tx_new = ifa->cf->idle_tx_int;
+ s->req_min_rx_int = s->req_min_rx_new = ifa->cf->min_rx_int;
+ s->rem_min_rx_int = 1;
+ s->detect_mult = ifa->cf->multiplier;
+ s->passive = ifa->cf->passive;
+
+ /* Timers must exist before the interval/timer helpers below are called */
+ s->tx_timer = tm2_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0);
+ s->hold_timer = tm2_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0);
+ bfd_session_update_tx_interval(s);
+ bfd_session_control_tx_timer(s, 1);
+
+ init_list(&s->request_list);
+ s->last_state_change = now;
+
+ TRACE(D_EVENTS, "Session to %I added", s->addr);
+
+ birdloop_leave(p->loop);
+
+ return s;
+}
+
+/*
+static void
+bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa)
+{
+ birdloop_enter(p->loop);
+
+ s->opened = 1;
+
+ bfd_session_control_tx_timer(s);
+
+ birdloop_leave(p->loop);
+}
+
+static void
+bfd_close_session(struct bfd_proto *p, struct bfd_session *s)
+{
+ birdloop_enter(p->loop);
+
+ s->opened = 0;
+
+ bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN);
+ bfd_session_control_tx_timer(s);
+
+ birdloop_leave(p->loop);
+}
+*/
+
+/*
+ * Remove session @s from protocol @p and release all its resources.
+ *
+ * The whole body runs between birdloop_enter() and birdloop_leave() on
+ * p->loop. The session is unlinked from the notify list (if queued there),
+ * its reference to the BFD interface is dropped, both timers are freed, the
+ * session is removed from both hashes and returned to the slab.
+ */
+static void
+bfd_remove_session(struct bfd_proto *p, struct bfd_session *s)
+{
+  /* Keep the address for the trace message, @s is gone by then */
+  ip_addr ip = s->addr;
+
+  birdloop_enter(p->loop);
+
+  /*
+   * bfd_session_update_state() may have queued the session in p->notify_list
+   * for a notification that was not processed yet. Unlink it before the
+   * session is freed, otherwise the list would keep a dangling node.
+   */
+  bfd_lock_sessions(p);
+  if (NODE_VALID(&s->n))
+    rem_node(&s->n);
+  bfd_unlock_sessions(p);
+
+  bfd_free_iface(s->ifa);
+
+  rfree(s->tx_timer);
+  rfree(s->hold_timer);
+
+  HASH_REMOVE(p->session_hash_id, HASH_ID, s);
+  HASH_REMOVE(p->session_hash_ip, HASH_IP, s);
+
+  sl_free(p->session_slab, s);
+
+  TRACE(D_EVENTS, "Session to %I removed", ip);
+
+  birdloop_leave(p->loop);
+}
+
+/*
+ * Apply the current configuration of the BFD interface to session @s.
+ *
+ * Inside birdloop_enter()/birdloop_leave() on p->loop the TX interval
+ * (min_tx_int when the session is Up, idle_tx_int otherwise), the RX
+ * interval, the detection multiplier and the passive flag are refreshed and
+ * the TX timer is re-evaluated.
+ */
+static void
+bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s)
+{
+  birdloop_enter(p->loop);
+
+  struct bfd_iface_config *cf = s->ifa->cf;
+  u32 tx;
+
+  if (s->loc_state == BFD_STATE_UP)
+    tx = cf->min_tx_int;
+  else
+    tx = cf->idle_tx_int;
+
+  bfd_session_set_min_tx(s, tx);
+  bfd_session_set_min_rx(s, cf->min_rx_int);
+
+  s->detect_mult = cf->multiplier;
+  s->passive = cf->passive;
+
+  bfd_session_control_tx_timer(s, 0);
+
+  birdloop_leave(p->loop);
+
+  TRACE(D_EVENTS, "Session to %I reconfigured", s->addr);
+}
+
+
+/*
+ * BFD interfaces
+ */
+
+/*
+ * Fallback interface configuration, returned by bfd_find_iface_config() when
+ * no specific configuration is found. Fields not listed here are zero.
+ */
+static struct bfd_iface_config bfd_default_iface = {
+ .min_rx_int = BFD_DEFAULT_MIN_RX_INT,
+ .min_tx_int = BFD_DEFAULT_MIN_TX_INT,
+ .idle_tx_int = BFD_DEFAULT_IDLE_TX_INT,
+ .multiplier = BFD_DEFAULT_MULTIPLIER
+};
+
+/*
+ * Find the interface configuration to use for @iface in @cf.
+ *
+ * With an interface, the pattern list of @cf is searched; without one
+ * (@iface is NULL), cf->multihop is used. If that yields nothing, the
+ * built-in default configuration is returned.
+ */
+static inline struct bfd_iface_config *
+bfd_find_iface_config(struct bfd_config *cf, struct iface *iface)
+{
+  struct bfd_iface_config *ic;
+
+  if (iface)
+    ic = (void *) iface_patt_find(&cf->patt_list, iface, NULL);
+  else
+    ic = cf->multihop;
+
+  if (!ic)
+    ic = &bfd_default_iface;
+
+  return ic;
+}
+
+/*
+ * Get a BFD interface of protocol @p for local address @local and interface
+ * @iface.
+ *
+ * If a matching entry already exists in p->iface_list, its use count is
+ * incremented and it is returned. Otherwise a new entry is allocated from
+ * p->tpool, bound to its configuration, given a TX socket, appended to the
+ * list and returned with use count 1.
+ */
+static struct bfd_iface *
+bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface)
+{
+  struct bfd_iface *ifa;
+
+  /* Reuse an existing BFD interface if there is one */
+  WALK_LIST(ifa, p->iface_list)
+  {
+    if (ipa_equal(ifa->local, local) && (ifa->iface == iface))
+    {
+      ifa->uc++;
+      return ifa;
+    }
+  }
+
+  struct bfd_config *cf = (struct bfd_config *) (p->p.cf);
+  struct bfd_iface_config *ic = bfd_find_iface_config(cf, iface);
+
+  ifa = mb_allocz(p->tpool, sizeof(struct bfd_iface));
+  ifa->bfd = p;
+  ifa->cf = ic;
+  ifa->iface = iface;
+  ifa->local = local;
+
+  ifa->sk = bfd_open_tx_sk(p, local, iface);
+  ifa->uc = 1;
+
+  add_tail(&p->iface_list, &ifa->n);
+
+  return ifa;
+}
+
+/*
+ * Drop one reference to BFD interface @ifa (NULL is accepted and ignored).
+ * When the use count reaches zero, the interface is unlinked from its list,
+ * its socket is stopped and freed and the structure itself is freed.
+ */
+static void
+bfd_free_iface(struct bfd_iface *ifa)
+{
+  if (!ifa)
+    return;
+
+  /* Still referenced by another session */
+  if (--ifa->uc)
+    return;
+
+  rem_node(&ifa->n);
+  sk_stop(ifa->sk);
+  rfree(ifa->sk);
+  mb_free(ifa);
+}
+
+/*
+ * Switch @ifa to its configuration from the new config @nc and record in
+ * ifa->changed whether any session-relevant option differs.
+ *
+ * Only the option fields are compared. A memcmp() over the whole
+ * struct bfd_iface_config would also compare the embedded struct iface_patt
+ * (list links and pointers) and padding bytes, which differ between any two
+ * config objects, so every iface would be reported as changed.
+ */
+static void
+bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_config *nc)
+{
+  struct bfd_iface_config *oic = ifa->cf;
+  struct bfd_iface_config *nic = bfd_find_iface_config(nc, ifa->iface);
+
+  ifa->changed =
+    (nic->min_rx_int != oic->min_rx_int) ||
+    (nic->min_tx_int != oic->min_tx_int) ||
+    (nic->idle_tx_int != oic->idle_tx_int) ||
+    (nic->multiplier != oic->multiplier) ||
+    (nic->passive != oic->passive);
+
+  /* This should be probably changed to not access ifa->cf from the BFD thread */
+  birdloop_enter(p->loop);
+  ifa->cf = nic;
+  birdloop_leave(p->loop);
+}
+
+
+/*
+ * BFD requests
+ */
+
+/*
+ * Record a new session state in @req and call its hook, if any.
+ * Nothing happens when @state equals the state already stored in @req.
+ */
+static void
+bfd_request_notify(struct bfd_request *req, u8 state, u8 diag)
+{
+  u8 prev = req->state;
+
+  if (prev == state)
+    return;
+
+  req->old_state = prev;
+  req->state = state;
+  req->diag = diag;
+
+  /* Flag a direct Up -> Down transition */
+  req->down = (prev == BFD_STATE_UP) && (state == BFD_STATE_DOWN);
+
+  if (!req->hook)
+    return;
+
+  req->hook(req);
+}
+
+/*
+ * Attach @req to the session for req->addr in instance @p, creating the
+ * session when none exists, and notify the request about the session's
+ * current local state. Always returns 1.
+ */
+static int
+bfd_add_request(struct bfd_proto *p, struct bfd_request *req)
+{
+ struct bfd_session *s = bfd_find_session_by_addr(p, req->addr);
+ u8 state, diag;
+
+ if (!s)
+ s = bfd_add_session(p, req->addr, req->local, req->iface);
+
+ /* Move the request from whatever list it is on to the session's list */
+ rem_node(&req->n);
+ add_tail(&s->request_list, &req->n);
+ req->session = s;
+
+ /* Session state is sampled under the session lock */
+ bfd_lock_sessions(p);
+ state = s->loc_state;
+ diag = s->loc_diag;
+ bfd_unlock_sessions(p);
+
+ bfd_request_notify(req, state, diag);
+
+ return 1;
+}
+
+/*
+ * Hand @req over to the first BFD instance in bfd_proto_list that accepts
+ * it. When no instance takes it, the request is parked on bfd_wait_list
+ * with no session and notified as BFD_STATE_ADMIN_DOWN.
+ */
+static void
+bfd_submit_request(struct bfd_request *req)
+{
+ node *n;
+
+ WALK_LIST(n, bfd_proto_list)
+ if (bfd_add_request(SKIP_BACK(struct bfd_proto, bfd_node, n), req))
+ return;
+
+ rem_node(&req->n);
+ add_tail(&bfd_wait_list, &req->n);
+ req->session = NULL;
+ bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0);
+}
+
+/*
+ * Move all requests parked on bfd_wait_list to instance @p. The delete-safe
+ * walk is needed because bfd_add_request() unlinks each request.
+ */
+static void
+bfd_take_requests(struct bfd_proto *p)
+{
+ node *n, *nn;
+
+ WALK_LIST_DELSAFE(n, nn, bfd_wait_list)
+ bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n));
+}
+
+/*
+ * Resubmit every request attached to any session of @p. Each call of
+ * bfd_submit_request() unlinks the request from s->request_list, so the
+ * inner loop keeps taking the first node until the list is empty.
+ */
+static void
+bfd_drop_requests(struct bfd_proto *p)
+{
+ node *n;
+
+ HASH_WALK(p->session_hash_id, next_id, s)
+ {
+ /* We assume that p is not in bfd_proto_list */
+ WALK_LIST_FIRST(n, s->request_list)
+ bfd_submit_request(SKIP_BACK(struct bfd_request, n, n));
+ }
+ HASH_WALK_END;
+}
+
+static struct resclass bfd_request_class;
+
+/*
+ * Allocate a BFD request in pool @p for a session to @addr (from @local,
+ * on @iface) and submit it. Returns the new request.
+ *
+ * @hook and @data are stored only after bfd_submit_request(), so
+ * notifications made during submission do not reach @hook
+ * (NOTE(review): this presumes ralloc() returns zeroed memory, so that
+ * req->hook is NULL until then; confirm).
+ */
+struct bfd_request *
+bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface,
+ void (*hook)(struct bfd_request *), void *data)
+{
+ struct bfd_request *req = ralloc(p, &bfd_request_class);
+
+ /* Hack: self-link req->n, we will call rem_node() on it */
+ req->n.prev = req->n.next = &req->n;
+
+ req->addr = addr;
+ req->local = local;
+ req->iface = iface;
+
+ bfd_submit_request(req);
+
+ req->hook = hook;
+ req->data = data;
+
+ return req;
+}
+
+/*
+ * Free hook of bfd_request_class: unlink the request and remove its
+ * session when this was the last request for it.
+ */
+static void
+bfd_request_free(resource *r)
+{
+ struct bfd_request *req = (struct bfd_request *) r;
+ struct bfd_session *s = req->session;
+
+ rem_node(&req->n);
+
+ /* Remove the session if there is no request for it. Skip that if
+ inside notify hooks, will be handled by bfd_notify_hook() itself */
+
+ if (s && EMPTY_LIST(s->request_list) && !s->notify_running)
+ bfd_remove_session(s->ifa->bfd, s);
+}
+
+/* Dump hook of bfd_request_class: print the request's hook and data pointers */
+static void
+bfd_request_dump(resource *r)
+{
+ struct bfd_request *req = (struct bfd_request *) r;
+
+ debug("(code %p, data %p)\n", req->hook, req->data);
+}
+
+/*
+ * Resource class of BFD requests, passed to ralloc() in
+ * bfd_request_session(). The two trailing NULL entries are unused
+ * members of struct resclass (their meaning is not visible here).
+ */
+static struct resclass bfd_request_class = {
+ "BFD request",
+ sizeof(struct bfd_request),
+ bfd_request_free,
+ bfd_request_dump,
+ NULL,
+ NULL
+};
+
+
+/*
+ * BFD neighbors
+ */
+
+/*
+ * Neighbor notification hook (installed in bfd_init()). Requests a session
+ * when the neighbor's scope is positive and no request exists yet, and
+ * frees the request when the scope is not positive. Neighbor entries
+ * without attached struct bfd_neighbor (nb->data) are ignored.
+ */
+static void
+bfd_neigh_notify(struct neighbor *nb)
+{
+ struct bfd_proto *p = (struct bfd_proto *) nb->proto;
+ struct bfd_neighbor *n = nb->data;
+
+ if (!n)
+ return;
+
+ if ((nb->scope > 0) && !n->req)
+ {
+ /* Fall back to the interface address when no local address is configured */
+ ip_addr local = ipa_nonzero(n->local) ? n->local : nb->iface->addr->ip;
+ n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, NULL, NULL);
+ }
+
+ if ((nb->scope <= 0) && n->req)
+ {
+ rfree(n->req);
+ n->req = NULL;
+ }
+}
+
+/*
+ * Activate configured neighbor @n. A multihop neighbor gets its session
+ * request immediately. Otherwise @n is bound to a sticky neighbor entry and
+ * the request is created by bfd_neigh_notify(), either right away (scope
+ * already positive) or on a later notification. Errors are logged and the
+ * neighbor is left without a request.
+ */
+static void
+bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n)
+{
+ n->active = 1;
+
+ if (n->multihop)
+ {
+ n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, NULL, NULL);
+ return;
+ }
+
+ struct neighbor *nb = neigh_find2(&p->p, &n->addr, n->iface, NEF_STICKY);
+ if (!nb)
+ {
+ log(L_ERR "%s: Invalid remote address %I%J", p->p.name, n->addr, n->iface);
+ return;
+ }
+
+ /* The neighbor entry is already bound to another bfd_neighbor */
+ if (nb->data)
+ {
+ log(L_ERR "%s: Duplicate neighbor %I", p->p.name, n->addr);
+ return;
+ }
+
+ n->neigh = nb;
+ nb->data = n;
+
+ if (nb->scope > 0)
+ bfd_neigh_notify(nb);
+ else
+ TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", n->addr, n->iface);
+}
+
+/*
+ * Deactivate neighbor @n: detach it from its neighbor entry (if any) and
+ * free its session request.
+ */
+static void
+bfd_stop_neighbor(struct bfd_proto *p, struct bfd_neighbor *n)
+{
+  struct neighbor *nb = n->neigh;
+
+  if (nb)
+    nb->data = NULL;
+  n->neigh = NULL;
+
+  rfree(n->req);
+  n->req = NULL;
+}
+
+/*
+ * Return 1 when @x and @y describe the same neighbor (remote address, local
+ * address, interface and multihop flag all equal), 0 otherwise.
+ */
+static inline int
+bfd_same_neighbor(struct bfd_neighbor *x, struct bfd_neighbor *y)
+{
+  if (!ipa_equal(x->addr, y->addr))
+    return 0;
+
+  if (!ipa_equal(x->local, y->local))
+    return 0;
+
+  if (x->iface != y->iface)
+    return 0;
+
+  return x->multihop == y->multihop;
+}
+
+/*
+ * Carry configured neighbors over a reconfiguration.
+ *
+ * Every old neighbor that has an identical entry (bfd_same_neighbor()) in
+ * the new configuration hands its neighbor entry and session request over
+ * to that entry, which is marked active. Old neighbors without a counterpart
+ * are stopped. Finally, new neighbors that inherited nothing are started.
+ */
+static void
+bfd_reconfigure_neighbors(struct bfd_proto *p, struct bfd_config *new)
+{
+  struct bfd_config *old = (struct bfd_config *) (p->p.cf);
+  struct bfd_neighbor *on, *nn;
+
+  WALK_LIST(on, old->neigh_list)
+  {
+    WALK_LIST(nn, new->neigh_list)
+      if (bfd_same_neighbor(nn, on))
+      {
+        nn->neigh = on->neigh;
+        if (nn->neigh)
+          nn->neigh->data = nn;
+
+        nn->req = on->req;
+        nn->active = 1;
+
+        /* Matched, continue with the next old neighbor. Returning from the
+           function here would leave the remaining old neighbors unprocessed
+           and the new neighbors never started. */
+        goto next;
+      }
+
+    bfd_stop_neighbor(p, on);
+  next:;
+  }
+
+  WALK_LIST(nn, new->neigh_list)
+    if (!nn->active)
+      bfd_start_neighbor(p, nn);
+}
+
+
+/*
+ * BFD notify socket
+ */
+
+/* This core notify code should be replaced after main loop transition to birdloop */
+
+int pipe(int pipefd[2]);
+void pipe_drain(int fd);
+void pipe_kick(int fd);
+
+/*
+ * RX hook of the notify pipe's read socket. Takes over the list of sessions
+ * queued in p->notify_list and reports the current state of each of them to
+ * all its requests. Returns 0.
+ */
+static int
+bfd_notify_hook(sock *sk, int len)
+{
+ struct bfd_proto *p = sk->data;
+ struct bfd_session *s;
+ list tmp_list;
+ u8 state, diag;
+ node *n, *nn;
+
+ pipe_drain(sk->fd);
+
+ /* Move the whole notify list to a private list under the session lock */
+ bfd_lock_sessions(p);
+ init_list(&tmp_list);
+ add_tail_list(&tmp_list, &p->notify_list);
+ init_list(&p->notify_list);
+ bfd_unlock_sessions(p);
+
+ WALK_LIST_FIRST(s, tmp_list)
+ {
+ /* Unlink the session and sample its state under the session lock */
+ bfd_lock_sessions(p);
+ rem2_node(&s->n);
+ state = s->loc_state;
+ diag = s->loc_diag;
+ bfd_unlock_sessions(p);
+
+ /* FIXME: convert to btime and move to bfd_session_update_state() */
+ s->last_state_change = now;
+
+ /* notify_running keeps bfd_request_free() from removing the session
+ while its request list is being walked */
+ s->notify_running = 1;
+ WALK_LIST_DELSAFE(n, nn, s->request_list)
+ bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag);
+ s->notify_running = 0;
+
+ /* Remove the session if all requests were removed in notify hooks */
+ if (EMPTY_LIST(s->request_list))
+ bfd_remove_session(p, s);
+ }
+
+ return 0;
+}
+
+/* Write to the notify pipe so that bfd_notify_hook() gets called on its read end */
+static inline void
+bfd_notify_kick(struct bfd_proto *p)
+{
+ pipe_kick(p->notify_ws->fd);
+}
+
+/* Error hook of the notify pipe's read socket: just log the error */
+static void
+bfd_noterr_hook(sock *sk, int err)
+{
+ struct bfd_proto *p = sk->data;
+ log(L_ERR "%s: Notify socket error: %m", p->p.name, err);
+}
+
+/*
+ * Create the notify pipe and wrap both ends in SK_MAGIC sockets allocated
+ * from the protocol pool: p->notify_rs (read end, with RX and error hooks)
+ * and p->notify_ws (write end, flagged SKF_THREAD). Any failure is fatal.
+ */
+static void
+bfd_notify_init(struct bfd_proto *p)
+{
+ int pfds[2];
+ sock *sk;
+
+ int rv = pipe(pfds);
+ if (rv < 0)
+ die("pipe: %m");
+
+ /* Read end */
+ sk = sk_new(p->p.pool);
+ sk->type = SK_MAGIC;
+ sk->rx_hook = bfd_notify_hook;
+ sk->err_hook = bfd_noterr_hook;
+ sk->fd = pfds[0];
+ sk->data = p;
+ if (sk_open(sk) < 0)
+ die("bfd: sk_open failed");
+ p->notify_rs = sk;
+
+ /* The write sock is not added to any event loop */
+ sk = sk_new(p->p.pool);
+ sk->type = SK_MAGIC;
+ sk->fd = pfds[1];
+ sk->data = p;
+ sk->flags = SKF_THREAD;
+ if (sk_open(sk) < 0)
+ die("bfd: sk_open failed");
+ p->notify_ws = sk;
+}
+
+
+/*
+ * BFD protocol glue
+ */
+
+/* Initialize the global lists of BFD instances and of waiting requests */
+void
+bfd_init_all(void)
+{
+ init_list(&bfd_proto_list);
+ init_list(&bfd_wait_list);
+}
+
+/* Protocol init hook: allocate the instance and install the neighbor notify hook */
+static struct proto *
+bfd_init(struct proto_config *c)
+{
+ struct proto *p = proto_new(c, sizeof(struct bfd_proto));
+
+ p->neigh_notify = bfd_neigh_notify;
+
+ return p;
+}
+
+/*
+ * Protocol start hook. Sets up the BFD loop, pools, lock, session hashes,
+ * notify pipe and RX sockets, registers the instance in bfd_proto_list,
+ * adopts waiting requests, starts configured neighbors and finally starts
+ * the loop thread. Returns PS_UP.
+ */
+static int
+bfd_start(struct proto *P)
+{
+ struct bfd_proto *p = (struct bfd_proto *) P;
+ struct bfd_config *cf = (struct bfd_config *) (P->cf);
+
+ p->loop = birdloop_new();
+ p->tpool = rp_new(NULL, "BFD thread root");
+ pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE);
+
+ p->session_slab = sl_new(P->pool, sizeof(struct bfd_session));
+ HASH_INIT(p->session_hash_id, P->pool, 8);
+ HASH_INIT(p->session_hash_ip, P->pool, 8);
+
+ init_list(&p->iface_list);
+
+ init_list(&p->notify_list);
+ bfd_notify_init(p);
+
+ add_tail(&bfd_proto_list, &p->bfd_node);
+
+ /* RX sockets are opened inside the loop context */
+ birdloop_enter(p->loop);
+ p->rx_1 = bfd_open_rx_sk(p, 0);
+ p->rx_m = bfd_open_rx_sk(p, 1);
+ birdloop_leave(p->loop);
+
+ bfd_take_requests(p);
+
+ struct bfd_neighbor *n;
+ WALK_LIST(n, cf->neigh_list)
+ bfd_start_neighbor(p, n);
+
+ birdloop_start(p->loop);
+
+ return PS_UP;
+}
+
+
+/*
+ * Protocol shutdown hook. Unregisters the instance, stops the loop thread,
+ * stops configured neighbors, resubmits the remaining requests and frees
+ * the thread pool and the loop. Returns PS_DOWN.
+ */
+static int
+bfd_shutdown(struct proto *P)
+{
+ struct bfd_proto *p = (struct bfd_proto *) P;
+ struct bfd_config *cf = (struct bfd_config *) (P->cf);
+
+ /* Unregister first, bfd_drop_requests() assumes p is not in bfd_proto_list */
+ rem_node(&p->bfd_node);
+
+ birdloop_stop(p->loop);
+
+ struct bfd_neighbor *n;
+ WALK_LIST(n, cf->neigh_list)
+ bfd_stop_neighbor(p, n);
+
+ bfd_drop_requests(p);
+
+ /* FIXME: This is hack */
+ birdloop_enter(p->loop);
+ rfree(p->tpool);
+ birdloop_leave(p->loop);
+
+ birdloop_free(p->loop);
+
+ return PS_DOWN;
+}
+
+/*
+ * Protocol reconfigure hook. Switches all ifaces to the new configuration,
+ * reconfigures sessions on ifaces marked as changed and updates configured
+ * neighbors. Loop wakeups are masked for the duration. Always returns 1.
+ */
+static int
+bfd_reconfigure(struct proto *P, struct proto_config *c)
+{
+ struct bfd_proto *p = (struct bfd_proto *) P;
+ // struct bfd_config *old = (struct bfd_config *) (P->cf);
+ struct bfd_config *new = (struct bfd_config *) c;
+ struct bfd_iface *ifa;
+
+ birdloop_mask_wakeups(p->loop);
+
+ WALK_LIST(ifa, p->iface_list)
+ bfd_reconfigure_iface(p, ifa, new);
+
+ HASH_WALK(p->session_hash_id, next_id, s)
+ {
+ if (s->ifa->changed)
+ bfd_reconfigure_session(p, s);
+ }
+ HASH_WALK_END;
+
+ bfd_reconfigure_neighbors(p, new);
+
+ birdloop_unmask_wakeups(p->loop);
+
+ return 1;
+}
+
+/* Ensure one instance */
+struct bfd_config *bfd_cf;
+
+/* Preconfig hook: reset bfd_cf, which marks whether a BFD instance was already configured */
+static void
+bfd_preconfig(struct protocol *P UNUSED, struct config *c UNUSED)
+{
+ bfd_cf = NULL;
+}
+
+/*
+ * Copy-config hook: only re-initializes the two lists of @dest to empty;
+ * nothing is taken from @src here.
+ */
+static void
+bfd_copy_config(struct proto_config *dest, struct proto_config *src)
+{
+ struct bfd_config *d = (struct bfd_config *) dest;
+ // struct bfd_config *s = (struct bfd_config *) src;
+
+ /* We clean up patt_list and neigh_list, neighbors and ifaces are non-sharable */
+ init_list(&d->patt_list);
+ init_list(&d->neigh_list);
+}
+
+/*
+ * CLI handler: print one line per session of instance @P (address,
+ * interface, state, time of last state change, TX interval and detection
+ * timeout). Prints just a notice when the protocol is not up.
+ */
+void
+bfd_show_sessions(struct proto *P)
+{
+ byte tbuf[TM_DATETIME_BUFFER_SIZE];
+ struct bfd_proto *p = (struct bfd_proto *) P;
+ uint state, diag;
+ u32 tx_int, timeout;
+ const char *ifname;
+
+ if (p->p.proto_state != PS_UP)
+ {
+ cli_msg(-1013, "%s: is not up", p->p.name);
+ cli_msg(0, "");
+ return;
+ }
+
+ cli_msg(-1013, "%s:", p->p.name);
+ cli_msg(-1013, "%-25s %-10s %-10s %-10s %8s %8s",
+ "IP address", "Interface", "State", "Since", "Interval", "Timeout");
+
+
+ HASH_WALK(p->session_hash_id, next_id, s)
+ {
+ /* FIXME: this is thread-unsafe, but perhaps harmless */
+ state = s->loc_state;
+ diag = s->loc_diag;
+ ifname = (s->ifa && s->ifa->sk->iface) ? s->ifa->sk->iface->name : "---";
+ /* Both values are converted to ms; the interval is shown as 0 until last_tx is set */
+ tx_int = s->last_tx ? (MAX(s->des_min_tx_int, s->rem_min_rx_int) TO_MS) : 0;
+ timeout = (MAX(s->req_min_rx_int, s->rem_min_tx_int) TO_MS) * s->rem_detect_mult;
+
+ /* Keep the index into bfd_state_names[] within the four defined states */
+ state = (state < 4) ? state : 0;
+ tm_format_datetime(tbuf, &config->tf_proto, s->last_state_change);
+
+ cli_msg(-1013, "%-25I %-10s %-10s %-10s %3u.%03u %3u.%03u",
+ s->addr, ifname, bfd_state_names[state], tbuf,
+ tx_int / 1000, tx_int % 1000, timeout / 1000, timeout % 1000);
+ }
+ HASH_WALK_END;
+
+ cli_msg(0, "");
+}
+
+
+/* Protocol descriptor binding the BFD hooks defined in this file */
+struct protocol proto_bfd = {
+ .name = "BFD",
+ .template = "bfd%d",
+ .init = bfd_init,
+ .start = bfd_start,
+ .shutdown = bfd_shutdown,
+ .reconfigure = bfd_reconfigure,
+ .preconfig = bfd_preconfig,
+ .copy_config = bfd_copy_config,
+};
diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h
new file mode 100644
index 00000000..9b61be64
--- /dev/null
+++ b/proto/bfd/bfd.h
@@ -0,0 +1,191 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD)
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_BFD_H_
+#define _BIRD_BFD_H_
+
+#include <pthread.h>
+
+#include "nest/bird.h"
+#include "nest/cli.h"
+#include "nest/iface.h"
+#include "nest/protocol.h"
+#include "nest/route.h"
+#include "conf/conf.h"
+#include "lib/hash.h"
+#include "lib/resource.h"
+#include "lib/socket.h"
+#include "lib/string.h"
+
+#include "nest/bfd.h"
+#include "io.h"
+
+
+#define BFD_CONTROL_PORT 3784
+#define BFD_ECHO_PORT 3785
+#define BFD_MULTI_CTL_PORT 4784
+
+#define BFD_DEFAULT_MIN_RX_INT (10 MS_)
+#define BFD_DEFAULT_MIN_TX_INT (100 MS_)
+#define BFD_DEFAULT_IDLE_TX_INT (1 S_)
+#define BFD_DEFAULT_MULTIPLIER 5
+
+
+struct bfd_iface_config;
+
+/* Configuration of a BFD protocol instance */
+struct bfd_config
+{
+ struct proto_config c;
+ list patt_list; /* List of iface configs (struct bfd_iface_config) */
+ list neigh_list; /* List of configured neighbors (struct bfd_neighbor) */
+ struct bfd_iface_config *multihop; /* Multihop pseudoiface config */
+};
+
+/* Session options configured per interface pattern (or for multihop sessions) */
+struct bfd_iface_config
+{
+ struct iface_patt i;
+ u32 min_rx_int; /* Config option 'min rx interval' */
+ u32 min_tx_int; /* Config option 'min tx interval' */
+ u32 idle_tx_int; /* Config option 'idle tx interval' */
+ u8 multiplier; /* Config option 'multiplier' */
+ u8 passive; /* Config option 'passive' */
+};
+
+/* Neighbor configured by the 'neighbor' option, node of bfd_config.neigh_list */
+struct bfd_neighbor
+{
+ node n;
+ ip_addr addr; /* Remote address */
+ ip_addr local; /* Configured local address, may be unset */
+ struct iface *iface; /* Configured interface, may be NULL */
+
+ struct neighbor *neigh; /* Associated neighbor entry (non-multihop only) */
+ struct bfd_request *req; /* Session request, NULL when none is active */
+
+ u8 multihop;
+ u8 active; /* Set once the neighbor was started or inherited on reconfigure */
+};
+
+/* Running BFD protocol instance */
+struct bfd_proto
+{
+ struct proto p;
+ struct birdloop *loop; /* Event loop created in bfd_start() */
+ pool *tpool; /* Pool "BFD thread root", freed inside the loop context */
+ pthread_spinlock_t lock; /* Taken by bfd_lock_sessions() */
+ node bfd_node; /* Node in bfd_proto_list */
+
+ slab *session_slab;
+ HASH(struct bfd_session) session_hash_id;
+ HASH(struct bfd_session) session_hash_ip;
+
+ sock *notify_rs; /* Read end of the notify pipe */
+ sock *notify_ws; /* Write end of the notify pipe */
+ list notify_list; /* Sessions queued for bfd_notify_hook() */
+
+ sock *rx_1; /* RX socket opened with multihop = 0 */
+ sock *rx_m; /* RX socket opened with multihop = 1 */
+ list iface_list; /* List of struct bfd_iface */
+};
+
+/* Per (local address, interface) state shared by sessions, node of bfd_proto.iface_list */
+struct bfd_iface
+{
+ node n;
+ ip_addr local;
+ struct iface *iface;
+ struct bfd_iface_config *cf; /* Current option set, replaced on reconfigure */
+ struct bfd_proto *bfd; /* Owning protocol instance */
+
+ sock *sk; /* TX socket */
+ u32 uc; /* Use count */
+ u8 changed; /* Set on reconfigure when the options differ */
+};
+
+/* State of one BFD session */
+struct bfd_session
+{
+ node n;
+ ip_addr addr; /* Address of session */
+ struct bfd_iface *ifa; /* Iface associated with session */
+ struct bfd_session *next_id; /* Next in bfd.session_hash_id */
+ struct bfd_session *next_ip; /* Next in bfd.session_hash_ip */
+
+ u8 opened_unused;
+ u8 passive;
+ u8 poll_active;
+ u8 poll_scheduled;
+
+ u8 loc_state;
+ u8 rem_state;
+ u8 loc_diag;
+ u8 rem_diag;
+ u32 loc_id; /* Local session ID (local discriminator) */
+ u32 rem_id; /* Remote session ID (remote discriminator) */
+ u32 des_min_tx_int; /* Desired min tx interval, local option */
+ u32 des_min_tx_new; /* Used for des_min_tx_int change */
+ u32 req_min_rx_int; /* Required min rx interval, local option */
+ u32 req_min_rx_new; /* Used for req_min_rx_int change */
+ u32 rem_min_tx_int; /* Last received des_min_tx_int */
+ u32 rem_min_rx_int; /* Last received req_min_rx_int */
+ u8 demand_mode; /* Currently unused */
+ u8 rem_demand_mode;
+ u8 detect_mult; /* Announced detect_mult, local option */
+ u8 rem_detect_mult; /* Last received detect_mult */
+
+ btime last_tx; /* Time of last sent periodic control packet */
+ btime last_rx; /* Time of last received valid control packet */
+
+ timer2 *tx_timer; /* Periodic control packet timer */
+ timer2 *hold_timer; /* Timer for session down detection time */
+
+ list request_list; /* List of client requests (struct bfd_request) */
+ bird_clock_t last_state_change; /* Time of last state change */
+ u8 notify_running; /* 1 if notify hooks are running */
+};
+
+
+extern const char *bfd_state_names[];
+
+#define BFD_STATE_ADMIN_DOWN 0
+#define BFD_STATE_DOWN 1
+#define BFD_STATE_INIT 2
+#define BFD_STATE_UP 3
+
+#define BFD_DIAG_NOTHING 0
+#define BFD_DIAG_TIMEOUT 1
+#define BFD_DIAG_ECHO_FAILED 2
+#define BFD_DIAG_NEIGHBOR_DOWN 3
+#define BFD_DIAG_FWD_RESET 4
+#define BFD_DIAG_PATH_DOWN 5
+#define BFD_DIAG_C_PATH_DOWN 6
+#define BFD_DIAG_ADMIN_DOWN 7
+#define BFD_DIAG_RC_PATH_DOWN 8
+
+#define BFD_POLL_TX 1
+#define BFD_POLL_RX 2
+
+#define BFD_FLAGS 0x3f
+#define BFD_FLAG_POLL (1 << 5)
+#define BFD_FLAG_FINAL (1 << 4)
+#define BFD_FLAG_CPI (1 << 3)
+#define BFD_FLAG_AP (1 << 2)
+#define BFD_FLAG_DEMAND (1 << 1)
+#define BFD_FLAG_MULTIPOINT (1 << 0)
+
+
+/* Acquire the spinlock p->lock */
+static inline void bfd_lock_sessions(struct bfd_proto *p) { pthread_spin_lock(&p->lock); }
+/* Release the spinlock p->lock */
+static inline void bfd_unlock_sessions(struct bfd_proto *p) { pthread_spin_unlock(&p->lock); }
+
+/* bfd.c */
+struct bfd_session * bfd_find_session_by_id(struct bfd_proto *p, u32 id);
+struct bfd_session * bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr);
+void bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old_rx_int);
+void bfd_show_sessions(struct proto *P);
+
+/* packets.c */
+void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final);
+sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop);
+sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa);
+
+
+#endif /* _BIRD_BFD_H_ */
diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y
new file mode 100644
index 00000000..1bf8764f
--- /dev/null
+++ b/proto/bfd/config.Y
@@ -0,0 +1,138 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD) Configuration
+ *
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+CF_HDR
+
+#include "proto/bfd/bfd.h"
+
+CF_DEFINES
+
+#define BFD_CFG ((struct bfd_config *) this_proto)
+#define BFD_IFACE ((struct bfd_iface_config *) this_ipatt)
+#define BFD_NEIGHBOR this_bfd_neighbor
+
+static struct bfd_neighbor *this_bfd_neighbor;
+
+extern struct bfd_config *bfd_cf;
+
+CF_DECLS
+
+CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE,
+ INTERFACE, MULTIHOP, NEIGHBOR, DEV, LOCAL)
+
+%type <iface> bfd_neigh_iface
+%type <a> bfd_neigh_local
+%type <i> bfd_neigh_multihop
+
+CF_GRAMMAR
+
+CF_ADDTO(proto, bfd_proto)
+
+/*
+ * Start of a BFD protocol block: allocate the config with empty lists and
+ * reject a second BFD instance in the same configuration (bfd_cf is reset
+ * in bfd_preconfig()).
+ */
+bfd_proto_start: proto_start BFD
+{
+ this_proto = proto_config_new(&proto_bfd, sizeof(struct bfd_config), $1);
+ init_list(&BFD_CFG->patt_list);
+ init_list(&BFD_CFG->neigh_list);
+
+ if (bfd_cf)
+ cf_error("Only one BFD instance allowed");
+ bfd_cf = BFD_CFG;
+};
+
+bfd_proto_item:
+ proto_item
+ | INTERFACE bfd_iface
+ | MULTIHOP bfd_multihop
+ | NEIGHBOR bfd_neighbor
+ ;
+
+bfd_proto_opts:
+ /* empty */
+ | bfd_proto_opts bfd_proto_item ';'
+ ;
+
+bfd_proto:
+ bfd_proto_start proto_name '{' bfd_proto_opts '}';
+
+
+/* Allocate a zeroed iface config in this_ipatt and preset the default option values */
+bfd_iface_start:
+{
+ this_ipatt = cfg_allocz(sizeof(struct bfd_iface_config));
+ init_list(&this_ipatt->ipn_list);
+
+ BFD_IFACE->min_rx_int = BFD_DEFAULT_MIN_RX_INT;
+ BFD_IFACE->min_tx_int = BFD_DEFAULT_MIN_TX_INT;
+ BFD_IFACE->idle_tx_int = BFD_DEFAULT_IDLE_TX_INT;
+ BFD_IFACE->multiplier = BFD_DEFAULT_MULTIPLIER;
+};
+
+/*
+ * Options of an interface (or multihop) block. Plain INTERVAL sets both the
+ * min RX and the min TX interval.
+ * NOTE(review): the multiplier value is stored into a u8 field without any
+ * visible range check; confirm whether out-of-range values should be
+ * rejected here.
+ */
+bfd_iface_item:
+ INTERVAL expr_us { BFD_IFACE->min_rx_int = BFD_IFACE->min_tx_int = $2; }
+ | MIN RX INTERVAL expr_us { BFD_IFACE->min_rx_int = $4; }
+ | MIN TX INTERVAL expr_us { BFD_IFACE->min_tx_int = $4; }
+ | IDLE TX INTERVAL expr_us { BFD_IFACE->idle_tx_int = $4; }
+ | MULTIPLIER expr { BFD_IFACE->multiplier = $2; }
+ | PASSIVE bool { BFD_IFACE->passive = $2; }
+ ;
+
+bfd_iface_opts:
+ /* empty */
+ | bfd_iface_opts bfd_iface_item ';'
+ ;
+
+bfd_iface_opt_list:
+ /* empty */
+ | '{' bfd_iface_opts '}'
+ ;
+
+/* Interface block: append the finished iface config to the instance's pattern list */
+bfd_iface: bfd_iface_start iface_patt_list bfd_iface_opt_list
+{ add_tail(&BFD_CFG->patt_list, NODE this_ipatt); };
+
+/* Multihop block: same options as an interface block, stored as the multihop config */
+bfd_multihop: bfd_iface_start bfd_iface_opt_list
+{ BFD_CFG->multihop = BFD_IFACE; };
+
+
+/* Optional neighbor interface, written either as '%' name or as DEV "name"; NULL when absent */
+bfd_neigh_iface:
+ /* empty */ { $$ = NULL; }
+ | '%' SYM { $$ = if_get_by_name($2->name); }
+ | DEV TEXT { $$ = if_get_by_name($2); }
+ ;
+
+bfd_neigh_local:
+ /* empty */ { $$ = IPA_NONE; }
+ | LOCAL ipa { $$ = $2; }
+ ;
+
+bfd_neigh_multihop:
+ /* empty */ { $$ = 0; }
+ | MULTIHOP bool { $$ = $2; }
+ ;
+
+/*
+ * Neighbor definition: allocate the neighbor, append it to the instance's
+ * neighbor list and validate the option combination (multihop excludes an
+ * interface and requires a local address).
+ */
+bfd_neighbor: ipa bfd_neigh_iface bfd_neigh_local bfd_neigh_multihop
+{
+ this_bfd_neighbor = cfg_allocz(sizeof(struct bfd_neighbor));
+ add_tail(&BFD_CFG->neigh_list, NODE this_bfd_neighbor);
+
+ BFD_NEIGHBOR->addr = $1;
+ BFD_NEIGHBOR->local = $3;
+ BFD_NEIGHBOR->iface = $2;
+ BFD_NEIGHBOR->multihop = $4;
+
+ if ($4 && $2)
+ cf_error("Neighbor cannot set both interface and multihop");
+
+ if ($4 && ipa_zero($3))
+ cf_error("Multihop neighbor requires specified local address");
+};
+
+
+CF_CLI(SHOW BFD SESSIONS, optsym, [<name>], [[Show information about BFD sessions]])
+{ bfd_show_sessions(proto_get_named($4, &proto_bfd)); };
+
+CF_CODE
+
+CF_END
diff --git a/proto/bfd/io.c b/proto/bfd/io.c
new file mode 100644
index 00000000..fb150040
--- /dev/null
+++ b/proto/bfd/io.c
@@ -0,0 +1,768 @@
+/*
+ * BIRD -- I/O and event loop
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+#include <time.h>
+#include <sys/time.h>
+
+#include "nest/bird.h"
+#include "proto/bfd/io.h"
+
+#include "lib/buffer.h"
+#include "lib/heap.h"
+#include "lib/lists.h"
+#include "lib/resource.h"
+#include "lib/event.h"
+#include "lib/socket.h"
+
+
+/* State of one event loop running in its own thread */
+struct birdloop
+{
+ pool *pool;
+ pthread_t thread;
+ pthread_mutex_t mutex; /* Held by the loop thread except while it is in poll() */
+
+ btime last_time; /* Loop time, updated by times_update() */
+ btime real_time; /* Last gettimeofday() value, used only without monotonic clock */
+ u8 use_monotonic_clock;
+
+ u8 stop_called; /* Set by birdloop_stop(), makes the loop thread exit */
+ u8 poll_active; /* Set while the loop thread is in poll() */
+ u8 wakeup_masked; /* 0 = not masked, 1 = masked, 2 = masked and a kick is pending */
+ int wakeup_fds[2]; /* Wakeup pipe: [0] is polled, [1] is written by kicks */
+
+ BUFFER(timer2 *) timers; /* Timer heap, slot 0 is an unused placeholder */
+ list event_list;
+ list sock_list;
+ uint sock_num; /* Number of sockets in sock_list */
+
+ BUFFER(sock *) poll_sk; /* Sockets in the same order as poll_fd entries */
+ BUFFER(struct pollfd) poll_fd; /* Array for poll(), last entry is the wakeup fd */
+ u8 poll_changed; /* Poll arrays must be rebuilt by sockets_prepare() */
+ u8 close_scheduled; /* Fds of stopped sockets wait for sockets_close_fds() */
+};
+
+
+/*
+ * Current thread context
+ */
+
+static pthread_key_t current_loop_key;
+
+/* Return the loop stored for the calling thread (the value of current_loop_key) */
+static inline struct birdloop *
+birdloop_current(void)
+{
+ return pthread_getspecific(current_loop_key);
+}
+
+/* Store @loop as the current loop of the calling thread */
+static inline void
+birdloop_set_current(struct birdloop *loop)
+{
+ pthread_setspecific(current_loop_key, loop);
+}
+
+/* Create the thread-specific key used by birdloop_current(); no destructor is registered */
+static inline void
+birdloop_init_current(void)
+{
+ pthread_key_create(&current_loop_key, NULL);
+}
+
+
+/*
+ * Time clock
+ */
+
+static void times_update_alt(struct birdloop *loop);
+
+/*
+ * Initialize the loop clock. Uses CLOCK_MONOTONIC when available; otherwise
+ * falls back to the gettimeofday()-based clock of times_update_alt().
+ */
+static void
+times_init(struct birdloop *loop)
+{
+ struct timespec ts;
+ int rv;
+
+ rv = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (rv < 0)
+ {
+ log(L_WARN "Monotonic clock is missing");
+
+ loop->use_monotonic_clock = 0;
+ loop->last_time = 0;
+ loop->real_time = 0;
+ times_update_alt(loop);
+ return;
+ }
+
+ /* Sanity check only, the value is used anyway */
+ if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40)))
+ log(L_WARN "Monotonic clock is crazy");
+
+ loop->use_monotonic_clock = 1;
+ loop->last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
+ loop->real_time = 0;
+}
+
+/*
+ * Update loop->last_time from CLOCK_MONOTONIC. A clock going backwards is
+ * logged but the new value is used anyway; a failing clock is fatal.
+ */
+static void
+times_update_pri(struct birdloop *loop)
+{
+ struct timespec ts;
+ int rv;
+
+ rv = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (rv < 0)
+ die("clock_gettime: %m");
+
+ btime new_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
+
+ if (new_time < loop->last_time)
+ log(L_ERR "Monotonic clock is broken");
+
+ loop->last_time = new_time;
+ loop->real_time = 0;
+}
+
+/*
+ * Fallback clock update based on gettimeofday(). loop->last_time advances
+ * by the wall-clock delta since the previous call; a negative delta or one
+ * above 60 s is treated as a time jump and replaced by 100 ms.
+ */
+static void
+times_update_alt(struct birdloop *loop)
+{
+ struct timeval tv;
+ int rv;
+
+ rv = gettimeofday(&tv, NULL);
+ if (rv < 0)
+ die("gettimeofday: %m");
+
+ btime new_time = ((s64) tv.tv_sec S) + tv.tv_usec;
+ btime delta = new_time - loop->real_time;
+
+ if ((delta < 0) || (delta > (60 S)))
+ {
+ /* Stay silent on the first call, when real_time is still zero */
+ if (loop->real_time)
+ log(L_WARN "Time jump, delta %d us", (int) delta);
+
+ delta = 100 MS;
+ }
+
+ loop->last_time += delta;
+ loop->real_time = new_time;
+}
+
+/* Refresh the loop time using whichever clock source times_init() selected */
+static void
+times_update(struct birdloop *loop)
+{
+  if (!loop->use_monotonic_clock)
+  {
+    times_update_alt(loop);
+    return;
+  }
+
+  times_update_pri(loop);
+}
+
+/*
+ * Return the cached time of the calling thread's current loop. The result
+ * of birdloop_current() is dereferenced without a NULL check.
+ */
+btime
+current_time(void)
+{
+ return birdloop_current()->last_time;
+}
+
+
+/*
+ * Wakeup code for birdloop
+ */
+
+/* Create a pipe in @pfds with both ends set to O_NONBLOCK; any failure is fatal */
+static void
+pipe_new(int *pfds)
+{
+  int i;
+
+  if (pipe(pfds) < 0)
+    die("pipe: %m");
+
+  for (i = 0; i < 2; i++)
+    if (fcntl(pfds[i], F_SETFL, O_NONBLOCK) < 0)
+      die("fcntl(O_NONBLOCK): %m");
+}
+
+/*
+ * Read and discard everything pending on @fd. Reading continues while full
+ * 64-byte chunks are returned, EINTR is retried, EAGAIN ends the drain and
+ * any other read error is fatal.
+ */
+void
+pipe_drain(int fd)
+{
+  char buf[64];
+  int rv;
+
+  for (;;)
+  {
+    rv = read(fd, buf, 64);
+
+    if (rv < 0)
+    {
+      if (errno == EINTR)
+        continue;
+      if (errno == EAGAIN)
+        return;
+      die("wakeup read: %m");
+    }
+
+    /* A short read means there is nothing more to drain */
+    if (rv != 64)
+      return;
+  }
+}
+
+/*
+ * Write one u64 value to @fd to wake up the reader. EINTR is retried,
+ * EAGAIN is silently accepted and any other write error is fatal.
+ */
+void
+pipe_kick(int fd)
+{
+  u64 v = 1;
+
+  for (;;)
+  {
+    if (write(fd, &v, sizeof(u64)) >= 0)
+      return;
+
+    if (errno == EINTR)
+      continue;
+
+    if (errno == EAGAIN)
+      return;
+
+    die("wakeup write: %m");
+    return;
+  }
+}
+
+/* Create the non-blocking wakeup pipe of @loop */
+static inline void
+wakeup_init(struct birdloop *loop)
+{
+ pipe_new(loop->wakeup_fds);
+}
+
+/* Discard pending data on the read end of the wakeup pipe */
+static inline void
+wakeup_drain(struct birdloop *loop)
+{
+ pipe_drain(loop->wakeup_fds[0]);
+}
+
+/* Unconditionally write to the wakeup pipe, ignoring wakeup masking */
+static inline void
+wakeup_do_kick(struct birdloop *loop)
+{
+ pipe_kick(loop->wakeup_fds[1]);
+}
+
+/*
+ * Wake up the loop, or, when wakeups are masked, just remember the request
+ * (wakeup_masked == 2) so that birdloop_unmask_wakeups() delivers it.
+ */
+static inline void
+wakeup_kick(struct birdloop *loop)
+{
+ if (!loop->wakeup_masked)
+ wakeup_do_kick(loop);
+ else
+ loop->wakeup_masked = 2;
+}
+
+
+/*
+ * Events
+ */
+
+/* Return nonzero when the loop's event list is not empty */
+static inline uint
+events_waiting(struct birdloop *loop)
+{
+ return !EMPTY_LIST(loop->event_list);
+}
+
+/* Initialize the loop's event list to empty */
+static inline void
+events_init(struct birdloop *loop)
+{
+ init_list(&loop->event_list);
+}
+
+/* Refresh the loop time and run the events queued in the loop's event list */
+static void
+events_fire(struct birdloop *loop)
+{
+ times_update(loop);
+ ev_run_list(&loop->event_list);
+}
+
+/*
+ * Queue event @e at the tail of the current loop's event list, moving it
+ * there if it is already linked in a list.
+ */
+void
+ev2_schedule(event *e)
+{
+ struct birdloop *loop = birdloop_current();
+
+ /* Wake the loop only if it is in poll() and had no events pending before */
+ if (loop->poll_active && EMPTY_LIST(loop->event_list))
+ wakeup_kick(loop);
+
+ if (e->n.next)
+ rem_node(&e->n);
+
+ add_tail(&loop->event_list, &e->n);
+}
+
+
+/*
+ * Timers
+ */
+
+#define TIMER_LESS(a,b) ((a)->expires < (b)->expires)
+#define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \
+ heap[a]->index = (a), heap[b]->index = (b))
+
+/* Number of timers in the heap; slot 0 of the buffer is a placeholder and is not counted */
+static inline uint timers_count(struct birdloop *loop)
+{ return loop->timers.used - 1; }
+
+/* Timer at the top of the heap (slot 1), or NULL when no timer is active */
+static inline timer2 *timers_first(struct birdloop *loop)
+{ return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; }
+
+
+/* Free hook of tm2_class: stop the timer so that it leaves the timer heap */
+static void
+tm2_free(resource *r)
+{
+ timer2 *t = (timer2 *) r;
+
+ tm2_stop(t);
+}
+
+/*
+ * Dump hook of tm2_class: print the timer's hook, data, non-zero
+ * randomization and recurrence, and the time remaining to expiration.
+ *
+ * The remaining time is a 64-bit btime expression; it is cast to int to
+ * match the %d conversion, the same way times_update_alt() prints its delta.
+ */
+static void
+tm2_dump(resource *r)
+{
+  timer2 *t = (timer2 *) r;
+
+  debug("(code %p, data %p, ", t->hook, t->data);
+  if (t->randomize)
+    debug("rand %d, ", t->randomize);
+  if (t->recurrent)
+    debug("recur %d, ", t->recurrent);
+  if (t->expires)
+    debug("expires in %d ms)\n", (int) ((t->expires - current_time()) TO_MS));
+  else
+    debug("inactive)\n");
+}
+
+
+/* Resource class of timer2, passed to ralloc() in tm2_new() */
+static struct resclass tm2_class = {
+ "Timer",
+ sizeof(timer2),
+ tm2_free,
+ tm2_dump,
+ NULL,
+ NULL
+};
+
+/* Allocate a timer from pool @p; index -1 marks it as not being in the heap */
+timer2 *
+tm2_new(pool *p)
+{
+ timer2 *t = ralloc(p, &tm2_class);
+ t->index = -1;
+ return t;
+}
+
+/*
+ * Set timer @t to expire at absolute time @when in the current loop.
+ * An inactive timer is inserted into the timer heap; an active one has its
+ * expiration changed and its heap position fixed. Nothing is changed in the
+ * heap when @when equals the current expiration.
+ */
+void
+tm2_set(timer2 *t, btime when)
+{
+ struct birdloop *loop = birdloop_current();
+ uint tc = timers_count(loop);
+
+ if (!t->expires)
+ {
+ t->index = ++tc;
+ t->expires = when;
+ BUFFER_PUSH(loop->timers) = t;
+ HEAP_INSERT(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP);
+ }
+ else if (t->expires < when)
+ {
+ t->expires = when;
+ HEAP_INCREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index);
+ }
+ else if (t->expires > when)
+ {
+ t->expires = when;
+ HEAP_DECREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index);
+ }
+
+ /* The timer is now the earliest one, a loop waiting in poll() must wake up */
+ if (loop->poll_active && (t->index == 1))
+ wakeup_kick(loop);
+}
+
+/* Set timer @t to expire @after from the current loop time; negative @after counts as zero */
+void
+tm2_start(timer2 *t, btime after)
+{
+ tm2_set(t, current_time() + MAX(after, 0));
+}
+
+/* Deactivate timer @t and remove it from the current loop's timer heap; no-op when inactive */
+void
+tm2_stop(timer2 *t)
+{
+ if (!t->expires)
+ return;
+
+ struct birdloop *loop = birdloop_current();
+ uint tc = timers_count(loop);
+
+ HEAP_DELETE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index);
+ BUFFER_POP(loop->timers);
+
+ t->index = -1;
+ t->expires = 0;
+}
+
+/* Initialize the timer heap; a NULL entry is pushed to occupy the unused slot 0 */
+static void
+timers_init(struct birdloop *loop)
+{
+ BUFFER_INIT(loop->timers, loop->pool, 4);
+ BUFFER_PUSH(loop->timers) = NULL;
+}
+
+/*
+ * Run hooks of all timers that expired by the (freshly updated) loop time.
+ * A recurrent timer is rescheduled before its hook is called, a one-shot
+ * timer is stopped before its hook is called.
+ */
+static void
+timers_fire(struct birdloop *loop)
+{
+ btime base_time;
+ timer2 *t;
+
+ times_update(loop);
+ base_time = loop->last_time;
+
+ while (t = timers_first(loop))
+ {
+ if (t->expires > base_time)
+ return;
+
+ if (t->recurrent)
+ {
+ btime when = t->expires + t->recurrent;
+
+ /* The next period would already be in the past, restart from now */
+ if (when <= loop->last_time)
+ when = loop->last_time + t->recurrent;
+
+ if (t->randomize)
+ when += random() % (t->randomize + 1);
+
+ tm2_set(t, when);
+ }
+ else
+ tm2_stop(t);
+
+ t->hook(t);
+ }
+}
+
+
+/*
+ * Sockets
+ */
+
+/* Initialize the socket list and the poll arrays of @loop */
+static void
+sockets_init(struct birdloop *loop)
+{
+ init_list(&loop->sock_list);
+ loop->sock_num = 0;
+
+ BUFFER_INIT(loop->poll_sk, loop->pool, 4);
+ BUFFER_INIT(loop->poll_fd, loop->pool, 4);
+ loop->poll_changed = 1; /* add wakeup fd */
+}
+
+/*
+ * Add socket @s to @loop. The socket gets no poll slot yet (index -1);
+ * the poll arrays are flagged for rebuild and a loop waiting in poll()
+ * is woken up.
+ */
+static void
+sockets_add(struct birdloop *loop, sock *s)
+{
+ add_tail(&loop->sock_list, &s->n);
+ loop->sock_num++;
+
+ s->index = -1;
+ loop->poll_changed = 1;
+
+ if (loop->poll_active)
+ wakeup_kick(loop);
+}
+
+/* Add socket @s to the calling thread's current loop */
+void
+sk_start(sock *s)
+{
+ struct birdloop *loop = birdloop_current();
+
+ sockets_add(loop, s);
+}
+
+/*
+ * Remove socket @s from @loop. Its slot in poll_sk (if it has one) is set
+ * to NULL so that it is skipped by sockets_fire() and found by
+ * sockets_close_fds(); the poll arrays are flagged for rebuild.
+ */
+static void
+sockets_remove(struct birdloop *loop, sock *s)
+{
+ rem_node(&s->n);
+ loop->sock_num--;
+
+ if (s->index >= 0)
+ loop->poll_sk.data[s->index] = NULL;
+
+ s->index = -1;
+ loop->poll_changed = 1;
+
+ /* Wakeup moved to sk_stop() */
+}
+
+/*
+ * Remove socket @s from the current loop and close its fd. While the loop
+ * thread is in poll(), the close is deferred to sockets_close_fds() and the
+ * loop is woken up; otherwise the fd is closed immediately.
+ *
+ * NOTE(review): in the deferred case the fd is closed only through its
+ * NULL-ed poll_sk slot; a socket that never got a slot (index < 0) appears
+ * to keep its fd open. Confirm whether that case can happen.
+ */
+void
+sk_stop(sock *s)
+{
+ struct birdloop *loop = birdloop_current();
+
+ sockets_remove(loop, s);
+
+ if (loop->poll_active)
+ {
+ loop->close_scheduled = 1;
+ wakeup_kick(loop);
+ }
+ else
+ close(s->fd);
+
+ s->fd = -1;
+}
+
+/* poll() events for @s: POLLIN when it has an RX hook, POLLOUT when ttx differs from tpos */
+static inline uint sk_want_events(sock *s)
+{
+  uint events = 0;
+
+  if (s->rx_hook)
+    events |= POLLIN;
+
+  if (s->ttx != s->tpos)
+    events |= POLLOUT;
+
+  return events;
+}
+
+/*
+FIXME: this should be called from sock code
+
+static void
+sockets_update(struct birdloop *loop, sock *s)
+{
+ if (s->index >= 0)
+ loop->poll_fd.data[s->index].events = sk_want_events(s);
+}
+*/
+
+/*
+ * Rebuild the poll arrays from the socket list: one entry per socket, in
+ * list order, followed by a final entry for the internal wakeup fd.
+ * Each socket's index is set to its position in the arrays.
+ */
+static void
+sockets_prepare(struct birdloop *loop)
+{
+ BUFFER_SET(loop->poll_sk, loop->sock_num + 1);
+ BUFFER_SET(loop->poll_fd, loop->sock_num + 1);
+
+ struct pollfd *pfd = loop->poll_fd.data;
+ sock **psk = loop->poll_sk.data;
+ int i = 0;
+ node *n;
+
+ WALK_LIST(n, loop->sock_list)
+ {
+ sock *s = SKIP_BACK(sock, n, n);
+
+ ASSERT(i < loop->sock_num);
+
+ s->index = i;
+ *psk = s;
+ pfd->fd = s->fd;
+ pfd->events = sk_want_events(s);
+ pfd->revents = 0;
+
+ pfd++;
+ psk++;
+ i++;
+ }
+
+ ASSERT(i == loop->sock_num);
+
+ /* Add internal wakeup fd */
+ *psk = NULL;
+ pfd->fd = loop->wakeup_fds[0];
+ pfd->events = POLLIN;
+ pfd->revents = 0;
+
+ loop->poll_changed = 0;
+}
+
+/*
+ * Close fds of sockets stopped while the loop was in poll(): every socket
+ * entry of the poll arrays whose poll_sk slot was set to NULL. The final
+ * (wakeup fd) entry is not touched.
+ */
+static void
+sockets_close_fds(struct birdloop *loop)
+{
+ struct pollfd *pfd = loop->poll_fd.data;
+ sock **psk = loop->poll_sk.data;
+ int poll_num = loop->poll_fd.used - 1;
+
+ int i;
+ for (i = 0; i < poll_num; i++)
+ if (psk[i] == NULL)
+ close(pfd[i].fd);
+
+ loop->close_scheduled = 0;
+}
+
+int sk_read(sock *s);
+int sk_write(sock *s);
+
+/*
+ * Process poll() results: drain the wakeup pipe if it was signalled, then
+ * run RX and TX processing for every polled socket that reported events.
+ * Slots set to NULL (sockets stopped in the meantime) are skipped.
+ */
+static void
+sockets_fire(struct birdloop *loop)
+{
+  struct pollfd *pfd = loop->poll_fd.data;
+  sock **psk = loop->poll_sk.data;
+  int poll_num = loop->poll_fd.used - 1;
+
+  times_update(loop);
+
+  /* Last fd is internal wakeup fd. It must be located through the size of
+     the polled array; loop->sock_num may have changed since the array was
+     built, as sockets can be added or removed while the loop is in poll() */
+  if (pfd[poll_num].revents & POLLIN)
+    wakeup_drain(loop);
+
+  int i;
+  for (i = 0; i < poll_num; pfd++, psk++, i++)
+  {
+    int e = 1;
+
+    if (! pfd->revents)
+      continue;
+
+    if (pfd->revents & POLLNVAL)
+      die("poll: invalid fd %d", pfd->fd);
+
+    /* *psk is re-checked in each round, hooks may stop the socket */
+    if (pfd->revents & POLLIN)
+      while (e && *psk && (*psk)->rx_hook)
+        e = sk_read(*psk);
+
+    e = 1;
+    if (pfd->revents & POLLOUT)
+      while (e && *psk)
+        e = sk_write(*psk);
+  }
+}
+
+
+/*
+ * Birdloop
+ */
+
+static void * birdloop_main(void *arg);
+
+/*
+ * Allocate and initialize a new loop in its own root pool. The thread is
+ * not started here, see birdloop_start(). The first call also creates the
+ * thread-specific key for the current loop.
+ */
+struct birdloop *
+birdloop_new(void)
+{
+ /* FIXME: this init should be elsewhere and thread-safe */
+ static int init = 0;
+ if (!init)
+ { birdloop_init_current(); init = 1; }
+
+ pool *p = rp_new(NULL, "Birdloop root");
+ struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop));
+ loop->pool = p;
+ pthread_mutex_init(&loop->mutex, NULL);
+
+ times_init(loop);
+ wakeup_init(loop);
+
+ events_init(loop);
+ timers_init(loop);
+ sockets_init(loop);
+
+ return loop;
+}
+
+/* Start the loop thread running birdloop_main(); failure to create it is fatal */
+void
+birdloop_start(struct birdloop *loop)
+{
+ int rv = pthread_create(&loop->thread, NULL, birdloop_main, loop);
+ if (rv)
+ die("pthread_create(): %M", rv);
+}
+
+/*
+ * Ask the loop thread to exit and wait until it does. The kick is sent
+ * with wakeup_do_kick(), so it is delivered even when wakeups are masked.
+ */
+void
+birdloop_stop(struct birdloop *loop)
+{
+ pthread_mutex_lock(&loop->mutex);
+ loop->stop_called = 1;
+ wakeup_do_kick(loop);
+ pthread_mutex_unlock(&loop->mutex);
+
+ int rv = pthread_join(loop->thread, NULL);
+ if (rv)
+ die("pthread_join(): %M", rv);
+}
+
+/*
+ * Release all resources of a loop whose thread is not running (never
+ * started, or already stopped by birdloop_stop()).
+ *
+ * The wakeup pipe and the mutex are not pool resources, so they are
+ * released explicitly; otherwise two fds would leak on every loop
+ * created and freed. The loop structure itself lives in loop->pool and
+ * must not be touched after the pool is freed.
+ */
+void
+birdloop_free(struct birdloop *loop)
+{
+  close(loop->wakeup_fds[0]);
+  close(loop->wakeup_fds[1]);
+
+  pthread_mutex_destroy(&loop->mutex);
+
+  rfree(loop->pool);
+}
+
+
+/*
+ * Lock @loop and make it the calling thread's current loop, so that timer
+ * and socket functions used by the caller operate on it.
+ */
+void
+birdloop_enter(struct birdloop *loop)
+{
+ /* TODO: these functions could save and restore old context */
+ pthread_mutex_lock(&loop->mutex);
+ birdloop_set_current(loop);
+}
+
+/* Counterpart of birdloop_enter(): clear the thread's current loop and unlock @loop */
+void
+birdloop_leave(struct birdloop *loop)
+{
+ /* TODO: these functions could save and restore old context */
+ birdloop_set_current(NULL);
+ pthread_mutex_unlock(&loop->mutex);
+}
+
+/* Suppress wakeup kicks of @loop until birdloop_unmask_wakeups(); requested kicks are remembered */
+void
+birdloop_mask_wakeups(struct birdloop *loop)
+{
+ pthread_mutex_lock(&loop->mutex);
+ loop->wakeup_masked = 1;
+ pthread_mutex_unlock(&loop->mutex);
+}
+
+/* Re-enable wakeup kicks of @loop and deliver one kick if any was requested while masked */
+void
+birdloop_unmask_wakeups(struct birdloop *loop)
+{
+ pthread_mutex_lock(&loop->mutex);
+ if (loop->wakeup_masked == 2)
+ wakeup_do_kick(loop);
+ loop->wakeup_masked = 0;
+ pthread_mutex_unlock(&loop->mutex);
+}
+
+/*
+ * Body of the loop thread. Repeatedly runs events and timers, waits in
+ * poll() for socket activity, a wakeup or the nearest timer, and processes
+ * the results. The loop mutex is held all the time except during poll().
+ * The thread exits when stop_called is found set after poll() returns.
+ */
+static void *
+birdloop_main(void *arg)
+{
+ struct birdloop *loop = arg;
+ timer2 *t;
+ int rv, timeout;
+
+ birdloop_set_current(loop);
+
+ pthread_mutex_lock(&loop->mutex);
+ while (1)
+ {
+ events_fire(loop);
+ timers_fire(loop);
+
+ /* Poll timeout: none with pending events, time to the first timer, or infinite */
+ times_update(loop);
+ if (events_waiting(loop))
+ timeout = 0;
+ else if (t = timers_first(loop))
+ timeout = (tm2_remains(t) TO_MS) + 1;
+ else
+ timeout = -1;
+
+ if (loop->poll_changed)
+ sockets_prepare(loop);
+
+ /* poll_active tells other threads that changes need a wakeup kick */
+ loop->poll_active = 1;
+ pthread_mutex_unlock(&loop->mutex);
+
+ try:
+ rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout);
+ if (rv < 0)
+ {
+ if (errno == EINTR || errno == EAGAIN)
+ goto try;
+ die("poll: %m");
+ }
+
+ pthread_mutex_lock(&loop->mutex);
+ loop->poll_active = 0;
+
+ /* Close fds of sockets stopped while we were in poll() */
+ if (loop->close_scheduled)
+ sockets_close_fds(loop);
+
+ if (loop->stop_called)
+ break;
+
+ if (rv)
+ sockets_fire(loop);
+
+ timers_fire(loop);
+ }
+
+ loop->stop_called = 0;
+ pthread_mutex_unlock(&loop->mutex);
+
+ return NULL;
+}
+
+
diff --git a/proto/bfd/io.h b/proto/bfd/io.h
new file mode 100644
index 00000000..641ee054
--- /dev/null
+++ b/proto/bfd/io.h
@@ -0,0 +1,99 @@
+/*
+ * BIRD -- I/O and event loop
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_BFD_IO_H_
+#define _BIRD_BFD_IO_H_
+
+#include "nest/bird.h"
+#include "lib/lists.h"
+#include "lib/resource.h"
+#include "lib/event.h"
+#include "lib/socket.h"
+// #include "lib/timer.h"
+
+
+typedef struct timer2
+{
+ resource r;
+ void (*hook)(struct timer2 *); /* Callback receiving the timer itself; presumably invoked on expiry - TODO confirm */
+ void *data; /* User data pointer, stored by tm2_new_init() */
+
+ btime expires; /* 0=inactive */
+ uint randomize; /* Amount of randomization */
+ uint recurrent; /* Timer recurrence */
+
+ int index; /* NOTE(review): looks like the timer's slot in the loop's timer container - confirm in io.c */
+} timer2;
+
+
+btime current_time(void);
+
+void ev2_schedule(event *e);
+
+
+timer2 *tm2_new(pool *p);
+void tm2_set(timer2 *t, btime when);
+void tm2_start(timer2 *t, btime after);
+void tm2_stop(timer2 *t);
+
+static inline int
+tm2_active(timer2 *t) /* Returns nonzero iff the timer has an expiry time set */
+{
+ return t->expires != 0; /* expires == 0 is the "inactive" marker (see struct timer2) */
+}
+
+static inline btime
+tm2_remains(timer2 *t) /* Time left until t->expires relative to current_time(), never negative */
+{
+ btime now = current_time();
+ return (t->expires > now) ? (t->expires - now) : 0; /* already expired (or inactive, expires == 0) yields 0 */
+}
+
+static inline timer2 *
+tm2_new_init(pool *p, void (*hook)(struct timer2 *), void *data, uint rec, uint rand) /* tm2_new() plus field setup in one call */
+{
+ timer2 *t = tm2_new(p); /* timer is obtained from pool @p */
+ t->hook = hook;
+ t->data = data;
+ t->recurrent = rec; /* @rec -> recurrence field */
+ t->randomize = rand; /* @rand -> randomization field */
+ return t; /* the timer is only configured here; this function does not call tm2_set()/tm2_start() */
+}
+
+static inline void
+tm2_set_max(timer2 *t, btime when) /* Move the expiry to @when only if that is later than the current one */
+{
+ if (when > t->expires) /* an inactive timer has expires == 0, so any positive @when sets it */
+ tm2_set(t, when);
+}
+
+/*
+static inline void
+tm2_start_max(timer2 *t, btime after)
+{
+ btime rem = tm2_remains(t);
+ tm2_start(t, MAX_(rem, after));
+}
+*/
+
+
+void sk_start(sock *s);
+void sk_stop(sock *s);
+
+
+
+struct birdloop *birdloop_new(void);
+void birdloop_start(struct birdloop *loop);
+void birdloop_stop(struct birdloop *loop);
+void birdloop_free(struct birdloop *loop);
+
+void birdloop_enter(struct birdloop *loop);
+void birdloop_leave(struct birdloop *loop);
+void birdloop_mask_wakeups(struct birdloop *loop);
+void birdloop_unmask_wakeups(struct birdloop *loop);
+
+
+#endif /* _BIRD_BFD_IO_H_ */
diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c
new file mode 100644
index 00000000..fc2616ca
--- /dev/null
+++ b/proto/bfd/packets.c
@@ -0,0 +1,248 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD)
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include "bfd.h"
+
+
+struct bfd_ctl_packet
+{
+ u8 vdiag; /* version and diagnostic */
+ u8 flags; /* state and flags */
+ u8 detect_mult; /* detection multiplier; zero is rejected by bfd_rx_hook() */
+ u8 length; /* packet length as claimed by the sender, validated against the received size */
+ u32 snd_id; /* sender ID, aka 'my discriminator' */
+ u32 rcv_id; /* receiver ID, aka 'your discriminator' */
+ u32 des_min_tx_int; /* 32-bit fields are in network byte order here (htonl/ntohl at the use sites) */
+ u32 req_min_rx_int;
+ u32 req_min_echo_rx_int; /* always sent as 0 by bfd_send_ctl() */
+};
+
+#define BFD_BASE_LEN sizeof(struct bfd_ctl_packet)
+#define BFD_MAX_LEN 64
+
+static inline u8 bfd_pack_vdiag(u8 version, u8 diag) /* Build the vdiag byte from version and diagnostic code */
+{ return (version << 5) | diag; } /* version lands in the top 3 bits; diag is OR-ed in unmasked */
+
+static inline u8 bfd_pack_flags(u8 state, u8 flags) /* Build the flags byte from session state and flag bits */
+{ return (state << 6) | flags; } /* state lands in the top 2 bits; flags are OR-ed in unmasked */
+
+static inline u8 bfd_pkt_get_version(struct bfd_ctl_packet *pkt) /* Extract the protocol version from the vdiag byte */
+{ return pkt->vdiag >> 5; } /* inverse of the shift in bfd_pack_vdiag() */
+
+static inline u8 bfd_pkt_get_diag(struct bfd_ctl_packet *pkt) /* Extract the diagnostic code from the vdiag byte */
+{ return pkt->vdiag & 0x1f; } /* bitwise mask of the low 5 bits; a logical && here would only ever yield 0 or 1 */
+
+
+static inline u8 bfd_pkt_get_state(struct bfd_ctl_packet *pkt) /* Extract the session state from the flags byte */
+{ return pkt->flags >> 6; } /* inverse of the shift in bfd_pack_flags() */
+
+static inline void bfd_pkt_set_state(struct bfd_ctl_packet *pkt, u8 val) /* Store state @val in the top 2 bits of the flags byte */
+{ pkt->flags = val << 6; } /* NOTE(review): plain assignment, so all flag bits in the low 6 bits are cleared - confirm this is intended */
+
+
+char *
+bfd_format_flags(u8 flags, char *buf) /* Render @flags as a NUL-terminated letter string in @buf and return @buf */
+{
+ char *bp = buf; /* write cursor; worst case is 8 bytes: ' ' + six letters + NUL (callers here pass char[8]) */
+ if (flags & BFD_FLAGS) *bp++ = ' '; /* leading separator only when some bit in BFD_FLAGS is set */
+ if (flags & BFD_FLAG_POLL) *bp++ = 'P';
+ if (flags & BFD_FLAG_FINAL) *bp++ = 'F';
+ if (flags & BFD_FLAG_CPI) *bp++ = 'C';
+ if (flags & BFD_FLAG_AP) *bp++ = 'A';
+ if (flags & BFD_FLAG_DEMAND) *bp++ = 'D';
+ if (flags & BFD_FLAG_MULTIPOINT) *bp++ = 'M';
+ *bp = 0;
+
+ return buf;
+}
+
+void
+bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final) /* Build a control packet for session @s in its socket's TX buffer and send it; @final selects the Final flag */
+{
+ sock *sk = s->ifa->sk;
+ struct bfd_ctl_packet *pkt = (struct bfd_ctl_packet *) sk->tbuf; /* packet is composed in place in the TX buffer */
+ char fb[8];
+
+ pkt->vdiag = bfd_pack_vdiag(1, s->loc_diag); /* protocol version 1 */
+ pkt->flags = bfd_pack_flags(s->loc_state, 0);
+ pkt->detect_mult = s->detect_mult;
+ pkt->length = BFD_BASE_LEN; /* fixed-size packet, no extra sections appended */
+ pkt->snd_id = htonl(s->loc_id);
+ pkt->rcv_id = htonl(s->rem_id);
+ pkt->des_min_tx_int = htonl(s->des_min_tx_new);
+ pkt->req_min_rx_int = htonl(s->req_min_rx_new);
+ pkt->req_min_echo_rx_int = 0;
+
+ if (final)
+ pkt->flags |= BFD_FLAG_FINAL; /* Final takes precedence; Poll and Final are never both set here */
+ else if (s->poll_active)
+ pkt->flags |= BFD_FLAG_POLL;
+
+ if (sk->tbuf != sk->tpos) /* presumably tpos != tbuf means a previous packet is still unsent - TODO confirm in socket code */
+ log(L_WARN "%s: Old packet overwritten in TX buffer", p->p.name);
+
+ TRACE(D_PACKETS, "Sending CTL to %I [%s%s]", s->addr,
+ bfd_state_names[s->loc_state], bfd_format_flags(pkt->flags, fb));
+
+ sk_send_to(sk, pkt->length, s->addr, sk->dport);
+}
+
+#define DROP(DSC,VAL) do { err_dsc = DSC; err_val = VAL; goto drop; } while(0)
+
+static int
+bfd_rx_hook(sock *sk, int len) /* RX callback: validate a received control packet of @len bytes, match it to a session and process it; always returns 1 */
+{
+ struct bfd_proto *p = sk->data;
+ struct bfd_ctl_packet *pkt = (struct bfd_ctl_packet *) sk->rbuf;
+ const char *err_dsc = NULL; /* set by DROP() together with err_val, consumed at the drop label */
+ uint err_val = 0;
+ char fb[8];
+
+ if ((sk->sport == BFD_CONTROL_PORT) && (sk->ttl < 255)) /* TTL is checked only on the socket bound to BFD_CONTROL_PORT */
+ DROP("wrong TTL", sk->ttl);
+
+ if (len < BFD_BASE_LEN) /* must be checked before any header field is read */
+ DROP("too short", len);
+
+ u8 version = bfd_pkt_get_version(pkt);
+ if (version != 1)
+ DROP("version mismatch", version);
+
+ if ((pkt->length < BFD_BASE_LEN) || (pkt->length > len)) /* claimed length must fit within what was received */
+ DROP("length mismatch", pkt->length);
+
+ if (pkt->detect_mult == 0)
+ DROP("invalid detect mult", 0);
+
+ if ((pkt->flags & BFD_FLAG_MULTIPOINT) ||
+ ((pkt->flags & BFD_FLAG_POLL) && (pkt->flags & BFD_FLAG_FINAL))) /* Multipoint, or Poll and Final together, are rejected */
+ DROP("invalid flags", pkt->flags);
+
+ if (pkt->snd_id == 0)
+ DROP("invalid my discriminator", 0);
+
+ struct bfd_session *s;
+ u32 id = ntohl(pkt->rcv_id);
+
+ if (id) /* nonzero receiver ID: look the session up by our own ID */
+ {
+ s = bfd_find_session_by_id(p, id);
+
+ if (!s)
+ DROP("unknown session id", id);
+ }
+ else /* zero receiver ID: fall back to matching by the sender's address */
+ {
+ u8 ps = bfd_pkt_get_state(pkt);
+ if (ps > BFD_STATE_DOWN) /* only states up to BFD_STATE_DOWN are accepted without a receiver ID */
+ DROP("invalid init state", ps);
+
+ s = bfd_find_session_by_addr(p, sk->faddr);
+
+ /* FIXME: better session matching and message */
+ if (!s)
+ return 1; /* unmatched packet is ignored without logging */
+ }
+
+ /* FIXME: better authentication handling and message */
+ if (pkt->flags & BFD_FLAG_AP)
+ DROP("authentication not supported", 0);
+
+
+ u32 old_tx_int = s->des_min_tx_int; /* snapshot taken before the session fields below are overwritten */
+ u32 old_rx_int = s->rem_min_rx_int;
+
+ s->rem_id= ntohl(pkt->snd_id);
+ s->rem_state = bfd_pkt_get_state(pkt);
+ s->rem_diag = bfd_pkt_get_diag(pkt);
+ s->rem_demand_mode = pkt->flags & BFD_FLAG_DEMAND;
+ s->rem_min_tx_int = ntohl(pkt->des_min_tx_int);
+ s->rem_min_rx_int = ntohl(pkt->req_min_rx_int);
+ s->rem_detect_mult = pkt->detect_mult;
+
+ TRACE(D_PACKETS, "CTL received from %I [%s%s]", sk->faddr,
+ bfd_state_names[s->rem_state], bfd_format_flags(pkt->flags, fb));
+
+ bfd_session_process_ctl(s, pkt->flags, old_tx_int, old_rx_int);
+ return 1;
+
+ drop: /* common exit for every DROP(): log the reason and value, then discard the packet */
+ log(L_REMOTE "%s: Bad packet from %I - %s (%u)", p->p.name, sk->faddr, err_dsc, err_val);
+ return 1;
+}
+
+static void
+bfd_err_hook(sock *sk, int err) /* Socket error callback: log error code @err for the BFD protocol stored in sk->data */
+{
+ struct bfd_proto *p = sk->data;
+ log(L_ERR "%s: Socket error: %M", p->p.name, err); /* %M formats the code passed as an argument; %m takes none, so @err was never printed */
+}
+
+sock *
+bfd_open_rx_sk(struct bfd_proto *p, int multihop) /* Create, open and start a UDP receive socket; returns NULL if sk_open() fails */
+{
+ sock *sk = sk_new(p->tpool);
+ sk->type = SK_UDP;
+ sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; /* listening port depends on single-hop vs multihop */
+ sk->data = p; /* read back by bfd_rx_hook() and bfd_err_hook() */
+
+ sk->rbsize = BFD_MAX_LEN;
+ sk->rx_hook = bfd_rx_hook;
+ sk->err_hook = bfd_err_hook;
+
+ /* TODO: configurable ToS and priority */
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->priority = sk_priority_control;
+ sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0); /* SKF_TTL_RX only for single-hop, where bfd_rx_hook() checks the TTL */
+
+#ifdef IPV6
+ sk->flags |= SKF_V6ONLY;
+#endif
+
+ if (sk_open(sk) < 0)
+ goto err;
+
+ sk_start(sk);
+ return sk;
+
+ err: /* open failed: free the socket resource and report failure */
+ rfree(sk);
+ return NULL;
+}
+
+sock *
+bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) /* Create, open and start a UDP transmit socket bound to @local; returns NULL if sk_open() fails */
+{
+ sock *sk = sk_new(p->tpool);
+ sk->type = SK_UDP;
+ sk->saddr = local;
+ sk->dport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; /* an interface selects the single-hop port, NULL the multihop one */
+ sk->iface = ifa;
+ sk->data = p; /* read back by bfd_err_hook() */
+
+ sk->tbsize = BFD_MAX_LEN;
+ sk->err_hook = bfd_err_hook;
+
+ /* TODO: configurable ToS, priority and TTL security */
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->priority = sk_priority_control;
+ sk->ttl = ifa ? 255 : -1; /* single-hop sends with TTL 255, matching the receive-side check; -1 presumably leaves the default - TODO confirm */
+ sk->flags = SKF_THREAD;
+
+#ifdef IPV6
+ sk->flags |= SKF_V6ONLY;
+#endif
+
+ if (sk_open(sk) < 0)
+ goto err;
+
+ sk_start(sk);
+ return sk;
+
+ err: /* open failed: free the socket resource and report failure */
+ rfree(sk);
+ return NULL;
+}
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 837a6861..d34e2ae3 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -238,7 +238,7 @@ bgp_format_aggregator(eattr *a, byte *buf, int buflen UNUSED)
as = get_u32(data);
data += 4;
- bsprintf(buf, "%d.%d.%d.%d AS%d", data[0], data[1], data[2], data[3], as);
+ bsprintf(buf, "%d.%d.%d.%d AS%u", data[0], data[1], data[2], data[3], as);
}
static int
@@ -1032,7 +1032,8 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
rta->dest != RTD_ROUTER ||
ipa_equal(rta->gw, IPA_NONE) ||
ipa_has_link_scope(rta->gw) ||
- (!p->is_internal && (!p->neigh || (rta->iface != p->neigh->iface))))
+ (!p->is_internal && !p->cf->next_hop_keep &&
+ (!p->neigh || (rta->iface != p->neigh->iface))))
set_next_hop(z, p->source_addr);
else
set_next_hop(z, rta->gw);
@@ -1046,8 +1047,9 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
static inline int
bgp_as_path_loopy(struct bgp_proto *p, rta *a)
{
+ int num = p->cf->allow_local_as + 1;
eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- return (e && as_path_is_member(e->u.ptr, p->local_as));
+ return (e && (num > 0) && as_path_contains(e->u.ptr, p->local_as, num));
}
static inline int
@@ -1100,10 +1102,13 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
/* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr,
* eBGP single-hop -> keep next_hop if on the same iface.
* If the next_hop is zero (i.e. link-local), keep only if on the same iface.
+ *
+ * Note that same-iface-check uses iface from route, which is based on gw.
*/
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
if (a && !p->cf->next_hop_self &&
- ((p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) ||
+ (p->cf->next_hop_keep ||
+ (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) ||
(p->neigh && (e->attrs->iface == p->neigh->iface))))
{
/* Leave the original next hop attribute, will check later where does it point */
@@ -1444,7 +1449,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
/* The default case - find a new best-in-group route */
r = new; /* new may not be in the list */
- for (s=net->routes; s; s=s->next)
+ for (s=net->routes; rte_is_valid(s); s=s->next)
if (use_deterministic_med(s) && same_group(s, lpref, lasn))
{
s->u.bgp.suppressed = 1;
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index f290f227..81a263bb 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -59,8 +59,8 @@
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
-#include "nest/locks.h"
#include "nest/cli.h"
+#include "nest/locks.h"
#include "conf/conf.h"
#include "lib/socket.h"
#include "lib/resource.h"
@@ -76,6 +76,7 @@ static void bgp_close(struct bgp_proto *p, int apply_md5);
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
+static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
/**
@@ -153,8 +154,12 @@ bgp_initiate(struct bgp_proto *p)
if (rv < 0)
return;
+ if (p->cf->bfd)
+ bgp_update_bfd(p, p->cf->bfd);
+
if (p->startup_delay)
{
+ p->start_state = BSS_DELAY;
BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay);
bgp_start_timer(p->startup_timer, p->startup_delay);
}
@@ -386,10 +391,12 @@ bgp_conn_enter_close_state(struct bgp_conn *conn)
int os = conn->state;
bgp_conn_set_state(conn, BS_CLOSE);
- tm_stop(conn->hold_timer);
tm_stop(conn->keepalive_timer);
conn->sk->rx_hook = NULL;
+ /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
+ bgp_start_timer(conn->hold_timer, 10);
+
if (os == BS_ESTABLISHED)
bgp_conn_leave_established_state(p);
}
@@ -483,9 +490,18 @@ static void
bgp_hold_timeout(timer *t)
{
struct bgp_conn *conn = t->data;
+ struct bgp_proto *p = conn->bgp;
DBG("BGP: Hold timeout\n");
+ /* We are already closing the connection - just do hangup */
+ if (conn->state == BS_CLOSE)
+ {
+ BGP_TRACE(D_EVENTS, "Connection stalled");
+ bgp_conn_enter_idle_state(conn);
+ return;
+ }
+
/* If there is something in input queue, we are probably congested
and perhaps just not processed BGP packets in time. */
@@ -737,6 +753,9 @@ bgp_neigh_notify(neighbor *n)
{
struct bgp_proto *p = (struct bgp_proto *) n->proto;
+ if (! (n->flags & NEF_STICKY))
+ return;
+
if (n->scope > 0)
{
if ((p->p.proto_state == PS_START) && (p->start_state == BSS_PREPARE))
@@ -756,6 +775,37 @@ bgp_neigh_notify(neighbor *n)
}
}
+static void
+bgp_bfd_notify(struct bfd_request *req) /* BFD request callback: stop the BGP instance in req->data when the request reports down */
+{
+ struct bgp_proto *p = req->data;
+ int ps = p->p.proto_state;
+
+ if (req->down && ((ps == PS_START) || (ps == PS_UP))) /* react only while the protocol is starting or up */
+ {
+ BGP_TRACE(D_EVENTS, "BFD session down");
+ bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
+ if (ps == PS_UP) /* startup delay is updated only when we were up */
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0);
+ }
+}
+
+static void
+bgp_update_bfd(struct bgp_proto *p, int use_bfd) /* Create or release p->bfd_req so that its presence matches @use_bfd; no-op if already consistent */
+{
+ if (use_bfd && !p->bfd_req)
+ p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
+ p->cf->multihop ? NULL : p->neigh->iface, /* NOTE(review): dereferences p->neigh when not multihop - callers must ensure it is set */
+ bgp_bfd_notify, p);
+
+ if (!use_bfd && p->bfd_req)
+ {
+ rfree(p->bfd_req);
+ p->bfd_req = NULL; /* cleared so that a later call can request a session again */
+ }
+}
+
static int
bgp_reload_routes(struct proto *P)
{
@@ -816,6 +866,7 @@ bgp_start(struct proto *P)
p->outgoing_conn.state = BS_IDLE;
p->incoming_conn.state = BS_IDLE;
p->neigh = NULL;
+ p->bfd_req = NULL;
rt_lock_table(p->igp_table);
@@ -845,7 +896,6 @@ bgp_start(struct proto *P)
lock->iface = p->cf->iface;
lock->type = OBJLOCK_TCP;
lock->port = BGP_PORT;
- lock->iface = NULL;
lock->hook = bgp_start_locked;
lock->data = p;
olock_acquire(lock);
@@ -883,6 +933,7 @@ bgp_shutdown(struct proto *P)
subcode = 4; // Errcode 6, 4 - administrative reset
break;
+ case PDC_RX_LIMIT_HIT:
case PDC_IN_LIMIT_HIT:
subcode = 1; // Errcode 6, 1 - max number of prefixes reached
/* log message for compatibility */
@@ -981,6 +1032,9 @@ bgp_check_config(struct bgp_config *c)
ipa_has_link_scope(c->source_addr)))
cf_error("Multihop BGP cannot be used with link-local addresses");
+ if (c->multihop && c->bfd && ipa_zero(c->source_addr))
+ cf_error("Multihop BGP with BFD requires specified source address");
+
/* Different default based on rs_client */
if (!c->missing_lladdr)
@@ -1012,6 +1066,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *C)
struct bgp_proto *p = (struct bgp_proto *) P;
struct bgp_config *old = p->cf;
+ if (proto_get_router_id(C) != p->local_id)
+ return 0;
+
int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
((byte *) new) + sizeof(struct proto_config),
// password item is last and must be checked separately
@@ -1020,6 +1077,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *C)
|| (old->password && new->password && !strcmp(old->password, new->password)))
&& (get_igp_table(old) == get_igp_table(new));
+ if (same && (p->start_state > BSS_PREPARE))
+ bgp_update_bfd(p, new->bfd);
+
/* We should update our copy of configuration ptr as old configuration will be freed */
if (same)
p->cf = new;
@@ -1101,7 +1161,7 @@ bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
-static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket" };
+static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down" };
static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
static const char *
@@ -1195,7 +1255,7 @@ bgp_show_proto_info(struct proto *P)
cli_msg(-1006, " Source address: %I", p->source_addr);
if (P->cf->in_limit)
cli_msg(-1006, " Route limit: %d/%d",
- p->p.stats.imp_routes, P->cf->in_limit->limit);
+ p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
cli_msg(-1006, " Hold timer: %d/%d",
tm_remains(c->hold_timer), c->hold_time);
cli_msg(-1006, " Keepalive timer: %d/%d",
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index b87de46e..a35c362c 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -11,6 +11,7 @@
#include <stdint.h>
#include "nest/route.h"
+#include "nest/bfd.h"
struct linpool;
struct eattr;
@@ -24,6 +25,7 @@ struct bgp_config {
int multihop; /* Number of hops if multihop */
int ttl_security; /* Enable TTL security [RFC5082] */
int next_hop_self; /* Always set next hop to local IP address */
+ int next_hop_keep; /* Do not touch next hop attribute */
int missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */
int gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
int compare_path_lengths; /* Use path lengths when selecting best route */
@@ -44,6 +46,7 @@ struct bgp_config {
int interpret_communities; /* Hardwired handling of well-known communities */
int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
int add_path; /* Use ADD-PATH extension [draft] */
+ int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */
unsigned connect_retry_time;
unsigned hold_time, initial_hold_time;
unsigned keepalive_time;
@@ -52,8 +55,10 @@ struct bgp_config {
unsigned error_delay_time_min; /* Time to wait after an error is detected */
unsigned error_delay_time_max;
unsigned disable_after_error; /* Disable the protocol when error is detected */
+
char *password; /* Password used for MD5 authentication */
struct rtable_config *igp_table; /* Table used for recursive next hop lookups */
+ int bfd; /* Use BFD for liveness detection */
};
#define MLL_SELF 1
@@ -106,6 +111,7 @@ struct bgp_proto {
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
struct object_lock *lock; /* Lock for neighbor connection */
struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */
+ struct bfd_request *bfd_req; /* BFD request, if BFD is used */
ip_addr source_addr; /* Local address used as an advertised next hop */
rtable *igp_table; /* Table used for recursive next hop lookups */
struct event *event; /* Event for respawning and shutting process */
@@ -274,6 +280,8 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define BS_ESTABLISHED 5
#define BS_CLOSE 6 /* Used during transition to BS_IDLE */
+#define BS_MAX 7
+
/* BGP start states
*
* Used in PS_START for fine-grained specification of starting state.
@@ -305,6 +313,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define BEM_INVALID_NEXT_HOP 2
#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */
#define BEM_NO_SOCKET 4
+#define BEM_BFD_DOWN 5
/* Automatic shutdown error codes */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 0b096339..ab12fed5 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -26,7 +26,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY,
PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH,
INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP,
TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC,
- SECONDARY, ADD, PATHS, RX, TX)
+ SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX)
CF_GRAMMAR
@@ -76,7 +76,8 @@ bgp_proto:
| bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; }
| bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; }
| bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); }
- | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; }
+ | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; BGP_CFG->next_hop_keep = 0; }
+ | bgp_proto NEXT HOP KEEP ';' { BGP_CFG->next_hop_keep = 1; BGP_CFG->next_hop_self = 0; }
| bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; }
| bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; }
| bgp_proto MISSING LLADDR IGNORE ';' { BGP_CFG->missing_lladdr = MLL_IGNORE; }
@@ -110,8 +111,11 @@ bgp_proto:
| bgp_proto ADD PATHS RX ';' { BGP_CFG->add_path = ADD_PATH_RX; }
| bgp_proto ADD PATHS TX ';' { BGP_CFG->add_path = ADD_PATH_TX; }
| bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; }
+ | bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; }
+ | bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; }
| bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; }
| bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; }
+ | bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); }
;
CF_ADDTO(dynamic_attr, BGP_ORIGIN
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 3fae2c24..42064332 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -24,6 +24,13 @@
static struct rate_limit rl_rcv_update, rl_snd_update;
+/* Table for state -> RFC 6608 FSM error subcodes; states without an initializer map to subcode 0 */
+static byte fsm_err_subcode[BS_MAX] = {
+ [BS_OPENSENT] = 1,
+ [BS_OPENCONFIRM] = 2,
+ [BS_ESTABLISHED] = 3
+};
+
/*
* MRT Dump format is not semantically specified.
* We will use these values in appropriate fields:
@@ -58,7 +65,7 @@ mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
buf+=4;
}
- put_u16(buf+0, p->neigh ? p->neigh->iface->index : 0);
+ put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
put_u16(buf+2, BGP_AF);
buf+=4;
buf = ipa_put_addr(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
@@ -758,7 +765,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
/* Check state */
if (conn->state != BS_OPENSENT)
- { bgp_error(conn, 5, 0, NULL, 0); return; }
+ { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
/* Check message contents */
if (len < 29 || len != 29 + pkt[28])
@@ -917,7 +924,7 @@ bgp_set_next_hop(struct bgp_proto *p, rta *a)
ip_addr *nexthop = (ip_addr *) nh->u.ptr->data;
#ifdef IPV6
- int second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
+ int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]);
/* First address should not be link-local, but may be zero in direct mode */
if (ipa_has_link_scope(*nexthop))
@@ -1148,7 +1155,7 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
bgp_conn_enter_established_state(conn);
if (conn->state != BS_ESTABLISHED)
- { bgp_error(conn, 5, 0, NULL, 0); return; }
+ { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
bgp_start_timer(conn->hold_timer, conn->hold_time);
/* Find parts of the packet and check sizes */
@@ -1210,7 +1217,10 @@ static struct {
{ 3, 10, "Invalid network field" },
{ 3, 11, "Malformed AS_PATH" },
{ 4, 0, "Hold timer expired" },
- { 5, 0, "Finite state machine error" },
+ { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
+ { 5, 1, "Unexpected message in OpenSent state" },
+ { 5, 2, "Unexpected message in OpenConfirm state" },
+ { 5, 3, "Unexpected message in Established state" },
{ 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
{ 6, 1, "Maximum number of prefixes reached" },
{ 6, 2, "Administrative shutdown" },
@@ -1341,7 +1351,7 @@ bgp_rx_keepalive(struct bgp_conn *conn)
case BS_ESTABLISHED:
break;
default:
- bgp_error(conn, 5, 0, NULL, 0);
+ bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
}
}
@@ -1353,7 +1363,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, int len)
BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
if (conn->state != BS_ESTABLISHED)
- { bgp_error(conn, 5, 0, NULL, 0); return; }
+ { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
if (!p->cf->enable_refresh)
{ bgp_error(conn, 1, 3, pkt+18, 1); return; }
diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y
index 67b0785f..c47a8cd2 100644
--- a/proto/ospf/config.Y
+++ b/proto/ospf/config.Y
@@ -92,6 +92,7 @@ ospf_proto_finish(void)
if (cf->abr && !backbone)
{
struct ospf_area_config *ac = cfg_allocz(sizeof(struct ospf_area_config));
+ ac->type = OPT_E; /* Backbone is non-stub */
add_head(&cf->area_list, NODE ac);
init_list(&ac->patt_list);
init_list(&ac->net_list);
@@ -124,16 +125,17 @@ CF_DECLS
CF_KEYWORDS(OSPF, AREA, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID)
CF_KEYWORDS(NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT)
-CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST)
+CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST, DEFAULT)
CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP)
-CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC)
-CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK)
+CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC, TTL, SECURITY)
+CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK, ONLY, BFD)
CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL)
CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY)
-CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL)
+CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY)
%type <t> opttext
%type <ld> lsadb_args
+%type <i> nbma_eligible
CF_GRAMMAR
@@ -156,6 +158,7 @@ ospf_proto:
ospf_proto_item:
proto_item
| RFC1583COMPAT bool { OSPF_CFG->rfc1583 = $2; }
+ | STUB ROUTER bool { OSPF_CFG->stub_router = $3; }
| ECMP bool { OSPF_CFG->ecmp = $2 ? DEFAULT_ECMP_LIMIT : 0; }
| ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? $4 : 0; if ($4 < 0) cf_error("ECMP limit cannot be negative"); }
| TICK expr { OSPF_CFG->tick = $2; if($2<=0) cf_error("Tick must be greater than zero"); }
@@ -288,19 +291,25 @@ ospf_iface_item:
| TYPE POINTOMULTIPOINT { OSPF_PATT->type = OSPF_IT_PTMP ; }
| TYPE PTMP { OSPF_PATT->type = OSPF_IT_PTMP ; }
| REAL BROADCAST bool { OSPF_PATT->real_bcast = $3; if (OSPF_VERSION != 2) cf_error("Real broadcast option requires OSPFv2"); }
+ | PTP NETMASK bool { OSPF_PATT->ptp_netmask = $3; if (OSPF_VERSION != 2) cf_error("Real netmask option requires OSPFv2"); }
| TRANSMIT DELAY expr { OSPF_PATT->inftransdelay = $3 ; if (($3<=0) || ($3>65535)) cf_error("Transmit delay must be in range 1-65535"); }
| PRIORITY expr { OSPF_PATT->priority = $2 ; if (($2<0) || ($2>255)) cf_error("Priority must be in range 0-255"); }
| STRICT NONBROADCAST bool { OSPF_PATT->strictnbma = $3 ; }
| STUB bool { OSPF_PATT->stub = $2 ; }
| CHECK LINK bool { OSPF_PATT->check_link = $3; }
| ECMP WEIGHT expr { OSPF_PATT->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); }
- | NEIGHBORS '{' ipa_list '}'
+ | NEIGHBORS '{' nbma_list '}'
| AUTHENTICATION NONE { OSPF_PATT->autype = OSPF_AUTH_NONE ; }
| AUTHENTICATION SIMPLE { OSPF_PATT->autype = OSPF_AUTH_SIMPLE ; }
| AUTHENTICATION CRYPTOGRAPHIC { OSPF_PATT->autype = OSPF_AUTH_CRYPT ; }
| RX BUFFER LARGE { OSPF_PATT->rxbuf = OSPF_RXBUF_LARGE ; }
| RX BUFFER NORMAL { OSPF_PATT->rxbuf = OSPF_RXBUF_NORMAL ; }
| RX BUFFER expr { OSPF_PATT->rxbuf = $3 ; if (($3 < OSPF_RXBUF_MINSIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("Buffer size must be in range 256-65535"); }
+ | TX tos { OSPF_PATT->tx_tos = $2; }
+ | TX PRIORITY expr { OSPF_PATT->tx_priority = $3; }
+ | TTL SECURITY bool { OSPF_PATT->ttl_security = $3; }
+ | TTL SECURITY TX ONLY { OSPF_PATT->ttl_security = 2; }
+ | BFD bool { OSPF_PATT->bfd = $2; cf_check_bfd($2); }
| password_list
;
@@ -326,33 +335,24 @@ pref_opt:
| TAG expr { this_pref->tag = $2; }
;
-ipa_list:
+nbma_list:
/* empty */
- | ipa_list ipa_item
+ | nbma_list nbma_item
;
-ipa_item:
- ipa_el
- | ipa_ne;
+nbma_eligible:
+ /* empty */ { $$ = 0; }
+ | ELIGIBLE { $$ = 1; }
+ ;
-ipa_el: IPA ';'
+nbma_item: IPA nbma_eligible ';'
{
this_nbma = cfg_allocz(sizeof(struct nbma_node));
add_tail(&OSPF_PATT->nbma_list, NODE this_nbma);
this_nbma->ip=$1;
- this_nbma->eligible=0;
+ this_nbma->eligible=$2;
}
;
-
-ipa_ne: IPA ELIGIBLE ';'
- {
- this_nbma = cfg_allocz(sizeof(struct nbma_node));
- add_tail(&OSPF_PATT->nbma_list, NODE this_nbma);
- this_nbma->ip=$1;
- this_nbma->eligible=1;
- }
-;
-
ospf_iface_start:
{
@@ -371,6 +371,9 @@ ospf_iface_start:
OSPF_PATT->type = OSPF_IT_UNDEF;
init_list(&OSPF_PATT->nbma_list);
OSPF_PATT->autype = OSPF_AUTH_NONE;
+ OSPF_PATT->ptp_netmask = 2; /* not specified */
+ OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL;
+ OSPF_PATT->tx_priority = sk_priority_control;
reset_passwords();
}
;
@@ -439,7 +442,7 @@ lsadb_args:
$$ = cfg_allocz(sizeof(struct lsadb_show_data));
}
| lsadb_args GLOBAL { $$ = $1; $$->scope = LSA_SCOPE_AS; }
- | lsadb_args AREA idval { $$ = $1; $$->scope = LSA_SCOPE_AREA; $$->area = $3 }
+ | lsadb_args AREA idval { $$ = $1; $$->scope = LSA_SCOPE_AREA; $$->area = $3; }
| lsadb_args LINK { $$ = $1; $$->scope = 1; /* hack, 0 is no filter */ }
| lsadb_args TYPE NUM { $$ = $1; $$->type = $3; }
| lsadb_args LSID idval { $$ = $1; $$->lsid = $3; }
diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c
index f9ba28f6..b6b11004 100644
--- a/proto/ospf/hello.c
+++ b/proto/ospf/hello.c
@@ -101,6 +101,17 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa,
return;
}
+#ifdef OSPFv2
+ if (n && (n->rid != ntohl(ps_i->routerid)))
+ {
+ OSPF_TRACE(D_EVENTS,
+ "Neighbor %I has changed router id from %R to %R.",
+ n->ip, n->rid, ntohl(ps_i->routerid));
+ ospf_neigh_remove(n);
+ n = NULL;
+ }
+#endif
+
if (!n)
{
if ((ifa->type == OSPF_IT_NBMA) || (ifa->type == OSPF_IT_PTMP))
@@ -132,7 +143,7 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa,
n = ospf_neighbor_new(ifa);
- n->rid = ntohl(((struct ospf_packet *) ps)->routerid);
+ n->rid = ntohl(ps_i->routerid);
n->ip = faddr;
n->dr = ntohl(ps->dr);
n->bdr = ntohl(ps->bdr);
@@ -140,7 +151,18 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa,
#ifdef OSPFv3
n->iface_id = ntohl(ps->iface_id);
#endif
+
+ if (n->ifa->cf->bfd)
+ ospf_neigh_update_bfd(n, n->ifa->bfd);
}
+#ifdef OSPFv3 /* NOTE: this could also be relevant for OSPFv2 on PtP ifaces */
+ else if (!ipa_equal(faddr, n->ip))
+ {
+ OSPF_TRACE(D_EVENTS, "Neighbor address changed from %I to %I", n->ip, faddr);
+ n->ip = faddr;
+ }
+#endif
+
ospf_neigh_sm(n, INM_HELLOREC);
pnrid = (u32 *) ((struct ospf_hello_packet *) (ps + 1));
@@ -253,7 +275,8 @@ ospf_hello_send(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn)
#ifdef OSPFv2
pkt->netmask = ipa_mkmask(ifa->addr->pxlen);
ipa_hton(pkt->netmask);
- if ((ifa->type == OSPF_IT_VLINK) || (ifa->type == OSPF_IT_PTP))
+ if ((ifa->type == OSPF_IT_VLINK) ||
+ ((ifa->type == OSPF_IT_PTP) && !ifa->ptp_netmask))
pkt->netmask = IPA_NONE;
#endif
@@ -261,7 +284,7 @@ ospf_hello_send(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn)
pkt->priority = ifa->priority;
#ifdef OSPFv3
- pkt->iface_id = htonl(ifa->iface->index);
+ pkt->iface_id = htonl(ifa->iface_id);
pkt->options3 = ifa->oa->options >> 16;
pkt->options2 = ifa->oa->options >> 8;
diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c
index a6a0c6c1..f1409840 100644
--- a/proto/ospf/iface.c
+++ b/proto/ospf/iface.c
@@ -77,7 +77,8 @@ ospf_sk_open(struct ospf_iface *ifa)
sk->dport = OSPF_PROTO;
sk->saddr = IPA_NONE;
- sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->tos = ifa->cf->tx_tos;
+ sk->priority = ifa->cf->tx_priority;
sk->rx_hook = ospf_rx_hook;
sk->tx_hook = ospf_tx_hook;
sk->err_hook = ospf_err_hook;
@@ -85,7 +86,8 @@ ospf_sk_open(struct ospf_iface *ifa)
sk->rbsize = rxbufsize(ifa);
sk->tbsize = rxbufsize(ifa);
sk->data = (void *) ifa;
- sk->flags = SKF_LADDR_RX;
+ sk->flags = SKF_LADDR_RX | (ifa->check_ttl ? SKF_TTL_RX : 0);
+ sk->ttl = ifa->cf->ttl_security ? 255 : -1;
if (sk_open(sk) != 0)
goto err;
@@ -130,7 +132,7 @@ ospf_sk_open(struct ospf_iface *ifa)
else
{
ifa->all_routers = AllSPFRouters;
- sk->ttl = 1; /* Hack, this will affect just multicast packets */
+ sk->ttl = ifa->cf->ttl_security ? 255 : 1;
if (sk_setup_multicast(sk) < 0)
goto err;
@@ -533,10 +535,15 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
ifa->rxbuf = ip->rxbuf;
ifa->check_link = ip->check_link;
ifa->ecmp_weight = ip->ecmp_weight;
+ ifa->check_ttl = (ip->ttl_security == 1);
+ ifa->bfd = ip->bfd;
#ifdef OSPFv2
ifa->autype = ip->autype;
ifa->passwords = ip->passwords;
+ ifa->ptp_netmask = addr ? !(addr->flags & IA_PEER) : 0;
+ if (ip->ptp_netmask < 2)
+ ifa->ptp_netmask = ip->ptp_netmask;
#endif
#ifdef OSPFv3
@@ -567,13 +574,29 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
log(L_WARN "%s: Cannot use interface %s as %s, forcing %s",
p->name, iface->name, ospf_it[old_type], ospf_it[ifa->type]);
+ /* Assign iface ID; for vlinks this is an ugly hack */
+ ifa->iface_id = (ifa->type != OSPF_IT_VLINK) ? iface->index : oa->po->last_vlink_id++;
init_list(&ifa->neigh_list);
init_list(&ifa->nbma_list);
WALK_LIST(nb, ip->nbma_list)
- if (ipa_in_net(nb->ip, addr->prefix, addr->pxlen))
- add_nbma_node(ifa, nb, 0);
+ {
+ /* In OSPFv3, addr is link-local while configured neighbors could
+ have global IP (although RFC 5340 C.5 says link-local addresses
+ should be used). Because OSPFv3 iface is not subnet-specific,
+ there is no need for ipa_in_net() check */
+
+#ifdef OSPFv2
+ if (!ipa_in_net(nb->ip, addr->prefix, addr->pxlen))
+ continue;
+#else
+ if (!ipa_has_link_scope(nb->ip))
+ log(L_WARN "In OSPFv3, configured neighbor address (%I) should be link-local", nb->ip);
+#endif
+
+ add_nbma_node(ifa, nb, 0);
+ }
ifa->state = OSPF_IS_DOWN;
add_tail(&oa->po->iface_list, NODE ifa);
@@ -640,7 +663,11 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
if (ifa->stub != new_stub)
return 0;
- if (new->real_bcast != ifa->cf->real_bcast)
+ /* Changing any of these options would require resetting the iface socket */
+ if ((new->real_bcast != ifa->cf->real_bcast) ||
+ (new->tx_tos != ifa->cf->tx_tos) ||
+ (new->tx_priority != ifa->cf->tx_priority) ||
+ (new->ttl_security != ifa->cf->ttl_security))
return 0;
ifa->cf = new;
@@ -769,8 +796,14 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
/* NBMA LIST - add new */
WALK_LIST(nb, new->nbma_list)
{
+ /* See related note in ospf_iface_new() */
+#ifdef OSPFv2
if (!ipa_in_net(nb->ip, ifa->addr->prefix, ifa->addr->pxlen))
continue;
+#else
+ if (!ipa_has_link_scope(nb->ip))
+ log(L_WARN "In OSPFv3, configured neighbor address (%I) should be link-local", nb->ip);
+#endif
if (! find_nbma_node(ifa, nb->ip))
{
@@ -808,6 +841,19 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
ifa->ecmp_weight = new->ecmp_weight;
}
+ /* BFD */
+ if (ifa->bfd != new->bfd)
+ {
+ OSPF_TRACE(D_EVENTS, "%s BFD on interface %s",
+ new->bfd ? "Enabling" : "Disabling", ifname);
+ ifa->bfd = new->bfd;
+
+ struct ospf_neighbor *n;
+ WALK_LIST(n, ifa->neigh_list)
+ ospf_neigh_update_bfd(n, ifa->bfd);
+ }
+
+
/* instance_id is not updated - it is part of key */
return 1;
@@ -884,6 +930,10 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac)
struct ifa *a;
WALK_LIST(iface, iface_list)
+ {
+ if (! (iface->flags & IF_UP))
+ continue;
+
WALK_LIST(a, iface->addrs)
{
if (a->flags & IA_SECONDARY)
@@ -909,6 +959,7 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac)
ospf_iface_new(oa, a, ip);
}
}
+ }
}
@@ -1012,6 +1063,10 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac)
struct ifa *a;
WALK_LIST(iface, iface_list)
+ {
+ if (! (iface->flags & IF_UP))
+ continue;
+
WALK_LIST(a, iface->addrs)
{
if (a->flags & IA_SECONDARY)
@@ -1040,6 +1095,7 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac)
ospf_iface_new(oa, a, ip);
}
}
+ }
}
#endif
diff --git a/proto/ospf/lsupd.c b/proto/ospf/lsupd.c
index 16967a7f..beac6c83 100644
--- a/proto/ospf/lsupd.c
+++ b/proto/ospf/lsupd.c
@@ -112,6 +112,10 @@ ospf_lsa_flooding_allowed(struct ospf_lsa_header *lsa, u32 domain, struct ospf_i
{
u32 scope = LSA_SCOPE(lsa);
+ /* Handle inactive vlinks */
+ if (ifa->state == OSPF_IS_DOWN)
+ return 0;
+
/* 4.5.2 (Case 2) */
if (unknown_lsa_type(lsa) && !(lsa->type & LSA_UBIT))
scope = LSA_SCOPE_LINK;
@@ -119,7 +123,7 @@ ospf_lsa_flooding_allowed(struct ospf_lsa_header *lsa, u32 domain, struct ospf_i
switch (scope)
{
case LSA_SCOPE_LINK:
- return ifa->iface->index == domain;
+ return ifa->iface_id == domain;
case LSA_SCOPE_AREA:
return ifa->oa->areaid == domain;
@@ -201,7 +205,7 @@ ospf_lsupd_flood(struct proto_ospf *po,
en->lsa_body = NULL;
DBG("Removing from lsreq list for neigh %R\n", nn->rid);
ospf_hash_delete(nn->lsrqh, en);
- if (EMPTY_SLIST(nn->lsrql))
+ if ((EMPTY_SLIST(nn->lsrql)) && (nn->state == NEIGHBOR_LOADING))
ospf_neigh_sm(nn, INM_LOADDONE);
continue;
break;
@@ -212,7 +216,7 @@ ospf_lsupd_flood(struct proto_ospf *po,
en->lsa_body = NULL;
DBG("Removing from lsreq list for neigh %R\n", nn->rid);
ospf_hash_delete(nn->lsrqh, en);
- if (EMPTY_SLIST(nn->lsrql))
+ if ((EMPTY_SLIST(nn->lsrql)) && (nn->state == NEIGHBOR_LOADING))
ospf_neigh_sm(nn, INM_LOADDONE);
break;
default:
@@ -280,6 +284,16 @@ ospf_lsupd_flood(struct proto_ospf *po,
ospf_pkt_fill_hdr(ifa, pk, LSUPD_P);
pk->lsano = htonl(1);
+ /* Check iface buffer size */
+ int len2 = sizeof(struct ospf_lsupd_packet) + (hn ? ntohs(hn->length) : hh->length);
+ if (len2 > ospf_pkt_bufsize(ifa))
+ {
+ /* Cannot fit in a tx buffer, skip that iface */
+ log(L_ERR "OSPF: LSA too large to flood on %s (Type: %04x, Id: %R, Rt: %R)",
+ ifa->iface->name, hh->type, hh->id, hh->rt);
+ continue;
+ }
+
lh = (struct ospf_lsa_header *) (pk + 1);
/* Copy LSA into the packet */
@@ -395,7 +409,7 @@ ospf_lsupd_send_list(struct ospf_neighbor *n, list * l)
if (len2 > ospf_pkt_bufsize(n->ifa))
{
/* Cannot fit in a tx buffer, skip that */
- log(L_WARN "OSPF: LSA too large to send (Type: %04x, Id: %R, Rt: %R)",
+ log(L_ERR "OSPF: LSA too large to send (Type: %04x, Id: %R, Rt: %R)",
lsr->lsh.type, lsr->lsh.id, lsr->lsh.rt);
lsr = NODE_NEXT(lsr);
continue;
diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c
index 642365b3..61224ec2 100644
--- a/proto/ospf/neighbor.c
+++ b/proto/ospf/neighbor.c
@@ -459,7 +459,7 @@ bdr_election(struct ospf_iface *ifa)
#else /* OSPFv3 */
me.dr = ifa->drid;
me.bdr = ifa->bdrid;
- me.iface_id = ifa->iface->index;
+ me.iface_id = ifa->iface_id;
#endif
add_tail(&ifa->neigh_list, NODE & me);
@@ -582,6 +582,36 @@ ospf_neigh_remove(struct ospf_neighbor *n)
OSPF_TRACE(D_EVENTS, "Deleting neigbor.");
}
+static void /* BFD notification hook; registered by ospf_neigh_update_bfd() */
+ospf_neigh_bfd_hook(struct bfd_request *req)
+{
+ struct ospf_neighbor *n = req->data; /* the neighbor handed over as hook data when the session was requested */
+ struct proto *p = &n->ifa->oa->po->proto; /* not referenced directly below; presumably read by OSPF_TRACE -- confirm */
+
+ if (req->down) /* only the "down" indication is acted upon; anything else is ignored */
+ {
+ OSPF_TRACE(D_EVENTS, "BFD session down for %I on %s",
+ n->ip, n->ifa->iface->name);
+
+ ospf_neigh_remove(n); /* NOTE(review): n must not be used after this call if it releases the neighbor -- confirm */
+ }
+}
+
+void /* Make the neighbor's BFD request match use_bfd (non-zero = BFD wanted) */
+ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd)
+{
+ if (use_bfd && !n->bfd_req) /* wanted but no request yet: create one, with n as hook data */
+ n->bfd_req = bfd_request_session(n->pool, n->ip, n->ifa->addr->ip, n->ifa->iface,
+ ospf_neigh_bfd_hook, n);
+
+ if (!use_bfd && n->bfd_req) /* no longer wanted: release the existing request */
+ {
+ rfree(n->bfd_req);
+ n->bfd_req = NULL; /* cleared so that a later enable creates a fresh request */
+ }
+}
+
+
void
ospf_sh_neigh_info(struct ospf_neighbor *n)
{
diff --git a/proto/ospf/neighbor.h b/proto/ospf/neighbor.h
index f593faed..e674927d 100644
--- a/proto/ospf/neighbor.h
+++ b/proto/ospf/neighbor.h
@@ -16,6 +16,7 @@ void bdr_election(struct ospf_iface *ifa);
struct ospf_neighbor *find_neigh(struct ospf_iface *ifa, u32 rid);
struct ospf_neighbor *find_neigh_by_ip(struct ospf_iface *ifa, ip_addr ip);
void ospf_neigh_remove(struct ospf_neighbor *n);
+void ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd);
void ospf_sh_neigh_info(struct ospf_neighbor *n);
#endif /* _BIRD_OSPF_NEIGHBOR_H_ */
diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c
index 1aa7407a..232f3f6c 100644
--- a/proto/ospf/ospf.c
+++ b/proto/ospf/ospf.c
@@ -167,7 +167,7 @@ ospf_area_add(struct proto_ospf *po, struct ospf_area_config *ac, int reconf)
#ifdef OSPFv2
oa->options = ac->type;
#else /* OSPFv3 */
- oa->options = OPT_R | ac->type | OPT_V6;
+ oa->options = ac->type | OPT_V6 | (po->stub_router ? 0 : OPT_R);
#endif
/*
@@ -232,7 +232,9 @@ ospf_start(struct proto *p)
struct ospf_area_config *ac;
po->router_id = proto_get_router_id(p->cf);
+ po->last_vlink_id = 0x80000000;
po->rfc1583 = c->rfc1583;
+ po->stub_router = c->stub_router;
po->ebit = 0;
po->ecmp = c->ecmp;
po->tick = c->tick;
@@ -689,7 +691,7 @@ ospf_area_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac)
#ifdef OSPFv2
oa->options = nac->type;
#else /* OSPFv3 */
- oa->options = OPT_R | nac->type | OPT_V6;
+ oa->options = nac->type | OPT_V6 | (oa->po->stub_router ? 0 : OPT_R);
#endif
if (oa_is_nssa(oa) && (oa->po->areano > 1))
oa->po->ebit = 1;
@@ -728,12 +730,16 @@ ospf_reconfigure(struct proto *p, struct proto_config *c)
struct ospf_iface *ifa, *ifx;
struct ospf_iface_patt *ip;
+ if (proto_get_router_id(c) != po->router_id)
+ return 0;
+
if (po->rfc1583 != new->rfc1583)
return 0;
if (old->abr != new->abr)
return 0;
+ po->stub_router = new->stub_router;
po->ecmp = new->ecmp;
po->tick = new->tick;
po->disp_timer->recurrent = po->tick;
@@ -827,6 +833,7 @@ ospf_sh(struct proto *p)
cli_msg(-1014, "%s:", p->name);
cli_msg(-1014, "RFC1583 compatibility: %s", (po->rfc1583 ? "enable" : "disabled"));
+ cli_msg(-1014, "Stub router: %s", (po->stub_router ? "Yes" : "No"));
cli_msg(-1014, "RT scheduler tick: %d", po->tick);
cli_msg(-1014, "Number of areas: %u", po->areano);
cli_msg(-1014, "Number of LSAs in DB:\t%u", po->gr->hash_entries);
@@ -953,8 +960,10 @@ lsa_compare_for_state(const void *p1, const void *p2)
struct ospf_lsa_header *lsa1 = &(he1->lsa);
struct ospf_lsa_header *lsa2 = &(he2->lsa);
- if (he1->domain != he2->domain)
- return he1->domain - he2->domain;
+ if (he1->domain < he2->domain)
+ return -1;
+ if (he1->domain > he2->domain)
+ return 1;
#ifdef OSPFv3
struct ospf_lsa_header lsatmp1, lsatmp2;
@@ -979,14 +988,18 @@ lsa_compare_for_state(const void *p1, const void *p2)
{
#ifdef OSPFv3
/* In OSPFv3, neworks are named base on ID of DR */
- if (lsa1->rt != lsa2->rt)
- return lsa1->rt - lsa2->rt;
+ if (lsa1->rt < lsa2->rt)
+ return -1;
+ if (lsa1->rt > lsa2->rt)
+ return 1;
#endif
/* For OSPFv2, this is IP of the network,
for OSPFv3, this is interface ID */
- if (lsa1->id != lsa2->id)
- return lsa1->id - lsa2->id;
+ if (lsa1->id < lsa2->id)
+ return -1;
+ if (lsa1->id > lsa2->id)
+ return 1;
#ifdef OSPFv3
if (px1 != px2)
@@ -997,14 +1010,20 @@ lsa_compare_for_state(const void *p1, const void *p2)
}
else
{
- if (lsa1->rt != lsa2->rt)
- return lsa1->rt - lsa2->rt;
+ if (lsa1->rt < lsa2->rt)
+ return -1;
+ if (lsa1->rt > lsa2->rt)
+ return 1;
- if (lsa1->type != lsa2->type)
- return lsa1->type - lsa2->type;
-
- if (lsa1->id != lsa2->id)
- return lsa1->id - lsa2->id;
+ if (lsa1->type < lsa2->type)
+ return -1;
+ if (lsa1->type > lsa2->type)
+ return 1;
+
+ if (lsa1->id < lsa2->id)
+ return -1;
+ if (lsa1->id > lsa2->id)
+ return 1;
#ifdef OSPFv3
if (px1 != px2)
@@ -1023,12 +1042,16 @@ ext_compare_for_state(const void *p1, const void *p2)
struct ospf_lsa_header *lsa1 = &(he1->lsa);
struct ospf_lsa_header *lsa2 = &(he2->lsa);
- if (lsa1->rt != lsa2->rt)
- return lsa1->rt - lsa2->rt;
+ if (lsa1->rt < lsa2->rt)
+ return -1;
+ if (lsa1->rt > lsa2->rt)
+ return 1;
+
+ if (lsa1->id < lsa2->id)
+ return -1;
+ if (lsa1->id > lsa2->id)
+ return 1;
- if (lsa1->id != lsa2->id)
- return lsa1->id - lsa2->id;
-
return lsa1->sn - lsa2->sn;
}
diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h
index 3bffaf91..46a1c3c1 100644
--- a/proto/ospf/ospf.h
+++ b/proto/ospf/ospf.h
@@ -46,6 +46,7 @@ do { if ((p->debug & D_PACKETS) || OSPF_FORCE_DEBUG) \
#include "nest/route.h"
#include "nest/cli.h"
#include "nest/locks.h"
+#include "nest/bfd.h"
#include "conf/conf.h"
#include "lib/string.h"
@@ -83,6 +84,7 @@ struct ospf_config
struct proto_config c;
unsigned tick;
byte rfc1583;
+ byte stub_router;
byte abr;
int ecmp;
list area_list; /* list of struct ospf_area_config */
@@ -189,7 +191,8 @@ struct ospf_iface
u32 rxmtint; /* number of seconds between LSA retransmissions */
u32 pollint; /* Poll interval */
u32 deadint; /* after "deadint" missing hellos is router dead */
- u32 vid; /* Id of peer of virtual link */
+ u32 iface_id; /* Interface ID (iface->index or new value for vlinks) */
+ u32 vid; /* ID of peer of virtual link */
ip_addr vip; /* IP of peer of virtual link */
struct ospf_iface *vifa; /* OSPF iface which the vlink goes through */
struct ospf_area *voa; /* OSPF area which the vlink goes through */
@@ -272,6 +275,9 @@ struct ospf_iface
u16 rxbuf; /* Buffer size */
u8 check_link; /* Whether iface link change is used */
u8 ecmp_weight; /* Weight used for ECMP */
+ u8 ptp_netmask; /* Send real netmask for P2P */
+ u8 check_ttl; /* Check incoming packets for TTL 255 */
+ u8 bfd; /* Use BFD on iface */
};
struct ospf_md5
@@ -704,6 +710,7 @@ struct ospf_neighbor
#define ACKL_DIRECT 0
#define ACKL_DELAY 1
timer *ackd_timer; /* Delayed ack timer */
+ struct bfd_request *bfd_req; /* BFD request, if BFD is used */
u32 csn; /* Last received crypt seq number (for MD5) */
};
@@ -769,6 +776,7 @@ struct proto_ospf
int areano; /* Number of area I belong to */
struct fib rtf; /* Routing table */
byte rfc1583; /* RFC1583 compatibility */
+ byte stub_router; /* Do not forward transit traffic */
byte ebit; /* Did I originate any ext lsa? */
byte ecmp; /* Maximal number of nexthops in ECMP route, or 0 */
struct ospf_area *backbone; /* If exists */
@@ -776,6 +784,7 @@ struct proto_ospf
int lsab_size, lsab_used;
linpool *nhpool; /* Linpool used for next hops computed in SPF */
u32 router_id;
+ u32 last_vlink_id; /* Interface IDs for vlinks (starts at 0x80000000) */
};
struct ospf_iface_patt
@@ -795,6 +804,8 @@ struct ospf_iface_patt
u32 priority;
u32 voa;
u32 vid;
+ int tx_tos;
+ int tx_priority;
u16 rxbuf;
#define OSPF_RXBUF_NORMAL 0
#define OSPF_RXBUF_LARGE 1
@@ -808,6 +819,9 @@ struct ospf_iface_patt
u8 check_link;
u8 ecmp_weight;
u8 real_bcast; /* Not really used in OSPFv3 */
+ u8 ptp_netmask; /* bool + 2 for unspecified */
+ u8 ttl_security; /* bool + 2 for TX only */
+ u8 bfd;
#ifdef OSPFv2
list *passwords;
diff --git a/proto/ospf/packet.c b/proto/ospf/packet.c
index 241a58f7..4338bc1a 100644
--- a/proto/ospf/packet.c
+++ b/proto/ospf/packet.c
@@ -309,6 +309,12 @@ ospf_rx_hook(sock *sk, int size)
return 1;
}
+ if (ifa->check_ttl && (sk->ttl < 255))
+ {
+ log(L_ERR "%s%I - TTL %d (< 255)", mesg, sk->faddr, sk->ttl);
+ return 1;
+ }
+
if ((unsigned) size < sizeof(struct ospf_packet))
{
log(L_ERR "%s%I - too short (%u bytes)", mesg, sk->faddr, size);
diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c
index 1053fd07..52110aa1 100644
--- a/proto/ospf/rt.c
+++ b/proto/ospf/rt.c
@@ -501,6 +501,10 @@ ospf_rt_spfa(struct ospf_area *oa)
#ifdef OSPFv2
ospf_rt_spfa_rtlinks(oa, act, act);
#else /* OSPFv3 */
+ /* Errata 2078 to RFC 5340 4.8.1 - skip links from non-routing nodes */
+ if ((act != oa->rt) && !(rt->options & OPT_R))
+ break;
+
for (tmp = ospf_hash_find_rt_first(po->gr, act->domain, act->lsa.rt);
tmp; tmp = ospf_hash_find_rt_next(tmp))
ospf_rt_spfa_rtlinks(oa, act, tmp);
@@ -1839,7 +1843,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
if (en->lsa.type == LSA_T_RT)
{
struct ospf_lsa_rt *rt = en->lsa_body;
- if (!(rt->options & OPT_V6) || !(rt->options & OPT_R))
+ if (!(rt->options & OPT_V6))
return;
}
#endif
diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c
index ec012b22..f25db9a7 100644
--- a/proto/ospf/topology.c
+++ b/proto/ospf/topology.c
@@ -103,7 +103,8 @@ lsab_alloc(struct proto_ospf *po, unsigned size)
if (po->lsab_used > po->lsab_size)
{
po->lsab_size = MAX(po->lsab_used, 2 * po->lsab_size);
- po->lsab = mb_realloc(po->proto.pool, po->lsab, po->lsab_size);
+ po->lsab = po->lsab ? mb_realloc(po->lsab, po->lsab_size):
+ mb_alloc(po->proto.pool, po->lsab_size);
}
return ((byte *) po->lsab) + offset;
}
@@ -233,6 +234,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length)
WALK_LIST(ifa, po->iface_list)
{
int net_lsa = 0;
+ u32 link_cost = po->stub_router ? 0xffff : ifa->cost;
if ((ifa->type == OSPF_IT_VLINK) && (ifa->voa == oa) &&
(!EMPTY_LIST(ifa->neigh_list)))
@@ -258,9 +260,17 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length)
ln = lsab_alloc(po, sizeof(struct ospf_lsa_rt_link));
ln->type = LSART_PTP;
ln->id = neigh->rid;
- ln->data = (ifa->addr->flags & IA_PEER) ?
- ifa->iface->index : ipa_to_u32(ifa->addr->ip);
- ln->metric = ifa->cost;
+
+ /*
+ * ln->data should be ifa->iface_id in case of no/ptp
+ * address (ifa->addr->flags & IA_PEER) on PTP link (see
+ * RFC 2328 12.4.1.1.), but the iface ID value has no use,
+ * while using IP address even in this case is here for
+ * compatibility with some broken implementations that use
+ * this address as a next-hop.
+ */
+ ln->data = ipa_to_u32(ifa->addr->ip);
+ ln->metric = link_cost;
ln->padding = 0;
i++;
}
@@ -274,7 +284,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length)
ln->type = LSART_NET;
ln->id = ipa_to_u32(ifa->drip);
ln->data = ipa_to_u32(ifa->addr->ip);
- ln->metric = ifa->cost;
+ ln->metric = link_cost;
ln->padding = 0;
i++;
net_lsa = 1;
@@ -289,7 +299,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length)
ln->type = LSART_VLNK;
ln->id = neigh->rid;
ln->data = ipa_to_u32(ifa->addr->ip);
- ln->metric = ifa->cost;
+ ln->metric = link_cost;
ln->padding = 0;
i++;
}
@@ -305,7 +315,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length)
/* Now we will originate stub area if there is no primary */
if (net_lsa ||
(ifa->type == OSPF_IT_VLINK) ||
- (ifa->addr->flags & IA_PEER) ||
+ ((ifa->addr->flags & IA_PEER) && ! ifa->cf->stub) ||
configured_stubnet(oa, ifa->addr))
continue;
@@ -368,7 +378,7 @@ add_lsa_rt_link(struct proto_ospf *po, struct ospf_iface *ifa, u8 type, u32 nif,
ln->type = type;
ln->padding = 0;
ln->metric = ifa->cost;
- ln->lif = ifa->iface->index;
+ ln->lif = ifa->iface_id;
ln->nif = nif;
ln->id = id;
}
@@ -546,7 +556,7 @@ originate_net_lsa_body(struct ospf_iface *ifa, u16 *length,
if (n->state == NEIGHBOR_FULL)
{
#ifdef OSPFv3
- en = ospf_hash_find(po->gr, ifa->iface->index, n->iface_id, n->rid, LSA_T_LINK);
+ en = ospf_hash_find(po->gr, ifa->iface_id, n->iface_id, n->rid, LSA_T_LINK);
if (en)
options |= ((struct ospf_lsa_link *) en->lsa_body)->options;
#endif
@@ -596,7 +606,7 @@ originate_net_lsa(struct ospf_iface *ifa)
lsa.options = ifa->oa->options;
lsa.id = ipa_to_u32(ifa->addr->ip);
#else /* OSPFv3 */
- lsa.id = ifa->iface->index;
+ lsa.id = ifa->iface_id;
#endif
lsa.rt = po->router_id;
@@ -1207,10 +1217,10 @@ originate_link_lsa(struct ospf_iface *ifa)
lsa.age = 0;
lsa.type = LSA_T_LINK;
- lsa.id = ifa->iface->index;
+ lsa.id = ifa->iface_id;
lsa.rt = po->router_id;
lsa.sn = get_seqnum(ifa->link_lsa);
- u32 dom = ifa->iface->index;
+ u32 dom = ifa->iface_id;
body = originate_link_lsa_body(ifa, &lsa.length);
lsasum_calculate(&lsa, body);
@@ -1249,7 +1259,6 @@ originate_prefix_rt_lsa_body(struct ospf_area *oa, u16 *length)
struct ospf_config *cf = (struct ospf_config *) (po->proto.cf);
struct ospf_iface *ifa;
struct ospf_lsa_prefix *lp;
- struct ifa *vlink_addr = NULL;
int host_addr = 0;
int net_lsa;
int i = 0;
@@ -1263,7 +1272,7 @@ originate_prefix_rt_lsa_body(struct ospf_area *oa, u16 *length)
WALK_LIST(ifa, po->iface_list)
{
- if ((ifa->oa != oa) || (ifa->state == OSPF_IS_DOWN))
+ if ((ifa->oa != oa) || (ifa->type == OSPF_IT_VLINK) || (ifa->state == OSPF_IS_DOWN))
continue;
ifa->px_pos_beg = i;
@@ -1282,9 +1291,6 @@ originate_prefix_rt_lsa_body(struct ospf_area *oa, u16 *length)
(a->scope <= SCOPE_LINK))
continue;
- if (!vlink_addr)
- vlink_addr = a;
-
if (((a->pxlen < MAX_PREFIX_LENGTH) && net_lsa) ||
configured_stubnet(oa, a))
continue;
@@ -1304,23 +1310,41 @@ originate_prefix_rt_lsa_body(struct ospf_area *oa, u16 *length)
ifa->px_pos_end = i;
}
- /* If there are some configured vlinks, add some global address,
- which will be used as a vlink endpoint. */
- if (!EMPTY_LIST(cf->vlink_list) && !host_addr && vlink_addr)
- {
- lsa_put_prefix(po, vlink_addr->ip, MAX_PREFIX_LENGTH, 0);
- i++;
- }
-
struct ospf_stubnet_config *sn;
if (oa->ac)
WALK_LIST(sn, oa->ac->stubnet_list)
if (!sn->hidden)
{
lsa_put_prefix(po, sn->px.addr, sn->px.len, sn->cost);
+ if (sn->px.len == MAX_PREFIX_LENGTH)
+ host_addr = 1;
+ i++;
+ }
+
+ /* If there are some configured vlinks, find some global address
+ (even from another area), which will be used as a vlink endpoint. */
+ if (!EMPTY_LIST(cf->vlink_list) && !host_addr)
+ {
+ WALK_LIST(ifa, po->iface_list)
+ {
+ if ((ifa->type == OSPF_IT_VLINK) || (ifa->state == OSPF_IS_DOWN))
+ continue;
+
+ struct ifa *a;
+ WALK_LIST(a, ifa->iface->addrs)
+ {
+ if ((a->flags & IA_SECONDARY) || (a->scope <= SCOPE_LINK))
+ continue;
+
+ /* Found some IP */
+ lsa_put_prefix(po, a->ip, MAX_PREFIX_LENGTH, 0);
i++;
+ goto done;
}
+ }
+ }
+ done:
lp = po->lsab;
lp->pxcount = i;
*length = po->lsab_used + sizeof(struct ospf_lsa_header);
@@ -1389,15 +1413,12 @@ add_prefix(struct proto_ospf *po, u32 *px, int offset, int *pxc)
{
u32 *pxl = lsab_offset(po, offset);
int i;
- for (i = 0; i < *pxc; i++)
+ for (i = 0; i < *pxc; pxl = prefix_advance(pxl), i++)
+ if (prefix_same(px, pxl))
{
- if (prefix_same(px, pxl))
- {
- /* Options should be logically OR'ed together */
- *pxl |= *px;
- return;
- }
- pxl = prefix_advance(pxl);
+ /* Options should be logically OR'ed together */
+ *pxl |= (*px & 0x00FF0000);
+ return;
}
ASSERT(pxl == lsab_end(po));
@@ -1405,6 +1426,7 @@ add_prefix(struct proto_ospf *po, u32 *px, int offset, int *pxc)
int pxspace = prefix_space(px);
pxl = lsab_alloc(po, pxspace);
memcpy(pxl, px, pxspace);
+ *pxl &= 0xFFFF0000; /* Set metric to zero */
(*pxc)++;
}
@@ -1415,11 +1437,21 @@ add_link_lsa(struct proto_ospf *po, struct top_hash_entry *en, int offset, int *
u32 *pxb = ll->rest;
int j;
- for (j = 0; j < ll->pxcount; j++)
- {
- add_prefix(po, pxb, offset, pxc);
- pxb = prefix_advance(pxb);
- }
+ for (j = 0; j < ll->pxcount; pxb = prefix_advance(pxb), j++)
+ {
+ u8 pxlen = (pxb[0] >> 24);
+ u8 pxopts = (pxb[0] >> 16);
+
+ /* Skip NU or LA prefixes */
+ if (pxopts & (OPT_PX_NU | OPT_PX_LA))
+ continue;
+
+ /* Skip link-local prefixes */
+ if ((pxlen >= 10) && ((pxb[1] & 0xffc00000) == 0xfe800000))
+ continue;
+
+ add_prefix(po, pxb, offset, pxc);
+ }
}
@@ -1449,7 +1481,7 @@ originate_prefix_net_lsa_body(struct ospf_iface *ifa, u16 *length)
WALK_LIST(n, ifa->neigh_list)
if ((n->state == NEIGHBOR_FULL) &&
- (en = ospf_hash_find(po->gr, ifa->iface->index, n->iface_id, n->rid, LSA_T_LINK)))
+ (en = ospf_hash_find(po->gr, ifa->iface_id, n->iface_id, n->rid, LSA_T_LINK)))
add_link_lsa(po, en, offset, &pxc);
lp = po->lsab;
@@ -1471,7 +1503,7 @@ originate_prefix_net_lsa(struct ospf_iface *ifa)
lsa.age = 0;
lsa.type = LSA_T_PREFIX;
- lsa.id = ifa->iface->index;
+ lsa.id = ifa->iface_id;
lsa.rt = po->router_id;
lsa.sn = get_seqnum(ifa->pxn_lsa);
u32 dom = ifa->oa->areaid;
@@ -1642,7 +1674,7 @@ ospf_lsa_domain(u32 type, struct ospf_iface *ifa)
switch (type & LSA_SCOPE_MASK)
{
case LSA_SCOPE_LINK:
- return ifa->iface->index;
+ return ifa->iface_id;
case LSA_SCOPE_AREA:
return ifa->oa->areaid;
diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c
index 5bae8614..2e206038 100644
--- a/proto/pipe/pipe.c
+++ b/proto/pipe/pipe.c
@@ -210,6 +210,11 @@ pipe_postconfig(struct proto_config *C)
cf_error("Name of peer routing table not specified");
if (c->peer == C->table)
cf_error("Primary table and peer table must be different");
+
+ if (C->in_keep_filtered)
+ cf_error("Pipe protocol prohibits keeping filtered routes");
+ if (C->rx_limit)
+ cf_error("Pipe protocol does not support receive limits");
}
extern int proto_reconfig_type;
diff --git a/proto/radv/config.Y b/proto/radv/config.Y
index abccd2c7..ff70a2f7 100644
--- a/proto/radv/config.Y
+++ b/proto/radv/config.Y
@@ -30,9 +30,9 @@ CF_KEYWORDS(RADV, PREFIX, INTERFACE, MIN, MAX, RA, DELAY, INTERVAL,
MANAGED, OTHER, CONFIG, LINK, MTU, REACHABLE, TIME, RETRANS,
TIMER, CURRENT, HOP, LIMIT, DEFAULT, VALID, PREFERRED, MULT,
LIFETIME, SKIP, ONLINK, AUTONOMOUS, RDNSS, DNSSL, NS, DOMAIN,
- LOCAL)
+ LOCAL, TRIGGER, SENSITIVE)
-%type<i> radv_mult
+%type<i> radv_mult radv_sensitive
CF_GRAMMAR
@@ -53,6 +53,11 @@ radv_proto_item:
| PREFIX radv_prefix { add_tail(&RADV_CFG->pref_list, NODE this_radv_prefix); }
| RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_CFG->rdnss_list, &radv_dns_list); }
| DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_CFG->dnssl_list, &radv_dns_list); }
+ | TRIGGER prefix {
+ RADV_CFG->trigger_prefix = $2.addr;
+ RADV_CFG->trigger_pxlen = $2.len;
+ RADV_CFG->trigger_valid = 1;
+ }
;
radv_proto_opts:
@@ -78,6 +83,7 @@ radv_iface_start:
RADV_IFACE->min_delay = DEFAULT_MIN_DELAY;
RADV_IFACE->current_hop_limit = DEFAULT_CURRENT_HOP_LIMIT;
RADV_IFACE->default_lifetime = -1;
+ RADV_IFACE->default_lifetime_sensitive = 1;
};
radv_iface_item:
@@ -90,7 +96,11 @@ radv_iface_item:
| REACHABLE TIME expr { RADV_IFACE->reachable_time = $3; if (($3 < 0) || ($3 > 3600000)) cf_error("Reachable time must be in range 0-3600000"); }
| RETRANS TIMER expr { RADV_IFACE->retrans_timer = $3; if ($3 < 0) cf_error("Retrans timer must be 0 or positive"); }
| CURRENT HOP LIMIT expr { RADV_IFACE->current_hop_limit = $4; if (($4 < 0) || ($4 > 255)) cf_error("Current hop limit must be in range 0-255"); }
- | DEFAULT LIFETIME expr { RADV_IFACE->default_lifetime = $3; if (($3 < 0) || ($3 > 9000)) cf_error("Default lifetime must be in range 0-9000"); }
+ | DEFAULT LIFETIME expr radv_sensitive {
+ RADV_IFACE->default_lifetime = $3;
+ if (($3 < 0) || ($3 > 9000)) cf_error("Default lifetime must be in range 0-9000");
+ if ($4 != -1) RADV_IFACE->default_lifetime_sensitive = $4;
+ }
| PREFIX radv_prefix { add_tail(&RADV_IFACE->pref_list, NODE this_radv_prefix); }
| RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_IFACE->rdnss_list, &radv_dns_list); }
| DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_IFACE->dnssl_list, &radv_dns_list); }
@@ -103,7 +113,7 @@ radv_iface_finish:
struct radv_iface_config *ic = RADV_IFACE;
if (ic->min_ra_int == (u32) -1)
- ic->min_ra_int = _MAX(ic->max_ra_int / 3, 3);
+ ic->min_ra_int = MAX_(ic->max_ra_int / 3, 3);
if (ic->default_lifetime == (u32) -1)
ic->default_lifetime = 3 * ic->max_ra_int;
@@ -147,14 +157,25 @@ radv_prefix_item:
SKIP bool { RADV_PREFIX->skip = $2; }
| ONLINK bool { RADV_PREFIX->onlink = $2; }
| AUTONOMOUS bool { RADV_PREFIX->autonomous = $2; }
- | VALID LIFETIME expr { RADV_PREFIX->valid_lifetime = $3; if ($3 < 0) cf_error("Valid lifetime must be 0 or positive"); }
- | PREFERRED LIFETIME expr { RADV_PREFIX->preferred_lifetime = $3; if ($3 < 0) cf_error("Preferred lifetime must be 0 or positive"); }
+ | VALID LIFETIME expr radv_sensitive {
+ RADV_PREFIX->valid_lifetime = $3;
+ if ($3 < 0) cf_error("Valid lifetime must be 0 or positive");
+ if ($4 != -1) RADV_PREFIX->valid_lifetime_sensitive = $4;
+ }
+ | PREFERRED LIFETIME expr radv_sensitive {
+ RADV_PREFIX->preferred_lifetime = $3;
+ if ($3 < 0) cf_error("Preferred lifetime must be 0 or positive");
+ if ($4 != -1) RADV_PREFIX->preferred_lifetime_sensitive = $4;
+ }
;
radv_prefix_finish:
{
if (RADV_PREFIX->preferred_lifetime > RADV_PREFIX->valid_lifetime)
cf_error("Preferred lifetime must be at most Valid lifetime");
+
+ if (RADV_PREFIX->valid_lifetime_sensitive > RADV_PREFIX->preferred_lifetime_sensitive)
+ cf_error("Valid lifetime sensitive requires that Preferred lifetime is sensitive too");
};
radv_prefix_opts:
@@ -268,6 +289,11 @@ radv_mult:
| MULT expr { $$ = 0; radv_mult_val = $2; if (($2 < 1) || ($2 > 254)) cf_error("Multiplier must be in range 1-254"); }
;
+radv_sensitive: /* optional "SENSITIVE <bool>" suffix of a lifetime option */
+ /* empty */ { $$ = -1; } /* -1 = keyword omitted; users of this rule then leave the field untouched */
+ | SENSITIVE bool { $$ = $2; } /* explicit value taken from the bool */
+ ;
+
CF_CODE
CF_END
diff --git a/proto/radv/packets.c b/proto/radv/packets.c
index 6fdfcaa3..38abaa4c 100644
--- a/proto/radv/packets.c
+++ b/proto/radv/packets.c
@@ -240,6 +240,7 @@ radv_prepare_ra(struct radv_iface *ifa)
{
struct proto_radv *ra = ifa->ra;
struct radv_config *cf = (struct radv_config *) (ra->p.cf);
+ struct radv_iface_config *ic = ifa->cf;
char *buf = ifa->sk->tbuf;
char *bufstart = buf;
@@ -249,21 +250,22 @@ radv_prepare_ra(struct radv_iface *ifa)
pkt->type = ICMPV6_RA;
pkt->code = 0;
pkt->checksum = 0;
- pkt->current_hop_limit = ifa->cf->current_hop_limit;
- pkt->flags = (ifa->cf->managed ? OPT_RA_MANAGED : 0) |
- (ifa->cf->other_config ? OPT_RA_OTHER_CFG : 0);
- pkt->router_lifetime = htons(ifa->cf->default_lifetime);
- pkt->reachable_time = htonl(ifa->cf->reachable_time);
- pkt->retrans_timer = htonl(ifa->cf->retrans_timer);
+ pkt->current_hop_limit = ic->current_hop_limit;
+ pkt->flags = (ic->managed ? OPT_RA_MANAGED : 0) |
+ (ic->other_config ? OPT_RA_OTHER_CFG : 0);
+ pkt->router_lifetime = (ra->active || !ic->default_lifetime_sensitive) ?
+ htons(ic->default_lifetime) : 0;
+ pkt->reachable_time = htonl(ic->reachable_time);
+ pkt->retrans_timer = htonl(ic->retrans_timer);
buf += sizeof(*pkt);
- if (ifa->cf->link_mtu)
+ if (ic->link_mtu)
{
struct radv_opt_mtu *om = (void *) buf;
om->type = OPT_MTU;
om->length = 1;
om->reserved = 0;
- om->mtu = htonl(ifa->cf->link_mtu);
+ om->mtu = htonl(ic->link_mtu);
buf += sizeof (*om);
}
@@ -288,26 +290,28 @@ radv_prepare_ra(struct radv_iface *ifa)
op->pxlen = addr->pxlen;
op->flags = (pc->onlink ? OPT_PX_ONLINK : 0) |
(pc->autonomous ? OPT_PX_AUTONOMOUS : 0);
- op->valid_lifetime = htonl(pc->valid_lifetime);
- op->preferred_lifetime = htonl(pc->preferred_lifetime);
+ op->valid_lifetime = (ra->active || !pc->valid_lifetime_sensitive) ?
+ htonl(pc->valid_lifetime) : 0;
+ op->preferred_lifetime = (ra->active || !pc->preferred_lifetime_sensitive) ?
+ htonl(pc->preferred_lifetime) : 0;
op->reserved = 0;
op->prefix = addr->prefix;
ipa_hton(op->prefix);
buf += sizeof(*op);
}
- if (! ifa->cf->rdnss_local)
+ if (! ic->rdnss_local)
if (radv_prepare_rdnss(ifa, &cf->rdnss_list, &buf, bufend) < 0)
goto done;
- if (radv_prepare_rdnss(ifa, &ifa->cf->rdnss_list, &buf, bufend) < 0)
+ if (radv_prepare_rdnss(ifa, &ic->rdnss_list, &buf, bufend) < 0)
goto done;
- if (! ifa->cf->dnssl_local)
+ if (! ic->dnssl_local)
if (radv_prepare_dnssl(ifa, &cf->dnssl_list, &buf, bufend) < 0)
goto done;
- if (radv_prepare_dnssl(ifa, &ifa->cf->dnssl_list, &buf, bufend) < 0)
+ if (radv_prepare_dnssl(ifa, &ic->dnssl_list, &buf, bufend) < 0)
goto done;
done:
@@ -391,7 +395,7 @@ static void
radv_err_hook(sock *sk, int err)
{
struct radv_iface *ifa = sk->data;
- log(L_ERR "%s: Socket error: %m", ifa->ra->p.name, err);
+ log(L_ERR "%s: Socket error on %s: %M", ifa->ra->p.name, ifa->iface->name, err);
}
int
diff --git a/proto/radv/radv.c b/proto/radv/radv.c
index 5e7296a3..90408536 100644
--- a/proto/radv/radv.c
+++ b/proto/radv/radv.c
@@ -15,7 +15,7 @@
* The RAdv protocol is implemented in two files: |radv.c| containing
* the interface with BIRD core and the protocol logic and |packets.c|
* handling low level protocol stuff (RX, TX and packet formats).
- * The protocol does not import or export any routes.
+ * The protocol does not export any routes.
*
* The RAdv is structured in the usual way - for each handled interface
* there is a structure &radv_iface that contains a state related to
@@ -30,6 +30,13 @@
* by RA_EV_* codes), and radv_timer(), which triggers sending RAs and
* computes the next timeout.
*
+ * The RAdv protocol could receive routes (through
+ * radv_import_control() and radv_rt_notify()), but only the
+ * configured trigger route is tracked (in &active var). When a radv
+ * protocol is reconfigured, the connected routing table is examined
+ * (in radv_check_active()) so that &active has the proper value in
+ * case the specified trigger prefix was changed.
+ *
* Supported standards:
* - RFC 4861 - main RA standard
* - RFC 6106 - DNS extensions (RDDNS, DNSSL)
@@ -93,6 +100,16 @@ radv_iface_notify(struct radv_iface *ifa, int event)
tm_start(ifa->timer, after);
}
+/*
+ * Deliver @event to every interface on ra->iface_list by calling
+ * radv_iface_notify() for each of them.
+ */
+static void
+radv_iface_notify_all(struct proto_radv *ra, int event)
+{
+ struct radv_iface *ifa;
+
+ WALK_LIST(ifa, ra->iface_list)
+ radv_iface_notify(ifa, event);
+}
+
+
static struct radv_iface *
radv_iface_find(struct proto_radv *ra, struct iface *what)
{
@@ -238,11 +255,68 @@ radv_ifa_notify(struct proto *p, unsigned flags, struct ifa *a)
radv_iface_notify(ifa, RA_EV_CHANGE);
}
+/*
+ * Return nonzero iff a trigger route is configured (cf->trigger_valid) and
+ * network @n has exactly the configured trigger prefix and prefix length.
+ */
+static inline int radv_net_match_trigger(struct radv_config *cf, net *n)
+{
+ return cf->trigger_valid &&
+ (n->n.pxlen == cf->trigger_pxlen) &&
+ ipa_equal(n->n.prefix, cf->trigger_prefix);
+}
+
+/*
+ * Route filter hook of the RAdv protocol: a route for the configured
+ * trigger network is answered with RIC_PROCESS, every other route with
+ * RIC_DROP. Only (*new)->net is inspected; @attrs and @pool are unused.
+ */
+int
+radv_import_control(struct proto *p, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED)
+{
+ // struct proto_radv *ra = (struct proto_radv *) p;
+ struct radv_config *cf = (struct radv_config *) (p->cf);
+
+ if (radv_net_match_trigger(cf, (*new)->net))
+ return RIC_PROCESS;
+
+ return RIC_DROP;
+}
+
+/*
+ * Route change hook: tracks presence of the trigger route in ra->active.
+ *
+ * Notifications for networks other than the configured trigger are ignored.
+ * For the trigger network, ra->active becomes 1 when @new is non-NULL and 0
+ * otherwise. Only an actual change of the flag is traced ("Triggered" /
+ * "Suppressed") and propagated to all interfaces as RA_EV_CHANGE.
+ */
+static void
+radv_rt_notify(struct proto *p, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs UNUSED)
+{
+ struct proto_radv *ra = (struct proto_radv *) p;
+ struct radv_config *cf = (struct radv_config *) (p->cf);
+
+ if (radv_net_match_trigger(cf, n))
+ {
+ u8 old_active = ra->active;
+ ra->active = !!new;
+
+ /* Nothing changed, do not disturb the interfaces */
+ if (ra->active == old_active)
+ return;
+
+ if (ra->active)
+ RADV_TRACE(D_EVENTS, "Triggered");
+ else
+ RADV_TRACE(D_EVENTS, "Suppressed");
+
+ radv_iface_notify_all(ra, RA_EV_CHANGE);
+ }
+}
+
+/*
+ * Compute the value of the 'active' flag from the configuration currently
+ * stored in ra->p.cf. Returns 1 when no trigger route is configured;
+ * otherwise returns the result of rt_examine() for the trigger prefix in
+ * the protocol's table, using the protocol's out_filter.
+ * NOTE(review): rt_examine() presumably reports whether the route exists and
+ * passes the filter - confirm against nest/rt-table.c.
+ */
+static int
+radv_check_active(struct proto_radv *ra)
+{
+ struct radv_config *cf = (struct radv_config *) (ra->p.cf);
+
+ if (! cf->trigger_valid)
+ return 1;
+
+ return rt_examine(ra->p.table, cf->trigger_prefix, cf->trigger_pxlen,
+ &(ra->p), ra->p.cf->out_filter);
+}
+
static struct proto *
radv_init(struct proto_config *c)
{
struct proto *p = proto_new(c, sizeof(struct proto_radv));
+ p->accept_ra_types = RA_OPTIMAL;
+ p->import_control = radv_import_control;
+ p->rt_notify = radv_rt_notify;
p->if_notify = radv_if_notify;
p->ifa_notify = radv_ifa_notify;
return p;
@@ -252,9 +326,10 @@ static int
radv_start(struct proto *p)
{
struct proto_radv *ra = (struct proto_radv *) p;
- // struct radv_config *cf = (struct radv_config *) (p->cf);
+ struct radv_config *cf = (struct radv_config *) (p->cf);
init_list(&(ra->iface_list));
+ ra->active = !cf->trigger_valid;
return PS_UP;
}
@@ -293,6 +368,9 @@ radv_reconfigure(struct proto *p, struct proto_config *c)
* causing nodes to temporary remove their default routes.
*/
+ p->cf = c; /* radv_check_active() requires proper p->cf */
+ ra->active = radv_check_active(ra);
+
struct iface *iface;
WALK_LIST(iface, iface_list)
{
@@ -335,6 +413,14 @@ radv_copy_config(struct proto_config *dest, struct proto_config *src)
cfg_copy_list(&d->pref_list, &s->pref_list, sizeof(struct radv_prefix_config));
}
+/*
+ * Status hook: writes "Suppressed" into @buf when the protocol is not
+ * active; @buf is left untouched otherwise.
+ * NOTE(review): this relies on the caller having initialized @buf and on
+ * @buf being large enough for the string - confirm in the caller.
+ */
+static void
+radv_get_status(struct proto *p, byte *buf)
+{
+ struct proto_radv *ra = (struct proto_radv *) p;
+
+ if (!ra->active)
+ strcpy(buf, "Suppressed");
+}
struct protocol proto_radv = {
.name = "RAdv",
@@ -343,5 +429,6 @@ struct protocol proto_radv = {
.start = radv_start,
.shutdown = radv_shutdown,
.reconfigure = radv_reconfigure,
- .copy_config = radv_copy_config
+ .copy_config = radv_copy_config,
+ .get_status = radv_get_status
};
diff --git a/proto/radv/radv.h b/proto/radv/radv.h
index 48af8c00..f80e4530 100644
--- a/proto/radv/radv.h
+++ b/proto/radv/radv.h
@@ -52,6 +52,10 @@ struct radv_config
list pref_list; /* Global list of prefix configs (struct radv_prefix_config) */
list rdnss_list; /* Global list of RDNSS configs (struct radv_rdnss_config) */
list dnssl_list; /* Global list of DNSSL configs (struct radv_dnssl_config) */
+
+ ip_addr trigger_prefix; /* Prefix of a trigger route, if defined */
+ u8 trigger_pxlen; /* Pxlen of a trigger route, if defined */
+ u8 trigger_valid; /* Whether a trigger route is defined */
};
struct radv_iface_config
@@ -75,6 +79,7 @@ struct radv_iface_config
u32 retrans_timer;
u32 current_hop_limit;
u32 default_lifetime;
+ u8 default_lifetime_sensitive; /* Whether default_lifetime depends on trigger */
};
struct radv_prefix_config
@@ -88,6 +93,8 @@ struct radv_prefix_config
u8 autonomous;
u32 valid_lifetime;
u32 preferred_lifetime;
+ u8 valid_lifetime_sensitive; /* Whether valid_lifetime depends on trigger */
+ u8 preferred_lifetime_sensitive; /* Whether preferred_lifetime depends on trigger */
};
struct radv_rdnss_config
@@ -113,6 +120,7 @@ struct proto_radv
{
struct proto p;
list iface_list; /* List of active ifaces */
+ u8 active; /* Whether radv is active w.r.t. triggers */
};
struct radv_iface
diff --git a/proto/rip/config.Y b/proto/rip/config.Y
index cd4f30e7..791c43a2 100644
--- a/proto/rip/config.Y
+++ b/proto/rip/config.Y
@@ -22,12 +22,18 @@ CF_DEFINES
#define RIP_CFG ((struct rip_proto_config *) this_proto)
#define RIP_IPATT ((struct rip_patt *) this_ipatt)
+#ifdef IPV6
+#define RIP_DEFAULT_TTL_SECURITY 2
+#else
+#define RIP_DEFAULT_TTL_SECURITY 0
+#endif
+
CF_DECLS
CF_KEYWORDS(RIP, INFINITY, METRIC, PORT, PERIOD, GARBAGE, TIMEOUT,
MODE, BROADCAST, MULTICAST, QUIET, NOLISTEN, VERSION1,
- AUTHENTICATION, NONE, PLAINTEXT, MD5,
- HONOR, NEVER, NEIGHBOR, ALWAYS,
+ AUTHENTICATION, NONE, PLAINTEXT, MD5, TTL, SECURITY,
+ HONOR, NEVER, NEIGHBOR, ALWAYS, TX, PRIORITY, ONLY,
RIP_METRIC, RIP_TAG)
%type <i> rip_mode rip_auth
@@ -76,6 +82,10 @@ rip_mode:
rip_iface_item:
| METRIC expr { RIP_IPATT->metric = $2; }
| MODE rip_mode { RIP_IPATT->mode |= $2; }
+ | TX tos { RIP_IPATT->tx_tos = $2; }
+ | TX PRIORITY expr { RIP_IPATT->tx_priority = $3; }
+ | TTL SECURITY bool { RIP_IPATT->ttl_security = $3; }
+ | TTL SECURITY TX ONLY { RIP_IPATT->ttl_security = 2; }
;
rip_iface_opts:
@@ -94,6 +104,9 @@ rip_iface_init:
add_tail(&RIP_CFG->iface_list, NODE this_ipatt);
init_list(&this_ipatt->ipn_list);
RIP_IPATT->metric = 1;
+ RIP_IPATT->tx_tos = IP_PREC_INTERNET_CONTROL;
+ RIP_IPATT->tx_priority = sk_priority_control;
+ RIP_IPATT->ttl_security = RIP_DEFAULT_TTL_SECURITY;
}
;
diff --git a/proto/rip/rip.c b/proto/rip/rip.c
index 9f4f0856..5cc40403 100644
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@ -6,15 +6,14 @@
*
* Can be freely distributed and used under the terms of the GNU GPL.
*
- FIXME: IpV6 support: packet size
- FIXME: (nonurgent) IpV6 support: receive "route using" blocks
- FIXME: (nonurgent) IpV6 support: generate "nexthop" blocks
- next hops are only advisory, and they are pretty ugly in IpV6.
+ FIXME: IPv6 support: packet size
+ FIXME: (nonurgent) IPv6 support: receive "route using" blocks
+ FIXME: (nonurgent) IPv6 support: generate "nexthop" blocks
+ next hops are only advisory, and they are pretty ugly in IPv6.
I suggest just forgetting about them.
FIXME: (nonurgent): fold rip_connection into rip_interface?
- FIXME: (nonurgent) allow bigger frequencies than 1 regular update in 6 seconds (?)
FIXME: propagation of metric=infinity into main routing table may or may not be good idea.
*/
@@ -47,6 +46,7 @@
*/
#undef LOCAL_DEBUG
+#define LOCAL_DEBUG 1
#include "nest/bird.h"
#include "nest/iface.h"
@@ -59,11 +59,11 @@
#include "lib/string.h"
#include "rip.h"
-#include <assert.h>
#define P ((struct rip_proto *) p)
#define P_CF ((struct rip_proto_config *)p->cf)
+#undef TRACE
#define TRACE(level, msg, args...) do { if (p->debug & level) { log(L_TRACE "%s: " msg, p->name , ## args); } } while(0)
static struct rip_interface *new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_patt *patt);
@@ -163,7 +163,7 @@ rip_tx( sock *s )
FIB_ITERATE_START(&P->rtable, &c->iter, z) {
struct rip_entry *e = (struct rip_entry *) z;
- if (!rif->triggered || (!(e->updated < now-5))) {
+ if (!rif->triggered || (!(e->updated < now-2))) { /* FIXME: Should be probably 1 or some different algorithm */
nullupdate = 0;
i = rip_tx_prepare( p, packet->block + i, e, rif, i );
if (i >= maxi) {
@@ -361,26 +361,26 @@ advertise_entry( struct proto *p, struct rip_block *b, ip_addr whotoldme, struct
static void
process_block( struct proto *p, struct rip_block *block, ip_addr whotoldme, struct iface *iface )
{
+ int metric, pxlen;
+
#ifndef IPV6
- int metric = ntohl( block->metric );
+ metric = ntohl( block->metric );
+ pxlen = ipa_mklen(block->netmask);
#else
- int metric = block->metric;
+ metric = block->metric;
+ pxlen = block->pxlen;
#endif
ip_addr network = block->network;
CHK_MAGIC;
-#ifdef IPV6
- TRACE(D_ROUTES, "block: %I tells me: %I/%d available, metric %d... ",
- whotoldme, network, block->pxlen, metric );
-#else
+
TRACE(D_ROUTES, "block: %I tells me: %I/%d available, metric %d... ",
- whotoldme, network, ipa_mklen(block->netmask), metric );
-#endif
+ whotoldme, network, pxlen, metric );
if ((!metric) || (metric > P_CF->infinity)) {
-#ifdef IPV6 /* Someone is sedning us nexthop and we are ignoring it */
+#ifdef IPV6 /* Someone is sending us nexthop and we are ignoring it */
if (metric == 0xff)
- { DBG( "IpV6 nexthop ignored" ); return; }
+ { DBG( "IPv6 nexthop ignored" ); return; }
#endif
log( L_WARN "%s: Got metric %d from %I", p->name, metric, whotoldme );
return;
@@ -483,6 +483,14 @@ rip_rx(sock *s, int size)
iface = i->iface;
#endif
+ if (i->check_ttl && (s->ttl < 255))
+ {
+ log( L_REMOTE "%s: Discarding packet with TTL %d (< 255) from %I on %s",
+ p->name, s->ttl, s->faddr, i->iface->name);
+ return 1;
+ }
+
+
CHK_MAGIC;
DBG( "RIP: message came: %d bytes from %I via %s\n", size, s->faddr, i->iface ? i->iface->name : "(dummy)" );
size -= sizeof( struct rip_packet_heading );
@@ -535,13 +543,10 @@ rip_timer(timer *t)
WALK_LIST_DELSAFE( e, et, P->garbage ) {
rte *rte;
rte = SKIP_BACK( struct rte, u.rip.garbage, e );
-#ifdef LOCAL_DEBUG
- {
- struct proto *p = rte->attrs->proto;
- CHK_MAGIC;
- }
+
+ CHK_MAGIC;
+
DBG( "Garbage: (%p)", rte ); rte_dump( rte );
-#endif
if (now - rte->lastmod > P_CF->timeout_time) {
TRACE(D_EVENTS, "entry is too old: %I", rte->net->n.prefix );
@@ -560,17 +565,23 @@ rip_timer(timer *t)
DBG( "RIP: Broadcasting routing tables\n" );
{
struct rip_interface *rif;
+
+ if ( P_CF->period > 2 ) { /* Bring some randomness into sending times */
+ if (! (P->tx_count % P_CF->period)) P->rnd_count = random_u32() % 2;
+ } else P->rnd_count = P->tx_count % P_CF->period;
+
WALK_LIST( rif, P->interfaces ) {
struct iface *iface = rif->iface;
if (!iface) continue;
if (rif->mode & IM_QUIET) continue;
if (!(iface->flags & IF_UP)) continue;
+ rif->triggered = P->rnd_count;
- rif->triggered = (P->tx_count % 6);
rip_sendto( p, IPA_NONE, 0, rif );
}
- P->tx_count ++;
+ P->tx_count++;
+ P->rnd_count--;
}
DBG( "RIP: tick tock done\n" );
@@ -585,9 +596,9 @@ rip_start(struct proto *p)
struct rip_interface *rif;
DBG( "RIP: starting instance...\n" );
- assert( sizeof(struct rip_packet_heading) == 4);
- assert( sizeof(struct rip_block) == 20);
- assert( sizeof(struct rip_block_auth) == 20);
+ ASSERT(sizeof(struct rip_packet_heading) == 4);
+ ASSERT(sizeof(struct rip_block) == 20);
+ ASSERT(sizeof(struct rip_block_auth) == 20);
#ifdef LOCAL_DEBUG
P->magic = RIP_MAGIC;
@@ -598,10 +609,9 @@ rip_start(struct proto *p)
init_list( &P->interfaces );
P->timer = tm_new( p->pool );
P->timer->data = p;
- P->timer->randomize = 5;
- P->timer->recurrent = (P_CF->period / 6)+1;
+ P->timer->recurrent = 1;
P->timer->hook = rip_timer;
- tm_start( P->timer, 5 );
+ tm_start( P->timer, 2 );
rif = new_iface(p, NULL, 0, NULL); /* Initialize dummy interface */
add_head( &P->interfaces, NODE rif );
CHK_MAGIC;
@@ -677,6 +687,7 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_
rif->mode = PATT->mode;
rif->metric = PATT->metric;
rif->multicast = (!(PATT->mode & IM_BROADCAST)) && (flags & IF_MULTICAST);
+ rif->check_ttl = (PATT->ttl_security == 1);
}
/* lookup multicasts over unnumbered links - no: rip is not defined over unnumbered links */
@@ -697,9 +708,10 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_
rif->sock->dport = P_CF->port;
if (new)
{
- rif->sock->ttl = 1;
- rif->sock->tos = IP_PREC_INTERNET_CONTROL;
- rif->sock->flags = SKF_LADDR_RX;
+ rif->sock->tos = PATT->tx_tos;
+ rif->sock->priority = PATT->tx_priority;
+ rif->sock->ttl = PATT->ttl_security ? 255 : 1;
+ rif->sock->flags = SKF_LADDR_RX | (rif->check_ttl ? SKF_TTL_RX : 0);
}
if (new) {
@@ -948,9 +960,11 @@ rip_rte_insert(net *net UNUSED, rte *rte)
static void
rip_rte_remove(net *net UNUSED, rte *rte)
{
- // struct proto *p = rte->attrs->proto;
+#ifdef LOCAL_DEBUG
+ struct proto *p = rte->attrs->src->proto;
CHK_MAGIC;
DBG( "rip_rte_remove: %p\n", rte );
+#endif
rem_node( &rte->u.rip.garbage );
}
@@ -1000,7 +1014,9 @@ static int
rip_pat_compare(struct rip_patt *a, struct rip_patt *b)
{
return ((a->metric == b->metric) &&
- (a->mode == b->mode));
+	  (a->mode == b->mode) &&
+	  (a->tx_tos == b->tx_tos) &&
+	  (a->tx_priority == b->tx_priority) &&
+	  /* ttl_security drives the socket TTL and RX TTL checking set up in
+	     new_iface(), so a change of it must count as a pattern change */
+	  (a->ttl_security == b->ttl_security));
}
static int
diff --git a/proto/rip/rip.h b/proto/rip/rip.h
index 6a8af379..2a327260 100644
--- a/proto/rip/rip.h
+++ b/proto/rip/rip.h
@@ -114,6 +114,7 @@ struct rip_interface {
struct rip_connection *busy;
int metric; /* You don't want to put struct rip_patt *patt here -- think about reconfigure */
int mode;
+ int check_ttl; /* Check incoming packets for TTL 255 */
int triggered;
struct object_lock *lock;
int multicast;
@@ -128,6 +129,9 @@ struct rip_patt {
#define IM_QUIET 4
#define IM_NOLISTEN 8
#define IM_VERSION1 16
+ int tx_tos;
+ int tx_priority;
+ int ttl_security; /* bool + 2 for TX only (send, but do not check on RX) */
};
struct rip_proto_config {
@@ -162,6 +166,7 @@ struct rip_proto {
int magic;
#endif
int tx_count; /* Do one regular update once in a while */
+ int rnd_count; /* Randomize sending time */
};
#ifdef LOCAL_DEBUG
diff --git a/proto/static/config.Y b/proto/static/config.Y
index f8e84f92..2d9d4b42 100644
--- a/proto/static/config.Y
+++ b/proto/static/config.Y
@@ -18,7 +18,7 @@ static struct static_route *this_srt, *this_srt_nh, *last_srt_nh;
CF_DECLS
CF_KEYWORDS(STATIC, ROUTE, VIA, DROP, REJECT, PROHIBIT, PREFERENCE, CHECK, LINK)
-CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE)
+CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE)
CF_GRAMMAR
@@ -86,9 +86,12 @@ stat_route:
this_srt->dest = RTDX_RECURSIVE;
this_srt->via = $3;
}
- | stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; }
- | stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; }
- | stat_route0 PROHIBIT { this_srt->dest = RTD_PROHIBIT; }
+
+ | stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; }
+ | stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; }
+ | stat_route0 BLACKHOLE { this_srt->dest = RTD_BLACKHOLE; }
+ | stat_route0 UNREACHABLE { this_srt->dest = RTD_UNREACHABLE; }
+ | stat_route0 PROHIBIT { this_srt->dest = RTD_PROHIBIT; }
;
CF_CLI(SHOW STATIC, optsym, [<name>], [[Show details of static protocol]])
diff --git a/proto/static/static.c b/proto/static/static.c
index b6c8948f..9b115acd 100644
--- a/proto/static/static.c
+++ b/proto/static/static.c
@@ -461,7 +461,7 @@ static_reconfigure(struct proto *p, struct proto_config *new)
WALK_LIST(r, n->iface_routes)
{
struct iface *ifa;
- if (ifa = if_find_by_name(r->if_name))
+ if ((ifa = if_find_by_name(r->if_name)) && (ifa->flags & IF_UP))
static_install(p, r, ifa);
}
WALK_LIST(r, n->other_routes)
diff --git a/sysdep/autoconf.h.in b/sysdep/autoconf.h.in
index ac6f7a87..a9e46e27 100644
--- a/sysdep/autoconf.h.in
+++ b/sysdep/autoconf.h.in
@@ -39,10 +39,14 @@
#undef CONFIG_STATIC
#undef CONFIG_RIP
#undef CONFIG_RADV
+#undef CONFIG_BFD
#undef CONFIG_BGP
#undef CONFIG_OSPF
#undef CONFIG_PIPE
+/* We use multithreading */
+#undef USE_PTHREADS
+
/* We have <syslog.h> and syslog() */
#undef HAVE_SYSLOG
diff --git a/sysdep/bsd/Modules b/sysdep/bsd/Modules
index 3729587d..96455db7 100644
--- a/sysdep/bsd/Modules
+++ b/sysdep/bsd/Modules
@@ -1,3 +1,4 @@
krt-sock.c
+krt-sock.Y
krt-sys.h
sysio.h
diff --git a/sysdep/bsd/krt-sock.Y b/sysdep/bsd/krt-sock.Y
new file mode 100644
index 00000000..0218f188
--- /dev/null
+++ b/sysdep/bsd/krt-sock.Y
@@ -0,0 +1,32 @@
+/*
+ * BIRD -- BSD Kernel Syncer Configuration
+ *
+ * (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+CF_HDR
+
+CF_DECLS
+
+CF_KEYWORDS(KERNEL, TABLE)
+
+CF_GRAMMAR
+
+CF_ADDTO(kern_proto, kern_proto kern_sys_item ';')
+
+/*
+ * Kernel protocol option selecting the kernel table id. A nonzero id is
+ * refused when krt_max_tables is 1, any id outside 0 .. krt_max_tables-1 is
+ * refused as well; an accepted id is stored in the sys.table_id field.
+ */
+kern_sys_item:
+ KERNEL TABLE expr {
+ if ($3 && (krt_max_tables == 1))
+ cf_error("Multiple kernel routing tables not supported");
+ if ($3 < 0 || $3 >= krt_max_tables)
+ cf_error("Kernel table id must be in range 0-%d", krt_max_tables - 1);
+
+ THIS_KRT->sys.table_id = $3;
+ }
+ ;
+
+CF_CODE
+
+CF_END
diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c
index 8d45cbfe..69a476d9 100644
--- a/sysdep/bsd/krt-sock.c
+++ b/sysdep/bsd/krt-sock.c
@@ -1,5 +1,5 @@
/*
- * BIRD -- Unix Routing Table Syncing
+ * BIRD -- BSD Routing Table Syncing
*
* (c) 2004 Ondrej Filip <feela@network.cz>
*
@@ -7,6 +7,7 @@
*/
#include <stdio.h>
+#include <stdlib.h>
#include <ctype.h>
#include <fcntl.h>
#include <unistd.h>
@@ -34,18 +35,112 @@
#include "lib/socket.h"
-#ifndef RTAX_MAX
-#define RTAX_MAX 8
+/*
+ * There are significant differences in multiple tables support between BSD variants.
+ *
+ * OpenBSD has table_id field for routes in route socket protocol, therefore all
+ * tables could be managed by one kernel socket. FreeBSD lacks such field,
+ * therefore multiple sockets (locked to specific table using SO_SETFIB socket
+ * option) must be used.
+ *
+ * Both FreeBSD and OpenBSD use separate scans for each table. In OpenBSD,
+ * table_id is specified explicitly as sysctl scan argument, while in FreeBSD it
+ * is handled implicitly by changing default table using setfib() syscall.
+ *
+ * KRT_SHARED_SOCKET - use shared kernel socket instead of one for each krt_proto
+ * KRT_USE_SETFIB_SCAN - use setfib() for sysctl() route scan
+ * KRT_USE_SETFIB_SOCK - use SO_SETFIB socket option for kernel sockets
+ * KRT_USE_SYSCTL_7 - use 7-th arg of sysctl() as table id for route scans
+ * KRT_USE_SYSCTL_NET_FIBS - use net.fibs sysctl() for dynamic max number of fibs
+ */
+
+#ifdef __FreeBSD__
+#define KRT_MAX_TABLES 256
+#define KRT_USE_SETFIB_SCAN
+#define KRT_USE_SETFIB_SOCK
+#define KRT_USE_SYSCTL_NET_FIBS
#endif
-struct ks_msg
+#ifdef __OpenBSD__
+#define KRT_MAX_TABLES (RT_TABLEID_MAX+1)
+#define KRT_SHARED_SOCKET
+#define KRT_USE_SYSCTL_7
+#endif
+
+#ifndef KRT_MAX_TABLES
+#define KRT_MAX_TABLES 1
+#endif
+
+
+
+/* Dynamic max number of tables */
+
+int krt_max_tables;
+
+#ifdef KRT_USE_SYSCTL_NET_FIBS
+
+static int
+krt_get_max_tables(void)
{
- struct rt_msghdr rtm;
- struct sockaddr_storage buf[RTAX_MAX];
-};
+ int fibs;
+ size_t fibs_len = sizeof(fibs);
+
+ if (sysctlbyname("net.fibs", &fibs, &fibs_len, NULL, 0) < 0)
+ {
+ log(L_WARN "KRT: unable to get max number of fib tables: %m");
+ return 1;
+ }
+
+ return MIN(fibs, KRT_MAX_TABLES);
+}
+
+#else
+
+/* Without KRT_USE_SYSCTL_NET_FIBS the limit is the compile-time KRT_MAX_TABLES */
+static int
+krt_get_max_tables(void)
+{
+ return KRT_MAX_TABLES;
+}
+
+#endif /* KRT_USE_SYSCTL_NET_FIBS */
+
+
+/* setfib() syscall for FreeBSD scans */
+
+#ifdef KRT_USE_SETFIB_SCAN
+
+/*
+static int krt_default_fib;
+
+static int
+krt_get_active_fib(void)
+{
+ int fib;
+ size_t fib_len = sizeof(fib);
+
+ if (sysctlbyname("net.my_fibnum", &fib, &fib_len, NULL, 0) < 0)
+ {
+ log(L_WARN "KRT: unable to get active fib number: %m");
+ return 0;
+ }
+
+ return fib;
+}
+*/
+
+extern int setfib(int fib);
+
+#endif /* KRT_USE_SETFIB_SCAN */
+
+
+/* table_id -> krt_proto map */
+
+#ifdef KRT_SHARED_SOCKET
+static struct krt_proto *krt_table_map[KRT_MAX_TABLES];
+#endif
-static int rt_sock = 0;
+/* Route socket message processing */
int
krt_capable(rte *e)
@@ -65,6 +160,16 @@ krt_capable(rte *e)
);
}
+#ifndef RTAX_MAX
+#define RTAX_MAX 8
+#endif
+
+struct ks_msg
+{
+ struct rt_msghdr rtm;
+ struct sockaddr_storage buf[RTAX_MAX];
+};
+
#define ROUNDUP(a) \
((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
@@ -81,7 +186,7 @@ krt_capable(rte *e)
body += l;}
static int
-krt_sock_send(int cmd, rte *e)
+krt_send_route(struct krt_proto *p, int cmd, rte *e)
{
net *net = e->net;
rta *a = e->attrs;
@@ -103,13 +208,13 @@ krt_sock_send(int cmd, rte *e)
msg.rtm.rtm_flags = RTF_UP | RTF_PROTO1;
if (net->n.pxlen == MAX_PREFIX_LENGTH)
- {
msg.rtm.rtm_flags |= RTF_HOST;
- }
else
- {
msg.rtm.rtm_addrs |= RTA_NETMASK;
- }
+
+#ifdef KRT_SHARED_SOCKET
+ msg.rtm.rtm_tableid = KRT_CF->sys.table_id;
+#endif
#ifdef RTF_REJECT
if(a->dest == RTD_UNREACHABLE)
@@ -192,7 +297,7 @@ krt_sock_send(int cmd, rte *e)
l = body - (char *)&msg;
msg.rtm.rtm_msglen = l;
- if ((l = write(rt_sock, (char *)&msg, l)) < 0) {
+ if ((l = write(p->sys.sk->fd, (char *)&msg, l)) < 0) {
log(L_ERR "KRT: Error sending route %I/%d to kernel: %m", net->n.prefix, net->n.pxlen);
return -1;
}
@@ -201,16 +306,16 @@ krt_sock_send(int cmd, rte *e)
}
void
-krt_replace_rte(struct krt_proto *p UNUSED, net *n, rte *new, rte *old,
+krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old,
struct ea_list *eattrs UNUSED)
{
int err = 0;
if (old)
- krt_sock_send(RTM_DELETE, old);
+ krt_send_route(p, RTM_DELETE, old);
if (new)
- err = krt_sock_send(RTM_ADD, new);
+ err = krt_send_route(p, RTM_ADD, new);
if (err < 0)
n->n.flags |= KRF_SYNC_ERROR;
@@ -221,8 +326,10 @@ krt_replace_rte(struct krt_proto *p UNUSED, net *n, rte *new, rte *old,
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
static void
-krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan)
+krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan)
{
+ /* p is NULL iff KRT_SHARED_SOCKET and !scan */
+
rte *e;
net *net;
sockaddr dst, gate, mask;
@@ -244,6 +351,17 @@ krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan)
if (flags & RTF_LLINFO)
SKIP("link-local\n");
+#ifdef KRT_SHARED_SOCKET
+ if (!scan)
+ {
+ int table_id = msg->rtm.rtm_tableid;
+ p = (table_id < KRT_MAX_TABLES) ? krt_table_map[table_id] : NULL;
+
+ if (!p)
+ SKIP("unknown table id %d\n", table_id);
+ }
+#endif
+
GETADDR(&dst, RTA_DST);
GETADDR(&gate, RTA_GATEWAY);
GETADDR(&mask, RTA_NETMASK);
@@ -536,17 +654,25 @@ krt_read_addr(struct ks_msg *msg)
if ((masklen = ipa_mklen(imask)) < 0)
{
- log("Invalid masklen");
+ log(L_ERR "KIF: Invalid masklen %I for %s", imask, iface->name);
return;
}
- bzero(&ifa, sizeof(ifa));
+#ifdef IPV6
+ /* Clean up embedded interface ID returned in link-local address */
+
+ if (ipa_has_link_scope(iaddr))
+ _I0(iaddr) = 0xfe800000;
+
+ if (ipa_has_link_scope(ibrd))
+ _I0(ibrd) = 0xfe800000;
+#endif
- ifa.iface = iface;
- memcpy(&ifa.ip, &iaddr, sizeof(ip_addr));
+ bzero(&ifa, sizeof(ifa));
+ ifa.iface = iface;
+ ifa.ip = iaddr;
ifa.pxlen = masklen;
- memcpy(&ifa.brd, &ibrd, sizeof(ip_addr));
scope = ipa_classify(ifa.ip);
if (scope < 0)
@@ -556,24 +682,10 @@ krt_read_addr(struct ks_msg *msg)
}
ifa.scope = scope & IADDR_SCOPE_MASK;
-#ifdef IPV6
- /* Clean up embedded interface ID returned in link-local address */
- if (ipa_has_link_scope(ifa.ip))
- _I0(ifa.ip) = 0xfe800000;
-#endif
-
-#ifdef IPV6
- /* Why not the same check also for IPv4? */
- if ((iface->flags & IF_MULTIACCESS) || (masklen != BITS_PER_IP_ADDRESS))
-#else
- if (iface->flags & IF_MULTIACCESS)
-#endif
+ if (masklen < BITS_PER_IP_ADDRESS)
{
ifa.prefix = ipa_and(ifa.ip, ipa_mkmask(masklen));
- if (masklen == BITS_PER_IP_ADDRESS)
- ifa.flags |= IA_HOST;
-
if (masklen == (BITS_PER_IP_ADDRESS - 1))
ifa.opposite = ipa_opposite_m1(ifa.ip);
@@ -581,11 +693,22 @@ krt_read_addr(struct ks_msg *msg)
if (masklen == (BITS_PER_IP_ADDRESS - 2))
ifa.opposite = ipa_opposite_m2(ifa.ip);
#endif
+
+ if (iface->flags & IF_BROADCAST)
+ ifa.brd = ibrd;
+
+ if (!(iface->flags & IF_MULTIACCESS))
+ ifa.opposite = ibrd;
}
- else /* PtP iface */
+ else if (!(iface->flags & IF_MULTIACCESS) && ipa_nonzero(ibrd))
{
+ ifa.prefix = ifa.opposite = ibrd;
ifa.flags |= IA_PEER;
- ifa.prefix = ifa.opposite = ifa.brd;
+ }
+ else
+ {
+ ifa.prefix = ifa.ip;
+ ifa.flags |= IA_HOST;
}
if (new)
@@ -594,17 +717,18 @@ krt_read_addr(struct ks_msg *msg)
ifa_delete(&ifa);
}
-
-void
+static void
krt_read_msg(struct proto *p, struct ks_msg *msg, int scan)
{
+ /* p is NULL iff KRT_SHARED_SOCKET and !scan */
+
switch (msg->rtm.rtm_type)
{
case RTM_GET:
if(!scan) return;
case RTM_ADD:
case RTM_DELETE:
- krt_read_rt(msg, (struct krt_proto *)p, scan);
+ krt_read_route(msg, (struct krt_proto *)p, scan);
break;
case RTM_IFANNOUNCE:
krt_read_ifannounce(msg);
@@ -621,14 +745,57 @@ krt_read_msg(struct proto *p, struct ks_msg *msg, int scan)
}
}
+
+/* Sysctl based scans */
+
+static byte *krt_buffer;
+static size_t krt_buflen, krt_bufmin;
+static struct proto *krt_buffer_owner;
+
+/*
+ * Make sure the scan buffer (krt_buffer) can hold *needed bytes.
+ *
+ * The buffer is reallocated (old contents are discarded) when the request
+ * exceeds krt_buflen, or when the request comes from the current
+ * krt_buffer_owner and has dropped below krt_bufmin. After a reallocation
+ * @p becomes the owner. On return *needed holds the real buffer length;
+ * the buffer pointer is returned.
+ */
+static byte *
+krt_buffer_update(struct proto *p, size_t *needed)
+{
+ size_t req = *needed;
+
+ if ((req > krt_buflen) ||
+ ((p == krt_buffer_owner) && (req < krt_bufmin)))
+ {
+ /* min buflen is 32 kB, step is 8 kB, or 128 kB if > 1 MB */
+ size_t step = (req < 0x100000) ? 0x2000 : 0x20000;
+ krt_buflen = (req < 0x6000) ? 0x8000 : (req + step);
+ /* Threshold below which the owner's next request shrinks the buffer */
+ krt_bufmin = (req < 0x8000) ? 0 : (req - 2*step);
+
+ if (krt_buffer)
+ mb_free(krt_buffer);
+ krt_buffer = mb_alloc(krt_pool, krt_buflen);
+ krt_buffer_owner = p;
+ }
+
+ *needed = krt_buflen;
+ return krt_buffer;
+}
+
static void
-krt_sysctl_scan(struct proto *p, pool *pool, byte **buf, size_t *bl, int cmd)
+krt_buffer_release(struct proto *p)
{
- byte *next;
- int mib[6];
- size_t obl, needed;
+ if (p == krt_buffer_owner)
+ {
+ mb_free(krt_buffer);
+ krt_buffer = NULL;
+ krt_buflen = 0;
+ krt_buffer_owner = 0;
+ }
+}
+
+static void
+krt_sysctl_scan(struct proto *p, int cmd, int table_id)
+{
+ byte *buf, *next;
+ int mib[7], mcnt;
+ size_t needed;
struct ks_msg *m;
int retries = 3;
+ int rv;
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
@@ -636,125 +803,258 @@ krt_sysctl_scan(struct proto *p, pool *pool, byte **buf, size_t *bl, int cmd)
mib[3] = BIRD_PF;
mib[4] = cmd;
mib[5] = 0;
+ mcnt = 6;
- try:
- if (sysctl(mib, 6 , NULL , &needed, NULL, 0) < 0)
- die("krt_sysctl_scan 1: %m");
-
- obl = *bl;
+#ifdef KRT_USE_SYSCTL_7
+ if (table_id >= 0)
+ {
+ mib[6] = table_id;
+ mcnt = 7;
+ }
+#endif
- while (needed > *bl) *bl *= 2;
- while (needed < (*bl/2)) *bl /= 2;
+#ifdef KRT_USE_SETFIB_SCAN
+ if (table_id > 0)
+ if (setfib(table_id) < 0)
+ {
+ log(L_ERR "KRT: setfib(%d) failed: %m", table_id);
+ return;
+ }
+#endif
- if ((obl!=*bl) || !*buf)
+ try:
+ rv = sysctl(mib, mcnt, NULL, &needed, NULL, 0);
+ if (rv < 0)
{
- if (*buf) mb_free(*buf);
- if ((*buf = mb_alloc(pool, *bl)) == NULL) die("RT scan buf alloc");
+ /* OpenBSD returns EINVAL for not yet used tables */
+ if ((errno == EINVAL) && (table_id > 0))
+ goto exit;
+
+ log(L_ERR "KRT: Route scan estimate failed: %m");
+ goto exit;
}
- if (sysctl(mib, 6 , *buf, &needed, NULL, 0) < 0)
+ /* The table is empty */
+ if (needed == 0)
+ goto exit;
+
+ buf = krt_buffer_update(p, &needed);
+
+ rv = sysctl(mib, mcnt, buf, &needed, NULL, 0);
+ if (rv < 0)
{
- if (errno == ENOMEM)
- {
- /* The buffer size changed since last sysctl ('needed' is not changed) */
- if (retries--)
- goto try;
+ /* The buffer size changed since last sysctl ('needed' is not changed) */
+ if ((errno == ENOMEM) && retries--)
+ goto try;
- log(L_ERR "KRT: Route scan failed");
- return;
- }
- die("krt_sysctl_scan 2: %m");
+ log(L_ERR "KRT: Route scan failed: %m");
+ goto exit;
}
- for (next = *buf; next < (*buf + needed); next += m->rtm.rtm_msglen)
+#ifdef KRT_USE_SETFIB_SCAN
+ if (table_id > 0)
+ if (setfib(0) < 0)
+ die("KRT: setfib(%d) failed: %m", 0);
+#endif
+
+ /* Process received messages */
+ for (next = buf; next < (buf + needed); next += m->rtm.rtm_msglen)
{
m = (struct ks_msg *)next;
krt_read_msg(p, m, 1);
}
-}
-static byte *krt_buffer = NULL;
-static byte *kif_buffer = NULL;
-static size_t krt_buflen = 32768;
-static size_t kif_buflen = 4096;
+ return;
+
+ exit:
+ krt_buffer_release(p);
+
+#ifdef KRT_USE_SETFIB_SCAN
+ if (table_id > 0)
+ if (setfib(0) < 0)
+ die("KRT: setfib(%d) failed: %m", 0);
+#endif
+}
void
krt_do_scan(struct krt_proto *p)
{
- krt_sysctl_scan((struct proto *)p, p->krt_pool, &krt_buffer, &krt_buflen, NET_RT_DUMP);
+ krt_sysctl_scan(&p->p, NET_RT_DUMP, KRT_CF->sys.table_id);
}
void
kif_do_scan(struct kif_proto *p)
{
- struct proto *P = (struct proto *)p;
if_start_update();
- krt_sysctl_scan(P, P->pool, &kif_buffer, &kif_buflen, NET_RT_IFLIST);
+ krt_sysctl_scan(&p->p, NET_RT_IFLIST, -1);
if_end_update();
}
+
+/* Kernel sockets */
+
static int
krt_sock_hook(sock *sk, int size UNUSED)
{
struct ks_msg msg;
int l = read(sk->fd, (char *)&msg, sizeof(msg));
- if(l <= 0)
+ if (l <= 0)
log(L_ERR "krt-sock: read failed");
else
- krt_read_msg((struct proto *)sk->data, &msg, 0);
+ krt_read_msg((struct proto *) sk->data, &msg, 0);
return 0;
}
+/*
+ * Open a PF_ROUTE raw socket and wrap it in a BIRD socket allocated from
+ * @pool, of type SK_MAGIC, with krt_sock_hook() as RX hook and @data as the
+ * socket data pointer.
+ *
+ * With KRT_USE_SETFIB_SOCK and @table_id > 0 the socket is bound to that FIB
+ * using the SO_SETFIB socket option. Failure of socket() or setsockopt()
+ * ends in die(), failure of sk_open() in bug().
+ */
+static sock *
+krt_sock_open(pool *pool, void *data, int table_id)
+{
+ sock *sk;
+ int fd;
+
+ fd = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
+ if (fd < 0)
+ die("Cannot open kernel socket for routes");
+
+#ifdef KRT_USE_SETFIB_SOCK
+ if (table_id > 0)
+ {
+ if (setsockopt(fd, SOL_SOCKET, SO_SETFIB, &table_id, sizeof(table_id)) < 0)
+ die("Cannot set FIB %d for kernel socket: %m", table_id);
+ }
+#endif
+
+ sk = sk_new(pool);
+ sk->type = SK_MAGIC;
+ sk->rx_hook = krt_sock_hook;
+ sk->fd = fd;
+ sk->data = data;
+
+ if (sk_open(sk) < 0)
+ bug("krt-sock: sk_open failed");
+
+ return sk;
+}
+
+
+#ifdef KRT_SHARED_SOCKET
+
+static sock *krt_sock;
+static int krt_sock_count;
+
+
+/*
+ * Take one reference to the shared kernel socket. The socket is opened on
+ * the first reference (from krt_pool, with NULL data and table id -1).
+ */
+static void
+krt_sock_open_shared(void)
+{
+ if (!krt_sock_count)
+ krt_sock = krt_sock_open(krt_pool, NULL, -1);
+
+ krt_sock_count++;
+}
+
+/*
+ * Drop one reference to the shared kernel socket; the socket is freed and
+ * krt_sock reset to NULL when the reference count reaches zero.
+ */
+static void
+krt_sock_close_shared(void)
+{
+ krt_sock_count--;
+
+ if (!krt_sock_count)
+ {
+ rfree(krt_sock);
+ krt_sock = NULL;
+ }
+}
+
void
-krt_sys_start(struct krt_proto *x, int first UNUSED)
+krt_sys_start(struct krt_proto *p)
{
- sock *sk_rt;
- static int ks_open_tried = 0;
+ krt_table_map[KRT_CF->sys.table_id] = p;
- if (ks_open_tried)
- return;
+ krt_sock_open_shared();
+ p->sys.sk = krt_sock;
+}
- ks_open_tried = 1;
+void
+krt_sys_shutdown(struct krt_proto *p)
+{
+ krt_sock_close_shared();
+ p->sys.sk = NULL;
- DBG("KRT: Opening kernel socket\n");
+ krt_table_map[KRT_CF->sys.table_id] = NULL;
- if( (rt_sock = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC)) < 0)
- die("Cannot open kernel socket for routes");
+ krt_buffer_release(&p->p);
+}
- sk_rt = sk_new(krt_pool);
- sk_rt->type = SK_MAGIC;
- sk_rt->rx_hook = krt_sock_hook;
- sk_rt->fd = rt_sock;
- sk_rt->data = x;
- if (sk_open(sk_rt))
- bug("krt-sock: sk_open failed");
+#else
+
+void
+krt_sys_start(struct krt_proto *p)
+{
+ p->sys.sk = krt_sock_open(p->p.pool, p, KRT_CF->sys.table_id);
}
void
-krt_sys_shutdown(struct krt_proto *x UNUSED, int last UNUSED)
+krt_sys_shutdown(struct krt_proto *p)
{
- if (!krt_buffer)
- return;
+ rfree(p->sys.sk);
+ p->sys.sk = NULL;
- mb_free(krt_buffer);
- krt_buffer = NULL;
+ krt_buffer_release(&p->p);
}
+#endif /* KRT_SHARED_SOCKET */
+
+
+/* KRT configuration callbacks */
+
+static u32 krt_table_cf[(KRT_MAX_TABLES+31) / 32];
+
+int
+krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
+{
+ return n->sys.table_id == o->sys.table_id;
+}
void
-kif_sys_start(struct kif_proto *p UNUSED)
+krt_sys_preconfig(struct config *c UNUSED)
{
+ krt_max_tables = krt_get_max_tables();
+ bzero(&krt_table_cf, sizeof(krt_table_cf));
}
void
-kif_sys_shutdown(struct kif_proto *p UNUSED)
+krt_sys_postconfig(struct krt_config *x)
{
- if (!kif_buffer)
- return;
+  /* krt_table_cf is a bitmap with one bit per kernel table id */
+  u32 *tbl = krt_table_cf;
+  int id = x->sys.table_id;
+  /* Unsigned constant: shifting a signed 1 by 31 is undefined behaviour */
+  u32 bit = 1u << (id % 32);
+
+  /* A set bit means another kernel protocol already claimed this table */
+  if (tbl[id/32] & bit)
+    cf_error("Multiple kernel syncers defined for table #%d", id);
+
+  tbl[id/32] |= bit;
+}
+
+void krt_sys_init_config(struct krt_config *c)
+{
+ c->sys.table_id = 0; /* Default table */
+}
- mb_free(kif_buffer);
- kif_buffer = NULL;
+void krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
+{
+ d->sys.table_id = s->sys.table_id;
+}
+
+
+/* KIF misc code */
+
+void
+kif_sys_start(struct kif_proto *p UNUSED)
+{
+}
+
+void
+kif_sys_shutdown(struct kif_proto *p)
+{
+ krt_buffer_release(&p->p);
}
diff --git a/sysdep/bsd/krt-sys.h b/sysdep/bsd/krt-sys.h
index 88915dde..9c0d4972 100644
--- a/sysdep/bsd/krt-sys.h
+++ b/sysdep/bsd/krt-sys.h
@@ -9,13 +9,14 @@
#ifndef _BIRD_KRT_SYS_H_
#define _BIRD_KRT_SYS_H_
+struct birdsock;
/* Kernel interfaces */
struct kif_params {
};
-struct kif_status {
+struct kif_state {
};
@@ -30,20 +31,18 @@ static inline void kif_sys_copy_config(struct kif_config *d UNUSED, struct kif_c
/* Kernel routes */
+extern int krt_max_tables;
+
struct krt_params {
+ int table_id; /* Kernel table ID we sync with */
};
-struct krt_status {
+struct krt_state {
+ struct birdsock *sk;
};
static inline void krt_sys_init(struct krt_proto *p UNUSED) { }
-static inline int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n UNUSED, struct krt_config *o UNUSED) { return 1; }
-
-static inline void krt_sys_preconfig(struct config *c UNUSED) { }
-static inline void krt_sys_postconfig(struct krt_config *c UNUSED) { }
-static inline void krt_sys_init_config(struct krt_config *c UNUSED) { }
-static inline void krt_sys_copy_config(struct krt_config *d UNUSED, struct krt_config *s UNUSED) { }
#endif
diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h
index 4f91def5..cf049a0b 100644
--- a/sysdep/bsd/sysio.h
+++ b/sysdep/bsd/sysio.h
@@ -6,9 +6,22 @@
* Can be freely distributed and used under the terms of the GNU GPL.
*/
+#ifdef __NetBSD__
+
+#ifndef IP_RECVTTL
+#define IP_RECVTTL 23
+#endif
+
+#ifndef IP_MINTTL
+#define IP_MINTTL 24
+#endif
+
+#endif
+
#ifdef __DragonFly__
#define TCP_MD5SIG TCP_SIGNATURE_ENABLE
#endif
+
#ifdef IPV6
static inline void
@@ -113,7 +126,9 @@ sysio_leave_group(sock *s, ip_addr maddr)
/* BSD RX/TX packet info handling for IPv4 */
/* it uses IP_RECVDSTADDR / IP_RECVIF socket options instead of IP_PKTINFO */
-#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in_addr)) + CMSG_SPACE(sizeof(struct sockaddr_dl)))
+#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in_addr)) + \
+ CMSG_SPACE(sizeof(struct sockaddr_dl)) + \
+ CMSG_SPACE(sizeof(char)))
#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in_addr))
static char *
@@ -121,13 +136,18 @@ sysio_register_cmsgs(sock *s)
{
int ok = 1;
if (s->flags & SKF_LADDR_RX)
- {
- if (setsockopt(s->fd, IPPROTO_IP, IP_RECVDSTADDR, &ok, sizeof(ok)) < 0)
- return "IP_RECVDSTADDR";
+ {
+ if (setsockopt(s->fd, IPPROTO_IP, IP_RECVDSTADDR, &ok, sizeof(ok)) < 0)
+ return "IP_RECVDSTADDR";
+
+ if (setsockopt(s->fd, IPPROTO_IP, IP_RECVIF, &ok, sizeof(ok)) < 0)
+ return "IP_RECVIF";
+ }
+
+ if ((s->flags & SKF_TTL_RX) &&
+ (setsockopt(s->fd, IPPROTO_IP, IP_RECVTTL, &ok, sizeof(ok)) < 0))
+ return "IP_RECVTTL";
- if (setsockopt(s->fd, IPPROTO_IP, IP_RECVIF, &ok, sizeof(ok)) < 0)
- return "IP_RECVIF";
- }
return NULL;
}
@@ -136,27 +156,35 @@ static void
sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
{
struct cmsghdr *cm;
+ struct in_addr *ra = NULL;
+ struct sockaddr_dl *ri = NULL;
+ unsigned char *ttl = NULL;
- if (!(s->flags & SKF_LADDR_RX))
- return;
+ for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
+ {
+ if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVDSTADDR)
+ ra = (struct in_addr *) CMSG_DATA(cm);
- s->laddr = IPA_NONE;
- s->lifindex = 0;
+ if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVIF)
+ ri = (struct sockaddr_dl *) CMSG_DATA(cm);
- for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
- {
- if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVDSTADDR)
- {
- struct in_addr *ra = (struct in_addr *) CMSG_DATA(cm);
- get_inaddr(&s->laddr, ra);
- }
+ if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVTTL)
+ ttl = (unsigned char *) CMSG_DATA(cm);
+ }
- if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVIF)
- {
- struct sockaddr_dl *ri = (struct sockaddr_dl *) CMSG_DATA(cm);
- s->lifindex = ri->sdl_index;
- }
- }
+ if (s->flags & SKF_LADDR_RX)
+ {
+ s->laddr = IPA_NONE;
+ s->lifindex = 0;
+
+ if (ra)
+ get_inaddr(&s->laddr, ra);
+ if (ri)
+ s->lifindex = ri->sdl_index;
+ }
+
+ if (s->flags & SKF_TTL_RX)
+ s->ttl = ttl ? *ttl : -1;
// log(L_WARN "RX %I %d", s->laddr, s->lifindex);
}
@@ -244,8 +272,6 @@ sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd)
#ifndef IPV6
-#ifdef IP_MINTTL
-
static int
sk_set_min_ttl4(sock *s, int ttl)
{
@@ -262,25 +288,23 @@ sk_set_min_ttl4(sock *s, int ttl)
return 0;
}
-#else /* no IP_MINTTL */
+#else /* IPv6 */
static int
-sk_set_min_ttl4(sock *s, int ttl)
+sk_set_min_ttl6(sock *s, int ttl)
{
- log(L_ERR "IPv4 TTL security not supported");
+ log(L_ERR "IPv6 TTL security not supported");
return -1;
}
#endif
-#else /* IPv6 */
+
+int sk_priority_control = -1;
static int
-sk_set_min_ttl6(sock *s, int ttl)
+sk_set_priority(sock *s, int prio UNUSED)
{
- log(L_ERR "IPv6 TTL security not supported");
+ log(L_WARN "Socket priority not supported");
return -1;
}
-
-#endif
-
diff --git a/sysdep/cf/bsd-v6.h b/sysdep/cf/bsd-v6.h
index b7f25f64..47a7c7ff 100644
--- a/sysdep/cf/bsd-v6.h
+++ b/sysdep/cf/bsd-v6.h
@@ -10,8 +10,10 @@
#define CONFIG_AUTO_ROUTES
#define CONFIG_SELF_CONSCIOUS
+#define CONFIG_MULTIPLE_TABLES
#define CONFIG_SKIP_MC_BIND
+#define CONFIG_NO_IFACE_BIND
/*
Link: sysdep/unix
diff --git a/sysdep/cf/bsd.h b/sysdep/cf/bsd.h
index e7cc135f..5e6d03e8 100644
--- a/sysdep/cf/bsd.h
+++ b/sysdep/cf/bsd.h
@@ -8,8 +8,10 @@
#define CONFIG_AUTO_ROUTES
#define CONFIG_SELF_CONSCIOUS
+#define CONFIG_MULTIPLE_TABLES
#define CONFIG_SKIP_MC_BIND
+#define CONFIG_NO_IFACE_BIND
/*
Link: sysdep/unix
diff --git a/sysdep/config.h b/sysdep/config.h
index 7106e4ba..914c1090 100644
--- a/sysdep/config.h
+++ b/sysdep/config.h
@@ -7,7 +7,7 @@
#define _BIRD_CONFIG_H_
/* BIRD version */
-#define BIRD_VERSION "1.3.8"
+#define BIRD_VERSION "1.3.12"
/* Include parameters determined by configure script */
#include "sysdep/autoconf.h"
@@ -34,6 +34,7 @@ typedef INTEGER_64 s64;
typedef unsigned INTEGER_64 u64;
typedef u8 byte;
typedef u16 word;
+typedef unsigned int uint;
#endif
diff --git a/sysdep/linux/krt-sys.h b/sysdep/linux/krt-sys.h
index cdee7fe3..7b3043a7 100644
--- a/sysdep/linux/krt-sys.h
+++ b/sysdep/linux/krt-sys.h
@@ -15,7 +15,7 @@
struct kif_params {
};
-struct kif_status {
+struct kif_state {
};
@@ -36,7 +36,7 @@ struct krt_params {
int table_id; /* Kernel table ID we sync with */
};
-struct krt_status {
+struct krt_state {
};
diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y
index 51689ff9..b0e35151 100644
--- a/sysdep/linux/netlink.Y
+++ b/sysdep/linux/netlink.Y
@@ -10,13 +10,13 @@ CF_HDR
CF_DECLS
-CF_KEYWORDS(ASYNC, KERNEL, TABLE, KRT_PREFSRC, KRT_REALM)
+CF_KEYWORDS(KERNEL, TABLE, KRT_PREFSRC, KRT_REALM)
CF_GRAMMAR
-CF_ADDTO(kern_proto, kern_proto nl_item ';')
+CF_ADDTO(kern_proto, kern_proto kern_sys_item ';')
-nl_item:
+kern_sys_item:
KERNEL TABLE expr {
if ($3 <= 0 || $3 >= NL_NUM_TABLES)
cf_error("Kernel routing table number out of range");
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index 791f715e..90443ed6 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -7,6 +7,7 @@
*/
#include <stdio.h>
+#include <unistd.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/uio.h>
@@ -843,9 +844,11 @@ nl_parse_route(struct nlmsghdr *h, int scan)
memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw));
ipa_ntoh(ra.gw);
+#ifdef IPV6
/* Silently skip strange 6to4 routes */
if (ipa_in_net(ra.gw, IPA_NONE, 96))
return;
+#endif
ng = neigh_find2(&p->p, &ra.gw, ra.iface,
(i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
@@ -1038,11 +1041,9 @@ nl_open_async(void)
sock *sk;
struct sockaddr_nl sa;
int fd;
- static int nl_open_tried = 0;
- if (nl_open_tried)
+ if (nl_async_sk)
return;
- nl_open_tried = 1;
DBG("KRT: Opening async netlink socket\n");
@@ -1063,18 +1064,18 @@ nl_open_async(void)
if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
{
log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
+ close(fd);
return;
}
+ nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
+
sk = nl_async_sk = sk_new(krt_pool);
sk->type = SK_MAGIC;
sk->rx_hook = nl_async_hook;
sk->fd = fd;
if (sk_open(sk))
bug("Netlink: sk_open failed");
-
- if (!nl_async_rx_buffer)
- nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
}
/*
@@ -1084,19 +1085,18 @@ nl_open_async(void)
static u8 nl_cf_table[(NL_NUM_TABLES+7) / 8];
void
-krt_sys_start(struct krt_proto *p, int first)
+krt_sys_start(struct krt_proto *p)
{
nl_table_map[KRT_CF->sys.table_id] = p;
- if (first)
- {
- nl_open();
- nl_open_async();
- }
+
+ nl_open();
+ nl_open_async();
}
void
-krt_sys_shutdown(struct krt_proto *p UNUSED, int last UNUSED)
+krt_sys_shutdown(struct krt_proto *p UNUSED)
{
+ nl_table_map[KRT_CF->sys.table_id] = NULL;
}
int
diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h
index 90b3ebd9..250ed586 100644
--- a/sysdep/linux/sysio.h
+++ b/sysdep/linux/sysio.h
@@ -194,17 +194,22 @@ sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd)
/* RX/TX packet info handling for IPv4 */
/* Mostly similar to standardized IPv6 code */
-#define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in_pktinfo))
+#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in_pktinfo)) + CMSG_SPACE(sizeof(int)))
#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in_pktinfo))
static char *
sysio_register_cmsgs(sock *s)
{
int ok = 1;
+
if ((s->flags & SKF_LADDR_RX) &&
- setsockopt(s->fd, IPPROTO_IP, IP_PKTINFO, &ok, sizeof(ok)) < 0)
+ (setsockopt(s->fd, IPPROTO_IP, IP_PKTINFO, &ok, sizeof(ok)) < 0))
return "IP_PKTINFO";
+ if ((s->flags & SKF_TTL_RX) &&
+ (setsockopt(s->fd, IPPROTO_IP, IP_RECVTTL, &ok, sizeof(ok)) < 0))
+ return "IP_RECVTTL";
+
return NULL;
}
@@ -213,25 +218,34 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
{
struct cmsghdr *cm;
struct in_pktinfo *pi = NULL;
-
- if (!(s->flags & SKF_LADDR_RX))
- return;
+ int *ttl = NULL;
for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
+ {
+ if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_PKTINFO)
+ pi = (struct in_pktinfo *) CMSG_DATA(cm);
+
+ if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_TTL)
+ ttl = (int *) CMSG_DATA(cm);
+ }
+
+ if (s->flags & SKF_LADDR_RX)
+ {
+ if (pi)
{
- if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_PKTINFO)
- pi = (struct in_pktinfo *) CMSG_DATA(cm);
+ get_inaddr(&s->laddr, &pi->ipi_addr);
+ s->lifindex = pi->ipi_ifindex;
}
-
- if (!pi)
+ else
{
s->laddr = IPA_NONE;
s->lifindex = 0;
- return;
}
+ }
+
+ if (s->flags & SKF_TTL_RX)
+ s->ttl = ttl ? *ttl : -1;
- get_inaddr(&s->laddr, &pi->ipi_addr);
- s->lifindex = pi->ipi_ifindex;
return;
}
@@ -310,3 +324,22 @@ sk_set_min_ttl6(sock *s, int ttl)
}
#endif
+
+
+#ifndef IPV6_TCLASS
+#define IPV6_TCLASS 67
+#endif
+
+int sk_priority_control = 7;
+
+/*
+ * Set the SO_PRIORITY socket option on s->fd to prio.
+ * Returns 0 on success; when setsockopt() fails, a warning is logged
+ * and -1 is returned.
+ */
+static int
+sk_set_priority(sock *s, int prio)
+{
+ if (setsockopt(s->fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)) < 0)
+ {
+ log(L_WARN "sk_set_priority: setsockopt: %m");
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/sysdep/unix/config.Y b/sysdep/unix/config.Y
index 844f53df..7bade918 100644
--- a/sysdep/unix/config.Y
+++ b/sysdep/unix/config.Y
@@ -14,9 +14,9 @@ CF_HDR
CF_DECLS
CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR, SOFT)
-CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, BASE, NAME)
+CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, BASE, NAME, CONFIRM, UNDO, CHECK, TIMEOUT)
-%type <i> log_mask log_mask_list log_cat
+%type <i> log_mask log_mask_list log_cat cfg_timeout
%type <g> log_file
%type <t> cfg_name
%type <tf> timeformat_which
@@ -104,13 +104,26 @@ timeformat_base:
/* Unix specific commands */
-CF_CLI_HELP(CONFIGURE, [soft] [\"<file>\"], [[Reload configuration]])
+CF_CLI_HELP(CONFIGURE, ..., [[Reload configuration]])
-CF_CLI(CONFIGURE, cfg_name, [\"<file>\"], [[Reload configuration]])
-{ cmd_reconfig($2, RECONFIG_HARD); } ;
+CF_CLI(CONFIGURE, cfg_name cfg_timeout, [\"<file>\"] [timeout [<sec>]], [[Reload configuration]])
+{ cmd_reconfig($2, RECONFIG_HARD, $3); } ;
-CF_CLI(CONFIGURE SOFT, cfg_name, [\"<file>\"], [[Reload configuration and ignore changes in filters]])
-{ cmd_reconfig($3, RECONFIG_SOFT); } ;
+CF_CLI(CONFIGURE SOFT, cfg_name cfg_timeout, [\"<file>\"] [timeout [<sec>]], [[Reload configuration and ignore changes in filters]])
+{ cmd_reconfig($3, RECONFIG_SOFT, $4); } ;
+
+/* Hack to get input completion for 'timeout' */
+CF_CLI_CMD(CONFIGURE TIMEOUT, [<sec>], [[Reload configuration with undo timeout]])
+CF_CLI_CMD(CONFIGURE SOFT TIMEOUT, [<sec>], [[Reload configuration with undo timeout]])
+
+CF_CLI(CONFIGURE CONFIRM,,, [[Confirm last configuration change - deactivate undo timeout]])
+{ cmd_reconfig_confirm(); } ;
+
+CF_CLI(CONFIGURE UNDO,,, [[Undo last configuration change]])
+{ cmd_reconfig_undo(); } ;
+
+CF_CLI(CONFIGURE CHECK, cfg_name, [\"<file>\"], [[Parse configuration and check its validity]])
+{ cmd_check_config($3); } ;
CF_CLI(DOWN,,, [[Shut the daemon down]])
{ cmd_shutdown(); } ;
@@ -120,6 +133,12 @@ cfg_name:
| TEXT
;
+cfg_timeout:
+ /* empty */ { $$ = 0; }
+ | TIMEOUT { $$ = UNIX_DEFAULT_CONFIGURE_TIMEOUT; }
+ | TIMEOUT expr { $$ = $2; }
+ ;
+
CF_CODE
CF_END
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index f91b5278..6e3f1e4d 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -17,10 +17,10 @@
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
-#include <sys/fcntl.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <unistd.h>
+#include <fcntl.h>
#include <errno.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
@@ -121,7 +121,7 @@ static list near_timers, far_timers;
static bird_clock_t first_far_timer = TIME_INFINITY;
/* now must be different from 0, because 0 is a special value in timer->expires */
-bird_clock_t now = 1, now_real;
+bird_clock_t now = 1, now_real, boot_time;
static void
update_times_plain(void)
@@ -538,6 +538,11 @@ sk_free(resource *r)
if (s->fd >= 0)
{
close(s->fd);
+
+ /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
+ if (s->flags & SKF_THREAD)
+ return;
+
if (s == current_sock)
current_sock = sk_next(s);
if (s == stored_sock)
@@ -598,7 +603,7 @@ sock_new(pool *p)
sock *s = ralloc(p, &sk_class);
s->pool = p;
// s->saddr = s->daddr = IPA_NONE;
- s->tos = s->ttl = -1;
+ s->tos = s->priority = s->ttl = -1;
s->fd = -1;
return s;
}
@@ -673,7 +678,7 @@ get_sockaddr(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, unsigned *p
#ifdef IPV6
/* PKTINFO handling is also standardized in IPv6 */
-#define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
+#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in6_pktinfo)) + CMSG_SPACE(sizeof(int)))
#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
/*
@@ -685,15 +690,26 @@ get_sockaddr(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, unsigned *p
#ifndef IPV6_RECVPKTINFO
#define IPV6_RECVPKTINFO IPV6_PKTINFO
#endif
+/*
+ * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
+ */
+#ifndef IPV6_RECVHOPLIMIT
+#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
+#endif
static char *
sysio_register_cmsgs(sock *s)
{
int ok = 1;
+
if ((s->flags & SKF_LADDR_RX) &&
- setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)
+ (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0))
return "IPV6_RECVPKTINFO";
+ if ((s->flags & SKF_TTL_RX) &&
+ (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0))
+ return "IPV6_RECVHOPLIMIT";
+
return NULL;
}
@@ -702,25 +718,34 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
{
struct cmsghdr *cm;
struct in6_pktinfo *pi = NULL;
-
- if (!(s->flags & SKF_LADDR_RX))
- return;
+ int *hlim = NULL;
for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
+ {
+ if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
+ pi = (struct in6_pktinfo *) CMSG_DATA(cm);
+
+ if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT)
+ hlim = (int *) CMSG_DATA(cm);
+ }
+
+ if (s->flags & SKF_LADDR_RX)
+ {
+ if (pi)
{
- if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
- pi = (struct in6_pktinfo *) CMSG_DATA(cm);
+ get_inaddr(&s->laddr, &pi->ipi6_addr);
+ s->lifindex = pi->ipi6_ifindex;
}
-
- if (!pi)
+ else
{
s->laddr = IPA_NONE;
s->lifindex = 0;
- return;
}
+ }
+
+ if (s->flags & SKF_TTL_RX)
+ s->ttl = hlim ? *hlim : -1;
- get_inaddr(&s->laddr, &pi->ipi6_addr);
- s->lifindex = pi->ipi6_ifindex;
return;
}
@@ -783,21 +808,28 @@ sk_setup(sock *s)
ERR("fcntl(O_NONBLOCK)");
if (s->type == SK_UNIX)
return NULL;
-#ifndef IPV6
+
+#ifdef IPV6
+ if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0)
+ WARN("IPV6_TCLASS");
+#else
if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
WARN("IP_TOS");
#endif
+ if (s->priority >= 0)
+ sk_set_priority(s, s->priority);
+
#ifdef IPV6
int v = 1;
if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
WARN("IPV6_V6ONLY");
#endif
- if (s->ttl >= 0)
- err = sk_set_ttl_int(s);
+ if ((s->ttl >= 0) && (err = sk_set_ttl_int(s)))
+ goto bad;
- sysio_register_cmsgs(s);
+ err = sysio_register_cmsgs(s);
bad:
return err;
}
@@ -1154,6 +1186,15 @@ sk_open(sock *s)
port = s->sport;
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
ERR("SO_REUSEADDR");
+
+#ifdef CONFIG_NO_IFACE_BIND
+ /* Workaround missing ability to bind to an iface */
+ if ((type == SK_UDP) && s->iface && ipa_zero(s->saddr))
+ {
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
+ ERR("SO_REUSEPORT");
+ }
+#endif
}
fill_in_sockaddr(&sa, s->saddr, s->iface, port);
if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
@@ -1204,7 +1245,8 @@ sk_open(sock *s)
#endif
}
- sk_insert(s);
+ if (!(s->flags & SKF_THREAD))
+ sk_insert(s);
return 0;
bad:
@@ -1392,7 +1434,9 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa,
}
*/
-static int
+ /* sk_read() and sk_write() are called from BFD's event loop */
+
+int
sk_read(sock *s)
{
switch (s->type)
@@ -1469,7 +1513,7 @@ sk_read(sock *s)
}
}
-static int
+int
sk_write(sock *s)
{
switch (s->type)
@@ -1487,7 +1531,8 @@ sk_write(sock *s)
default:
if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
{
- s->tx_hook(s);
+ if (s->tx_hook)
+ s->tx_hook(s);
return 1;
}
return 0;
@@ -1530,6 +1575,7 @@ io_init(void)
krt_io_init();
init_times();
update_times();
+ boot_time = now;
srandom((int) now_real);
}
@@ -1557,7 +1603,7 @@ io_loop(void)
tm_shot();
continue;
}
- timo.tv_sec = events ? 0 : tout - now;
+ timo.tv_sec = events ? 0 : MIN(tout - now, 3);
timo.tv_usec = 0;
if (sock_recalc_fdsets_p)
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 497d328d..3f9e1479 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -69,12 +69,14 @@
pool *krt_pool;
static linpool *krt_filter_lp;
+static list krt_proto_list;
void
krt_io_init(void)
{
krt_pool = rp_new(&root_pool, "Kernel Syncer");
krt_filter_lp = lp_new(krt_pool, 4080);
+ init_list(&krt_proto_list);
}
/*
@@ -114,12 +116,18 @@ kif_request_scan(void)
}
static inline int
-prefer_scope(struct ifa *a, struct ifa *b)
-{ return (a->scope > SCOPE_LINK) && (b->scope <= SCOPE_LINK); }
-
-static inline int
prefer_addr(struct ifa *a, struct ifa *b)
-{ return ipa_compare(a->ip, b->ip) < 0; }
+{
+ int sa = a->scope > SCOPE_LINK;
+ int sb = b->scope > SCOPE_LINK;
+
+ if (sa < sb)
+ return 0;
+ else if (sa > sb)
+ return 1;
+ else
+ return ipa_compare(a->ip, b->ip) < 0;
+}
static inline struct ifa *
find_preferred_ifa(struct iface *i, ip_addr prefix, ip_addr mask)
@@ -130,7 +138,7 @@ find_preferred_ifa(struct iface *i, ip_addr prefix, ip_addr mask)
{
if (!(a->flags & IA_SECONDARY) &&
ipa_equal(ipa_and(a->ip, mask), prefix) &&
- (!b || prefer_scope(a, b) || prefer_addr(a, b)))
+ (!b || prefer_addr(a, b)))
b = a;
}
@@ -558,12 +566,6 @@ krt_dump_attrs(rte *e)
* Routes
*/
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
-static timer *krt_scan_timer;
-static int krt_instance_count;
-static list krt_instance_list;
-#endif
-
static void
krt_flush_routes(struct krt_proto *p)
{
@@ -574,7 +576,7 @@ krt_flush_routes(struct krt_proto *p)
{
net *n = (net *) f;
rte *e = n->routes;
- if (e && (n->n.flags & KRF_INSTALLED))
+ if (rte_is_valid(e) && (n->n.flags & KRF_INSTALLED))
{
/* FIXME: this does not work if gw is changed in export filter */
krt_replace_rte(p, e->net, NULL, e, NULL);
@@ -649,7 +651,7 @@ krt_got_route(struct krt_proto *p, rte *e)
}
old = net->routes;
- if ((net->n.flags & KRF_INSTALLED) && old)
+ if ((net->n.flags & KRF_INSTALLED) && rte_is_valid(old))
{
/* There may be changes in route attributes, we ignore that.
Also, this does not work well if gw is changed in export filter */
@@ -727,6 +729,13 @@ krt_prune(struct krt_proto *p)
/* Route rejected, should not happen (KRF_INSTALLED) but to be sure .. */
verdict = (verdict == KRF_CREATE) ? KRF_IGNORE : KRF_DELETE;
}
+ else
+ {
+ ea_list **x = &tmpa;
+ while (*x)
+ x = &((*x)->next);
+ *x = new ? new->attrs->eattrs : NULL;
+ }
}
switch (verdict)
@@ -805,34 +814,87 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new)
* Periodic scanning
*/
+
+#ifdef CONFIG_ALL_TABLES_AT_ONCE
+
+static timer *krt_scan_timer;
+static int krt_scan_count;
+
static void
krt_scan(timer *t UNUSED)
{
struct krt_proto *p;
kif_force_scan();
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
+
+ /* We need some node to decide whether to print the debug messages or not */
+ p = SKIP_BACK(struct krt_proto, krt_node, HEAD(krt_proto_list));
+ KRT_TRACE(p, D_EVENTS, "Scanning routing table");
+
+ krt_do_scan(NULL);
+
+ void *q;
+ WALK_LIST(q, krt_proto_list)
{
- void *q;
- /* We need some node to decide whether to print the debug messages or not */
- p = SKIP_BACK(struct krt_proto, instance_node, HEAD(krt_instance_list));
- if (p->instance_node.next)
- KRT_TRACE(p, D_EVENTS, "Scanning routing table");
- krt_do_scan(NULL);
- WALK_LIST(q, krt_instance_list)
- {
- p = SKIP_BACK(struct krt_proto, instance_node, q);
- krt_prune(p);
- }
+ p = SKIP_BACK(struct krt_proto, krt_node, q);
+ krt_prune(p);
+ }
+}
+
+/*
+ * CONFIG_ALL_TABLES_AT_ONCE variant: all kernel protocols share one scan
+ * timer. The timer is allocated from krt_pool when krt_scan_count is zero,
+ * i.e. for the first started instance, using that instance's scan_time
+ * (KRT_CF refers to p). Every call increments the instance count and calls
+ * tm_start() on the shared timer.
+ */
+static void
+krt_scan_timer_start(struct krt_proto *p)
+{
+ if (!krt_scan_count)
+ krt_scan_timer = tm_new_set(krt_pool, krt_scan, NULL, 0, KRT_CF->scan_time);
+
+ krt_scan_count++;
+
+ /* NOTE(review): presumably a zero delay triggers an immediate scan - confirm */
+ tm_start(krt_scan_timer, 0);
+}
+
+/*
+ * CONFIG_ALL_TABLES_AT_ONCE variant: drop one reference to the shared scan
+ * timer. When the instance count reaches zero the timer is freed and the
+ * pointer cleared, so the next krt_scan_timer_start() allocates a new one.
+ */
+static void
+krt_scan_timer_stop(struct krt_proto *p)
+{
+ krt_scan_count--;
+
+ if (!krt_scan_count)
+ {
+ rfree(krt_scan_timer);
+ krt_scan_timer = NULL;
}
+}
+
#else
- p = t->data;
+
+static void
+krt_scan(timer *t)
+{
+ struct krt_proto *p = t->data;
+
+ kif_force_scan();
+
KRT_TRACE(p, D_EVENTS, "Scanning routing table");
krt_do_scan(p);
krt_prune(p);
-#endif
}
+/*
+ * Per-protocol variant: allocate a scan timer in the protocol's own pool
+ * with krt_scan() as hook and p as its data, store it in p->scan_timer and
+ * call tm_start() on it.
+ */
+static void
+krt_scan_timer_start(struct krt_proto *p)
+{
+ p->scan_timer = tm_new_set(p->p.pool, krt_scan, p, 0, KRT_CF->scan_time);
+ tm_start(p->scan_timer, 0);
+}
+
+/*
+ * Per-protocol variant: stop the protocol's scan timer. The timer is not
+ * freed here.
+ */
+static void
+krt_scan_timer_stop(struct krt_proto *p)
+{
+ tm_stop(p->scan_timer);
+}
+
+#endif
+
+
+
/*
* Updates
@@ -893,7 +955,7 @@ krt_notify(struct proto *P, struct rtable *table UNUSED, net *net,
{
struct krt_proto *p = (struct krt_proto *) P;
- if (shutting_down)
+ if (config->shutdown)
return;
if (!(net->n.flags & KRF_INSTALLED))
old = NULL;
@@ -935,52 +997,20 @@ krt_init(struct proto_config *c)
return &p->p;
}
-static timer *
-krt_start_timer(struct krt_proto *p)
-{
- timer *t;
-
- t = tm_new(p->krt_pool);
- t->hook = krt_scan;
- t->data = p;
- t->recurrent = KRT_CF->scan_time;
- tm_start(t, 0);
- return t;
-}
-
static int
krt_start(struct proto *P)
{
struct krt_proto *p = (struct krt_proto *) P;
- int first = 1;
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
- if (!krt_instance_count++)
- init_list(&krt_instance_list);
- else
- first = 0;
- p->krt_pool = krt_pool;
- add_tail(&krt_instance_list, &p->instance_node);
-#else
- p->krt_pool = P->pool;
-#endif
+ add_tail(&krt_proto_list, &p->krt_node);
#ifdef KRT_ALLOW_LEARN
krt_learn_init(p);
#endif
- krt_sys_start(p, first);
+ krt_sys_start(p);
- /* Start periodic routing table scanning */
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
- if (first)
- krt_scan_timer = krt_start_timer(p);
- else
- tm_start(krt_scan_timer, 0);
- p->scan_timer = krt_scan_timer;
-#else
- p->scan_timer = krt_start_timer(p);
-#endif
+ krt_scan_timer_start(p);
return PS_UP;
}
@@ -989,26 +1019,16 @@ static int
krt_shutdown(struct proto *P)
{
struct krt_proto *p = (struct krt_proto *) P;
- int last = 1;
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
- rem_node(&p->instance_node);
- if (--krt_instance_count)
- last = 0;
- else
-#endif
- tm_stop(p->scan_timer);
+ krt_scan_timer_stop(p);
/* FIXME we should flush routes even when persist during reconfiguration */
if (p->initialized && !KRT_CF->persist)
krt_flush_routes(p);
- krt_sys_shutdown(p, last);
+ krt_sys_shutdown(p);
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
- if (last)
- rfree(krt_scan_timer);
-#endif
+ rem_node(&p->krt_node);
return PS_DOWN;
}
diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h
index d6fbf721..446914d2 100644
--- a/sysdep/unix/krt.h
+++ b/sysdep/unix/krt.h
@@ -52,15 +52,17 @@ struct krt_config {
struct krt_proto {
struct proto p;
- struct krt_status sys; /* Sysdep state */
+ struct krt_state sys; /* Sysdep state */
+
#ifdef KRT_ALLOW_LEARN
struct rtable krt_table; /* Internal table of inherited routes */
#endif
- pool *krt_pool; /* Pool used for common krt data */
+
+#ifndef CONFIG_ALL_TABLES_AT_ONCE
timer *scan_timer;
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
- node instance_node; /* Node in krt instance list */
#endif
+
+ node krt_node; /* Node in krt_proto_list */
int initialized; /* First scan has already been finished */
};
@@ -103,7 +105,7 @@ struct kif_config {
struct kif_proto {
struct proto p;
- struct kif_status sys; /* Sysdep state */
+ struct kif_state sys; /* Sysdep state */
};
#define KIF_CF ((struct kif_config *)p->p.cf)
@@ -114,8 +116,8 @@ struct proto_config * krt_init_config(int class);
/* krt sysdep */
void krt_sys_init(struct krt_proto *);
-void krt_sys_start(struct krt_proto *, int);
-void krt_sys_shutdown(struct krt_proto *, int);
+void krt_sys_start(struct krt_proto *);
+void krt_sys_shutdown(struct krt_proto *);
int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o);
void krt_sys_preconfig(struct config *);
diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c
index 92f12f1e..0f4c06e9 100644
--- a/sysdep/unix/log.c
+++ b/sysdep/unix/log.c
@@ -32,8 +32,24 @@ static FILE *dbgf;
static list *current_log_list;
static char *current_syslog_name; /* NULL -> syslog closed */
-bird_clock_t rate_limit_time = 5;
-int rate_limit_count = 5;
+static const bird_clock_t rate_limit_time = 5;
+static const int rate_limit_count = 5;
+
+
+#ifdef USE_PTHREADS
+
+#include <pthread.h>
+static pthread_mutex_t log_mutex;
+static inline void log_lock(void) { pthread_mutex_lock(&log_mutex); }
+static inline void log_unlock(void) { pthread_mutex_unlock(&log_mutex); }
+
+#else
+
+static inline void log_lock(void) { }
+static inline void log_unlock(void) { }
+
+#endif
+
#ifdef HAVE_SYSLOG
#include <sys/syslog.h>
@@ -65,26 +81,6 @@ static char *class_names[] = {
"BUG"
};
-#define LOG_BUFFER_SIZE 1024
-static char log_buffer[LOG_BUFFER_SIZE];
-static char *log_buffer_pos;
-static int log_buffer_remains;
-
-
-/**
- * log_reset - reset the log buffer
- *
- * This function resets a log buffer and discards buffered
- * messages. Should be used before a log message is prepared
- * using logn().
- */
-void
-log_reset(void)
-{
- log_buffer_pos = log_buffer;
- log_buffer_remains = LOG_BUFFER_SIZE;
- log_buffer[0] = 0;
-}
/**
* log_commit - commit a log message
@@ -99,10 +95,14 @@ log_reset(void)
* in log(), so it should be written like *L_INFO.
*/
void
-log_commit(int class)
+log_commit(int class, buffer *buf)
{
struct log_config *l;
+ if (buf->pos == buf->end)
+ strcpy(buf->end - 100, " ... <too long>");
+
+ log_lock();
WALK_LIST(l, *current_log_list)
{
if (!(l->mask & (1 << class)))
@@ -117,47 +117,32 @@ log_commit(int class)
tm_format_datetime(tbuf, &config->tf_log, now);
fprintf(l->fh, "%s <%s> ", tbuf, class_names[class]);
}
- fputs(log_buffer, l->fh);
+ fputs(buf->start, l->fh);
fputc('\n', l->fh);
fflush(l->fh);
}
#ifdef HAVE_SYSLOG
else
- syslog(syslog_priorities[class], "%s", log_buffer);
+ syslog(syslog_priorities[class], "%s", buf->start);
#endif
}
- cli_echo(class, log_buffer);
-
- log_reset();
-}
+ log_unlock();
-static void
-log_print(const char *msg, va_list args)
-{
- int i;
-
- if (log_buffer_remains == 0)
- return;
-
- i=bvsnprintf(log_buffer_pos, log_buffer_remains, msg, args);
- if (i < 0)
- {
- bsprintf(log_buffer + LOG_BUFFER_SIZE - 100, " ... <too long>");
- log_buffer_remains = 0;
- return;
- }
+ /* FIXME: cli_echo is not thread-safe */
+ cli_echo(class, buf->start);
- log_buffer_pos += i;
- log_buffer_remains -= i;
+ buf->pos = buf->start;
}
+int buffer_vprint(buffer *buf, const char *fmt, va_list args);
static void
vlog(int class, const char *msg, va_list args)
{
- log_reset();
- log_print(msg, args);
- log_commit(class);
+ buffer buf;
+ LOG_BUFFER_INIT(buf);
+ buffer_vprint(&buf, msg, args);
+ log_commit(class, &buf);
}
@@ -186,26 +171,6 @@ log_msg(char *msg, ...)
va_end(args);
}
-/**
- * logn - prepare a partial message in the log buffer
- * @msg: printf-like formatting string (without message class information)
- *
- * This function formats a message according to the format string @msg
- * and adds it to the log buffer. Messages in the log buffer are
- * logged when the buffer is flushed using log_commit() function. The
- * message should not contain |\n|, log_commit() also terminates a
- * line.
- */
-void
-logn(char *msg, ...)
-{
- va_list args;
-
- va_start(args, msg);
- log_print(msg, args);
- va_end(args);
-}
-
void
log_rl(struct rate_limit *rl, char *msg, ...)
{
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index f0344a8f..7a945826 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -97,9 +97,10 @@ static inline void
add_num_const(char *name, int val)
{
struct symbol *s = cf_find_symbol(name);
- s->class = SYM_NUMBER;
- s->def = NULL;
- s->aux = val;
+ s->class = SYM_CONSTANT | T_INT;
+ s->def = cfg_allocz(sizeof(struct f_val));
+ SYM_TYPE(s) = T_INT;
+ SYM_VAL(s).i = val;
}
/* the code of read_iproute_table() is based on
@@ -198,7 +199,7 @@ unix_read_config(struct config **cp, char *name)
return ret;
}
-static void
+static struct config *
read_config(void)
{
struct config *conf;
@@ -210,7 +211,8 @@ read_config(void)
else
die("Unable to open configuration file %s: %m", config_name);
}
- config_commit(conf, RECONFIG_HARD);
+
+ return conf;
}
void
@@ -228,19 +230,17 @@ async_config(void)
config_free(conf);
}
else
- config_commit(conf, RECONFIG_HARD);
+ config_commit(conf, RECONFIG_HARD, 0);
}
-void
-cmd_reconfig(char *name, int type)
+static struct config *
+cmd_read_config(char *name)
{
struct config *conf;
- if (cli_access_restricted())
- return;
-
if (!name)
name = config_name;
+
cli_msg(-2, "Reading configuration from %s", name);
if (!unix_read_config(&conf, name))
{
@@ -249,24 +249,94 @@ cmd_reconfig(char *name, int type)
else
cli_msg(8002, "%s: %m", name);
config_free(conf);
+ conf = NULL;
}
- else
+
+ return conf;
+}
+
+/*
+ * CLI command handler: read the configuration file 'name' through
+ * cmd_read_config() without committing it. On success the client gets
+ * "Configuration OK" (reply code 20) and the config is freed again; when
+ * cmd_read_config() returns NULL nothing more is done here.
+ */
+void
+cmd_check_config(char *name)
+{
+ struct config *conf = cmd_read_config(name);
+ if (!conf)
+ return;
+
+ cli_msg(20, "Configuration OK");
+ config_free(conf);
+}
+
+static void
+cmd_reconfig_msg(int r)
+{
+ switch (r)
{
- switch (config_commit(conf, type))
- {
- case CONF_DONE:
- cli_msg(3, "Reconfigured.");
- break;
- case CONF_PROGRESS:
- cli_msg(4, "Reconfiguration in progress.");
- break;
- case CONF_SHUTDOWN:
- cli_msg(6, "Reconfiguration ignored, shutting down.");
- break;
- default:
- cli_msg(5, "Reconfiguration already in progress, queueing new config");
- }
+ case CONF_DONE: cli_msg( 3, "Reconfigured"); break;
+ case CONF_PROGRESS: cli_msg( 4, "Reconfiguration in progress"); break;
+ case CONF_QUEUED: cli_msg( 5, "Reconfiguration already in progress, queueing new config"); break;
+ case CONF_UNQUEUED: cli_msg(17, "Reconfiguration already in progress, removing queued config"); break;
+ case CONF_CONFIRM: cli_msg(18, "Reconfiguration confirmed"); break;
+ case CONF_SHUTDOWN: cli_msg( 6, "Reconfiguration ignored, shutting down"); break;
+ case CONF_NOTHING: cli_msg(19, "Nothing to do"); break;
+ default: break;
+ }
+}
+
+/* Hack for scheduled undo notification */
+cli *cmd_reconfig_stored_cli;
+
+/*
+ * Tell the CLI session stored in cmd_reconfig_stored_cli (if any) that the
+ * config timeout expired and an undo is starting. The message is sent with
+ * CLI_ASYNC_CODE and followed by cli_write_trigger() on that session.
+ */
+void
+cmd_reconfig_undo_notify(void)
+{
+ if (cmd_reconfig_stored_cli)
+ {
+ cli *c = cmd_reconfig_stored_cli;
+ cli_printf(c, CLI_ASYNC_CODE, "Config timeout expired, starting undo");
+ cli_write_trigger(c);
+ }
+}
+
+void
+cmd_reconfig(char *name, int type, int timeout)
+{
+ if (cli_access_restricted())
+ return;
+
+ struct config *conf = cmd_read_config(name);
+ if (!conf)
+ return;
+
+ int r = config_commit(conf, type, timeout);
+
+ if ((r >= 0) && (timeout > 0))
+ {
+ cmd_reconfig_stored_cli = this_cli;
+ cli_msg(-22, "Undo scheduled in %d s", timeout);
}
+
+ cmd_reconfig_msg(r);
+}
+
+/*
+ * CLI command handler for 'configure confirm': does nothing when
+ * cli_access_restricted() reports a restricted session, otherwise calls
+ * config_confirm() and reports its result code via cmd_reconfig_msg().
+ */
+void
+cmd_reconfig_confirm(void)
+{
+ if (cli_access_restricted())
+ return;
+
+ int r = config_confirm();
+ cmd_reconfig_msg(r);
+}
+
+void
+cmd_reconfig_undo(void)
+{
+ if (cli_access_restricted())
+ return;
+
+ cli_msg(-21, "Undo requested");
+
+ int r = config_undo();
+ cmd_reconfig_msg(r);
}
/*
@@ -404,6 +474,58 @@ cli_init_unix(uid_t use_uid, gid_t use_gid)
}
/*
+ * PID file
+ */
+
+static char *pid_file;
+static int pid_fd;
+
+/*
+ * Open the PID file named by pid_file for writing, creating it with mode
+ * 0664 if it does not exist, and keep the descriptor in pid_fd.
+ * No-op when pid_file is not set; calls die() if the open fails.
+ */
+static inline void
+open_pid_file(void)
+{
+ if (!pid_file)
+ return;
+
+ pid_fd = open(pid_file, O_WRONLY|O_CREAT, 0664);
+ if (pid_fd < 0)
+ die("Cannot create PID file %s: %m", pid_file);
+}
+
+/*
+ * Write the current process ID, followed by a newline, into the descriptor
+ * kept in pid_fd, then close that descriptor. The file is truncated first
+ * so that no stale content remains. No-op when pid_file is not set;
+ * calls die() when truncating or writing fails.
+ */
+static inline void
+write_pid_file(void)
+{
+ int pl, rv;
+ char ps[24];
+
+ if (!pid_file)
+ return;
+
+ /* We don't use PID file for uniqueness, so no need for locking */
+
+ pl = bsnprintf(ps, sizeof(ps), "%ld\n", (long) getpid());
+ if (pl < 0)
+ bug("PID buffer too small");
+
+ rv = ftruncate(pid_fd, 0);
+ if (rv < 0)
+ die("ftruncate: %m");
+
+ rv = write(pid_fd, ps, pl);
+ if (rv < 0)
+ die("write: %m");
+
+ close(pid_fd);
+}
+
+/*
+ * Remove the PID file named by pid_file; no-op when pid_file is not set.
+ * The result of unlink() is not checked.
+ */
+static inline void
+unlink_pid_file(void)
+{
+ if (pid_file)
+ unlink(pid_file);
+}
+
+
+/*
* Shutdown
*/
@@ -427,6 +549,7 @@ async_shutdown(void)
void
sysdep_shutdown_done(void)
{
+ unlink_pid_file();
unlink(path_control_socket);
log_msg(L_FATAL "Shutdown completed");
exit(0);
@@ -479,16 +602,17 @@ signal_init(void)
* Parsing of command-line arguments
*/
-static char *opt_list = "c:dD:ps:u:g:";
+static char *opt_list = "c:dD:ps:P:u:g:f";
static int parse_and_exit;
char *bird_name;
static char *use_user;
static char *use_group;
+static int run_in_foreground = 0;
static void
usage(void)
{
- fprintf(stderr, "Usage: %s [-c <config-file>] [-d] [-D <debug-file>] [-p] [-s <control-socket>] [-u <user>] [-g <group>]\n", bird_name);
+ fprintf(stderr, "Usage: %s [-c <config-file>] [-d] [-D <debug-file>] [-p] [-s <control-socket>] [-P <pid-file>] [-u <user>] [-g <group>] [-f]\n", bird_name);
exit(1);
}
@@ -587,12 +711,18 @@ parse_args(int argc, char **argv)
case 's':
path_control_socket = optarg;
break;
+ case 'P':
+ pid_file = optarg;
+ break;
case 'u':
use_user = optarg;
break;
case 'g':
use_group = optarg;
break;
+ case 'f':
+ run_in_foreground = 1;
+ break;
default:
usage();
}
@@ -623,6 +753,7 @@ main(int argc, char **argv)
rt_init();
if_init();
roa_init();
+ config_init();
uid_t use_uid = get_uid(use_user);
gid_t use_gid = get_gid(use_group);
@@ -639,16 +770,19 @@ main(int argc, char **argv)
if (use_uid)
drop_uid(use_uid);
+ if (!parse_and_exit)
+ open_pid_file();
+
protos_build();
proto_build(&proto_unix_kernel);
proto_build(&proto_unix_iface);
- read_config();
+ struct config *conf = read_config();
if (parse_and_exit)
exit(0);
- if (!debug_flag)
+ if (!(debug_flag||run_in_foreground))
{
pid_t pid = fork();
if (pid < 0)
@@ -663,8 +797,12 @@ main(int argc, char **argv)
dup2(0, 2);
}
+ write_pid_file();
+
signal_init();
+ config_commit(conf, RECONFIG_HARD, 0);
+
#ifdef LOCAL_DEBUG
async_dump_flag = 1;
#endif
diff --git a/sysdep/unix/timer.h b/sysdep/unix/timer.h
index a788ae27..17450322 100644
--- a/sysdep/unix/timer.h
+++ b/sysdep/unix/timer.h
@@ -32,6 +32,7 @@ void tm_dump_all(void);
extern bird_clock_t now; /* Relative, monotonic time in seconds */
extern bird_clock_t now_real; /* Time in seconds since fixed known epoch */
+extern bird_clock_t boot_time;
static inline bird_clock_t
tm_remains(timer *t)
diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h
index 3e85c85c..1fc26db2 100644
--- a/sysdep/unix/unix.h
+++ b/sysdep/unix/unix.h
@@ -19,9 +19,14 @@ extern char *bird_name;
void async_config(void);
void async_dump(void);
void async_shutdown(void);
-void cmd_reconfig(char *name, int type);
+void cmd_check_config(char *name);
+void cmd_reconfig(char *name, int type, int timeout);
+void cmd_reconfig_confirm(void);
+void cmd_reconfig_undo(void);
void cmd_shutdown(void);
+#define UNIX_DEFAULT_CONFIGURE_TIMEOUT 300
+
/* io.c */
volatile int async_config_flag;
diff --git a/tools/Makefile.in b/tools/Makefile.in
index 728e5797..062ba916 100644
--- a/tools/Makefile.in
+++ b/tools/Makefile.in
@@ -3,22 +3,31 @@
include Rules
-.PHONY: all daemon client subdir depend clean distclean tags docs userdocs progdocs
+.PHONY: all daemon birdc birdcl subdir depend clean distclean tags docs userdocs progdocs
-all: sysdep/paths.h .dep-stamp subdir daemon @CLIENT@
+all: sysdep/paths.h .dep-stamp subdir daemon birdcl @CLIENT@
daemon: $(exedir)/bird
-client: $(exedir)/birdc
+birdc: $(exedir)/birdc
+
+birdcl: $(exedir)/birdcl
bird-dep := $(addsuffix /all.o, $(static-dirs)) conf/all.o lib/birdlib.a
$(bird-dep): sysdep/paths.h .dep-stamp subdir
-birdc-dep := client/all.o lib/birdlib.a
+birdc-dep := client/birdc.o client/all.o lib/birdlib.a
$(birdc-dep): sysdep/paths.h .dep-stamp subdir
+birdcl-dep := client/birdcl.o client/all.o lib/birdlib.a
+
+$(birdcl-dep): sysdep/paths.h .dep-stamp subdir
+
+
+export client := @CLIENT@
+
depend: sysdep/paths.h .dir-stamp
set -e ; for a in $(dynamic-dirs) ; do $(MAKE) -C $$a $@ ; done
set -e ; for a in $(static-dirs) $(client-dirs) ; do $(MAKE) -C $$a -f $(srcdir_abs)/$$a/Makefile $@ ; done
@@ -33,6 +42,9 @@ $(exedir)/bird: $(bird-dep)
$(exedir)/birdc: $(birdc-dep)
$(CC) $(LDFLAGS) -o $@ $^ $(LIBS) $(CLIENT_LIBS)
+# Link birdcl from $(birdcl-dep); unlike the birdc rule above, $(CLIENT_LIBS)
+# is not added to the link line.
+$(exedir)/birdcl: $(birdcl-dep)
+ $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+
.dir-stamp: sysdep/paths.h
mkdir -p $(static-dirs) $(client-dirs) $(doc-dirs)
touch .dir-stamp
@@ -57,9 +69,10 @@ tags:
install: all
$(INSTALL) -d $(DESTDIR)/$(sbindir) $(DESTDIR)/$(sysconfdir) $(DESTDIR)/@runtimedir@
- $(INSTALL_PROGRAM) -s $(exedir)/bird $(DESTDIR)/$(sbindir)/bird@SUFFIX@
+ $(INSTALL_PROGRAM) $(exedir)/bird $(DESTDIR)/$(sbindir)/bird@SUFFIX@
+ $(INSTALL_PROGRAM) $(exedir)/birdcl $(DESTDIR)/$(sbindir)/birdcl@SUFFIX@
if test -n "@CLIENT@" ; then \
- $(INSTALL_PROGRAM) -s $(exedir)/birdc $(DESTDIR)/$(sbindir)/birdc@SUFFIX@ ; \
+ $(INSTALL_PROGRAM) $(exedir)/birdc $(DESTDIR)/$(sbindir)/birdc@SUFFIX@ ; \
fi
if ! test -f $(DESTDIR)/@CONFIG_FILE@ ; then \
$(INSTALL_DATA) $(srcdir)/doc/bird.conf.example $(DESTDIR)/@CONFIG_FILE@ ; \
@@ -74,7 +87,7 @@ install-docs:
clean:
find . -name "*.[oa]" -o -name core -o -name depend -o -name "*.html" | xargs rm -f
rm -f conf/cf-lex.c conf/cf-parse.* conf/commands.h conf/keywords.h
- rm -f $(exedir)/bird $(exedir)/birdc $(exedir)/bird.ctl $(exedir)/bird6.ctl .dep-stamp
+ rm -f $(exedir)/bird $(exedir)/birdcl $(exedir)/birdc $(exedir)/bird.ctl $(exedir)/bird6.ctl .dep-stamp
distclean: clean
rm -f config.* configure sysdep/autoconf.h sysdep/paths.h Makefile Rules
diff --git a/tools/Rules.in b/tools/Rules.in
index fc06aeb1..ca930ec8 100644
--- a/tools/Rules.in
+++ b/tools/Rules.in
@@ -11,7 +11,7 @@ static-dirs := nest filter $(addprefix proto/,$(protocols))
static-dir-paths := $(addprefix $(srcdir)/,$(static-dirs))
dynamic-dirs := lib conf
dynamic-dir-paths := $(dynamic-dirs)
-client-dirs := @CLIENT@
+client-dirs := client
client-dir-paths := $(client-dirs)
doc-dirs := doc
doc-dir-paths := $(doc-dirs)
@@ -75,8 +75,12 @@ endif
%.o: $(src-path)%.c
$(CC) $(CFLAGS) -o $@ -c $<
+# Sources scanned by the 'depend' rule below; defaults to $(source) unless
+# source-dep was already set before this point.
+ifndef source-dep
+source-dep := $(source)
+endif
+
depend:
- $(CC) $(CPPFLAGS) -MM $(addprefix $(src-path),$(source)) >depend
+ $(CC) $(CPPFLAGS) -MM $(addprefix $(src-path),$(source-dep)) >depend
ifneq ($(wildcard depend),)
include depend