summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2013-11-22 02:43:41 +0100
committerOndrej Zajicek <santiago@crfreenet.org>2013-11-22 02:48:44 +0100
commit0aeac9cb7f9887374ce0258c8653f9518529bf08 (patch)
tree2d16e0904be1658c08a4c779cf4132bc8bf20efa
parent8931425d02dd8656b48142f608d3119ab6f4a96f (diff)
parent7c9930f9c8feb3b08f7a9e94a08807ccbbc096f5 (diff)
Merge commit 'origin/bfd'
-rw-r--r--aclocal.m412
-rw-r--r--conf/confbase.Y10
-rw-r--r--configure.in33
-rw-r--r--doc/bird.sgml307
-rw-r--r--filter/filter.c129
-rw-r--r--filter/filter.h6
-rw-r--r--filter/test.conf2
-rw-r--r--filter/tree.c34
-rw-r--r--filter/trie.c38
-rw-r--r--lib/birdlib.h54
-rw-r--r--lib/buffer.h35
-rw-r--r--lib/hash.h123
-rw-r--r--lib/heap.h156
-rw-r--r--lib/lists.c40
-rw-r--r--lib/lists.h1
-rw-r--r--lib/printf.c39
-rw-r--r--lib/resource.c32
-rw-r--r--lib/resource.h6
-rw-r--r--lib/socket.h2
-rw-r--r--lib/string.h4
-rw-r--r--nest/bfd.h51
-rw-r--r--nest/cmds.c10
-rw-r--r--nest/proto.c8
-rw-r--r--nest/protocol.h8
-rw-r--r--proto/Doc1
-rw-r--r--proto/bfd/Doc1
-rw-r--r--proto/bfd/Makefile5
-rw-r--r--proto/bfd/bfd.c1114
-rw-r--r--proto/bfd/bfd.h191
-rw-r--r--proto/bfd/config.Y138
-rw-r--r--proto/bfd/io.c768
-rw-r--r--proto/bfd/io.h99
-rw-r--r--proto/bfd/packets.c248
-rw-r--r--proto/bgp/bgp.c48
-rw-r--r--proto/bgp/bgp.h5
-rw-r--r--proto/bgp/config.Y3
-rw-r--r--proto/ospf/config.Y1
-rw-r--r--proto/ospf/hello.c3
-rw-r--r--proto/ospf/iface.c14
-rw-r--r--proto/ospf/neighbor.c30
-rw-r--r--proto/ospf/neighbor.h1
-rw-r--r--proto/ospf/ospf.h4
-rw-r--r--proto/ospf/topology.c3
-rw-r--r--proto/radv/radv.c2
-rw-r--r--proto/rip/rip.c1
-rw-r--r--sysdep/autoconf.h.in4
-rw-r--r--sysdep/config.h1
-rw-r--r--sysdep/unix/io.c17
-rw-r--r--sysdep/unix/log.c105
-rw-r--r--sysdep/unix/main.c9
50 files changed, 3681 insertions, 275 deletions
diff --git a/aclocal.m4 b/aclocal.m4
index 3ceb6eb6..02c0f76b 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -133,6 +133,18 @@ if test "$bird_cv_struct_ip_mreqn" = yes ; then
fi
])
+AC_DEFUN(BIRD_CHECK_PTHREADS,
+[
+ bird_tmp_cflags="$CFLAGS"
+
+ CFLAGS="$CFLAGS -pthread"
+ AC_CACHE_CHECK([whether POSIX threads are available], bird_cv_lib_pthreads,
+ [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]], [[pthread_t pt; pthread_create(&pt, NULL, NULL, NULL); pthread_spinlock_t lock; pthread_spin_lock(&lock); ]])],
+ [bird_cv_lib_pthreads=yes], [bird_cv_lib_pthreads=no])])
+
+ CFLAGS="$bird_tmp_cflags"
+])
+
AC_DEFUN(BIRD_CHECK_GCC_OPTION,
[
bird_tmp_cflags="$CFLAGS"
diff --git a/conf/confbase.Y b/conf/confbase.Y
index c6678e77..8b9f206a 100644
--- a/conf/confbase.Y
+++ b/conf/confbase.Y
@@ -73,6 +73,7 @@ CF_DECLS
%type <iface> ipa_scope
%type <i> expr bool pxlen
+%type <i32> expr_us
%type <time> datetime
%type <a> ipa
%type <px> prefix prefix_or_ipa
@@ -86,7 +87,7 @@ CF_DECLS
%left '!'
%nonassoc '.'
-CF_KEYWORDS(DEFINE, ON, OFF, YES, NO)
+CF_KEYWORDS(DEFINE, ON, OFF, YES, NO, S, MS, US)
CF_GRAMMAR
@@ -124,6 +125,13 @@ expr:
$$ = SYM_VAL($1).i; }
;
+
+expr_us:
+ expr S { $$ = (u32) $1 * 1000000; }
+ | expr MS { $$ = (u32) $1 * 1000; }
+ | expr US { $$ = (u32) $1 * 1; }
+ ;
+
/* expr_u16: expr { check_u16($1); $$ = $1; }; */
/* Switches */
diff --git a/configure.in b/configure.in
index 96f2a50e..9b5dc3e2 100644
--- a/configure.in
+++ b/configure.in
@@ -10,6 +10,7 @@ AC_ARG_ENABLE(debug, [ --enable-debug enable internal debugging routin
AC_ARG_ENABLE(memcheck, [ --enable-memcheck check memory allocations when debugging (default: enabled)],,enable_memcheck=yes)
AC_ARG_ENABLE(client, [ --enable-client enable building of BIRD client (default: enabled)],,enable_client=yes)
AC_ARG_ENABLE(ipv6, [ --enable-ipv6 enable building of IPv6 version (default: disabled)],,enable_ipv6=no)
+AC_ARG_ENABLE(pthreads, [ --enable-pthreads enable POSIX threads support (default: detect)],,enable_pthreads=try)
AC_ARG_WITH(suffix, [ --with-suffix=STRING use specified suffix for BIRD files (default: 6 for IPv6 version)],[given_suffix="yes"])
AC_ARG_WITH(sysconfig, [ --with-sysconfig=FILE use specified BIRD system configuration file])
AC_ARG_WITH(protocols, [ --with-protocols=LIST include specified routing protocols (default: all)],,[with_protocols="all"])
@@ -47,11 +48,10 @@ AC_SUBST(runtimedir)
if test "$enable_ipv6" = yes ; then
ip=ipv6
SUFFIX=6
- all_protocols=bgp,ospf,pipe,radv,rip,static
+ proto_radv=radv
else
ip=ipv4
SUFFIX=""
- all_protocols=bgp,ospf,pipe,rip,static
fi
if test "$given_suffix" = yes ; then
@@ -59,10 +59,6 @@ if test "$given_suffix" = yes ; then
fi
AC_SUBST(SUFFIX)
-if test "$with_protocols" = all ; then
- with_protocols="$all_protocols"
-fi
-
if test "$enable_debug" = yes ; then
CONFIG_FILE="bird$SUFFIX.conf"
CONTROL_SOCKET="bird$SUFFIX.ctl"
@@ -87,6 +83,23 @@ if test -z "$GCC" ; then
AC_MSG_ERROR([This program requires the GNU C Compiler.])
fi
+if test "$enable_pthreads" != no ; then
+ BIRD_CHECK_PTHREADS
+
+ if test "$bird_cv_lib_pthreads" = yes ; then
+ AC_DEFINE(USE_PTHREADS)
+ CFLAGS="$CFLAGS -pthread"
+ LDFLAGS="$LDFLAGS -pthread"
+ proto_bfd=bfd
+ elif test "$enable_pthreads" = yes ; then
+ AC_MSG_ERROR([POSIX threads not available.])
+ fi
+
+ if test "$enable_pthreads" = try ; then
+ enable_pthreads="$bird_cv_lib_pthreads"
+ fi
+fi
+
if test "$bird_cflags_default" = yes ; then
BIRD_CHECK_GCC_OPTION(bird_cv_c_option_wno_pointer_sign, -Wno-pointer-sign, -Wall)
BIRD_CHECK_GCC_OPTION(bird_cv_c_option_fno_strict_aliasing, -fno-strict-aliasing)
@@ -183,6 +196,13 @@ fi
AC_SUBST(iproutedir)
+all_protocols="$proto_bfd bgp ospf pipe $proto_radv rip static"
+all_protocols=`echo $all_protocols | sed 's/ /,/g'`
+
+if test "$with_protocols" = all ; then
+ with_protocols="$all_protocols"
+fi
+
AC_MSG_CHECKING([protocols])
protocols=`echo "$with_protocols" | sed 's/,/ /g'`
if test "$protocols" = no ; then protocols= ; fi
@@ -272,6 +292,7 @@ BIRD was configured with the following options:
Iproute2 directory: $iproutedir
System configuration: $sysdesc
Debugging: $enable_debug
+ POSIX threads: $enable_pthreads
Routing protocols: $protocols
Client: $enable_client
EOF
diff --git a/doc/bird.sgml b/doc/bird.sgml
index 63890031..46d2e026 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -144,7 +144,10 @@ options. The most important ones are:
nonzero if there are some errors.
<tag>-s <m/name of communication socket/</tag>
- use given filename for a socket for communications with the client, default is <it/prefix/<file>/var/run/bird.ctl</file>.
+ use given filename for a socket for communications with the client, default is <it/prefix/<file>/var/run/bird.ctl</file>.
+
+ <tag>-P <m/name of PID file/</tag>
+ create a PID file with given filename.
<tag>-u <m/user/</tag>
drop privileges and use that user ID, see the next section for details.
@@ -915,62 +918,63 @@ bird>
incompatible with each other (that is to prevent you from shooting in the foot).
<descrip>
- <tag/bool/ This is a boolean type, it can have only two values, <cf/true/ and
- <cf/false/. Boolean is the only type you can use in <cf/if/
- statements.
-
- <tag/int/ This is a general integer type, you can expect it to store signed values from -2000000000
- to +2000000000. Overflows are not checked. You can use <cf/0x1234/ syntax to write hexadecimal values.
-
- <tag/pair/ This is a pair of two short integers. Each component can have values from 0 to
- 65535. Literals of this type are written as <cf/(1234,5678)/. The same syntax can also be
- used to construct a pair from two arbitrary integer expressions (for example <cf/(1+2,a)/).
-
- <tag/quad/ This is a dotted quad of numbers used to represent
- router IDs (and others). Each component can have a value
- from 0 to 255. Literals of this type are written like IPv4
- addresses.
-
- <tag/string/ This is a string of characters. There are no ways
- to modify strings in filters. You can pass them between
- functions, assign them to variables of type <cf/string/,
- print such variables, use standard string comparison
- operations (e.g. <cf/=, !=, &lt;, &gt;, &lt;=, &gt;=/), but
- you can't concatenate two strings. String literals are
- written as <cf/"This is a string constant"/. Additionaly
- matching <cf/&tilde;/ operator could be used to match a
- string value against a shell pattern (represented also as a
- string).
-
- <tag/ip/ This type can hold a single IP address. Depending on the compile-time configuration of BIRD you are using, it
- is either an IPv4 or IPv6 address. IP addresses are written in the standard notation (<cf/10.20.30.40/ or <cf/fec0:3:4::1/). You can apply special operator <cf>.mask(<M>num</M>)</cf>
- on values of type ip. It masks out all but first <cf><M>num</M></cf> bits from the IP
- address. So <cf/1.2.3.4.mask(8) = 1.0.0.0/ is true.
-
- <tag/prefix/ This type can hold a network prefix consisting of IP address and prefix length. Prefix literals are written as
- <cf><M>ipaddress</M>/<M>pxlen</M></cf>, or
+ <tag/bool/ This is a boolean type, it can have only two values,
+ <cf/true/ and <cf/false/. Boolean is the only type you can use in
+ <cf/if/ statements.
+
+ <tag/int/ This is a general integer type, you can expect it to store
+ signed values from -2000000000 to +2000000000. Overflows are not
+ checked. You can use <cf/0x1234/ syntax to write hexadecimal values.
+
+ <tag/pair/ This is a pair of two short integers. Each component can have
+ values from 0 to 65535. Literals of this type are written as
+ <cf/(1234,5678)/. The same syntax can also be used to construct a pair
+ from two arbitrary integer expressions (for example <cf/(1+2,a)/).
+
+ <tag/quad/ This is a dotted quad of numbers used to represent router IDs
+ (and others). Each component can have a value from 0 to 255. Literals
+ of this type are written like IPv4 addresses.
+
+ <tag/string/ This is a string of characters. There are no ways to modify
+ strings in filters. You can pass them between functions, assign them
+ to variables of type <cf/string/, print such variables, use standard
+ string comparison operations (e.g. <cf/=, !=, &lt;, &gt;, &lt;=,
+ &gt;=/), but you can't concatenate two strings. String literals are
+ written as <cf/"This is a string constant"/. Additionally, matching
+ <cf/&tilde;/ operator could be used to match a string value against a
+ shell pattern (represented also as a string).
+
+ <tag/ip/ This type can hold a single IP address. Depending on the
+ compile-time configuration of BIRD you are using, it is either an IPv4
+ or IPv6 address. IP addresses are written in the standard notation
+ (<cf/10.20.30.40/ or <cf/fec0:3:4::1/). You can apply special
+ operator <cf>.mask(<M>num</M>)</cf> on values of type ip. It masks out
+ all but first <cf><M>num</M></cf> bits from the IP address. So
+ <cf/1.2.3.4.mask(8) = 1.0.0.0/ is true.
+
+ <tag/prefix/ This type can hold a network prefix consisting of IP
+ address and prefix length. Prefix literals are written
+ as <cf><M>ipaddress</M>/<M>pxlen</M></cf>, or
<cf><m>ipaddress</m>/<m>netmask</m></cf>. There are two special
- operators on prefixes:
- <cf/.ip/ which extracts the IP address from the pair, and <cf/.len/, which separates prefix
- length from the pair. So <cf>1.2.0.0/16.pxlen = 16</cf> is true.
-
- <tag/ec/ This is a specialized type used to represent BGP
- extended community values. It is essentially a 64bit value,
- literals of this type are usually written as <cf>(<m/kind/,
- <m/key/, <m/value/)</cf>, where <cf/kind/ is a kind of
- extended community (e.g. <cf/rt/ / <cf/ro/ for a route
- target / route origin communities), the format and possible
- values of <cf/key/ and <cf/value/ are usually integers, but
+ operators on prefixes: <cf/.ip/ which extracts the IP address from the
+ pair, and <cf/.len/, which separates prefix length from the
+ pair. So <cf>1.2.0.0/16.len = 16</cf> is true.
+
+ <tag/ec/ This is a specialized type used to represent BGP extended
+ community values. It is essentially a 64bit value, literals of this
+ type are usually written as <cf>(<m/kind/, <m/key/, <m/value/)</cf>,
+ where <cf/kind/ is a kind of extended community (e.g. <cf/rt/ /
+ <cf/ro/ for a route target / route origin communities), the format and
+ possible values of <cf/key/ and <cf/value/ are usually integers, but
it depends on the used kind. Similarly to pairs, ECs can be
- constructed using expressions for <cf/key/ and
- <cf/value/ parts, (e.g. <cf/(ro, myas, 3*10)/, where
- <cf/myas/ is an integer variable).
+ constructed using expressions for <cf/key/ and <cf/value/ parts,
+ (e.g. <cf/(ro, myas, 3*10)/, where <cf/myas/ is an integer variable).
- <tag/int|pair|quad|ip|prefix|ec|enum set/
- Filters recognize four types of sets. Sets are similar to strings: you can pass them around
- but you can't modify them. Literals of type <cf>int set</cf> look like <cf>
- [ 1, 2, 5..7 ]</cf>. As you can see, both simple values and ranges are permitted in
- sets.
+ <tag/int|pair|quad|ip|prefix|ec|enum set/ Filters recognize four types
+ of sets. Sets are similar to strings: you can pass them around but you
+ can't modify them. Literals of type <cf>int set</cf> look like <cf> [
+ 1, 2, 5..7 ]</cf>. As you can see, both simple values and ranges are
+ permitted in sets.
For pair sets, expressions like <cf/(123,*)/ can be used to denote ranges (in
that case <cf/(123,0)..(123,65535)/). You can also use <cf/(123,5..100)/ for range
@@ -1267,6 +1271,178 @@ undefined value is regarded as empty clist for most purposes.
<chapt>Protocols
+<sect><label id="sect-bfd">BFD
+
+<sect1>Introduction
+
+<p>Bidirectional Forwarding Detection (BFD) is not a routing protocol itself, it
+is an independent tool providing liveness and failure detection. Routing
+protocols like OSPF and BGP use integrated periodic "hello" messages to monitor
+liveness of neighbors, but detection times of these mechanisms are high (e.g. 40
+seconds by default in OSPF, could be set down to several seconds). BFD offers
+universal, fast and low-overhead mechanism for failure detection, which could be
+attached to any routing protocol in an advisory role.
+
+<p>BFD consists of mostly independent BFD sessions. Each session monitors a
+unicast bidirectional path between two BFD-enabled routers. This is done by
+periodically sending control packets in both directions. BFD does not handle
+neighbor discovery, BFD sessions are created on demand by request of other
+protocols (like OSPF or BGP), which supply appropriate information like IP
+addresses and associated interfaces. When a session changes its state, these
+protocols are notified and act accordingly (e.g. break an OSPF adjacency when
+the BFD session went down).
+
+<p>BIRD implements basic BFD behavior as defined in
+RFC 5880<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc5880.txt">
+(some advanced features like the echo mode or authentication are not implemented),
+IP transport for BFD as defined in
+RFC 5881<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc5881.txt"> and
+RFC 5883<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc5883.txt">
+and interaction with client protocols as defined in
+RFC 5882<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc5882.txt">.
+
+<p>Note that BFD implementation in BIRD is currently a new feature in
+development, expect some rough edges and possible UI and configuration changes
+in the future. Also note that we currently support at most one protocol instance.
+
+<sect1>Configuration
+
+<p>BFD configuration consists mainly of multiple definitions of interfaces.
+Most BFD config options are session specific. When a new session is requested
+and dynamically created, it is configured from one of these definitions. For
+sessions to directly connected neighbors, <cf/interface/ definitions are chosen
+based on the interface associated with the session, while <cf/multihop/
+definition is used for multihop sessions. If no definition is relevant, the
+session is just created with the default configuration. Therefore, an empty BFD
+configuration is often sufficient.
+
+<p>Note that to use BFD for other protocols like OSPF or BGP, these protocols
+also have to be configured to request BFD sessions, usually by <cf/bfd/ option.
+
+<p>Some of BFD session options require <m/time/ value, which has to be specified
+with the appropriate unit: <m/num/ <cf/s/|<cf/ms/|<cf/us/. Although microseconds
+are allowed as units, practical minimum values are usually in order of tens of
+milliseconds.
+
+<code>
+protocol bfd [&lt;name&gt;] {
+ interface &lt;interface pattern&gt; {
+ interval &lt;time&gt;;
+ min rx interval &lt;time&gt;;
+ min tx interval &lt;time&gt;;
+ idle tx interval &lt;time&gt;;
+ multiplier &lt;num&gt;;
+ passive &lt;switch&gt;;
+ };
+ multihop {
+ interval &lt;time&gt;;
+ min rx interval &lt;time&gt;;
+ min tx interval &lt;time&gt;;
+ idle tx interval &lt;time&gt;;
+ multiplier &lt;num&gt;;
+ passive &lt;switch&gt;;
+ };
+ neighbor &lt;ip&gt; [dev "&lt;interface&gt;"] [local &lt;ip&gt;] [multihop &lt;switch&gt;];
+}
+</code>
+
+<descrip>
+ <tag>interface <m/pattern [, ...]/ { <m/options/ }</tag>
+ Interface definitions allow to specify options for sessions associated
+ with such interfaces and also may contain interface specific options.
+ See <ref id="dsc-iface" name="interface"> common option for a detailed
+ description of interface patterns. Note that contrary to the behavior of
+ <cf/interface/ definitions of other protocols, BFD protocol would accept
+ sessions (in default configuration) even on interfaces not covered by
+ such definitions.
+
+ <tag>multihop { <m/options/ }</tag>
+ Multihop definitions allow to specify options for multihop BFD sessions,
+ in the same manner as <cf/interface/ definitions are used for directly
+ connected sessions. Currently only one such definition (for all multihop
+ sessions) could be used.
+
+ <tag>neighbor <m/ip/ [dev "<m/interface/"] [local <m/ip/] [multihop <m/switch/]</tag>
+ BFD sessions are usually created on demand as requested by other
+ protocols (like OSPF or BGP). This option allows to explicitly add
+ a BFD session to the specified neighbor regardless of such requests.
+
+ The session is identified by the IP address of the neighbor, with
+ optional specification of used interface and local IP. By default
+ the neighbor must be directly connected, unless the session is
+ configured as multihop. Note that local IP must be specified for
+ multihop sessions.
+</descrip>
+
+<p>Session specific options (part of <cf/interface/ and <cf/multihop/ definitions):
+
+<descrip>
+ <tag>interval <m/time/</tag>
+ BFD ensures availability of the forwarding path associated with the
+ session by periodically sending BFD control packets in both
+ directions. The rate of such packets is controlled by two options,
+ <cf/min rx interval/ and <cf/min tx interval/ (see below). This option
+ is just a shorthand to set both of these options together.
+
+ <tag>min rx interval <m/time/</tag>
+ This option specifies the minimum RX interval, which is announced to the
+ neighbor and used there to limit the neighbor's rate of generated BFD
+ control packets. Default: 10 ms.
+
+ <tag>min tx interval <m/time/</tag>
+ This option specifies the desired TX interval, which controls the rate
+ of generated BFD control packets (together with <cf/min rx interval/
+ announced by the neighbor). Note that this value is used only if the BFD
+ session is up, otherwise the value of <cf/idle tx interval/ is used
+ instead. Default: 100 ms.
+
+ <tag>idle tx interval <m/time/</tag>
+ In order to limit unnecessary traffic in cases where a neighbor is not
+ available or not running BFD, the rate of generated BFD control packets
+ is lower when the BFD session is not up. This option specifies the
+ desired TX interval in such cases instead of <cf/min tx interval/.
+ Default: 1 s.
+
+ <tag>multiplier <m/num/</tag>
+ Failure detection time for BFD sessions is based on established rate of
+ BFD control packets (<cf>min rx/tx interval</cf>) multiplied by this
+ multiplier, which is essentially (ignoring jitter) a number of missed
+ packets after which the session is declared down. Note that rates and
+ multipliers could be different in each direction of a BFD session.
+ Default: 5.
+
+ <tag>passive <m/switch/</tag>
+ Generally, both BFD session endpoints try to establish the session by
+ sending control packets to the other side. This option allows to enable
+ passive mode, which means that the router does not send BFD packets
+ until it has received one from the other side. Default: disabled.
+</descrip>
+
+<sect1>Example
+
+<p><code>
+protocol bfd {
+ interface "eth*" {
+ min rx interval 20 ms;
+ min tx interval 50 ms;
+ idle tx interval 300 ms;
+ };
+ interface "gre*" {
+ interval 200 ms;
+ multiplier 10;
+ passive;
+ };
+ multihop {
+ interval 200 ms;
+ multiplier 10;
+ };
+
+ neighbor 192.168.1.10;
+ neighbor 192.168.2.2 dev "eth2";
+ neighbor 192.168.10.1 local 192.168.1.1 multihop;
+}
+</code>
+
<sect>BGP
<p>The Border Gateway Protocol is the routing protocol used for backbone
@@ -1281,8 +1457,8 @@ AS). Each AS is a part of the network with common management and
common routing policy. It is identified by a unique 16-bit number
(ASN). Routers within each AS usually exchange AS-internal routing
information with each other using an interior gateway protocol (IGP,
-such as OSPF or RIP). Boundary routers at the border of
-the AS communicate global (inter-AS) network reachability information with
+such as OSPF or RIP). Boundary routers at the border of the AS
+communicate global (inter-AS) network reachability information with
their neighbors in the neighboring AS'es via exterior BGP (eBGP) and
redistribute received information to other routers in the AS via
interior BGP (iBGP).
@@ -1435,7 +1611,15 @@ for each neighbor using the following configuration parameters:
<tag>igp table <m/name/</tag> Specifies a table that is used
as an IGP routing table. Default: the same as the table BGP is
connected to.
-
+
+ <tag>bfd <M>switch</M></tag>
+ BGP could use BFD protocol as an advisory mechanism for neighbor
+ liveness and failure detection. If enabled, BIRD sets up a BFD session
+ for the BGP neighbor and tracks its liveness by it. This has an
+ advantage of an order of magnitude lower detection times in case of
+ failure. Note that BFD protocol also has to be configured, see
+ <ref id="sect-bfd" name="BFD"> section for details. Default: disabled.
+
<tag>ttl security <m/switch/</tag> Use GTSM (RFC 5082 - the
generalized TTL security mechanism). GTSM protects against
spoofed packets by ignoring received packets with a smaller
@@ -2020,6 +2204,7 @@ protocol ospf &lt;name&gt; {
real broadcast &lt;switch&gt;;
ptp netmask &lt;switch&gt;;
check link &lt;switch&gt;;
+ bfd &lt;switch&gt;;
ecmp weight &lt;num&gt;;
ttl security [&lt;switch&gt;; | tx only]
tx class|dscp &lt;num&gt;;
@@ -2294,6 +2479,14 @@ protocol ospf &lt;name&gt; {
prefix) is propagated. It is possible that some hardware
drivers or platforms do not implement this feature. Default value is no.
+ <tag>bfd <M>switch</M></tag>
+ OSPF could use BFD protocol as an advisory mechanism for neighbor
+ liveness and failure detection. If enabled, BIRD sets up a BFD session
+ for each OSPF neighbor and tracks its liveness by it. This has an
+ advantage of an order of magnitude lower detection times in case of
+ failure. Note that BFD protocol also has to be configured, see
+ <ref id="sect-bfd" name="BFD"> section for details. Default value is no.
+
<tag>ttl security [<m/switch/ | tx only]</tag>
TTL security is a feature that protects routing protocols
from remote spoofed packets by using TTL 255 instead of TTL 1
diff --git a/filter/filter.c b/filter/filter.c
index a28de5df..e0451aa1 100644
--- a/filter/filter.c
+++ b/filter/filter.c
@@ -59,41 +59,35 @@ adata_empty(struct linpool *pool, int l)
}
static void
-pm_format(struct f_path_mask *p, byte *buf, unsigned int size)
+pm_format(struct f_path_mask *p, buffer *buf)
{
- byte *end = buf + size - 16;
+ buffer_puts(buf, "[= ");
while (p)
+ {
+ switch(p->kind)
{
- if (buf > end)
- {
- strcpy(buf, " ...");
- return;
- }
-
- switch(p->kind)
- {
- case PM_ASN:
- buf += bsprintf(buf, " %u", p->val);
- break;
-
- case PM_QUESTION:
- buf += bsprintf(buf, " ?");
- break;
+ case PM_ASN:
+ buffer_print(buf, "%u ", p->val);
+ break;
- case PM_ASTERISK:
- buf += bsprintf(buf, " *");
- break;
+ case PM_QUESTION:
+ buffer_puts(buf, "? ");
+ break;
- case PM_ASN_EXPR:
- buf += bsprintf(buf, " %u", f_eval_asn((struct f_inst *) p->val));
- break;
- }
+ case PM_ASTERISK:
+ buffer_puts(buf, "* ");
+ break;
- p = p->next;
+ case PM_ASN_EXPR:
+ buffer_print(buf, "%u ", f_eval_asn((struct f_inst *) p->val));
+ break;
}
- *buf = 0;
+ p = p->next;
+ }
+
+ buffer_puts(buf, "=]");
}
static inline int
@@ -103,7 +97,7 @@ int_cmp(int i1, int i2)
}
static inline int
-uint_cmp(unsigned int i1, unsigned int i2)
+uint_cmp(uint i1, uint i2)
{
return (int)(i1 > i2) - (int)(i1 < i2);
}
@@ -437,60 +431,32 @@ val_in_range(struct f_val v1, struct f_val v2)
return CMP_ERROR;
}
-static void
-tree_node_print(struct f_tree *t, char **sep)
-{
- if (t == NULL)
- return;
-
- tree_node_print(t->left, sep);
-
- logn(*sep);
- val_print(t->from);
- if (val_compare(t->from, t->to) != 0)
- {
- logn( ".." );
- val_print(t->to);
- }
- *sep = ", ";
-
- tree_node_print(t->right, sep);
-}
-
-static void
-tree_print(struct f_tree *t)
-{
- char *sep = "";
- logn( "[" );
- tree_node_print(t, &sep);
- logn( "] " );
-}
-
/*
- * val_print - format filter value
+ * val_format - format filter value
*/
void
-val_print(struct f_val v)
+val_format(struct f_val v, buffer *buf)
{
char buf2[1024];
- switch (v.type) {
- case T_VOID: logn("(void)"); return;
- case T_BOOL: logn(v.val.i ? "TRUE" : "FALSE"); return;
- case T_INT: logn("%d", v.val.i); return;
- case T_STRING: logn("%s", v.val.s); return;
- case T_IP: logn("%I", v.val.px.ip); return;
- case T_PREFIX: logn("%I/%d", v.val.px.ip, v.val.px.len); return;
- case T_PAIR: logn("(%d,%d)", v.val.i >> 16, v.val.i & 0xffff); return;
- case T_QUAD: logn("%R", v.val.i); return;
- case T_EC: ec_format(buf2, v.val.ec); logn("%s", buf2); return;
- case T_PREFIX_SET: trie_print(v.val.ti); return;
- case T_SET: tree_print(v.val.t); return;
- case T_ENUM: logn("(enum %x)%d", v.type, v.val.i); return;
- case T_PATH: as_path_format(v.val.ad, buf2, 1000); logn("(path %s)", buf2); return;
- case T_CLIST: int_set_format(v.val.ad, 1, -1, buf2, 1000); logn("(clist %s)", buf2); return;
- case T_ECLIST: ec_set_format(v.val.ad, -1, buf2, 1000); logn("(eclist %s)", buf2); return;
- case T_PATH_MASK: pm_format(v.val.path_mask, buf2, 1000); logn("(pathmask%s)", buf2); return;
- default: logn( "[unknown type %x]", v.type ); return;
+ switch (v.type)
+ {
+ case T_VOID: buffer_puts(buf, "(void)"); return;
+ case T_BOOL: buffer_puts(buf, v.val.i ? "TRUE" : "FALSE"); return;
+ case T_INT: buffer_print(buf, "%d", v.val.i); return;
+ case T_STRING: buffer_print(buf, "%s", v.val.s); return;
+ case T_IP: buffer_print(buf, "%I", v.val.px.ip); return;
+ case T_PREFIX: buffer_print(buf, "%I/%d", v.val.px.ip, v.val.px.len); return;
+ case T_PAIR: buffer_print(buf, "(%d,%d)", v.val.i >> 16, v.val.i & 0xffff); return;
+ case T_QUAD: buffer_print(buf, "%R", v.val.i); return;
+ case T_EC: ec_format(buf2, v.val.ec); buffer_print(buf, "%s", buf2); return;
+ case T_PREFIX_SET: trie_format(v.val.ti, buf); return;
+ case T_SET: tree_format(v.val.t, buf); return;
+ case T_ENUM: buffer_print(buf, "(enum %x)%d", v.type, v.val.i); return;
+ case T_PATH: as_path_format(v.val.ad, buf2, 1000); buffer_print(buf, "(path %s)", buf2); return;
+ case T_CLIST: int_set_format(v.val.ad, 1, -1, buf2, 1000); buffer_print(buf, "(clist %s)", buf2); return;
+ case T_ECLIST: ec_set_format(v.val.ad, -1, buf2, 1000); buffer_print(buf, "(eclist %s)", buf2); return;
+ case T_PATH_MASK: pm_format(v.val.path_mask, buf); return;
+ default: buffer_print(buf, "[unknown type %x]", v.type); return;
}
}
@@ -498,6 +464,7 @@ static struct rte **f_rte;
static struct rta *f_old_rta;
static struct ea_list **f_tmp_attrs;
static struct linpool *f_pool;
+static struct buffer f_buf;
static int f_flags;
static inline void f_rte_cow(void)
@@ -786,7 +753,7 @@ interpret(struct f_inst *what)
break;
case 'p':
ONEARG;
- val_print(v1);
+ val_format(v1, &f_buf);
break;
case '?': /* ? has really strange error value, so we can implement if ... else nicely :-) */
ONEARG;
@@ -804,7 +771,7 @@ interpret(struct f_inst *what)
case P('p',','):
ONEARG;
if (what->a2.i == F_NOP || (what->a2.i != F_NONL && what->a1.p))
- log_commit(*L_INFO);
+ log_commit(*L_INFO, &f_buf);
switch (what->a2.i) {
case F_QUITBIRD:
@@ -1507,7 +1474,8 @@ f_run(struct filter *filter, struct rte **rte, struct ea_list **tmp_attrs, struc
f_pool = tmp_pool;
f_flags = flags;
- log_reset();
+ LOG_BUFFER_INIT(f_buf);
+
struct f_val res = interpret(filter->root);
if (f_old_rta) {
@@ -1546,7 +1514,8 @@ f_eval(struct f_inst *expr, struct linpool *tmp_pool)
f_rte = NULL;
f_pool = tmp_pool;
- log_reset();
+ LOG_BUFFER_INIT(f_buf);
+
return interpret(expr);
}
diff --git a/filter/filter.h b/filter/filter.h
index 5570a8a3..07a4c9e4 100644
--- a/filter/filter.h
+++ b/filter/filter.h
@@ -78,12 +78,13 @@ struct f_inst *f_generate_roa_check(struct symbol *sym, struct f_inst *prefix, s
struct f_tree *build_tree(struct f_tree *);
struct f_tree *find_tree(struct f_tree *t, struct f_val val);
int same_tree(struct f_tree *t1, struct f_tree *t2);
+void tree_format(struct f_tree *t, buffer *buf);
struct f_trie *f_new_trie(linpool *lp);
void trie_add_prefix(struct f_trie *t, ip_addr px, int plen, int l, int h);
int trie_match_prefix(struct f_trie *t, ip_addr px, int plen);
int trie_same(struct f_trie *t1, struct f_trie *t2);
-void trie_print(struct f_trie *t);
+void trie_format(struct f_trie *t, buffer *buf);
void fprefix_get_bounds(struct f_prefix *px, int *l, int *h);
@@ -118,7 +119,8 @@ int i_same(struct f_inst *f1, struct f_inst *f2);
int val_compare(struct f_val v1, struct f_val v2);
int val_same(struct f_val v1, struct f_val v2);
-void val_print(struct f_val v);
+void val_format(struct f_val v, buffer *buf);
+
#define F_NOP 0
#define F_NONL 1
diff --git a/filter/test.conf b/filter/test.conf
index c4108f9d..62c807b7 100644
--- a/filter/test.conf
+++ b/filter/test.conf
@@ -106,7 +106,7 @@ eclist el2;
print "5 = ", p2.len;
print "Delete 3: ", delete(p2, 3);
print "Filter 1-3: ", filter(p2, [1..3]);
-
+
pm1 = [= 1 2 * 3 4 5 =];
p2 = prepend( + empty +, 5 );
p2 = prepend( p2, 4 );
diff --git a/filter/tree.c b/filter/tree.c
index d27db18b..ee9f448a 100644
--- a/filter/tree.c
+++ b/filter/tree.c
@@ -137,3 +137,37 @@ same_tree(struct f_tree *t1, struct f_tree *t2)
return 0;
return 1;
}
+
+
+static void
+tree_node_format(struct f_tree *t, buffer *buf)
+{
+ if (t == NULL)
+ return;
+
+ tree_node_format(t->left, buf);
+
+ val_format(t->from, buf);
+ if (val_compare(t->from, t->to) != 0)
+ {
+ buffer_puts(buf, "..");
+ val_format(t->to, buf);
+ }
+ buffer_puts(buf, ", ");
+
+ tree_node_format(t->right, buf);
+}
+
+void
+tree_format(struct f_tree *t, buffer *buf)
+{
+ buffer_puts(buf, "[");
+
+ tree_node_format(t, buf);
+
+ /* Undo last separator */
+ if (buf->pos[-1] != '[')
+ buf->pos -= 2;
+
+ buffer_puts(buf, "]");
+}
diff --git a/filter/trie.c b/filter/trie.c
index f42afb84..217d72c3 100644
--- a/filter/trie.c
+++ b/filter/trie.c
@@ -265,37 +265,37 @@ trie_same(struct f_trie *t1, struct f_trie *t2)
}
static void
-trie_node_print(struct f_trie_node *t, char **sep)
+trie_node_format(struct f_trie_node *t, buffer *buf)
{
if (t == NULL)
return;
if (ipa_nonzero(t->accept))
- {
- logn("%s%I/%d{%I}", *sep, t->addr, t->plen, t->accept);
- *sep = ", ";
- }
+ buffer_print(buf, "%I/%d{%I}, ", t->addr, t->plen, t->accept);
- trie_node_print(t->c[0], sep);
- trie_node_print(t->c[1], sep);
+ trie_node_format(t->c[0], buf);
+ trie_node_format(t->c[1], buf);
}
/**
- * trie_print
- * @t: trie to be printed
+ * trie_format
+ * @t: trie to be formatted
+ * @buf: destination buffer
*
- * Prints the trie to the log buffer.
+ * Prints the trie to the supplied buffer.
*/
void
-trie_print(struct f_trie *t)
+trie_format(struct f_trie *t, buffer *buf)
{
- char *sep = "";
- logn("[");
+ buffer_puts(buf, "[");
+
+ /* The zero prefix must end with ", " like every entry written by
+ trie_node_format(); otherwise the separator-undo step below would cut
+ off the "/0" suffix or the prefix would be glued to the next entry */
if (t->zero)
- {
- logn("%I/%d", IPA_NONE, 0);
- sep = ", ";
- }
- trie_node_print(&t->root, &sep);
- logn("]");
+ buffer_print(buf, "%I/%d, ", IPA_NONE, 0);
+ trie_node_format(&t->root, buf);
+
+ /* Undo last separator (skipped when nothing was printed after '[') */
+ if (buf->pos[-1] != '[')
+ buf->pos -= 2;
+
+ buffer_puts(buf, "]");
}
diff --git a/lib/birdlib.h b/lib/birdlib.h
index 7e6e8526..b7a5a6a6 100644
--- a/lib/birdlib.h
+++ b/lib/birdlib.h
@@ -10,6 +10,7 @@
#define _BIRD_BIRDLIB_H_
#include "timer.h"
+#include "alloca.h"
/* Ugly structure offset handling macros */
@@ -19,12 +20,14 @@
/* Utility macros */
-#ifdef PARSER
#define _MIN(a,b) (((a)<(b))?(a):(b))
#define _MAX(a,b) (((a)>(b))?(a):(b))
-#else
-#define MIN(a,b) (((a)<(b))?(a):(b))
-#define MAX(a,b) (((a)>(b))?(a):(b))
+
+#ifndef PARSER
+#undef MIN
+#undef MAX
+#define MIN(a,b) _MIN(a,b)
+#define MAX(a,b) _MAX(a,b)
#endif
#define ABS(a) ((a)>=0 ? (a) : -(a))
@@ -40,24 +43,61 @@
#define IP_VERSION 6
#endif
+
/* Macros for gcc attributes */
#define NORET __attribute__((noreturn))
#define UNUSED __attribute__((unused))
+
+/* Microsecond time */
+
+/* Time value counted in microseconds (see the unit macros below) */
+typedef s64 btime;
+
+/*
+ * Postfix unit macros: written after a value, they scale it to microseconds
+ * (e.g. `5 _MS` expands to `5 *1000`). The expansions cannot be parenthesized,
+ * so compound operands have to be wrapped in parentheses at the use site.
+ */
+#define _S *1000000
+#define _MS *1000
+#define _US *1
+/* Postfix conversions from microseconds (e.g. `t TO_MS` expands to `t /1000`) */
+#define TO_S /1000000
+#define TO_MS /1000
+#define TO_US /1
+
+/* Short aliases of the unit macros; not defined when PARSER is set */
+#ifndef PARSER
+#define S _S
+#define MS _MS
+#define US _US
+#endif
+
+
/* Logging and dying */
+/* Bounded output buffer; text is appended at @pos and never written at or past @end */
+typedef struct buffer {
+ byte *start; /* First byte of the storage */
+ byte *pos; /* Current write position */
+ byte *end; /* One past the last byte of the storage */
+} buffer;
+
+/*
+ * Point @buf (a buffer lvalue, not a pointer) at @size bytes obtained from
+ * alloca(). The storage belongs to the stack frame of the function expanding
+ * the macro, so the buffer must not be used after that function returns.
+ * Both arguments are parenthesized in the expansion; @size is evaluated twice.
+ */
+#define STACK_BUFFER_INIT(buf,size) \
+  do { \
+    (buf).start = alloca(size); \
+    (buf).pos = (buf).start; \
+    (buf).end = (buf).start + (size); \
+  } while(0)
+
+#define LOG_BUFFER_INIT(buf) \
+ STACK_BUFFER_INIT(buf, LOG_BUFFER_SIZE)
+
+#define LOG_BUFFER_SIZE 1024
+
+
struct rate_limit {
bird_clock_t timestamp;
int count;
};
#define log log_msg
-void log_reset(void);
-void log_commit(int class);
+void log_commit(int class, buffer *buf);
void log_msg(char *msg, ...);
void log_rl(struct rate_limit *rl, char *msg, ...);
-void logn(char *msg, ...);
void die(char *msg, ...) NORET;
void bug(char *msg, ...) NORET;
diff --git a/lib/buffer.h b/lib/buffer.h
new file mode 100644
index 00000000..cf073e88
--- /dev/null
+++ b/lib/buffer.h
@@ -0,0 +1,35 @@
+
+/*
+ * BUFFER(type) - anonymous struct describing a growable array of @type:
+ * @data is the storage, @used the number of items in use and @size the
+ * allocated capacity counted in items.
+ */
+#define BUFFER(type) struct { type *data; uint used, size; }
+
+/* Allocated capacity of buffer @v in bytes */
+#define BUFFER_SIZE(v) ((v).size * sizeof(* (v).data))
+
+/* Set up @v as an empty buffer with room for @isize items allocated from @pool */
+#define BUFFER_INIT(v,pool,isize) \
+ ({ \
+ (v).used = 0; \
+ (v).size = (isize); \
+ (v).data = mb_alloc(pool, BUFFER_SIZE(v)); \
+ })
+
+/*
+ * Set the number of used items of @v to @nsize; when that exceeds the
+ * capacity, the storage is grown through buffer_realloc().
+ */
+#define BUFFER_SET(v,nsize) \
+ ({ \
+ (v).used = (nsize); \
+ if ((v).used > (v).size) \
+ buffer_realloc((void **) &((v).data), &((v).size), (v).used, sizeof(* (v).data)); \
+ })
+
+/*
+ * Extend @v by @step items. Evaluates to a pointer to the first newly added
+ * item, computed after a possible reallocation of the storage.
+ */
+#define BUFFER_INC(v,step) \
+ ({ \
+ uint _o = (v).used; \
+ BUFFER_SET(v, (v).used + (step)); \
+ (v).data + _o; \
+ })
+
+/* Drop the last @step items of @v; no underflow check is performed */
+#define BUFFER_DEC(v,step) ({ (v).used -= (step); })
+
+/* Append one item to @v; expands to an lvalue denoting the new last item */
+#define BUFFER_PUSH(v) (*BUFFER_INC(v,1))
+
+/* Remove the last item of @v */
+#define BUFFER_POP(v) BUFFER_DEC(v,1)
+
+/* Mark @v as empty; the allocated storage is kept */
+#define BUFFER_FLUSH(v) ({ (v).used = 0; })
+
+
diff --git a/lib/hash.h b/lib/hash.h
new file mode 100644
index 00000000..3ac9eebd
--- /dev/null
+++ b/lib/hash.h
@@ -0,0 +1,123 @@
+
+
+/*
+ * HASH(type) - anonymous struct describing a chained hash table of @type
+ * nodes: @data is the bucket array, @count the number of stored nodes and
+ * @order the binary logarithm of the number of buckets.
+ */
+#define HASH(type) struct { type **data; uint count, order; }
+/* Node type of table @v */
+#define HASH_TYPE(v) typeof(** (v).data)
+/* Number of buckets of table @v */
+#define HASH_SIZE(v) (1 << (v).order)
+/* Mask reducing a hash value to a bucket index of table @v */
+#define HASH_MASK(v) ((1 << (v).order)-1)
+
+
+/* Set up @v as an empty table with 2^@init_order zeroed buckets allocated from @pool */
+#define HASH_INIT(v,pool,init_order) \
+ ({ \
+ (v).count = 0; \
+ (v).order = (init_order); \
+ (v).data = mb_allocz(pool, HASH_SIZE(v) * sizeof(* (v).data)); \
+ })
+
+/*
+ * Evaluates to the first node of @v whose key equals @key, or NULL when there
+ * is none. @id selects the helper macros id##_FN (hash function), id##_KEY
+ * (key of a node), id##_EQ (key comparison) and id##_NEXT (chain link).
+ * NOTE(review): @key is declared variadic but is expanded inside parentheses,
+ * so several arguments would be joined by the comma operator; pass one key.
+ */
+#define HASH_FIND(v,id,key...) \
+ ({ \
+ uint _h = id##_FN((key)) & HASH_MASK(v); \
+ HASH_TYPE(v) *_n = (v).data[_h]; \
+ while (_n && !id##_EQ(id##_KEY(_n), (key))) \
+ _n = id##_NEXT(_n); \
+ _n; \
+ })
+
+/*
+ * Link @node at the head of the bucket chain selected by its key and
+ * increment the node count. No check for an already present key is done.
+ */
+#define HASH_INSERT(v,id,node) \
+ ({ \
+ uint _h = id##_FN(id##_KEY((node))) & HASH_MASK(v); \
+ HASH_TYPE(v) **_nn = (v).data + _h; \
+ id##_NEXT(node) = *_nn; \
+ *_nn = node; \
+ (v).count++; \
+ })
+
+/*
+ * Helper of HASH_DELETE and HASH_REMOVE: unlink the node referenced through
+ * the link pointer @_nn (if any), decrement the node count and evaluate to
+ * the unlinked node, or to NULL when the link was empty.
+ */
+#define HASH_DO_REMOVE(v,id,_nn) \
+ ({ \
+ HASH_TYPE(v) *_n = *_nn; \
+ if (_n) \
+ { \
+ *_nn = id##_NEXT(_n); \
+ (v).count--; \
+ } \
+ _n; \
+ })
+
+/*
+ * Remove the first node of @v whose key equals @key. Evaluates to the removed
+ * node, or to NULL when no such node exists.
+ */
+#define HASH_DELETE(v,id,key...) \
+ ({ \
+ uint _h = id##_FN((key)) & HASH_MASK(v); \
+ HASH_TYPE(v) **_nn = (v).data + _h; \
+ \
+ while ((*_nn) && !id##_EQ(id##_KEY((*_nn)), (key))) \
+ _nn = &(id##_NEXT((*_nn))); \
+ \
+ HASH_DO_REMOVE(v,id,_nn); \
+ })
+
+/*
+ * Remove @node itself from @v. The node is searched by pointer identity in
+ * the bucket chain selected by its key. Evaluates to the node, or to NULL
+ * when it was not found in that chain.
+ */
+#define HASH_REMOVE(v,id,node) \
+ ({ \
+ uint _h = id##_FN(id##_KEY((node))) & HASH_MASK(v); \
+ HASH_TYPE(v) **_nn = (v).data + _h; \
+ \
+ while ((*_nn) && (*_nn != (node))) \
+ _nn = &(id##_NEXT((*_nn))); \
+ \
+ HASH_DO_REMOVE(v,id,_nn); \
+ })
+
+
+/*
+ * Resize @v by changing its order by @step (which may be negative). A new
+ * zeroed bucket array is allocated from @pool, every node of the old array is
+ * reinserted (the next pointer is saved first, as HASH_INSERT overwrites it)
+ * and the old array is freed.
+ */
+#define HASH_REHASH(v,id,pool,step) \
+ ({ \
+ HASH_TYPE(v) *_n, *_n2, **_od; \
+ uint _i, _s; \
+ \
+ _s = HASH_SIZE(v); \
+ _od = (v).data; \
+ (v).count = 0; \
+ (v).order += (step); \
+ (v).data = mb_allocz(pool, HASH_SIZE(v) * sizeof(* (v).data)); \
+ \
+ for (_i = 0; _i < _s; _i++) \
+ for (_n = _od[_i]; _n && (_n2 = id##_NEXT(_n), 1); _n = _n2) \
+ HASH_INSERT(v, id, _n); \
+ \
+ mb_free(_od); \
+ })
+
+/*
+ * Define a static function id##_REHASH_FN() wrapping HASH_REHASH for tables
+ * of @type nodes. The table is passed as void * and cast to HASH(type) *.
+ */
+#define HASH_DEFINE_REHASH_FN(id, type) \
+ static void id##_REHASH_FN(void *v, pool *p, int step) \
+ { HASH_REHASH(* (HASH(type) *) v, id, p, step); }
+
+/*
+ * Grow @v by one order when it holds more nodes than buckets, unless its
+ * order has already reached id##_REHASH_MAX. Requires id##_REHASH_FN() as
+ * generated by HASH_DEFINE_REHASH_FN. @v is parenthesized before its address
+ * is taken, so any lvalue expression may be passed.
+ */
+#define HASH_TRY_REHASH_UP(v,id,pool) \
+  ({ \
+    if (((v).order < id##_REHASH_MAX) && ((v).count > HASH_SIZE(v))) \
+      id##_REHASH_FN(&(v), pool, 1); \
+  })
+
+/*
+ * Shrink @v by one order when it holds fewer nodes than half of its buckets,
+ * unless its order has already dropped to id##_REHASH_MIN.
+ */
+#define HASH_TRY_REHASH_DOWN(v,id,pool) \
+  ({ \
+    if (((v).order > id##_REHASH_MIN) && ((v).count < HASH_SIZE(v)/2)) \
+      id##_REHASH_FN(&(v), pool, -1); \
+  })
+
+/*
+ * Iterate over all nodes of @v. @next is the name of the chain-link field and
+ * @n the name of the loop variable, which is declared by the macro. The macro
+ * must be followed by the loop body and closed by HASH_WALK_END. The link of
+ * the current node is read after the body has run, so the body must not
+ * unlink that node (see HASH_WALK_DELSAFE).
+ */
+#define HASH_WALK(v,next,n) \
+ do { \
+ HASH_TYPE(v) *n; \
+ uint _i; \
+ uint _s = HASH_SIZE(v); \
+ for (_i = 0; _i < _s; _i++) \
+ for (n = (v).data[_i]; n; n = n->next)
+
+#define HASH_WALK_END } while (0)
+
+
+/*
+ * Variant of HASH_WALK that saves the link of the current node in _next
+ * before the body runs, so the body may unlink the current node. Must be
+ * closed by HASH_WALK_DELSAFE_END.
+ */
+#define HASH_WALK_DELSAFE(v,next,n) \
+ do { \
+ HASH_TYPE(v) *n, *_next; \
+ uint _i; \
+ uint _s = HASH_SIZE(v); \
+ for (_i = 0; _i < _s; _i++) \
+ for (n = (v).data[_i]; n && (_next = n->next, 1); n = _next)
+
+#define HASH_WALK_DELSAFE_END } while (0)
+
+
diff --git a/lib/heap.h b/lib/heap.h
new file mode 100644
index 00000000..c8c3d348
--- /dev/null
+++ b/lib/heap.h
@@ -0,0 +1,156 @@
+/*
+ * UCW Library -- Universal Heap Macros
+ *
+ * (c) 2001 Martin Mares <mj@ucw.cz>
+ * (c) 2005 Tomas Valla <tom@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/**
+ * [[intro]]
+ * Introduction
+ * ------------
+ *
+ * Binary heap is a simple data structure, which for example supports efficient insertions, deletions
+ * and access to the minimal inserted item. We define several macros for such operations.
+ * Note that because of simplicity of heaps, we have decided to define direct macros instead
+ * of a <<generic:,macro generator>> as for several other data structures in the Libucw.
+ *
+ * A heap is represented by a number of elements and by an array of values. Beware that we
+ * index this array from one, not from zero as do the standard C arrays.
+ *
+ * Most macros use these parameters:
+ *
+ * - @type - the type of elements
+ * - @num - a variable (signed or unsigned integer) with the number of elements
+ * - @heap - a C array of type @type; the heap is stored in `heap[1] .. heap[num]`; `heap[0]` is unused
+ * - @less - a callback to compare two element values; `less(x, y)` shall return a non-zero value iff @x is lower than @y
+ * - @swap - a callback to swap two array elements; `swap(heap, i, j, t)` must swap `heap[i]` with `heap[j]` with possible help of temporary variable @t (type @type).
+ *
+ * A valid heap must follow these rules:
+ *
+ * - `num >= 0`
+ * - `heap[i] >= heap[i / 2]` for each `i` in `[2, num]`
+ *
+ * The first element `heap[1]` is always lower or equal to all other elements.
+ *
+ * [[macros]]
+ * Macros
+ * ------
+ */
+
+/*
+ * For internal usage. Moves the element at index _j down the heap until
+ * neither of its children is lower. The expansion site has to declare the
+ * index variables _j (start position) and _l and a temporary x that is
+ * handed to @swap.
+ */
+#define HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
+ for (;;) \
+ { \
+ _l = 2*_j; \
+ if (_l > num) \
+ break; \
+ if (less(heap[_j],heap[_l]) && (_l == num || less(heap[_j],heap[_l+1]))) \
+ break; \
+ if (_l != num && less(heap[_l+1],heap[_l])) \
+ _l++; \
+ swap(heap,_j,_l,x); \
+ _j = _l; \
+ }
+
+/*
+ * For internal usage. Moves the element at index _j up the heap until its
+ * parent is lower or the root is reached. The expansion site has to declare
+ * the index variables _j (start position) and _u and a temporary x that is
+ * handed to @swap.
+ */
+#define HEAP_BUBBLE_UP_J(heap,num,less,swap) \
+ while (_j > 1) \
+ { \
+ _u = _j/2; \
+ if (less(heap[_u], heap[_j])) \
+ break; \
+ swap(heap,_u,_j,x); \
+ _j = _u; \
+ }
+
+/**
+ * Shuffle the unordered array @heap of @num elements to become a valid heap. The time complexity is linear.
+ **/
+#define HEAP_INIT(heap,num,type,less,swap) \
+ do { \
+ uint _i = num; \
+ uint _j, _l; \
+ type x; \
+ while (_i >= 1) \
+ { \
+ _j = _i; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap) \
+ _i--; \
+ } \
+ } while(0)
+
+/**
+ * Delete the minimum element `heap[1]` in `O(log(n))` time.
+ * The removed value is moved just after the resulting heap (`heap[num + 1]`).
+ **/
+#define HEAP_DELMIN(heap,num,type,less,swap) \
+ do { \
+ uint _j, _l; \
+ type x; \
+ swap(heap,1,num,x); \
+ num--; \
+ _j = 1; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * Insert `heap[num]` in `O(log(n))` time. The value of @num must be increased before.
+ **/
+#define HEAP_INSERT(heap,num,type,less,swap) \
+ do { \
+ uint _j, _u; \
+ type x; \
+ _j = num; \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * If you need to increase the value of `heap[pos]`, just do it and then call this macro to rebuild the heap.
+ * Only `heap[pos]` can be changed, the rest of the array must form a valid heap.
+ * The time complexity is `O(log(n))`.
+ **/
+#define HEAP_INCREASE(heap,num,type,less,swap,pos) \
+ do { \
+ uint _j, _l; \
+ type x; \
+ _j = pos; \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * If you need to decrease the value of `heap[pos]`, just do it and then call this macro to rebuild the heap.
+ * Only `heap[pos]` can be changed, the rest of the array must form a valid heap.
+ * The time complexity is `O(log(n))`.
+ **/
+#define HEAP_DECREASE(heap,num,type,less,swap,pos) \
+ do { \
+ uint _j, _u; \
+ type x; \
+ _j = pos; \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * Delete `heap[pos]` in `O(log(n))` time.
+ * The deleted element is swapped with the last one and @num is decreased, so
+ * the removed value ends up at `heap[num + 1]`. The element moved to @pos is
+ * then bubbled up or down depending on how it compares to the removed value.
+ * Note that HEAP_BUBBLE_UP_J is expanded without a trailing semicolon; adding
+ * one would detach the following `else` from the `if`.
+ **/
+#define HEAP_DELETE(heap,num,type,less,swap,pos) \
+ do { \
+ uint _j, _l, _u; \
+ type x; \
+ _j = pos; \
+ swap(heap,_j,num,x); \
+ num--; \
+ if (less(heap[_j], heap[num+1])) \
+ HEAP_BUBBLE_UP_J(heap,num,less,swap) \
+ else \
+ HEAP_BUBBLE_DOWN_J(heap,num,less,swap); \
+ } while(0)
+
+/**
+ * Default swapping macro.
+ **/
+#define HEAP_SWAP(heap,a,b,t) (t=heap[a], heap[a]=heap[b], heap[b]=t)
diff --git a/lib/lists.c b/lib/lists.c
index 6d97ff50..d323a4b6 100644
--- a/lib/lists.c
+++ b/lib/lists.c
@@ -101,6 +101,46 @@ rem_node(node *n)
}
/**
+ * rem2_node - remove a node from a list, with cleanup
+ * @n: node to be removed
+ *
+ * Removes a node @n from the list it's linked in and resets its pointers to NULL.
+ * Useful if you want to distinguish between linked and unlinked nodes.
+ */
+LIST_INLINE void
+rem2_node(node *n)
+{
+  node *before = n->prev;
+  node *after = n->next;
+
+  /* Make the neighbors bypass @n in both directions */
+  before->next = after;
+  after->prev = before;
+
+  /* Clear both links so that @n can be recognized as unlinked */
+  n->prev = NULL;
+  n->next = NULL;
+}
+
+/**
+ * replace_node - replace a node in a list with another one
+ * @old: node to be removed
+ * @new: node to be inserted
+ *
+ * Replaces node @old in the list it's linked in with node @new. Node
+ * @old may be a copy of the original node, which is not accessed
+ * through the list. The function could be called with @old == @new,
+ * which just fixes neighbors' pointers in the case that the node
+ * was reallocated.
+ */
+LIST_INLINE void
+replace_node(node *old, node *new)
+{
+ /* Redirect both neighbors first; only the links stored in @old are read */
+ old->next->prev = new;
+ old->prev->next = new;
+
+ /* Then take over the links of @old (self-assignments when @old == @new) */
+ new->prev = old->prev;
+ new->next = old->next;
+}
+
+/**
* init_list - create an empty list
* @l: list
*
diff --git a/lib/lists.h b/lib/lists.h
index 0b0fdbe3..9153029c 100644
--- a/lib/lists.h
+++ b/lib/lists.h
@@ -51,6 +51,7 @@ typedef struct list { /* In fact two overlayed nodes */
void add_tail(list *, node *);
void add_head(list *, node *);
void rem_node(node *);
+void rem2_node(node *);
void add_tail_list(list *, list *);
void init_list(list *);
void insert_node(node *, node *);
diff --git a/lib/printf.c b/lib/printf.c
index 14af1062..41e1cc0d 100644
--- a/lib/printf.c
+++ b/lib/printf.c
@@ -276,7 +276,7 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args)
ip_ntox(va_arg(args, ip_addr), ipbuf);
else {
ip_ntop(va_arg(args, ip_addr), ipbuf);
- if (field_width > 0)
+ if (field_width == 1)
field_width = STD_ADDRESS_P_LENGTH;
}
s = ipbuf;
@@ -410,3 +410,40 @@ int bsnprintf(char * buf, int size, const char *fmt, ...)
va_end(args);
return i;
}
+
+/*
+ * Append output formatted by bvsnprintf() to @buf and advance the write
+ * position. Returns the value returned by bvsnprintf(); when it is negative,
+ * the write position is moved to the end of the buffer instead.
+ */
+int
+buffer_vprint(buffer *buf, const char *fmt, va_list args)
+{
+  int len = bvsnprintf((char *) buf->pos, buf->end - buf->pos, fmt, args);
+
+  if (len < 0)
+    buf->pos = buf->end;
+  else
+    buf->pos = buf->pos + len;
+
+  return len;
+}
+
+/*
+ * Variadic front end of buffer_vprint(): append formatted output to @buf and
+ * return what buffer_vprint() returns.
+ */
+int
+buffer_print(buffer *buf, const char *fmt, ...)
+{
+  va_list args;
+  int len;
+
+  va_start(args, fmt);
+  len = buffer_vprint(buf, fmt, args);
+  va_end(args);
+
+  return len;
+}
+
+/*
+ * Append the string @str to @buf.
+ *
+ * The last byte of the buffer is reserved for the terminator, so the buffer
+ * content is NUL-terminated even when @str has to be truncated. Truncation
+ * is flagged the same way as in buffer_print(): the write position is moved
+ * to the end of the buffer. A buffer whose write position already is at the
+ * end is left untouched.
+ */
+void
+buffer_puts(buffer *buf, const char *str)
+{
+  byte *bp = buf->pos;
+  byte *be = buf->end;
+
+  /* No room at all (or overflow flagged earlier): nothing can be stored */
+  if (bp >= be)
+    return;
+
+  /* Copy while keeping one byte free for the terminator */
+  while ((bp < be - 1) && *str)
+    *bp++ = *str++;
+
+  *bp = 0;
+
+  /* Unconsumed input means the string was truncated */
+  buf->pos = *str ? be : bp;
+}
diff --git a/lib/resource.c b/lib/resource.c
index 42243aa2..bf4b3ae9 100644
--- a/lib/resource.c
+++ b/lib/resource.c
@@ -220,7 +220,8 @@ ralloc(pool *p, struct resclass *c)
bzero(r, c->size);
r->class = c;
- add_tail(&p->inside, &r->n);
+ if (p)
+ add_tail(&p->inside, &r->n);
return r;
}
@@ -366,21 +367,21 @@ mb_allocz(pool *p, unsigned size)
/**
* mb_realloc - reallocate a memory block
- * @p: pool
* @m: memory block
* @size: new size of the block
*
* mb_realloc() changes the size of the memory block @m to a given size.
* The contents will be unchanged to the minimum of the old and new sizes;
- * newly allocated memory will be uninitialized. If @m is NULL, the call
- * is equivalent to mb_alloc(@p, @size).
+ * newly allocated memory will be uninitialized. Contrary to realloc()
+ * behavior, @m must be non-NULL, because the resource pool is inherited
+ * from it.
*
* Like mb_alloc(), mb_realloc() also returns a pointer to the memory
- * chunk , not to the resource, hence you have to free it using
+ * chunk, not to the resource, hence you have to free it using
* mb_free(), not rfree().
*/
void *
-mb_realloc(pool *p, void *m, unsigned size)
+mb_realloc(void *m, unsigned size)
{
struct mblock *ob = NULL;
@@ -392,9 +393,7 @@ mb_realloc(pool *p, void *m, unsigned size)
}
struct mblock *b = xrealloc(ob, sizeof(struct mblock) + size);
-
- b->r.class = &mb_class;
- add_tail(&p->inside, &b->r.n);
+ replace_node(&b->r.n, &b->r.n);
b->size = size;
return b->data;
}
@@ -413,3 +412,18 @@ mb_free(void *m)
rfree(b);
}
+
+
+/* Growth step used by buffer_realloc(): one half more plus a small constant */
+#define STEP_UP(x) ((x) + (x)/2 + 4)
+
+/*
+ * Reallocate the array *@buf of items of @item_size bytes so that it holds at
+ * least @need items. The new capacity (in items) is stored to *@size and the
+ * pointer returned by mb_realloc() to *@buf.
+ */
+void
+buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size)
+{
+  /* Start from the current capacity, or from @need when that is smaller */
+  unsigned nsize = *size;
+  if (need < nsize)
+    nsize = need;
+
+  /* Grow in steps until the request fits */
+  for (; nsize < need; nsize = STEP_UP(nsize))
+    ;
+
+  *buf = mb_realloc(*buf, nsize * item_size);
+  *size = nsize;
+}
diff --git a/lib/resource.h b/lib/resource.h
index 5cb5e274..1a62d389 100644
--- a/lib/resource.h
+++ b/lib/resource.h
@@ -52,7 +52,7 @@ extern pool root_pool;
void *mb_alloc(pool *, unsigned size);
void *mb_allocz(pool *, unsigned size);
-void *mb_realloc(pool *p, void *m, unsigned size);
+void *mb_realloc(void *m, unsigned size);
void mb_free(void *);
/* Memory pools with linear allocation */
@@ -78,6 +78,9 @@ void sl_free(slab *, void *);
* outside resource manager and possibly sysdep code.
*/
+void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size);
+
+
#ifdef HAVE_LIBDMALLOC
/*
* The standard dmalloc macros tend to produce lots of namespace
@@ -103,3 +106,4 @@ void *xrealloc(void *, unsigned);
#endif
#endif
+
diff --git a/lib/socket.h b/lib/socket.h
index 6e0a769b..780d596b 100644
--- a/lib/socket.h
+++ b/lib/socket.h
@@ -44,6 +44,7 @@ typedef struct birdsock {
/* laddr and lifindex are valid only if SKF_LADDR_RX flag is set to request it */
int fd; /* System-dependent data */
+ int index; /* Index in poll buffer */
node n;
void *rbuf_alloc, *tbuf_alloc;
char *password; /* Password for MD5 authentication */
@@ -91,6 +92,7 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shoul
#define SKF_LADDR_TX 4 /* Allow to specify local address for TX packets */
#define SKF_TTL_RX 8 /* Report TTL / Hop Limit for RX packets */
+#define SKF_THREAD 0x100 /* Socket used in thread, do not add to main loop */
/*
* Socket types SA SP DA DP IF TTL SendTo (?=may, -=must not, *=must)
diff --git a/lib/string.h b/lib/string.h
index 14eaa360..2c477294 100644
--- a/lib/string.h
+++ b/lib/string.h
@@ -17,6 +17,10 @@ int bvsprintf(char *str, const char *fmt, va_list args);
int bsnprintf(char *str, int size, const char *fmt, ...);
int bvsnprintf(char *str, int size, const char *fmt, va_list args);
+int buffer_vprint(buffer *buf, const char *fmt, va_list args);
+int buffer_print(buffer *buf, const char *fmt, ...);
+void buffer_puts(buffer *buf, const char *str);
+
int patmatch(byte *pat, byte *str);
#endif
diff --git a/nest/bfd.h b/nest/bfd.h
new file mode 100644
index 00000000..79c3c921
--- /dev/null
+++ b/nest/bfd.h
@@ -0,0 +1,51 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD)
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_NBFD_H_
+#define _BIRD_NBFD_H_
+
+#include "lib/lists.h"
+#include "lib/resource.h"
+
+struct bfd_session;
+
+/*
+ * Request for a BFD session, as returned by bfd_request_session(). The
+ * structure starts with a resource header and carries a list node.
+ */
+struct bfd_request {
+ resource r;
+ node n;
+
+ /* Values passed to bfd_request_session() -- presumably the neighbor
+ address, the local address and the interface; confirm in bfd.c */
+ ip_addr addr;
+ ip_addr local;
+ struct iface *iface;
+
+ void (*hook)(struct bfd_request *); /* Callback receiving the request itself */
+ void *data; /* presumably opaque data of the requester - confirm */
+
+ struct bfd_session *session;
+
+ /* NOTE(review): look like copies of session state for the hook - confirm */
+ u8 state;
+ u8 diag;
+ u8 old_state;
+ u8 down;
+};
+
+
+/* With CONFIG_BFD the real bfd_request_session() is only declared here and
+ cf_check_bfd() accepts any value */
+#ifdef CONFIG_BFD
+
+struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, void (*hook)(struct bfd_request *), void *data);
+
+static inline void cf_check_bfd(int use) { }
+
+#else
+
+/* Without CONFIG_BFD no request is ever created (NULL is returned) and
+ cf_check_bfd() raises a configuration error when BFD use is requested */
+static inline struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, void (*hook)(struct bfd_request *), void *data) { return NULL; }
+
+static inline void cf_check_bfd(int use) { if (use) cf_error("BFD not available"); }
+
+#endif /* CONFIG_BFD */
+
+
+
+#endif /* _BIRD_NBFD_H_ */
diff --git a/nest/cmds.c b/nest/cmds.c
index 9bdd66cd..ec6bc762 100644
--- a/nest/cmds.c
+++ b/nest/cmds.c
@@ -92,13 +92,10 @@ cmd_show_memory(void)
cli_msg(0, "");
}
-extern const char *log_buffer_ptr;
-
void
cmd_eval(struct f_inst *expr)
{
struct f_val v = f_eval(expr, this_cli->parser_pool);
- log_reset();
if (v.type == T_RETURN)
{
@@ -106,7 +103,8 @@ cmd_eval(struct f_inst *expr)
return;
}
- val_print(v);
- cli_msg(23, "%s", log_buffer_ptr);
- log_reset();
+ buffer buf;
+ LOG_BUFFER_INIT(buf);
+ val_format(v, &buf);
+ cli_msg(23, "%s", buf.start);
}
diff --git a/nest/proto.c b/nest/proto.c
index 140ec943..75ba10dd 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -699,6 +699,9 @@ proto_build(struct protocol *p)
}
}
+/* FIXME: convert this call to some protocol hook */
+extern void bfd_init_all(void);
+
/**
* protos_build - build a protocol list
*
@@ -736,6 +739,11 @@ protos_build(void)
#ifdef CONFIG_BGP
proto_build(&proto_bgp);
#endif
+#ifdef CONFIG_BFD
+ proto_build(&proto_bfd);
+ bfd_init_all();
+#endif
+
proto_pool = rp_new(&root_pool, "Protocols");
proto_flush_event = ev_new(proto_pool);
proto_flush_event->hook = proto_flush_loop;
diff --git a/nest/protocol.h b/nest/protocol.h
index 033a0ede..96923447 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -75,7 +75,7 @@ void protos_dump_all(void);
extern struct protocol
proto_device, proto_radv, proto_rip, proto_static,
- proto_ospf, proto_pipe, proto_bgp;
+ proto_ospf, proto_pipe, proto_bgp, proto_bfd;
/*
* Routing Protocol Instance
@@ -358,6 +358,12 @@ void proto_notify_state(struct proto *p, unsigned state);
#define D_EVENTS 16 /* Protocol events */
#define D_PACKETS 32 /* Packets sent/received */
+#ifndef PARSER
+/*
+ * Log a trace message prefixed with the protocol name when any of the debug
+ * bits in @flags is enabled. Expects a variable `p` in scope at the expansion
+ * site whose `p.debug` and `p.name` members are used. @flags is parenthesized
+ * so that compound masks such as `D_EVENTS | D_PACKETS` are tested correctly.
+ */
+#define TRACE(flags, msg, args...) \
+  do { if (p->p.debug & (flags)) log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)
+#endif
+
+
/*
* MRTDump flags
*/
diff --git a/proto/Doc b/proto/Doc
index 16b084fb..7863472f 100644
--- a/proto/Doc
+++ b/proto/Doc
@@ -1,4 +1,5 @@
H Protocols
+C bfd
C bgp
C ospf
C pipe
diff --git a/proto/bfd/Doc b/proto/bfd/Doc
new file mode 100644
index 00000000..7ee5d3ef
--- /dev/null
+++ b/proto/bfd/Doc
@@ -0,0 +1 @@
+S bfd.c
diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile
new file mode 100644
index 00000000..c28cedec
--- /dev/null
+++ b/proto/bfd/Makefile
@@ -0,0 +1,5 @@
+source=bfd.c packets.c io.c
+root-rel=../../
+dir-name=proto/bfd
+
+include ../../Rules
diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c
new file mode 100644
index 00000000..5ebfadc1
--- /dev/null
+++ b/proto/bfd/bfd.c
@@ -0,0 +1,1114 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD)
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/**
+ * DOC: Bidirectional Forwarding Detection
+ *
+ * The BFD protocol is implemented in three files: |bfd.c| containing the
+ * protocol logic and the protocol glue with BIRD core, |packets.c| handling BFD
+ * packet processing, RX, TX and protocol sockets. |io.c| then contains generic
+ * code for the event loop, threads and event sources (sockets, microsecond
+ * timers). This generic code will be merged to the main BIRD I/O code in the
+ * future.
+ *
+ * The BFD implementation uses a separate thread with an internal event loop for
+ * handling the protocol logic, which requires high-res and low-latency timing,
+ * so it is not affected by the rest of BIRD, which has several low-granularity
+ * hooks in the main loop, uses second-based timers and cannot offer good
+ * latency. The core of BFD protocol (the code related to BFD sessions,
+ * interfaces and packets) runs in the BFD thread, while the rest (the code
+ * related to BFD requests, BFD neighbors and the protocol glue) runs in the
+ * main thread.
+ *
+ * BFD sessions are represented by structure &bfd_session that contains a state
+ * related to the session and two timers (TX timer for periodic packets and hold
+ * timer for session timeout). These sessions are allocated from @session_slab
+ * and are accessible by two hash tables, @session_hash_id (by session ID) and
+ * @session_hash_ip (by IP addresses of neighbors). Slab and both hashes are in
+ * the main protocol structure &bfd_proto. The protocol logic related to BFD
+ * sessions is implemented in internal functions bfd_session_*(), which are
+ * expected to be called from the context of BFD thread, and external functions
+ * bfd_add_session(), bfd_remove_session() and bfd_reconfigure_session(), which
+ * form an interface to the BFD core for the rest and are expected to be called
+ * from the context of main thread.
+ *
+ * Each BFD session has an associated BFD interface, represented by structure
+ * &bfd_iface. A BFD interface contains a socket used for TX (the one for RX is
+ * shared in &bfd_proto), an interface configuration and reference counter.
+ * Compared to interface structures of other protocols, these structures are not
+ * created and removed based on interface notification events, but according to
+ * the needs of BFD sessions. When a new session is created, it requests a
+ * proper BFD interface by function bfd_get_iface(), which either finds an
+ * existing one in &iface_list (from &bfd_proto) or allocates a new one. When a
+ * session is removed, an associated iface is discharged by bfd_free_iface().
+ *
+ * BFD requests are the external API for the other protocols. When a protocol
+ * wants a BFD session, it calls bfd_request_session(), which creates a
+ * structure &bfd_request containing appropriate information and a notify hook.
+ * This structure is a resource associated with the caller's resource pool. When
+ * a BFD protocol is available, a BFD request is submitted to the protocol, an
+ * appropriate BFD session is found or created and the request is attached to
+ * the session. When a session changes state, all attached requests (and related
+ * protocols) are notified. Note that BFD requests do not depend on BFD protocol
+ * running. When the BFD protocol is stopped or removed (or not available from
+ * beginning), related BFD requests are stored in @bfd_wait_list, where they wait
+ * for a new protocol.
+ *
+ * BFD neighbors are just a way to statically configure BFD sessions without
+ * requests from other protocol. Structures &bfd_neighbor are part of BFD
+ * configuration (like static routes in the static protocol). BFD neighbors are
+ * handled by BFD protocol like it is a BFD client -- when a BFD neighbor is
+ * ready, the protocol just creates a BFD request like any other protocol.
+ *
+ * The protocol uses a new generic event loop (structure &birdloop) from |io.c|,
+ * which supports sockets, timers and events like the main loop. Timers
+ * (structure &timer2) are new microsecond based timers, while sockets and
+ * events are the same. A birdloop is associated with a thread (field @thread)
+ * in which event hooks are executed. Most functions for setting event sources
+ * (like sk_start() or tm2_start()) must be called from the context of that
+ * thread. Birdloop allows to temporarily acquire the context of that thread for
+ * the main thread by calling birdloop_enter() and then birdloop_leave(), which
+ * also ensures mutual exclusion with all event hooks. Note that resources
+ * associated with a birdloop (like timers) should be attached to the
+ * independent resource pool, detached from the main resource tree.
+ *
+ * There are two kinds of interaction between the BFD core (running in the BFD
+ * thread) and the rest of BFD (running in the main thread). The first kind are
+ * configuration calls from main thread to the BFD thread (like bfd_add_session()).
+ * These calls are synchronous and use birdloop_enter() mechanism for mutual
+ * exclusion. The second kind is a notification about session changes from the
+ * BFD thread to the main thread. This is done in an asynchronous way, sessions
+ * with pending notifications are linked (in the BFD thread) to @notify_list in
+ * &bfd_proto, and then bfd_notify_hook() in the main thread is activated using
+ * bfd_notify_kick() and a pipe. The hook then processes scheduled sessions and
+ * calls hooks from associated BFD requests. This @notify_list (and state fields
+ * in structure &bfd_session) is protected by a spinlock in &bfd_proto and
+ * functions bfd_lock_sessions() / bfd_unlock_sessions().
+ *
+ * There are a few data races (accessing @p->p.debug from TRACE() from the BFD
+ * thread and accessing some private fields of %bfd_session from
+ * bfd_show_sessions() from the main thread), but these are harmless (I hope).
+ *
+ * TODO: document functions and access restrictions for fields in BFD structures.
+ *
+ * Supported standards:
+ * - RFC 5880 - main BFD standard
+ * - RFC 5881 - BFD for IP links
+ * - RFC 5882 - generic application of BFD
+ * - RFC 5883 - BFD for multihop paths
+ */
+
+#include "bfd.h"
+
+
+/* Helper macros for hash tables keyed by the loc_id field and chained through
+ next_id (used with the lib/hash.h macros as id HASH_ID) */
+#define HASH_ID_KEY(n) n->loc_id
+#define HASH_ID_NEXT(n) n->next_id
+#define HASH_ID_EQ(a,b) (a == b)
+#define HASH_ID_FN(k) (k)
+
+/* Helper macros for hash tables keyed by the addr field and chained through
+ next_ip (used with the lib/hash.h macros as id HASH_IP) */
+#define HASH_IP_KEY(n) n->addr
+#define HASH_IP_NEXT(n) n->next_ip
+#define HASH_IP_EQ(a,b) ipa_equal(a,b)
+#define HASH_IP_FN(k) ipa_hash(k)
+
+static list bfd_proto_list;
+static list bfd_wait_list;
+
+const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" };
+
+static void bfd_session_set_min_tx(struct bfd_session *s, u32 val);
+static struct bfd_iface *bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface);
+static void bfd_free_iface(struct bfd_iface *ifa);
+static inline void bfd_notify_kick(struct bfd_proto *p);
+
+
+/*
+ * BFD sessions
+ */
+
+/*
+ * Switch the local state of session @s to @state with diagnostic code @diag.
+ * Does nothing when the state does not change. Otherwise the new state is
+ * stored under the session lock, the session is queued on the notify list of
+ * the protocol and the desired TX interval is adjusted.
+ */
+static void
+bfd_session_update_state(struct bfd_session *s, uint state, uint diag)
+{
+ struct bfd_proto *p = s->ifa->bfd;
+ uint old_state = s->loc_state;
+ int notify;
+
+ if (state == old_state)
+ return;
+
+ TRACE(D_EVENTS, "Session to %I changed state from %s to %s",
+ s->addr, bfd_state_names[old_state], bfd_state_names[state]);
+
+ /* State fields and the notify list are changed only under the lock */
+ bfd_lock_sessions(p);
+ s->loc_state = state;
+ s->loc_diag = diag;
+
+ /* Queue the session unless NODE_VALID() reports its node as already
+ linked -- presumably a notification is pending then; confirm */
+ notify = !NODE_VALID(&s->n);
+ if (notify)
+ add_tail(&p->notify_list, &s->n);
+ bfd_unlock_sessions(p);
+
+ /* Entering Up selects min_tx_int, leaving Up selects idle_tx_int */
+ if (state == BFD_STATE_UP)
+ bfd_session_set_min_tx(s, s->ifa->cf->min_tx_int);
+
+ if (old_state == BFD_STATE_UP)
+ bfd_session_set_min_tx(s, s->ifa->cf->idle_tx_int);
+
+ /* The kick is sent only when the session was newly queued above */
+ if (notify)
+ bfd_notify_kick(p);
+}
+
+/*
+ * Recompute the period of the TX timer of session @s from the larger of the
+ * local desired TX interval and the remote required RX interval. The timer
+ * period is 75 % of that value plus a random part of up to 15 %.
+ */
+static void
+bfd_session_update_tx_interval(struct bfd_session *s)
+{
+  u32 base = MAX(s->des_min_tx_int, s->rem_min_rx_int);
+  u32 low = base - (base / 4);    /* 75 % */
+  u32 high = base - (base / 10);  /* 90 % */
+
+  s->tx_timer->recurrent = low;
+  s->tx_timer->randomize = high - low;
+
+  /* The timer is re-set relative to the last TX event, if there was any */
+  if (s->last_tx)
+    tm2_set(s->tx_timer, s->last_tx + low);
+}
+
+/*
+ * Re-set the hold timer of session @s. The timeout is the larger of the local
+ * required RX interval and the remote TX interval, multiplied by the remote
+ * detection multiplier, counted from the last RX event. With @kick set, the
+ * last RX event is moved to the current time first.
+ */
+static void
+bfd_session_update_detection_time(struct bfd_session *s, int kick)
+{
+  btime interval = (btime) MAX(s->req_min_rx_int, s->rem_min_tx_int);
+  btime timeout = interval * s->rem_detect_mult;
+
+  if (kick)
+    s->last_rx = current_time();
+
+  /* Without any RX event there is nothing to time out */
+  if (s->last_rx)
+    tm2_set(s->hold_timer, s->last_rx + timeout);
+}
+
+/*
+ * Start or stop the TX timer of session @s according to the session state.
+ * With @reset set, a timer that should run is restarted even if it is
+ * already active.
+ */
+static void
+bfd_session_control_tx_timer(struct bfd_session *s, int reset)
+{
+  /* A check of s->opened is disabled in the original code */
+
+  /* Passive session that has not learned the remote ID yet */
+  int passive_wait = s->passive && (s->rem_id == 0);
+
+  /* Remote demand mode with both sides up and no poll in progress */
+  int demand_quiet = s->rem_demand_mode &&
+    !s->poll_active &&
+    (s->loc_state == BFD_STATE_UP) &&
+    (s->rem_state == BFD_STATE_UP);
+
+  if (passive_wait || demand_quiet || (s->rem_min_rx_int == 0))
+  {
+    tm2_stop(s->tx_timer);
+    s->last_tx = 0;
+    return;
+  }
+
+  /* So TX timer should run */
+  if (reset || !tm2_active(s->tx_timer))
+  {
+    s->last_tx = 0;
+    tm2_start(s->tx_timer, 0);
+  }
+}
+
+/*
+ * Schedule a poll sequence for the changes given by the bits in @request.
+ * When no poll is active, the scheduled bits become the active poll at once
+ * and the TX timer is reset; otherwise they stay scheduled.
+ */
+static void
+bfd_session_request_poll(struct bfd_session *s, u8 request)
+{
+  /* Not sure about this, but doing poll in this case does not make sense */
+  if (s->rem_id != 0)
+  {
+    s->poll_scheduled |= request;
+
+    if (!s->poll_active)
+    {
+      s->poll_active = s->poll_scheduled;
+      s->poll_scheduled = 0;
+
+      bfd_session_control_tx_timer(s, 1);
+    }
+  }
+}
+
+/*
+ * Finish the active poll sequence of session @s. Interval changes that were
+ * part of the active poll and have not been scheduled again are committed,
+ * then the scheduled poll (if any) becomes the active one. Timers are not
+ * touched here; the caller, bfd_session_process_ctl(), updates them.
+ */
+static void
+bfd_session_terminate_poll(struct bfd_session *s)
+{
+  u8 finished = s->poll_active & ~s->poll_scheduled;
+
+  if (finished & BFD_POLL_TX)
+    s->des_min_tx_int = s->des_min_tx_new;
+
+  if (finished & BFD_POLL_RX)
+    s->req_min_rx_int = s->req_min_rx_new;
+
+  s->poll_active = s->poll_scheduled;
+  s->poll_scheduled = 0;
+}
+
+/*
+ * Process the effects of a control packet on session @s: finish a poll
+ * sequence, refresh the timers, run the state machine and answer a poll.
+ * @flags are tested against BFD_FLAG_FINAL and BFD_FLAG_POLL. @old_tx_int and
+ * @old_rx_int are compared with des_min_tx_int and rem_min_rx_int to detect
+ * interval changes -- presumably they hold the values from before the packet
+ * was applied; confirm in the caller.
+ */
+void
+bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old_rx_int)
+{
+ if (s->poll_active && (flags & BFD_FLAG_FINAL))
+ bfd_session_terminate_poll(s);
+
+ if ((s->des_min_tx_int != old_tx_int) || (s->rem_min_rx_int != old_rx_int))
+ bfd_session_update_tx_interval(s);
+
+ /* Restart the detection time from now */
+ bfd_session_update_detection_time(s, 1);
+
+ /* Update session state */
+ /* next_state stays 0 when no transition is to be made (see the test below) */
+ int next_state = 0;
+ int diag = BFD_DIAG_NOTHING;
+
+ switch (s->loc_state)
+ {
+ /* Returns early: no state change, TX timer control or poll reply */
+ case BFD_STATE_ADMIN_DOWN:
+ return;
+
+ case BFD_STATE_DOWN:
+ if (s->rem_state == BFD_STATE_DOWN) next_state = BFD_STATE_INIT;
+ else if (s->rem_state == BFD_STATE_INIT) next_state = BFD_STATE_UP;
+ break;
+
+ case BFD_STATE_INIT:
+ if (s->rem_state == BFD_STATE_ADMIN_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN;
+ else if (s->rem_state >= BFD_STATE_INIT) next_state = BFD_STATE_UP;
+ break;
+
+ case BFD_STATE_UP:
+ if (s->rem_state <= BFD_STATE_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN;
+ break;
+ }
+
+ if (next_state)
+ bfd_session_update_state(s, next_state, diag);
+
+ bfd_session_control_tx_timer(s, 0);
+
+ /* A received poll is answered by an immediate control packet */
+ if (flags & BFD_FLAG_POLL)
+ bfd_send_ctl(s->ifa->bfd, s, 1);
+}
+
+/*
+ * Handle expiration of the session (called from the hold timer hook). All
+ * values learned from the remote side are reset, pending polls are dropped,
+ * the session goes Down with the Timeout diagnostic and the TX timer is
+ * re-evaluated with reset.
+ */
+static void
+bfd_session_timeout(struct bfd_session *s)
+{
+ /* NOTE(review): p is not referenced below; presumably used by TRACE() */
+ struct bfd_proto *p = s->ifa->bfd;
+
+ TRACE(D_EVENTS, "Session to %I expired", s->addr);
+
+ s->rem_state = BFD_STATE_DOWN;
+ s->rem_id = 0;
+ s->rem_min_tx_int = 0;
+ /* Same value as the one set for a new session in bfd_add_session() */
+ s->rem_min_rx_int = 1;
+ s->rem_demand_mode = 0;
+ s->rem_detect_mult = 0;
+
+ s->poll_active = 0;
+ s->poll_scheduled = 0;
+
+ bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT);
+
+ bfd_session_control_tx_timer(s, 1);
+}
+
+/*
+ * Change the desired min TX interval of session @s to @val. The new value is
+ * always stored in des_min_tx_new; it is applied to des_min_tx_int at once
+ * unless the session is up and the interval increases. A TX poll is
+ * requested in both cases.
+ */
+static void
+bfd_session_set_min_tx(struct bfd_session *s, u32 val)
+{
+ /* Note that des_min_tx_int <= des_min_tx_new */
+
+ if (val == s->des_min_tx_new)
+ return;
+
+ s->des_min_tx_new = val;
+
+ /* Postpone timer update if des_min_tx_int increases and the session is up */
+ if ((s->loc_state != BFD_STATE_UP) || (val < s->des_min_tx_int))
+ {
+ s->des_min_tx_int = val;
+ bfd_session_update_tx_interval(s);
+ }
+
+ bfd_session_request_poll(s, BFD_POLL_TX);
+}
+
+/*
+ * Change the required min RX interval of session @s to @val. The new value is
+ * always stored in req_min_rx_new; it is applied to req_min_rx_int at once
+ * unless the session is up and the interval decreases. An RX poll is
+ * requested in both cases.
+ */
+static void
+bfd_session_set_min_rx(struct bfd_session *s, u32 val)
+{
+ /* Note that req_min_rx_int >= req_min_rx_new */
+
+ if (val == s->req_min_rx_new)
+ return;
+
+ s->req_min_rx_new = val;
+
+ /* Postpone timer update if req_min_rx_int decreases and the session is up */
+ if ((s->loc_state != BFD_STATE_UP) || (val > s->req_min_rx_int))
+ {
+ s->req_min_rx_int = val;
+ bfd_session_update_detection_time(s, 0);
+ }
+
+ bfd_session_request_poll(s, BFD_POLL_RX);
+}
+
+/* Look up a session of @p in session_hash_id by @id; returns the HASH_FIND() result */
+struct bfd_session *
+bfd_find_session_by_id(struct bfd_proto *p, u32 id)
+{
+ return HASH_FIND(p->session_hash_id, HASH_ID, id);
+}
+
+/* Look up a session of @p in session_hash_ip by @addr; returns the HASH_FIND() result */
+struct bfd_session *
+bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr)
+{
+ return HASH_FIND(p->session_hash_ip, HASH_IP, addr);
+}
+
+/* TX timer hook: record the send time and send a control packet with final = 0 */
+static void
+bfd_tx_timer_hook(timer2 *t)
+{
+ struct bfd_session *s = t->data;
+
+ s->last_tx = current_time();
+ bfd_send_ctl(s->ifa->bfd, s, 0);
+}
+
+/* Hold timer hook: the session stored in t->data has expired */
+static void
+bfd_hold_timer_hook(timer2 *t)
+{
+ bfd_session_timeout(t->data);
+}
+
+/*
+ * Pick a local session ID: start at a random value and step upwards until
+ * the value is nonzero and not used by any session of @p.
+ */
+static u32
+bfd_get_free_id(struct bfd_proto *p)
+{
+  u32 id = random_u32();
+
+  while (!id || bfd_find_session_by_id(p, id))
+    id++;
+
+  return id;
+}
+
+/*
+ * Create a session to @addr using local address @local on @iface. The whole
+ * setup runs between birdloop_enter() and birdloop_leave() on p->loop. The
+ * session gets a reference to a (possibly shared) bfd_iface, a free local ID,
+ * is inserted into both session hashes and its timers are created in
+ * p->tpool. Returns the new session.
+ */
+static struct bfd_session *
+bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface)
+{
+ birdloop_enter(p->loop);
+
+ struct bfd_iface *ifa = bfd_get_iface(p, local, iface);
+
+ struct bfd_session *s = sl_alloc(p->session_slab);
+ bzero(s, sizeof(struct bfd_session));
+
+ s->addr = addr;
+ s->ifa = ifa;
+ s->loc_id = bfd_get_free_id(p);
+
+ HASH_INSERT(p->session_hash_id, HASH_ID, s);
+ HASH_INSERT(p->session_hash_ip, HASH_IP, s);
+
+
+ /* Initialization of state variables - see RFC 5880 6.8.1 */
+ s->loc_state = BFD_STATE_DOWN;
+ s->rem_state = BFD_STATE_DOWN;
+ /* A new session is not up, so it starts with the idle TX interval */
+ s->des_min_tx_int = s->des_min_tx_new = ifa->cf->idle_tx_int;
+ s->req_min_rx_int = s->req_min_rx_new = ifa->cf->min_rx_int;
+ s->rem_min_rx_int = 1;
+ s->detect_mult = ifa->cf->multiplier;
+ s->passive = ifa->cf->passive;
+
+ s->tx_timer = tm2_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0);
+ s->hold_timer = tm2_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0);
+ bfd_session_update_tx_interval(s);
+ bfd_session_control_tx_timer(s, 1);
+
+ init_list(&s->request_list);
+ s->last_state_change = now;
+
+ TRACE(D_EVENTS, "Session to %I added", s->addr);
+
+ birdloop_leave(p->loop);
+
+ return s;
+}
+
+/*
+static void
+bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa)
+{
+ birdloop_enter(p->loop);
+
+ s->opened = 1;
+
+ bfd_session_control_tx_timer(s);
+
+ birdloop_leave(p->loop);
+}
+
+static void
+bfd_close_session(struct bfd_proto *p, struct bfd_session *s)
+{
+ birdloop_enter(p->loop);
+
+ s->opened = 0;
+
+ bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN);
+ bfd_session_control_tx_timer(s);
+
+ birdloop_leave(p->loop);
+}
+*/
+
+/*
+ * Destroy session @s: drop its bfd_iface reference, free both timers, remove
+ * it from both hashes and return it to the slab. Runs between
+ * birdloop_enter() and birdloop_leave() on p->loop.
+ */
+static void
+bfd_remove_session(struct bfd_proto *p, struct bfd_session *s)
+{
+ /* Keep the address, s is freed before the trace message is written */
+ ip_addr ip = s->addr;
+
+ birdloop_enter(p->loop);
+
+ bfd_free_iface(s->ifa);
+
+ rfree(s->tx_timer);
+ rfree(s->hold_timer);
+
+ HASH_REMOVE(p->session_hash_id, HASH_ID, s);
+ HASH_REMOVE(p->session_hash_ip, HASH_IP, s);
+
+ sl_free(p->session_slab, s);
+
+ TRACE(D_EVENTS, "Session to %I removed", ip);
+
+ birdloop_leave(p->loop);
+}
+
+/*
+ * Apply the current interface config (s->ifa->cf) to session @s: TX/RX
+ * intervals, detection multiplier and passive flag, then re-evaluate the TX
+ * timer. Runs between birdloop_enter() and birdloop_leave() on p->loop.
+ */
+static void
+bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s)
+{
+ birdloop_enter(p->loop);
+
+ struct bfd_iface_config *cf = s->ifa->cf;
+
+ /* Sessions that are up use min_tx_int, others the idle TX interval */
+ u32 tx = (s->loc_state == BFD_STATE_UP) ? cf->min_tx_int : cf->idle_tx_int;
+ bfd_session_set_min_tx(s, tx);
+ bfd_session_set_min_rx(s, cf->min_rx_int);
+ s->detect_mult = cf->multiplier;
+ s->passive = cf->passive;
+
+ bfd_session_control_tx_timer(s, 0);
+
+ birdloop_leave(p->loop);
+
+ TRACE(D_EVENTS, "Session to %I reconfigured", s->addr);
+}
+
+
+/*
+ * BFD interfaces
+ */
+
+/* Interface config returned by bfd_find_iface_config() when no configured one is found */
+static struct bfd_iface_config bfd_default_iface = {
+ .min_rx_int = BFD_DEFAULT_MIN_RX_INT,
+ .min_tx_int = BFD_DEFAULT_MIN_TX_INT,
+ .idle_tx_int = BFD_DEFAULT_IDLE_TX_INT,
+ .multiplier = BFD_DEFAULT_MULTIPLIER
+};
+
+/*
+ * Find the interface config for @iface in @cf. For NULL @iface the multihop
+ * config is used. Falls back to bfd_default_iface when nothing is found.
+ */
+static inline struct bfd_iface_config *
+bfd_find_iface_config(struct bfd_config *cf, struct iface *iface)
+{
+  struct bfd_iface_config *found;
+
+  if (iface)
+    found = (void *) iface_patt_find(&cf->patt_list, iface, NULL);
+  else
+    found = cf->multihop;
+
+  if (!found)
+    return &bfd_default_iface;
+
+  return found;
+}
+
+/*
+ * Get a bfd_iface for the pair (@local, @iface). An existing entry of
+ * p->iface_list is reused and its use count incremented; otherwise a new one
+ * is allocated in p->tpool with its own TX socket and use count 1.
+ */
+static struct bfd_iface *
+bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface)
+{
+ struct bfd_iface *ifa;
+
+ WALK_LIST(ifa, p->iface_list)
+ if (ipa_equal(ifa->local, local) && (ifa->iface == iface))
+ return ifa->uc++, ifa;
+
+ struct bfd_config *cf = (struct bfd_config *) (p->p.cf);
+ struct bfd_iface_config *ic = bfd_find_iface_config(cf, iface);
+
+ ifa = mb_allocz(p->tpool, sizeof(struct bfd_iface));
+ ifa->local = local;
+ ifa->iface = iface;
+ ifa->cf = ic;
+ ifa->bfd = p;
+
+ ifa->sk = bfd_open_tx_sk(p, local, iface);
+ ifa->uc = 1;
+
+ add_tail(&p->iface_list, &ifa->n);
+
+ return ifa;
+}
+
+/*
+ * Drop one reference to @ifa (NULL is accepted). When the use count reaches
+ * zero, the iface is unlinked and its socket and memory are released.
+ */
+static void
+bfd_free_iface(struct bfd_iface *ifa)
+{
+  if (!ifa)
+    return;
+
+  /* Still used by another session */
+  if (--ifa->uc)
+    return;
+
+  rem_node(&ifa->n);
+  sk_stop(ifa->sk);
+  rfree(ifa->sk);
+  mb_free(ifa);
+}
+
+/*
+ * Switch @ifa to its config from the new protocol config @nc and set
+ * ifa->changed when the new config differs from the old one.
+ */
+static void
+bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_config *nc)
+{
+ struct bfd_iface_config *nic = bfd_find_iface_config(nc, ifa->iface);
+ /* NOTE(review): memcmp() covers the whole structure, including the embedded
+ struct iface_patt and any padding, so configs differing only there are
+ reported as changed too - confirm this is intended */
+ ifa->changed = !!memcmp(nic, ifa->cf, sizeof(struct bfd_iface_config));
+
+ /* This should be probably changed to not access ifa->cf from the BFD thread */
+ birdloop_enter(p->loop);
+ ifa->cf = nic;
+ birdloop_leave(p->loop);
+}
+
+
+/*
+ * BFD requests
+ */
+
+/*
+ * Store a new session state and diagnostic in request @req and call its hook
+ * (if set). Nothing is done when @state equals the state already stored.
+ */
+static void
+bfd_request_notify(struct bfd_request *req, u8 state, u8 diag)
+{
+ u8 old_state = req->state;
+
+ if (state == old_state)
+ return;
+
+ req->state = state;
+ req->diag = diag;
+ req->old_state = old_state;
+ /* Flag the Up -> Down transition for the hook */
+ req->down = (old_state == BFD_STATE_UP) && (state == BFD_STATE_DOWN);
+
+ if (req->hook)
+ req->hook(req);
+}
+
+/*
+ * Attach request @req to the session of @p for req->addr, creating the
+ * session if it does not exist, and notify the request about the current
+ * session state. Always returns 1.
+ */
+static int
+bfd_add_request(struct bfd_proto *p, struct bfd_request *req)
+{
+ struct bfd_session *s = bfd_find_session_by_addr(p, req->addr);
+ u8 state, diag;
+
+ if (!s)
+ s = bfd_add_session(p, req->addr, req->local, req->iface);
+
+ /* Move the request from whatever list it is on to the session's list */
+ rem_node(&req->n);
+ add_tail(&s->request_list, &req->n);
+ req->session = s;
+
+ /* Read state and diagnostic under the session lock */
+ bfd_lock_sessions(p);
+ state = s->loc_state;
+ diag = s->loc_diag;
+ bfd_unlock_sessions(p);
+
+ bfd_request_notify(req, state, diag);
+
+ return 1;
+}
+
+/*
+ * Hand request @req to the first BFD instance in bfd_proto_list that accepts
+ * it. When there is none, the request is put on bfd_wait_list without a
+ * session and notified with state AdminDown.
+ */
+static void
+bfd_submit_request(struct bfd_request *req)
+{
+ node *n;
+
+ WALK_LIST(n, bfd_proto_list)
+ if (bfd_add_request(SKIP_BACK(struct bfd_proto, bfd_node, n), req))
+ return;
+
+ rem_node(&req->n);
+ add_tail(&bfd_wait_list, &req->n);
+ req->session = NULL;
+ bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0);
+}
+
+/* Attach all requests waiting on bfd_wait_list to instance @p */
+static void
+bfd_take_requests(struct bfd_proto *p)
+{
+ node *n, *nn;
+
+ /* bfd_add_request() unlinks the node, hence the delete-safe walk */
+ WALK_LIST_DELSAFE(n, nn, bfd_wait_list)
+ bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n));
+}
+
+/*
+ * Detach all requests from all sessions of @p and submit them again, so they
+ * end up at another BFD instance or on the wait list.
+ */
+static void
+bfd_drop_requests(struct bfd_proto *p)
+{
+ node *n;
+
+ HASH_WALK(p->session_hash_id, next_id, s)
+ {
+ /* We assume that p is not in bfd_proto_list */
+ /* Each submit unlinks the first request, so this loop drains the list */
+ WALK_LIST_FIRST(n, s->request_list)
+ bfd_submit_request(SKIP_BACK(struct bfd_request, n, n));
+ }
+ HASH_WALK_END;
+}
+
+static struct resclass bfd_request_class;
+
+/*
+ * Create a request for a BFD session to @addr (local address @local,
+ * interface @iface) as a resource in pool @p and submit it. @hook is stored
+ * in the request together with @data and is called by bfd_request_notify()
+ * on later state changes. Returns the request.
+ */
+struct bfd_request *
+bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface,
+ void (*hook)(struct bfd_request *), void *data)
+{
+ struct bfd_request *req = ralloc(p, &bfd_request_class);
+
+ /* Hack: self-link req->n, we will call rem_node() on it */
+ req->n.prev = req->n.next = &req->n;
+
+ req->addr = addr;
+ req->local = local;
+ req->iface = iface;
+
+ bfd_submit_request(req);
+
+ /* The hook is stored only after the submission, so the notification done
+ during bfd_submit_request() does not call it (assuming ralloc() returns
+ zeroed memory - TODO confirm) */
+ req->hook = hook;
+ req->data = data;
+
+ return req;
+}
+
+/* Resource free hook of a BFD request: unlink it and possibly remove its session */
+static void
+bfd_request_free(resource *r)
+{
+ struct bfd_request *req = (struct bfd_request *) r;
+ struct bfd_session *s = req->session;
+
+ rem_node(&req->n);
+
+ /* Remove the session if there is no request for it. Skip that if
+ inside notify hooks, will be handled by bfd_notify_hook() itself */
+
+ if (s && EMPTY_LIST(s->request_list) && !s->notify_running)
+ bfd_remove_session(s->ifa->bfd, s);
+}
+
+/* Resource dump hook of a BFD request: print its hook and data pointers */
+static void
+bfd_request_dump(resource *r)
+{
+ struct bfd_request *req = (struct bfd_request *) r;
+
+ debug("(code %p, data %p)\n", req->hook, req->data);
+}
+
+/* Resource class of BFD requests; positional fields: name, size, free hook, dump hook */
+static struct resclass bfd_request_class = {
+ "BFD request",
+ sizeof(struct bfd_request),
+ bfd_request_free,
+ bfd_request_dump,
+ NULL,
+ NULL
+};
+
+
+/*
+ * BFD neighbors
+ */
+
+/*
+ * Neighbor notification hook (installed in bfd_init()). For a neighbor entry
+ * bound to a configured BFD neighbor, a session request is created when the
+ * scope becomes positive and released when it is not positive any more.
+ */
+static void
+bfd_neigh_notify(struct neighbor *nb)
+{
+ struct bfd_proto *p = (struct bfd_proto *) nb->proto;
+ struct bfd_neighbor *n = nb->data;
+
+ if (!n)
+ return;
+
+ if ((nb->scope > 0) && !n->req)
+ {
+ /* Without a configured local address, use the address of the interface */
+ ip_addr local = ipa_nonzero(n->local) ? n->local : nb->iface->addr->ip;
+ n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, NULL, NULL);
+ }
+
+ if ((nb->scope <= 0) && n->req)
+ {
+ rfree(n->req);
+ n->req = NULL;
+ }
+}
+
+/*
+ * Activate configured neighbor @n. A multihop neighbor gets its session
+ * request at once. Otherwise a sticky neighbor entry is looked up and bound
+ * to @n; the request is created through bfd_neigh_notify() now if the entry
+ * has positive scope, or later.
+ */
+static void
+bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n)
+{
+ /* Set before the checks below, so it stays set on the error paths too */
+ n->active = 1;
+
+ if (n->multihop)
+ {
+ n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, NULL, NULL);
+ return;
+ }
+
+ struct neighbor *nb = neigh_find2(&p->p, &n->addr, n->iface, NEF_STICKY);
+ if (!nb)
+ {
+ log(L_ERR "%s: Invalid remote address %I%J", p->p.name, n->addr, n->iface);
+ return;
+ }
+
+ /* The neighbor entry is already bound to another configured neighbor */
+ if (nb->data)
+ {
+ log(L_ERR "%s: Duplicate neighbor %I", p->p.name, n->addr);
+ return;
+ }
+
+ n->neigh = nb;
+ nb->data = n;
+
+ if (nb->scope > 0)
+ bfd_neigh_notify(nb);
+ else
+ TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", n->addr, n->iface);
+}
+
+/* Deactivate neighbor @n: unbind it from its neighbor entry and free its session request */
+static void
+bfd_stop_neighbor(struct bfd_proto *p, struct bfd_neighbor *n)
+{
+ if (n->neigh)
+ n->neigh->data = NULL;
+ n->neigh = NULL;
+
+ /* NOTE(review): n->req may be NULL here; presumably rfree() accepts that */
+ rfree(n->req);
+ n->req = NULL;
+}
+
+/* Nonzero when @x and @y have the same address, local address, interface and multihop flag */
+static inline int
+bfd_same_neighbor(struct bfd_neighbor *x, struct bfd_neighbor *y)
+{
+  if (!ipa_equal(x->addr, y->addr))
+    return 0;
+
+  if (!ipa_equal(x->local, y->local))
+    return 0;
+
+  if (x->iface != y->iface)
+    return 0;
+
+  return x->multihop == y->multihop;
+}
+
+/*
+ * Reconfigure configured neighbors of @p from the current config (p->p.cf)
+ * to @new. An old neighbor with an identical counterpart in @new hands over
+ * its neighbor entry and session request to it; an old neighbor without a
+ * counterpart is stopped. Finally, neighbors of @new that did not take over
+ * from an old one are started.
+ */
+static void
+bfd_reconfigure_neighbors(struct bfd_proto *p, struct bfd_config *new)
+{
+  struct bfd_config *old = (struct bfd_config *) (p->p.cf);
+  struct bfd_neighbor *on, *nn;
+
+  WALK_LIST(on, old->neigh_list)
+  {
+    WALK_LIST(nn, new->neigh_list)
+      if (bfd_same_neighbor(nn, on))
+      {
+        nn->neigh = on->neigh;
+        if (nn->neigh)
+          nn->neigh->data = nn;
+
+        nn->req = on->req;
+        nn->active = 1;
+
+        /* Matched, continue with the next old neighbor. Returning here would
+           leave the remaining old neighbors unprocessed and would skip the
+           start of new neighbors below. */
+        goto next;
+      }
+
+    bfd_stop_neighbor(p, on);
+  next:;
+  }
+
+  WALK_LIST(nn, new->neigh_list)
+    if (!nn->active)
+      bfd_start_neighbor(p, nn);
+}
+
+
+/*
+ * BFD notify socket
+ */
+
+/* This core notify code should be replaced after main loop transition to birdloop */
+
+int pipe(int pipefd[2]);
+void pipe_drain(int fd);
+void pipe_kick(int fd);
+
+/*
+ * RX hook of the read end of the notify pipe. Takes over the list of sessions
+ * with pending notifications and, for each of them, passes the current state
+ * and diagnostic to all its requests. Always returns 0.
+ */
+static int
+bfd_notify_hook(sock *sk, int len)
+{
+ struct bfd_proto *p = sk->data;
+ struct bfd_session *s;
+ list tmp_list;
+ u8 state, diag;
+ node *n, *nn;
+
+ pipe_drain(sk->fd);
+
+ /* Move the whole notify list to a private list under the lock */
+ bfd_lock_sessions(p);
+ init_list(&tmp_list);
+ add_tail_list(&tmp_list, &p->notify_list);
+ init_list(&p->notify_list);
+ bfd_unlock_sessions(p);
+
+ WALK_LIST_FIRST(s, tmp_list)
+ {
+ bfd_lock_sessions(p);
+ rem2_node(&s->n);
+ state = s->loc_state;
+ diag = s->loc_diag;
+ bfd_unlock_sessions(p);
+
+ /* FIXME: convert to btime and move to bfd_session_update_state() */
+ s->last_state_change = now;
+
+ /* notify_running makes bfd_request_free() leave the session alone */
+ s->notify_running = 1;
+ WALK_LIST_DELSAFE(n, nn, s->request_list)
+ bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag);
+ s->notify_running = 0;
+
+ /* Remove the session if all requests were removed in notify hooks */
+ if (EMPTY_LIST(s->request_list))
+ bfd_remove_session(p, s);
+ }
+
+ return 0;
+}
+
+/* Write to the notify pipe of @p, which triggers bfd_notify_hook() on its read end */
+static inline void
+bfd_notify_kick(struct bfd_proto *p)
+{
+ pipe_kick(p->notify_ws->fd);
+}
+
+/* Error hook of the notify read socket: just log the error */
+static void
+bfd_noterr_hook(sock *sk, int err)
+{
+ struct bfd_proto *p = sk->data;
+ log(L_ERR "%s: Notify socket error: %m", p->p.name, err);
+}
+
+/*
+ * Create the notify pipe of @p and wrap both ends in SK_MAGIC sockets
+ * allocated from the protocol pool: p->notify_rs (read end, with
+ * bfd_notify_hook() as RX hook) and p->notify_ws (write end). Any failure is
+ * fatal.
+ */
+static void
+bfd_notify_init(struct bfd_proto *p)
+{
+ int pfds[2];
+ sock *sk;
+
+ int rv = pipe(pfds);
+ if (rv < 0)
+ die("pipe: %m");
+
+ sk = sk_new(p->p.pool);
+ sk->type = SK_MAGIC;
+ sk->rx_hook = bfd_notify_hook;
+ sk->err_hook = bfd_noterr_hook;
+ sk->fd = pfds[0];
+ sk->data = p;
+ if (sk_open(sk) < 0)
+ die("bfd: sk_open failed");
+ p->notify_rs = sk;
+
+ /* The write sock is not added to any event loop */
+ sk = sk_new(p->p.pool);
+ sk->type = SK_MAGIC;
+ sk->fd = pfds[1];
+ sk->data = p;
+ sk->flags = SKF_THREAD;
+ if (sk_open(sk) < 0)
+ die("bfd: sk_open failed");
+ p->notify_ws = sk;
+}
+
+
+/*
+ * BFD protocol glue
+ */
+
+/* Initialize the global lists of BFD instances and of waiting requests */
+void
+bfd_init_all(void)
+{
+ init_list(&bfd_proto_list);
+ init_list(&bfd_wait_list);
+}
+
+/* Protocol init hook: allocate the instance and install the neighbor notification hook */
+static struct proto *
+bfd_init(struct proto_config *c)
+{
+ struct proto *p = proto_new(c, sizeof(struct bfd_proto));
+
+ p->neigh_notify = bfd_neigh_notify;
+
+ return p;
+}
+
+/*
+ * Protocol start hook. Sets up the birdloop, pools, lock, session storage and
+ * the notify pipe, registers the instance in bfd_proto_list, opens both RX
+ * sockets, takes over waiting requests, starts configured neighbors and
+ * finally starts the loop. Returns PS_UP.
+ */
+static int
+bfd_start(struct proto *P)
+{
+ struct bfd_proto *p = (struct bfd_proto *) P;
+ struct bfd_config *cf = (struct bfd_config *) (P->cf);
+
+ p->loop = birdloop_new();
+ p->tpool = rp_new(NULL, "BFD thread root");
+ pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE);
+
+ p->session_slab = sl_new(P->pool, sizeof(struct bfd_session));
+ HASH_INIT(p->session_hash_id, P->pool, 8);
+ HASH_INIT(p->session_hash_ip, P->pool, 8);
+
+ init_list(&p->iface_list);
+
+ init_list(&p->notify_list);
+ bfd_notify_init(p);
+
+ add_tail(&bfd_proto_list, &p->bfd_node);
+
+ /* Second argument of bfd_open_rx_sk() is the multihop flag */
+ birdloop_enter(p->loop);
+ p->rx_1 = bfd_open_rx_sk(p, 0);
+ p->rx_m = bfd_open_rx_sk(p, 1);
+ birdloop_leave(p->loop);
+
+ bfd_take_requests(p);
+
+ struct bfd_neighbor *n;
+ WALK_LIST(n, cf->neigh_list)
+ bfd_start_neighbor(p, n);
+
+ birdloop_start(p->loop);
+
+ return PS_UP;
+}
+
+
+/*
+ * Protocol shutdown hook. Unregisters the instance, stops the loop, stops
+ * configured neighbors, resubmits remaining requests and frees the thread
+ * pool and the loop. Returns PS_DOWN.
+ */
+static int
+bfd_shutdown(struct proto *P)
+{
+ struct bfd_proto *p = (struct bfd_proto *) P;
+ struct bfd_config *cf = (struct bfd_config *) (P->cf);
+
+ /* Must precede bfd_drop_requests(), which assumes p is not in bfd_proto_list */
+ rem_node(&p->bfd_node);
+
+ birdloop_stop(p->loop);
+
+ struct bfd_neighbor *n;
+ WALK_LIST(n, cf->neigh_list)
+ bfd_stop_neighbor(p, n);
+
+ bfd_drop_requests(p);
+
+ /* FIXME: This is hack */
+ birdloop_enter(p->loop);
+ rfree(p->tpool);
+ birdloop_leave(p->loop);
+
+ birdloop_free(p->loop);
+
+ return PS_DOWN;
+}
+
+/*
+ * Protocol reconfigure hook. Updates the config of all BFD ifaces,
+ * reconfigures sessions on ifaces whose config changed and reconfigures
+ * neighbors, all with loop wakeups masked. Always returns 1.
+ */
+static int
+bfd_reconfigure(struct proto *P, struct proto_config *c)
+{
+ struct bfd_proto *p = (struct bfd_proto *) P;
+ // struct bfd_config *old = (struct bfd_config *) (P->cf);
+ struct bfd_config *new = (struct bfd_config *) c;
+ struct bfd_iface *ifa;
+
+ birdloop_mask_wakeups(p->loop);
+
+ WALK_LIST(ifa, p->iface_list)
+ bfd_reconfigure_iface(p, ifa, new);
+
+ HASH_WALK(p->session_hash_id, next_id, s)
+ {
+ if (s->ifa->changed)
+ bfd_reconfigure_session(p, s);
+ }
+ HASH_WALK_END;
+
+ bfd_reconfigure_neighbors(p, new);
+
+ birdloop_unmask_wakeups(p->loop);
+
+ return 1;
+}
+
+/* Ensure one instance */
+struct bfd_config *bfd_cf;
+
+/* Preconfig hook: reset bfd_cf, which the config grammar uses to allow one BFD instance only */
+static void
+bfd_preconfig(struct protocol *P UNUSED, struct config *c UNUSED)
+{
+ bfd_cf = NULL;
+}
+
+/* Copy-config hook: only reinitializes the lists of @dest; @src is not read here */
+static void
+bfd_copy_config(struct proto_config *dest, struct proto_config *src)
+{
+ struct bfd_config *d = (struct bfd_config *) dest;
+ // struct bfd_config *s = (struct bfd_config *) src;
+
+ /* We clean up patt_list and neigh_list, neighbors and ifaces are non-sharable */
+ init_list(&d->patt_list);
+ init_list(&d->neigh_list);
+}
+
+/*
+ * CLI handler of 'show bfd sessions': print one line for every session of
+ * instance @P (address, interface, state, time of last state change, TX
+ * interval and detection timeout), or a message that the instance is not up.
+ */
+void
+bfd_show_sessions(struct proto *P)
+{
+ byte tbuf[TM_DATETIME_BUFFER_SIZE];
+ struct bfd_proto *p = (struct bfd_proto *) P;
+ uint state, diag;
+ u32 tx_int, timeout;
+ const char *ifname;
+
+ if (p->p.proto_state != PS_UP)
+ {
+ cli_msg(-1013, "%s: is not up", p->p.name);
+ cli_msg(0, "");
+ return;
+ }
+
+ cli_msg(-1013, "%s:", p->p.name);
+ cli_msg(-1013, "%-25s %-10s %-10s %-10s %8s %8s",
+ "IP address", "Interface", "State", "Since", "Interval", "Timeout");
+
+
+ HASH_WALK(p->session_hash_id, next_id, s)
+ {
+ /* FIXME: this is thread-unsafe, but perhaps harmless */
+ state = s->loc_state;
+ /* NOTE(review): diag is read but not printed */
+ diag = s->loc_diag;
+ ifname = (s->ifa && s->ifa->sk->iface) ? s->ifa->sk->iface->name : "---";
+ /* Interval is shown as zero when last_tx is unset */
+ tx_int = s->last_tx ? (MAX(s->des_min_tx_int, s->rem_min_rx_int) TO_MS) : 0;
+ timeout = (MAX(s->req_min_rx_int, s->rem_min_tx_int) TO_MS) * s->rem_detect_mult;
+
+ /* Keep the index into bfd_state_names[] within the four defined states */
+ state = (state < 4) ? state : 0;
+ tm_format_datetime(tbuf, &config->tf_proto, s->last_state_change);
+
+ /* Both values are in ms here and printed as seconds with three decimals */
+ cli_msg(-1013, "%-25I %-10s %-10s %-10s %3u.%03u %3u.%03u",
+ s->addr, ifname, bfd_state_names[state], tbuf,
+ tx_int / 1000, tx_int % 1000, timeout / 1000, timeout % 1000);
+ }
+ HASH_WALK_END;
+
+ cli_msg(0, "");
+}
+
+
+/* Protocol description of BFD binding the hooks defined in this file */
+struct protocol proto_bfd = {
+ .name = "BFD",
+ .template = "bfd%d",
+ .init = bfd_init,
+ .start = bfd_start,
+ .shutdown = bfd_shutdown,
+ .reconfigure = bfd_reconfigure,
+ .preconfig = bfd_preconfig,
+ .copy_config = bfd_copy_config,
+};
diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h
new file mode 100644
index 00000000..f4ab3fcc
--- /dev/null
+++ b/proto/bfd/bfd.h
@@ -0,0 +1,191 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD)
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_BFD_H_
+#define _BIRD_BFD_H_
+
+#include <pthread.h>
+
+#include "nest/bird.h"
+#include "nest/cli.h"
+#include "nest/iface.h"
+#include "nest/protocol.h"
+#include "nest/route.h"
+#include "conf/conf.h"
+#include "lib/hash.h"
+#include "lib/resource.h"
+#include "lib/socket.h"
+#include "lib/string.h"
+
+#include "nest/bfd.h"
+#include "io.h"
+
+
+#define BFD_CONTROL_PORT 3784
+#define BFD_ECHO_PORT 3785
+#define BFD_MULTI_CTL_PORT 4784
+
+#define BFD_DEFAULT_MIN_RX_INT (10 _MS)
+#define BFD_DEFAULT_MIN_TX_INT (100 _MS)
+#define BFD_DEFAULT_IDLE_TX_INT (1 _S)
+#define BFD_DEFAULT_MULTIPLIER 5
+
+
+struct bfd_iface_config;
+
+struct bfd_config
+{
+ struct proto_config c;
+ list patt_list; /* List of iface configs (struct bfd_iface_config) */
+ list neigh_list; /* List of configured neighbors (struct bfd_neighbor) */
+ struct bfd_iface_config *multihop; /* Multihop pseudoiface config */
+};
+
+/* Per-interface (or multihop) BFD options set by the config grammar */
+struct bfd_iface_config
+{
+ struct iface_patt i;
+ u32 min_rx_int; /* 'min rx interval' option */
+ u32 min_tx_int; /* 'min tx interval' option, used for sessions that are up */
+ u32 idle_tx_int; /* 'idle tx interval' option, used for sessions that are not up */
+ u8 multiplier; /* 'multiplier' option, copied to detect_mult of sessions */
+ u8 passive; /* 'passive' option */
+};
+
+/* Explicitly configured BFD neighbor ('neighbor' option) */
+struct bfd_neighbor
+{
+ node n; /* Node in bfd_config.neigh_list */
+ ip_addr addr; /* Neighbor address */
+ ip_addr local; /* Configured local address, IPA_NONE if not set */
+ struct iface *iface; /* Configured interface, NULL if not set */
+
+ struct neighbor *neigh; /* Bound neighbor entry (non-multihop only) */
+ struct bfd_request *req; /* Session request, NULL if there is none */
+
+ u8 multihop; /* 'multihop' flag from the configuration */
+ u8 active; /* Set when the neighbor was started or taken over on reconfigure */
+};
+
+/* BFD protocol instance */
+struct bfd_proto
+{
+ struct proto p;
+ struct birdloop *loop; /* Event loop of this instance */
+ pool *tpool; /* Pool for timers and ifaces ("BFD thread root") */
+ pthread_spinlock_t lock; /* Taken by bfd_lock_sessions() */
+ node bfd_node; /* Node in bfd_proto_list */
+
+ slab *session_slab; /* Allocator of struct bfd_session */
+ HASH(struct bfd_session) session_hash_id; /* Sessions by local ID */
+ HASH(struct bfd_session) session_hash_ip; /* Sessions by address */
+
+ sock *notify_rs; /* Read end of the notify pipe */
+ sock *notify_ws; /* Write end of the notify pipe */
+ list notify_list; /* Sessions with pending notification */
+
+ sock *rx_1; /* RX socket opened with multihop = 0 */
+ sock *rx_m; /* RX socket opened with multihop = 1 */
+ list iface_list; /* List of struct bfd_iface */
+};
+
+/* TX socket and config shared by sessions with the same local address and interface */
+struct bfd_iface
+{
+ node n; /* Node in bfd_proto.iface_list */
+ ip_addr local; /* Local address */
+ struct iface *iface; /* Interface, NULL for multihop sessions */
+ struct bfd_iface_config *cf; /* Current config */
+ struct bfd_proto *bfd; /* Owning BFD instance */
+
+ sock *sk; /* TX socket */
+ u32 uc; /* Use count, number of sessions referencing this iface */
+ u8 changed; /* Set by reconfigure when the config has changed */
+};
+
+/* State of one BFD session */
+struct bfd_session
+{
+ node n; /* Node in bfd_proto.notify_list */
+ ip_addr addr; /* Address of session */
+ struct bfd_iface *ifa; /* Iface associated with session */
+ struct bfd_session *next_id; /* Next in bfd.session_hash_id */
+ struct bfd_session *next_ip; /* Next in bfd.session_hash_ip */
+
+ u8 opened_unused;
+ u8 passive; /* Copied from the iface config */
+ u8 poll_active; /* BFD_POLL_* bits of the poll in progress */
+ u8 poll_scheduled; /* BFD_POLL_* bits waiting for the next poll */
+
+ u8 loc_state;
+ u8 rem_state;
+ u8 loc_diag;
+ u8 rem_diag;
+ u32 loc_id; /* Local session ID (local discriminator) */
+ u32 rem_id; /* Remote session ID (remote discriminator) */
+ u32 des_min_tx_int; /* Desired min tx interval, local option */
+ u32 des_min_tx_new; /* Used for des_min_tx_int change */
+ u32 req_min_rx_int; /* Required min rx interval, local option */
+ u32 req_min_rx_new; /* Used for req_min_rx_int change */
+ u32 rem_min_tx_int; /* Last received des_min_tx_int */
+ u32 rem_min_rx_int; /* Last received req_min_rx_int */
+ u8 demand_mode; /* Currently unused */
+ u8 rem_demand_mode;
+ u8 detect_mult; /* Announced detect_mult, local option */
+ u8 rem_detect_mult; /* Last received detect_mult */
+
+ btime last_tx; /* Time of last sent periodic control packet */
+ btime last_rx; /* Time of last received valid control packet */
+
+ timer2 *tx_timer; /* Periodic control packet timer */
+ timer2 *hold_timer; /* Timer for session down detection time */
+
+ list request_list; /* List of client requests (struct bfd_request) */
+ bird_clock_t last_state_change; /* Time of last state change */
+ u8 notify_running; /* 1 if notify hooks are running */
+};
+
+
+extern const char *bfd_state_names[];
+
+#define BFD_STATE_ADMIN_DOWN 0
+#define BFD_STATE_DOWN 1
+#define BFD_STATE_INIT 2
+#define BFD_STATE_UP 3
+
+#define BFD_DIAG_NOTHING 0
+#define BFD_DIAG_TIMEOUT 1
+#define BFD_DIAG_ECHO_FAILED 2
+#define BFD_DIAG_NEIGHBOR_DOWN 3
+#define BFD_DIAG_FWD_RESET 4
+#define BFD_DIAG_PATH_DOWN 5
+#define BFD_DIAG_C_PATH_DOWN 6
+#define BFD_DIAG_ADMIN_DOWN 7
+#define BFD_DIAG_RC_PATH_DOWN 8
+
+#define BFD_POLL_TX 1
+#define BFD_POLL_RX 2
+
+#define BFD_FLAGS 0x3f
+#define BFD_FLAG_POLL (1 << 5)
+#define BFD_FLAG_FINAL (1 << 4)
+#define BFD_FLAG_CPI (1 << 3)
+#define BFD_FLAG_AP (1 << 2)
+#define BFD_FLAG_DEMAND (1 << 1)
+#define BFD_FLAG_MULTIPOINT (1 << 0)
+
+
+/* Take / release the spinlock of instance @p (p->lock) */
+static inline void bfd_lock_sessions(struct bfd_proto *p) { pthread_spin_lock(&p->lock); }
+static inline void bfd_unlock_sessions(struct bfd_proto *p) { pthread_spin_unlock(&p->lock); }
+
+/* bfd.c */
+struct bfd_session * bfd_find_session_by_id(struct bfd_proto *p, u32 id);
+struct bfd_session * bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr);
+void bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old_rx_int);
+void bfd_show_sessions(struct proto *P);
+
+/* packets.c */
+void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final);
+sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop);
+sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa);
+
+
+#endif /* _BIRD_BFD_H_ */
diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y
new file mode 100644
index 00000000..1bf8764f
--- /dev/null
+++ b/proto/bfd/config.Y
@@ -0,0 +1,138 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD) Configuration
+ *
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+CF_HDR
+
+#include "proto/bfd/bfd.h"
+
+CF_DEFINES
+
+#define BFD_CFG ((struct bfd_config *) this_proto)
+#define BFD_IFACE ((struct bfd_iface_config *) this_ipatt)
+#define BFD_NEIGHBOR this_bfd_neighbor
+
+static struct bfd_neighbor *this_bfd_neighbor;
+
+extern struct bfd_config *bfd_cf;
+
+CF_DECLS
+
+CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE,
+ INTERFACE, MULTIHOP, NEIGHBOR, DEV, LOCAL)
+
+%type <iface> bfd_neigh_iface
+%type <a> bfd_neigh_local
+%type <i> bfd_neigh_multihop
+
+CF_GRAMMAR
+
+CF_ADDTO(proto, bfd_proto)
+
+bfd_proto_start: proto_start BFD
+{
+ this_proto = proto_config_new(&proto_bfd, sizeof(struct bfd_config), $1);
+ init_list(&BFD_CFG->patt_list);
+ init_list(&BFD_CFG->neigh_list);
+
+ if (bfd_cf)
+ cf_error("Only one BFD instance allowed");
+ bfd_cf = BFD_CFG;
+};
+
+bfd_proto_item:
+ proto_item
+ | INTERFACE bfd_iface
+ | MULTIHOP bfd_multihop
+ | NEIGHBOR bfd_neighbor
+ ;
+
+bfd_proto_opts:
+ /* empty */
+ | bfd_proto_opts bfd_proto_item ';'
+ ;
+
+bfd_proto:
+ bfd_proto_start proto_name '{' bfd_proto_opts '}';
+
+
+bfd_iface_start:
+{
+ this_ipatt = cfg_allocz(sizeof(struct bfd_iface_config));
+ init_list(&this_ipatt->ipn_list);
+
+ BFD_IFACE->min_rx_int = BFD_DEFAULT_MIN_RX_INT;
+ BFD_IFACE->min_tx_int = BFD_DEFAULT_MIN_TX_INT;
+ BFD_IFACE->idle_tx_int = BFD_DEFAULT_IDLE_TX_INT;
+ BFD_IFACE->multiplier = BFD_DEFAULT_MULTIPLIER;
+};
+
+bfd_iface_item:
+ INTERVAL expr_us { BFD_IFACE->min_rx_int = BFD_IFACE->min_tx_int = $2; }
+ | MIN RX INTERVAL expr_us { BFD_IFACE->min_rx_int = $4; }
+ | MIN TX INTERVAL expr_us { BFD_IFACE->min_tx_int = $4; }
+ | IDLE TX INTERVAL expr_us { BFD_IFACE->idle_tx_int = $4; }
+ | MULTIPLIER expr { BFD_IFACE->multiplier = $2; }
+ | PASSIVE bool { BFD_IFACE->passive = $2; }
+ ;
+
+bfd_iface_opts:
+ /* empty */
+ | bfd_iface_opts bfd_iface_item ';'
+ ;
+
+bfd_iface_opt_list:
+ /* empty */
+ | '{' bfd_iface_opts '}'
+ ;
+
+bfd_iface: bfd_iface_start iface_patt_list bfd_iface_opt_list
+{ add_tail(&BFD_CFG->patt_list, NODE this_ipatt); };
+
+bfd_multihop: bfd_iface_start bfd_iface_opt_list
+{ BFD_CFG->multihop = BFD_IFACE; };
+
+
+bfd_neigh_iface:
+ /* empty */ { $$ = NULL; }
+ | '%' SYM { $$ = if_get_by_name($2->name); }
+ | DEV TEXT { $$ = if_get_by_name($2); }
+ ;
+
+bfd_neigh_local:
+ /* empty */ { $$ = IPA_NONE; }
+ | LOCAL ipa { $$ = $2; }
+ ;
+
+bfd_neigh_multihop:
+ /* empty */ { $$ = 0; }
+ | MULTIHOP bool { $$ = $2; }
+ ;
+
+bfd_neighbor: ipa bfd_neigh_iface bfd_neigh_local bfd_neigh_multihop
+{
+ this_bfd_neighbor = cfg_allocz(sizeof(struct bfd_neighbor));
+ add_tail(&BFD_CFG->neigh_list, NODE this_bfd_neighbor);
+
+ BFD_NEIGHBOR->addr = $1;
+ BFD_NEIGHBOR->local = $3;
+ BFD_NEIGHBOR->iface = $2;
+ BFD_NEIGHBOR->multihop = $4;
+
+ if ($4 && $2)
+ cf_error("Neighbor cannot set both interface and multihop");
+
+ if ($4 && ipa_zero($3))
+ cf_error("Multihop neighbor requires specified local address");
+};
+
+
+CF_CLI(SHOW BFD SESSIONS, optsym, [<name>], [[Show information about BFD sessions]])
+{ bfd_show_sessions(proto_get_named($4, &proto_bfd)); };
+
+CF_CODE
+
+CF_END
diff --git a/proto/bfd/io.c b/proto/bfd/io.c
new file mode 100644
index 00000000..fb150040
--- /dev/null
+++ b/proto/bfd/io.c
@@ -0,0 +1,768 @@
+/*
+ * BIRD -- I/O and event loop
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+#include <time.h>
+#include <sys/time.h>
+
+#include "nest/bird.h"
+#include "proto/bfd/io.h"
+
+#include "lib/buffer.h"
+#include "lib/heap.h"
+#include "lib/lists.h"
+#include "lib/resource.h"
+#include "lib/event.h"
+#include "lib/socket.h"
+
+
+/* State of one event loop */
+struct birdloop
+{
+ pool *pool;
+ pthread_t thread;
+ pthread_mutex_t mutex;
+
+ btime last_time; /* Loop time, returned by current_time() */
+ btime real_time; /* Last gettimeofday() reading; kept zero with the monotonic clock */
+ u8 use_monotonic_clock; /* Set when CLOCK_MONOTONIC is usable */
+
+ u8 stop_called;
+ u8 poll_active; /* Tested before kicking the loop from timer/event code */
+ u8 wakeup_masked; /* Nonzero when kicks are masked; 2 when a kick was requested meanwhile */
+ int wakeup_fds[2]; /* Wakeup pipe: [0] is drained, [1] is written */
+
+ BUFFER(timer2 *) timers; /* Timer heap; index 0 holds a NULL placeholder */
+ list event_list;
+ list sock_list;
+ uint sock_num;
+
+ BUFFER(sock *) poll_sk;
+ BUFFER(struct pollfd) poll_fd;
+ u8 poll_changed;
+ u8 close_scheduled;
+};
+
+
+/*
+ * Current thread context
+ */
+
+static pthread_key_t current_loop_key;
+
+/* Return the loop stored under current_loop_key for the calling thread */
+static inline struct birdloop *
+birdloop_current(void)
+{
+ return pthread_getspecific(current_loop_key);
+}
+
+/* Store @loop under current_loop_key for the calling thread */
+static inline void
+birdloop_set_current(struct birdloop *loop)
+{
+ pthread_setspecific(current_loop_key, loop);
+}
+
+/* Create the thread-specific key used for the current loop (no destructor) */
+static inline void
+birdloop_init_current(void)
+{
+ pthread_key_create(&current_loop_key, NULL);
+}
+
+
+/*
+ * Time clock
+ */
+
+static void times_update_alt(struct birdloop *loop);
+
+/*
+ * Initialize the clock of @loop. The monotonic clock is used when available;
+ * otherwise the loop falls back to times_update_alt(), which is based on
+ * gettimeofday().
+ */
+static void
+times_init(struct birdloop *loop)
+{
+ struct timespec ts;
+ int rv;
+
+ rv = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (rv < 0)
+ {
+ log(L_WARN "Monotonic clock is missing");
+
+ loop->use_monotonic_clock = 0;
+ loop->last_time = 0;
+ loop->real_time = 0;
+ times_update_alt(loop);
+ return;
+ }
+
+ /* Sanity check only, the clock is used anyway */
+ if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40)))
+ log(L_WARN "Monotonic clock is crazy");
+
+ loop->use_monotonic_clock = 1;
+ /* Seconds scaled by the S macro plus nanoseconds converted to microseconds */
+ loop->last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
+ loop->real_time = 0;
+}
+
+/*
+ * Update the loop time from the monotonic clock. A failure of
+ * clock_gettime() is fatal; a clock going backwards is only logged and the
+ * new value is used anyway.
+ */
+static void
+times_update_pri(struct birdloop *loop)
+{
+ struct timespec ts;
+ int rv;
+
+ rv = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (rv < 0)
+ die("clock_gettime: %m");
+
+ btime new_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
+
+ if (new_time < loop->last_time)
+ log(L_ERR "Monotonic clock is broken");
+
+ loop->last_time = new_time;
+ loop->real_time = 0;
+}
+
+/*
+ * Update the loop time from gettimeofday(). The loop time advances by the
+ * wall clock difference since the last update; a negative difference or one
+ * above 60 s is replaced by a fixed step of 100 ms.
+ */
+static void
+times_update_alt(struct birdloop *loop)
+{
+ struct timeval tv;
+ int rv;
+
+ rv = gettimeofday(&tv, NULL);
+ if (rv < 0)
+ die("gettimeofday: %m");
+
+ btime new_time = ((s64) tv.tv_sec S) + tv.tv_usec;
+ btime delta = new_time - loop->real_time;
+
+ if ((delta < 0) || (delta > (60 S)))
+ {
+ /* real_time is zero on the first call, which is not reported as a jump */
+ if (loop->real_time)
+ log(L_WARN "Time jump, delta %d us", (int) delta);
+
+ delta = 100 MS;
+ }
+
+ loop->last_time += delta;
+ loop->real_time = new_time;
+}
+
+/* Update the time of @loop using the clock selected in times_init() */
+static void
+times_update(struct birdloop *loop)
+{
+  if (!loop->use_monotonic_clock)
+  {
+    times_update_alt(loop);
+    return;
+  }
+
+  times_update_pri(loop);
+}
+
+/* Return the cached time (last_time) of the loop of the calling thread */
+btime
+current_time(void)
+{
+ return birdloop_current()->last_time;
+}
+
+
+/*
+ * Wakeup code for birdloop
+ */
+
+/* Create a pipe in @pfds with both ends set to O_NONBLOCK; any failure is fatal */
+static void
+pipe_new(int *pfds)
+{
+ int rv = pipe(pfds);
+ if (rv < 0)
+ die("pipe: %m");
+
+ if (fcntl(pfds[0], F_SETFL, O_NONBLOCK) < 0)
+ die("fcntl(O_NONBLOCK): %m");
+
+ if (fcntl(pfds[1], F_SETFL, O_NONBLOCK) < 0)
+ die("fcntl(O_NONBLOCK): %m");
+}
+
+/*
+ * Read and discard data from @fd in 64-byte chunks until a read returns less
+ * than a full chunk or fails with EAGAIN. EINTR is retried, other errors are
+ * fatal.
+ */
+void
+pipe_drain(int fd)
+{
+  char buf[64];
+  int rv;
+
+  for (;;)
+  {
+    rv = read(fd, buf, 64);
+
+    if (rv < 0)
+    {
+      if (errno == EINTR)
+        continue;
+      if (errno == EAGAIN)
+        return;
+      die("wakeup read: %m");
+    }
+
+    /* A short read means there is nothing more to drain */
+    if (rv != 64)
+      return;
+  }
+}
+
+/*
+ * Write one u64 value to @fd. EINTR is retried, EAGAIN is silently accepted,
+ * other errors are fatal.
+ */
+void
+pipe_kick(int fd)
+{
+  u64 v = 1;
+  int rv;
+
+  do
+    rv = write(fd, &v, sizeof(u64));
+  while ((rv < 0) && (errno == EINTR));
+
+  if ((rv < 0) && (errno != EAGAIN))
+    die("wakeup write: %m");
+}
+
+/* Create the non-blocking wakeup pipe of @loop */
+static inline void
+wakeup_init(struct birdloop *loop)
+{
+ pipe_new(loop->wakeup_fds);
+}
+
+/* Discard pending data from the read end of the wakeup pipe */
+static inline void
+wakeup_drain(struct birdloop *loop)
+{
+ pipe_drain(loop->wakeup_fds[0]);
+}
+
+/* Unconditionally write to the wakeup pipe of @loop */
+static inline void
+wakeup_do_kick(struct birdloop *loop)
+{
+ pipe_kick(loop->wakeup_fds[1]);
+}
+
+/*
+ * Wake up @loop unless wakeups are masked. When masked, only remember (value
+ * 2 in wakeup_masked) that a wakeup was requested.
+ */
+static inline void
+wakeup_kick(struct birdloop *loop)
+{
+ if (!loop->wakeup_masked)
+ wakeup_do_kick(loop);
+ else
+ loop->wakeup_masked = 2;
+}
+
+
+/*
+ * Events
+ */
+
+/* Nonzero when the event list of @loop is not empty */
+static inline uint
+events_waiting(struct birdloop *loop)
+{
+ return !EMPTY_LIST(loop->event_list);
+}
+
+/* Initialize the (empty) event list of @loop */
+static inline void
+events_init(struct birdloop *loop)
+{
+ init_list(&loop->event_list);
+}
+
+/* Refresh the loop time and run the events of @loop via ev_run_list() */
+static void
+events_fire(struct birdloop *loop)
+{
+ times_update(loop);
+ ev_run_list(&loop->event_list);
+}
+
+/*
+ * Schedule event @e in the loop of the calling thread. An event that is
+ * already linked is moved to the tail of the event list.
+ */
+void
+ev2_schedule(event *e)
+{
+ struct birdloop *loop = birdloop_current();
+
+ /* Kick the loop only when it is polling and the list was empty so far */
+ if (loop->poll_active && EMPTY_LIST(loop->event_list))
+ wakeup_kick(loop);
+
+ if (e->n.next)
+ rem_node(&e->n);
+
+ add_tail(&loop->event_list, &e->n);
+}
+
+
+/*
+ * Timers
+ */
+
+/* Heap ordering by expiration time; the swap also keeps timer->index in sync with the heap position */
+#define TIMER_LESS(a,b) ((a)->expires < (b)->expires)
+#define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \
+ heap[a]->index = (a), heap[b]->index = (b))
+
+/* Number of timers in the heap; slot 0 of the buffer is a placeholder and is not counted */
+static inline uint timers_count(struct birdloop *loop)
+{ return loop->timers.used - 1; }
+
+/* Timer at the top of the heap (index 1), or NULL when the heap is empty */
+static inline timer2 *timers_first(struct birdloop *loop)
+{ return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; }
+
+
+/* Resource class free hook: stop the timer (tm2_stop() is a no-op for an inactive one). */
+static void
+tm2_free(resource *r)
+{
+ timer2 *t = (timer2 *) r;
+
+ tm2_stop(t);
+}
+
+/* Resource class dump hook: print hook/data pointers, non-zero randomize/recurrent values and the remaining time. */
+static void
+tm2_dump(resource *r)
+{
+ timer2 *t = (timer2 *) r;
+
+ debug("(code %p, data %p, ", t->hook, t->data);
+ if (t->randomize)
+ debug("rand %d, ", t->randomize);
+ if (t->recurrent)
+ debug("recur %d, ", t->recurrent);
+ if (t->expires)
+ /* NOTE(review): the argument has type btime; if btime is wider than int, %d does not match it - confirm */
+ debug("expires in %d ms)\n", (t->expires - current_time()) TO_MS);
+ else
+ debug("inactive)\n");
+}
+
+
+/* Resource class of timer2 objects: name, size, free hook, dump hook; the last two slots are left NULL */
+static struct resclass tm2_class = {
+ "Timer",
+ sizeof(timer2),
+ tm2_free,
+ tm2_dump,
+ NULL,
+ NULL
+};
+
+/*
+ * tm2_new - allocate a timer in pool @p
+ *
+ * The timer is allocated with ralloc() and its heap index is set to -1.
+ * Hook, data and intervals are left for the caller to fill in.
+ */
+timer2 *
+tm2_new(pool *p)
+{
+ timer2 *t = ralloc(p, &tm2_class);
+ t->index = -1;
+ return t;
+}
+
+/*
+ * tm2_set - (re)schedule a timer to an absolute time
+ * @t: timer
+ * @when: expiration time
+ *
+ * Operates on the timer heap of the loop returned by birdloop_current().
+ * An inactive timer (expires == 0) is inserted into the heap; an active one
+ * is moved within the heap according to whether its expiration grew or
+ * shrank. Nothing is changed when @when equals the current expiration.
+ */
+void
+tm2_set(timer2 *t, btime when)
+{
+ struct birdloop *loop = birdloop_current();
+ uint tc = timers_count(loop);
+
+ if (!t->expires)
+ {
+ /* New heap member: takes the next free slot, then is sifted into place */
+ t->index = ++tc;
+ t->expires = when;
+ BUFFER_PUSH(loop->timers) = t;
+ HEAP_INSERT(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP);
+ }
+ else if (t->expires < when)
+ {
+ t->expires = when;
+ HEAP_INCREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index);
+ }
+ else if (t->expires > when)
+ {
+ t->expires = when;
+ HEAP_DECREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index);
+ }
+
+ /* Timer sits in heap slot 1 while the loop is in poll(): request a wakeup */
+ if (loop->poll_active && (t->index == 1))
+ wakeup_kick(loop);
+}
+
+/* Schedule @t to expire @after from current_time(); a negative @after is treated as 0. */
+void
+tm2_start(timer2 *t, btime after)
+{
+ tm2_set(t, current_time() + MAX(after, 0));
+}
+
+/*
+ * tm2_stop - deactivate a timer
+ *
+ * Removes @t from the timer heap of the current loop and marks it inactive
+ * (index -1, expires 0). Does nothing when the timer is already inactive.
+ */
+void
+tm2_stop(timer2 *t)
+{
+ if (!t->expires)
+ return;
+
+ struct birdloop *loop = birdloop_current();
+ uint tc = timers_count(loop);
+
+ HEAP_DELETE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index);
+ BUFFER_POP(loop->timers);
+
+ t->index = -1;
+ t->expires = 0;
+}
+
+/* Initialize the timer buffer in the loop's pool and push a NULL entry into its first slot. */
+static void
+timers_init(struct birdloop *loop)
+{
+ BUFFER_INIT(loop->timers, loop->pool, 4);
+ BUFFER_PUSH(loop->timers) = NULL;
+}
+
+/*
+ * timers_fire - run hooks of all timers that have expired
+ *
+ * The loop time is refreshed once and used as the cut-off. Each expired
+ * timer is rescheduled (recurrent) or stopped (one-shot) before its hook
+ * is called.
+ */
+static void
+timers_fire(struct birdloop *loop)
+{
+ btime base_time;
+ timer2 *t;
+
+ times_update(loop);
+ base_time = loop->last_time;
+
+ /* Assignment in the condition is intentional: loop while a first timer exists */
+ while (t = timers_first(loop))
+ {
+ if (t->expires > base_time)
+ return;
+
+ if (t->recurrent)
+ {
+ btime when = t->expires + t->recurrent;
+
+ /* Next period would still not be in the future: restart from the loop's last_time */
+ if (when <= loop->last_time)
+ when = loop->last_time + t->recurrent;
+
+ /* Add a random value from 0 to randomize inclusive */
+ if (t->randomize)
+ when += random() % (t->randomize + 1);
+
+ tm2_set(t, when);
+ }
+ else
+ tm2_stop(t);
+
+ t->hook(t);
+ }
+}
+
+
+/*
+ * Sockets
+ */
+
+/* Initialize the socket list, the socket counter and the two poll buffers of the loop. */
+static void
+sockets_init(struct birdloop *loop)
+{
+ init_list(&loop->sock_list);
+ loop->sock_num = 0;
+
+ BUFFER_INIT(loop->poll_sk, loop->pool, 4);
+ BUFFER_INIT(loop->poll_fd, loop->pool, 4);
+ loop->poll_changed = 1; /* add wakeup fd */
+}
+
+/*
+ * Append socket @s to the loop's socket list. The socket has no poll slot
+ * yet (index -1); poll_changed requests a rebuild of the poll arrays, and
+ * the loop is kicked when it is currently in poll().
+ */
+static void
+sockets_add(struct birdloop *loop, sock *s)
+{
+ add_tail(&loop->sock_list, &s->n);
+ loop->sock_num++;
+
+ s->index = -1;
+ loop->poll_changed = 1;
+
+ if (loop->poll_active)
+ wakeup_kick(loop);
+}
+
+/* Public entry point: add socket @s to the loop returned by birdloop_current(). */
+void
+sk_start(sock *s)
+{
+ struct birdloop *loop = birdloop_current();
+
+ sockets_add(loop, s);
+}
+
+/*
+ * Unlink socket @s from the loop's socket list. If the socket has a slot in
+ * the poll arrays, its poll_sk entry is cleared to NULL (the poll_fd entry
+ * is left untouched). The socket's index is reset to -1 and a rebuild of
+ * the poll arrays is requested.
+ */
+static void
+sockets_remove(struct birdloop *loop, sock *s)
+{
+ rem_node(&s->n);
+ loop->sock_num--;
+
+ if (s->index >= 0)
+ loop->poll_sk.data[s->index] = NULL;
+
+ s->index = -1;
+ loop->poll_changed = 1;
+
+ /* Wakeup moved to sk_stop() */
+}
+
+/*
+ * sk_stop - remove a socket from the current loop and close its fd
+ * @s: socket previously added by sk_start()
+ *
+ * When the loop is in poll() and the fd is part of the prepared poll set,
+ * the close is deferred: sockets_close_fds() closes fds of poll slots whose
+ * socket pointer was cleared. A socket that never got a poll slot
+ * (index < 0) is not reachable that way, so its fd is closed right here;
+ * otherwise it would leak. In all cases s->fd is set to -1 on return.
+ */
+void
+sk_stop(sock *s)
+{
+  struct birdloop *loop = birdloop_current();
+
+  /* sockets_remove() resets s->index, so sample it beforehand */
+  int polled = (s->index >= 0);
+
+  sockets_remove(loop, s);
+
+  if (loop->poll_active)
+  {
+    if (polled)
+      loop->close_scheduled = 1;
+    else
+      close(s->fd);
+
+    wakeup_kick(loop);
+  }
+  else
+    close(s->fd);
+
+  s->fd = -1;
+}
+
+/* poll() event mask for @s: POLLIN when an rx_hook is set, POLLOUT when ttx differs from tpos */
+static inline uint sk_want_events(sock *s)
+{ return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); }
+
+/*
+FIXME: this should be called from sock code
+
+static void
+sockets_update(struct birdloop *loop, sock *s)
+{
+ if (s->index >= 0)
+ loop->poll_fd.data[s->index].events = sk_want_events(s);
+}
+*/
+
+/*
+ * sockets_prepare - rebuild the arrays passed to poll()
+ *
+ * poll_sk and poll_fd are sized to sock_num + 1 entries. Entry i describes
+ * the i-th socket of sock_list (which also gets i stored as its index); the
+ * extra last entry is the wakeup fd with a NULL socket pointer. Clears
+ * poll_changed.
+ */
+static void
+sockets_prepare(struct birdloop *loop)
+{
+ BUFFER_SET(loop->poll_sk, loop->sock_num + 1);
+ BUFFER_SET(loop->poll_fd, loop->sock_num + 1);
+
+ struct pollfd *pfd = loop->poll_fd.data;
+ sock **psk = loop->poll_sk.data;
+ int i = 0;
+ node *n;
+
+ WALK_LIST(n, loop->sock_list)
+ {
+ sock *s = SKIP_BACK(sock, n, n);
+
+ ASSERT(i < loop->sock_num);
+
+ s->index = i;
+ *psk = s;
+ pfd->fd = s->fd;
+ pfd->events = sk_want_events(s);
+ pfd->revents = 0;
+
+ pfd++;
+ psk++;
+ i++;
+ }
+
+ ASSERT(i == loop->sock_num);
+
+ /* Add internal wakeup fd */
+ *psk = NULL;
+ pfd->fd = loop->wakeup_fds[0];
+ pfd->events = POLLIN;
+ pfd->revents = 0;
+
+ loop->poll_changed = 0;
+}
+
+/*
+ * Close fds of poll slots whose socket pointer is NULL, then clear
+ * close_scheduled. The last slot of the poll arrays is never visited.
+ */
+static void
+sockets_close_fds(struct birdloop *loop)
+{
+  struct pollfd *fds = loop->poll_fd.data;
+  sock **socks = loop->poll_sk.data;
+  int last = loop->poll_fd.used - 1;
+  int k = 0;
+
+  while (k < last)
+  {
+    if (!socks[k])
+      close(fds[k].fd);
+    k++;
+  }
+
+  loop->close_scheduled = 0;
+}
+
+int sk_read(sock *s);
+int sk_write(sock *s);
+
+/*
+ * sockets_fire - dispatch the results of the last poll() call
+ *
+ * Drains the wakeup fd when it is readable, then walks the prepared poll
+ * slots. For a readable slot sk_read() is called repeatedly while it returns
+ * nonzero and the socket still has an rx_hook; for a writable slot
+ * sk_write() is called repeatedly while it returns nonzero. A slot whose
+ * socket pointer has been cleared to NULL is skipped. POLLNVAL is fatal.
+ */
+static void
+sockets_fire(struct birdloop *loop)
+{
+  struct pollfd *pfd = loop->poll_fd.data;
+  sock **psk = loop->poll_sk.data;
+  int poll_num = loop->poll_fd.used - 1;
+
+  times_update(loop);
+
+  /*
+   * Last fd is internal wakeup fd. It has to be indexed by poll_num, the
+   * size of the prepared arrays, and not by loop->sock_num: sk_start() and
+   * sk_stop() change sock_num without rebuilding the arrays, so sock_num may
+   * point at a regular socket slot or beyond the used part of the buffer.
+   */
+  if (pfd[poll_num].revents & POLLIN)
+    wakeup_drain(loop);
+
+  int i;
+  for (i = 0; i < poll_num; pfd++, psk++, i++)
+  {
+    int e = 1;
+
+    if (! pfd->revents)
+      continue;
+
+    if (pfd->revents & POLLNVAL)
+      die("poll: invalid fd %d", pfd->fd);
+
+    /* *psk is re-read on every pass: a hook may have cleared the slot */
+    if (pfd->revents & POLLIN)
+      while (e && *psk && (*psk)->rx_hook)
+        e = sk_read(*psk);
+
+    e = 1;
+    if (pfd->revents & POLLOUT)
+      while (e && *psk)
+        e = sk_write(*psk);
+  }
+}
+
+
+/*
+ * Birdloop
+ */
+
+static void * birdloop_main(void *arg);
+
+/*
+ * birdloop_new - allocate and initialize a loop
+ *
+ * Creates a root pool named "Birdloop root", allocates a zeroed
+ * struct birdloop in it and initializes its mutex, times, wakeup fds,
+ * events, timers and sockets. No thread is created here.
+ */
+struct birdloop *
+birdloop_new(void)
+{
+ /* FIXME: this init should be elsewhere and thread-safe */
+ static int init = 0;
+ if (!init)
+ { birdloop_init_current(); init = 1; }
+
+ pool *p = rp_new(NULL, "Birdloop root");
+ struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop));
+ loop->pool = p;
+ pthread_mutex_init(&loop->mutex, NULL);
+
+ times_init(loop);
+ wakeup_init(loop);
+
+ events_init(loop);
+ timers_init(loop);
+ sockets_init(loop);
+
+ return loop;
+}
+
+/* Create the loop's thread running birdloop_main(); failure of pthread_create() is fatal. */
+void
+birdloop_start(struct birdloop *loop)
+{
+ int rv = pthread_create(&loop->thread, NULL, birdloop_main, loop);
+ if (rv)
+ die("pthread_create(): %M", rv);
+}
+
+/*
+ * Ask the loop thread to terminate and wait for it: stop_called is set under
+ * the loop mutex, the loop is kicked regardless of wakeup masking, and the
+ * thread is joined. Failure of pthread_join() is fatal.
+ */
+void
+birdloop_stop(struct birdloop *loop)
+{
+ pthread_mutex_lock(&loop->mutex);
+ loop->stop_called = 1;
+ wakeup_do_kick(loop);
+ pthread_mutex_unlock(&loop->mutex);
+
+ int rv = pthread_join(loop->thread, NULL);
+ if (rv)
+ die("pthread_join(): %M", rv);
+}
+
+/* Free the loop's pool; the struct birdloop itself was allocated from that pool in birdloop_new(). */
+void
+birdloop_free(struct birdloop *loop)
+{
+ rfree(loop->pool);
+}
+
+
+/* Lock the loop's mutex and make @loop the current loop (see birdloop_current() users). */
+void
+birdloop_enter(struct birdloop *loop)
+{
+ /* TODO: these functions could save and restore old context */
+ pthread_mutex_lock(&loop->mutex);
+ birdloop_set_current(loop);
+}
+
+/* Reset the current loop to NULL and unlock the loop's mutex. */
+void
+birdloop_leave(struct birdloop *loop)
+{
+ /* TODO: these functions could save and restore old context */
+ birdloop_set_current(NULL);
+ pthread_mutex_unlock(&loop->mutex);
+}
+
+/* Under the loop mutex, set wakeup_masked to 1 so that wakeup_kick() only records requests. */
+void
+birdloop_mask_wakeups(struct birdloop *loop)
+{
+ pthread_mutex_lock(&loop->mutex);
+ loop->wakeup_masked = 1;
+ pthread_mutex_unlock(&loop->mutex);
+}
+
+/* Under the loop mutex, deliver one kick if wakeup_kick() recorded a request (value 2), then clear the mask. */
+void
+birdloop_unmask_wakeups(struct birdloop *loop)
+{
+ pthread_mutex_lock(&loop->mutex);
+ if (loop->wakeup_masked == 2)
+ wakeup_do_kick(loop);
+ loop->wakeup_masked = 0;
+ pthread_mutex_unlock(&loop->mutex);
+}
+
+/*
+ * birdloop_main - thread body of a loop
+ * @arg: the struct birdloop to run
+ *
+ * The loop mutex is held all the time except while blocked in poll().
+ * Each iteration runs events and timers, computes the poll timeout,
+ * rebuilds the poll arrays if needed, polls, and then handles deferred
+ * closes, the stop request, socket activity and timers. Returns NULL once
+ * stop_called is seen.
+ */
+static void *
+birdloop_main(void *arg)
+{
+ struct birdloop *loop = arg;
+ timer2 *t;
+ int rv, timeout;
+
+ birdloop_set_current(loop);
+
+ pthread_mutex_lock(&loop->mutex);
+ while (1)
+ {
+ events_fire(loop);
+ timers_fire(loop);
+
+ times_update(loop);
+ /* Timeout: 0 with events pending, time to the first timer (+1 ms) if any, else wait indefinitely */
+ if (events_waiting(loop))
+ timeout = 0;
+ else if (t = timers_first(loop))
+ timeout = (tm2_remains(t) TO_MS) + 1;
+ else
+ timeout = -1;
+
+ if (loop->poll_changed)
+ sockets_prepare(loop);
+
+ /* poll_active is set before the mutex is released, so other threads know a kick is needed */
+ loop->poll_active = 1;
+ pthread_mutex_unlock(&loop->mutex);
+
+ try:
+ rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout);
+ if (rv < 0)
+ {
+ if (errno == EINTR || errno == EAGAIN)
+ goto try;
+ die("poll: %m");
+ }
+
+ pthread_mutex_lock(&loop->mutex);
+ loop->poll_active = 0;
+
+ /* Closes deferred by sk_stop() while poll() was running */
+ if (loop->close_scheduled)
+ sockets_close_fds(loop);
+
+ if (loop->stop_called)
+ break;
+
+ /* rv is the poll() result; nonzero means some fd reported events */
+ if (rv)
+ sockets_fire(loop);
+
+ timers_fire(loop);
+ }
+
+ loop->stop_called = 0;
+ pthread_mutex_unlock(&loop->mutex);
+
+ return NULL;
+}
+
+
diff --git a/proto/bfd/io.h b/proto/bfd/io.h
new file mode 100644
index 00000000..3f166a47
--- /dev/null
+++ b/proto/bfd/io.h
@@ -0,0 +1,99 @@
+/*
+ * BIRD -- I/O and event loop
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_BFD_IO_H_
+#define _BIRD_BFD_IO_H_
+
+#include "nest/bird.h"
+#include "lib/lists.h"
+#include "lib/resource.h"
+#include "lib/event.h"
+#include "lib/socket.h"
+// #include "lib/timer.h"
+
+
+/* Timer object; allocated by tm2_new() and scheduled with tm2_set() / tm2_start() */
+typedef struct timer2
+{
+ resource r;
+ void (*hook)(struct timer2 *); /* Called with the timer itself as argument */
+ void *data; /* Not interpreted by the timer functions declared here */
+
+ btime expires; /* 0=inactive */
+ uint randomize; /* Amount of randomization */
+ uint recurrent; /* Timer recurrence */
+
+ int index; /* tm2_new() sets this to -1 */
+} timer2;
+
+
+btime current_time(void);
+
+void ev2_schedule(event *e);
+
+
+timer2 *tm2_new(pool *p);
+void tm2_set(timer2 *t, btime when);
+void tm2_start(timer2 *t, btime after);
+void tm2_stop(timer2 *t);
+
+/* Nonzero when the timer is scheduled, i.e. its expiration time is not 0. */
+static inline int
+tm2_active(timer2 *t)
+{
+  return (t->expires == 0) ? 0 : 1;
+}
+
+/* Time left until expiration relative to current_time(); 0 when already due. */
+static inline btime
+tm2_remains(timer2 *t)
+{
+  btime now = current_time();
+
+  if (t->expires > now)
+    return t->expires - now;
+
+  return 0;
+}
+
+/* Allocate a timer in pool @p and fill in its hook, data, recurrence and randomization. */
+static inline timer2 *
+tm2_new_init(pool *p, void (*hook)(struct timer2 *), void *data, uint rec, uint rand)
+{
+  timer2 *tm = tm2_new(p);
+
+  tm->hook = hook;
+  tm->data = data;
+  tm->recurrent = rec;
+  tm->randomize = rand;
+
+  return tm;
+}
+
+/* Move the expiration to @when only if that is later than the current one. */
+static inline void
+tm2_set_max(timer2 *t, btime when)
+{
+  if (when <= t->expires)
+    return;
+
+  tm2_set(t, when);
+}
+
+/*
+static inline void
+tm2_start_max(timer2 *t, btime after)
+{
+ btime rem = tm2_remains(t);
+ tm2_start(t, _MAX(rem, after));
+}
+*/
+
+
+void sk_start(sock *s);
+void sk_stop(sock *s);
+
+
+
+struct birdloop *birdloop_new(void);
+void birdloop_start(struct birdloop *loop);
+void birdloop_stop(struct birdloop *loop);
+void birdloop_free(struct birdloop *loop);
+
+void birdloop_enter(struct birdloop *loop);
+void birdloop_leave(struct birdloop *loop);
+void birdloop_mask_wakeups(struct birdloop *loop);
+void birdloop_unmask_wakeups(struct birdloop *loop);
+
+
+#endif /* _BIRD_BFD_IO_H_ */
diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c
new file mode 100644
index 00000000..fc2616ca
--- /dev/null
+++ b/proto/bfd/packets.c
@@ -0,0 +1,248 @@
+/*
+ * BIRD -- Bidirectional Forwarding Detection (BFD)
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include "bfd.h"
+
+
+/*
+ * Layout of a BFD control packet as written into the TX buffer by
+ * bfd_send_ctl() and read from the RX buffer by bfd_rx_hook(). The u32
+ * fields are converted there with htonl() / ntohl().
+ */
+struct bfd_ctl_packet
+{
+ u8 vdiag; /* version and diagnostic */
+ u8 flags; /* state and flags */
+ u8 detect_mult;
+ u8 length;
+ u32 snd_id; /* sender ID, aka 'my discriminator' */
+ u32 rcv_id; /* receiver ID, aka 'your discriminator' */
+ u32 des_min_tx_int;
+ u32 req_min_rx_int;
+ u32 req_min_echo_rx_int;
+};
+
+#define BFD_BASE_LEN sizeof(struct bfd_ctl_packet)
+#define BFD_MAX_LEN 64
+
+/* Build the vdiag byte: version shifted left by 5 bits, OR-ed with the diagnostic code */
+static inline u8 bfd_pack_vdiag(u8 version, u8 diag)
+{ return (version << 5) | diag; }
+
+/* Build the flags byte: state shifted left by 6 bits, OR-ed with the flag bits */
+static inline u8 bfd_pack_flags(u8 state, u8 flags)
+{ return (state << 6) | flags; }
+
+/* Extract the version: the vdiag byte shifted right by 5 bits */
+static inline u8 bfd_pkt_get_version(struct bfd_ctl_packet *pkt)
+{ return pkt->vdiag >> 5; }
+
+/*
+ * Extract the diagnostic code: the low 5 bits of the vdiag byte, i.e. the
+ * bits not occupied by the version (see bfd_pack_vdiag()). The mask must be
+ * applied with bitwise AND; a logical AND would only yield 0 or 1.
+ */
+static inline u8 bfd_pkt_get_diag(struct bfd_ctl_packet *pkt)
+{ return pkt->vdiag & 0x1f; }
+
+
+/* Extract the state: the flags byte shifted right by 6 bits */
+static inline u8 bfd_pkt_get_state(struct bfd_ctl_packet *pkt)
+{ return pkt->flags >> 6; }
+
+/* Store the state in the top bits of the flags byte; note that this overwrites the whole byte, clearing all flag bits */
+static inline void bfd_pkt_set_state(struct bfd_ctl_packet *pkt, u8 val)
+{ pkt->flags = val << 6; }
+
+
+/*
+ * bfd_format_flags - render packet flags as a short string
+ * @flags: flags byte of a control packet
+ * @buf: output buffer
+ *
+ * Writes a space if any bit of BFD_FLAGS is set, followed by one letter per
+ * set flag (P, F, C, A, D, M), and a terminating zero. At most 8 bytes are
+ * written, so @buf must be at least that large (callers in this file pass
+ * char[8]). Returns @buf.
+ */
+char *
+bfd_format_flags(u8 flags, char *buf)
+{
+ char *bp = buf;
+ if (flags & BFD_FLAGS) *bp++ = ' ';
+ if (flags & BFD_FLAG_POLL) *bp++ = 'P';
+ if (flags & BFD_FLAG_FINAL) *bp++ = 'F';
+ if (flags & BFD_FLAG_CPI) *bp++ = 'C';
+ if (flags & BFD_FLAG_AP) *bp++ = 'A';
+ if (flags & BFD_FLAG_DEMAND) *bp++ = 'D';
+ if (flags & BFD_FLAG_MULTIPOINT) *bp++ = 'M';
+ *bp = 0;
+
+ return buf;
+}
+
+/*
+ * bfd_send_ctl - build and send a control packet for a session
+ * @p: BFD protocol instance (used for logging)
+ * @s: session to send the packet for
+ * @final: nonzero to set the Final flag
+ *
+ * The packet is built in place in the TX buffer of the socket of the
+ * session's interface and sent to the session address and the socket's
+ * destination port.
+ */
+void
+bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final)
+{
+ sock *sk = s->ifa->sk;
+ struct bfd_ctl_packet *pkt = (struct bfd_ctl_packet *) sk->tbuf;
+ char fb[8];
+
+ pkt->vdiag = bfd_pack_vdiag(1, s->loc_diag);
+ pkt->flags = bfd_pack_flags(s->loc_state, 0);
+ pkt->detect_mult = s->detect_mult;
+ pkt->length = BFD_BASE_LEN;
+ pkt->snd_id = htonl(s->loc_id);
+ pkt->rcv_id = htonl(s->rem_id);
+ pkt->des_min_tx_int = htonl(s->des_min_tx_new);
+ pkt->req_min_rx_int = htonl(s->req_min_rx_new);
+ pkt->req_min_echo_rx_int = 0;
+
+ /* Final takes precedence; Poll is set only when no Final is requested */
+ if (final)
+ pkt->flags |= BFD_FLAG_FINAL;
+ else if (s->poll_active)
+ pkt->flags |= BFD_FLAG_POLL;
+
+ /* tpos differing from the buffer start is reported as an unsent older packet; it is overwritten anyway */
+ if (sk->tbuf != sk->tpos)
+ log(L_WARN "%s: Old packet overwritten in TX buffer", p->p.name);
+
+ TRACE(D_PACKETS, "Sending CTL to %I [%s%s]", s->addr,
+ bfd_state_names[s->loc_state], bfd_format_flags(pkt->flags, fb));
+
+ sk_send_to(sk, pkt->length, s->addr, sk->dport);
+}
+
+/* Record the reason and an associated value, then jump to the drop label of the enclosing function */
+#define DROP(DSC,VAL) do { err_dsc = DSC; err_val = VAL; goto drop; } while(0)
+
+/*
+ * bfd_rx_hook - RX hook of BFD sockets
+ * @sk: receiving socket; sk->data is the BFD protocol instance
+ * @len: length of the received data in sk->rbuf
+ *
+ * Validates the packet, finds the matching session, stores the remote
+ * parameters in it and passes the packet flags to
+ * bfd_session_process_ctl(). Invalid packets are logged and dropped.
+ * Always returns 1.
+ */
+static int
+bfd_rx_hook(sock *sk, int len)
+{
+ struct bfd_proto *p = sk->data;
+ struct bfd_ctl_packet *pkt = (struct bfd_ctl_packet *) sk->rbuf;
+ const char *err_dsc = NULL;
+ uint err_val = 0;
+ char fb[8];
+
+ /* TTL below 255 is rejected only on sockets bound to BFD_CONTROL_PORT */
+ if ((sk->sport == BFD_CONTROL_PORT) && (sk->ttl < 255))
+ DROP("wrong TTL", sk->ttl);
+
+ if (len < BFD_BASE_LEN)
+ DROP("too short", len);
+
+ u8 version = bfd_pkt_get_version(pkt);
+ if (version != 1)
+ DROP("version mismatch", version);
+
+ /* Declared length must cover the base packet and fit in the received data */
+ if ((pkt->length < BFD_BASE_LEN) || (pkt->length > len))
+ DROP("length mismatch", pkt->length);
+
+ if (pkt->detect_mult == 0)
+ DROP("invalid detect mult", 0);
+
+ /* Multipoint is rejected, and so is Poll together with Final */
+ if ((pkt->flags & BFD_FLAG_MULTIPOINT) ||
+ ((pkt->flags & BFD_FLAG_POLL) && (pkt->flags & BFD_FLAG_FINAL)))
+ DROP("invalid flags", pkt->flags);
+
+ if (pkt->snd_id == 0)
+ DROP("invalid my discriminator", 0);
+
+ struct bfd_session *s;
+ u32 id = ntohl(pkt->rcv_id);
+
+ /* Nonzero receiver ID selects the session by ID; otherwise it is looked up by source address */
+ if (id)
+ {
+ s = bfd_find_session_by_id(p, id);
+
+ if (!s)
+ DROP("unknown session id", id);
+ }
+ else
+ {
+ u8 ps = bfd_pkt_get_state(pkt);
+ if (ps > BFD_STATE_DOWN)
+ DROP("invalid init state", ps);
+
+ s = bfd_find_session_by_addr(p, sk->faddr);
+
+ /* FIXME: better session matching and message */
+ /* No session for this address: the packet is ignored without a log message */
+ if (!s)
+ return 1;
+ }
+
+ /* FIXME: better authentication handling and message */
+ if (pkt->flags & BFD_FLAG_AP)
+ DROP("authentication not supported", 0);
+
+
+ /* Intervals as they were before this packet, passed on to bfd_session_process_ctl() */
+ u32 old_tx_int = s->des_min_tx_int;
+ u32 old_rx_int = s->rem_min_rx_int;
+
+ s->rem_id= ntohl(pkt->snd_id);
+ s->rem_state = bfd_pkt_get_state(pkt);
+ s->rem_diag = bfd_pkt_get_diag(pkt);
+ s->rem_demand_mode = pkt->flags & BFD_FLAG_DEMAND;
+ s->rem_min_tx_int = ntohl(pkt->des_min_tx_int);
+ s->rem_min_rx_int = ntohl(pkt->req_min_rx_int);
+ s->rem_detect_mult = pkt->detect_mult;
+
+ TRACE(D_PACKETS, "CTL received from %I [%s%s]", sk->faddr,
+ bfd_state_names[s->rem_state], bfd_format_flags(pkt->flags, fb));
+
+ bfd_session_process_ctl(s, pkt->flags, old_tx_int, old_rx_int);
+ return 1;
+
+ drop:
+ log(L_REMOTE "%s: Bad packet from %I - %s (%u)", p->p.name, sk->faddr, err_dsc, err_val);
+ return 1;
+}
+
+/*
+ * bfd_err_hook - error hook of BFD sockets
+ * @sk: socket the error occurred on; sk->data is the BFD protocol instance
+ * @err: error code passed by the socket code
+ *
+ * Logs the error. The message uses %M, which formats the int argument
+ * passed to it, so that @err is what gets reported; %m takes no argument
+ * and would leave @err unused.
+ */
+static void
+bfd_err_hook(sock *sk, int err)
+{
+  struct bfd_proto *p = sk->data;
+  log(L_ERR "%s: Socket error: %M", p->p.name, err);
+}
+
+/*
+ * bfd_open_rx_sk - open a receiving UDP socket
+ * @p: BFD protocol instance; the socket is allocated in p->tpool
+ * @multihop: zero to listen on BFD_CONTROL_PORT, nonzero for BFD_MULTI_CTL_PORT
+ *
+ * On success the socket is registered with the current loop by sk_start()
+ * and returned. On failure of sk_open() the socket is freed and NULL is
+ * returned.
+ */
+sock *
+bfd_open_rx_sk(struct bfd_proto *p, int multihop)
+{
+ sock *sk = sk_new(p->tpool);
+ sk->type = SK_UDP;
+ sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT;
+ sk->data = p;
+
+ sk->rbsize = BFD_MAX_LEN;
+ sk->rx_hook = bfd_rx_hook;
+ sk->err_hook = bfd_err_hook;
+
+ /* TODO: configurable ToS and priority */
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->priority = sk_priority_control;
+ /* SKF_TTL_RX is requested only in the non-multihop case; bfd_rx_hook() checks sk->ttl there */
+ sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0);
+
+#ifdef IPV6
+ sk->flags |= SKF_V6ONLY;
+#endif
+
+ if (sk_open(sk) < 0)
+ goto err;
+
+ sk_start(sk);
+ return sk;
+
+ err:
+ rfree(sk);
+ return NULL;
+}
+
+/*
+ * bfd_open_tx_sk - open a sending UDP socket
+ * @p: BFD protocol instance; the socket is allocated in p->tpool
+ * @local: source address of the socket
+ * @ifa: interface to bind to, or NULL
+ *
+ * With an interface the destination port is BFD_CONTROL_PORT and TTL is
+ * 255; without one the destination port is BFD_MULTI_CTL_PORT and TTL is
+ * set to -1. On success the socket is registered with the current loop by
+ * sk_start() and returned. On failure of sk_open() the socket is freed and
+ * NULL is returned.
+ */
+sock *
+bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa)
+{
+ sock *sk = sk_new(p->tpool);
+ sk->type = SK_UDP;
+ sk->saddr = local;
+ sk->dport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT;
+ sk->iface = ifa;
+ sk->data = p;
+
+ sk->tbsize = BFD_MAX_LEN;
+ sk->err_hook = bfd_err_hook;
+
+ /* TODO: configurable ToS, priority and TTL security */
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->priority = sk_priority_control;
+ sk->ttl = ifa ? 255 : -1;
+ sk->flags = SKF_THREAD;
+
+#ifdef IPV6
+ sk->flags |= SKF_V6ONLY;
+#endif
+
+ if (sk_open(sk) < 0)
+ goto err;
+
+ sk_start(sk);
+ return sk;
+
+ err:
+ rfree(sk);
+ return NULL;
+}
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index 7cad75df..07ad31f3 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -59,8 +59,9 @@
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
-#include "nest/locks.h"
+#include "nest/bfd.h"
#include "nest/cli.h"
+#include "nest/locks.h"
#include "conf/conf.h"
#include "lib/socket.h"
#include "lib/resource.h"
@@ -76,6 +77,7 @@ static void bgp_close(struct bgp_proto *p, int apply_md5);
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
+static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
/**
@@ -153,8 +155,12 @@ bgp_initiate(struct bgp_proto *p)
if (rv < 0)
return;
+ if (p->cf->bfd)
+ bgp_update_bfd(p, p->cf->bfd);
+
if (p->startup_delay)
{
+ p->start_state = BSS_DELAY;
BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay);
bgp_start_timer(p->startup_timer, p->startup_delay);
}
@@ -765,6 +771,37 @@ bgp_neigh_notify(neighbor *n)
}
}
+/*
+ * BFD request hook; req->data is the BGP protocol instance. When the request
+ * reports down and the protocol is in PS_START or PS_UP, the BFD-down error
+ * is stored and the protocol is stopped; the startup delay is updated only
+ * when the protocol was up.
+ */
+static void
+bgp_bfd_notify(struct bfd_request *req)
+{
+ struct bgp_proto *p = req->data;
+ int ps = p->p.proto_state;
+
+ if (req->down && ((ps == PS_START) || (ps == PS_UP)))
+ {
+ BGP_TRACE(D_EVENTS, "BFD session down");
+ bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
+ if (ps == PS_UP)
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0);
+ }
+}
+
+/*
+ * Bring the BFD request of @p in line with @use_bfd: request a session when
+ * BFD is wanted and no request exists, free the request when BFD is not
+ * wanted and one exists. The interface is passed only for non-multihop BGP.
+ */
+static void
+bgp_update_bfd(struct bgp_proto *p, int use_bfd)
+{
+  if (use_bfd)
+  {
+    if (!p->bfd_req)
+      p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
+                                       p->cf->multihop ? NULL : p->neigh->iface,
+                                       bgp_bfd_notify, p);
+    return;
+  }
+
+  if (p->bfd_req)
+  {
+    rfree(p->bfd_req);
+    p->bfd_req = NULL;
+  }
+}
+
static int
bgp_reload_routes(struct proto *P)
{
@@ -825,6 +862,7 @@ bgp_start(struct proto *P)
p->outgoing_conn.state = BS_IDLE;
p->incoming_conn.state = BS_IDLE;
p->neigh = NULL;
+ p->bfd_req = NULL;
rt_lock_table(p->igp_table);
@@ -992,6 +1030,9 @@ bgp_check_config(struct bgp_config *c)
ipa_has_link_scope(c->source_addr)))
cf_error("Multihop BGP cannot be used with link-local addresses");
+ if (c->multihop && c->bfd && ipa_zero(c->source_addr))
+ cf_error("Multihop BGP with BFD requires specified source address");
+
/* Different default based on rs_client */
if (!c->missing_lladdr)
@@ -1034,6 +1075,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *C)
|| (old->password && new->password && !strcmp(old->password, new->password)))
&& (get_igp_table(old) == get_igp_table(new));
+ if (same && (p->start_state > BSS_PREPARE))
+ bgp_update_bfd(p, new->bfd);
+
/* We should update our copy of configuration ptr as old configuration will be freed */
if (same)
p->cf = new;
@@ -1115,7 +1159,7 @@ bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
-static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket" };
+static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down" };
static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
static const char *
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index bcbdf2cc..d2a96bbb 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -14,6 +14,7 @@
struct linpool;
struct eattr;
+struct bfd_request;
struct bgp_config {
struct proto_config c;
@@ -53,8 +54,10 @@ struct bgp_config {
unsigned error_delay_time_min; /* Time to wait after an error is detected */
unsigned error_delay_time_max;
unsigned disable_after_error; /* Disable the protocol when error is detected */
+
char *password; /* Password used for MD5 authentication */
struct rtable_config *igp_table; /* Table used for recursive next hop lookups */
+ int bfd; /* Use BFD for liveness detection */
};
#define MLL_SELF 1
@@ -100,6 +103,7 @@ struct bgp_proto {
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
struct object_lock *lock; /* Lock for neighbor connection */
struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */
+ struct bfd_request *bfd_req; /* BFD request, if BFD is used */
ip_addr source_addr; /* Local address used as an advertised next hop */
rtable *igp_table; /* Table used for recursive next hop lookups */
struct event *event; /* Event for respawning and shutting process */
@@ -288,6 +292,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define BEM_INVALID_NEXT_HOP 2
#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */
#define BEM_NO_SOCKET 4
+#define BEM_BFD_DOWN 5
/* Automatic shutdown error codes */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index f4b2c5fe..185b1bda 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -26,7 +26,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY,
PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH,
INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP,
TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC,
- SECONDARY, ALLOW)
+ SECONDARY, ALLOW, BFD)
CF_GRAMMAR
@@ -112,6 +112,7 @@ bgp_proto:
| bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; }
| bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; }
| bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; }
+ | bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); }
;
CF_ADDTO(dynamic_attr, BGP_ORIGIN
diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y
index 3f5419dd..68efa230 100644
--- a/proto/ospf/config.Y
+++ b/proto/ospf/config.Y
@@ -309,6 +309,7 @@ ospf_iface_item:
| TX PRIORITY expr { OSPF_PATT->tx_priority = $3; }
| TTL SECURITY bool { OSPF_PATT->ttl_security = $3; }
| TTL SECURITY TX ONLY { OSPF_PATT->ttl_security = 2; }
+ | BFD bool { OSPF_PATT->bfd = $2; cf_check_bfd($2); }
| password_list
;
diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c
index 68c345f4..b6b11004 100644
--- a/proto/ospf/hello.c
+++ b/proto/ospf/hello.c
@@ -151,6 +151,9 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa,
#ifdef OSPFv3
n->iface_id = ntohl(ps->iface_id);
#endif
+
+ if (n->ifa->cf->bfd)
+ ospf_neigh_update_bfd(n, n->ifa->bfd);
}
#ifdef OSPFv3 /* NOTE: this could also be relevant for OSPFv2 on PtP ifaces */
else if (!ipa_equal(faddr, n->ip))
diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c
index 63c26466..f1409840 100644
--- a/proto/ospf/iface.c
+++ b/proto/ospf/iface.c
@@ -536,6 +536,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
ifa->check_link = ip->check_link;
ifa->ecmp_weight = ip->ecmp_weight;
ifa->check_ttl = (ip->ttl_security == 1);
+ ifa->bfd = ip->bfd;
#ifdef OSPFv2
ifa->autype = ip->autype;
@@ -840,6 +841,19 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
ifa->ecmp_weight = new->ecmp_weight;
}
+ /* BFD */
+ if (ifa->bfd != new->bfd)
+ {
+ OSPF_TRACE(D_EVENTS, "%s BFD on interface %s",
+ new->bfd ? "Enabling" : "Disabling", ifname);
+ ifa->bfd = new->bfd;
+
+ struct ospf_neighbor *n;
+ WALK_LIST(n, ifa->neigh_list)
+ ospf_neigh_update_bfd(n, ifa->bfd);
+ }
+
+
/* instance_id is not updated - it is part of key */
return 1;
diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c
index 26d81dce..61224ec2 100644
--- a/proto/ospf/neighbor.c
+++ b/proto/ospf/neighbor.c
@@ -582,6 +582,36 @@ ospf_neigh_remove(struct ospf_neighbor *n)
OSPF_TRACE(D_EVENTS, "Deleting neigbor.");
}
+/* BFD request hook; req->data is the OSPF neighbor. When the request reports down, the neighbor is removed. */
+static void
+ospf_neigh_bfd_hook(struct bfd_request *req)
+{
+ struct ospf_neighbor *n = req->data;
+ /* p is referenced by the OSPF_TRACE() macro */
+ struct proto *p = &n->ifa->oa->po->proto;
+
+ if (req->down)
+ {
+ OSPF_TRACE(D_EVENTS, "BFD session down for %I on %s",
+ n->ip, n->ifa->iface->name);
+
+ ospf_neigh_remove(n);
+ }
+}
+
+/*
+ * Bring the BFD request of neighbor @n in line with @use_bfd: request a
+ * session when BFD is wanted and no request exists, free the request when
+ * BFD is not wanted and one exists.
+ */
+void
+ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd)
+{
+  if (use_bfd)
+  {
+    if (!n->bfd_req)
+      n->bfd_req = bfd_request_session(n->pool, n->ip, n->ifa->addr->ip, n->ifa->iface,
+                                       ospf_neigh_bfd_hook, n);
+    return;
+  }
+
+  if (n->bfd_req)
+  {
+    rfree(n->bfd_req);
+    n->bfd_req = NULL;
+  }
+}
+
+
void
ospf_sh_neigh_info(struct ospf_neighbor *n)
{
diff --git a/proto/ospf/neighbor.h b/proto/ospf/neighbor.h
index f593faed..e674927d 100644
--- a/proto/ospf/neighbor.h
+++ b/proto/ospf/neighbor.h
@@ -16,6 +16,7 @@ void bdr_election(struct ospf_iface *ifa);
struct ospf_neighbor *find_neigh(struct ospf_iface *ifa, u32 rid);
struct ospf_neighbor *find_neigh_by_ip(struct ospf_iface *ifa, ip_addr ip);
void ospf_neigh_remove(struct ospf_neighbor *n);
+void ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd);
void ospf_sh_neigh_info(struct ospf_neighbor *n);
#endif /* _BIRD_OSPF_NEIGHBOR_H_ */
diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h
index f1409af3..46a1c3c1 100644
--- a/proto/ospf/ospf.h
+++ b/proto/ospf/ospf.h
@@ -46,6 +46,7 @@ do { if ((p->debug & D_PACKETS) || OSPF_FORCE_DEBUG) \
#include "nest/route.h"
#include "nest/cli.h"
#include "nest/locks.h"
+#include "nest/bfd.h"
#include "conf/conf.h"
#include "lib/string.h"
@@ -276,6 +277,7 @@ struct ospf_iface
u8 ecmp_weight; /* Weight used for ECMP */
u8 ptp_netmask; /* Send real netmask for P2P */
u8 check_ttl; /* Check incoming packets for TTL 255 */
+ u8 bfd; /* Use BFD on iface */
};
struct ospf_md5
@@ -708,6 +710,7 @@ struct ospf_neighbor
#define ACKL_DIRECT 0
#define ACKL_DELAY 1
timer *ackd_timer; /* Delayed ack timer */
+ struct bfd_request *bfd_req; /* BFD request, if BFD is used */
u32 csn; /* Last received crypt seq number (for MD5) */
};
@@ -818,6 +821,7 @@ struct ospf_iface_patt
u8 real_bcast; /* Not really used in OSPFv3 */
u8 ptp_netmask; /* bool + 2 for unspecified */
u8 ttl_security; /* bool + 2 for TX only */
+ u8 bfd;
#ifdef OSPFv2
list *passwords;
diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c
index 5d93c0e9..f25db9a7 100644
--- a/proto/ospf/topology.c
+++ b/proto/ospf/topology.c
@@ -103,7 +103,8 @@ lsab_alloc(struct proto_ospf *po, unsigned size)
if (po->lsab_used > po->lsab_size)
{
po->lsab_size = MAX(po->lsab_used, 2 * po->lsab_size);
- po->lsab = mb_realloc(po->proto.pool, po->lsab, po->lsab_size);
+ po->lsab = po->lsab ? mb_realloc(po->lsab, po->lsab_size):
+ mb_alloc(po->proto.pool, po->lsab_size);
}
return ((byte *) po->lsab) + offset;
}
diff --git a/proto/radv/radv.c b/proto/radv/radv.c
index a6b9b16c..90408536 100644
--- a/proto/radv/radv.c
+++ b/proto/radv/radv.c
@@ -15,7 +15,7 @@
* The RAdv protocol is implemented in two files: |radv.c| containing
* the interface with BIRD core and the protocol logic and |packets.c|
* handling low level protocol stuff (RX, TX and packet formats).
- * The protocol does not import or export any routes.
+ * The protocol does not export any routes.
*
* The RAdv is structured in the usual way - for each handled interface
* there is a structure &radv_iface that contains a state related to
diff --git a/proto/rip/rip.c b/proto/rip/rip.c
index 3ec070b3..ad285bb3 100644
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@ -63,6 +63,7 @@
#define P ((struct rip_proto *) p)
#define P_CF ((struct rip_proto_config *)p->cf)
+#undef TRACE
#define TRACE(level, msg, args...) do { if (p->debug & level) { log(L_TRACE "%s: " msg, p->name , ## args); } } while(0)
static struct rip_interface *new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_patt *patt);
diff --git a/sysdep/autoconf.h.in b/sysdep/autoconf.h.in
index ac6f7a87..a9e46e27 100644
--- a/sysdep/autoconf.h.in
+++ b/sysdep/autoconf.h.in
@@ -39,10 +39,14 @@
#undef CONFIG_STATIC
#undef CONFIG_RIP
#undef CONFIG_RADV
+#undef CONFIG_BFD
#undef CONFIG_BGP
#undef CONFIG_OSPF
#undef CONFIG_PIPE
+/* We use multithreading */
+#undef USE_PTHREADS
+
/* We have <syslog.h> and syslog() */
#undef HAVE_SYSLOG
diff --git a/sysdep/config.h b/sysdep/config.h
index 7bfb05d6..e2320411 100644
--- a/sysdep/config.h
+++ b/sysdep/config.h
@@ -34,6 +34,7 @@ typedef INTEGER_64 s64;
typedef unsigned INTEGER_64 u64;
typedef u8 byte;
typedef u16 word;
+typedef unsigned int uint;
#endif
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 51c6c0c1..6e3f1e4d 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -538,6 +538,11 @@ sk_free(resource *r)
if (s->fd >= 0)
{
close(s->fd);
+
+ /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
+ if (s->flags & SKF_THREAD)
+ return;
+
if (s == current_sock)
current_sock = sk_next(s);
if (s == stored_sock)
@@ -1240,7 +1245,8 @@ sk_open(sock *s)
#endif
}
- sk_insert(s);
+ if (!(s->flags & SKF_THREAD))
+ sk_insert(s);
return 0;
bad:
@@ -1428,7 +1434,9 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa,
}
*/
-static int
+ /* sk_read() and sk_write() are called from BFD's event loop */
+
+int
sk_read(sock *s)
{
switch (s->type)
@@ -1505,7 +1513,7 @@ sk_read(sock *s)
}
}
-static int
+int
sk_write(sock *s)
{
switch (s->type)
@@ -1523,7 +1531,8 @@ sk_write(sock *s)
default:
if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
{
- s->tx_hook(s);
+ if (s->tx_hook)
+ s->tx_hook(s);
return 1;
}
return 0;
diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c
index f3a66f8b..9dd4d66f 100644
--- a/sysdep/unix/log.c
+++ b/sysdep/unix/log.c
@@ -32,8 +32,24 @@ static FILE *dbgf;
static list *current_log_list;
static char *current_syslog_name; /* NULL -> syslog closed */
-bird_clock_t rate_limit_time = 5;
-int rate_limit_count = 5;
+static const bird_clock_t rate_limit_time = 5;
+static const int rate_limit_count = 5;
+
+
+#ifdef USE_PTHREADS
+
+#include <pthread.h>
+static pthread_mutex_t log_mutex;
+static inline void log_lock(void) { pthread_mutex_lock(&log_mutex); }
+static inline void log_unlock(void) { pthread_mutex_unlock(&log_mutex); }
+
+#else
+
+static inline void log_lock(void) { }
+static inline void log_unlock(void) { }
+
+#endif
+
#ifdef HAVE_SYSLOG
#include <sys/syslog.h>
@@ -65,28 +81,6 @@ static char *class_names[] = {
"BUG"
};
-#define LOG_BUFFER_SIZE 1024
-static char log_buffer[LOG_BUFFER_SIZE];
-static char *log_buffer_pos;
-static int log_buffer_remains;
-
-const char *log_buffer_ptr = log_buffer;
-
-
-/**
- * log_reset - reset the log buffer
- *
- * This function resets a log buffer and discards buffered
- * messages. Should be used before a log message is prepared
- * using logn().
- */
-void
-log_reset(void)
-{
- log_buffer_pos = log_buffer;
- log_buffer_remains = LOG_BUFFER_SIZE;
- log_buffer[0] = 0;
-}
/**
* log_commit - commit a log message
@@ -101,10 +95,14 @@ log_reset(void)
* in log(), so it should be written like *L_INFO.
*/
void
-log_commit(int class)
+log_commit(int class, buffer *buf)
{
struct log_config *l;
+ if (buf->pos == buf->end)
+ strcpy(buf->end - 100, " ... <too long>");
+
+ log_lock();
WALK_LIST(l, *current_log_list)
{
if (!(l->mask & (1 << class)))
@@ -119,47 +117,30 @@ log_commit(int class)
tm_format_datetime(tbuf, &config->tf_log, now);
fprintf(l->fh, "%s <%s> ", tbuf, class_names[class]);
}
- fputs(log_buffer, l->fh);
+ fputs(buf->start, l->fh);
fputc('\n', l->fh);
fflush(l->fh);
}
#ifdef HAVE_SYSLOG
else
- syslog(syslog_priorities[class], "%s", log_buffer);
+ syslog(syslog_priorities[class], "%s", buf->start);
#endif
}
- cli_echo(class, log_buffer);
-
- log_reset();
-}
-
-static void
-log_print(const char *msg, va_list args)
-{
- int i;
-
- if (log_buffer_remains == 0)
- return;
-
- i=bvsnprintf(log_buffer_pos, log_buffer_remains, msg, args);
- if (i < 0)
- {
- bsprintf(log_buffer + LOG_BUFFER_SIZE - 100, " ... <too long>");
- log_buffer_remains = 0;
- return;
- }
+ log_unlock();
- log_buffer_pos += i;
- log_buffer_remains -= i;
+ /* FIXME: cli_echo is not thread-safe */
+ cli_echo(class, buf->start);
}
+int buffer_vprint(buffer *buf, const char *fmt, va_list args);
static void
vlog(int class, const char *msg, va_list args)
{
- log_reset();
- log_print(msg, args);
- log_commit(class);
+ buffer buf;
+ LOG_BUFFER_INIT(buf);
+ buffer_vprint(&buf, msg, args);
+ log_commit(class, &buf);
}
@@ -188,26 +169,6 @@ log_msg(char *msg, ...)
va_end(args);
}
-/**
- * logn - prepare a partial message in the log buffer
- * @msg: printf-like formatting string (without message class information)
- *
- * This function formats a message according to the format string @msg
- * and adds it to the log buffer. Messages in the log buffer are
- * logged when the buffer is flushed using log_commit() function. The
- * message should not contain |\n|, log_commit() also terminates a
- * line.
- */
-void
-logn(char *msg, ...)
-{
- va_list args;
-
- va_start(args, msg);
- log_print(msg, args);
- va_end(args);
-}
-
void
log_rl(struct rate_limit *rl, char *msg, ...)
{
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index 165dab2b..7a945826 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -199,7 +199,7 @@ unix_read_config(struct config **cp, char *name)
return ret;
}
-static void
+static struct config *
read_config(void)
{
struct config *conf;
@@ -211,7 +211,8 @@ read_config(void)
else
die("Unable to open configuration file %s: %m", config_name);
}
- config_commit(conf, RECONFIG_HARD, 0);
+
+ return conf;
}
void
@@ -776,7 +777,7 @@ main(int argc, char **argv)
proto_build(&proto_unix_kernel);
proto_build(&proto_unix_iface);
- read_config();
+ struct config *conf = read_config();
if (parse_and_exit)
exit(0);
@@ -800,6 +801,8 @@ main(int argc, char **argv)
signal_init();
+ config_commit(conf, RECONFIG_HARD, 0);
+
#ifdef LOCAL_DEBUG
async_dump_flag = 1;
#endif