summaryrefslogtreecommitdiff
path: root/sysdep
diff options
context:
space:
mode:
Diffstat (limited to 'sysdep')
-rw-r--r--sysdep/bsd/krt-sock.c53
-rw-r--r--sysdep/bsd/sysio.h6
-rw-r--r--sysdep/config.h2
-rw-r--r--sysdep/linux/krt-sys.h33
-rw-r--r--sysdep/linux/netlink.Y50
-rw-r--r--sysdep/linux/netlink.c699
-rw-r--r--sysdep/linux/sysio.h19
-rw-r--r--sysdep/unix/Makefile2
-rw-r--r--sysdep/unix/alloc.c191
-rw-r--r--sysdep/unix/io.c35
-rw-r--r--sysdep/unix/krt.Y5
-rw-r--r--sysdep/unix/krt.c122
-rw-r--r--sysdep/unix/krt.h4
-rw-r--r--sysdep/unix/main.c42
14 files changed, 757 insertions, 506 deletions
diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c
index 6f788ac2..1c1bd50c 100644
--- a/sysdep/bsd/krt-sock.c
+++ b/sysdep/bsd/krt-sock.c
@@ -25,7 +25,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "sysdep/unix/unix.h"
@@ -366,6 +366,30 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old)
}
}
+/**
+ * krt_assume_onlink - check if routes on interface are considered onlink
+ * @iface: The interface of the next hop
+ * @ipv6: Switch to only consider IPv6 or IPv4 addresses.
+ *
+ * The BSD kernel does not support an onlink flag. If the interface has only
+ * host addresses configured, all routes should be considered as onlink and
+ * the function returns 1.
+ */
+static int
+krt_assume_onlink(struct iface *iface, int ipv6)
+{
+ const u8 type = ipv6 ? NET_IP6 : NET_IP4;
+
+ struct ifa *ifa;
+ WALK_LIST(ifa, iface->addrs)
+ {
+ if ((ifa->prefix.type == type) && !(ifa->flags & IA_HOST))
+ return 0;
+ }
+
+ return 1;
+}
+
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
static void
@@ -494,10 +518,10 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan)
net = net_get(p->p.main_channel->table, &ndst);
rta a = {
- .source = RTS_INHERIT,
- .scope = SCOPE_UNIVERSE,
};
+ ea_set_attr_u32(&a->eattrs, &ea_gen_source, 0, RTS_INHERIT);
+
/* reject/blackhole routes have also set RTF_GATEWAY,
we wil check them first. */
@@ -526,15 +550,21 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan)
a.dest = RTD_UNICAST;
if (flags & RTF_GATEWAY)
{
- neighbor *ng;
a.nh.gw = igate;
/* Clean up embedded interface ID returned in link-local address */
if (ipa_is_link_local(a.nh.gw))
_I0(a.nh.gw) = 0xfe800000;
- ng = neigh_find(&p->p, a.nh.gw, a.nh.iface, 0);
- if (!ng || (ng->scope == SCOPE_HOST))
+ /* The BSD kernel does not support an onlink flag. We heuristically
+ set the onlink flag, if the iface has only host addresses. */
+ if (krt_assume_onlink(a.nh.iface, ipv6))
+ a.nh.flags |= RNF_ONLINK;
+
+ neighbor *nbr;
+ nbr = neigh_find(&p->p, a.nh.gw, a.nh.iface,
+ (a.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
+ if (!nbr || (nbr->scope == SCOPE_HOST))
{
/* Ignore routes with next-hop 127.0.0.1, host routes with such
next-hop appear on OpenBSD for address aliases. */
@@ -550,15 +580,8 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan)
done:;
rte e0 = { .attrs = &a, .net = net, };
- ea_list *ea = alloca(sizeof(ea_list) + 1 * sizeof(eattr));
- *ea = (ea_list) { .count = 1, .next = e0.attrs->eattrs };
- e0.attrs->eattrs = ea;
-
- ea->attrs[0] = (eattr) {
- .id = EA_KRT_SOURCE,
- .type = EAF_TYPE_INT,
- .u.data = src2,
- };
+ ea_set_attr(e0.attrs->eattrs,
+ EA_LITERAL_EMBEDDED(EA_KRT_SOURCE, T_INT, 0, src2));
if (scan)
krt_got_route(p, &e0, src);
diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h
index c757960a..f1887fb4 100644
--- a/sysdep/bsd/sysio.h
+++ b/sysdep/bsd/sysio.h
@@ -271,3 +271,9 @@ sk_set_priority(sock *s, int prio UNUSED)
{
ERR_MSG("Socket priority not supported");
}
+
+static inline int
+sk_set_freebind(sock *s)
+{
+ ERR_MSG("Freebind is not supported");
+}
diff --git a/sysdep/config.h b/sysdep/config.h
index 55be90f0..5cdadbb0 100644
--- a/sysdep/config.h
+++ b/sysdep/config.h
@@ -13,7 +13,7 @@
#ifdef GIT_LABEL
#define BIRD_VERSION XSTR1(GIT_LABEL)
#else
-#define BIRD_VERSION "2.0.8"
+#define BIRD_VERSION "2.0.10"
#endif
/* Include parameters determined by configure script */
diff --git a/sysdep/linux/krt-sys.h b/sysdep/linux/krt-sys.h
index a8af4c95..aa90f6e4 100644
--- a/sysdep/linux/krt-sys.h
+++ b/sysdep/linux/krt-sys.h
@@ -34,41 +34,10 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i UNUSED) { return N
#define KRT_ALLOW_MERGE_PATHS 1
-#define EA_KRT_PREFSRC EA_CODE(PROTOCOL_KERNEL, 0x10)
-#define EA_KRT_REALM EA_CODE(PROTOCOL_KERNEL, 0x11)
-#define EA_KRT_SCOPE EA_CODE(PROTOCOL_KERNEL, 0x12)
-
-
-#define KRT_METRICS_MAX 0x10 /* RTAX_QUICKACK+1 */
-#define KRT_METRICS_OFFSET 0x20 /* Offset of EA_KRT_* vs RTAX_* */
-
-#define KRT_FEATURES_MAX 4
-
-/*
- * Following attributes are parts of RTA_METRICS kernel route attribute, their
- * ids must be consistent with their RTAX_* constants (+ KRT_METRICS_OFFSET)
- */
-#define EA_KRT_METRICS EA_CODE(PROTOCOL_KERNEL, 0x20) /* Dummy one */
-#define EA_KRT_LOCK EA_CODE(PROTOCOL_KERNEL, 0x21)
-#define EA_KRT_MTU EA_CODE(PROTOCOL_KERNEL, 0x22)
-#define EA_KRT_WINDOW EA_CODE(PROTOCOL_KERNEL, 0x23)
-#define EA_KRT_RTT EA_CODE(PROTOCOL_KERNEL, 0x24)
-#define EA_KRT_RTTVAR EA_CODE(PROTOCOL_KERNEL, 0x25)
-#define EA_KRT_SSTRESH EA_CODE(PROTOCOL_KERNEL, 0x26)
-#define EA_KRT_CWND EA_CODE(PROTOCOL_KERNEL, 0x27)
-#define EA_KRT_ADVMSS EA_CODE(PROTOCOL_KERNEL, 0x28)
-#define EA_KRT_REORDERING EA_CODE(PROTOCOL_KERNEL, 0x29)
-#define EA_KRT_HOPLIMIT EA_CODE(PROTOCOL_KERNEL, 0x2a)
-#define EA_KRT_INITCWND EA_CODE(PROTOCOL_KERNEL, 0x2b)
-#define EA_KRT_FEATURES EA_CODE(PROTOCOL_KERNEL, 0x2c)
-#define EA_KRT_RTO_MIN EA_CODE(PROTOCOL_KERNEL, 0x2d)
-#define EA_KRT_INITRWND EA_CODE(PROTOCOL_KERNEL, 0x2e)
-#define EA_KRT_QUICKACK EA_CODE(PROTOCOL_KERNEL, 0x2f)
-
-
struct krt_params {
u32 table_id; /* Kernel table ID we sync with */
u32 metric; /* Kernel metric used for all routes */
+ uint netlink_rx_buffer; /* Rx buffer size for the netlink socket */
};
struct krt_state {
diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y
index 7097f577..7ba8c7c9 100644
--- a/sysdep/linux/netlink.Y
+++ b/sysdep/linux/netlink.Y
@@ -10,9 +10,7 @@ CF_HDR
CF_DECLS
-CF_KEYWORDS(KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW,
- KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING,
- KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK,
+CF_KEYWORDS(KERNEL, TABLE, METRIC, NETLINK, RX, BUFFER,
KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR,
KRT_LOCK_SSTRESH, KRT_LOCK_CWND, KRT_LOCK_ADVMSS, KRT_LOCK_REORDERING,
KRT_LOCK_HOPLIMIT, KRT_LOCK_RTO_MIN, KRT_FEATURE_ECN, KRT_FEATURE_ALLFRAG)
@@ -24,41 +22,25 @@ kern_proto: kern_proto kern_sys_item ';' ;
kern_sys_item:
KERNEL TABLE expr { THIS_KRT->sys.table_id = $3; }
| METRIC expr { THIS_KRT->sys.metric = $2; }
+ | NETLINK RX BUFFER expr { THIS_KRT->sys.netlink_rx_buffer = $4; }
;
-dynamic_attr: KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); } ;
-dynamic_attr: KRT_REALM { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REALM); } ;
-dynamic_attr: KRT_SCOPE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SCOPE); } ;
-
-dynamic_attr: KRT_MTU { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_MTU); } ;
-dynamic_attr: KRT_WINDOW { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_WINDOW); } ;
-dynamic_attr: KRT_RTT { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTT); } ;
-dynamic_attr: KRT_RTTVAR { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTTVAR); } ;
-dynamic_attr: KRT_SSTRESH { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SSTRESH); } ;
-dynamic_attr: KRT_CWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_CWND); } ;
-dynamic_attr: KRT_ADVMSS { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_ADVMSS); } ;
-dynamic_attr: KRT_REORDERING { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REORDERING); } ;
-dynamic_attr: KRT_HOPLIMIT { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_HOPLIMIT); } ;
-dynamic_attr: KRT_INITCWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_INITCWND); } ;
-dynamic_attr: KRT_RTO_MIN { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTO_MIN); } ;
-dynamic_attr: KRT_INITRWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_INITRWND); } ;
-dynamic_attr: KRT_QUICKACK { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_QUICKACK); } ;
-
/* Bits of EA_KRT_LOCK, based on RTAX_* constants */
-dynamic_attr: KRT_LOCK_MTU { $$ = f_new_dynamic_attr_bit(2, T_BOOL, EA_KRT_LOCK); } ;
-dynamic_attr: KRT_LOCK_WINDOW { $$ = f_new_dynamic_attr_bit(3, T_BOOL, EA_KRT_LOCK); } ;
-dynamic_attr: KRT_LOCK_RTT { $$ = f_new_dynamic_attr_bit(4, T_BOOL, EA_KRT_LOCK); } ;
-dynamic_attr: KRT_LOCK_RTTVAR { $$ = f_new_dynamic_attr_bit(5, T_BOOL, EA_KRT_LOCK); } ;
-dynamic_attr: KRT_LOCK_SSTRESH { $$ = f_new_dynamic_attr_bit(6, T_BOOL, EA_KRT_LOCK); } ;
-dynamic_attr: KRT_LOCK_CWND { $$ = f_new_dynamic_attr_bit(7, T_BOOL, EA_KRT_LOCK); } ;
-dynamic_attr: KRT_LOCK_ADVMSS { $$ = f_new_dynamic_attr_bit(8, T_BOOL, EA_KRT_LOCK); } ;
-dynamic_attr: KRT_LOCK_REORDERING { $$ = f_new_dynamic_attr_bit(9, T_BOOL, EA_KRT_LOCK); } ;
-dynamic_attr: KRT_LOCK_HOPLIMIT { $$ = f_new_dynamic_attr_bit(10, T_BOOL, EA_KRT_LOCK); } ;
-dynamic_attr: KRT_LOCK_RTO_MIN { $$ = f_new_dynamic_attr_bit(13, T_BOOL, EA_KRT_LOCK); } ;
-
-dynamic_attr: KRT_FEATURE_ECN { $$ = f_new_dynamic_attr_bit(0, T_BOOL, EA_KRT_FEATURES); } ;
-dynamic_attr: KRT_FEATURE_ALLFRAG { $$ = f_new_dynamic_attr(3, T_BOOL, EA_KRT_FEATURES); } ;
+attr_bit: KRT_LOCK_MTU { $$ = f_new_dynamic_attr_bit(2, "krt_lock"); } ;
+attr_bit: KRT_LOCK_WINDOW { $$ = f_new_dynamic_attr_bit(3, "krt_lock"); } ;
+attr_bit: KRT_LOCK_RTT { $$ = f_new_dynamic_attr_bit(4, "krt_lock"); } ;
+attr_bit: KRT_LOCK_RTTVAR { $$ = f_new_dynamic_attr_bit(5, "krt_lock"); } ;
+attr_bit: KRT_LOCK_SSTRESH { $$ = f_new_dynamic_attr_bit(6, "krt_lock"); } ;
+attr_bit: KRT_LOCK_CWND { $$ = f_new_dynamic_attr_bit(7, "krt_lock"); } ;
+attr_bit: KRT_LOCK_ADVMSS { $$ = f_new_dynamic_attr_bit(8, "krt_lock"); } ;
+attr_bit: KRT_LOCK_REORDERING { $$ = f_new_dynamic_attr_bit(9, "krt_lock"); } ;
+attr_bit: KRT_LOCK_HOPLIMIT { $$ = f_new_dynamic_attr_bit(10, "krt_lock"); } ;
+attr_bit: KRT_LOCK_RTO_MIN { $$ = f_new_dynamic_attr_bit(13, "krt_lock"); } ;
+
+/* Bits of EA_KRT_FEATURES */
+attr_bit: KRT_FEATURE_ECN { $$ = f_new_dynamic_attr_bit(0, "krt_features"); } ;
+attr_bit: KRT_FEATURE_ALLFRAG { $$ = f_new_dynamic_attr_bit(3, "krt_features"); } ;
CF_CODE
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index bff2d579..656202ac 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -17,7 +17,7 @@
#undef LOCAL_DEBUG
#include "nest/bird.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "lib/alloca.h"
@@ -26,6 +26,7 @@
#include "lib/socket.h"
#include "lib/string.h"
#include "lib/hash.h"
+#include "lib/macro.h"
#include "conf/conf.h"
#include <asm/types.h>
@@ -69,6 +70,10 @@
#define RTA_ENCAP 22
#endif
+#ifndef NETLINK_GET_STRICT_CHK
+#define NETLINK_GET_STRICT_CHK 12
+#endif
+
#define krt_ipv4(p) ((p)->af == AF_INET)
#define krt_ecmp6(p) ((p)->af == AF_INET6)
@@ -106,7 +111,7 @@ struct nl_parse_state
int merge;
net_addr *net;
- rta *attrs;
+ ea_list *attrs;
struct krt_proto *proto;
s8 new;
s8 krt_src;
@@ -118,6 +123,101 @@ struct nl_parse_state
};
/*
+ * Netlink eattr definitions
+ */
+
+#define KRT_METRICS_MAX ARRAY_SIZE(ea_krt_metrics)
+#define KRT_FEATURES_MAX 4
+
+static void krt_bitfield_format(const eattr *e, byte *buf, uint buflen);
+
+static struct ea_class
+ ea_krt_prefsrc = {
+ .name = "krt_prefsrc",
+ .type = T_IP,
+ },
+ ea_krt_realm = {
+ .name = "krt_realm",
+ .type = T_INT,
+ },
+ ea_krt_scope = {
+ .name = "krt_scope",
+ .type = T_INT,
+ };
+
+static struct ea_class ea_krt_metrics[] = {
+ [RTAX_LOCK] = {
+ .name = "krt_lock",
+ .type = T_INT,
+ .format = krt_bitfield_format,
+ },
+ [RTAX_FEATURES] = {
+ .name = "krt_features",
+ .type = T_INT,
+ .format = krt_bitfield_format,
+ },
+#define KRT_METRIC_INT(_rtax, _name) [_rtax] = { .name = _name, .type = T_INT }
+ KRT_METRIC_INT(RTAX_MTU, "krt_mtu"),
+ KRT_METRIC_INT(RTAX_WINDOW, "krt_window"),
+ KRT_METRIC_INT(RTAX_RTT, "krt_rtt"),
+ KRT_METRIC_INT(RTAX_RTTVAR, "krt_rttvar"),
+ KRT_METRIC_INT(RTAX_SSTHRESH, "krt_sstresh"),
+ KRT_METRIC_INT(RTAX_CWND, "krt_cwnd"),
+ KRT_METRIC_INT(RTAX_ADVMSS, "krt_advmss"),
+ KRT_METRIC_INT(RTAX_REORDERING, "krt_reordering"),
+ KRT_METRIC_INT(RTAX_HOPLIMIT, "krt_hoplimit"),
+ KRT_METRIC_INT(RTAX_INITCWND, "krt_initcwnd"),
+ KRT_METRIC_INT(RTAX_RTO_MIN, "krt_rto_min"),
+ KRT_METRIC_INT(RTAX_INITRWND, "krt_initrwnd"),
+ KRT_METRIC_INT(RTAX_QUICKACK, "krt_quickack"),
+#undef KRT_METRIC_INT
+};
+
+static const char *krt_metrics_names[KRT_METRICS_MAX] = {
+ NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
+ "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
+};
+
+static const char *krt_features_names[KRT_FEATURES_MAX] = {
+ "ecn", NULL, NULL, "allfrag"
+};
+
+static void
+krt_bitfield_format(const eattr *a, byte *buf, uint buflen)
+{
+ if (a->id == ea_krt_metrics[RTAX_LOCK].id)
+ ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
+ else if (a->id == ea_krt_metrics[RTAX_FEATURES].id)
+ ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
+}
+
+static void
+nl_ea_register(void)
+{
+ EA_REGISTER_ALL(
+ &ea_krt_prefsrc,
+ &ea_krt_realm,
+ &ea_krt_scope
+ );
+
+ for (uint i = 0; i < KRT_METRICS_MAX; i++)
+ {
+ if (!ea_krt_metrics[i].name)
+ ea_krt_metrics[i] = (struct ea_class) {
+ .name = mb_sprintf(&root_pool, "krt_metric_%d", i),
+ .type = T_INT,
+ };
+
+ ea_register_init(&ea_krt_metrics[i]);
+ }
+
+ for (uint i = 1; i < KRT_METRICS_MAX; i++)
+ ASSERT_DIE(ea_krt_metrics[i].id == ea_krt_metrics[0].id + i);
+}
+
+
+
+/*
* Synchronous Netlink interface
*/
@@ -130,7 +230,7 @@ struct nl_sock
uint last_size;
};
-#define NL_RX_SIZE 8192
+#define NL_RX_SIZE 32768
#define NL_OP_DELETE 0
#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
@@ -158,10 +258,46 @@ nl_open_sock(struct nl_sock *nl)
}
static void
+nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED)
+{
+ /*
+ * Strict checking is not necessary, it improves behavior on newer kernels.
+ * If it is not available (missing SOL_NETLINK compile-time, or ENOPROTOOPT
+ * run-time), we can just ignore it.
+ */
+#ifdef SOL_NETLINK
+ setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
+#endif
+}
+
+static void
+nl_set_rcvbuf(int fd, uint val)
+{
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)) < 0)
+ log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val);
+}
+
+static uint
+nl_cfg_rx_buffer_size(struct config *cfg)
+{
+ uint bufsize = 0;
+
+ struct proto_config *pc;
+ WALK_LIST(pc, cfg->protos)
+ if ((pc->protocol == &proto_unix_kernel) && !pc->disabled)
+ bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer);
+
+ return bufsize;
+}
+
+
+static void
nl_open(void)
{
nl_open_sock(&nl_scan);
nl_open_sock(&nl_req);
+
+ nl_set_strict_dump(&nl_scan, 1);
}
static void
@@ -180,20 +316,60 @@ nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
}
static void
-nl_request_dump(int af, int cmd)
+nl_request_dump_link(void)
{
struct {
struct nlmsghdr nh;
- struct rtgenmsg g;
+ struct ifinfomsg ifi;
} req = {
- .nh.nlmsg_type = cmd,
- .nh.nlmsg_len = sizeof(req),
+ .nh.nlmsg_type = RTM_GETLINK,
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
- .g.rtgen_family = af
+ .nh.nlmsg_seq = ++(nl_scan.seq),
+ .ifi.ifi_family = AF_UNSPEC,
};
- nl_send(&nl_scan, &req.nh);
+
+ send(nl_scan.fd, &req, sizeof(req), 0);
+ nl_scan.last_hdr = NULL;
}
+static void
+nl_request_dump_addr(int af)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifaddrmsg ifa;
+ } req = {
+ .nh.nlmsg_type = RTM_GETADDR,
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+ .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+ .nh.nlmsg_seq = ++(nl_scan.seq),
+ .ifa.ifa_family = af,
+ };
+
+ send(nl_scan.fd, &req, sizeof(req), 0);
+ nl_scan.last_hdr = NULL;
+}
+
+static void
+nl_request_dump_route(int af)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rtm;
+ } req = {
+ .nh.nlmsg_type = RTM_GETROUTE,
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
+ .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+ .nh.nlmsg_seq = ++(nl_scan.seq),
+ .rtm.rtm_family = af,
+ };
+
+ send(nl_scan.fd, &req, sizeof(req), 0);
+ nl_scan.last_hdr = NULL;
+}
+
+
static struct nlmsghdr *
nl_get_reply(struct nl_sock *nl)
{
@@ -651,12 +827,12 @@ nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUS
}
static void
-nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs)
+nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop_adata *nhad, int af, ea_list *eattrs)
{
struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
- eattr *flow = ea_find(eattrs, EA_KRT_REALM);
+ eattr *flow = ea_find(eattrs, &ea_krt_realm);
- for (; nh; nh = nh->next)
+ NEXTHOP_WALK(nh, nhad)
{
struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
@@ -680,33 +856,49 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, e
nl_close_attr(h, a);
}
-static struct nexthop *
-nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
+static struct nexthop_adata *
+nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src)
{
struct rtattr *a[BIRD_RTA_MAX];
- struct rtnexthop *nh = RTA_DATA(ra);
- struct nexthop *rv, *first, **last;
- unsigned len = RTA_PAYLOAD(ra);
+ struct rtnexthop *nh, *orig_nh = RTA_DATA(ra);
+ unsigned len, orig_len = RTA_PAYLOAD(ra);
+ uint cnt = 0;
+
+ /* First count the nexthops */
+ for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh))
+ {
+ /* Use RTNH_OK(nh,len) ?? */
+ if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
+ goto err;
- first = NULL;
- last = &first;
+ if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
+ ;
+ else
+ cnt++;
+ }
+
+ struct nexthop_adata *nhad = lp_allocz(s->pool, cnt * NEXTHOP_MAX_SIZE + sizeof *nhad);
+ struct nexthop *rv = &nhad->nh;
- while (len)
+ for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh))
{
/* Use RTNH_OK(nh,len) ?? */
if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
- return NULL;
+ goto err;
- if (nh->rtnh_flags & RTNH_F_DEAD)
- goto next;
+ if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
+ continue;
- *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
- last = &(rv->next);
+ *rv = (struct nexthop) {
+ .weight = nh->rtnh_hops,
+ .iface = if_find_by_index(nh->rtnh_ifindex),
+ };
- rv->weight = nh->rtnh_hops;
- rv->iface = if_find_by_index(nh->rtnh_ifindex);
if (!rv->iface)
- return NULL;
+ {
+ log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex);
+ return NULL;
+ }
/* Nonexistent RTNH_PAYLOAD ?? */
nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
@@ -714,18 +906,18 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr
{
case AF_INET:
if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
- return NULL;
+ goto err;
break;
case AF_INET6:
if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
- return NULL;
+ goto err;
break;
#ifdef HAVE_MPLS_KERNEL
case AF_MPLS:
if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want_mpls, a, sizeof(a)))
- return NULL;
+ goto err;
if (a[RTA_NEWDST])
rv->labels = rta_get_mpls(a[RTA_NEWDST], rv->label);
@@ -734,7 +926,7 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr
#endif
default:
- return NULL;
+ goto err;
}
if (a[RTA_GATEWAY])
@@ -757,14 +949,19 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr
nbr = neigh_find(&p->p, rv->gw, rv->iface,
(rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
if (!nbr || (nbr->scope == SCOPE_HOST))
- return NULL;
+ {
+ log(L_ERR "KRT: Received route %N with strange next-hop %I", n, rv->gw);
+ return NULL;
+ }
}
#ifdef HAVE_MPLS_KERNEL
if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
{
- if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
- log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
+ if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS)
+ {
+ log(L_WARN "KRT: Received route %N with unknown encapsulation method %d",
+ n, rta_get_u16(a[RTA_ENCAP_TYPE]));
return NULL;
}
@@ -775,16 +972,18 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr
}
#endif
- next:
- len -= NLMSG_ALIGN(nh->rtnh_len);
- nh = RTNH_NEXT(nh);
+ rv = NEXTHOP_NEXT(rv);
}
+ /* Store final length */
+ nhad->ad.length = (void *) rv - (void *) nhad->ad.data;
+
/* Ensure nexthops are sorted to satisfy nest invariant */
- if (!nexthop_is_sorted(first))
- first = nexthop_sort(first);
+ return nexthop_is_sorted(nhad) ? nhad : nexthop_sort(nhad, s->pool);
- return first;
+err:
+ log(L_ERR "KRT: Received strange multipath route %N", n);
+ return NULL;
}
static void
@@ -1139,7 +1338,7 @@ kif_do_scan(struct kif_proto *p UNUSED)
if_start_update();
- nl_request_dump(AF_UNSPEC, RTM_GETLINK);
+ nl_request_dump_link();
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
nl_parse_link(h, 1);
@@ -1166,14 +1365,14 @@ kif_do_scan(struct kif_proto *p UNUSED)
}
}
- nl_request_dump(AF_INET, RTM_GETADDR);
+ nl_request_dump_addr(AF_INET);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
nl_parse_addr(h, 1);
else
log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
- nl_request_dump(AF_INET6, RTM_GETADDR);
+ nl_request_dump_addr(AF_INET6);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
nl_parse_addr(h, 1);
@@ -1208,11 +1407,16 @@ HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
int
krt_capable(rte *e)
{
- rta *a = e->attrs;
+ eattr *ea = ea_find(e->attrs, &ea_gen_nexthop);
+ if (!ea)
+ return 0;
+
+ struct nexthop_adata *nhad = (void *) ea->u.ptr;
+ if (NEXTHOP_IS_REACHABLE(nhad))
+ return 1;
- switch (a->dest)
+ switch (nhad->dest)
{
- case RTD_UNICAST:
case RTD_BLACKHOLE:
case RTD_UNREACHABLE:
case RTD_PROHIBIT:
@@ -1224,21 +1428,21 @@ krt_capable(rte *e)
}
static inline int
-nh_bufsize(struct nexthop *nh)
+nh_bufsize(struct nexthop_adata *nhad)
{
int rv = 0;
- for (; nh != NULL; nh = nh->next)
+ NEXTHOP_WALK(nh, nhad)
rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
return rv;
}
static int
-nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop *nh)
+nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop_adata *nh)
{
eattr *ea;
- rta *a = e->attrs;
- ea_list *eattrs = a->eattrs;
- int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
+ ea_list *eattrs = e->attrs;
+
+ int bufsize = 128 + KRT_METRICS_MAX*8 + (nh ? nh_bufsize(nh) : 0);
u32 priority = 0;
struct {
@@ -1306,7 +1510,7 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho
priority = 0;
else if (KRT_CF->sys.metric)
priority = KRT_CF->sys.metric;
- else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
+ else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, &ea_krt_metric)))
priority = ea->u.data;
if (priority)
@@ -1319,15 +1523,15 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho
/* Default scope is LINK for device routes, UNIVERSE otherwise */
if (p->af == AF_MPLS)
r->r.rtm_scope = RT_SCOPE_UNIVERSE;
- else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
+ else if (ea = ea_find(eattrs, &ea_krt_scope))
r->r.rtm_scope = ea->u.data;
else
- r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+ r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->nh.gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
- if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
+ if (ea = ea_find(eattrs, &ea_krt_prefsrc))
nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
- if (ea = ea_find(eattrs, EA_KRT_REALM))
+ if (ea = ea_find(eattrs, &ea_krt_realm))
nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
@@ -1335,9 +1539,9 @@ nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nextho
metrics[0] = 0;
struct ea_walk_state ews = { .eattrs = eattrs };
- while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
+ while (ea = ea_walk(&ews, ea_krt_metrics[0].id, KRT_METRICS_MAX))
{
- int id = ea->id - EA_KRT_METRICS;
+ int id = ea->id - ea_krt_metrics[0].id;
metrics[0] |= 1 << id;
metrics[id] = ea->u.data;
}
@@ -1351,14 +1555,14 @@ dest:
{
case RTD_UNICAST:
r->r.rtm_type = RTN_UNICAST;
- if (nh->next && !krt_ecmp6(p))
+ if (!NEXTHOP_ONE(nh) && !krt_ecmp6(p))
nl_add_multipath(&r->h, rsize, nh, p->af, eattrs);
else
{
- nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
- nl_add_nexthop(&r->h, rsize, nh, p->af);
+ nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->nh.iface->index);
+ nl_add_nexthop(&r->h, rsize, &nh->nh, p->af);
- if (nh->flags & RNF_ONLINK)
+ if (nh->nh.flags & RNF_ONLINK)
r->r.rtm_flags |= RTNH_F_ONLINK;
}
break;
@@ -1384,24 +1588,39 @@ dest:
static inline int
nl_add_rte(struct krt_proto *p, rte *e)
{
- rta *a = e->attrs;
+ ea_list *ea = e->attrs;
int err = 0;
- if (krt_ecmp6(p) && a->nh.next)
- {
- struct nexthop *nh = &(a->nh);
+ eattr *nhea = ea_find(ea, &ea_gen_nexthop);
+ struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
- err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
- if (err < 0)
- return err;
-
- for (nh = nh->next; nh; nh = nh->next)
- err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
+ if (krt_ecmp6(p) && nhad && NEXTHOP_IS_REACHABLE(nhad) && !NEXTHOP_ONE(nhad))
+ {
+ uint cnt = 0;
+ NEXTHOP_WALK(nh, nhad)
+ {
+ struct {
+ struct nexthop_adata nhad;
+ u32 labels[MPLS_MAX_LABEL_STACK];
+ } nhx;
+ memcpy(&nhx.nhad.nh, nh, NEXTHOP_SIZE(nh));
+ nhx.nhad.ad.length = (void *) NEXTHOP_NEXT(&nhx.nhad.nh) - (void *) nhx.nhad.ad.data;
+
+ if (!cnt++)
+ {
+ err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, &nhx.nhad);
+ if (err < 0)
+ return err;
+ }
+ else
+ err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, &nhx.nhad);
+ }
return err;
}
- return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
+ return nl_send_route(p, e, NL_OP_ADD,
+ NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad);
}
static inline int
@@ -1420,8 +1639,10 @@ nl_delete_rte(struct krt_proto *p, const rte *e)
static inline int
nl_replace_rte(struct krt_proto *p, rte *e)
{
- rta *a = e->attrs;
- return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh));
+ eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop);
+ struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
+ return nl_send_route(p, e, NL_OP_REPLACE,
+ NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad);
}
@@ -1490,21 +1711,16 @@ nl_announce_route(struct nl_parse_state *s)
.net = s->net,
};
- ea_list *ea = alloca(sizeof(ea_list) + 2 * sizeof(eattr));
- *ea = (ea_list) { .count = 2, .next = e0.attrs->eattrs };
- e0.attrs->eattrs = ea;
-
- ea->attrs[0] = (eattr) {
- .id = EA_KRT_SOURCE,
- .type = EAF_TYPE_INT,
- .u.data = s->krt_proto,
- };
- ea->attrs[1] = (eattr) {
- .id = EA_KRT_METRIC,
- .type = EAF_TYPE_INT,
- .u.data = s->krt_metric,
+ EA_LOCAL_LIST(2) ea = {
+ .l = { .count = 2, .next = e0.attrs },
+ .a = {
+ EA_LITERAL_EMBEDDED(&ea_krt_source, 0, s->krt_proto),
+ EA_LITERAL_EMBEDDED(&ea_krt_metric, 0, s->krt_metric),
+ },
};
+ e0.attrs = &ea.l;
+
if (s->scan)
krt_got_route(s->proto, &e0, s->krt_src);
else
@@ -1532,7 +1748,8 @@ nl_parse_end(struct nl_parse_state *s)
}
-#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
+#define SKIP0(ARG, ...) do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0)
+#define SKIP(ARG, ...) do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0)
static void
nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
@@ -1585,10 +1802,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
return;
if (!a[RTA_DST])
- SKIP("MPLS route without RTA_DST");
+ SKIP0("MPLS route without RTA_DST\n");
if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
- SKIP("MPLS route with multi-label RTA_DST");
+ SKIP0("MPLS route with multi-label RTA_DST\n");
net_fill_mpls(&dst, rta_mpls_stack[0]);
break;
@@ -1606,6 +1823,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
else
table_id = i->rtm_table;
+ if (i->rtm_flags & RTM_F_CLONED)
+ SKIP("cloned\n");
+
/* Do we know this table? */
p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
if (!p)
@@ -1665,80 +1885,110 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family))
nl_announce_route(s);
- rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
- ra->source = RTS_INHERIT;
- ra->scope = SCOPE_UNIVERSE;
+ ea_list *ra = NULL;
+ ea_set_attr_u32(&ra, &ea_gen_source, 0, RTS_INHERIT);
if (a[RTA_FLOW])
s->rta_flow = rta_get_u32(a[RTA_FLOW]);
else
s->rta_flow = 0;
+ union {
+ struct {
+ struct adata ad;
+ struct nexthop nh;
+ u32 labels[MPLS_MAX_LABEL_STACK];
+ };
+ struct nexthop_adata nhad;
+ } nhad = {};
+
switch (i->rtm_type)
{
case RTN_UNICAST:
- ra->dest = RTD_UNICAST;
-
if (a[RTA_MULTIPATH])
{
- struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
+ struct nexthop_adata *nh = nl_parse_multipath(s, p, net, a[RTA_MULTIPATH], i->rtm_family, krt_src);
if (!nh)
- {
- log(L_ERR "KRT: Received strange multipath route %N", net);
- return;
- }
+ SKIP("strange RTA_MULTIPATH\n");
- nexthop_link(ra, nh);
+ ea_set_attr(&ra, EA_LITERAL_DIRECT_ADATA(
+ &ea_gen_nexthop, 0, &nh->ad));
break;
}
- if (i->rtm_flags & RTNH_F_DEAD)
- return;
+ if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
+ SKIP("ignore RTNH_F_DEAD\n");
- ra->nh.iface = if_find_by_index(oif);
- if (!ra->nh.iface)
+ nhad.nh.iface = if_find_by_index(oif);
+ if (!nhad.nh.iface)
{
log(L_ERR "KRT: Received route %N with unknown ifindex %u", net, oif);
return;
}
if (a[RTA_GATEWAY])
- ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
+ nhad.nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
#ifdef HAVE_MPLS_KERNEL
if (a[RTA_VIA])
- ra->nh.gw = rta_get_via(a[RTA_VIA]);
+ nhad.nh.gw = rta_get_via(a[RTA_VIA]);
#endif
- if (ipa_nonzero(ra->nh.gw))
+ if (ipa_nonzero(nhad.nh.gw))
{
/* Silently skip strange 6to4 routes */
const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
- if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
+ if ((i->rtm_family == AF_INET6) && ipa_in_netX(nhad.nh.gw, (net_addr *) &sit))
return;
if (i->rtm_flags & RTNH_F_ONLINK)
- ra->nh.flags |= RNF_ONLINK;
+ nhad.nh.flags |= RNF_ONLINK;
neighbor *nbr;
- nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
- (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
+ nbr = neigh_find(&p->p, nhad.nh.gw, nhad.nh.iface,
+ (nhad.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
if (!nbr || (nbr->scope == SCOPE_HOST))
{
- log(L_ERR "KRT: Received route %N with strange next-hop %I", net, ra->nh.gw);
+ log(L_ERR "KRT: Received route %N with strange next-hop %I", net,
+ nhad.nh.gw);
return;
}
}
+#ifdef HAVE_MPLS_KERNEL
+ if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !a[RTA_MULTIPATH])
+ nhad.nh.labels = rta_get_mpls(a[RTA_NEWDST], nhad.nh.label);
+
+ if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !a[RTA_MULTIPATH])
+ {
+ switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
+ {
+ case LWTUNNEL_ENCAP_MPLS:
+ {
+ struct rtattr *enca[BIRD_RTA_MAX];
+ nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
+ nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
+ nhad.nh.labels = rta_get_mpls(enca[RTA_DST], nhad.nh.label);
+ break;
+ }
+ default:
+ SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
+ break;
+ }
+ }
+#endif
+
+ /* Finalize the nexthop */
+ nhad.ad.length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data;
break;
case RTN_BLACKHOLE:
- ra->dest = RTD_BLACKHOLE;
+ nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_BLACKHOLE);
break;
case RTN_UNREACHABLE:
- ra->dest = RTD_UNREACHABLE;
+ nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_UNREACHABLE);
break;
case RTN_PROHIBIT:
- ra->dest = RTD_PROHIBIT;
+ nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_PROHIBIT);
break;
/* FIXME: What about RTN_THROW? */
default:
@@ -1746,105 +1996,36 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
return;
}
-#ifdef HAVE_MPLS_KERNEL
- if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
- ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
-
- if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
- {
- switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
- {
- case LWTUNNEL_ENCAP_MPLS:
- {
- struct rtattr *enca[BIRD_RTA_MAX];
- nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
- nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
- ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
- break;
- }
- default:
- SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
- break;
- }
- }
-#endif
-
if (i->rtm_scope != def_scope)
- {
- ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
- ea->next = ra->eattrs;
- ra->eattrs = ea;
- ea->flags = EALF_SORTED;
- ea->count = 1;
- ea->attrs[0].id = EA_KRT_SCOPE;
- ea->attrs[0].flags = 0;
- ea->attrs[0].type = EAF_TYPE_INT;
- ea->attrs[0].u.data = i->rtm_scope;
- }
+ ea_set_attr(&ra,
+ EA_LITERAL_EMBEDDED(&ea_krt_scope, 0, i->rtm_scope));
if (a[RTA_PREFSRC])
- {
- ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
-
- ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
- ea->next = ra->eattrs;
- ra->eattrs = ea;
- ea->flags = EALF_SORTED;
- ea->count = 1;
- ea->attrs[0].id = EA_KRT_PREFSRC;
- ea->attrs[0].flags = 0;
- ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
-
- struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
- ad->length = sizeof(ps);
- memcpy(ad->data, &ps, sizeof(ps));
-
- ea->attrs[0].u.ptr = ad;
- }
+ {
+ ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
+
+ ea_set_attr(&ra,
+ EA_LITERAL_STORE_ADATA(&ea_krt_prefsrc, 0, &ps, sizeof(ps)));
+ }
/* Can be set per-route or per-nexthop */
if (s->rta_flow)
- {
- ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
- ea->next = ra->eattrs;
- ra->eattrs = ea;
- ea->flags = EALF_SORTED;
- ea->count = 1;
- ea->attrs[0].id = EA_KRT_REALM;
- ea->attrs[0].flags = 0;
- ea->attrs[0].type = EAF_TYPE_INT;
- ea->attrs[0].u.data = s->rta_flow;
- }
+ ea_set_attr(&ra,
+ EA_LITERAL_EMBEDDED(&ea_krt_realm, 0, s->rta_flow));
if (a[RTA_METRICS])
{
u32 metrics[KRT_METRICS_MAX];
- ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
- int t, n = 0;
-
if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
{
log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net);
return;
}
- for (t = 1; t < KRT_METRICS_MAX; t++)
+ for (uint t = 1; t < KRT_METRICS_MAX; t++)
if (metrics[0] & (1 << t))
- {
- ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
- ea->attrs[n].flags = 0;
- ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
- ea->attrs[n].u.data = metrics[t];
- n++;
- }
-
- if (n > 0)
- {
- ea->next = ra->eattrs;
- ea->flags = EALF_SORTED;
- ea->count = n;
- ra->eattrs = ea;
- }
+ ea_set_attr(&ra,
+ EA_LITERAL_EMBEDDED(&ea_krt_metrics[t], 0, metrics[t]));
}
/*
@@ -1861,6 +2042,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
s->net = lp_alloc(s->pool, net->length);
net_copy(s->net, net);
+ ea_set_attr_data(&ra, &ea_gen_nexthop, 0,
+ nhad.ad.data, nhad.ad.length);
+
s->attrs = ra;
s->proto = p;
s->new = new;
@@ -1872,20 +2056,18 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
else
{
/* Merge next hops with the stored route */
- rta *oa = s->attrs;
-
- struct nexthop *nhs = &oa->nh;
- nexthop_insert(&nhs, &ra->nh);
-
- /* Perhaps new nexthop is inserted at the first position */
- if (nhs == &ra->nh)
- {
- /* Swap rtas */
- s->attrs = ra;
-
- /* Keep old eattrs */
- ra->eattrs = oa->eattrs;
- }
+ eattr *nhea = ea_find(s->attrs, &ea_gen_nexthop);
+ struct nexthop_adata *nhad_old = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
+
+ if (nhad_old)
+ ea_set_attr(&s->attrs,
+ EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0,
+ &(nexthop_merge(nhad_old, &nhad.nhad,
+ KRT_CF->merge_paths, s->pool)->ad)
+ ));
+ else
+ ea_set_attr_data(&s->attrs, &ea_gen_nexthop, 0,
+ nhad.ad.data, nhad.ad.length);
}
}
@@ -1896,7 +2078,7 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
struct nl_parse_state s;
nl_parse_begin(&s, 1);
- nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
+ nl_request_dump_route(AF_UNSPEC);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
nl_parse_route(&s, h);
@@ -1911,6 +2093,8 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
static byte *nl_async_rx_buffer; /* Receive buffer */
+static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */
+static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */
static void
nl_async_msg(struct nlmsghdr *h)
@@ -2046,6 +2230,32 @@ nl_open_async(void)
bug("Netlink: sk_open failed");
}
+static void
+nl_update_async_bufsize(void) /* Bring the async netlink socket's rx buffer size in line with the current config */
+{
+ /* No async socket */
+ if (!nl_async_sk)
+ return;
+
+ /* Already reconfigured */
+ if (nl_last_config == config) /* This config was already handled by an earlier call */
+ return;
+
+ /* Update netlink buffer size */
+ uint bufsize = nl_cfg_rx_buffer_size(config);
+ if (bufsize && (bufsize != nl_async_bufsize)) /* Zero is skipped; presumably it means "not configured" -- TODO confirm */
+ {
+ /* Log message for reconfigurations only */
+ if (nl_last_config) /* Still NULL on the first handled config, so the initial setup stays silent */
+ log(L_INFO "KRT: Changing netlink rx buffer size to %u", bufsize);
+
+ nl_set_rcvbuf(nl_async_sk->fd, bufsize);
+ nl_async_bufsize = bufsize; /* Remember the applied size to detect later changes */
+ }
+
+ nl_last_config = config; /* Recorded even when the size did not change */
+}
+
/*
* Interface to the UNIX krt module
@@ -2056,6 +2266,8 @@ krt_sys_io_init(void)
{
nl_linpool = lp_new_default(krt_pool);
HASH_INIT(nl_table_map, krt_pool, 6);
+
+ nl_ea_register();
}
int
@@ -2074,6 +2286,7 @@ krt_sys_start(struct krt_proto *p)
nl_open();
nl_open_async();
+ nl_update_async_bufsize();
return 1;
}
@@ -2081,12 +2294,16 @@ krt_sys_start(struct krt_proto *p)
void
krt_sys_shutdown(struct krt_proto *p) /* System-dependent part of kernel protocol shutdown */
{
+ nl_update_async_bufsize(); /* Does nothing unless the global config changed since the last call */
+
HASH_REMOVE2(nl_table_map, RTH, krt_pool, p); /* Take this protocol instance out of nl_table_map */
}
int
krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o) /* n = new config, o = old config */
{
+ nl_update_async_bufsize(); /* Does nothing unless the global config changed since the last call */
+
return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric); /* Nonzero only if kernel table id and metric are both unchanged */
}
@@ -2104,56 +2321,6 @@ krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
d->sys.metric = s->sys.metric;
}
-static const char *krt_metrics_names[KRT_METRICS_MAX] = {
- NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
- "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
-};
-
-static const char *krt_features_names[KRT_FEATURES_MAX] = {
- "ecn", NULL, NULL, "allfrag"
-};
-
-int
-krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
-{
- switch (a->id)
- {
- case EA_KRT_PREFSRC:
- bsprintf(buf, "prefsrc");
- return GA_NAME;
-
- case EA_KRT_REALM:
- bsprintf(buf, "realm");
- return GA_NAME;
-
- case EA_KRT_SCOPE:
- bsprintf(buf, "scope");
- return GA_NAME;
-
- case EA_KRT_LOCK:
- buf += bsprintf(buf, "lock:");
- ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
- return GA_FULL;
-
- case EA_KRT_FEATURES:
- buf += bsprintf(buf, "features:");
- ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
- return GA_FULL;
-
- default:;
- int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
- if (id > 0 && id < KRT_METRICS_MAX)
- {
- bsprintf(buf, "%s", krt_metrics_names[id]);
- return GA_NAME;
- }
-
- return GA_UNKNOWN;
- }
-}
-
-
-
void
kif_sys_start(struct kif_proto *p UNUSED)
{
diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h
index e21ff487..f13eda7c 100644
--- a/sysdep/linux/sysio.h
+++ b/sysdep/linux/sysio.h
@@ -10,6 +10,10 @@
#define IPV6_MINHOPCOUNT 73
#endif
+#ifndef IPV6_FREEBIND
+#define IPV6_FREEBIND 78
+#endif
+
#ifndef TCP_MD5SIG_EXT
#define TCP_MD5SIG_EXT 32
#endif
@@ -266,3 +270,18 @@ sk_set_priority(sock *s, int prio)
return 0;
}
+static inline int
+sk_set_freebind(sock *s) /* Turn on the FREEBIND socket option that matches the socket's address family */
+{
+ int y = 1; /* Option value passed to setsockopt(): 1 enables the option */
+
+ if (sk_is_ipv4(s))
+ if (setsockopt(s->fd, SOL_IP, IP_FREEBIND, &y, sizeof(y)) < 0)
+ ERR("IP_FREEBIND"); /* ERR() is defined outside this view; presumably it records the error and returns a negative value -- TODO confirm */
+
+ if (sk_is_ipv6(s))
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_FREEBIND, &y, sizeof(y)) < 0)
+ ERR("IPV6_FREEBIND");
+
+ return 0; /* Success, assuming ERR() leaves the function on failure */
+}
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile
index d0d36b5f..51ab98a9 100644
--- a/sysdep/unix/Makefile
+++ b/sysdep/unix/Makefile
@@ -2,6 +2,8 @@ src := alloc.c io.c krt.c log.c main.c random.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
+$(call proto-build,kif_build)
+$(call proto-build,krt_build)
$(conf-y-targets): $(s)krt.Y
src := $(filter-out main.c, $(src))
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index 4c9d5eb5..edad6209 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -8,7 +8,10 @@
#include "nest/bird.h"
#include "lib/resource.h"
+#include "lib/lists.h"
+#include "lib/event.h"
+#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
@@ -17,82 +20,168 @@
#endif
long page_size = 0;
-_Bool alloc_multipage = 0;
#ifdef HAVE_MMAP
+#define KEEP_PAGES_MAIN_MAX 256
+#define KEEP_PAGES_MAIN_MIN 8
+#define CLEANUP_PAGES_BULK 256
+
+STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX);
+
static _Bool use_fake = 0;
+
+#if DEBUGGING
+struct free_page {
+ node unused[42];
+ node n;
+};
#else
-static _Bool use_fake = 1;
+struct free_page {
+ node n;
+};
+#endif
+
+struct free_pages {
+ list pages;
+ u16 min, max; /* Minimal and maximal number of free pages kept */
+ uint cnt; /* Number of empty pages */
+ event cleanup;
+};
+
+static void global_free_pages_cleanup_event(void *);
+
+static struct free_pages global_free_pages = {
+ .min = KEEP_PAGES_MAIN_MIN,
+ .max = KEEP_PAGES_MAIN_MAX,
+ .cleanup = { .hook = global_free_pages_cleanup_event },
+};
+
+uint *pages_kept = &global_free_pages.cnt;
+
+/*
+ * Obtain one fresh page of page_size bytes from the system as a private,
+ * anonymous, readable and writable mapping. A failed mmap() is reported
+ * through bug(), so callers never see MAP_FAILED unless bug() returns.
+ */
+static void *
+alloc_sys_page(void)
+{
+ void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (ptr == MAP_FAILED)
+ /* page_size is a signed long; cast to match %lu, as alloc_page() does */
+ bug("mmap(%lu) failed: %m", (long unsigned int) page_size);
+
+ return ptr;
+}
+
+extern int shutting_down; /* Shutdown requested. */
+
+#else // ! HAVE_MMAP
+#define use_fake 1
#endif
-void resource_sys_init(void)
+void *
+alloc_page(void)
{
+ if (use_fake)
+ {
+ void *ptr = NULL;
+ int err = posix_memalign(&ptr, page_size, page_size);
+
+ if (err || !ptr)
+ bug("posix_memalign(%lu) failed", (long unsigned int) page_size);
+
+ return ptr;
+ }
+
#ifdef HAVE_MMAP
- if (!(page_size = sysconf(_SC_PAGESIZE)))
- die("System page size must be non-zero");
+ struct free_pages *fps = &global_free_pages;
- if ((u64_popcount(page_size) > 1) || (page_size > 16384))
+ if (fps->cnt)
{
-#endif
- /* Too big or strange page, use the aligned allocator instead */
- page_size = 4096;
- use_fake = 1;
+ struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages));
+ rem_node(&fp->n);
+ if ((--fps->cnt < fps->min) && !shutting_down)
+ ev_schedule(&fps->cleanup);
+
+ bzero(fp, page_size);
+ return fp;
}
+
+ return alloc_sys_page();
+#endif
}
-void *
-alloc_sys_page(void)
+void
+free_page(void *ptr)
{
-#ifdef HAVE_MMAP
- if (!use_fake)
+ if (use_fake)
{
- if (alloc_multipage)
- {
- void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (big == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- uintptr_t offset = ((uintptr_t) big) % page_size;
- if (offset)
- {
- void *ret = big + page_size - offset;
- munmap(big, page_size - offset);
- munmap(ret + page_size, offset);
- return ret;
- }
- else
- {
- munmap(big + page_size, page_size);
- return big;
- }
- }
-
- void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (ret == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- return ret;
+ free(ptr);
+ return;
}
- else
+
+#ifdef HAVE_MMAP
+ struct free_pages *fps = &global_free_pages;
+ struct free_page *fp = ptr;
+
+ fp->n = (node) {};
+ add_tail(&fps->pages, &fp->n);
+
+ if ((++fps->cnt > fps->max) && !shutting_down)
+ ev_schedule(&fps->cleanup);
#endif
+}
+
+#ifdef HAVE_MMAP
+static void
+global_free_pages_cleanup_event(void *data UNUSED)
+{
+ if (shutting_down)
+ return;
+
+ struct free_pages *fps = &global_free_pages;
+
+ while (fps->cnt / 2 < fps->min)
{
- void *ret = aligned_alloc(page_size, page_size);
- if (!ret)
- bug("aligned_alloc(%lu) failed", page_size);
- return ret;
+ struct free_page *fp = alloc_sys_page();
+ fp->n = (node) {};
+ add_tail(&fps->pages, &fp->n);
+ fps->cnt++;
+ }
+
+ for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++)
+ {
+ struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
+ rem_node(&fp->n);
+
+ if (munmap(fp, page_size) == 0)
+ fps->cnt--;
+ else if (errno == ENOMEM)
+ add_head(&fps->pages, &fp->n);
+ else
+ bug("munmap(%p) failed: %m", fp);
}
}
+#endif
void
-free_sys_page(void *ptr)
+resource_sys_init(void)
{
#ifdef HAVE_MMAP
- if (!use_fake)
+ ASSERT_DIE(global_free_pages.cnt == 0);
+
+ if (!(page_size = sysconf(_SC_PAGESIZE)))
+ die("System page size must be non-zero");
+
+ if (u64_popcount(page_size) == 1)
{
- if (munmap(ptr, page_size) < 0)
- bug("munmap(%p) failed: %m", ptr);
+ struct free_pages *fps = &global_free_pages;
+
+ init_list(&fps->pages);
+ global_free_pages_cleanup_event(NULL);
+ return;
}
- else
+
+ /* Too big or strange page, use the aligned allocator instead */
+ log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size);
+ use_fake = 1;
#endif
- free(ptr);
+
+ page_size = 4096;
}
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 3d67d0a7..810e782d 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -1436,6 +1436,10 @@ sk_open(sock *s)
if (sk_set_high_port(s) < 0)
log(L_WARN "Socket error: %s%#m", s->err);
+ if (s->flags & SKF_FREEBIND)
+ if (sk_set_freebind(s) < 0)
+ log(L_WARN "Socket error: %s%#m", s->err);
+
sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
ERR2("bind");
@@ -1850,8 +1854,8 @@ sk_read_ssh(sock *s)
/* sk_read() and sk_write() are called from BFD's event loop */
-int
-sk_read(sock *s, int revents)
+static inline int
+sk_read_noflush(sock *s, int revents)
{
switch (s->type)
{
@@ -1914,7 +1918,15 @@ sk_read(sock *s, int revents)
}
int
-sk_write(sock *s)
+sk_read(sock *s, int revents) /* Public read entry point: run sk_read_noflush(), then call tmp_flush() */
+{
+ int e = sk_read_noflush(s, revents); /* Result is kept so tmp_flush() can run before returning */
+ tmp_flush(); /* Presumably releases temporary allocations made while reading -- TODO confirm */
+ return e;
+}
+
+static inline int
+sk_write_noflush(sock *s)
{
switch (s->type)
{
@@ -1962,6 +1974,14 @@ sk_write(sock *s)
}
}
+int
+sk_write(sock *s) /* Public write entry point: run sk_write_noflush(), then call tmp_flush() */
+{
+ int e = sk_write_noflush(s); /* Result is kept so tmp_flush() can run before returning */
+ tmp_flush(); /* Presumably releases temporary allocations made while writing -- TODO confirm */
+ return e;
+}
+
int sk_is_ipv4(sock *s)
{ return s->af == AF_INET; }
@@ -1980,6 +2000,7 @@ sk_err(sock *s, int revents)
}
s->err_hook(s, se);
+ tmp_flush();
}
void
@@ -2042,8 +2063,8 @@ io_update_time(void)
event_open->duration = last_time - event_open->timestamp;
if (event_open->duration > config->latency_limit)
- log(L_WARN "Event 0x%p 0x%p took %d ms",
- event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
+ log(L_WARN "Event 0x%p 0x%p took %u.%03u ms",
+ event_open->hook, event_open->data, (uint) (event_open->duration TO_MS), (uint) (event_open->duration % 1000));
event_open = NULL;
}
@@ -2147,8 +2168,8 @@ watchdog_stop(void)
btime duration = last_time - loop_time;
if (duration > config->watchdog_warning)
- log(L_WARN "I/O loop cycle took %d ms for %d events",
- (int) (duration TO_MS), event_log_num);
+ log(L_WARN "I/O loop cycle took %u.%03u ms for %d events",
+ (uint) (duration TO_MS), (uint) (duration % 1000), event_log_num);
}
diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y
index 95b54d65..4ce9a328 100644
--- a/sysdep/unix/krt.Y
+++ b/sysdep/unix/krt.Y
@@ -29,7 +29,7 @@ kif_set_preferred(ip_addr ip)
CF_DECLS
-CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS)
+CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, MERGE, PATHS)
CF_KEYWORDS(INTERFACE, PREFERRED)
%type <i> kern_mp_limit
@@ -122,9 +122,6 @@ kif_iface:
kif_iface_start iface_patt_list_nopx kif_iface_opt_list;
-dynamic_attr: KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SOURCE); } ;
-dynamic_attr: KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_METRIC); } ;
-
CF_CODE
CF_END
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 609ee921..46b5a51d 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -53,7 +53,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "filter/filter.h"
#include "conf/conf.h"
@@ -232,7 +232,6 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src)
struct protocol proto_unix_iface = {
.name = "Device",
.template = "device%d",
- .class = PROTOCOL_DEVICE,
.proto_size = sizeof(struct kif_proto),
.config_size = sizeof(struct kif_config),
.preconfig = kif_preconfig,
@@ -243,6 +242,13 @@ struct protocol proto_unix_iface = {
.copy_config = kif_copy_config
};
+void
+kif_build(void) /* Build hook: hands the Device protocol (proto_unix_iface) to proto_build() */
+{
+ proto_build(&proto_unix_iface);
+}
+
+
/*
* Tracing of routes
*/
@@ -280,29 +286,45 @@ static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS;
static inline u32
krt_metric(rte *a) /* Value of the route's ea_krt_metric attribute, or 0 when the attribute is absent */
{
- eattr *ea = ea_find(a->attrs->eattrs, EA_KRT_METRIC);
+ eattr *ea = ea_find(a->attrs, &ea_krt_metric); /* Look the attribute up directly in the route's attribute list */
return ea ? ea->u.data : 0;
}
static inline int
-krt_rte_better(rte *a, rte *b)
+krt_same_key(rte *a, rte *b) /* Nonzero when both routes yield the same krt_metric() value */
{
- return (krt_metric(a) > krt_metric(b));
+ return (krt_metric(a) == krt_metric(b));
+}
+
+static inline int
+krt_uptodate(rte *a, rte *b) /* Nonzero when both routes point to the very same attribute list */
+{
+ return (a->attrs == b->attrs); /* Pointer comparison only; the attribute contents are not inspected */
}
/* Called when alien route is discovered during scan */
static void
-krt_learn_rte(struct krt_proto *p, rte *e)
+krt_learn_scan(struct krt_proto *p, rte *e)
{
- e->src = rt_get_source(&p->p, krt_metric(e));
- rte_update(p->p.main_channel, e->net, e, e->src);
+ rte e0 = { /* Work on a local rte instead of writing into *e */
+ .attrs = e->attrs,
+ .src = rt_get_source(&p->p, krt_metric(e)), /* Route source obtained for the route's kernel metric */
+ };
+
+ ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference); /* Set the main channel's preference on the local copy */
+
+ rte_update(p->p.main_channel, e->net, &e0, e0.src); /* Hand the route to the main channel */
}
+/*
+ * Handle an asynchronously reported alien route. When @new is set, the route
+ * is imported exactly as during a scan. Otherwise the route source for the
+ * route's kernel metric is looked up and, if one exists, rte_update() is
+ * called with a NULL route for it (presumably withdrawing the learned route).
+ */
static void
-krt_learn_init(struct krt_proto *p)
+krt_learn_async(struct krt_proto *p, rte *e, int new)
{
- if (KRT_CF->learn)
- channel_setup_in_table(p->p.main_channel, 1);
+ if (new)
+ {
+ /* Call, then return: ISO C does not allow `return expr;` in a void function */
+ krt_learn_scan(p, e);
+ return;
+ }
+
+ struct rte_src *src = rt_find_source(&p->p, krt_metric(e));
+ if (src) /* No known source for this metric: nothing to update */
+ rte_update(p->p.main_channel, e->net, NULL, src);
}
#endif
@@ -322,7 +344,7 @@ rte_feed_count(net *n)
{
uint count = 0;
for (struct rte_storage *e = n->routes; e; e = e->next)
- if (rte_is_valid(RTES_OR_NULL(e)))
+ if (rte_is_valid(RTE_OR_NULL(e)))
count++;
return count;
}
@@ -332,7 +354,7 @@ rte_feed_obtain(net *n, rte **feed, uint count)
{
uint i = 0;
for (struct rte_storage *e = n->routes; e; e = e->next)
- if (rte_is_valid(RTES_OR_NULL(e)))
+ if (rte_is_valid(RTE_OR_NULL(e)))
{
ASSERT_DIE(i < count);
feed[i++] = &e->rte;
@@ -371,7 +393,7 @@ krt_export_net(struct krt_proto *p, net *net)
if (filter == FILTER_ACCEPT)
goto accept;
- if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT)
+ if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT)
goto reject;
@@ -385,15 +407,12 @@ reject:
+/*
+ * Compare the next-hop attribute (ea_gen_nexthop) of routes @k and @e.
+ * Returns nonzero when both carry the attribute and adata_same() reports the
+ * data as equal, or when neither route carries it; returns 0 otherwise.
+ */
static int
krt_same_dest(rte *k, rte *e)
{
- rta *ka = k->attrs, *ea = e->attrs;
+ ea_list *ka = k->attrs, *ea = e->attrs;
- if (ka->dest != ea->dest)
- return 0;
+ eattr *nhea_k = ea_find(ka, &ea_gen_nexthop);
+ eattr *nhea_e = ea_find(ea, &ea_gen_nexthop);
- if (ka->dest == RTD_UNICAST)
- return nexthop_same(&(ka->nh), &(ea->nh));
-
- return 1;
+ /* Attribute present on one side only: the destinations differ */
+ if (!nhea_k != !nhea_e)
+ return 0;
+
+ /* Missing on both sides: nothing to compare, and neither pointer may be dereferenced */
+ if (!nhea_k)
+ return 1;
+
+ return adata_same(nhea_k->u.ptr, nhea_e->u.ptr);
}
/*
@@ -418,7 +437,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
case KRT_SRC_ALIEN:
if (KRT_CF->learn)
- krt_learn_rte(p, e);
+ krt_learn_scan(p, e);
else
krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored");
return;
@@ -487,11 +506,6 @@ static void
krt_init_scan(struct krt_proto *p)
{
bmap_reset(&p->seen_map, 1024);
-
-#ifdef KRT_ALLOW_LEARN
- if (KRT_CF->learn)
- channel_refresh_begin(p->p.main_channel);
-#endif
}
static void
@@ -517,11 +531,6 @@ krt_prune(struct krt_proto *p)
}
FIB_WALK_END;
-#ifdef KRT_ALLOW_LEARN
- if (KRT_CF->learn)
- channel_refresh_end(p->p.main_channel);
-#endif
-
if (p->ready)
p->initialized = 1;
}
@@ -561,7 +570,7 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src)
case KRT_SRC_ALIEN:
if (KRT_CF->learn)
{
- krt_learn_rte(p, e);
+ krt_learn_async(p, e, new);
return;
}
#endif
@@ -672,9 +681,9 @@ krt_scan_timer_kick(struct krt_proto *p)
*/
static int
-krt_preexport(struct channel *c, rte *e)
+krt_preexport(struct channel *C, rte *e)
{
- if (e->src->proto == c->proto)
+ if (e->src->proto == C->proto)
return -1;
if (!krt_capable(e))
@@ -807,7 +816,6 @@ krt_init(struct proto_config *CF)
p->p.if_notify = krt_if_notify;
p->p.reload_routes = krt_reload_routes;
p->p.feed_end = krt_feed_end;
- p->p.rte_better = krt_rte_better;
krt_sys_init(p);
return &p->p;
@@ -833,10 +841,6 @@ krt_start(struct proto *P)
bmap_init(&p->seen_map, p->p.pool, 1024);
add_tail(&krt_proto_list, &p->krt_node);
-#ifdef KRT_ALLOW_LEARN
- krt_learn_init(p);
-#endif
-
if (!krt_sys_start(p))
{
rem_node(&p->krt_node);
@@ -916,24 +920,15 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src)
krt_sys_copy_config(d, s);
}
-static int
-krt_get_attr(const eattr *a, byte *buf, int buflen)
-{
- switch (a->id)
- {
- case EA_KRT_SOURCE:
- bsprintf(buf, "source");
- return GA_NAME;
-
- case EA_KRT_METRIC:
- bsprintf(buf, "metric");
- return GA_NAME;
-
- default:
- return krt_sys_get_attr(a, buf, buflen);
- }
-}
+struct ea_class ea_krt_source = { /* Attribute class named "krt_source", typed T_INT */
+ .name = "krt_source",
+ .type = T_INT,
+};
+struct ea_class ea_krt_metric = { /* Attribute class named "krt_metric", typed T_INT; read by krt_metric() */
+ .name = "krt_metric",
+ .type = T_INT,
+};
#ifdef CONFIG_IP6_SADR_KERNEL
#define MAYBE_IP6_SADR NB_IP6_SADR
@@ -950,7 +945,6 @@ krt_get_attr(const eattr *a, byte *buf, int buflen)
struct protocol proto_unix_kernel = {
.name = "Kernel",
.template = "kernel%d",
- .class = PROTOCOL_KERNEL,
.preference = DEF_PREF_INHERITED,
.channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS,
.proto_size = sizeof(struct krt_proto),
@@ -962,5 +956,15 @@ struct protocol proto_unix_kernel = {
.shutdown = krt_shutdown,
.reconfigure = krt_reconfigure,
.copy_config = krt_copy_config,
- .get_attr = krt_get_attr,
};
+
+void
+krt_build(void) /* Build hook: registers the Kernel protocol and its two attribute classes */
+{
+ proto_build(&proto_unix_kernel);
+
+ EA_REGISTER_ALL( /* Registers ea_krt_source and ea_krt_metric */
+ &ea_krt_source,
+ &ea_krt_metric,
+ );
+}
diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h
index 968c5b16..e0d60cbd 100644
--- a/sysdep/unix/krt.h
+++ b/sysdep/unix/krt.h
@@ -21,8 +21,12 @@ struct kif_proto;
#define KRT_DEFAULT_ECMP_LIMIT 16
+#if 0
#define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0)
#define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1)
+#endif
+
+extern struct ea_class ea_krt_source, ea_krt_metric;
#define KRT_REF_SEEN 0x1 /* Seen in table */
#define KRT_REF_BEST 0x2 /* Best in table */
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index 7e8ea0dc..07d6c691 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -30,7 +30,7 @@
#include "lib/event.h"
#include "lib/timer.h"
#include "lib/string.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "nest/cli.h"
@@ -56,7 +56,7 @@ async_dump(void)
// XXXX tm_dump_all();
if_dump_all();
neigh_dump_all();
- rta_dump_all();
+ ea_dump_all();
rt_dump_all();
protos_dump_all();
@@ -116,7 +116,7 @@ add_num_const(char *name, int val, const char *file, const uint line)
struct f_val *v = cfg_alloc(sizeof(struct f_val));
*v = (struct f_val) { .type = T_INT, .val.i = val };
struct symbol *sym = cf_get_symbol(name);
- if (sym->class && (sym->scope == conf_this_scope))
+ if (sym->class && cf_symbol_is_local(sym))
cf_error("Error reading value for %s from %s:%d: already defined", name, file, line);
cf_define_symbol(sym, SYM_CONSTANT | T_INT, val, v);
@@ -682,7 +682,7 @@ signal_init(void)
* Parsing of command-line arguments
*/
-static char *opt_list = "B:c:dD:ps:P:u:g:flRh";
+static char *opt_list = "bc:dD:ps:P:u:g:flRh";
int parse_and_exit;
char *bird_name;
static char *use_user;
@@ -703,7 +703,6 @@ display_help(void)
fprintf(stderr,
"\n"
"Options: \n"
- " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n"
" -c <config-file> Use given configuration file instead of\n"
" " PATH_CONFIG_FILE "\n"
" -d Enable debug messages and run bird in foreground\n"
@@ -790,15 +789,12 @@ get_gid(const char *s)
return gr->gr_gid;
}
-extern _Bool alloc_multipage;
-
static void
parse_args(int argc, char **argv)
{
int config_changed = 0;
int socket_changed = 0;
int c;
- int bp;
bird_name = get_bird_name(argv[0], "bird");
if (argc == 2)
@@ -811,29 +807,6 @@ parse_args(int argc, char **argv)
while ((c = getopt(argc, argv, opt_list)) >= 0)
switch (c)
{
- case 'B':
- bp = atoi(optarg);
- if (bp < 1)
- {
- fprintf(stderr, "Strange block size power %d\n\n", bp);
- display_usage();
- exit(1);
- }
-
- if ((1 << bp) < page_size)
- {
- fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size);
- display_usage();
- exit(1);
- }
-
- if ((1L << bp) > page_size)
- {
- alloc_multipage = 1;
- page_size = (1L << bp);
- }
-
- break;
case 'c':
config_name = optarg;
config_changed = 1;
@@ -888,8 +861,6 @@ parse_args(int argc, char **argv)
}
}
-void resource_sys_init(void);
-
/*
* Hic Est main()
*/
@@ -902,7 +873,6 @@ main(int argc, char **argv)
dmalloc_debug(0x2f03d00);
#endif
- resource_sys_init();
parse_args(argc, argv);
log_switch(1, NULL, NULL);
@@ -911,8 +881,8 @@ main(int argc, char **argv)
resource_init();
timer_init();
olock_init();
- io_init();
rt_init();
+ io_init();
if_init();
// roa_init();
config_init();
@@ -936,8 +906,6 @@ main(int argc, char **argv)
open_pid_file();
protos_build();
- proto_build(&proto_unix_kernel);
- proto_build(&proto_unix_iface);
struct config *conf = read_config();