summary | refs | log | tree | commit | diff
path: root/sysdep
diff options
context:
space:
mode:
author Ondrej Zajicek (work) <santiago@crfreenet.org> 2016-11-08 17:03:31 +0100
committer Ondrej Zajicek (work) <santiago@crfreenet.org> 2016-11-08 17:04:29 +0100
commit cc5b93f72db80abd1262a0a5e1d8400ceef54385 (patch)
tree 42d75cb7898c6b6077e9cfbb04074cfc84e38930 /sysdep
parent 5de0e848de06a9187046dbc380d9ce6a6f8b21a2 (diff)
parent f51b1f556595108d53b9f4580bfcb96bfbc85442 (diff)
Merge tag 'v1.6.2' into int-new
Diffstat (limited to 'sysdep')
-rw-r--r-- sysdep/bsd/krt-sock.c 7
-rw-r--r-- sysdep/config.h 2
-rw-r--r-- sysdep/linux/krt-sys.h 4
-rw-r--r-- sysdep/linux/netlink.Y 10
-rw-r--r-- sysdep/linux/netlink.c 432
-rw-r--r-- sysdep/unix/io.c 29
-rw-r--r-- sysdep/unix/krt.Y 18
-rw-r--r-- sysdep/unix/krt.c 2
-rw-r--r-- sysdep/unix/log.c 9
-rw-r--r-- sysdep/unix/main.c 60
10 files changed, 459 insertions, 114 deletions
diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c
index 56026bdd..3440ed63 100644
--- a/sysdep/bsd/krt-sock.c
+++ b/sysdep/bsd/krt-sock.c
@@ -946,6 +946,12 @@ krt_sock_hook(sock *sk, int size UNUSED)
return 0;
}
+static void
+krt_sock_err_hook(sock *sk, int e UNUSED)
+{
+ krt_sock_hook(sk, 0);
+}
+
static sock *
krt_sock_open(pool *pool, void *data, int table_id)
{
@@ -967,6 +973,7 @@ krt_sock_open(pool *pool, void *data, int table_id)
sk = sk_new(pool);
sk->type = SK_MAGIC;
sk->rx_hook = krt_sock_hook;
+ sk->err_hook = krt_sock_err_hook;
sk->fd = fd;
sk->data = data;
diff --git a/sysdep/config.h b/sysdep/config.h
index a8d58349..c7f63e69 100644
--- a/sysdep/config.h
+++ b/sysdep/config.h
@@ -7,7 +7,7 @@
#define _BIRD_CONFIG_H_
/* BIRD version */
-#define BIRD_VERSION "1.6.0"
+#define BIRD_VERSION "1.6.2"
/* Include parameters determined by configure script */
#include "sysdep/autoconf.h"
diff --git a/sysdep/linux/krt-sys.h b/sysdep/linux/krt-sys.h
index 7fd5f139..6d6586d1 100644
--- a/sysdep/linux/krt-sys.h
+++ b/sysdep/linux/krt-sys.h
@@ -32,8 +32,11 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i) { return NULL; }
/* Kernel routes */
+#define KRT_ALLOW_MERGE_PATHS 1
+
#define EA_KRT_PREFSRC EA_CODE(EAP_KRT, 0x10)
#define EA_KRT_REALM EA_CODE(EAP_KRT, 0x11)
+#define EA_KRT_SCOPE EA_CODE(EAP_KRT, 0x12)
#define KRT_METRICS_MAX 0x10 /* RTAX_QUICKACK+1 */
@@ -86,6 +89,7 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i) { return NULL; }
struct krt_params {
u32 table_id; /* Kernel table ID we sync with */
+ u32 metric; /* Kernel metric used for all routes */
};
struct krt_state {
diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y
index e9c225a2..f577244d 100644
--- a/sysdep/linux/netlink.Y
+++ b/sysdep/linux/netlink.Y
@@ -10,8 +10,8 @@ CF_HDR
CF_DECLS
-CF_KEYWORDS(KERNEL, TABLE, KRT_PREFSRC, KRT_REALM, KRT_MTU, KRT_WINDOW, KRT_RTT,
- KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING,
+CF_KEYWORDS(KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW,
+ KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING,
KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK,
KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR,
KRT_LOCK_SSTRESH, KRT_LOCK_CWND, KRT_LOCK_ADVMSS, KRT_LOCK_REORDERING,
@@ -22,13 +22,13 @@ CF_GRAMMAR
CF_ADDTO(kern_proto, kern_proto kern_sys_item ';')
kern_sys_item:
- KERNEL TABLE expr {
- THIS_KRT->sys.table_id = $3;
- }
+ KERNEL TABLE expr { THIS_KRT->sys.table_id = $3; }
+ | METRIC expr { THIS_KRT->sys.metric = $2; }
;
CF_ADDTO(dynamic_attr, KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); })
CF_ADDTO(dynamic_attr, KRT_REALM { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REALM); })
+CF_ADDTO(dynamic_attr, KRT_SCOPE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SCOPE); })
CF_ADDTO(dynamic_attr, KRT_MTU { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_MTU); })
CF_ADDTO(dynamic_attr, KRT_WINDOW { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_WINDOW); })
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index 8146072b..7af575a7 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -20,7 +20,6 @@
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
-#include "lib/alloca.h"
#include "sysdep/unix/timer.h"
#include "sysdep/unix/unix.h"
#include "sysdep/unix/krt.h"
@@ -39,6 +38,10 @@
#define MSG_TRUNC 0x20
#endif
+#ifndef IFA_FLAGS
+#define IFA_FLAGS 8
+#endif
+
#ifndef IFF_LOWER_UP
#define IFF_LOWER_UP 0x10000
#endif
@@ -48,6 +51,45 @@
#endif
+#define krt_ecmp6(p) ((p)->af == AF_INET6)
+
+/*
+ * Structure nl_parse_state keeps state of received route processing. Ideally,
+ * we could just independently parse received Netlink messages and immediately
+ * propagate received routes to the rest of BIRD, but Linux kernel represents
+ * and announces IPv6 ECMP routes not as one route with multiple next hops (like
+ * RTA_MULTIPATH in IPv4 ECMP), but as a set of routes with the same prefix.
+ *
+ * Therefore, BIRD keeps currently processed route in nl_parse_state structure
+ * and postpones its propagation until we expect it to be final; i.e., when
+ * non-matching route is received or when the scan ends. When another matching
+ * route is received, it is merged with the already processed route to form an
+ * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
+ * postponing is done in both cases (for simplicity). All IPv4 routes are just
+ * considered non-matching.
+ *
+ * This is ignored for asynchronous notifications (every notification is handled
+ * as a separate route). It is not an issue for our routes, as we ignore such
+ * notifications anyway. However, importing alien IPv6 ECMP routes does not work
+ * properly.
+ */
+
+struct nl_parse_state
+{
+ struct linpool *pool;
+ int scan;
+ int merge;
+
+ net *net;
+ rta *attrs;
+ struct krt_proto *proto;
+ s8 new;
+ s8 krt_src;
+ u8 krt_type;
+ u8 krt_proto;
+ u32 krt_metric;
+};
+
/*
* Synchronous Netlink interface
*/
@@ -63,6 +105,13 @@ struct nl_sock
#define NL_RX_SIZE 8192
+#define NL_OP_DELETE 0
+#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
+#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
+#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
+
+static linpool *nl_linpool;
+
static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
@@ -166,7 +215,7 @@ nl_get_reply(struct nl_sock *nl)
static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
static int
-nl_error(struct nlmsghdr *h)
+nl_error(struct nlmsghdr *h, int ignore_esrch)
{
struct nlmsgerr *e;
int ec;
@@ -178,7 +227,7 @@ nl_error(struct nlmsghdr *h)
}
e = (struct nlmsgerr *) NLMSG_DATA(h);
ec = -e->error;
- if (ec)
+ if (ec && !(ignore_esrch && (ec == ESRCH)))
log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
return ec;
}
@@ -192,14 +241,14 @@ nl_get_scan(void)
return NULL;
if (h->nlmsg_type == NLMSG_ERROR)
{
- nl_error(h);
+ nl_error(h, 0);
return NULL;
}
return h;
}
static int
-nl_exchange(struct nlmsghdr *pkt)
+nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
{
struct nlmsghdr *h;
@@ -211,7 +260,7 @@ nl_exchange(struct nlmsghdr *pkt)
break;
log(L_WARN "nl_exchange: Unexpected reply received");
}
- return nl_error(h) ? -1 : 0;
+ return nl_error(h, ignore_esrch) ? -1 : 0;
}
/*
@@ -248,17 +297,19 @@ static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
};
-#define BIRD_IFA_MAX (IFA_ANYCAST+1)
+#define BIRD_IFA_MAX (IFA_FLAGS+1)
static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
[IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
[IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
[IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
+ [IFA_FLAGS] = { 1, 1, sizeof(u32) },
};
static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
[IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
[IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
+ [IFA_FLAGS] = { 1, 1, sizeof(u32) },
};
@@ -627,6 +678,7 @@ nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
{
struct rtattr *a[BIRD_IFA_MAX];
struct iface *ifi;
+ u32 ifa_flags;
int scope;
if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
@@ -650,10 +702,15 @@ nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
return;
}
+ if (a[IFA_FLAGS])
+ ifa_flags = rta_get_u32(a[IFA_FLAGS]);
+ else
+ ifa_flags = i->ifa_flags;
+
struct ifa ifa;
bzero(&ifa, sizeof(ifa));
ifa.iface = ifi;
- if (i->ifa_flags & IFA_F_SECONDARY)
+ if (ifa_flags & IFA_F_SECONDARY)
ifa.flags |= IA_SECONDARY;
ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
@@ -730,6 +787,7 @@ nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
{
struct rtattr *a[BIRD_IFA_MAX];
struct iface *ifi;
+ u32 ifa_flags;
int scope;
if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
@@ -748,14 +806,22 @@ nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
return;
}
+ if (a[IFA_FLAGS])
+ ifa_flags = rta_get_u32(a[IFA_FLAGS]);
+ else
+ ifa_flags = i->ifa_flags;
+
struct ifa ifa;
bzero(&ifa, sizeof(ifa));
ifa.iface = ifi;
- if (i->ifa_flags & IFA_F_SECONDARY)
+ if (ifa_flags & IFA_F_SECONDARY)
ifa.flags |= IA_SECONDARY;
- /* IFA_LOCAL can be unset for IPv6 interfaces */
+ /* Ignore tentative addresses silently */
+ if (ifa_flags & IFA_F_TENTATIVE)
+ return;
+ /* IFA_LOCAL can be unset for IPv6 interfaces */
ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
@@ -916,12 +982,13 @@ nh_bufsize(struct mpnh *nh)
}
static int
-nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
+nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface)
{
eattr *ea;
net *net = e->net;
rta *a = e->attrs;
int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops);
+ u32 priority = 0;
struct {
struct nlmsghdr h;
@@ -932,13 +999,13 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
int rsize = sizeof(*r) + bufsize;
r = alloca(rsize);
- DBG("nl_send_route(%N,new=%d)\n", net->n.addr, new);
+ DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
bzero(&r->h, sizeof(r->h));
bzero(&r->r, sizeof(r->r));
- r->h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
+ r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
- r->h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0);
+ r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
r->r.rtm_family = p->af;
r->r.rtm_dst_len = net_pxlen(net->n.addr);
@@ -946,18 +1013,37 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
r->r.rtm_scope = RT_SCOPE_UNIVERSE;
nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
+ /*
+ * Strange behavior for RTM_DELROUTE:
+ * 1) rtm_family is ignored in IPv6, works for IPv4
+ * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
+ * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
+ */
+
if (krt_table_id(p) < 256)
r->r.rtm_table = krt_table_id(p);
else
nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
- /* For route delete, we do not specify route attributes */
- if (!new)
- return nl_exchange(&r->h);
+ if (a->source == RTS_DUMMY)
+ priority = e->u.krt.metric;
+ else if (KRT_CF->sys.metric)
+ priority = KRT_CF->sys.metric;
+ else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
+ priority = ea->u.data;
+ if (priority)
+ nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority); /* r is a pointer: pass the allocated size, not sizeof(r) */
- if (ea = ea_find(eattrs, EA_KRT_METRIC))
- nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, ea->u.data);
+ /* For route delete, we do not specify remaining route attributes */
+ if (op == NL_OP_DELETE)
+ goto dest;
+
+ /* Default scope is LINK for device routes, UNIVERSE otherwise */
+ if (ea = ea_find(eattrs, EA_KRT_SCOPE))
+ r->r.rtm_scope = ea->u.data;
+ else
+ r->r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
@@ -981,18 +1067,18 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
+dest:
/* a->iface != NULL checked in krt_capable() for router and device routes */
-
- switch (a->dest)
+ switch (dest)
{
case RTD_ROUTER:
r->r.rtm_type = RTN_UNICAST;
- nl_add_attr_u32(&r->h, rsize, RTA_OIF, a->iface->index);
- nl_add_attr_ipa(&r->h, rsize, RTA_GATEWAY, a->gw);
+ nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index);
+ nl_add_attr_ipa(&r->h, rsize, RTA_GATEWAY, gw);
break;
case RTD_DEVICE:
r->r.rtm_type = RTN_UNICAST;
- nl_add_attr_u32(&r->h, rsize, RTA_OIF, a->iface->index);
+ nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index);
break;
case RTD_BLACKHOLE:
r->r.rtm_type = RTN_BLACKHOLE;
@@ -1007,11 +1093,50 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
r->r.rtm_type = RTN_UNICAST;
nl_add_multipath(&r->h, rsize, a->nexthops);
break;
+ case RTD_NONE:
+ break;
default:
bug("krt_capable inconsistent with nl_send_route");
}
- return nl_exchange(&r->h);
+ /* Ignore missing for DELETE */
+ return nl_exchange(&r->h, (op == NL_OP_DELETE));
+}
+
+static inline int
+nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
+{
+ rta *a = e->attrs;
+ int err = 0;
+
+ if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH))
+ {
+ struct mpnh *nh = a->nexthops;
+
+ err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface);
+ if (err < 0)
+ return err;
+
+ for (nh = nh->next; nh; nh = nh->next)
+ err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface);
+
+ return err;
+ }
+
+ return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface);
+}
+
+static inline int
+nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
+{
+ int err = 0;
+
+ /* For IPv6, we just repeatedly request DELETE until we get error */
+ do
+ err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL);
+ while (krt_ecmp6(p) && !err);
+
+ return err;
}
void
@@ -1020,17 +1145,21 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list
int err = 0;
/*
- * NULL for eattr of the old route is a little hack, but we don't
- * get proper eattrs for old in rt_notify() anyway. NULL means no
- * extended route attributes and therefore matches if the kernel
- * route has any of them.
+ * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
+ *
+ * 1) Does not check for matching rtm_protocol
+ * 2) Has broken semantics for IPv6 ECMP
+ * 3) Crashes some kernel version when used for IPv6 ECMP
+ *
+ * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
+ * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
*/
if (old)
- nl_send_route(p, old, NULL, 0);
+ nl_delete_rte(p, old, eattrs);
if (new)
- err = nl_send_route(p, new, eattrs, 1);
+ err = nl_add_rte(p, new, eattrs);
if (err < 0)
n->n.flags |= KRF_SYNC_ERROR;
@@ -1039,10 +1168,80 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list
}
+static inline struct mpnh *
+nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
+{
+ struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh));
+
+ nh->gw = gw;
+ nh->iface = iface;
+ nh->next = NULL;
+ nh->weight = weight;
+
+ return nh;
+}
+
+static int
+nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
+{
+ /* Route merging must be active */
+ if (!s->merge)
+ return 0;
+
+ /* Saved and new route must have same network, proto/table, and priority */
+ if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
+ return 0;
+
+ /* Both must be regular unicast routes */
+ if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
+ return 0;
+
+ return 1;
+}
+
+static void
+nl_announce_route(struct nl_parse_state *s)
+{
+ rte *e = rte_get_temp(s->attrs);
+ e->net = s->net;
+ e->u.krt.src = s->krt_src;
+ e->u.krt.proto = s->krt_proto;
+ e->u.krt.seen = 0;
+ e->u.krt.best = 0;
+ e->u.krt.metric = s->krt_metric;
+
+ if (s->scan)
+ krt_got_route(s->proto, e);
+ else
+ krt_got_route_async(s->proto, e, s->new);
+
+ s->net = NULL;
+ s->attrs = NULL;
+ s->proto = NULL;
+ lp_flush(s->pool);
+}
+
+static inline void
+nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
+{
+ memset(s, 0, sizeof (struct nl_parse_state));
+ s->pool = nl_linpool;
+ s->scan = scan;
+ s->merge = merge;
+}
+
+static inline void
+nl_parse_end(struct nl_parse_state *s)
+{
+ if (s->net)
+ nl_announce_route(s);
+}
+
+
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
static void
-nl_parse_route(struct nlmsghdr *h, int scan)
+nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
{
struct krt_proto *p;
struct rtmsg *i;
@@ -1052,6 +1251,8 @@ nl_parse_route(struct nlmsghdr *h, int scan)
net_addr dst;
u32 oif = ~0;
u32 table_id;
+ u32 priority = 0;
+ u32 def_scope = RT_SCOPE_UNIVERSE;
int src;
if (!(i = nl_checkin(h, sizeof(*i))))
@@ -1069,9 +1270,9 @@ nl_parse_route(struct nlmsghdr *h, int scan)
net_fill_ip4(&dst, IP4_NONE, 0);
break;
- case AF_INET6:
- if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
- return;
+ case AF_INET6:
+ if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
+ return;
if (a[RTA_DST])
net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
@@ -1096,24 +1297,22 @@ nl_parse_route(struct nlmsghdr *h, int scan)
if (!p)
SKIP("unknown table %d\n", table);
-
if (a[RTA_IIF])
SKIP("IIF set\n");
if (i->rtm_tos != 0) /* We don't support TOS */
SKIP("TOS %02x\n", i->rtm_tos);
- if (scan && !new)
+ if (s->scan && !new)
SKIP("RTM_DELROUTE in scan\n");
+ if (a[RTA_PRIORITY])
+ priority = rta_get_u32(a[RTA_PRIORITY]);
+
int c = net_classify(&dst);
if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
SKIP("strange class/scope\n");
- // ignore rtm_scope, it is not a real scope
- // if (i->rtm_scope != RT_SCOPE_UNIVERSE)
- // SKIP("scope %u\n", i->rtm_scope);
-
switch (i->rtm_protocol)
{
case RTPROT_UNSPEC:
@@ -1128,7 +1327,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
return;
case RTPROT_BIRD:
- if (!scan)
+ if (!s->scan)
SKIP("echo\n");
src = KRT_SRC_BIRD;
break;
@@ -1140,12 +1339,14 @@ nl_parse_route(struct nlmsghdr *h, int scan)
net *net = net_get(p->p.main_channel->table, &dst);
- rta ra = {
- .src= p->p.main_source,
- .source = RTS_INHERIT,
- .scope = SCOPE_UNIVERSE,
- .cast = RTC_UNICAST
- };
+ if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
+ nl_announce_route(s);
+
+ rta *ra = lp_allocz(s->pool, sizeof(rta));
+ ra->src = p->p.main_source;
+ ra->source = RTS_INHERIT;
+ ra->scope = SCOPE_UNIVERSE;
+ ra->cast = RTC_UNICAST;
switch (i->rtm_type)
{
@@ -1153,9 +1354,9 @@ nl_parse_route(struct nlmsghdr *h, int scan)
if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
{
- ra.dest = RTD_MULTIPATH;
- ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
- if (!ra.nexthops)
+ ra->dest = RTD_MULTIPATH;
+ ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
+ if (!ra->nexthops)
{
log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
return;
@@ -1164,8 +1365,8 @@ nl_parse_route(struct nlmsghdr *h, int scan)
break;
}
- ra.iface = if_find_by_index(oif);
- if (!ra.iface)
+ ra->iface = if_find_by_index(oif);
+ if (!ra->iface)
{
log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
return;
@@ -1173,37 +1374,38 @@ nl_parse_route(struct nlmsghdr *h, int scan)
if (a[RTA_GATEWAY])
{
- ra.dest = RTD_ROUTER;
- ra.gw = rta_get_ipa(a[RTA_GATEWAY]);
+ ra->dest = RTD_ROUTER;
+ ra->gw = rta_get_ipa(a[RTA_GATEWAY]);
/* Silently skip strange 6to4 routes */
const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
- if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra.gw, (net_addr *) &sit))
+ if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->gw, (net_addr *) &sit))
return;
neighbor *nbr;
- nbr = neigh_find2(&p->p, &ra.gw, ra.iface,
+ nbr = neigh_find2(&p->p, &ra->gw, ra->iface,
(i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
if (!nbr || (nbr->scope == SCOPE_HOST))
{
- log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, ra.gw);
+ log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, ra->gw);
return;
}
}
else
{
- ra.dest = RTD_DEVICE;
+ ra->dest = RTD_DEVICE;
+ def_scope = RT_SCOPE_LINK;
}
break;
case RTN_BLACKHOLE:
- ra.dest = RTD_BLACKHOLE;
+ ra->dest = RTD_BLACKHOLE;
break;
case RTN_UNREACHABLE:
- ra.dest = RTD_UNREACHABLE;
+ ra->dest = RTD_UNREACHABLE;
break;
case RTN_PROHIBIT:
- ra.dest = RTD_PROHIBIT;
+ ra->dest = RTD_PROHIBIT;
break;
/* FIXME: What about RTN_THROW? */
default:
@@ -1211,39 +1413,41 @@ nl_parse_route(struct nlmsghdr *h, int scan)
return;
}
- rte *e = rte_get_temp(&ra);
- e->net = net;
- e->u.krt.src = src;
- e->u.krt.proto = i->rtm_protocol;
- e->u.krt.seen = 0;
- e->u.krt.best = 0;
- e->u.krt.metric = 0;
-
- if (a[RTA_PRIORITY])
- e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]);
+ if (i->rtm_scope != def_scope)
+ {
+ ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
+ ea->next = ra->eattrs;
+ ra->eattrs = ea;
+ ea->flags = EALF_SORTED;
+ ea->count = 1;
+ ea->attrs[0].id = EA_KRT_SCOPE;
+ ea->attrs[0].flags = 0;
+ ea->attrs[0].type = EAF_TYPE_INT;
+ ea->attrs[0].u.data = i->rtm_scope;
+ }
if (a[RTA_PREFSRC])
{
ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
- ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
- ea->next = ra.eattrs;
- ra.eattrs = ea;
+ ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
+ ea->next = ra->eattrs;
+ ra->eattrs = ea;
ea->flags = EALF_SORTED;
ea->count = 1;
ea->attrs[0].id = EA_KRT_PREFSRC;
ea->attrs[0].flags = 0;
ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
- ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(ps));
+ ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
ea->attrs[0].u.ptr->length = sizeof(ps);
memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
}
if (a[RTA_FLOW])
{
- ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
- ea->next = ra.eattrs;
- ra.eattrs = ea;
+ ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
+ ea->next = ra->eattrs;
+ ra->eattrs = ea;
ea->flags = EALF_SORTED;
ea->count = 1;
ea->attrs[0].id = EA_KRT_REALM;
@@ -1255,7 +1459,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
if (a[RTA_METRICS])
{
u32 metrics[KRT_METRICS_MAX];
- ea_list *ea = alloca(sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
+ ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
int t, n = 0;
if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
@@ -1276,37 +1480,69 @@ nl_parse_route(struct nlmsghdr *h, int scan)
if (n > 0)
{
- ea->next = ra.eattrs;
+ ea->next = ra->eattrs;
ea->flags = EALF_SORTED;
ea->count = n;
- ra.eattrs = ea;
+ ra->eattrs = ea;
}
}
- if (scan)
- krt_got_route(p, e);
+ /*
+ * Ideally, now we would send the received route to the rest of kernel code.
+ * But IPv6 ECMP routes are sent as a sequence of routes, so we postpone it
+ * and merge next hops until the end of the sequence.
+ */
+
+ if (!s->net)
+ {
+ /* Store the new route */
+ s->net = net;
+ s->attrs = ra;
+ s->proto = p;
+ s->new = new;
+ s->krt_src = src;
+ s->krt_type = i->rtm_type;
+ s->krt_proto = i->rtm_protocol;
+ s->krt_metric = priority;
+ }
else
- krt_got_route_async(p, e, new);
+ {
+ /* Merge next hops with the stored route */
+ rta *a = s->attrs;
+
+ if (a->dest != RTD_MULTIPATH)
+ {
+ a->dest = RTD_MULTIPATH;
+ a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0);
+ }
+
+ mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0));
+ }
}
void
krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
{
struct nlmsghdr *h;
+ struct nl_parse_state s;
+ nl_parse_begin(&s, 1, 0);
nl_request_dump(AF_INET, RTM_GETROUTE);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
- nl_parse_route(h, 1);
+ nl_parse_route(&s, h);
else
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
+ nl_parse_end(&s);
+ nl_parse_begin(&s, 1, 1);
nl_request_dump(AF_INET6, RTM_GETROUTE);
while (h = nl_get_scan())
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
- nl_parse_route(h, 1);
+ nl_parse_route(&s, h);
else
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
+ nl_parse_end(&s);
}
/*
@@ -1319,12 +1555,16 @@ static byte *nl_async_rx_buffer; /* Receive buffer */
static void
nl_async_msg(struct nlmsghdr *h)
{
+ struct nl_parse_state s;
+
switch (h->nlmsg_type)
{
case RTM_NEWROUTE:
case RTM_DELROUTE:
DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
- nl_parse_route(h, 0);
+ nl_parse_begin(&s, 0, 0);
+ nl_parse_route(&s, h);
+ nl_parse_end(&s);
break;
case RTM_NEWLINK:
case RTM_DELLINK:
@@ -1397,6 +1637,12 @@ nl_async_hook(sock *sk, int size UNUSED)
}
static void
+nl_async_err_hook(sock *sk, int e UNUSED)
+{
+ nl_async_hook(sk, 0);
+}
+
+static void
nl_open_async(void)
{
sock *sk;
@@ -1433,6 +1679,7 @@ nl_open_async(void)
sk = nl_async_sk = sk_new(krt_pool);
sk->type = SK_MAGIC;
sk->rx_hook = nl_async_hook;
+ sk->err_hook = nl_async_err_hook;
sk->fd = fd;
if (sk_open(sk) < 0)
bug("Netlink: sk_open failed");
@@ -1446,6 +1693,7 @@ nl_open_async(void)
void
krt_sys_io_init(void)
{
+ nl_linpool = lp_new(krt_pool, 4080);
HASH_INIT(nl_table_map, krt_pool, 6);
}
@@ -1478,19 +1726,21 @@ krt_sys_shutdown(struct krt_proto *p)
int
krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
{
- return n->sys.table_id == o->sys.table_id;
+ return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
}
void
krt_sys_init_config(struct krt_config *cf)
{
cf->sys.table_id = RT_TABLE_MAIN;
+ cf->sys.metric = 0;
}
void
krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
{
d->sys.table_id = s->sys.table_id;
+ d->sys.metric = s->sys.metric;
}
static const char *krt_metrics_names[KRT_METRICS_MAX] = {
@@ -1515,6 +1765,10 @@ krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
bsprintf(buf, "realm");
return GA_NAME;
+ case EA_KRT_SCOPE:
+ bsprintf(buf, "scope");
+ return GA_NAME;
+
case EA_KRT_LOCK:
buf += bsprintf(buf, "lock:");
ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 5ec728af..e90964c1 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -1893,6 +1893,20 @@ int sk_is_ipv6(sock *s)
{ return s->af == AF_INET6; }
void
+sk_err(sock *s, int revents)
+{
+ int se = 0; socklen_t sse = sizeof(se); /* getsockopt() takes a socklen_t * length */
+ if ((s->type != SK_MAGIC) && (revents & POLLERR))
+ if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
+ {
+ log(L_ERR "IO: Socket error: SO_ERROR: %m");
+ se = 0;
+ }
+
+ s->err_hook(s, se);
+}
+
+void
sk_dump_all(void)
{
node *n;
@@ -2202,7 +2216,7 @@ io_loop(void)
int steps;
steps = MAX_STEPS;
- if (s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
+ if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
do
{
steps--;
@@ -2224,6 +2238,7 @@ io_loop(void)
goto next;
}
while (e && steps);
+
current_sock = sk_next(s);
next: ;
}
@@ -2247,18 +2262,26 @@ io_loop(void)
goto next2;
}
- if (!s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
+ if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
{
count++;
io_log_event(s->rx_hook, s->data);
sk_read(s, pfd[s->index].revents);
if (s != current_sock)
- goto next2;
+ goto next2;
}
+
+ if (pfd[s->index].revents & (POLLHUP | POLLERR))
+ {
+ sk_err(s, pfd[s->index].revents);
+ goto next2;
+ }
+
current_sock = sk_next(s);
next2: ;
}
+
stored_sock = current_sock;
}
}
diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y
index 91317d97..33dc4a19 100644
--- a/sysdep/unix/krt.Y
+++ b/sysdep/unix/krt.Y
@@ -29,6 +29,8 @@ CF_DECLS
CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS)
+%type <i> kern_mp_limit
+
CF_GRAMMAR
/* Kernel syncer protocol */
@@ -43,6 +45,11 @@ kern_proto_start: proto_start KERNEL {
CF_ADDTO(kern_proto, kern_proto_start proto_name '{')
CF_ADDTO(kern_proto, kern_proto kern_item ';')
+kern_mp_limit:
+ /* empty */ { $$ = KRT_DEFAULT_ECMP_LIMIT; }
+ | LIMIT expr { $$ = $2; if (($2 <= 0) || ($2 > 255)) cf_error("Merge paths limit must be in range 1-255"); }
+ ;
+
kern_item:
proto_item
| proto_channel { this_proto->net_type = $1->net_type; }
@@ -55,13 +62,18 @@ kern_item:
THIS_KRT->learn = $2;
#ifndef KRT_ALLOW_LEARN
if ($2)
- cf_error("Learning of kernel routes not supported in this configuration");
+ cf_error("Learning of kernel routes not supported on this platform");
#endif
}
| DEVICE ROUTES bool { THIS_KRT->devroutes = $3; }
| GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; }
- | MERGE PATHS bool { krt_set_merge_paths(this_channel, $3, KRT_DEFAULT_ECMP_LIMIT); }
- | MERGE PATHS bool LIMIT expr { krt_set_merge_paths(this_channel, $3, $5); }
+ | MERGE PATHS bool kern_mp_limit {
+ krt_set_merge_paths(this_channel, $3, $4);
+#ifndef KRT_ALLOW_MERGE_PATHS
+ if ($3)
+ cf_error("Path merging not supported on this platform");
+#endif
+ }
;
/* Kernel interface protocol */
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 6531bb28..d4cb964e 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -604,7 +604,7 @@ krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa)
rte *rt;
if (c->ra_mode == RA_MERGED)
- return rt_export_merged(c, net, rt_free, tmpa, 1);
+ return rt_export_merged(c, net, rt_free, tmpa, krt_filter_lp, 1);
rt = net->routes;
*rt_free = NULL;
diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c
index 9c56eb24..e5c5e74e 100644
--- a/sysdep/unix/log.c
+++ b/sysdep/unix/log.c
@@ -20,6 +20,7 @@
#include <stdarg.h>
#include <time.h>
#include <unistd.h>
+#include <errno.h>
#include "nest/bird.h"
#include "nest/cli.h"
@@ -209,6 +210,7 @@ bug(const char *msg, ...)
va_start(args, msg);
vlog(L_BUG[0], msg, args);
+ va_end(args);
abort();
}
@@ -226,6 +228,7 @@ die(const char *msg, ...)
va_start(args, msg);
vlog(L_FATAL[0], msg, args);
+ va_end(args);
exit(1);
}
@@ -312,7 +315,11 @@ log_init_debug(char *f)
else if (!*f)
dbgf = stderr;
else if (!(dbgf = fopen(f, "a")))
- log(L_ERR "Error opening debug file `%s': %m", f);
+ {
+ /* Cannot use die() nor log() here, logging is not yet initialized */
+ fprintf(stderr, "bird: Unable to open debug file %s: %s\n", f, strerror(errno));
+ exit(1);
+ }
if (dbgf)
setvbuf(dbgf, NULL, _IONBF, 0);
}
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index 1f47680e..9594269d 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -621,7 +621,7 @@ signal_init(void)
* Parsing of command-line arguments
*/
-static char *opt_list = "c:dD:ps:P:u:g:flR";
+static char *opt_list = "c:dD:ps:P:u:g:flRh";
static int parse_and_exit;
char *bird_name;
static char *use_user;
@@ -629,10 +629,43 @@ static char *use_group;
static int run_in_foreground = 0;
static void
-usage(void)
+display_usage(void)
{
- fprintf(stderr, "Usage: %s [-c <config-file>] [-d] [-D <debug-file>] [-p] [-s <control-socket>] [-P <pid-file>] [-u <user>] [-g <group>] [-f] [-l] [-R]\n", bird_name);
- exit(1);
+ fprintf(stderr, "Usage: %s [--version] [--help] [-c <config-file>] [OPTIONS]\n", bird_name);
+}
+
+static void
+display_help(void)
+{
+ display_usage();
+
+ fprintf(stderr,
+ "\n"
+ "Options: \n"
+ " -c <config-file> Use given configuration file instead\n"
+ " of prefix/etc/bird.conf\n"
+ " -d Enable debug messages and run bird in foreground\n"
+ " -D <debug-file> Log debug messages to given file instead of stderr\n"
+ " -f Run bird in foreground\n"
+ " -g <group> Use given group ID\n"
+ " -h, --help Display this information\n"
+ " -l Look for a configuration file and a communication socket\n"
+ " file in the current working directory\n"
+ " -p Test configuration file and exit without start\n"
+ " -P <pid-file> Create a PID file with given filename\n"
+ " -R Apply graceful restart recovery after start\n"
+ " -s <control-socket> Use given filename for a control socket\n"
+ " -u <user> Drop privileges and use given user ID\n"
+ " --version Display version of BIRD\n");
+
+ exit(0);
+}
+
+static void
+display_version(void)
+{
+ fprintf(stderr, "BIRD version " BIRD_VERSION "\n");
+ exit(0);
}
static inline char *
@@ -706,12 +739,9 @@ parse_args(int argc, char **argv)
if (argc == 2)
{
if (!strcmp(argv[1], "--version"))
- {
- fprintf(stderr, "BIRD version " BIRD_VERSION "\n");
- exit(0);
- }
+ display_version();
if (!strcmp(argv[1], "--help"))
- usage();
+ display_help();
}
while ((c = getopt(argc, argv, opt_list)) >= 0)
switch (c)
@@ -755,11 +785,19 @@ parse_args(int argc, char **argv)
case 'R':
graceful_restart_recovery();
break;
+ case 'h':
+ display_help();
+ break;
default:
- usage();
+ fputc('\n', stderr);
+ display_usage();
+ exit(1);
}
if (optind < argc)
- usage();
+ {
+ display_usage();
+ exit(1);
+ }
}
/*