summaryrefslogtreecommitdiff
path: root/sysdep/unix
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2014-02-06 17:46:01 +0100
committerOndrej Zajicek <santiago@crfreenet.org>2014-02-06 17:46:01 +0100
commit48e5f32db676645640f84ab3d630cce975aa6b20 (patch)
treeb940fc8156b3e0c18aab6c339a066bdb7a9ec1e0 /sysdep/unix
parentf48fa14214301382b2e6b134788a7506b61b664f (diff)
Many changes in I/O and OSPF sockets and packet handling.
I/O: - BSD: specify src addr on IP sockets by IP_HDRINCL - BSD: specify src addr on UDP sockets by IP_SENDSRCADDR - Linux: specify src addr on IP/UDP sockets by IP_PKTINFO - IPv6: specify src addr on IP/UDP sockets by IPV6_PKTINFO - Alternative SKF_BIND flag for binding to IP address - Allows IP/UDP sockets without tx_hook, on these sockets a packet is discarded when TX queue is full - Use consistently SOL_ for socket layer values. OSPF: - Packet src addr is always explicitly set - Support for secondary addresses in BSD - Dynamic RX/TX buffers - Fixes some minor buffer overruns - Interface option 'tx length' - Names for vlink pseudoifaces (vlinkX) - Vlinks use separate socket for TX - Vlinks do not use fixed associated iface - Fixes TTL for direct unicast packets - Fixes DONTROUTE for OSPF sockets - Use ifa->ifname instead of ifa->iface->name
Diffstat (limited to 'sysdep/unix')
-rw-r--r--sysdep/unix/io.c294
1 files changed, 206 insertions, 88 deletions
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 6e3f1e4d..428f24cc 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -23,6 +23,8 @@
#include <fcntl.h>
#include <errno.h>
#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
#include <netinet/icmp6.h>
#include "nest/bird.h"
@@ -489,6 +491,11 @@ tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
#define SOL_IPV6 IPPROTO_IPV6
#endif
+#ifndef SOL_ICMPV6
+#define SOL_ICMPV6 IPPROTO_ICMPV6
+#endif
+
+
static list sock_list;
static struct birdsock *current_sock;
static struct birdsock *stored_sock;
@@ -553,6 +560,43 @@ sk_free(resource *r)
}
void
+sk_set_rbsize(sock *s, uint val)
+{
+ ASSERT(s->rbuf_alloc == s->rbuf);
+
+ if (s->rbsize == val)
+ return;
+
+ s->rbsize = val;
+ xfree(s->rbuf_alloc);
+ s->rbuf_alloc = xmalloc(val);
+ s->rpos = s->rbuf = s->rbuf_alloc;
+}
+
+void
+sk_set_tbsize(sock *s, uint val)
+{
+ ASSERT(s->tbuf_alloc == s->tbuf);
+
+ if (s->tbsize == val)
+ return;
+
+ byte *old_tbuf = s->tbuf;
+
+ s->tbsize = val;
+ s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
+ s->tpos = s->tbuf + (s->tpos - old_tbuf);
+ s->ttx = s->tbuf + (s->ttx - old_tbuf);
+}
+
+void
+sk_set_tbuf(sock *s, void *tbuf)
+{
+ s->tbuf = tbuf ?: s->tbuf_alloc;
+ s->ttx = s->tpos = s->tbuf;
+}
+
+void
sk_reallocate(sock *s)
{
sk_free_bufs(s);
@@ -703,11 +747,11 @@ sysio_register_cmsgs(sock *s)
int ok = 1;
if ((s->flags & SKF_LADDR_RX) &&
- (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0))
+ (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0))
return "IPV6_RECVPKTINFO";
if ((s->flags & SKF_TTL_RX) &&
- (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0))
+ (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0))
return "IPV6_RECVHOPLIMIT";
return NULL;
@@ -722,10 +766,10 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
{
- if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
+ if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
pi = (struct in6_pktinfo *) CMSG_DATA(cm);
- if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT)
+ if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT)
hlim = (int *) CMSG_DATA(cm);
}
@@ -749,32 +793,27 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
return;
}
-/*
static void
sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
{
struct cmsghdr *cm;
struct in6_pktinfo *pi;
- if (!(s->flags & SKF_LADDR_TX))
- return;
-
msg->msg_control = cbuf;
msg->msg_controllen = cbuflen;
cm = CMSG_FIRSTHDR(msg);
- cm->cmsg_level = IPPROTO_IPV6;
+ cm->cmsg_level = SOL_IPV6;
cm->cmsg_type = IPV6_PKTINFO;
cm->cmsg_len = CMSG_LEN(sizeof(*pi));
pi = (struct in6_pktinfo *) CMSG_DATA(cm);
- set_inaddr(&pi->ipi6_addr, s->saddr);
pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
+ set_inaddr(&pi->ipi6_addr, s->saddr);
msg->msg_controllen = cm->cmsg_len;
- return;
}
-*/
+
#endif
static char *
@@ -786,11 +825,6 @@ sk_set_ttl_int(sock *s)
#else
if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
return "IP_TTL";
-#ifdef CONFIG_UNIX_DONTROUTE
- int one = 1;
- if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
- return "SO_DONTROUTE";
-#endif
#endif
return NULL;
}
@@ -801,6 +835,7 @@ sk_set_ttl_int(sock *s)
static char *
sk_setup(sock *s)
{
+ int one = 1;
int fd = s->fd;
char *err = NULL;
@@ -809,6 +844,41 @@ sk_setup(sock *s)
if (s->type == SK_UNIX)
return NULL;
+ if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
+ s->flags |= SKF_PKTINFO;
+
+#ifdef CONFIG_USE_HDRINCL
+ if ((s->type == SK_IP) && (s->flags & SKF_PKTINFO))
+ {
+ s->flags &= ~SKF_PKTINFO;
+ s->flags |= SKF_HDRINCL;
+ if (setsockopt(fd, SOL_IP, IP_HDRINCL, &one, sizeof(one)) < 0)
+ ERR("IP_HDRINCL");
+ }
+#endif
+
+ if (s->iface)
+ {
+#ifdef SO_BINDTODEVICE
+ struct ifreq ifr;
+ strcpy(ifr.ifr_name, s->iface->name);
+ if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
+ ERR("SO_BINDTODEVICE");
+#endif
+
+#ifdef CONFIG_UNIX_DONTROUTE
+ if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
+ ERR("SO_DONTROUTE");
+#endif
+ }
+
+ if ((s->ttl >= 0) && (err = sk_set_ttl_int(s)))
+ goto bad;
+
+ if (err = sysio_register_cmsgs(s))
+ goto bad;
+
+
#ifdef IPV6
if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0)
WARN("IPV6_TCLASS");
@@ -821,15 +891,10 @@ sk_setup(sock *s)
sk_set_priority(s, s->priority);
#ifdef IPV6
- int v = 1;
- if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
+ if ((s->flags & SKF_V6ONLY) && setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0)
WARN("IPV6_V6ONLY");
#endif
- if ((s->ttl >= 0) && (err = sk_set_ttl_int(s)))
- goto bad;
-
- err = sysio_register_cmsgs(s);
bad:
return err;
}
@@ -926,7 +991,7 @@ sk_set_broadcast(sock *s, int enable)
int
sk_set_ipv6_checksum(sock *s, int offset)
{
- if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
+ if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
{
log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
return -1;
@@ -945,7 +1010,7 @@ sk_set_icmp_filter(sock *s, int p1, int p2)
ICMP6_FILTER_SETPASS(p1, &f);
ICMP6_FILTER_SETPASS(p2, &f);
- if (setsockopt(s->fd, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
+ if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
{
log(L_ERR "sk_setup_icmp_filter: ICMP6_FILTER: %m");
return -1;
@@ -961,7 +1026,7 @@ sk_setup_multicast(sock *s)
int zero = 0;
int index;
- ASSERT(s->iface && s->iface->addr);
+ ASSERT(s->iface);
index = s->iface->index;
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
@@ -971,9 +1036,6 @@ sk_setup_multicast(sock *s)
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
ERR("IPV6_MULTICAST_IF");
- if (err = sysio_bind_to_iface(s))
- goto bad;
-
return 0;
bad:
@@ -981,18 +1043,17 @@ bad:
return -1;
}
+#ifdef CONFIG_IPV6_GLIBC_20
+#define ipv6mr_interface ipv6mr_ifindex
+#endif
+
int
sk_join_group(sock *s, ip_addr maddr)
{
struct ipv6_mreq mreq;
set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
-
-#ifdef CONFIG_IPV6_GLIBC_20
- mreq.ipv6mr_ifindex = s->iface->index;
-#else
mreq.ipv6mr_interface = s->iface->index;
-#endif
if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0)
{
@@ -1009,12 +1070,7 @@ sk_leave_group(sock *s, ip_addr maddr)
struct ipv6_mreq mreq;
set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
-
-#ifdef CONFIG_IPV6_GLIBC_20
- mreq.ipv6mr_ifindex = s->iface->index;
-#else
mreq.ipv6mr_interface = s->iface->index;
-#endif
if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0)
{
@@ -1032,7 +1088,7 @@ sk_setup_multicast(sock *s)
{
char *err;
- ASSERT(s->iface && s->iface->addr);
+ ASSERT(s->iface);
if (err = sysio_setup_multicast(s))
{
@@ -1142,31 +1198,45 @@ int
sk_open(sock *s)
{
int fd;
- sockaddr sa;
int one = 1;
- int type = s->type;
- int has_src = ipa_nonzero(s->saddr) || s->sport;
+ int do_bind = 0;
+ int bind_port = 0;
+ ip_addr bind_addr = IPA_NONE;
+ sockaddr sa;
char *err;
- switch (type)
+ switch (s->type)
{
case SK_TCP_ACTIVE:
s->ttx = ""; /* Force s->ttx != s->tpos */
/* Fall thru */
case SK_TCP_PASSIVE:
fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
+ bind_port = s->sport;
+ bind_addr = s->saddr;
+ do_bind = bind_port || ipa_nonzero(bind_addr);
break;
+
case SK_UDP:
fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
+ bind_port = s->sport;
+ bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
+ do_bind = 1;
break;
+
case SK_IP:
fd = socket(BIRD_PF, SOCK_RAW, s->dport);
+ bind_port = 0;
+ bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
+ do_bind = ipa_nonzero(bind_addr);
break;
+
case SK_MAGIC:
fd = s->fd;
break;
+
default:
- bug("sk_open() called for invalid sock type %d", type);
+ bug("sk_open() called for invalid sock type %d", s->type);
}
if (fd < 0)
die("sk_open: socket: %m");
@@ -1175,31 +1245,28 @@ sk_open(sock *s)
if (err = sk_setup(s))
goto bad;
- if (has_src)
+ if (do_bind)
{
- int port;
-
- if (type == SK_IP)
- port = 0;
- else
+ if (bind_port)
{
- port = s->sport;
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
ERR("SO_REUSEADDR");
-
+
#ifdef CONFIG_NO_IFACE_BIND
/* Workaround missing ability to bind to an iface */
- if ((type == SK_UDP) && s->iface && ipa_zero(s->saddr))
+ if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
{
if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
ERR("SO_REUSEPORT");
}
#endif
}
- fill_in_sockaddr(&sa, s->saddr, s->iface, port);
+
+ fill_in_sockaddr(&sa, bind_addr, s->iface, bind_port);
if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
ERR("bind");
}
+
fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
if (s->password)
@@ -1209,7 +1276,7 @@ sk_open(sock *s)
goto bad_no_log;
}
- switch (type)
+ switch (s->type)
{
case SK_TCP_ACTIVE:
if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
@@ -1287,6 +1354,79 @@ sk_open_unix(sock *s, char *name)
die("Unable to create control socket %s", name);
}
+
+static inline int
+sk_sendmsg(sock *s)
+{
+ struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
+ byte cmsg_buf[CMSG_TX_SPACE];
+ sockaddr dst;
+
+ fill_in_sockaddr(&dst, s->daddr, s->iface, s->dport);
+
+ struct msghdr msg = {
+ .msg_name = &dst,
+ .msg_namelen = sizeof(dst),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+
+#ifdef CONFIG_USE_HDRINCL
+ byte hdr[20];
+ struct iovec iov2[2] = { {hdr, 20}, iov };
+
+ if (s->flags & SKF_HDRINCL)
+ {
+ fill_ip_header(s, hdr, iov.iov_len);
+ msg.msg_iov = iov2;
+ msg.msg_iovlen = 2;
+ }
+#endif
+
+ if (s->flags & SKF_PKTINFO)
+ sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
+
+ return sendmsg(s->fd, &msg, 0);
+}
+
+static inline int
+sk_recvmsg(sock *s)
+{
+ struct iovec iov = {s->rbuf, s->rbsize};
+ byte cmsg_buf[CMSG_RX_SPACE];
+ sockaddr src;
+
+ struct msghdr msg = {
+ .msg_name = &src,
+ .msg_namelen = sizeof(src),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = cmsg_buf,
+ .msg_controllen = sizeof(cmsg_buf),
+ .msg_flags = 0
+ };
+
+ int rv = recvmsg(s->fd, &msg, 0);
+ if (rv < 0)
+ return rv;
+
+ //ifdef IPV4
+ // if (cf_type == SK_IP)
+ // rv = ipv4_skip_header(pbuf, rv);
+ //endif
+
+ get_sockaddr(&src, &s->faddr, NULL, &s->fport, 1);
+ sysio_process_rx_cmsgs(s, &msg);
+
+ if (msg.msg_flags & MSG_TRUNC)
+ s->flags |= SKF_TRUNCATED;
+ else
+ s->flags &= ~SKF_TRUNCATED;
+
+ return rv;
+}
+
+
static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
static int
@@ -1323,20 +1463,7 @@ sk_maybe_write(sock *s)
if (s->tbuf == s->tpos)
return 1;
- sockaddr sa;
- fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
-
- struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
- // byte cmsg_buf[CMSG_TX_SPACE];
-
- struct msghdr msg = {
- .msg_name = &sa,
- .msg_namelen = sizeof(sa),
- .msg_iov = &iov,
- .msg_iovlen = 1};
-
- // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
- e = sendmsg(s->fd, &msg, 0);
+ e = sk_sendmsg(s);
if (e < 0)
{
@@ -1346,6 +1473,9 @@ sk_maybe_write(sock *s)
s->err_hook(s, errno);
return -1;
}
+
+ if (!s->tx_hook)
+ reset_tx_buffer(s);
return 0;
}
reset_tx_buffer(s);
@@ -1408,12 +1538,15 @@ sk_send(sock *s, unsigned len)
*
* This is a sk_send() replacement for connection-less packet sockets
* which allows destination of the packet to be chosen dynamically.
+ * Raw IP sockets should use 0 for @port.
*/
int
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
{
s->daddr = addr;
- s->dport = port;
+ if (port)
+ s->dport = port;
+
s->ttx = s->tbuf;
s->tpos = s->tbuf + len;
return sk_maybe_write(s);
@@ -1480,22 +1613,9 @@ sk_read(sock *s)
return s->rx_hook(s, 0);
default:
{
- sockaddr sa;
int e;
- struct iovec iov = {s->rbuf, s->rbsize};
- byte cmsg_buf[CMSG_RX_SPACE];
-
- struct msghdr msg = {
- .msg_name = &sa,
- .msg_namelen = sizeof(sa),
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = cmsg_buf,
- .msg_controllen = sizeof(cmsg_buf),
- .msg_flags = 0};
-
- e = recvmsg(s->fd, &msg, 0);
+ e = sk_recvmsg(s);
if (e < 0)
{
@@ -1503,10 +1623,8 @@ sk_read(sock *s)
s->err_hook(s, errno);
return 0;
}
- s->rpos = s->rbuf + e;
- get_sockaddr(&sa, &s->faddr, NULL, &s->fport, 1);
- sysio_process_rx_cmsgs(s, &msg);
+ s->rpos = s->rbuf + e;
s->rx_hook(s, e);
return 1;
}