diff options
author | Ondrej Zajicek <santiago@crfreenet.org> | 2014-02-06 17:46:01 +0100 |
---|---|---|
committer | Ondrej Zajicek <santiago@crfreenet.org> | 2014-02-06 17:46:01 +0100 |
commit | 48e5f32db676645640f84ab3d630cce975aa6b20 (patch) | |
tree | b940fc8156b3e0c18aab6c339a066bdb7a9ec1e0 /sysdep/unix | |
parent | f48fa14214301382b2e6b134788a7506b61b664f (diff) |
Many changes in I/O and OSPF sockets and packet handling.
I/O:
- BSD: specify src addr on IP sockets by IP_HDRINCL
- BSD: specify src addr on UDP sockets by IP_SENDSRCADDR
- Linux: specify src addr on IP/UDP sockets by IP_PKTINFO
- IPv6: specify src addr on IP/UDP sockets by IPV6_PKTINFO
- Alternative SKF_BIND flag for binding to IP address
- Allows IP/UDP sockets without tx_hook, on these
sockets a packet is discarded when TX queue is full
- Use consistently SOL_ for socket layer values.
OSPF:
- Packet src addr is always explicitly set
- Support for secondary addresses in BSD
- Dynamic RX/TX buffers
- Fixes some minor buffer overruns
- Interface option 'tx length'
- Names for vlink pseudoifaces (vlinkX)
- Vlinks use separate socket for TX
- Vlinks do not use fixed associated iface
- Fixes TTL for direct unicast packets
- Fixes DONTROUTE for OSPF sockets
- Use ifa->ifname instead of ifa->iface->name
Diffstat (limited to 'sysdep/unix')
-rw-r--r-- | sysdep/unix/io.c | 294 |
1 files changed, 206 insertions, 88 deletions
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 6e3f1e4d..428f24cc 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -23,6 +23,8 @@ #include <fcntl.h> #include <errno.h> #include <netinet/in.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> #include <netinet/icmp6.h> #include "nest/bird.h" @@ -489,6 +491,11 @@ tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t) #define SOL_IPV6 IPPROTO_IPV6 #endif +#ifndef SOL_ICMPV6 +#define SOL_ICMPV6 IPPROTO_ICMPV6 +#endif + + static list sock_list; static struct birdsock *current_sock; static struct birdsock *stored_sock; @@ -553,6 +560,43 @@ sk_free(resource *r) } void +sk_set_rbsize(sock *s, uint val) +{ + ASSERT(s->rbuf_alloc == s->rbuf); + + if (s->rbsize == val) + return; + + s->rbsize = val; + xfree(s->rbuf_alloc); + s->rbuf_alloc = xmalloc(val); + s->rpos = s->rbuf = s->rbuf_alloc; +} + +void +sk_set_tbsize(sock *s, uint val) +{ + ASSERT(s->tbuf_alloc == s->tbuf); + + if (s->tbsize == val) + return; + + byte *old_tbuf = s->tbuf; + + s->tbsize = val; + s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val); + s->tpos = s->tbuf + (s->tpos - old_tbuf); + s->ttx = s->tbuf + (s->ttx - old_tbuf); +} + +void +sk_set_tbuf(sock *s, void *tbuf) +{ + s->tbuf = tbuf ?: s->tbuf_alloc; + s->ttx = s->tpos = s->tbuf; +} + +void sk_reallocate(sock *s) { sk_free_bufs(s); @@ -703,11 +747,11 @@ sysio_register_cmsgs(sock *s) int ok = 1; if ((s->flags & SKF_LADDR_RX) && - (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)) + (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)) return "IPV6_RECVPKTINFO"; if ((s->flags & SKF_TTL_RX) && - (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0)) + (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0)) return "IPV6_RECVHOPLIMIT"; return NULL; @@ -722,10 +766,10 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) { - if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO) + if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_PKTINFO) pi = (struct in6_pktinfo *) CMSG_DATA(cm); - if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT) + if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT) hlim = (int *) CMSG_DATA(cm); } @@ -749,32 +793,27 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) return; } -/* static void sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) { struct cmsghdr *cm; struct in6_pktinfo *pi; - if (!(s->flags & SKF_LADDR_TX)) - return; - msg->msg_control = cbuf; msg->msg_controllen = cbuflen; cm = CMSG_FIRSTHDR(msg); - cm->cmsg_level = IPPROTO_IPV6; + cm->cmsg_level = SOL_IPV6; cm->cmsg_type = IPV6_PKTINFO; cm->cmsg_len = CMSG_LEN(sizeof(*pi)); pi = (struct in6_pktinfo *) CMSG_DATA(cm); - set_inaddr(&pi->ipi6_addr, s->saddr); pi->ipi6_ifindex = s->iface ? s->iface->index : 0; + set_inaddr(&pi->ipi6_addr, s->saddr); msg->msg_controllen = cm->cmsg_len; - return; } -*/ + #endif static char * @@ -786,11 +825,6 @@ sk_set_ttl_int(sock *s) #else if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0) return "IP_TTL"; -#ifdef CONFIG_UNIX_DONTROUTE - int one = 1; - if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) - return "SO_DONTROUTE"; -#endif #endif return NULL; } @@ -801,6 +835,7 @@ sk_set_ttl_int(sock *s) static char * sk_setup(sock *s) { + int one = 1; int fd = s->fd; char *err = NULL; @@ -809,6 +844,41 @@ sk_setup(sock *s) if (s->type == SK_UNIX) return NULL; + if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND)) + s->flags |= SKF_PKTINFO; + +#ifdef CONFIG_USE_HDRINCL + if ((s->type == SK_IP) && (s->flags & SKF_PKTINFO)) + { + s->flags &= ~SKF_PKTINFO; + s->flags |= SKF_HDRINCL; + if (setsockopt(fd, SOL_IP, IP_HDRINCL, &one, sizeof(one)) < 0) + ERR("IP_HDRINCL"); + } +#endif + + if (s->iface) + { +#ifdef SO_BINDTODEVICE + struct ifreq ifr; + strcpy(ifr.ifr_name, s->iface->name); + if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) + ERR("SO_BINDTODEVICE"); +#endif + +#ifdef CONFIG_UNIX_DONTROUTE + if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) + ERR("SO_DONTROUTE"); +#endif + } + + if ((s->ttl >= 0) && (err = sk_set_ttl_int(s))) + goto bad; + + if (err = sysio_register_cmsgs(s)) + goto bad; + + #ifdef IPV6 if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0) WARN("IPV6_TCLASS"); @@ -821,15 +891,10 @@ sk_setup(sock *s) sk_set_priority(s, s->priority); #ifdef IPV6 - int v = 1; - if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0) + if ((s->flags & SKF_V6ONLY) && setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) WARN("IPV6_V6ONLY"); #endif - if ((s->ttl >= 0) && (err = sk_set_ttl_int(s))) - goto bad; - - err = sysio_register_cmsgs(s); bad: return err; } @@ -926,7 +991,7 @@ sk_set_broadcast(sock *s, int enable) int sk_set_ipv6_checksum(sock *s, int offset) { - if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) + if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) { log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m"); return -1; @@ -945,7 +1010,7 @@ sk_set_icmp_filter(sock *s, int p1, int p2) ICMP6_FILTER_SETPASS(p1, &f); ICMP6_FILTER_SETPASS(p2, &f); - if (setsockopt(s->fd, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) + if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) { log(L_ERR "sk_setup_icmp_filter: ICMP6_FILTER: %m"); return -1; @@ -961,7 +1026,7 @@ sk_setup_multicast(sock *s) int zero = 0; int index; - ASSERT(s->iface && s->iface->addr); + ASSERT(s->iface); index = s->iface->index; if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0) @@ -971,9 +1036,6 @@ sk_setup_multicast(sock *s) if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) ERR("IPV6_MULTICAST_IF"); - if (err = sysio_bind_to_iface(s)) - goto bad; - return 0; bad: @@ -981,18 +1043,17 @@ bad: return -1; } +#ifdef CONFIG_IPV6_GLIBC_20 +#define ipv6mr_interface ipv6mr_ifindex +#endif + int sk_join_group(sock *s, ip_addr maddr) { struct ipv6_mreq mreq; set_inaddr(&mreq.ipv6mr_multiaddr, maddr); - -#ifdef CONFIG_IPV6_GLIBC_20 - mreq.ipv6mr_ifindex = s->iface->index; -#else mreq.ipv6mr_interface = s->iface->index; -#endif if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0) { @@ -1009,12 +1070,7 @@ sk_leave_group(sock *s, ip_addr maddr) struct ipv6_mreq mreq; set_inaddr(&mreq.ipv6mr_multiaddr, maddr); - -#ifdef CONFIG_IPV6_GLIBC_20 - mreq.ipv6mr_ifindex = s->iface->index; -#else mreq.ipv6mr_interface = s->iface->index; -#endif if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0) { @@ -1032,7 +1088,7 @@ sk_setup_multicast(sock *s) { char *err; - ASSERT(s->iface && s->iface->addr); + ASSERT(s->iface); if (err = sysio_setup_multicast(s)) { @@ -1142,31 +1198,45 @@ int sk_open(sock *s) { int fd; - sockaddr sa; int one = 1; - int type = s->type; - int has_src = ipa_nonzero(s->saddr) || s->sport; + int do_bind = 0; + int bind_port = 0; + ip_addr bind_addr = IPA_NONE; + sockaddr sa; char *err; - switch (type) + switch (s->type) { case SK_TCP_ACTIVE: s->ttx = ""; /* Force s->ttx != s->tpos */ /* Fall thru */ case SK_TCP_PASSIVE: fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP); + bind_port = s->sport; + bind_addr = s->saddr; + do_bind = bind_port || ipa_nonzero(bind_addr); break; + case SK_UDP: fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP); + bind_port = s->sport; + bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; + do_bind = 1; break; + case SK_IP: fd = socket(BIRD_PF, SOCK_RAW, s->dport); + bind_port = 0; + bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; + do_bind = ipa_nonzero(bind_addr); break; + case SK_MAGIC: fd = s->fd; break; + default: - bug("sk_open() called for invalid sock type %d", type); + bug("sk_open() called for invalid sock type %d", s->type); } if (fd < 0) die("sk_open: socket: %m"); @@ -1175,31 +1245,28 @@ sk_open(sock *s) if (err = sk_setup(s)) goto bad; - if (has_src) + if (do_bind) { - int port; - - if (type == SK_IP) - port = 0; - else + if (bind_port) { - port = s->sport; if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0) ERR("SO_REUSEADDR"); - + #ifdef CONFIG_NO_IFACE_BIND /* Workaround missing ability to bind to an iface */ - if ((type == SK_UDP) && s->iface && ipa_zero(s->saddr)) + if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr)) { if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0) ERR("SO_REUSEPORT"); } #endif } - fill_in_sockaddr(&sa, s->saddr, s->iface, port); + + fill_in_sockaddr(&sa, bind_addr, s->iface, bind_port); if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) ERR("bind"); } + fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport); if (s->password) @@ -1209,7 +1276,7 @@ sk_open(sock *s) goto bad_no_log; } - switch (type) + switch (s->type) { case SK_TCP_ACTIVE: if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0) @@ -1287,6 +1354,79 @@ sk_open_unix(sock *s, char *name) die("Unable to create control socket %s", name); } + +static inline int +sk_sendmsg(sock *s) +{ + struct iovec iov = {s->tbuf, s->tpos - s->tbuf}; + byte cmsg_buf[CMSG_TX_SPACE]; + sockaddr dst; + + fill_in_sockaddr(&dst, s->daddr, s->iface, s->dport); + + struct msghdr msg = { + .msg_name = &dst, + .msg_namelen = sizeof(dst), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + +#ifdef CONFIG_USE_HDRINCL + byte hdr[20]; + struct iovec iov2[2] = { {hdr, 20}, iov }; + + if (s->flags & SKF_HDRINCL) + { + fill_ip_header(s, hdr, iov.iov_len); + msg.msg_iov = iov2; + msg.msg_iovlen = 2; + } +#endif + + if (s->flags & SKF_PKTINFO) + sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); + + return sendmsg(s->fd, &msg, 0); +} + +static inline int +sk_recvmsg(sock *s) +{ + struct iovec iov = {s->rbuf, s->rbsize}; + byte cmsg_buf[CMSG_RX_SPACE]; + sockaddr src; + + struct msghdr msg = { + .msg_name = &src, + .msg_namelen = sizeof(src), + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), + .msg_flags = 0 + }; + + int rv = recvmsg(s->fd, &msg, 0); + if (rv < 0) + return rv; + + //ifdef IPV4 + // if (cf_type == SK_IP) + // rv = ipv4_skip_header(pbuf, rv); + //endif + + get_sockaddr(&src, &s->faddr, NULL, &s->fport, 1); + sysio_process_rx_cmsgs(s, &msg); + + if (msg.msg_flags & MSG_TRUNC) + s->flags |= SKF_TRUNCATED; + else + s->flags &= ~SKF_TRUNCATED; + + return rv; +} + + static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; } static int @@ -1323,20 +1463,7 @@ sk_maybe_write(sock *s) if (s->tbuf == s->tpos) return 1; - sockaddr sa; - fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport); - - struct iovec iov = {s->tbuf, s->tpos - s->tbuf}; - // byte cmsg_buf[CMSG_TX_SPACE]; - - struct msghdr msg = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - .msg_iovlen = 1}; - - // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); - e = sendmsg(s->fd, &msg, 0); + e = sk_sendmsg(s); if (e < 0) { @@ -1346,6 +1473,9 @@ sk_maybe_write(sock *s) s->err_hook(s, errno); return -1; } + + if (!s->tx_hook) + reset_tx_buffer(s); return 0; } reset_tx_buffer(s); @@ -1408,12 +1538,15 @@ sk_send(sock *s, unsigned len) * * This is a sk_send() replacement for connection-less packet sockets * which allows destination of the packet to be chosen dynamically. + * Raw IP sockets should use 0 for @port. */ int sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port) { s->daddr = addr; - s->dport = port; + if (port) + s->dport = port; + s->ttx = s->tbuf; s->tpos = s->tbuf + len; return sk_maybe_write(s); @@ -1480,22 +1613,9 @@ sk_read(sock *s) return s->rx_hook(s, 0); default: { - sockaddr sa; int e; - struct iovec iov = {s->rbuf, s->rbsize}; - byte cmsg_buf[CMSG_RX_SPACE]; - - struct msghdr msg = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = cmsg_buf, - .msg_controllen = sizeof(cmsg_buf), - .msg_flags = 0}; - - e = recvmsg(s->fd, &msg, 0); + e = sk_recvmsg(s); if (e < 0) { @@ -1503,10 +1623,8 @@ sk_read(sock *s) s->err_hook(s, errno); return 0; } - s->rpos = s->rbuf + e; - get_sockaddr(&sa, &s->faddr, NULL, &s->fport, 1); - sysio_process_rx_cmsgs(s, &msg); + s->rpos = s->rbuf + e; s->rx_hook(s, e); return 1; } |