diff options
author | Ondrej Zajicek <santiago@crfreenet.org> | 2014-05-18 11:42:26 +0200 |
---|---|---|
committer | Ondrej Zajicek <santiago@crfreenet.org> | 2014-05-18 11:42:26 +0200 |
commit | 05476c4d04a24bdb26fa64e05ab31bc36118f34e (patch) | |
tree | e775f059cfb4bb027c444bb53eb9356e643082c8 /sysdep | |
parent | 1149aa977d906a6400f998d5f6600871584395d0 (diff) |
IPv4/IPv6 integrated socket code.
Diffstat (limited to 'sysdep')
-rw-r--r-- | sysdep/bsd/krt-sock.c | 33 | ||||
-rw-r--r-- | sysdep/bsd/sysio.h | 298 | ||||
-rw-r--r-- | sysdep/linux/netlink.c | 8 | ||||
-rw-r--r-- | sysdep/linux/sysio.h | 354 | ||||
-rw-r--r-- | sysdep/unix/io.c | 1508 | ||||
-rw-r--r-- | sysdep/unix/main.c | 7 | ||||
-rw-r--r-- | sysdep/unix/unix.h | 75 |
7 files changed, 1177 insertions, 1106 deletions
diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index aaeb7d90..26710375 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -251,9 +251,9 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff); #endif - fill_in_sockaddr(&dst, net->n.prefix, NULL, 0); - fill_in_sockaddr(&mask, ipa_mkmask(net->n.pxlen), NULL, 0); - fill_in_sockaddr(&gate, gw, NULL, 0); + sockaddr_fill(&dst, BIRD_AF, net->n.prefix, NULL, 0); + sockaddr_fill(&mask, BIRD_AF, ipa_mkmask(net->n.pxlen), NULL, 0); + sockaddr_fill(&gate, BIRD_AF, gw, NULL, 0); switch (a->dest) { @@ -280,7 +280,7 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) return -1; } - fill_in_sockaddr(&gate, i->addr->ip, NULL, 0); + sockaddr_fill(&gate, BIRD_AF, i->addr->ip, NULL, 0); msg.rtm.rtm_addrs |= RTA_GATEWAY; } break; @@ -366,20 +366,16 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) GETADDR(&gate, RTA_GATEWAY); GETADDR(&mask, RTA_NETMASK); - if (sa_family_check(&dst)) - get_sockaddr(&dst, &idst, NULL, NULL, 0); - else + if (dst.sa.sa_family != BIRD_AF) SKIP("invalid DST"); - /* We will check later whether we have valid gateway addr */ - if (sa_family_check(&gate)) - get_sockaddr(&gate, &igate, NULL, NULL, 0); - else - igate = IPA_NONE; + idst = ipa_from_sa(&dst); + imask = ipa_from_sa(&mask); + igate = (gate.sa.sa_family == BIRD_AF) ? 
ipa_from_sa(&gate) : IPA_NONE; /* We do not test family for RTA_NETMASK, because BSD sends us some strange values, but interpreting them as IPv4/IPv6 works */ - get_sockaddr(&mask, &imask, NULL, NULL, 0); + int c = ipa_classify_net(idst); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) @@ -648,12 +644,13 @@ krt_read_addr(struct ks_msg *msg, int scan) GETADDR (&brd, RTA_BRD); /* Some other family address */ - if (!sa_family_check(&addr)) + if (addr.sa.sa_family != BIRD_AF) return; - get_sockaddr(&addr, &iaddr, NULL, NULL, 0); - get_sockaddr(&mask, &imask, NULL, NULL, 0); - get_sockaddr(&brd, &ibrd, NULL, NULL, 0); + iaddr = ipa_from_sa(&addr); + imask = ipa_from_sa(&mask); + ibrd = ipa_from_sa(&brd); + if ((masklen = ipa_mklen(imask)) < 0) { @@ -806,7 +803,7 @@ krt_sysctl_scan(struct proto *p, int cmd, int table_id) mib[0] = CTL_NET; mib[1] = PF_ROUTE; mib[2] = 0; - mib[3] = BIRD_PF; + mib[3] = BIRD_AF; mib[4] = cmd; mib[5] = 0; mcnt = 6; diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h index e45deb6f..fa3969bd 100644 --- a/sysdep/bsd/sysio.h +++ b/sysdep/bsd/sysio.h @@ -1,11 +1,16 @@ /* - * BIRD Internet Routing Daemon -- NetBSD Multicasting and Network Includes + * BIRD Internet Routing Daemon -- BSD Multicasting and Network Includes * * (c) 2004 Ondrej Filip <feela@network.cz> * * Can be freely distributed and used under the terms of the GNU GPL. 
*/ +#include <net/if_dl.h> +#include <netinet/in_systm.h> // Workaround for some BSDs +#include <netinet/ip.h> + + #ifdef __NetBSD__ #ifndef IP_RECVTTL @@ -22,173 +27,117 @@ #define TCP_MD5SIG TCP_SIGNATURE_ENABLE #endif -#ifdef IPV6 -static inline void -set_inaddr(struct in6_addr * ia, ip_addr a) -{ - ipa_hton(a); - memcpy(ia, &a, sizeof(a)); -} +#define SA_LEN(x) (x).sa.sa_len -static inline void -get_inaddr(ip_addr *a, struct in6_addr *ia) -{ - memcpy(a, ia, sizeof(*a)); - ipa_ntoh(*a); -} - - -#else - -#include <net/if.h> -#include <net/if_dl.h> -#include <netinet/in_systm.h> // Workaround for some BSDs -#include <netinet/ip.h> - -static inline void -set_inaddr(struct in_addr * ia, ip_addr a) -{ - ipa_hton(a); - memcpy(&ia->s_addr, &a, sizeof(a)); -} - -static inline void -get_inaddr(ip_addr *a, struct in_addr *ia) -{ - memcpy(a, &ia->s_addr, sizeof(*a)); - ipa_ntoh(*a); -} +/* + * BSD IPv4 multicast syscalls + */ -/* BSD Multicast handling for IPv4 */ +#define INIT_MREQ4(maddr,ifa) \ + { .imr_multiaddr = ipa_to_in4(maddr), .imr_interface = ipa_to_in4(ifa->addr->ip) } -static inline char * -sysio_setup_multicast(sock *s) +static inline int +sk_setup_multicast4(sock *s) { - struct in_addr m; - u8 zero = 0; - u8 ttl = s->ttl; + struct in_addr ifa = ipa_to_in4(s->iface->addr->ip); + u8 ttl = s->ttl; + u8 n = 0; - if (setsockopt(s->fd, IPPROTO_IP, IP_MULTICAST_LOOP, &zero, sizeof(zero)) < 0) - return "IP_MULTICAST_LOOP"; + /* This defines where should we send _outgoing_ multicasts */ + if (setsockopt(s->fd, IPPROTO_IP, IP_MULTICAST_IF, &ifa, sizeof(ifa)) < 0) + ERR("IP_MULTICAST_IF"); - if (setsockopt(s->fd, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)) < 0) - return "IP_MULTICAST_TTL"; + if (setsockopt(s->fd, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)) < 0) + ERR("IP_MULTICAST_TTL"); - /* This defines where should we send _outgoing_ multicasts */ - set_inaddr(&m, s->iface->addr->ip); - if (setsockopt(s->fd, IPPROTO_IP, IP_MULTICAST_IF, &m, sizeof(m)) < 0) 
- return "IP_MULTICAST_IF"; + if (setsockopt(s->fd, IPPROTO_IP, IP_MULTICAST_LOOP, &n, sizeof(n)) < 0) + ERR("IP_MULTICAST_LOOP"); - return NULL; + return 0; } - -static inline char * -sysio_join_group(sock *s, ip_addr maddr) +static inline int +sk_join_group4(sock *s, ip_addr maddr) { - struct ip_mreq mreq; + struct ip_mreq mr = INIT_MREQ4(maddr, s->iface); - bzero(&mreq, sizeof(mreq)); - set_inaddr(&mreq.imr_interface, s->iface->addr->ip); - set_inaddr(&mreq.imr_multiaddr, maddr); + if (setsockopt(s->fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mr, sizeof(mr)) < 0) + ERR("IP_ADD_MEMBERSHIP"); - /* And this one sets interface for _receiving_ multicasts from */ - if (setsockopt(s->fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0) - return "IP_ADD_MEMBERSHIP"; - - return NULL; + return 0; } -static inline char * -sysio_leave_group(sock *s, ip_addr maddr) +static inline int +sk_leave_group4(sock *s, ip_addr maddr) { - struct ip_mreq mreq; + struct ip_mreq mr = INIT_MREQ4(maddr, s->iface); - bzero(&mreq, sizeof(mreq)); - set_inaddr(&mreq.imr_interface, s->iface->addr->ip); - set_inaddr(&mreq.imr_multiaddr, maddr); + if (setsockopt(s->fd, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mr, sizeof(mr)) < 0) + ERR("IP_DROP_MEMBERSHIP"); - /* And this one sets interface for _receiving_ multicasts from */ - if (setsockopt(s->fd, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreq, sizeof(mreq)) < 0) - return "IP_DROP_MEMBERSHIP"; - - return NULL; + return 0; } -/* BSD RX/TX packet info handling for IPv4 */ -/* it uses IP_RECVDSTADDR / IP_RECVIF socket options instead of IP_PKTINFO */ +/* + * BSD IPv4 packet control messages + */ -#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in_addr)) + \ - CMSG_SPACE(sizeof(struct sockaddr_dl)) + \ - CMSG_SPACE(sizeof(char))) -#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in_addr)) +/* It uses IP_RECVDSTADDR / IP_RECVIF socket options instead of IP_PKTINFO */ -static char * -sysio_register_cmsgs(sock *s) -{ - int ok = 1; - if (s->flags & SKF_LADDR_RX) - { - if 
(setsockopt(s->fd, IPPROTO_IP, IP_RECVDSTADDR, &ok, sizeof(ok)) < 0) - return "IP_RECVDSTADDR"; +#define CMSG4_SPACE_PKTINFO (CMSG_SPACE(sizeof(struct in_addr)) + \ + CMSG_SPACE(sizeof(struct sockaddr_dl))) +#define CMSG4_SPACE_TTL CMSG_SPACE(sizeof(char)) - if (setsockopt(s->fd, IPPROTO_IP, IP_RECVIF, &ok, sizeof(ok)) < 0) - return "IP_RECVIF"; - } +static inline int +sk_request_cmsg4_pktinfo(sock *s) +{ + int y = 1; - if ((s->flags & SKF_TTL_RX) && - (setsockopt(s->fd, IPPROTO_IP, IP_RECVTTL, &ok, sizeof(ok)) < 0)) - return "IP_RECVTTL"; + if (setsockopt(s->fd, IPPROTO_IP, IP_RECVDSTADDR, &y, sizeof(y)) < 0) + ERR("IP_RECVDSTADDR"); + if (setsockopt(s->fd, IPPROTO_IP, IP_RECVIF, &y, sizeof(y)) < 0) + ERR("IP_RECVIF"); - return NULL; + return 0; } -static inline void -sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) +static inline int +sk_request_cmsg4_ttl(sock *s) { - struct cmsghdr *cm; - struct in_addr *ra = NULL; - struct sockaddr_dl *ri = NULL; - unsigned char *ttl = NULL; - - for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) - { - if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVDSTADDR) - ra = (struct in_addr *) CMSG_DATA(cm); - - if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVIF) - ri = (struct sockaddr_dl *) CMSG_DATA(cm); - - if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_RECVTTL) - ttl = (unsigned char *) CMSG_DATA(cm); - } + int y = 1; - if (s->flags & SKF_LADDR_RX) - { - s->laddr = IPA_NONE; - s->lifindex = 0; + if (setsockopt(s->fd, IPPROTO_IP, IP_RECVTTL, &y, sizeof(y)) < 0) + ERR("IP_RECVTTL"); - if (ra) - get_inaddr(&s->laddr, ra); - if (ri) - s->lifindex = ri->sdl_index; - } + return 0; +} - if (s->flags & SKF_TTL_RX) - s->ttl = ttl ? 
*ttl : -1; +static inline void +sk_process_cmsg4_pktinfo(sock *s, struct cmsghdr *cm) +{ + if (cm->cmsg_type == IP_RECVDSTADDR) + s->laddr = ipa_from_in4(* (struct in_addr *) CMSG_DATA(cm)); - // log(L_WARN "RX %I %d", s->laddr, s->lifindex); + if (cm->cmsg_type == IP_RECVIF) + s->lifindex = ((struct sockaddr_dl *) CMSG_DATA(cm))->sdl_index; } -/* Unfortunately, IP_SENDSRCADDR does not work for raw IP sockets on BSD kernels */ +static inline void +sk_process_cmsg4_ttl(sock *s, struct cmsghdr *cm) +{ + if (cm->cmsg_type == IP_RECVTTL) + s->rcv_ttl = * (unsigned char *) CMSG_DATA(cm); +} static inline void -sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) +sk_prepare_cmsgs4(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) { + /* Unfortunately, IP_SENDSRCADDR does not work for raw IP sockets on BSD kernels */ + #ifdef IP_SENDSRCADDR struct cmsghdr *cm; struct in_addr *sa; @@ -202,15 +151,14 @@ sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) cm->cmsg_len = CMSG_LEN(sizeof(*sa)); sa = (struct in_addr *) CMSG_DATA(cm); - set_inaddr(sa, s->saddr); + *sa = ipa_to_in4(s->saddr); msg->msg_controllen = cm->cmsg_len; #endif } - static void -fill_ip_header(sock *s, void *hdr, int dlen) +sk_prepare_ip_header(sock *s, void *hdr, int dlen) { struct ip *ip = hdr; @@ -222,8 +170,8 @@ fill_ip_header(sock *s, void *hdr, int dlen) ip->ip_len = 20 + dlen; ip->ip_ttl = (s->ttl < 0) ? 
64 : s->ttl; ip->ip_p = s->dport; - set_inaddr(&ip->ip_src, s->saddr); - set_inaddr(&ip->ip_dst, s->daddr); + ip->ip_src = ipa_to_in4(s->saddr); + ip->ip_dst = ipa_to_in4(s->daddr); #ifdef __OpenBSD__ /* OpenBSD expects ip_len in network order, other BSDs expect host order */ @@ -231,10 +179,11 @@ fill_ip_header(sock *s, void *hdr, int dlen) #endif } -#endif +/* + * Miscellaneous BSD socket syscalls + */ -#include <netinet/tcp.h> #ifndef TCP_KEYLEN_MAX #define TCP_KEYLEN_MAX 80 #endif @@ -248,72 +197,69 @@ fill_ip_header(sock *s, void *hdr, int dlen) * management. */ -static int -sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd) +int +sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd) { int enable = 0; - if (passwd) - { - int len = strlen(passwd); - - enable = len ? TCP_SIG_SPI : 0; - - if (len > TCP_KEYLEN_MAX) - { - log(L_ERR "MD5 password too long"); - return -1; - } - } - - int rv = setsockopt(s->fd, IPPROTO_TCP, TCP_MD5SIG, &enable, sizeof(enable)); - - if (rv < 0) - { - if (errno == ENOPROTOOPT) - log(L_ERR "Kernel does not support TCP MD5 signatures"); - else - log(L_ERR "sk_set_md5_auth_int: setsockopt: %m"); - } - - return rv; -} + if (passwd && *passwd) + { + int len = strlen(passwd); + enable = TCP_SIG_SPI; + + if (len > TCP_KEYLEN_MAX) + ERR_MSG("MD5 password too long"); + } + + if (setsockopt(s->fd, IPPROTO_TCP, TCP_MD5SIG, &enable, sizeof(enable)) < 0) + { + if (errno == ENOPROTOOPT) + ERR_MSG("Kernel does not support TCP MD5 signatures"); + else + ERR("TCP_MD5SIG"); + } -#ifndef IPV6 + return 0; +} -static int +static inline int sk_set_min_ttl4(sock *s, int ttl) { if (setsockopt(s->fd, IPPROTO_IP, IP_MINTTL, &ttl, sizeof(ttl)) < 0) { if (errno == ENOPROTOOPT) - log(L_ERR "Kernel does not support IPv4 TTL security"); + ERR_MSG("Kernel does not support IPv4 TTL security"); else - log(L_ERR "sk_set_min_ttl4: setsockopt: %m"); - - return -1; + ERR("IP_MINTTL"); } return 0; } -#else /* IPv6 */ - -static int +static inline int 
sk_set_min_ttl6(sock *s, int ttl) { - log(L_ERR "IPv6 TTL security not supported"); - return -1; + ERR_MSG("Kernel does not support IPv6 TTL security"); } -#endif +static inline int +sk_disable_mtu_disc4(sock *s) +{ + /* TODO: Set IP_DONTFRAG to 0 ? */ + return 0; +} +static inline int +sk_disable_mtu_disc6(sock *s) +{ + /* TODO: Set IPV6_DONTFRAG to 0 ? */ + return 0; +} int sk_priority_control = -1; -static int +static inline int sk_set_priority(sock *s, int prio UNUSED) { - log(L_WARN "Socket priority not supported"); - return -1; + ERR_MSG("Socket priority not supported"); } diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 7063e2ca..a0f85186 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -104,9 +104,9 @@ nl_request_dump(int cmd) req.nh.nlmsg_type = cmd; req.nh.nlmsg_len = sizeof(req); req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; - /* Is it important which PF_* is used for link-level interface scan? - It seems that some information is available only when PF_INET is used. */ - req.g.rtgen_family = (cmd == RTM_GETLINK) ? PF_INET : BIRD_PF; + /* Is it important which AF_* is used for link-level interface scan? + It seems that some information is available only when AF_INET is used. */ + req.g.rtgen_family = (cmd == RTM_GETLINK) ? AF_INET : BIRD_AF; nl_send(&nl_scan, &req.nh); } @@ -1069,7 +1069,7 @@ nl_open_async(void) sk->type = SK_MAGIC; sk->rx_hook = nl_async_hook; sk->fd = fd; - if (sk_open(sk)) + if (sk_open(sk) < 0) bug("Netlink: sk_open failed"); } diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h index dc807392..5fd75c90 100644 --- a/sysdep/linux/sysio.h +++ b/sysdep/linux/sysio.h @@ -6,232 +6,151 @@ * Can be freely distributed and used under the terms of the GNU GPL. 
*/ -#include <net/if.h> - -#ifdef IPV6 - -#ifndef IPV6_UNICAST_HOPS -/* Needed on glibc 2.0 systems */ -#include <linux/in6.h> -#define CONFIG_IPV6_GLIBC_20 -#endif - -static inline void -set_inaddr(struct in6_addr *ia, ip_addr a) -{ - ipa_hton(a); - memcpy(ia, &a, sizeof(a)); -} - -static inline void -get_inaddr(ip_addr *a, struct in6_addr *ia) -{ - memcpy(a, ia, sizeof(*a)); - ipa_ntoh(*a); -} - -#else - -static inline void -set_inaddr(struct in_addr *ia, ip_addr a) -{ - ipa_hton(a); - memcpy(&ia->s_addr, &a, sizeof(a)); -} - -static inline void -get_inaddr(ip_addr *a, struct in_addr *ia) -{ - memcpy(a, &ia->s_addr, sizeof(*a)); - ipa_ntoh(*a); -} - #ifndef HAVE_STRUCT_IP_MREQN /* Several versions of glibc don't define this structure, so we have to do it ourselves */ struct ip_mreqn { - struct in_addr imr_multiaddr; /* IP multicast address of group */ - struct in_addr imr_address; /* local IP address of interface */ - int imr_ifindex; /* Interface index */ + struct in_addr imr_multiaddr; /* IP multicast address of group */ + struct in_addr imr_address; /* local IP address of interface */ + int imr_ifindex; /* Interface index */ }; #endif +#ifndef IP_MINTTL +#define IP_MINTTL 21 +#endif -static inline void fill_mreqn(struct ip_mreqn *m, ip_addr maddr, struct iface *ifa) -{ - bzero(m, sizeof(*m)); - m->imr_ifindex = ifa->index; - set_inaddr(&m->imr_multiaddr, maddr); -} +#ifndef IPV6_TCLASS +#define IPV6_TCLASS 67 +#endif -static inline char * -sysio_setup_multicast(sock *s) -{ - struct ip_mreqn m; - int zero = 0; +#ifndef IPV6_MINHOPCOUNT +#define IPV6_MINHOPCOUNT 73 +#endif - if (setsockopt(s->fd, SOL_IP, IP_MULTICAST_LOOP, &zero, sizeof(zero)) < 0) - return "IP_MULTICAST_LOOP"; - if (setsockopt(s->fd, SOL_IP, IP_MULTICAST_TTL, &s->ttl, sizeof(s->ttl)) < 0) - return "IP_MULTICAST_TTL"; +#ifndef TCP_MD5SIG - /* This defines where should we send _outgoing_ multicasts */ - fill_mreqn(&m, IPA_NONE, s->iface); - if (setsockopt(s->fd, SOL_IP, IP_MULTICAST_IF, &m, 
sizeof(m)) < 0) - return "IP_MULTICAST_IF"; +#define TCP_MD5SIG 14 +#define TCP_MD5SIG_MAXKEYLEN 80 - return NULL; -} +struct tcp_md5sig { + struct sockaddr_storage tcpm_addr; /* address associated */ + u16 __tcpm_pad1; /* zero */ + u16 tcpm_keylen; /* key length */ + u32 __tcpm_pad2; /* zero */ + u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ +}; -static inline char * -sysio_join_group(sock *s, ip_addr maddr) -{ - struct ip_mreqn m; +#endif - /* And this one sets interface for _receiving_ multicasts from */ - fill_mreqn(&m, maddr, s->iface); - if (setsockopt(s->fd, SOL_IP, IP_ADD_MEMBERSHIP, &m, sizeof(m)) < 0) - return "IP_ADD_MEMBERSHIP"; - return NULL; -} +/* Linux does not care if sa_len is larger than needed */ +#define SA_LEN(x) sizeof(sockaddr) -static inline char * -sysio_leave_group(sock *s, ip_addr maddr) -{ - struct ip_mreqn m; - /* And this one sets interface for _receiving_ multicasts from */ - fill_mreqn(&m, maddr, s->iface); - if (setsockopt(s->fd, SOL_IP, IP_DROP_MEMBERSHIP, &m, sizeof(m)) < 0) - return "IP_DROP_MEMBERSHIP"; +/* + * Linux IPv4 multicast syscalls + */ - return NULL; -} +#define INIT_MREQ4(maddr,ifa) \ + { .imr_multiaddr = ipa_to_in4(maddr), .imr_ifindex = ifa->index } -#endif +static inline int +sk_setup_multicast4(sock *s) +{ + struct ip_mreqn mr = { .imr_ifindex = s->iface->index }; + int ttl = s->ttl; + int n = 0; + /* This defines where should we send _outgoing_ multicasts */ + if (setsockopt(s->fd, SOL_IP, IP_MULTICAST_IF, &mr, sizeof(mr)) < 0) + ERR("IP_MULTICAST_IF"); -/* For the case that we have older libc headers */ -/* Copied from Linux kernel file include/linux/tcp.h */ + if (setsockopt(s->fd, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)) < 0) + ERR("IP_MULTICAST_TTL"); -#ifndef TCP_MD5SIG + if (setsockopt(s->fd, SOL_IP, IP_MULTICAST_LOOP, &n, sizeof(n)) < 0) + ERR("IP_MULTICAST_LOOP"); -#define TCP_MD5SIG 14 -#define TCP_MD5SIG_MAXKEYLEN 80 + return 0; +} -#include <linux/types.h> +static inline int 
+sk_join_group4(sock *s, ip_addr maddr) +{ + struct ip_mreqn mr = INIT_MREQ4(maddr, s->iface); -struct tcp_md5sig { - struct sockaddr_storage tcpm_addr; /* address associated */ - __u16 __tcpm_pad1; /* zero */ - __u16 tcpm_keylen; /* key length */ - __u32 __tcpm_pad2; /* zero */ - __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ -}; + if (setsockopt(s->fd, SOL_IP, IP_ADD_MEMBERSHIP, &mr, sizeof(mr)) < 0) + ERR("IP_ADD_MEMBERSHIP"); -#endif + return 0; +} -static int -sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd) +static inline int +sk_leave_group4(sock *s, ip_addr maddr) { - struct tcp_md5sig md5; + struct ip_mreqn mr = INIT_MREQ4(maddr, s->iface); - memset(&md5, 0, sizeof(md5)); - memcpy(&md5.tcpm_addr, (struct sockaddr *) sa, sizeof(*sa)); + if (setsockopt(s->fd, SOL_IP, IP_DROP_MEMBERSHIP, &mr, sizeof(mr)) < 0) + ERR("IP_DROP_MEMBERSHIP"); - if (passwd) - { - int len = strlen(passwd); - - if (len > TCP_MD5SIG_MAXKEYLEN) - { - log(L_ERR "MD5 password too long"); - return -1; - } - - md5.tcpm_keylen = len; - memcpy(&md5.tcpm_key, passwd, len); - } - - int rv = setsockopt(s->fd, SOL_TCP, TCP_MD5SIG, &md5, sizeof(md5)); - - if (rv < 0) - { - if (errno == ENOPROTOOPT) - log(L_ERR "Kernel does not support TCP MD5 signatures"); - else - log(L_ERR "sk_set_md5_auth_int: setsockopt: %m"); - } - - return rv; + return 0; } -#ifndef IPV6 +/* + * Linux IPv4 packet control messages + */ -/* RX/TX packet info handling for IPv4 */ /* Mostly similar to standardized IPv6 code */ -#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in_pktinfo)) + CMSG_SPACE(sizeof(int))) -#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in_pktinfo)) +#define CMSG4_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in_pktinfo)) +#define CMSG4_SPACE_TTL CMSG_SPACE(sizeof(int)) -static char * -sysio_register_cmsgs(sock *s) +static inline int +sk_request_cmsg4_pktinfo(sock *s) { - int ok = 1; + int y = 1; - if ((s->flags & SKF_LADDR_RX) && - (setsockopt(s->fd, SOL_IP, IP_PKTINFO, &ok, sizeof(ok)) < 0)) 
- return "IP_PKTINFO"; + if (setsockopt(s->fd, SOL_IP, IP_PKTINFO, &y, sizeof(y)) < 0) + ERR("IP_PKTINFO"); - if ((s->flags & SKF_TTL_RX) && - (setsockopt(s->fd, SOL_IP, IP_RECVTTL, &ok, sizeof(ok)) < 0)) - return "IP_RECVTTL"; - - return NULL; + return 0; } -static void -sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) +static inline int +sk_request_cmsg4_ttl(sock *s) { - struct cmsghdr *cm; - struct in_pktinfo *pi = NULL; - int *ttl = NULL; + int y = 1; - for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) - { - if (cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_PKTINFO) - pi = (struct in_pktinfo *) CMSG_DATA(cm); + if (setsockopt(s->fd, SOL_IP, IP_RECVTTL, &y, sizeof(y)) < 0) + ERR("IP_RECVTTL"); - if (cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_TTL) - ttl = (int *) CMSG_DATA(cm); - } + return 0; +} - if (s->flags & SKF_LADDR_RX) +static inline void +sk_process_cmsg4_pktinfo(sock *s, struct cmsghdr *cm) +{ + if (cm->cmsg_type == IP_PKTINFO) { - if (pi) - { - get_inaddr(&s->laddr, &pi->ipi_addr); - s->lifindex = pi->ipi_ifindex; - } - else - { - s->laddr = IPA_NONE; - s->lifindex = 0; - } + struct in_pktinfo *pi = (struct in_pktinfo *) CMSG_DATA(cm); + s->laddr = ipa_from_in4(pi->ipi_addr); + s->lifindex = pi->ipi_ifindex; } +} - if (s->flags & SKF_TTL_RX) - s->ttl = ttl ? *ttl : -1; - - return; +static inline void +sk_process_cmsg4_ttl(sock *s, struct cmsghdr *cm) +{ + if (cm->cmsg_type == IP_TTL) + s->rcv_ttl = * (int *) CMSG_DATA(cm); } -static void -sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) +static inline void +sk_prepare_cmsgs4(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) { struct cmsghdr *cm; struct in_pktinfo *pi; @@ -246,78 +165,105 @@ sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) pi = (struct in_pktinfo *) CMSG_DATA(cm); pi->ipi_ifindex = s->iface ? 
s->iface->index : 0; - set_inaddr(&pi->ipi_spec_dst, s->saddr); - set_inaddr(&pi->ipi_addr, IPA_NONE); + pi->ipi_spec_dst = ipa_to_in4(s->saddr); + pi->ipi_addr = ipa_to_in4(IPA_NONE); msg->msg_controllen = cm->cmsg_len; } -#endif +/* + * Miscellaneous Linux socket syscalls + */ +int +sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd) +{ + struct tcp_md5sig md5; -#ifndef IP_MINTTL -#define IP_MINTTL 21 -#endif + memset(&md5, 0, sizeof(md5)); + sockaddr_fill((sockaddr *) &md5.tcpm_addr, s->af, a, ifa, 0); -#ifndef IPV6_MINHOPCOUNT -#define IPV6_MINHOPCOUNT 73 -#endif + if (passwd) + { + int len = strlen(passwd); + + if (len > TCP_MD5SIG_MAXKEYLEN) + ERR_MSG("MD5 password too long"); + md5.tcpm_keylen = len; + memcpy(&md5.tcpm_key, passwd, len); + } + + if (setsockopt(s->fd, SOL_TCP, TCP_MD5SIG, &md5, sizeof(md5)) < 0) + { + if (errno == ENOPROTOOPT) + ERR_MSG("Kernel does not support TCP MD5 signatures"); + else + ERR("TCP_MD5SIG"); + } -#ifndef IPV6 + return 0; +} -static int +static inline int sk_set_min_ttl4(sock *s, int ttl) { if (setsockopt(s->fd, SOL_IP, IP_MINTTL, &ttl, sizeof(ttl)) < 0) { if (errno == ENOPROTOOPT) - log(L_ERR "Kernel does not support IPv4 TTL security"); + ERR_MSG("Kernel does not support IPv4 TTL security"); else - log(L_ERR "sk_set_min_ttl4: setsockopt: %m"); - - return -1; + ERR("IP_MINTTL"); } return 0; } -#else - -static int +static inline int sk_set_min_ttl6(sock *s, int ttl) { if (setsockopt(s->fd, SOL_IPV6, IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) < 0) { if (errno == ENOPROTOOPT) - log(L_ERR "Kernel does not support IPv6 TTL security"); + ERR_MSG("Kernel does not support IPv6 TTL security"); else - log(L_ERR "sk_set_min_ttl6: setsockopt: %m"); - - return -1; + ERR("IPV6_MINHOPCOUNT"); } return 0; } -#endif +static inline int +sk_disable_mtu_disc4(sock *s) +{ + int dont = IP_PMTUDISC_DONT; + if (setsockopt(s->fd, SOL_IP, IP_MTU_DISCOVER, &dont, sizeof(dont)) < 0) + ERR("IP_MTU_DISCOVER"); -#ifndef IPV6_TCLASS -#define 
IPV6_TCLASS 67 -#endif + return 0; +} + +static inline int +sk_disable_mtu_disc6(sock *s) +{ + int dont = IPV6_PMTUDISC_DONT; + + if (setsockopt(s->fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0) + ERR("IPV6_MTU_DISCOVER"); + + return 0; +} int sk_priority_control = 7; -static int +static inline int sk_set_priority(sock *s, int prio) { if (setsockopt(s->fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)) < 0) - { - log(L_WARN "sk_set_priority: setsockopt: %m"); - return -1; - } + ERR("SO_PRIORITY"); return 0; } + diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 428f24cc..5a0c07e5 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -22,6 +22,7 @@ #include <unistd.h> #include <fcntl.h> #include <errno.h> +#include <net/if.h> #include <netinet/in.h> #include <netinet/tcp.h> #include <netinet/udp.h> @@ -470,6 +471,7 @@ tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t) strcpy(x, "<too-long>"); } + /** * DOC: Sockets * @@ -496,234 +498,147 @@ tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t) #endif -static list sock_list; -static struct birdsock *current_sock; -static struct birdsock *stored_sock; -static int sock_recalc_fdsets_p; +/* + * Sockaddr helper functions + */ -static inline sock * -sk_next(sock *s) +static inline int sockaddr_length(int af) +{ return (af == AF_INET) ? 
sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); } + +static inline void +sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port) { - if (!s->n.next->next) - return NULL; - else - return SKIP_BACK(sock, n, s->n.next); + memset(sa, 0, sizeof(struct sockaddr_in)); +#ifdef HAVE_SIN_LEN + sa->sin_len = sizeof(struct sockaddr_in); +#endif + sa->sin_family = AF_INET; + sa->sin_port = htons(port); + sa->sin_addr = ipa_to_in4(a); } -static void -sk_alloc_bufs(sock *s) +static inline void +sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port) { - if (!s->rbuf && s->rbsize) - s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize); - s->rpos = s->rbuf; - if (!s->tbuf && s->tbsize) - s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize); - s->tpos = s->ttx = s->tbuf; + memset(sa, 0, sizeof(struct sockaddr_in6)); +#ifdef SIN6_LEN + sa->sin6_len = sizeof(struct sockaddr_in6); +#endif + sa->sin6_family = AF_INET6; + sa->sin6_port = htons(port); + sa->sin6_flowinfo = 0; + sa->sin6_addr = ipa_to_in6(a); + + if (ifa && ipa_is_link_local(a)) + sa->sin6_scope_id = ifa->index; } -static void -sk_free_bufs(sock *s) +void +sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port) { - if (s->rbuf_alloc) - { - xfree(s->rbuf_alloc); - s->rbuf = s->rbuf_alloc = NULL; - } - if (s->tbuf_alloc) - { - xfree(s->tbuf_alloc); - s->tbuf = s->tbuf_alloc = NULL; - } + if (af == AF_INET) + sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port); + else if (af == AF_INET6) + sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port); + else + bug("Unknown AF"); } -static void -sk_free(resource *r) +static inline void +sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port) { - sock *s = (sock *) r; - - sk_free_bufs(s); - if (s->fd >= 0) - { - close(s->fd); - - /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ - if (s->flags & SKF_THREAD) - return; - - if (s == current_sock) - current_sock = sk_next(s); - if 
(s == stored_sock) - stored_sock = sk_next(s); - rem_node(&s->n); - sock_recalc_fdsets_p = 1; - } + *port = ntohs(sa->sin_port); + *a = ipa_from_in4(sa->sin_addr); } -void -sk_set_rbsize(sock *s, uint val) +static inline void +sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port) { - ASSERT(s->rbuf_alloc == s->rbuf); - - if (s->rbsize == val) - return; + *port = ntohs(sa->sin6_port); + *a = ipa_from_in6(sa->sin6_addr); - s->rbsize = val; - xfree(s->rbuf_alloc); - s->rbuf_alloc = xmalloc(val); - s->rpos = s->rbuf = s->rbuf_alloc; + if (ifa && ipa_is_link_local(*a)) + *ifa = if_find_by_index(sa->sin6_scope_id); } -void -sk_set_tbsize(sock *s, uint val) +int +sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port) { - ASSERT(s->tbuf_alloc == s->tbuf); + if (sa->sa.sa_family != af) + goto fail; - if (s->tbsize == val) - return; - - byte *old_tbuf = s->tbuf; + if (af == AF_INET) + sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port); + else if (af == AF_INET6) + sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port); + else + goto fail; - s->tbsize = val; - s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val); - s->tpos = s->tbuf + (s->tpos - old_tbuf); - s->ttx = s->tbuf + (s->ttx - old_tbuf); -} + return 0; -void -sk_set_tbuf(sock *s, void *tbuf) -{ - s->tbuf = tbuf ?: s->tbuf_alloc; - s->ttx = s->tpos = s->tbuf; + fail: + *a = IPA_NONE; + *port = 0; + return -1; } -void -sk_reallocate(sock *s) -{ - sk_free_bufs(s); - sk_alloc_bufs(s); -} -static void -sk_dump(resource *r) -{ - sock *s = (sock *) r; - static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" }; +/* + * IPv6 multicast syscalls + */ - debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n", - sk_type_names[s->type], - s->data, - s->saddr, - s->sport, - s->daddr, - s->dport, - s->tos, - s->ttl, - s->iface ? 
s->iface->name : "none"); -} +/* Fortunately standardized in RFC 3493 */ -static struct resclass sk_class = { - "Socket", - sizeof(sock), - sk_free, - sk_dump, - NULL, - NULL -}; +#define INIT_MREQ6(maddr,ifa) \ + { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index } -/** - * sk_new - create a socket - * @p: pool - * - * This function creates a new socket resource. If you want to use it, - * you need to fill in all the required fields of the structure and - * call sk_open() to do the actual opening of the socket. - * - * The real function name is sock_new(), sk_new() is a macro wrapper - * to avoid collision with OpenSSL. - */ -sock * -sock_new(pool *p) +static inline int +sk_setup_multicast6(sock *s) { - sock *s = ralloc(p, &sk_class); - s->pool = p; - // s->saddr = s->daddr = IPA_NONE; - s->tos = s->priority = s->ttl = -1; - s->fd = -1; - return s; -} + int index = s->iface->index; + int ttl = s->ttl; + int n = 0; -static void -sk_insert(sock *s) -{ - add_tail(&sock_list, &s->n); - sock_recalc_fdsets_p = 1; -} + if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) + ERR("IPV6_MULTICAST_IF"); -#ifdef IPV6 + if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0) + ERR("IPV6_MULTICAST_HOPS"); -void -fill_in_sockaddr(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, unsigned port) -{ - memset(sa, 0, sizeof (struct sockaddr_in6)); - sa->sin6_family = AF_INET6; - sa->sin6_port = htons(port); - sa->sin6_flowinfo = 0; -#ifdef HAVE_SIN_LEN - sa->sin6_len = sizeof(struct sockaddr_in6); -#endif - set_inaddr(&sa->sin6_addr, a); + if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0) + ERR("IPV6_MULTICAST_LOOP"); - if (ifa && ipa_has_link_scope(a)) - sa->sin6_scope_id = ifa->index; + return 0; } -void -get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, unsigned *port, int check) +static inline int +sk_join_group6(sock *s, ip_addr maddr) { - if (check && 
sa->sin6_family != AF_INET6) - bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family); - if (port) - *port = ntohs(sa->sin6_port); - memcpy(a, &sa->sin6_addr, sizeof(*a)); - ipa_ntoh(*a); + struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); - if (ifa && ipa_has_link_scope(*a)) - *ifa = if_find_by_index(sa->sin6_scope_id); -} + if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0) + ERR("IPV6_JOIN_GROUP"); -#else - -void -fill_in_sockaddr(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, unsigned port) -{ - memset (sa, 0, sizeof (struct sockaddr_in)); - sa->sin_family = AF_INET; - sa->sin_port = htons(port); -#ifdef HAVE_SIN_LEN - sa->sin_len = sizeof(struct sockaddr_in); -#endif - set_inaddr(&sa->sin_addr, a); + return 0; } -void -get_sockaddr(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, unsigned *port, int check) +static inline int +sk_leave_group6(sock *s, ip_addr maddr) { - if (check && sa->sin_family != AF_INET) - bug("get_sockaddr called for wrong address family (%d)", sa->sin_family); - if (port) - *port = ntohs(sa->sin_port); - memcpy(a, &sa->sin_addr.s_addr, sizeof(*a)); - ipa_ntoh(*a); -} + struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); -#endif + if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0) + ERR("IPV6_LEAVE_GROUP"); + + return 0; +} -#ifdef IPV6 +/* + * IPv6 packet control messages + */ -/* PKTINFO handling is also standardized in IPv6 */ -#define CMSG_RX_SPACE (CMSG_SPACE(sizeof(struct in6_pktinfo)) + CMSG_SPACE(sizeof(int))) -#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo)) +/* Also standardized, in RFC 3542 */ /* * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg @@ -741,60 +656,52 @@ get_sockaddr(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, unsigned *p #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT #endif -static char * -sysio_register_cmsgs(sock *s) -{ - int ok = 1; - if ((s->flags & SKF_LADDR_RX) && - (setsockopt(s->fd, SOL_IPV6, 
IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)) - return "IPV6_RECVPKTINFO"; +#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo)) +#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int)) + +static inline int +sk_request_cmsg6_pktinfo(sock *s) +{ + int y = 1; - if ((s->flags & SKF_TTL_RX) && - (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0)) - return "IPV6_RECVHOPLIMIT"; + if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0) + ERR("IPV6_RECVPKTINFO"); - return NULL; + return 0; } -static void -sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) +static inline int +sk_request_cmsg6_ttl(sock *s) { - struct cmsghdr *cm; - struct in6_pktinfo *pi = NULL; - int *hlim = NULL; + int y = 1; - for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) - { - if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_PKTINFO) - pi = (struct in6_pktinfo *) CMSG_DATA(cm); + if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0) + ERR("IPV6_RECVHOPLIMIT"); - if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT) - hlim = (int *) CMSG_DATA(cm); - } + return 0; +} - if (s->flags & SKF_LADDR_RX) +static inline void +sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm) +{ + if (cm->cmsg_type == IPV6_PKTINFO) { - if (pi) - { - get_inaddr(&s->laddr, &pi->ipi6_addr); - s->lifindex = pi->ipi6_ifindex; - } - else - { - s->laddr = IPA_NONE; - s->lifindex = 0; - } + struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm); + s->laddr = ipa_from_in6(pi->ipi6_addr); + s->lifindex = pi->ipi6_ifindex; } +} - if (s->flags & SKF_TTL_RX) - s->ttl = hlim ? 
*hlim : -1; - - return; +static inline void +sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm) +{ + if (cm->cmsg_type == IPV6_HOPLIMIT) + s->rcv_ttl = * (int *) CMSG_DATA(cm); } -static void -sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) +static inline void +sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) { struct cmsghdr *cm; struct in6_pktinfo *pi; @@ -809,104 +716,147 @@ sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) pi = (struct in6_pktinfo *) CMSG_DATA(cm); pi->ipi6_ifindex = s->iface ? s->iface->index : 0; - set_inaddr(&pi->ipi6_addr, s->saddr); + pi->ipi6_addr = ipa_to_in6(s->saddr); msg->msg_controllen = cm->cmsg_len; } -#endif -static char * -sk_set_ttl_int(sock *s) +/* + * Miscellaneous socket syscalls + */ + +static inline int +sk_set_ttl4(sock *s, int ttl) { -#ifdef IPV6 - if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0) - return "IPV6_UNICAST_HOPS"; -#else - if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0) - return "IP_TTL"; -#endif - return NULL; + if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0) + ERR("IP_TTL"); + + return 0; } -#define ERR(x) do { err = x; goto bad; } while(0) -#define WARN(x) log(L_WARN "sk_setup: %s: %m", x) +static inline int +sk_set_ttl6(sock *s, int ttl) +{ + if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0) + ERR("IPV6_UNICAST_HOPS"); -static char * -sk_setup(sock *s) + return 0; +} + +static inline int +sk_set_tos4(sock *s, int tos) { - int one = 1; - int fd = s->fd; - char *err = NULL; + if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0) + ERR("IP_TOS"); - if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) - ERR("fcntl(O_NONBLOCK)"); - if (s->type == SK_UNIX) - return NULL; + return 0; +} - if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND)) - s->flags |= SKF_PKTINFO; +static inline int +sk_set_tos6(sock *s, int tos) +{ + if (setsockopt(s->fd, 
SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0) + ERR("IPV6_TCLASS"); -#ifdef CONFIG_USE_HDRINCL - if ((s->type == SK_IP) && (s->flags & SKF_PKTINFO)) - { - s->flags &= ~SKF_PKTINFO; - s->flags |= SKF_HDRINCL; - if (setsockopt(fd, SOL_IP, IP_HDRINCL, &one, sizeof(one)) < 0) - ERR("IP_HDRINCL"); - } -#endif + return 0; +} - if (s->iface) - { -#ifdef SO_BINDTODEVICE - struct ifreq ifr; - strcpy(ifr.ifr_name, s->iface->name); - if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) - ERR("SO_BINDTODEVICE"); -#endif -#ifdef CONFIG_UNIX_DONTROUTE - if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) - ERR("SO_DONTROUTE"); -#endif - } +/* + * Public socket functions + */ - if ((s->ttl >= 0) && (err = sk_set_ttl_int(s))) - goto bad; +/** + * sk_setup_multicast - enable multicast for given socket + * @s: socket + * + * Prepare transmission of multicast packets for given datagram socket. + * The socket must have defined @iface. + * + * Result: 0 for success, -1 for an error. + */ - if (err = sysio_register_cmsgs(s)) - goto bad; +int +sk_setup_multicast(sock *s) +{ + ASSERT(s->iface); + if (sk_is_ipv4(s)) + return sk_setup_multicast4(s); + else + return sk_setup_multicast6(s); +} -#ifdef IPV6 - if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0) - WARN("IPV6_TCLASS"); -#else - if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0) - WARN("IP_TOS"); -#endif +/** + * sk_join_group - join multicast group for given socket + * @s: socket + * @maddr: multicast address + * + * Join multicast group for given datagram socket and associated interface. + * The socket must have defined @iface. + * + * Result: 0 for success, -1 for an error. 
+ */ - if (s->priority >= 0) - sk_set_priority(s, s->priority); +int +sk_join_group(sock *s, ip_addr maddr) +{ + if (sk_is_ipv4(s)) + return sk_join_group4(s, maddr); + else + return sk_join_group6(s, maddr); +} -#ifdef IPV6 - if ((s->flags & SKF_V6ONLY) && setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) - WARN("IPV6_V6ONLY"); -#endif +/** + * sk_leave_group - leave multicast group for given socket + * @s: socket + * @maddr: multicast address + * + * Leave multicast group for given datagram socket and associated interface. + * The socket must have defined @iface. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_leave_group(sock *s, ip_addr maddr) +{ + if (sk_is_ipv4(s)) + return sk_leave_group4(s, maddr); + else + return sk_leave_group6(s, maddr); +} + +/** + * sk_setup_broadcast - enable broadcast for given socket + * @s: socket + * + * Allow reception and transmission of broadcast packets for given datagram + * socket. The socket must have defined @iface. For transmission, packets should + * be sent to @brd address of @iface. + * + * Result: 0 for success, -1 for an error. + */ + +int +sk_setup_broadcast(sock *s) +{ + int y = 1; + + if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0) + ERR("SO_BROADCAST"); -bad: - return err; + return 0; } /** - * sk_set_ttl - set transmit TTL for given socket. + * sk_set_ttl - set transmit TTL for given socket * @s: socket * @ttl: TTL value * - * Set TTL for already opened connections when TTL was not set before. - * Useful for accepted connections when different ones should have - * different TTL. + * Set TTL for already opened connections when TTL was not set before. Useful + * for accepted connections when different ones should have different TTL. * * Result: 0 for success, -1 for an error. */ @@ -914,21 +864,21 @@ bad: int sk_set_ttl(sock *s, int ttl) { - char *err; - s->ttl = ttl; - if (err = sk_set_ttl_int(s)) - log(L_ERR "sk_set_ttl: %s: %m", err); - return (err ? 
-1 : 0); + if (sk_is_ipv4(s)) + return sk_set_ttl4(s, ttl); + else + return sk_set_ttl6(s, ttl); } /** - * sk_set_min_ttl - set minimal accepted TTL for given socket. + * sk_set_min_ttl - set minimal accepted TTL for given socket * @s: socket * @ttl: TTL value * - * Can be used in TTL security implementation + * Set minimal accepted TTL for given socket. Can be used for TTL security + * implementations. * * Result: 0 for success, -1 for an error. */ @@ -936,28 +886,24 @@ sk_set_ttl(sock *s, int ttl) int sk_set_min_ttl(sock *s, int ttl) { - int err; -#ifdef IPV6 - err = sk_set_min_ttl6(s, ttl); -#else - err = sk_set_min_ttl4(s, ttl); -#endif - - return err; + if (sk_is_ipv4(s)) + return sk_set_min_ttl4(s, ttl); + else + return sk_set_min_ttl6(s, ttl); } +#if 0 /** - * sk_set_md5_auth - add / remove MD5 security association for given socket. + * sk_set_md5_auth - add / remove MD5 security association for given socket * @s: socket * @a: IP address of the other side * @ifa: Interface for link-local IP address * @passwd: password used for MD5 authentication * - * In TCP MD5 handling code in kernel, there is a set of pairs - * (address, password) used to choose password according to - * address of the other side. This function is useful for - * listening socket, for active sockets it is enough to set - * s->password field. + * In TCP MD5 handling code in kernel, there is a set of pairs (address, + * password) used to choose password according to address of the other side. + * This function is useful for listening socket, for active sockets it is enough + * to set s->password field. * * When called with passwd != NULL, the new pair is added, * When called with passwd == NULL, the existing pair is removed. 
@@ -967,41 +913,33 @@ sk_set_min_ttl(sock *s, int ttl) int sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd) -{ - sockaddr sa; - fill_in_sockaddr(&sa, a, ifa, 0); - return sk_set_md5_auth_int(s, &sa, passwd); -} - -int -sk_set_broadcast(sock *s, int enable) -{ - if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &enable, sizeof(enable)) < 0) - { - log(L_ERR "sk_set_broadcast: SO_BROADCAST: %m"); - return -1; - } - - return 0; -} - +{ DUMMY; } +#endif -#ifdef IPV6 +/** + * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket + * @s: socket + * @offset: offset + * + * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the + * kernel will automatically fill it for outgoing packets and check it for + * incoming packets. Should not be used on ICMPv6 sockets, where the position is + * known to the kernel. + * + * Result: 0 for success, -1 for an error. + */ int sk_set_ipv6_checksum(sock *s, int offset) { if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) - { - log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m"); - return -1; - } + ERR("IPV6_CHECKSUM"); return 0; } int -sk_set_icmp_filter(sock *s, int p1, int p2) +sk_set_icmp6_filter(sock *s, int p1, int p2) { /* a bit of lame interface, but it is here only for Radv */ struct icmp6_filter f; @@ -1011,132 +949,291 @@ sk_set_icmp_filter(sock *s, int p1, int p2) ICMP6_FILTER_SETPASS(p2, &f); if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) - { - log(L_ERR "sk_setup_icmp_filter: ICMP6_FILTER: %m"); - return -1; - } + ERR("ICMP6_FILTER"); return 0; } -int -sk_setup_multicast(sock *s) +void +sk_log_error(sock *s, const char *p) { - char *err; - int zero = 0; - int index; + log(L_ERR "%s: Socket error: %s%#m", p, s->err); +} - ASSERT(s->iface); - index = s->iface->index; - if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0) - ERR("IPV6_MULTICAST_HOPS"); - if (setsockopt(s->fd, SOL_IPV6, 
IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0) - ERR("IPV6_MULTICAST_LOOP"); - if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) - ERR("IPV6_MULTICAST_IF"); +/* + * Actual struct birdsock code + */ - return 0; +static list sock_list; +static struct birdsock *current_sock; +static struct birdsock *stored_sock; +static int sock_recalc_fdsets_p; -bad: - log(L_ERR "sk_setup_multicast: %s: %m", err); - return -1; +static inline sock * +sk_next(sock *s) +{ + if (!s->n.next->next) + return NULL; + else + return SKIP_BACK(sock, n, s->n.next); } -#ifdef CONFIG_IPV6_GLIBC_20 -#define ipv6mr_interface ipv6mr_ifindex -#endif - -int -sk_join_group(sock *s, ip_addr maddr) +static void +sk_alloc_bufs(sock *s) { - struct ipv6_mreq mreq; + if (!s->rbuf && s->rbsize) + s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize); + s->rpos = s->rbuf; + if (!s->tbuf && s->tbsize) + s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize); + s->tpos = s->ttx = s->tbuf; +} - set_inaddr(&mreq.ipv6mr_multiaddr, maddr); - mreq.ipv6mr_interface = s->iface->index; +static void +sk_free_bufs(sock *s) +{ + if (s->rbuf_alloc) + { + xfree(s->rbuf_alloc); + s->rbuf = s->rbuf_alloc = NULL; + } + if (s->tbuf_alloc) + { + xfree(s->tbuf_alloc); + s->tbuf = s->tbuf_alloc = NULL; + } +} - if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0) - { - log(L_ERR "sk_join_group: IPV6_JOIN_GROUP: %m"); - return -1; - } +static void +sk_free(resource *r) +{ + sock *s = (sock *) r; - return 0; + sk_free_bufs(s); + if (s->fd >= 0) + { + close(s->fd); + + /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ + if (s->flags & SKF_THREAD) + return; + + if (s == current_sock) + current_sock = sk_next(s); + if (s == stored_sock) + stored_sock = sk_next(s); + rem_node(&s->n); + sock_recalc_fdsets_p = 1; + } } -int -sk_leave_group(sock *s, ip_addr maddr) +void +sk_set_rbsize(sock *s, uint val) { - struct ipv6_mreq mreq; - - set_inaddr(&mreq.ipv6mr_multiaddr, maddr); - mreq.ipv6mr_interface 
= s->iface->index; + ASSERT(s->rbuf_alloc == s->rbuf); - if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0) - { - log(L_ERR "sk_leave_group: IPV6_LEAVE_GROUP: %m"); - return -1; - } + if (s->rbsize == val) + return; - return 0; + s->rbsize = val; + xfree(s->rbuf_alloc); + s->rbuf_alloc = xmalloc(val); + s->rpos = s->rbuf = s->rbuf_alloc; } -#else /* IPV4 */ - -int -sk_setup_multicast(sock *s) +void +sk_set_tbsize(sock *s, uint val) { - char *err; + ASSERT(s->tbuf_alloc == s->tbuf); - ASSERT(s->iface); + if (s->tbsize == val) + return; - if (err = sysio_setup_multicast(s)) - { - log(L_ERR "sk_setup_multicast: %s: %m", err); - return -1; - } + byte *old_tbuf = s->tbuf; - return 0; + s->tbsize = val; + s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val); + s->tpos = s->tbuf + (s->tpos - old_tbuf); + s->ttx = s->tbuf + (s->ttx - old_tbuf); } -int -sk_join_group(sock *s, ip_addr maddr) +void +sk_set_tbuf(sock *s, void *tbuf) { - char *err; + s->tbuf = tbuf ?: s->tbuf_alloc; + s->ttx = s->tpos = s->tbuf; +} - if (err = sysio_join_group(s, maddr)) - { - log(L_ERR "sk_join_group: %s: %m", err); - return -1; - } +void +sk_reallocate(sock *s) +{ + sk_free_bufs(s); + sk_alloc_bufs(s); +} - return 0; +static void +sk_dump(resource *r) +{ + sock *s = (sock *) r; + static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" }; + + debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n", + sk_type_names[s->type], + s->data, + s->saddr, + s->sport, + s->daddr, + s->dport, + s->tos, + s->ttl, + s->iface ? s->iface->name : "none"); } -int -sk_leave_group(sock *s, ip_addr maddr) +static struct resclass sk_class = { + "Socket", + sizeof(sock), + sk_free, + sk_dump, + NULL, + NULL +}; + +/** + * sk_new - create a socket + * @p: pool + * + * This function creates a new socket resource. 
If you want to use it, + * you need to fill in all the required fields of the structure and + * call sk_open() to do the actual opening of the socket. + * + * The real function name is sock_new(), sk_new() is a macro wrapper + * to avoid collision with OpenSSL. + */ +sock * +sock_new(pool *p) +{ + sock *s = ralloc(p, &sk_class); + s->pool = p; + // s->saddr = s->daddr = IPA_NONE; + s->tos = s->priority = s->ttl = -1; + s->fd = -1; + return s; +} + +static int +sk_setup(sock *s) { - char *err; + int y = 1; + int fd = s->fd; - if (err = sysio_leave_group(s, maddr)) - { - log(L_ERR "sk_leave_group: %s: %m", err); + if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) + ERR("O_NONBLOCK"); + + if (!s->af) + return 0; + + if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND)) + s->flags |= SKF_PKTINFO; + +#ifdef CONFIG_USE_HDRINCL + if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO)) + { + s->flags &= ~SKF_PKTINFO; + s->flags |= SKF_HDRINCL; + if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0) + ERR("IP_HDRINCL"); + } +#endif + + if (s->iface) + { +#ifdef SO_BINDTODEVICE + struct ifreq ifr; + strcpy(ifr.ifr_name, s->iface->name); + if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) + ERR("SO_BINDTODEVICE"); +#endif + +#ifdef CONFIG_UNIX_DONTROUTE + if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0) + ERR("SO_DONTROUTE"); +#endif + } + + if (s->priority >= 0) + if (sk_set_priority(s, s->priority) < 0) return -1; - } + + if (sk_is_ipv4(s)) + { + if (s->flags & SKF_LADDR_RX) + if (sk_request_cmsg4_pktinfo(s) < 0) + return -1; + + if (s->flags & SKF_TTL_RX) + if (sk_request_cmsg4_ttl(s) < 0) + return -1; + + if ((s->type == SK_UDP) || (s->type == SK_IP)) + if (sk_disable_mtu_disc4(s) < 0) + return -1; + + if (s->ttl >= 0) + if (sk_set_ttl4(s, s->ttl) < 0) + return -1; + + if (s->tos >= 0) + if (sk_set_tos4(s, s->tos) < 0) + return -1; + } + + if (sk_is_ipv6(s)) + { + if (s->flags & SKF_V6ONLY) + if (setsockopt(fd, SOL_IPV6, 
IPV6_V6ONLY, &y, sizeof(y)) < 0) + ERR("IPV6_V6ONLY"); + + if (s->flags & SKF_LADDR_RX) + if (sk_request_cmsg6_pktinfo(s) < 0) + return -1; + + if (s->flags & SKF_TTL_RX) + if (sk_request_cmsg6_ttl(s) < 0) + return -1; + + if ((s->type == SK_UDP) || (s->type == SK_IP)) + if (sk_disable_mtu_disc6(s) < 0) + return -1; + + if (s->ttl >= 0) + if (sk_set_ttl6(s, s->ttl) < 0) + return -1; + + if (s->tos >= 0) + if (sk_set_tos6(s, s->tos) < 0) + return -1; + } return 0; } -#endif - +static void +sk_insert(sock *s) +{ + add_tail(&sock_list, &s->n); + sock_recalc_fdsets_p = 1; +} static void sk_tcp_connected(sock *s) { - sockaddr lsa; - int lsa_len = sizeof(lsa); - if (getsockname(s->fd, (struct sockaddr *) &lsa, &lsa_len) == 0) - get_sockaddr(&lsa, &s->saddr, &s->iface, &s->sport, 1); + sockaddr sa; + int sa_len = sizeof(sa); + + if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) || + (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0)) + log(L_WARN "SOCK: Cannot get local IP address for TCP>"); s->type = SK_TCP; sk_alloc_bufs(s); @@ -1144,44 +1241,55 @@ sk_tcp_connected(sock *s) } static int -sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type) +sk_passive_connected(sock *s, int type) { - int fd = accept(s->fd, sa, &al); - if (fd >= 0) - { - sock *t = sk_new(s->pool); - char *err; - t->type = type; - t->fd = fd; - t->ttl = s->ttl; - t->tos = s->tos; - t->rbsize = s->rbsize; - t->tbsize = s->tbsize; - if (type == SK_TCP) - { - sockaddr lsa; - int lsa_len = sizeof(lsa); - if (getsockname(fd, (struct sockaddr *) &lsa, &lsa_len) == 0) - get_sockaddr(&lsa, &t->saddr, &t->iface, &t->sport, 1); + sockaddr loc_sa, rem_sa; + int loc_sa_len = sizeof(loc_sa); + int rem_sa_len = sizeof(rem_sa); - get_sockaddr((sockaddr *) sa, &t->daddr, &t->iface, &t->dport, 1); - } - sk_insert(t); - if (err = sk_setup(t)) - { - log(L_ERR "Incoming connection: %s: %m", err); - rfree(t); - return 1; - } - sk_alloc_bufs(t); - s->rx_hook(t, 0); - return 1; - } - else if (errno != 
EINTR && errno != EAGAIN) - { + int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len); + if (fd < 0) + { + if ((errno != EINTR) && (errno != EAGAIN)) s->err_hook(s, errno); - } - return 0; + return 0; + } + + sock *t = sk_new(s->pool); + t->type = type; + t->fd = fd; + t->af = s->af; + t->ttl = s->ttl; + t->tos = s->tos; + t->rbsize = s->rbsize; + t->tbsize = s->tbsize; + + if (type == SK_TCP) + { + if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) || + (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0)) + log(L_WARN "SOCK: Cannot get local IP address for TCP<"); + + if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0) + log(L_WARN "SOCK: Cannot get remote IP address for TCP<"); + } + + if (sk_setup(t) < 0) + { + /* FIXME: Call err_hook instead ? */ + log(L_ERR "SOCK: Incoming connection: %s%#m", t->err); + + /* FIXME: handle it better in rfree() */ + close(t->fd); + t->fd = -1; + rfree(t); + return 1; + } + + sk_insert(t); + sk_alloc_bufs(t); + s->rx_hook(t, 0); + return 1; } /** @@ -1197,161 +1305,185 @@ sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type) int sk_open(sock *s) { - int fd; - int one = 1; + int af = BIRD_AF; + int fd = -1; int do_bind = 0; int bind_port = 0; ip_addr bind_addr = IPA_NONE; sockaddr sa; - char *err; switch (s->type) - { - case SK_TCP_ACTIVE: - s->ttx = ""; /* Force s->ttx != s->tpos */ - /* Fall thru */ - case SK_TCP_PASSIVE: - fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP); - bind_port = s->sport; - bind_addr = s->saddr; - do_bind = bind_port || ipa_nonzero(bind_addr); - break; - - case SK_UDP: - fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP); - bind_port = s->sport; - bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; - do_bind = 1; - break; - - case SK_IP: - fd = socket(BIRD_PF, SOCK_RAW, s->dport); - bind_port = 0; - bind_addr = (s->flags & SKF_BIND) ? 
s->saddr : IPA_NONE; - do_bind = ipa_nonzero(bind_addr); - break; - - case SK_MAGIC: - fd = s->fd; - break; - - default: - bug("sk_open() called for invalid sock type %d", s->type); - } + { + case SK_TCP_ACTIVE: + s->ttx = ""; /* Force s->ttx != s->tpos */ + /* Fall thru */ + case SK_TCP_PASSIVE: + fd = socket(af, SOCK_STREAM, IPPROTO_TCP); + bind_port = s->sport; + bind_addr = s->saddr; + do_bind = bind_port || ipa_nonzero(bind_addr); + break; + + case SK_UDP: + fd = socket(af, SOCK_DGRAM, IPPROTO_UDP); + bind_port = s->sport; + bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; + do_bind = 1; + break; + + case SK_IP: + fd = socket(af, SOCK_RAW, s->dport); + bind_port = 0; + bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; + do_bind = ipa_nonzero(bind_addr); + break; + + case SK_MAGIC: + af = 0; + fd = s->fd; + break; + + default: + bug("sk_open() called for invalid sock type %d", s->type); + } + if (fd < 0) - die("sk_open: socket: %m"); + ERR("socket"); + + s->af = af; s->fd = fd; - if (err = sk_setup(s)) - goto bad; + if (sk_setup(s) < 0) + goto err; if (do_bind) + { + if (bind_port) { - if (bind_port) - { - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0) - ERR("SO_REUSEADDR"); + int y = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0) + ERR2("SO_REUSEADDR"); #ifdef CONFIG_NO_IFACE_BIND - /* Workaround missing ability to bind to an iface */ - if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr)) - { - if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0) - ERR("SO_REUSEPORT"); - } + /* Workaround missing ability to bind to an iface */ + if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr)) + { + if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0) + ERR2("SO_REUSEPORT"); + } #endif - } - - fill_in_sockaddr(&sa, bind_addr, s->iface, bind_port); - if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) - ERR("bind"); } - fill_in_sockaddr(&sa, s->daddr, s->iface, 
s->dport); + sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port); + if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) + ERR2("bind"); + } if (s->password) - { - int rv = sk_set_md5_auth_int(s, &sa, s->password); - if (rv < 0) - goto bad_no_log; - } + if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0) + goto err; switch (s->type) - { - case SK_TCP_ACTIVE: - if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0) - sk_tcp_connected(s); - else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && - errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH) - ERR("connect"); - break; - case SK_TCP_PASSIVE: - if (listen(fd, 8)) - ERR("listen"); - break; - case SK_MAGIC: - break; - default: - sk_alloc_bufs(s); -#ifdef IPV6 -#ifdef IPV6_MTU_DISCOVER - { - int dont = IPV6_PMTUDISC_DONT; - if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0) - ERR("IPV6_MTU_DISCOVER"); - } -#endif -#else -#ifdef IP_PMTUDISC - { - int dont = IP_PMTUDISC_DONT; - if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0) - ERR("IP_PMTUDISC"); - } -#endif -#endif - } + { + case SK_TCP_ACTIVE: + sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport); + if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0) + sk_tcp_connected(s); + else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && + errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH) + ERR2("connect"); + break; + + case SK_TCP_PASSIVE: + if (listen(fd, 8) < 0) + ERR2("listen"); + break; + + case SK_MAGIC: + break; + + default: + sk_alloc_bufs(s); + } if (!(s->flags & SKF_THREAD)) sk_insert(s); return 0; -bad: - log(L_ERR "sk_open: %s: %m", err); -bad_no_log: +err: close(fd); s->fd = -1; return -1; } -void +int sk_open_unix(sock *s, char *name) { - int fd; struct sockaddr_un sa; - char *err; + int fd; + + /* We are sloppy during error (leak fd and not set s->err), but we die anyway */ fd = socket(AF_UNIX, SOCK_STREAM, 0); if (fd < 0) - ERR("socket"); - s->fd = fd; - 
if (err = sk_setup(s)) - goto bad; - unlink(name); + return -1; + + if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) + return -1; /* Path length checked in test_old_bird() */ sa.sun_family = AF_UNIX; strcpy(sa.sun_path, name); + if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0) - ERR("bind"); - if (listen(fd, 8)) - ERR("listen"); + return -1; + + if (listen(fd, 8) < 0) + return -1; + + s->fd = fd; sk_insert(s); - return; + return 0; +} + + +#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \ + CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL) +#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO) + +static void +sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) +{ + if (sk_is_ipv4(s)) + sk_prepare_cmsgs4(s, msg, cbuf, cbuflen); + else + sk_prepare_cmsgs6(s, msg, cbuf, cbuflen); +} + +static void +sk_process_cmsgs(sock *s, struct msghdr *msg) +{ + struct cmsghdr *cm; + + s->laddr = IPA_NONE; + s->lifindex = 0; + s->rcv_ttl = -1; - bad: - log(L_ERR "sk_open_unix: %s: %m", err); - die("Unable to create control socket %s", name); + for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) + { + if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s)) + { + sk_process_cmsg4_pktinfo(s, cm); + sk_process_cmsg4_ttl(s, cm); + } + + if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s)) + { + sk_process_cmsg6_pktinfo(s, cm); + sk_process_cmsg6_ttl(s, cm); + } + } } @@ -1362,11 +1494,11 @@ sk_sendmsg(sock *s) byte cmsg_buf[CMSG_TX_SPACE]; sockaddr dst; - fill_in_sockaddr(&dst, s->daddr, s->iface, s->dport); + sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport); struct msghdr msg = { - .msg_name = &dst, - .msg_namelen = sizeof(dst), + .msg_name = &dst.sa, + .msg_namelen = SA_LEN(dst), .msg_iov = &iov, .msg_iovlen = 1 }; @@ -1377,14 +1509,14 @@ sk_sendmsg(sock *s) if (s->flags & SKF_HDRINCL) { - fill_ip_header(s, hdr, iov.iov_len); + sk_prepare_ip_header(s, hdr, iov.iov_len); msg.msg_iov = iov2; msg.msg_iovlen = 2; } #endif if (s->flags & 
SKF_PKTINFO) - sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); + sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); return sendmsg(s->fd, &msg, 0); } @@ -1397,8 +1529,8 @@ sk_recvmsg(sock *s) sockaddr src; struct msghdr msg = { - .msg_name = &src, - .msg_namelen = sizeof(src), + .msg_name = &src.sa, + .msg_namelen = sizeof(src), // XXXX ?? .msg_iov = &iov, .msg_iovlen = 1, .msg_control = cmsg_buf, @@ -1415,8 +1547,8 @@ sk_recvmsg(sock *s) // rv = ipv4_skip_header(pbuf, rv); //endif - get_sockaddr(&src, &s->faddr, NULL, &s->fport, 1); - sysio_process_rx_cmsgs(s, &msg); + sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport); + sk_process_cmsgs(s, &msg); if (msg.msg_flags & MSG_TRUNC) s->flags |= SKF_TRUNCATED; @@ -1435,55 +1567,57 @@ sk_maybe_write(sock *s) int e; switch (s->type) + { + case SK_TCP: + case SK_MAGIC: + case SK_UNIX: + while (s->ttx != s->tpos) { - case SK_TCP: - case SK_MAGIC: - case SK_UNIX: - while (s->ttx != s->tpos) + e = write(s->fd, s->ttx, s->tpos - s->ttx); + + if (e < 0) + { + if (errno != EINTR && errno != EAGAIN) { - e = write(s->fd, s->ttx, s->tpos - s->ttx); - if (e < 0) - { - if (errno != EINTR && errno != EAGAIN) - { - reset_tx_buffer(s); - /* EPIPE is just a connection close notification during TX */ - s->err_hook(s, (errno != EPIPE) ? errno : 0); - return -1; - } - return 0; - } - s->ttx += e; + reset_tx_buffer(s); + /* EPIPE is just a connection close notification during TX */ + s->err_hook(s, (errno != EPIPE) ? 
errno : 0); + return -1; } - reset_tx_buffer(s); - return 1; - case SK_UDP: - case SK_IP: - { - if (s->tbuf == s->tpos) - return 1; - - e = sk_sendmsg(s); - - if (e < 0) - { - if (errno != EINTR && errno != EAGAIN) - { - reset_tx_buffer(s); - s->err_hook(s, errno); - return -1; - } - - if (!s->tx_hook) - reset_tx_buffer(s); - return 0; - } - reset_tx_buffer(s); + return 0; + } + s->ttx += e; + } + reset_tx_buffer(s); + return 1; + + case SK_UDP: + case SK_IP: + { + if (s->tbuf == s->tpos) return 1; + + e = sk_sendmsg(s); + + if (e < 0) + { + if (errno != EINTR && errno != EAGAIN) + { + reset_tx_buffer(s); + s->err_hook(s, errno); + return -1; + } + + if (!s->tx_hook) + reset_tx_buffer(s); + return 0; } - default: - bug("sk_maybe_write: unknown socket type %d", s->type); + reset_tx_buffer(s); + return 1; } + default: + bug("sk_maybe_write: unknown socket type %d", s->type); + } } int @@ -1573,88 +1707,86 @@ int sk_read(sock *s) { switch (s->type) + { + case SK_TCP_PASSIVE: + return sk_passive_connected(s, SK_TCP); + + case SK_UNIX_PASSIVE: + return sk_passive_connected(s, SK_UNIX); + + case SK_TCP: + case SK_UNIX: { - case SK_TCP_PASSIVE: - { - sockaddr sa; - return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP); - } - case SK_UNIX_PASSIVE: + int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos); + + if (c < 0) { - struct sockaddr_un sa; - return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX); + if (errno != EINTR && errno != EAGAIN) + s->err_hook(s, errno); } - case SK_TCP: - case SK_UNIX: + else if (!c) + s->err_hook(s, 0); + else { - int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos); - - if (c < 0) - { - if (errno != EINTR && errno != EAGAIN) - s->err_hook(s, errno); - } - else if (!c) - s->err_hook(s, 0); - else - { - s->rpos += c; - if (s->rx_hook(s, s->rpos - s->rbuf)) - { - /* We need to be careful since the socket could have been deleted by the hook */ - if (current_sock == s) - s->rpos = s->rbuf; 
- } - return 1; - } - return 0; + s->rpos += c; + if (s->rx_hook(s, s->rpos - s->rbuf)) + { + /* We need to be careful since the socket could have been deleted by the hook */ + if (current_sock == s) + s->rpos = s->rbuf; + } + return 1; } - case SK_MAGIC: - return s->rx_hook(s, 0); - default: - { - int e; + return 0; + } - e = sk_recvmsg(s); + case SK_MAGIC: + return s->rx_hook(s, 0); - if (e < 0) - { - if (errno != EINTR && errno != EAGAIN) - s->err_hook(s, errno); - return 0; - } + default: + { + int e = sk_recvmsg(s); - s->rpos = s->rbuf + e; - s->rx_hook(s, e); - return 1; + if (e < 0) + { + if (errno != EINTR && errno != EAGAIN) + s->err_hook(s, errno); + return 0; } + + s->rpos = s->rbuf + e; + s->rx_hook(s, e); + return 1; } + } } int sk_write(sock *s) { switch (s->type) + { + case SK_TCP_ACTIVE: { - case SK_TCP_ACTIVE: - { - sockaddr sa; - fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport); - if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN) - sk_tcp_connected(s); - else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS) - s->err_hook(s, errno); - return 0; - } - default: - if (s->ttx != s->tpos && sk_maybe_write(s) > 0) - { - if (s->tx_hook) - s->tx_hook(s); - return 1; - } + sockaddr sa; + sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport); + + if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN) + sk_tcp_connected(s); + else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS) + s->err_hook(s, errno); return 0; } + + default: + if (s->ttx != s->tpos && sk_maybe_write(s) > 0) + { + if (s->tx_hook) + s->tx_hook(s); + return 1; + } + return 0; + } } void @@ -1665,16 +1797,14 @@ sk_dump_all(void) debug("Open sockets:\n"); WALK_LIST(n, sock_list) - { - s = SKIP_BACK(sock, n, n); - debug("%p ", s); - sk_dump(&s->r); - } + { + s = SKIP_BACK(sock, n, n); + debug("%p ", s); + sk_dump(&s->r); + } debug("\n"); } -#undef ERR -#undef WARN /* * Main I/O Loop diff --git a/sysdep/unix/main.c 
b/sysdep/unix/main.c index 31094c52..61b306dc 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -463,7 +463,12 @@ cli_init_unix(uid_t use_uid, gid_t use_gid) s->type = SK_UNIX_PASSIVE; s->rx_hook = cli_connect; s->rbsize = 1024; - sk_open_unix(s, path_control_socket); + + /* Return value intentionally ignored */ + unlink(path_control_socket); + + if (sk_open_unix(s, path_control_socket) < 0) + die("Cannot create control socket %s: %m", path_control_socket); if (use_uid || use_gid) if (chown(path_control_socket, use_uid, use_gid) < 0) diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h index 346adcf2..518713bc 100644 --- a/sysdep/unix/unix.h +++ b/sysdep/unix/unix.h @@ -12,6 +12,8 @@ #include <sys/socket.h> struct pool; +struct iface; +struct birdsock; /* main.c */ @@ -27,36 +29,81 @@ void cmd_shutdown(void); #define UNIX_DEFAULT_CONFIGURE_TIMEOUT 300 + /* io.c */ -volatile int async_config_flag; -volatile int async_dump_flag; -volatile int async_shutdown_flag; +#define ERR(c) do { s->err = c; return -1; } while (0) +#define ERR2(c) do { s->err = c; goto err; } while (0) +#define ERR_MSG(c) do { errno = 0; s->err = c; return -1; } while (0) + + +#define SOCKADDR_SIZE 32 + +typedef struct sockaddr_bird { + struct sockaddr sa; + char padding[SOCKADDR_SIZE - sizeof(struct sockaddr)]; +} sockaddr; + #ifdef IPV6 -#define BIRD_PF PF_INET6 #define BIRD_AF AF_INET6 -typedef struct sockaddr_in6 sockaddr; -static inline int sa_family_check(sockaddr *sa) { return sa->sin6_family == AF_INET6; } +#define _MI6(x1,x2,x3,x4) _MI(x1, x2, x3, x4) +#define ipa_is_link_local(x) ipa_has_link_scope(x) +#define ipa_from_sa(x) ipa_from_sa6(x) +#define ipa_from_u32(x) _MI6(0,0,0xffff,x) +#define ipa_to_u32(x) _I3(x) #else -#define BIRD_PF PF_INET #define BIRD_AF AF_INET -typedef struct sockaddr_in sockaddr; -static inline int sa_family_check(sockaddr *sa) { return sa->sin_family == AF_INET; } +#define _I0(X) 0 +#define _I1(X) 0 +#define _I2(X) 0 +#define _I3(X) 0 +#define 
_MI6(x1,x2,x3,x4) IPA_NONE +#define ipa_is_link_local(x) 0 +#define ipa_from_sa(x) ipa_from_sa4(x) #endif + +/* This is sloppy hack, it should be detected by configure script */ +/* Linux systems have it defined so this is definition for BSD systems */ +#ifndef s6_addr32 +#define s6_addr32 __u6_addr.__u6_addr32 +#endif + + +static inline ip_addr ipa_from_in4(struct in_addr a) +{ return ipa_from_u32(ntohl(a.s_addr)); } + +static inline ip_addr ipa_from_in6(struct in6_addr a) +{ return _MI6(ntohl(a.s6_addr32[0]), ntohl(a.s6_addr32[1]), ntohl(a.s6_addr32[2]), ntohl(a.s6_addr32[3])); } + +static inline ip_addr ipa_from_sa4(sockaddr *sa) +{ return ipa_from_in4(((struct sockaddr_in *) sa)->sin_addr); } + +static inline ip_addr ipa_from_sa6(sockaddr *sa) +{ return ipa_from_in6(((struct sockaddr_in6 *) sa)->sin6_addr); } + +static inline struct in_addr ipa_to_in4(ip_addr a) +{ return (struct in_addr) { htonl(ipa_to_u32(a)) }; } + +static inline struct in6_addr ipa_to_in6(ip_addr a) +{ return (struct in6_addr) { .s6_addr32 = { htonl(_I0(a)), htonl(_I1(a)), htonl(_I2(a)), htonl(_I3(a)) } }; } + +void sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port); +int sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port); + + #ifndef SUN_LEN #define SUN_LEN(ptr) ((size_t) (((struct sockaddr_un *) 0)->sun_path) + strlen ((ptr)->sun_path)) #endif -struct birdsock; -struct iface; +volatile int async_config_flag; +volatile int async_dump_flag; +volatile int async_shutdown_flag; void io_init(void); void io_loop(void); -void fill_in_sockaddr(sockaddr *sa, ip_addr a, struct iface *ifa, unsigned port); -void get_sockaddr(sockaddr *sa, ip_addr *a, struct iface **ifa, unsigned *port, int check); -void sk_open_unix(struct birdsock *s, char *name); +int sk_open_unix(struct birdsock *s, char *name); void *tracked_fopen(struct pool *, char *name, char *mode); void test_old_bird(char *path); |