diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2017-08-11 23:28:44 +0200 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2017-08-23 09:05:19 -0600 |
commit | bbf116c0cde4b2ea27c38dbcc9ab296588c8815f (patch) | |
tree | b9300b345de4b09fd317603f341f476d50de53d9 | |
parent | c81d33b0bbdfb7cdcf5ef37a1e1264e6e8c11b3b (diff) |
socket: improve reply-to-src algorithm
We store the destination IP of incoming packets as the source IP of
outgoing packets. When we send outgoing packets, we then ask the routing
table for which interface to use and which source address, given our
inputs of the destination address and a suggested source address. This
all is good and fine, since it means we'll successfully reply using the
correct source address, correlating with the destination address for
incoming packets. However, what happens when default routes change? Or
when interface IP addresses change?
Prior to this commit, after getting the response from the routing table
of the source address, destination address, and interface, we would then
make sure that the source address actually belonged to the outbound
interface. If it didn't, we'd reset our source address to zero and
re-ask the routing table, in which case the routing table would then
give us the default IP address for sending that packet. This worked
mostly fine for most purposes, but there was a problem: what if
WireGuard legitimately accepted an inbound packet on a default interface
using an IP of another interface? In this case, falling back to asking
for the default source IP was not a good strategy, since it'd nearly
always mean we'd fail to reply using the right source.
So, this commit changes the algorithm slightly. Rather than falling back
to using the default IP if the preferred source IP doesn't belong to the
outbound interface, we have two checks: we make sure that the source IP
address belongs to _some_ interface on the system, no matter which one
(so long as it's within the network namespace), and we check whether or
not the interface of an incoming packet matches the returned interface
for the outbound traffic. If both these conditions are true, then we
proceed with using this source IP address. If not, we fall back to the
default IP address.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
-rw-r--r-- | src/compat/compat.h | 47 | ||||
-rw-r--r-- | src/peer.h | 5 | ||||
-rw-r--r-- | src/socket.c | 14 | ||||
-rwxr-xr-x | src/tests/netns.sh | 40 | ||||
-rw-r--r-- | src/tests/qemu/kernel.config | 1 |
5 files changed, 102 insertions, 5 deletions
diff --git a/src/compat/compat.h b/src/compat/compat.h index a373c83..451078f 100644 --- a/src/compat/compat.h +++ b/src/compat/compat.h @@ -305,7 +305,52 @@ static inline u64 ktime_get_ns(void) #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) #include <linux/inetdevice.h> -#define inet_confirm_addr(a,b,c,d,e) inet_confirm_addr(b,c,d,e) +static inline __be32 our_confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) +{ + int same = 0; + __be32 addr = 0; + for_ifa(in_dev) { + if (!addr && (local == ifa->ifa_local || !local) && ifa->ifa_scope <= scope) { + addr = ifa->ifa_local; + if (same) + break; + } + if (!same) { + same = (!local || inet_ifa_match(local, ifa)) && (!dst || inet_ifa_match(dst, ifa)); + if (same && addr) { + if (local || !dst) + break; + if (inet_ifa_match(addr, ifa)) + break; + if (ifa->ifa_scope <= scope) { + addr = ifa->ifa_local; + break; + } + same = 0; + } + } + } endfor_ifa(in_dev); + return same ? addr : 0; +} +static inline __be32 our_inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope) +{ + __be32 addr = 0; + struct net_device *dev; + if (in_dev) + return our_confirm_addr_indev(in_dev, dst, local, scope); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + in_dev = __in_dev_get_rcu(dev); + if (in_dev) { + addr = our_confirm_addr_indev(in_dev, dst, local, scope); + if (addr) + break; + } + } + rcu_read_unlock(); + return addr; +} +#define inet_confirm_addr our_inet_confirm_addr #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) @@ -21,7 +21,10 @@ struct endpoint { struct sockaddr_in6 addr6; }; union { - struct in_addr src4; + struct { + struct in_addr src4; + int src_if4; /* Essentially the same as addr6->scope_id */ + }; struct in6_addr src6; }; }; diff --git a/src/socket.c b/src/socket.c index cdb0f47..1dff57a 100644 --- a/src/socket.c +++ b/src/socket.c @@ -44,9 +44,14 @@ static inline int send4(struct wireguard_device *wg, struct sk_buff *skb, struct if (!rt) { security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); + if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, fl.saddr, RT_SCOPE_HOST))) { + endpoint->src4.s_addr = endpoint->src_if4 = fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + } rt = ip_route_output_flow(sock_net(sock), &fl, sock); - if (unlikely(endpoint->src4.s_addr && ((IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && !inet_confirm_addr(sock_net(sock), rcu_dereference_bh(rt->dst.dev->ip_ptr), 0, fl.saddr, RT_SCOPE_HOST))))) { - endpoint->src4.s_addr = fl.saddr = 0; + if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && rt->dst.dev->ifindex != endpoint->src_if4)))) { + endpoint->src4.s_addr = endpoint->src_if4 = fl.saddr = 0; if (cache) dst_cache_reset(cache); if (!IS_ERR(rt)) @@ -204,6 +209,7 @@ int socket_endpoint_from_skb(struct endpoint *endpoint, struct sk_buff *skb) endpoint->addr4.sin_port = udp_hdr(skb)->source; endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; endpoint->src4.s_addr = ip_hdr(skb)->daddr; + endpoint->src_if4 = skb->skb_iif; } else if (skb->protocol == htons(ETH_P_IPV6)) { endpoint->addr6.sin6_family = AF_INET6; endpoint->addr6.sin6_port = udp_hdr(skb)->source; @@ -223,12 +229,14 @@ void socket_set_peer_endpoint(struct wireguard_peer *peer, struct endpoint *endp if (likely(peer->endpoint.addr4.sin_family == AF_INET && peer->endpoint.addr4.sin_port == endpoint->addr4.sin_port && peer->endpoint.addr4.sin_addr.s_addr == endpoint->addr4.sin_addr.s_addr && - peer->endpoint.src4.s_addr == endpoint->src4.s_addr)) + peer->endpoint.src4.s_addr == endpoint->src4.s_addr && + peer->endpoint.src_if4 == endpoint->src_if4)) goto out; read_unlock_bh(&peer->endpoint_lock); write_lock_bh(&peer->endpoint_lock); peer->endpoint.addr4 = endpoint->addr4; peer->endpoint.src4 = endpoint->src4; + peer->endpoint.src_if4 = endpoint->src_if4; } else if (endpoint->addr.sa_family == AF_INET6) { read_lock_bh(&peer->endpoint_lock); if (likely(peer->endpoint.addr6.sin6_family == AF_INET6 && diff --git a/src/tests/netns.sh b/src/tests/netns.sh index 6a58b37..ea70fb5 100755 --- a/src/tests/netns.sh +++ b/src/tests/netns.sh @@ -322,6 +322,46 @@ n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1 n2 ping -W 1 -c 1 192.168.241.1 [[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]] +# What happens if the inbound destination address belongs to a different interface as the default route? +ip1 link add dummy0 type dummy +ip1 addr add 10.50.0.1/24 dev dummy0 +ip1 link set dummy0 up +ip2 route add 10.50.0.0/24 dev veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]] + +ip1 link del dummy0 +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 route flush dev veth1 +ip2 route flush dev veth2 + +# Now we see what happens if another interface route takes precedence over an ongoing one +ip1 link add veth3 type veth peer name veth4 +ip1 link set veth4 netns $netns2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip1 addr add 10.0.0.3/24 dev veth3 +ip1 link set veth1 up +ip2 link set veth2 up +ip1 link set veth3 up +ip2 link set veth4 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +waitiface $netns1 veth3 +waitiface $netns2 veth4 +ip1 route flush dev veth1 +ip1 route flush dev veth3 +ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] + ip1 link del veth1 +ip1 link del veth3 ip1 link del wg0 ip2 link del wg0 diff --git a/src/tests/qemu/kernel.config b/src/tests/qemu/kernel.config index 5469448..84a9875 100644 --- a/src/tests/qemu/kernel.config +++ b/src/tests/qemu/kernel.config @@ -2,6 +2,7 @@ CONFIG_NET=y CONFIG_NETDEVICES=y CONFIG_NET_CORE=y CONFIG_NET_IPIP=y +CONFIG_DUMMY=y CONFIG_VETH=y CONFIG_MULTIUSER=y CONFIG_NAMESPACES=y |