summaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorJason A. Donenfeld <Jason@zx2c4.com>2017-08-11 23:28:44 +0200
committerJason A. Donenfeld <Jason@zx2c4.com>2017-08-23 09:05:19 -0600
commitbbf116c0cde4b2ea27c38dbcc9ab296588c8815f (patch)
treeb9300b345de4b09fd317603f341f476d50de53d9 /src
parentc81d33b0bbdfb7cdcf5ef37a1e1264e6e8c11b3b (diff)
socket: improve reply-to-src algorithm
We store the destination IP of incoming packets as the source IP of outgoing packets. When we send outgoing packets, we then ask the routing table for which interface to use and which source address, given our inputs of the destination address and a suggested source address. This all is good and fine, since it means we'll successfully reply using the correct source address, correlating with the destination address for incoming packets. However, what happens when default routes change? Or when interface IP addresses change? Prior to this commit, after getting the response from the routing table of the source address, destination address, and interface, we would then make sure that the source address actually belonged to the outbound interface. If it didn't, we'd reset our source address to zero and re-ask the routing table, in which case the routing table would then give us the default IP address for sending that packet. This worked mostly fine for most purposes, but there was a problem: what if WireGuard legitimately accepted an inbound packet on a default interface using an IP of another interface? In this case, falling back to asking for the default source IP was not a good strategy, since it'd nearly always mean we'd fail to reply using the right source. So, this commit changes the algorithm slightly. Rather than falling back to using the default IP if the preferred source IP doesn't belong to the outbound interface, we have two checks: we make sure that the source IP address belongs to _some_ interface on the system, no matter which one (so long as it's within the network namespace), and we check whether or not the interface of an incoming packet matches the returned interface for the outbound traffic. If both these conditions are true, then we proceed with using this source IP address. If not, we fall back to the default IP address. Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'src')
-rw-r--r--src/compat/compat.h47
-rw-r--r--src/peer.h5
-rw-r--r--src/socket.c14
-rwxr-xr-xsrc/tests/netns.sh40
-rw-r--r--src/tests/qemu/kernel.config1
5 files changed, 102 insertions, 5 deletions
diff --git a/src/compat/compat.h b/src/compat/compat.h
index a373c83..451078f 100644
--- a/src/compat/compat.h
+++ b/src/compat/compat.h
@@ -305,7 +305,52 @@ static inline u64 ktime_get_ns(void)
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
#include <linux/inetdevice.h>
-#define inet_confirm_addr(a,b,c,d,e) inet_confirm_addr(b,c,d,e)
+static inline __be32 our_confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope)
+{
+ int same = 0;
+ __be32 addr = 0;
+ for_ifa(in_dev) {
+ if (!addr && (local == ifa->ifa_local || !local) && ifa->ifa_scope <= scope) {
+ addr = ifa->ifa_local;
+ if (same)
+ break;
+ }
+ if (!same) {
+ same = (!local || inet_ifa_match(local, ifa)) && (!dst || inet_ifa_match(dst, ifa));
+ if (same && addr) {
+ if (local || !dst)
+ break;
+ if (inet_ifa_match(addr, ifa))
+ break;
+ if (ifa->ifa_scope <= scope) {
+ addr = ifa->ifa_local;
+ break;
+ }
+ same = 0;
+ }
+ }
+ } endfor_ifa(in_dev);
+ return same ? addr : 0;
+}
+static inline __be32 our_inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope)
+{
+ __be32 addr = 0;
+ struct net_device *dev;
+ if (in_dev)
+ return our_confirm_addr_indev(in_dev, dst, local, scope);
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev) {
+ addr = our_confirm_addr_indev(in_dev, dst, local, scope);
+ if (addr)
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return addr;
+}
+#define inet_confirm_addr our_inet_confirm_addr
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
diff --git a/src/peer.h b/src/peer.h
index c058f59..c10406b 100644
--- a/src/peer.h
+++ b/src/peer.h
@@ -21,7 +21,10 @@ struct endpoint {
struct sockaddr_in6 addr6;
};
union {
- struct in_addr src4;
+ struct {
+ struct in_addr src4;
+ int src_if4; /* Essentially the same as addr6->scope_id */
+ };
struct in6_addr src6;
};
};
diff --git a/src/socket.c b/src/socket.c
index cdb0f47..1dff57a 100644
--- a/src/socket.c
+++ b/src/socket.c
@@ -44,9 +44,14 @@ static inline int send4(struct wireguard_device *wg, struct sk_buff *skb, struct
if (!rt) {
security_sk_classify_flow(sock, flowi4_to_flowi(&fl));
+ if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, fl.saddr, RT_SCOPE_HOST))) {
+ endpoint->src4.s_addr = endpoint->src_if4 = fl.saddr = 0;
+ if (cache)
+ dst_cache_reset(cache);
+ }
rt = ip_route_output_flow(sock_net(sock), &fl, sock);
- if (unlikely(endpoint->src4.s_addr && ((IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && !inet_confirm_addr(sock_net(sock), rcu_dereference_bh(rt->dst.dev->ip_ptr), 0, fl.saddr, RT_SCOPE_HOST))))) {
- endpoint->src4.s_addr = fl.saddr = 0;
+ if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && rt->dst.dev->ifindex != endpoint->src_if4)))) {
+ endpoint->src4.s_addr = endpoint->src_if4 = fl.saddr = 0;
if (cache)
dst_cache_reset(cache);
if (!IS_ERR(rt))
@@ -204,6 +209,7 @@ int socket_endpoint_from_skb(struct endpoint *endpoint, struct sk_buff *skb)
endpoint->addr4.sin_port = udp_hdr(skb)->source;
endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
endpoint->src4.s_addr = ip_hdr(skb)->daddr;
+ endpoint->src_if4 = skb->skb_iif;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
endpoint->addr6.sin6_family = AF_INET6;
endpoint->addr6.sin6_port = udp_hdr(skb)->source;
@@ -223,12 +229,14 @@ void socket_set_peer_endpoint(struct wireguard_peer *peer, struct endpoint *endp
if (likely(peer->endpoint.addr4.sin_family == AF_INET &&
peer->endpoint.addr4.sin_port == endpoint->addr4.sin_port &&
peer->endpoint.addr4.sin_addr.s_addr == endpoint->addr4.sin_addr.s_addr &&
- peer->endpoint.src4.s_addr == endpoint->src4.s_addr))
+ peer->endpoint.src4.s_addr == endpoint->src4.s_addr &&
+ peer->endpoint.src_if4 == endpoint->src_if4))
goto out;
read_unlock_bh(&peer->endpoint_lock);
write_lock_bh(&peer->endpoint_lock);
peer->endpoint.addr4 = endpoint->addr4;
peer->endpoint.src4 = endpoint->src4;
+ peer->endpoint.src_if4 = endpoint->src_if4;
} else if (endpoint->addr.sa_family == AF_INET6) {
read_lock_bh(&peer->endpoint_lock);
if (likely(peer->endpoint.addr6.sin6_family == AF_INET6 &&
diff --git a/src/tests/netns.sh b/src/tests/netns.sh
index 6a58b37..ea70fb5 100755
--- a/src/tests/netns.sh
+++ b/src/tests/netns.sh
@@ -322,6 +322,46 @@ n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1
n2 ping -W 1 -c 1 192.168.241.1
[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]]
+# What happens if the inbound destination address belongs to a different interface as the default route?
+ip1 link add dummy0 type dummy
+ip1 addr add 10.50.0.1/24 dev dummy0
+ip1 link set dummy0 up
+ip2 route add 10.50.0.0/24 dev veth2
+n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]]
+
+ip1 link del dummy0
+ip1 addr flush dev veth1
+ip2 addr flush dev veth2
+ip1 route flush dev veth1
+ip2 route flush dev veth2
+
+# Now we see what happens if another interface route takes precedence over an ongoing one
+ip1 link add veth3 type veth peer name veth4
+ip1 link set veth4 netns $netns2
+ip1 addr add 10.0.0.1/24 dev veth1
+ip2 addr add 10.0.0.2/24 dev veth2
+ip1 addr add 10.0.0.3/24 dev veth3
+ip1 link set veth1 up
+ip2 link set veth2 up
+ip1 link set veth3 up
+ip2 link set veth4 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+waitiface $netns1 veth3
+waitiface $netns2 veth4
+ip1 route flush dev veth1
+ip1 route flush dev veth3
+ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]]
+
ip1 link del veth1
+ip1 link del veth3
ip1 link del wg0
ip2 link del wg0
diff --git a/src/tests/qemu/kernel.config b/src/tests/qemu/kernel.config
index 5469448..84a9875 100644
--- a/src/tests/qemu/kernel.config
+++ b/src/tests/qemu/kernel.config
@@ -2,6 +2,7 @@ CONFIG_NET=y
CONFIG_NETDEVICES=y
CONFIG_NET_CORE=y
CONFIG_NET_IPIP=y
+CONFIG_DUMMY=y
CONFIG_VETH=y
CONFIG_MULTIUSER=y
CONFIG_NAMESPACES=y