summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorKevin Krakauer <krakauer@google.com>2020-06-11 20:33:56 -0700
committerKevin Krakauer <krakauer@google.com>2020-07-31 10:47:26 -0700
commit2a7b2a61e3ea32129c26eeaa6fab3d81a5d8ad6e (patch)
tree38abd6504df18c8b0caa881bd8fdeef45f6d8e23
parent68a7da9549ac4f6290fe85d2dab550e11f16b209 (diff)
iptables: support SO_ORIGINAL_DST
Envoy (#170) uses this to get the original destination of redirected packets.
-rw-r--r--pkg/abi/linux/netfilter.go8
-rw-r--r--pkg/abi/linux/socket.go4
-rw-r--r--pkg/sentry/socket/netstack/netstack.go17
-rw-r--r--pkg/sentry/strace/socket.go1
-rw-r--r--pkg/tcpip/stack/conntrack.go26
-rw-r--r--pkg/tcpip/stack/iptables.go11
-rw-r--r--pkg/tcpip/tcpip.go4
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go11
-rw-r--r--test/iptables/BUILD1
-rw-r--r--test/iptables/iptables_test.go8
-rw-r--r--test/iptables/iptables_unsafe.go63
-rw-r--r--test/iptables/iptables_util.go51
-rw-r--r--test/iptables/nat.go152
13 files changed, 344 insertions, 13 deletions
diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go
index a91f9f018..9c27f7bb2 100644
--- a/pkg/abi/linux/netfilter.go
+++ b/pkg/abi/linux/netfilter.go
@@ -59,7 +59,7 @@ var VerdictStrings = map[int32]string{
NF_RETURN: "RETURN",
}
-// Socket options. These correspond to values in
+// Socket options for SOL_SOCKET. These correspond to values in
// include/uapi/linux/netfilter_ipv4/ip_tables.h.
const (
IPT_BASE_CTL = 64
@@ -74,6 +74,12 @@ const (
IPT_SO_GET_MAX = IPT_SO_GET_REVISION_TARGET
)
+// Socket option for SOL_IP. This corresponds to the value in
+// include/uapi/linux/netfilter_ipv4.h.
+const (
+ SO_ORIGINAL_DST = 80
+)
+
// Name lengths. These correspond to values in
// include/uapi/linux/netfilter/x_tables.h.
const (
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go
index c24a8216e..d6946bb82 100644
--- a/pkg/abi/linux/socket.go
+++ b/pkg/abi/linux/socket.go
@@ -239,11 +239,13 @@ const SockAddrMax = 128
type InetAddr [4]byte
// SockAddrInet is struct sockaddr_in, from uapi/linux/in.h.
+//
+// +marshal
type SockAddrInet struct {
Family uint16
Port uint16
Addr InetAddr
- Zero [8]uint8 // pad to sizeof(struct sockaddr).
+ _ [8]uint8 // pad to sizeof(struct sockaddr).
}
// InetMulticastRequest is struct ip_mreq, from uapi/linux/in.h.
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index f86e6cd7a..31a168f7e 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -1490,6 +1490,10 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marsha
vP := primitive.Int32(boolToInt32(v))
return &vP, nil
+ case linux.SO_ORIGINAL_DST:
+ // TODO(gvisor.dev/issue/170): ip6tables.
+ return nil, syserr.ErrInvalidArgument
+
default:
emitUnimplementedEventIPv6(t, name)
}
@@ -1600,6 +1604,19 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
vP := primitive.Int32(boolToInt32(v))
return &vP, nil
+ case linux.SO_ORIGINAL_DST:
+ if outLen < int(binary.Size(linux.SockAddrInet{})) {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ var v tcpip.OriginalDestinationOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ a, _ := ConvertAddress(linux.AF_INET, tcpip.FullAddress(v))
+ return a.(*linux.SockAddrInet), nil
+
default:
emitUnimplementedEventIP(t, name)
}
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index c0512de89..b51c4c941 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -521,6 +521,7 @@ var sockOptNames = map[uint64]abi.ValueSet{
linux.IP_ROUTER_ALERT: "IP_ROUTER_ALERT",
linux.IP_PKTOPTIONS: "IP_PKTOPTIONS",
linux.IP_MTU: "IP_MTU",
+ linux.SO_ORIGINAL_DST: "SO_ORIGINAL_DST",
},
linux.SOL_SOCKET: {
linux.SO_ERROR: "SO_ERROR",
diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go
index 559a1c4dd..470c265aa 100644
--- a/pkg/tcpip/stack/conntrack.go
+++ b/pkg/tcpip/stack/conntrack.go
@@ -240,7 +240,10 @@ func (ct *ConnTrack) connFor(pkt *PacketBuffer) (*conn, direction) {
if err != nil {
return nil, dirOriginal
}
+ return ct.connForTID(tid)
+}
+func (ct *ConnTrack) connForTID(tid tupleID) (*conn, direction) {
bucket := ct.bucket(tid)
now := time.Now()
@@ -604,3 +607,26 @@ func (ct *ConnTrack) reapTupleLocked(tuple *tuple, bucket int, now time.Time) bo
return true
}
+
+func (ct *ConnTrack) originalDst(epID TransportEndpointID) (tcpip.Address, uint16, *tcpip.Error) {
+ // Lookup the connection. The reply's original destination
+ // describes the original address.
+ tid := tupleID{
+ srcAddr: epID.LocalAddress,
+ srcPort: epID.LocalPort,
+ dstAddr: epID.RemoteAddress,
+ dstPort: epID.RemotePort,
+ transProto: header.TCPProtocolNumber,
+ netProto: header.IPv4ProtocolNumber,
+ }
+ conn, _ := ct.connForTID(tid)
+ if conn == nil {
+ // Not a tracked connection.
+ return "", 0, tcpip.ErrNotConnected
+ } else if conn.manip == manipNone {
+ // Unmanipulated connection.
+ return "", 0, tcpip.ErrInvalidOptionValue
+ }
+
+ return conn.original.dstAddr, conn.original.dstPort, nil
+}
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index cbbae4224..110ba073d 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -218,19 +218,16 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr
// Many users never configure iptables. Spare them the cost of rule
// traversal if rules have never been set.
it.mu.RLock()
+ defer it.mu.RUnlock()
if !it.modified {
- it.mu.RUnlock()
return true
}
- it.mu.RUnlock()
// Packets are manipulated only if connection and matching
// NAT rule exists.
shouldTrack := it.connections.handlePacket(pkt, hook, gso, r)
// Go through each table containing the hook.
- it.mu.RLock()
- defer it.mu.RUnlock()
priorities := it.priorities[hook]
for _, tableID := range priorities {
// If handlePacket already NATed the packet, we don't need to
@@ -418,3 +415,9 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx
// All the matchers matched, so run the target.
return rule.Target.Action(pkt, &it.connections, hook, gso, r, address)
}
+
+// OriginalDst returns the original destination of redirected connections. It
+// returns an error if the connection doesn't exist or isn't redirected.
+func (it *IPTables) OriginalDst(epID TransportEndpointID) (tcpip.Address, uint16, *tcpip.Error) {
+ return it.connections.originalDst(epID)
+}
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index a634b9b60..45f59b60f 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -954,6 +954,10 @@ type DefaultTTLOption uint8
// classic BPF filter on a given endpoint.
type SocketDetachFilterOption int
+// OriginalDestinationOption is used to get the original destination address
+// and port of a redirected packet.
+type OriginalDestinationOption FullAddress
+
// IPPacketInfo is the message structure for IP_PKTINFO.
//
// +stateify savable
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 0f7487963..682687ebe 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -2017,6 +2017,17 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
*o = tcpip.TCPDeferAcceptOption(e.deferAccept)
e.UnlockUser()
+ case *tcpip.OriginalDestinationOption:
+ ipt := e.stack.IPTables()
+ addr, port, err := ipt.OriginalDst(e.ID)
+ if err != nil {
+ return err
+ }
+ *o = tcpip.OriginalDestinationOption{
+ Addr: addr,
+ Port: port,
+ }
+
default:
return tcpip.ErrUnknownProtocolOption
}
diff --git a/test/iptables/BUILD b/test/iptables/BUILD
index 40b63ebbe..66453772a 100644
--- a/test/iptables/BUILD
+++ b/test/iptables/BUILD
@@ -9,6 +9,7 @@ go_library(
"filter_input.go",
"filter_output.go",
"iptables.go",
+ "iptables_unsafe.go",
"iptables_util.go",
"nat.go",
],
diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go
index 550b6198a..fda5f694f 100644
--- a/test/iptables/iptables_test.go
+++ b/test/iptables/iptables_test.go
@@ -371,3 +371,11 @@ func TestFilterAddrs(t *testing.T) {
}
}
}
+
+func TestNATPreOriginalDst(t *testing.T) {
+ singleTest(t, NATPreOriginalDst{})
+}
+
+func TestNATOutOriginalDst(t *testing.T) {
+ singleTest(t, NATOutOriginalDst{})
+}
diff --git a/test/iptables/iptables_unsafe.go b/test/iptables/iptables_unsafe.go
new file mode 100644
index 000000000..bd85a8fea
--- /dev/null
+++ b/test/iptables/iptables_unsafe.go
@@ -0,0 +1,63 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package iptables
+
+import (
+ "fmt"
+ "syscall"
+ "unsafe"
+)
+
+type originalDstError struct {
+ errno syscall.Errno
+}
+
+func (e originalDstError) Error() string {
+ return fmt.Sprintf("errno (%d) when calling getsockopt(SO_ORIGINAL_DST): %v", int(e.errno), e.errno.Error())
+}
+
+// SO_ORIGINAL_DST gets the original destination of a redirected packet via
+// getsockopt.
+const SO_ORIGINAL_DST = 80
+
+func originalDestination4(connfd int) (syscall.RawSockaddrInet4, error) {
+ var addr syscall.RawSockaddrInet4
+ var addrLen uint32 = syscall.SizeofSockaddrInet4
+ if errno := originalDestination(connfd, syscall.SOL_IP, unsafe.Pointer(&addr), &addrLen); errno != 0 {
+ return syscall.RawSockaddrInet4{}, originalDstError{errno}
+ }
+ return addr, nil
+}
+
+func originalDestination6(connfd int) (syscall.RawSockaddrInet6, error) {
+ var addr syscall.RawSockaddrInet6
+ var addrLen uint32 = syscall.SizeofSockaddrInet6
+ if errno := originalDestination(connfd, syscall.SOL_IPV6, unsafe.Pointer(&addr), &addrLen); errno != 0 {
+ return syscall.RawSockaddrInet6{}, originalDstError{errno}
+ }
+ return addr, nil
+}
+
+func originalDestination(connfd int, level uintptr, optval unsafe.Pointer, optlen *uint32) syscall.Errno {
+ _, _, errno := syscall.Syscall6(
+ syscall.SYS_GETSOCKOPT,
+ uintptr(connfd),
+ level,
+ SO_ORIGINAL_DST,
+ uintptr(optval),
+ uintptr(unsafe.Pointer(optlen)),
+ 0)
+ return errno
+}
diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go
index ca80a4b5f..5125fe47b 100644
--- a/test/iptables/iptables_util.go
+++ b/test/iptables/iptables_util.go
@@ -15,6 +15,8 @@
package iptables
import (
+ "encoding/binary"
+ "errors"
"fmt"
"net"
"os/exec"
@@ -218,17 +220,58 @@ func filterAddrs(addrs []string, ipv6 bool) []string {
// getInterfaceName returns the name of the interface other than loopback.
func getInterfaceName() (string, bool) {
- var ifname string
+ iface, ok := getNonLoopbackInterface()
+ if !ok {
+ return "", false
+ }
+ return iface.Name, true
+}
+
+func getInterfaceAddrs(ipv6 bool) ([]net.IP, error) {
+ iface, ok := getNonLoopbackInterface()
+ if !ok {
+ return nil, errors.New("no non-loopback interface found")
+ }
+ addrs, err := iface.Addrs()
+ if err != nil {
+ return nil, err
+ }
+
+ // Get only IPv4 or IPv6 addresses.
+ ips := make([]net.IP, 0, len(addrs))
+ for _, addr := range addrs {
+ parts := strings.Split(addr.String(), "/")
+ var ip net.IP
+ // To16() returns IPv4 addresses as IPv4-mapped IPv6 addresses.
+ // So we check whether To4() returns nil to test whether the
+ // address is v4 or v6.
+ if v4 := net.ParseIP(parts[0]).To4(); ipv6 && v4 == nil {
+ ip = net.ParseIP(parts[0]).To16()
+ } else {
+ ip = v4
+ }
+ if ip != nil {
+ ips = append(ips, ip)
+ }
+ }
+ return ips, nil
+}
+
+func getNonLoopbackInterface() (net.Interface, bool) {
if interfaces, err := net.Interfaces(); err == nil {
for _, intf := range interfaces {
if intf.Name != "lo" {
- ifname = intf.Name
- break
+ return intf, true
}
}
}
+ return net.Interface{}, false
+}
- return ifname, ifname != ""
+func htons(x uint16) uint16 {
+ buf := make([]byte, 2)
+ binary.BigEndian.PutUint16(buf, x)
+ return binary.LittleEndian.Uint16(buf)
}
func localIP(ipv6 bool) string {
diff --git a/test/iptables/nat.go b/test/iptables/nat.go
index ac0d91bb2..b7fea2527 100644
--- a/test/iptables/nat.go
+++ b/test/iptables/nat.go
@@ -18,12 +18,11 @@ import (
"errors"
"fmt"
"net"
+ "syscall"
"time"
)
-const (
- redirectPort = 42
-)
+const redirectPort = 42
func init() {
RegisterTestCase(NATPreRedirectUDPPort{})
@@ -42,6 +41,8 @@ func init() {
RegisterTestCase(NATOutRedirectInvert{})
RegisterTestCase(NATRedirectRequiresProtocol{})
RegisterTestCase(NATLoopbackSkipsPrerouting{})
+ RegisterTestCase(NATPreOriginalDst{})
+ RegisterTestCase(NATOutOriginalDst{})
}
// NATPreRedirectUDPPort tests that packets are redirected to different port.
@@ -471,6 +472,151 @@ func (NATLoopbackSkipsPrerouting) LocalAction(ip net.IP, ipv6 bool) error {
return nil
}
+// NATPreOriginalDst tests that SO_ORIGINAL_DST returns the pre-NAT destination
+// of PREROUTING NATted packets.
+type NATPreOriginalDst struct{}
+
+// Name implements TestCase.Name.
+func (NATPreOriginalDst) Name() string {
+ return "NATPreOriginalDst"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATPreOriginalDst) ContainerAction(ip net.IP, ipv6 bool) error {
+ // Redirect incoming TCP connections to acceptPort.
+ if err := natTable(ipv6, "-A", "PREROUTING",
+ "-p", "tcp",
+ "--destination-port", fmt.Sprintf("%d", dropPort),
+ "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)); err != nil {
+ return err
+ }
+
+ addrs, err := getInterfaceAddrs(ipv6)
+ if err != nil {
+ return err
+ }
+ return listenForRedirectedConn(ipv6, addrs)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATPreOriginalDst) LocalAction(ip net.IP, ipv6 bool) error {
+ return connectTCP(ip, dropPort, sendloopDuration)
+}
+
+// NATOutOriginalDst tests that SO_ORIGINAL_DST returns the pre-NAT destination
+// of OUTBOUND NATted packets.
+type NATOutOriginalDst struct{}
+
+// Name implements TestCase.Name.
+func (NATOutOriginalDst) Name() string {
+ return "NATOutOriginalDst"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATOutOriginalDst) ContainerAction(ip net.IP, ipv6 bool) error {
+ // Redirect incoming TCP connections to acceptPort.
+ if err := natTable(ipv6, "-A", "OUTPUT", "-p", "tcp", "-j", "REDIRECT", "--to-port", fmt.Sprintf("%d", acceptPort)); err != nil {
+ return err
+ }
+
+ connCh := make(chan error)
+ go func() {
+ connCh <- connectTCP(ip, dropPort, sendloopDuration)
+ }()
+
+ if err := listenForRedirectedConn(ipv6, []net.IP{ip}); err != nil {
+ return err
+ }
+ return <-connCh
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATOutOriginalDst) LocalAction(ip net.IP, ipv6 bool) error {
+ // No-op.
+ return nil
+}
+
+func listenForRedirectedConn(ipv6 bool, originalDsts []net.IP) error {
+ // The net package doesn't give guarantee access to the connection's
+ // underlying FD, and thus we cannot call getsockopt. We have to use
+ // traditional syscalls for SO_ORIGINAL_DST.
+
+ // Create the listening socket, bind, listen, and accept.
+ family := syscall.AF_INET
+ if ipv6 {
+ family = syscall.AF_INET6
+ }
+ sockfd, err := syscall.Socket(family, syscall.SOCK_STREAM, 0)
+ if err != nil {
+ return err
+ }
+ defer syscall.Close(sockfd)
+
+ var bindAddr syscall.Sockaddr
+ if ipv6 {
+ bindAddr = &syscall.SockaddrInet6{
+ Port: acceptPort,
+ Addr: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // in6addr_any
+ }
+ } else {
+ bindAddr = &syscall.SockaddrInet4{
+ Port: acceptPort,
+ Addr: [4]byte{0, 0, 0, 0}, // INADDR_ANY
+ }
+ }
+ if err := syscall.Bind(sockfd, bindAddr); err != nil {
+ return err
+ }
+
+ if err := syscall.Listen(sockfd, 1); err != nil {
+ return err
+ }
+
+ connfd, _, err := syscall.Accept(sockfd)
+ if err != nil {
+ return err
+ }
+ defer syscall.Close(connfd)
+
+ // Verify that, despite listening on acceptPort, SO_ORIGINAL_DST
+ // indicates the packet was sent to originalDst:dropPort.
+ if ipv6 {
+ got, err := originalDestination6(connfd)
+ if err != nil {
+ return err
+ }
+ // The original destination could be any of our IPs.
+ for _, dst := range originalDsts {
+ want := syscall.RawSockaddrInet6{
+ Family: syscall.AF_INET6,
+ Port: htons(dropPort),
+ }
+ copy(want.Addr[:], dst.To16())
+ if got == want {
+ return nil
+ }
+ }
+ return fmt.Errorf("SO_ORIGINAL_DST returned %+v, but wanted one of %+v (note: port numbers are in network byte order)", got, originalDsts)
+ } else {
+ got, err := originalDestination4(connfd)
+ if err != nil {
+ return err
+ }
+ // The original destination could be any of our IPs.
+ for _, dst := range originalDsts {
+ want := syscall.RawSockaddrInet4{
+ Family: syscall.AF_INET,
+ Port: htons(dropPort),
+ }
+ copy(want.Addr[:], dst.To4())
+ if got == want {
+ return nil
+ }
+ }
+ return fmt.Errorf("SO_ORIGINAL_DST returned %+v, but wanted one of %+v (note: port numbers are in network byte order)", got, originalDsts)
+ }
+}
+
// loopbackTests runs an iptables rule and ensures that packets sent to
// dest:dropPort are received by localhost:acceptPort.
func loopbackTest(ipv6 bool, dest net.IP, args ...string) error {