diff options
-rw-r--r-- | pkg/abi/linux/netfilter.go | 44 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/netfilter.go | 77 | ||||
-rw-r--r-- | pkg/tcpip/iptables/iptables.go | 32 | ||||
-rw-r--r-- | pkg/tcpip/iptables/targets.go | 60 | ||||
-rw-r--r-- | pkg/tcpip/iptables/types.go | 4 | ||||
-rw-r--r-- | pkg/tcpip/stack/nic.go | 12 | ||||
-rw-r--r-- | test/iptables/iptables_test.go | 12 | ||||
-rw-r--r-- | test/iptables/nat.go | 52 |
8 files changed, 279 insertions, 14 deletions
diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go index bd2e13ba1..314c318b6 100644 --- a/pkg/abi/linux/netfilter.go +++ b/pkg/abi/linux/netfilter.go @@ -253,6 +253,50 @@ type XTErrorTarget struct { // SizeOfXTErrorTarget is the size of an XTErrorTarget. const SizeOfXTErrorTarget = 64 +// Flag values for NfNATIPV4Range. The values indicate whether to map +// protocol specific part(ports) or IPs. It corresponds to values in +// include/uapi/linux/netfilter/nf_nat.h. +const ( + NF_NAT_RANGE_MAP_IPS = 1 << 0 + NF_NAT_RANGE_PROTO_SPECIFIED = 1 << 1 + NF_NAT_RANGE_PROTO_RANDOM = 1 << 2 + NF_NAT_RANGE_PERSISTENT = 1 << 3 + NF_NAT_RANGE_PROTO_RANDOM_FULLY = 1 << 4 + NF_NAT_RANGE_PROTO_RANDOM_ALL = (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) + NF_NAT_RANGE_MASK = (NF_NAT_RANGE_MAP_IPS | + NF_NAT_RANGE_PROTO_SPECIFIED | NF_NAT_RANGE_PROTO_RANDOM | + NF_NAT_RANGE_PERSISTENT | NF_NAT_RANGE_PROTO_RANDOM_FULLY) +) + +// NfNATIPV4Range. It corresponds to struct nf_nat_ipv4_range +// in include/uapi/linux/netfilter/nf_nat.h. The fields are in +// network byte order. +type NfNATIPV4Range struct { + Flags uint32 + MinIP [4]byte + MaxIP [4]byte + MinPort uint16 + MaxPort uint16 +} + +// NfNATIPV4MultiRangeCompat. It corresponds to struct +// nf_nat_ipv4_multi_range_compat in include/uapi/linux/netfilter/nf_nat.h. +type NfNATIPV4MultiRangeCompat struct { + RangeSize uint32 + RangeIPV4 NfNATIPV4Range +} + +// XTRedirectTarget triggers a redirect when reached. +// Adding 4 bytes of padding to make the struct 8 byte aligned. +type XTRedirectTarget struct { + Target XTEntryTarget + NfRange NfNATIPV4MultiRangeCompat + _ [4]byte +} + +// SizeOfXTRedirectTarget is the size of an XTRedirectTarget. +const SizeOfXTRedirectTarget = 56 + // IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. It corresponds // to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h. type IPTGetinfo struct { diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index 2ec11f6ac..f68a2260d 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/iptables" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/usermem" @@ -35,6 +36,11 @@ import ( // shouldn't be reached - an error has occurred if we fall through to one. const errorTargetName = "ERROR" +// redirectTargetName is used to mark targets as redirect targets. Redirect +// targets should be reached for only NAT and Mangle tables. These targets will +// change the destination port/destination IP for packets. +const redirectTargetName = "REDIRECT" + // Metadata is used to verify that we are correctly serializing and // deserializing iptables into structs consumable by the iptables tool. We save // a metadata struct when the tables are written, and when they are read out we @@ -240,6 +246,8 @@ func marshalTarget(target iptables.Target) []byte { return marshalErrorTarget(tg.Name) case iptables.ReturnTarget: return marshalStandardTarget(iptables.RuleReturn) + case iptables.RedirectTarget: + return marshalRedirectTarget() case JumpTarget: return marshalJumpTarget(tg) default: @@ -276,6 +284,19 @@ func marshalErrorTarget(errorName string) []byte { return binary.Marshal(ret, usermem.ByteOrder, target) } +func marshalRedirectTarget() []byte { + // This is a redirect target named redirect + target := linux.XTRedirectTarget{ + Target: linux.XTEntryTarget{ + TargetSize: linux.SizeOfXTRedirectTarget, + }, + } + copy(target.Target.Name[:], redirectTargetName) + + ret := make([]byte, 0, linux.SizeOfXTRedirectTarget) + return binary.Marshal(ret, usermem.ByteOrder, target) +} + func marshalJumpTarget(jt JumpTarget) []byte { nflog("convert to binary: marshalling jump target") @@ -345,6 +366,8 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { switch replace.Name.String() { case iptables.TablenameFilter: table = iptables.EmptyFilterTable() + case iptables.TablenameNat: + table = iptables.EmptyNatTable() default: nflog("we don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String()) return syserr.ErrInvalidArgument @@ -404,7 +427,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal)) return syserr.ErrInvalidArgument } - target, err := parseTarget(optVal[:targetSize]) + target, err := parseTarget(filter, optVal[:targetSize]) if err != nil { nflog("failed to parse target: %v", err) return syserr.ErrInvalidArgument @@ -495,10 +518,11 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error { } // TODO(gvisor.dev/issue/170): Support other chains. - // Since we only support modifying the INPUT chain right now, make sure - // all other chains point to ACCEPT rules. + // Since we only support modifying the INPUT chain and redirect for + // PREROUTING chain right now, make sure all other chains point to + // ACCEPT rules. for hook, ruleIdx := range table.BuiltinChains { - if hook != iptables.Input { + if hook != iptables.Input && hook != iptables.Prerouting { if _, ok := table.Rules[ruleIdx].Target.(iptables.AcceptTarget); !ok { nflog("hook %d is unsupported.", hook) return syserr.ErrInvalidArgument @@ -570,7 +594,7 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma // parseTarget parses a target from optVal. optVal should contain only the // target. -func parseTarget(optVal []byte) (iptables.Target, error) { +func parseTarget(filter iptables.IPHeaderFilter, optVal []byte) (iptables.Target, error) { nflog("set entries: parsing target of size %d", len(optVal)) if len(optVal) < linux.SizeOfXTEntryTarget { return nil, fmt.Errorf("optVal has insufficient size for entry target %d", len(optVal)) @@ -620,6 +644,49 @@ func parseTarget(optVal []byte) (iptables.Target, error) { nflog("set entries: user-defined target %q", name) return iptables.UserChainTarget{Name: name}, nil } + + case redirectTargetName: + // Redirect target. + if len(optVal) < linux.SizeOfXTRedirectTarget { + return nil, fmt.Errorf("netfilter.SetEntries: optVal has insufficient size for redirect target %d", len(optVal)) + } + + if filter.Protocol != header.TCPProtocolNumber && filter.Protocol != header.UDPProtocolNumber { + return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + } + + var redirectTarget linux.XTRedirectTarget + buf = optVal[:linux.SizeOfXTRedirectTarget] + binary.Unmarshal(buf, usermem.ByteOrder, &redirectTarget) + + // Copy linux.XTRedirectTarget to iptables.RedirectTarget. + var target iptables.RedirectTarget + nfRange := redirectTarget.NfRange + + // RangeSize should be 1. + if nfRange.RangeSize != 1 { + return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + } + + // TODO(gvisor.dev/issue/170): Check if the flags are valid. + // Also check if we need to map ports or IP. + // For now, redirect target only supports dest port change. + if nfRange.RangeIPV4.Flags&linux.NF_NAT_RANGE_PROTO_SPECIFIED == 0 { + return nil, fmt.Errorf("netfilter.SetEntries: invalid argument.") + } + target.Flags = nfRange.RangeIPV4.Flags + + target.MinIP = tcpip.Address(nfRange.RangeIPV4.MinIP[:]) + target.MaxIP = tcpip.Address(nfRange.RangeIPV4.MaxIP[:]) + + // Convert port from big endian to little endian. + port := make([]byte, 2) + binary.BigEndian.PutUint16(port, nfRange.RangeIPV4.MinPort) + target.MinPort = binary.LittleEndian.Uint16(port) + + binary.BigEndian.PutUint16(port, nfRange.RangeIPV4.MaxPort) + target.MaxPort = binary.LittleEndian.Uint16(port) + return target, nil } // Unknown target. diff --git a/pkg/tcpip/iptables/iptables.go b/pkg/tcpip/iptables/iptables.go index dbaccbb36..80ddbd442 100644 --- a/pkg/tcpip/iptables/iptables.go +++ b/pkg/tcpip/iptables/iptables.go @@ -135,6 +135,27 @@ func EmptyFilterTable() Table { } } +// EmptyNatTable returns a Table with no rules and the filter table chains +// mapped to HookUnset. +func EmptyNatTable() Table { + return Table{ + Rules: []Rule{}, + BuiltinChains: map[Hook]int{ + Prerouting: HookUnset, + Input: HookUnset, + Output: HookUnset, + Postrouting: HookUnset, + }, + Underflows: map[Hook]int{ + Prerouting: HookUnset, + Input: HookUnset, + Output: HookUnset, + Postrouting: HookUnset, + }, + UserChains: map[string]int{}, + } +} + // A chainVerdict is what a table decides should be done with a packet. type chainVerdict int @@ -150,6 +171,7 @@ const ( chainReturn ) + // Check runs pkt through the rules for hook. It returns true when the packet // should continue traversing the network stack and false when it should be // dropped. @@ -220,7 +242,7 @@ func (it *IPTables) checkChain(hook Hook, pkt tcpip.PacketBuffer, table Table, r return chainDrop case chainReturn: ruleIdx++ - continue + continue default: panic(fmt.Sprintf("Unknown verdict: %d", verdict)) } @@ -240,6 +262,12 @@ func (it *IPTables) checkChain(hook Hook, pkt tcpip.PacketBuffer, table Table, r func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ruleIdx int) (RuleVerdict, int) { rule := table.Rules[ruleIdx] + // If pkt.NetworkHeader hasn't been set yet, it will be contained in + // pkt.Data.First(). + if pkt.NetworkHeader == nil { + pkt.NetworkHeader = pkt.Data.First() + } + // First check whether the packet matches the IP header filter. // TODO(gvisor.dev/issue/170): Support other fields of the filter. if rule.Filter.Protocol != 0 && rule.Filter.Protocol != header.IPv4(pkt.NetworkHeader).TransportProtocol() { @@ -261,5 +289,5 @@ func (it *IPTables) checkRule(hook Hook, pkt tcpip.PacketBuffer, table Table, ru } // All the matchers matched, so run the target. - return rule.Target.Action(pkt) + return rule.Target.Action(pkt, rule.Filter) } diff --git a/pkg/tcpip/iptables/targets.go b/pkg/tcpip/iptables/targets.go index 81a2e39a2..5dbb28145 100644 --- a/pkg/tcpip/iptables/targets.go +++ b/pkg/tcpip/iptables/targets.go @@ -17,13 +17,14 @@ package iptables import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" ) // AcceptTarget accepts packets. type AcceptTarget struct{} // Action implements Target.Action. -func (AcceptTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, int) { +func (AcceptTarget) Action(packet tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, int) { return RuleAccept, 0 } @@ -31,7 +32,7 @@ func (AcceptTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, int) { type DropTarget struct{} // Action implements Target.Action. -func (DropTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, int) { +func (DropTarget) Action(packet tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, int) { return RuleDrop, 0 } @@ -40,7 +41,7 @@ func (DropTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, int) { type ErrorTarget struct{} // Action implements Target.Action. -func (ErrorTarget) Action(packet tcpip.PacketBuffer) (RuleVerdict, int) { +func (ErrorTarget) Action(packet tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, int) { log.Debugf("ErrorTarget triggered.") return RuleDrop, 0 } @@ -51,7 +52,7 @@ type UserChainTarget struct { } // Action implements Target.Action. -func (UserChainTarget) Action(tcpip.PacketBuffer) (RuleVerdict, int) { +func (UserChainTarget) Action(tcpip.PacketBuffer, IPHeaderFilter) (RuleVerdict, int) { panic("UserChainTarget should never be called.") } @@ -60,6 +61,55 @@ func (UserChainTarget) Action(tcpip.PacketBuffer) (RuleVerdict, int) { type ReturnTarget struct{} // Action implements Target.Action. -func (ReturnTarget) Action(tcpip.PacketBuffer) (RuleVerdict, int) { +func (ReturnTarget) Action(tcpip.PacketBuffer, IPHeaderFilter) (RuleVerdict, int) { return RuleReturn, 0 } + +// RedirectTarget redirects the packet by modifying the destination port/IP. +// Min and Max values for IP and Ports in the struct indicate the range of +// values which can be used to redirect. +type RedirectTarget struct { + // Flags to check if the redirect is for address or ports. + Flags uint32 + + // Min address used to redirect. + MinIP tcpip.Address + + // Max address used to redirect. + MaxIP tcpip.Address + + // Min port used to redirect. + MinPort uint16 + + // Max port used to redirect. + MaxPort uint16 +} + +// Action implements Target.Action. +func (rt RedirectTarget) Action(pkt tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, int) { + headerView := pkt.Data.First() + + // Network header should be set. + netHeader := header.IPv4(headerView) + if netHeader == nil { + return RuleDrop, 0 + } + + // TODO(gvisor.dev/issue/170): Check Flags in RedirectTarget if + // we need to change dest address (for OUTPUT chain) or ports. + hlen := int(netHeader.HeaderLength()) + + switch protocol := filter.Protocol; protocol { + case header.UDPProtocolNumber: + udp := header.UDP(headerView[hlen:]) + udp.SetDestinationPort(rt.MinPort) + case header.TCPProtocolNumber: + // TODO(gvisor.dev/issue/170): Need to recompute checksum + // and implement nat connection tracking to support TCP. + tcp := header.TCP(headerView[hlen:]) + tcp.SetDestinationPort(rt.MinPort) + default: + return RuleDrop, 0 + } + return RuleAccept, 0 +} diff --git a/pkg/tcpip/iptables/types.go b/pkg/tcpip/iptables/types.go index 7d032fd23..8bd3a2c94 100644 --- a/pkg/tcpip/iptables/types.go +++ b/pkg/tcpip/iptables/types.go @@ -163,6 +163,6 @@ type Matcher interface { type Target interface { // Action takes an action on the packet and returns a verdict on how // traversal should (or should not) continue. If the return value is - // Jump, it also returns the index of the rule to jump to. - Action(packet tcpip.PacketBuffer) (RuleVerdict, int) + // Jump, it also returns the name of the chain to jump to. + Action(packet tcpip.PacketBuffer, filter IPHeaderFilter) (RuleVerdict, int) } diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 46d3a6646..271e55be0 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/iptables" ) var ipv4BroadcastAddr = tcpip.ProtocolAddress{ @@ -1116,6 +1117,7 @@ func (n *NIC) isInGroup(addr tcpip.Address) bool { func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt tcpip.PacketBuffer) { r := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */) r.RemoteLinkAddress = remotelinkAddr + ref.ep.HandlePacket(&r, pkt) ref.decRef() } @@ -1186,6 +1188,16 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link n.stack.stats.IP.InvalidSourceAddressesReceived.Increment() return } + + // TODO(gvisor.dev/issue/170): Not supporting iptables for IPv6 yet. + if protocol == header.IPv4ProtocolNumber { + ipt := n.stack.IPTables() + if ok := ipt.Check(iptables.Prerouting, pkt); !ok { + // iptables is telling us to drop the packet. + return + } + } + if ref := n.getRef(protocol, dst); ref != nil { handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, pkt) return diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go index 0621861eb..29ad5932d 100644 --- a/test/iptables/iptables_test.go +++ b/test/iptables/iptables_test.go @@ -196,12 +196,24 @@ func TestNATRedirectUDPPort(t *testing.T) { } } +func TestNATRedirectTCPPort(t *testing.T) { + if err := singleTest(NATRedirectTCPPort{}); err != nil { + t.Fatal(err) + } +} + func TestNATDropUDP(t *testing.T) { if err := singleTest(NATDropUDP{}); err != nil { t.Fatal(err) } } +func TestNATAcceptAll(t *testing.T) { + if err := singleTest(NATAcceptAll{}); err != nil { + t.Fatal(err) + } +} + func TestFilterInputDropTCPDestPort(t *testing.T) { if err := singleTest(FilterInputDropTCPDestPort{}); err != nil { t.Fatal(err) diff --git a/test/iptables/nat.go b/test/iptables/nat.go index a01117ec8..899d1c9d3 100644 --- a/test/iptables/nat.go +++ b/test/iptables/nat.go @@ -25,7 +25,9 @@ const ( func init() { RegisterTestCase(NATRedirectUDPPort{}) + RegisterTestCase(NATRedirectTCPPort{}) RegisterTestCase(NATDropUDP{}) + RegisterTestCase(NATAcceptAll{}) } // NATRedirectUDPPort tests that packets are redirected to different port. @@ -45,6 +47,7 @@ func (NATRedirectUDPPort) ContainerAction(ip net.IP) error { if err := listenUDP(redirectPort, sendloopDuration); err != nil { return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", redirectPort, err) } + return nil } @@ -53,6 +56,29 @@ func (NATRedirectUDPPort) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } +// NATRedirectTCPPort tests that connections are redirected on specified ports. +type NATRedirectTCPPort struct{} + +// Name implements TestCase.Name. +func (NATRedirectTCPPort) Name() string { + return "NATRedirectTCPPort" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATRedirectTCPPort) ContainerAction(ip net.IP) error { + if err := natTable("-A", "PREROUTING", "-p", "tcp", "-m", "tcp", "--dport", fmt.Sprintf("%d", dropPort), "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", redirectPort)); err != nil { + return err + } + + // Listen for TCP packets on redirect port. + return listenTCP(redirectPort, sendloopDuration) +} + +// LocalAction implements TestCase.LocalAction. +func (NATRedirectTCPPort) LocalAction(ip net.IP) error { + return connectTCP(ip, dropPort, acceptPort, sendloopDuration) +} + // NATDropUDP tests that packets are not received in ports other than redirect port. type NATDropUDP struct{} @@ -78,3 +104,29 @@ func (NATDropUDP) ContainerAction(ip net.IP) error { func (NATDropUDP) LocalAction(ip net.IP) error { return sendUDPLoop(ip, acceptPort, sendloopDuration) } + +// NATAcceptAll tests that all UDP packets are accepted. +type NATAcceptAll struct{} + +// Name implements TestCase.Name. +func (NATAcceptAll) Name() string { + return "NATAcceptAll" +} + +// ContainerAction implements TestCase.ContainerAction. +func (NATAcceptAll) ContainerAction(ip net.IP) error { + if err := natTable("-A", "PREROUTING", "-p", "udp", "-j", "ACCEPT"); err != nil { + return err + } + + if err := listenUDP(acceptPort, sendloopDuration); err != nil { + return fmt.Errorf("packets on port %d should be allowed, but encountered an error: %v", acceptPort, err) + } + + return nil +} + +// LocalAction implements TestCase.LocalAction. +func (NATAcceptAll) LocalAction(ip net.IP) error { + return sendUDPLoop(ip, acceptPort, sendloopDuration) +} |