diff options
author | Kevin Krakauer <krakauer@google.com> | 2020-08-27 10:51:59 -0700 |
---|---|---|
committer | Andrei Vagin <avagin@gmail.com> | 2020-09-09 17:53:10 -0700 |
commit | 97d6398d435025c7ab361c36994feab2c7e2d84f (patch) | |
tree | ebdca9785e7de4d6a9148b824f0b1d6b232c4fb0 | |
parent | 5588def369a09b4525842b04a43fbf146e662311 (diff) |
ip6tables: (de)serialize ip6tables structs
More implementation+testing to follow.
#3549.
PiperOrigin-RevId: 328770160
-rw-r--r-- | pkg/abi/linux/netfilter_ipv6.go | 13 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/ipv4.go | 33 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/ipv6.go | 265 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/netfilter.go | 77 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/targets.go | 10 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/netstack.go | 75 | ||||
-rw-r--r-- | pkg/tcpip/stack/iptables.go | 12 | ||||
-rw-r--r-- | pkg/tcpip/stack/iptables_types.go | 5 | ||||
-rw-r--r-- | test/syscalls/linux/ip6tables.cc | 48 |
10 files changed, 489 insertions, 50 deletions
diff --git a/pkg/abi/linux/netfilter_ipv6.go b/pkg/abi/linux/netfilter_ipv6.go index 9bb9efb10..f6117024c 100644 --- a/pkg/abi/linux/netfilter_ipv6.go +++ b/pkg/abi/linux/netfilter_ipv6.go @@ -290,6 +290,19 @@ type IP6TIP struct { const SizeOfIP6TIP = 136 +// Flags in IP6TIP.Flags. Corresponding constants are in +// include/uapi/linux/netfilter_ipv6/ip6_tables.h. +const ( + // Whether to check the Protocol field. + IP6T_F_PROTO = 0x01 + // Whether to match the TOS field. + IP6T_F_TOS = 0x02 + // Indicates that the jump target is an absolute GOTO, not an offset. + IP6T_F_GOTO = 0x04 + // Enables all flags. + IP6T_F_MASK = 0x07 +) + // Flags in IP6TIP.InverseFlags. Corresponding constants are in // include/uapi/linux/netfilter_ipv6/ip6_tables.h. const ( diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index 795620589..8aea0200f 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -7,6 +7,7 @@ go_library( srcs = [ "extensions.go", "ipv4.go", + "ipv6.go", "netfilter.go", "owner_matcher.go", "targets.go", diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go index 4fb887e49..e4c55a100 100644 --- a/pkg/sentry/socket/netfilter/ipv4.go +++ b/pkg/sentry/socket/netfilter/ipv4.go @@ -36,14 +36,37 @@ var emptyIPv4Filter = stack.IPHeaderFilter{ SrcMask: "\x00\x00\x00\x00", } -func getEntries4(table stack.Table, info *linux.IPTGetinfo) linux.KernelIPTGetEntries { +// convertNetstackToBinary4 converts the iptables as stored in netstack to the +// format expected by the iptables tool. Linux stores each table as a binary +// blob that can only be traversed by parsing a little data, reading some +// offsets, jumping to those offsets, parsing again, etc. +func convertNetstackToBinary4(stk *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) { + // The table name has to fit in the struct. 
+ if linux.XT_TABLE_MAXNAMELEN < len(tablename) { + return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename) + } + + table, ok := stk.IPTables().GetTable(tablename.String(), false) + if !ok { + return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename) + } + + // Setup the info struct. + entries, info := getEntries4(table, tablename) + return entries, info, nil +} + +func getEntries4(table stack.Table, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo) { + var info linux.IPTGetinfo var entries linux.KernelIPTGetEntries + copy(info.Name[:], tablename[:]) copy(entries.Name[:], info.Name[:]) + info.ValidHooks = table.ValidHooks() for ruleIdx, rule := range table.Rules { nflog("convert to binary: current offset: %d", entries.Size) - setHooksAndUnderflow(info, table, entries.Size, ruleIdx) + setHooksAndUnderflow(&info, table, entries.Size, ruleIdx) // Each rule corresponds to an entry. entry := linux.KernelIPTEntry{ Entry: linux.IPTEntry{ @@ -100,7 +123,7 @@ func getEntries4(table stack.Table, info *linux.IPTGetinfo) linux.KernelIPTGetEn info.Size = entries.Size nflog("convert to binary: finished with an marshalled size of %d", info.Size) - return entries + return entries, info } func modifyEntries4(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) { @@ -205,7 +228,9 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) { ifnameMask := string(iptip.OutputInterfaceMask[:n]) return stack.IPHeaderFilter{ - Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), + Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), + // A Protocol value of 0 indicates all protocols match. 
+ CheckProtocol: iptip.Protocol != 0, Dst: tcpip.Address(iptip.Dst[:]), DstMask: tcpip.Address(iptip.DstMask[:]), DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0, diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go new file mode 100644 index 000000000..3b2c1becd --- /dev/null +++ b/pkg/sentry/socket/netfilter/ipv6.go @@ -0,0 +1,265 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package netfilter + +import ( + "bytes" + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/usermem" +) + +// emptyIPv6Filter is for comparison with a rule's filters to determine whether +// it is also empty. It is immutable. +var emptyIPv6Filter = stack.IPHeaderFilter{ + Dst: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + DstMask: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + Src: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + SrcMask: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", +} + +// convertNetstackToBinary6 converts the ip6tables as stored in netstack to the +// format expected by the iptables tool. 
Linux stores each table as a binary +// blob that can only be traversed by parsing a little data, reading some +// offsets, jumping to those offsets, parsing again, etc. +func convertNetstackToBinary6(stk *stack.Stack, tablename linux.TableName) (linux.KernelIP6TGetEntries, linux.IPTGetinfo, error) { + // The table name has to fit in the struct. + if linux.XT_TABLE_MAXNAMELEN < len(tablename) { + return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename) + } + + table, ok := stk.IPTables().GetTable(tablename.String(), true) + if !ok { + return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename) + } + + // Setup the info struct, which is the same in IPv4 and IPv6. + entries, info := getEntries6(table, tablename) + return entries, info, nil +} + +func getEntries6(table stack.Table, tablename linux.TableName) (linux.KernelIP6TGetEntries, linux.IPTGetinfo) { + var info linux.IPTGetinfo + var entries linux.KernelIP6TGetEntries + copy(info.Name[:], tablename[:]) + copy(entries.Name[:], info.Name[:]) + info.ValidHooks = table.ValidHooks() + + for ruleIdx, rule := range table.Rules { + nflog("convert to binary: current offset: %d", entries.Size) + + setHooksAndUnderflow(&info, table, entries.Size, ruleIdx) + // Each rule corresponds to an entry. 
+ entry := linux.KernelIP6TEntry{ + Entry: linux.IP6TEntry{ + IPv6: linux.IP6TIP{ + Protocol: uint16(rule.Filter.Protocol), + }, + NextOffset: linux.SizeOfIP6TEntry, + TargetOffset: linux.SizeOfIP6TEntry, + }, + } + copy(entry.Entry.IPv6.Dst[:], rule.Filter.Dst) + copy(entry.Entry.IPv6.DstMask[:], rule.Filter.DstMask) + copy(entry.Entry.IPv6.Src[:], rule.Filter.Src) + copy(entry.Entry.IPv6.SrcMask[:], rule.Filter.SrcMask) + copy(entry.Entry.IPv6.OutputInterface[:], rule.Filter.OutputInterface) + copy(entry.Entry.IPv6.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask) + if rule.Filter.DstInvert { + entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_DSTIP + } + if rule.Filter.SrcInvert { + entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_SRCIP + } + if rule.Filter.OutputInterfaceInvert { + entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_VIA_OUT + } + if rule.Filter.CheckProtocol { + entry.Entry.IPv6.Flags |= linux.IP6T_F_PROTO + } + + for _, matcher := range rule.Matchers { + // Serialize the matcher and add it to the + // entry. + serialized := marshalMatcher(matcher) + nflog("convert to binary: matcher serialized as: %v", serialized) + if len(serialized)%8 != 0 { + panic(fmt.Sprintf("matcher %T is not 64-bit aligned", matcher)) + } + entry.Elems = append(entry.Elems, serialized...) + entry.Entry.NextOffset += uint16(len(serialized)) + entry.Entry.TargetOffset += uint16(len(serialized)) + } + + // Serialize and append the target. + serialized := marshalTarget(rule.Target) + if len(serialized)%8 != 0 { + panic(fmt.Sprintf("target %T is not 64-bit aligned", rule.Target)) + } + entry.Elems = append(entry.Elems, serialized...) 
+ entry.Entry.NextOffset += uint16(len(serialized)) + + nflog("convert to binary: adding entry: %+v", entry) + + entries.Size += uint32(entry.Entry.NextOffset) + entries.Entrytable = append(entries.Entrytable, entry) + info.NumEntries++ + } + + info.Size = entries.Size + nflog("convert to binary: finished with an marshalled size of %d", info.Size) + return entries, info +} + +func modifyEntries6(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) { + nflog("set entries: setting entries in table %q", replace.Name.String()) + + // Convert input into a list of rules and their offsets. + var offset uint32 + // offsets maps rule byte offsets to their position in table.Rules. + offsets := map[uint32]int{} + for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ { + nflog("set entries: processing entry at offset %d", offset) + + // Get the struct ip6t_entry. + if len(optVal) < linux.SizeOfIP6TEntry { + nflog("optVal has insufficient size for entry %d", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + var entry linux.IP6TEntry + buf := optVal[:linux.SizeOfIP6TEntry] + binary.Unmarshal(buf, usermem.ByteOrder, &entry) + initialOptValLen := len(optVal) + optVal = optVal[linux.SizeOfIP6TEntry:] + + if entry.TargetOffset < linux.SizeOfIP6TEntry { + nflog("entry has too-small target offset %d", entry.TargetOffset) + return nil, syserr.ErrInvalidArgument + } + + // TODO(gvisor.dev/issue/170): We should support more IPTIP + // filtering fields. + filter, err := filterFromIP6TIP(entry.IPv6) + if err != nil { + nflog("bad iptip: %v", err) + return nil, syserr.ErrInvalidArgument + } + + // TODO(gvisor.dev/issue/170): Matchers and targets can specify + // that they only work for certain protocols, hooks, tables. + // Get matchers. 
+ matchersSize := entry.TargetOffset - linux.SizeOfIP6TEntry + if len(optVal) < int(matchersSize) { + nflog("entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + matchers, err := parseMatchers(filter, optVal[:matchersSize]) + if err != nil { + nflog("failed to parse matchers: %v", err) + return nil, syserr.ErrInvalidArgument + } + optVal = optVal[matchersSize:] + + // Get the target of the rule. + targetSize := entry.NextOffset - entry.TargetOffset + if len(optVal) < int(targetSize) { + nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal)) + return nil, syserr.ErrInvalidArgument + } + target, err := parseTarget(filter, optVal[:targetSize]) + if err != nil { + nflog("failed to parse target: %v", err) + return nil, syserr.ErrInvalidArgument + } + optVal = optVal[targetSize:] + + table.Rules = append(table.Rules, stack.Rule{ + Filter: filter, + Target: target, + Matchers: matchers, + }) + offsets[offset] = int(entryIdx) + offset += uint32(entry.NextOffset) + + if initialOptValLen-len(optVal) != int(entry.NextOffset) { + nflog("entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal)) + return nil, syserr.ErrInvalidArgument + } + } + return offsets, nil +} + +func filterFromIP6TIP(iptip linux.IP6TIP) (stack.IPHeaderFilter, error) { + if containsUnsupportedFields6(iptip) { + return stack.IPHeaderFilter{}, fmt.Errorf("unsupported fields in struct iptip: %+v", iptip) + } + if len(iptip.Dst) != header.IPv6AddressSize || len(iptip.DstMask) != header.IPv6AddressSize { + return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask)) + } + if len(iptip.Src) != header.IPv6AddressSize || len(iptip.SrcMask) != header.IPv6AddressSize { + return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) 
fields", len(iptip.Src), len(iptip.SrcMask)) + } + + n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0) + if n == -1 { + n = len(iptip.OutputInterface) + } + ifname := string(iptip.OutputInterface[:n]) + + n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0) + if n == -1 { + n = len(iptip.OutputInterfaceMask) + } + ifnameMask := string(iptip.OutputInterfaceMask[:n]) + + return stack.IPHeaderFilter{ + Protocol: tcpip.TransportProtocolNumber(iptip.Protocol), + // In ip6tables a flag controls whether to check the protocol. + CheckProtocol: iptip.Flags&linux.IP6T_F_PROTO != 0, + Dst: tcpip.Address(iptip.Dst[:]), + DstMask: tcpip.Address(iptip.DstMask[:]), + DstInvert: iptip.InverseFlags&linux.IP6T_INV_DSTIP != 0, + Src: tcpip.Address(iptip.Src[:]), + SrcMask: tcpip.Address(iptip.SrcMask[:]), + SrcInvert: iptip.InverseFlags&linux.IP6T_INV_SRCIP != 0, + OutputInterface: ifname, + OutputInterfaceMask: ifnameMask, + OutputInterfaceInvert: iptip.InverseFlags&linux.IP6T_INV_VIA_OUT != 0, + }, nil +} + +func containsUnsupportedFields6(iptip linux.IP6TIP) bool { + // The following features are supported: + // - Protocol + // - Dst and DstMask + // - Src and SrcMask + // - The inverse destination IP check flag + // - OutputInterface, OutputInterfaceMask and its inverse. + var emptyInterface = [linux.IFNAMSIZ]byte{} + flagMask := uint8(linux.IP6T_F_PROTO) + // Disable any supported inverse flags. 
+ inverseMask := uint8(linux.IP6T_INV_DSTIP) | uint8(linux.IP6T_INV_SRCIP) | uint8(linux.IP6T_INV_VIA_OUT) + return iptip.InputInterface != emptyInterface || + iptip.InputInterfaceMask != emptyInterface || + iptip.Flags&^flagMask != 0 || + iptip.InverseFlags&^inverseMask != 0 || + iptip.TOS != 0 +} diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index df256676f..3e1735079 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -42,14 +42,19 @@ func nflog(format string, args ...interface{}) { } // GetInfo returns information about iptables. -func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPTGetinfo, *syserr.Error) { +func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, ipv6 bool) (linux.IPTGetinfo, *syserr.Error) { // Read in the struct and table name. var info linux.IPTGetinfo if _, err := info.CopyIn(t, outPtr); err != nil { return linux.IPTGetinfo{}, syserr.FromError(err) } - _, info, err := convertNetstackToBinary(stack, info.Name) + var err error + if ipv6 { + _, info, err = convertNetstackToBinary6(stack, info.Name) + } else { + _, info, err = convertNetstackToBinary4(stack, info.Name) + } if err != nil { nflog("couldn't convert iptables: %v", err) return linux.IPTGetinfo{}, syserr.ErrInvalidArgument @@ -59,9 +64,9 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT return info, nil } -// GetEntries4 returns netstack's iptables rules encoded for the iptables tool. +// GetEntries4 returns netstack's iptables rules. func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) { - // Read in the ABI struct. + // Read in the struct and table name. 
var userEntries linux.IPTGetEntries if _, err := userEntries.CopyIn(t, outPtr); err != nil { nflog("couldn't copy in entries %q", userEntries.Name) @@ -70,7 +75,7 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen // Convert netstack's iptables rules to something that the iptables // tool can understand. - entries, _, err := convertNetstackToBinary(stack, userEntries.Name) + entries, _, err := convertNetstackToBinary4(stack, userEntries.Name) if err != nil { nflog("couldn't read entries: %v", err) return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument @@ -83,28 +88,29 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen return entries, nil } -// convertNetstackToBinary converts the iptables as stored in netstack to the -// format expected by the iptables tool. Linux stores each table as a binary -// blob that can only be traversed by parsing a bit, reading some offsets, -// jumping to those offsets, parsing again, etc. -func convertNetstackToBinary(stk *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) { - // The table name has to fit in the struct. - if linux.XT_TABLE_MAXNAMELEN < len(tablename) { - return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename) +// GetEntries6 returns netstack's ip6tables rules. +func GetEntries6(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIP6TGetEntries, *syserr.Error) { + // Read in the struct and table name. IPv4 and IPv6 utilize structs + // with the same layout. 
+ var userEntries linux.IPTGetEntries + if _, err := userEntries.CopyIn(t, outPtr); err != nil { + nflog("couldn't copy in entries %q", userEntries.Name) + return linux.KernelIP6TGetEntries{}, syserr.FromError(err) } - table, ok := stk.IPTables().GetTable(tablename.String()) - if !ok { - return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename) + // Convert netstack's iptables rules to something that the iptables + // tool can understand. + entries, _, err := convertNetstackToBinary6(stack, userEntries.Name) + if err != nil { + nflog("couldn't read entries: %v", err) + return linux.KernelIP6TGetEntries{}, syserr.ErrInvalidArgument + } + if binary.Size(entries) > uintptr(outLen) { + nflog("insufficient GetEntries output size: %d", uintptr(outLen)) + return linux.KernelIP6TGetEntries{}, syserr.ErrInvalidArgument } - // Setup the info struct. - var info linux.IPTGetinfo - info.ValidHooks = table.ValidHooks() - copy(info.Name[:], tablename[:]) - - entries := getEntries4(table, &info) - return entries, info, nil + return entries, nil } // setHooksAndUnderflow checks whether the rule at ruleIdx is a hook entrypoint @@ -128,7 +134,7 @@ func setHooksAndUnderflow(info *linux.IPTGetinfo, table stack.Table, offset uint // SetEntries sets iptables rules for a single table. See // net/ipv4/netfilter/ip_tables.c:translate_table for reference. 
-func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { +func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error { var replace linux.IPTReplace replaceBuf := optVal[:linux.SizeOfIPTReplace] optVal = optVal[linux.SizeOfIPTReplace:] @@ -146,7 +152,13 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { return syserr.ErrInvalidArgument } - offsets, err := modifyEntries4(stk, optVal, &replace, &table) + var err *syserr.Error + var offsets map[uint32]int + if ipv6 { + offsets, err = modifyEntries6(stk, optVal, &replace, &table) + } else { + offsets, err = modifyEntries4(stk, optVal, &replace, &table) + } if err != nil { return err } @@ -163,7 +175,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { table.BuiltinChains[hk] = ruleIdx } if offset == replace.Underflow[hook] { - if !validUnderflow(table.Rules[ruleIdx]) { + if !validUnderflow(table.Rules[ruleIdx], ipv6) { nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP", ruleIdx) return syserr.ErrInvalidArgument } @@ -228,7 +240,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { if ruleIdx == stack.HookUnset { continue } - if !isUnconditionalAccept(table.Rules[ruleIdx]) { + if !isUnconditionalAccept(table.Rules[ruleIdx], ipv6) { nflog("hook %d is unsupported.", hook) return syserr.ErrInvalidArgument } @@ -240,7 +252,8 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error { // - There are no chains without an unconditional final rule. // - There are no chains without an unconditional underflow rule. - return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table)) + return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table, ipv6)) + } // parseMatchers parses 0 or more matchers from optVal. 
optVal should contain @@ -286,11 +299,11 @@ func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher, return matchers, nil } -func validUnderflow(rule stack.Rule) bool { +func validUnderflow(rule stack.Rule, ipv6 bool) bool { if len(rule.Matchers) != 0 { return false } - if rule.Filter != emptyIPv4Filter { + if (ipv6 && rule.Filter != emptyIPv6Filter) || (!ipv6 && rule.Filter != emptyIPv4Filter) { return false } switch rule.Target.(type) { @@ -301,8 +314,8 @@ func validUnderflow(rule stack.Rule) bool { } } -func isUnconditionalAccept(rule stack.Rule) bool { - if !validUnderflow(rule) { +func isUnconditionalAccept(rule stack.Rule, ipv6 bool) bool { + if !validUnderflow(rule, ipv6) { return false } _, ok := rule.Target.(stack.AcceptTarget) diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go index 8ebdaff18..87e41abd8 100644 --- a/pkg/sentry/socket/netfilter/targets.go +++ b/pkg/sentry/socket/netfilter/targets.go @@ -218,8 +218,8 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro return nil, fmt.Errorf("netfilter.SetEntries: optVal has insufficient size for redirect target %d", len(optVal)) } - if filter.Protocol != header.TCPProtocolNumber && filter.Protocol != header.UDPProtocolNumber { - return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + if p := filter.Protocol; p != header.TCPProtocolNumber && p != header.UDPProtocolNumber { + return nil, fmt.Errorf("netfilter.SetEntries: bad proto %d", p) } var redirectTarget linux.XTRedirectTarget @@ -232,7 +232,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro // RangeSize should be 1. if nfRange.RangeSize != 1 { - return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + return nil, fmt.Errorf("netfilter.SetEntries: bad rangesize %d", nfRange.RangeSize) } // TODO(gvisor.dev/issue/170): Check if the flags are valid. 
@@ -240,7 +240,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro // For now, redirect target only supports destination port change. // Port range and IP range are not supported yet. if nfRange.RangeIPV4.Flags&linux.NF_NAT_RANGE_PROTO_SPECIFIED == 0 { - return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + return nil, fmt.Errorf("netfilter.SetEntries: invalid range flags %d", nfRange.RangeIPV4.Flags) } target.RangeProtoSpecified = true @@ -249,7 +249,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro // TODO(gvisor.dev/issue/170): Port range is not supported yet. if nfRange.RangeIPV4.MinPort != nfRange.RangeIPV4.MaxPort { - return nil, fmt.Errorf("netfilter.SetEntries: invalid argument") + return nil, fmt.Errorf("netfilter.SetEntries: minport != maxport (%d, %d)", nfRange.RangeIPV4.MinPort, nfRange.RangeIPV4.MaxPort) } // Convert port from big endian to little endian. diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 9e2ebc7d4..2af2d8252 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -997,7 +997,7 @@ func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family in return getSockOptTCP(t, ep, name, outLen) case linux.SOL_IPV6: - return getSockOptIPv6(t, ep, name, outLen) + return getSockOptIPv6(t, s, ep, name, outPtr, outLen) case linux.SOL_IP: return getSockOptIP(t, s, ep, name, outPtr, outLen, family) @@ -1455,7 +1455,7 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal } // getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6. 
-func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal.Marshallable, *syserr.Error) { +func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { switch name { case linux.IPV6_V6ONLY: if outLen < sizeOfInt32 { @@ -1508,10 +1508,50 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marsha vP := primitive.Int32(boolToInt32(v)) return &vP, nil - case linux.SO_ORIGINAL_DST: + case linux.IP6T_ORIGINAL_DST: // TODO(gvisor.dev/issue/170): ip6tables. return nil, syserr.ErrInvalidArgument + case linux.IP6T_SO_GET_INFO: + if outLen < linux.SizeOfIPTGetinfo { + return nil, syserr.ErrInvalidArgument + } + + // Only valid for raw IPv6 sockets. + if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW { + return nil, syserr.ErrProtocolNotAvailable + } + + stack := inet.StackFromContext(t) + if stack == nil { + return nil, syserr.ErrNoDevice + } + info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, true) + if err != nil { + return nil, err + } + return &info, nil + + case linux.IP6T_SO_GET_ENTRIES: + // IPTGetEntries is reused for IPv6. + if outLen < linux.SizeOfIPTGetEntries { + return nil, syserr.ErrInvalidArgument + } + // Only valid for raw IPv6 sockets. 
+ if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW { + return nil, syserr.ErrProtocolNotAvailable + } + + stack := inet.StackFromContext(t) + if stack == nil { + return nil, syserr.ErrNoDevice + } + entries, err := netfilter.GetEntries6(t, stack.(*Stack).Stack, outPtr, outLen) + if err != nil { + return nil, err + } + return &entries, nil + default: emitUnimplementedEventIPv6(t, name) } @@ -1649,7 +1689,7 @@ func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in if stack == nil { return nil, syserr.ErrNoDevice } - info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr) + info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, false) if err != nil { return nil, err } @@ -1722,7 +1762,7 @@ func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int return setSockOptTCP(t, ep, name, optVal) case linux.SOL_IPV6: - return setSockOptIPv6(t, ep, name, optVal) + return setSockOptIPv6(t, s, ep, name, optVal) case linux.SOL_IP: return setSockOptIP(t, s, ep, name, optVal) @@ -2027,7 +2067,7 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * } // setSockOptIPv6 implements SetSockOpt when level is SOL_IPV6. -func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error { +func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error { switch name { case linux.IPV6_V6ONLY: if len(optVal) < sizeOfInt32 { @@ -2076,6 +2116,27 @@ func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTClassOption, v != 0)) + case linux.IP6T_SO_SET_REPLACE: + if len(optVal) < linux.SizeOfIP6TReplace { + return syserr.ErrInvalidArgument + } + + // Only valid for raw IPv6 sockets. 
+ if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW { + return syserr.ErrProtocolNotAvailable + } + + stack := inet.StackFromContext(t) + if stack == nil { + return syserr.ErrNoDevice + } + // Stack must be a netstack stack. + return netfilter.SetEntries(stack.(*Stack).Stack, optVal, true) + + case linux.IP6T_SO_SET_ADD_COUNTERS: + // TODO(gvisor.dev/issue/170): Counter support. + return nil + default: emitUnimplementedEventIPv6(t, name) } @@ -2271,7 +2332,7 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in return syserr.ErrNoDevice } // Stack must be a netstack stack. - return netfilter.SetEntries(stack.(*Stack).Stack, optVal) + return netfilter.SetEntries(stack.(*Stack).Stack, optVal, false) case linux.IPT_SO_SET_ADD_COUNTERS: // TODO(gvisor.dev/issue/170): Counter support. diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go index 41ef4236b..30aa41db2 100644 --- a/pkg/tcpip/stack/iptables.go +++ b/pkg/tcpip/stack/iptables.go @@ -165,7 +165,11 @@ func EmptyNATTable() Table { } // GetTable returns a table by name. -func (it *IPTables) GetTable(name string) (Table, bool) { +func (it *IPTables) GetTable(name string, ipv6 bool) (Table, bool) { + // TODO(gvisor.dev/issue/3549): Enable IPv6. + if ipv6 { + return Table{}, false + } id, ok := nameToID[name] if !ok { return Table{}, false @@ -176,7 +180,11 @@ func (it *IPTables) GetTable(name string) (Table, bool) { } // ReplaceTable replaces or inserts table by name. -func (it *IPTables) ReplaceTable(name string, table Table) *tcpip.Error { +func (it *IPTables) ReplaceTable(name string, table Table, ipv6 bool) *tcpip.Error { + // TODO(gvisor.dev/issue/3549): Enable IPv6. 
+ if ipv6 { + return tcpip.ErrInvalidOptionValue + } id, ok := nameToID[name] if !ok { return tcpip.ErrInvalidOptionValue diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go index 73274ada9..fbbd2f50f 100644 --- a/pkg/tcpip/stack/iptables_types.go +++ b/pkg/tcpip/stack/iptables_types.go @@ -155,6 +155,11 @@ type IPHeaderFilter struct { // Protocol matches the transport protocol. Protocol tcpip.TransportProtocolNumber + // CheckProtocol determines whether the Protocol field should be + // checked during matching. + // TODO(gvisor.dev/issue/3549): Check this field during matching. + CheckProtocol bool + // Dst matches the destination IP address. Dst tcpip.Address diff --git a/test/syscalls/linux/ip6tables.cc b/test/syscalls/linux/ip6tables.cc index 685e513f8..78e1fa09d 100644 --- a/test/syscalls/linux/ip6tables.cc +++ b/test/syscalls/linux/ip6tables.cc @@ -34,6 +34,54 @@ constexpr size_t kEmptyStandardEntrySize = constexpr size_t kEmptyErrorEntrySize = sizeof(struct ip6t_entry) + sizeof(struct xt_error_target); +TEST(IP6TablesBasic, FailSockoptNonRaw) { + // Even if the user has CAP_NET_RAW, they shouldn't be able to use the + // ip6tables sockopts with a non-raw socket. + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds()); + + struct ipt_getinfo info = {}; + snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + socklen_t info_size = sizeof(info); + EXPECT_THAT(getsockopt(sock, SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size), + SyscallFailsWithErrno(ENOPROTOOPT)); + + EXPECT_THAT(close(sock), SyscallSucceeds()); +} + +TEST(IP6TablesBasic, GetInfoErrorPrecedence) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds()); + + // When using the wrong type of socket and a too-short optlen, we should get + // EINVAL. 
+ struct ipt_getinfo info = {}; + snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + socklen_t info_size = sizeof(info) - 1; + EXPECT_THAT(getsockopt(sock, SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(IP6TablesBasic, GetEntriesErrorPrecedence) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); + + int sock; + ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds()); + + // When using the wrong type of socket and a too-short optlen, we should get + // EINVAL. + struct ip6t_get_entries entries = {}; + socklen_t entries_size = sizeof(struct ip6t_get_entries) - 1; + snprintf(entries.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename); + EXPECT_THAT( + getsockopt(sock, SOL_IPV6, IP6T_SO_GET_ENTRIES, &entries, &entries_size), + SyscallFailsWithErrno(EINVAL)); +} + // This tests the initial state of a machine with empty ip6tables via // getsockopt(IP6T_SO_GET_INFO). We don't have a guarantee that the iptables are // empty when running in native, but we can test that gVisor has the same |