summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorKevin Krakauer <krakauer@google.com>2020-08-27 10:51:59 -0700
committerAndrei Vagin <avagin@gmail.com>2020-09-09 17:53:10 -0700
commit97d6398d435025c7ab361c36994feab2c7e2d84f (patch)
treeebdca9785e7de4d6a9148b824f0b1d6b232c4fb0
parent5588def369a09b4525842b04a43fbf146e662311 (diff)
ip6tables: (de)serialize ip6tables structs
More implementation+testing to follow. #3549. PiperOrigin-RevId: 328770160
-rw-r--r--pkg/abi/linux/netfilter_ipv6.go13
-rw-r--r--pkg/sentry/socket/netfilter/BUILD1
-rw-r--r--pkg/sentry/socket/netfilter/ipv4.go33
-rw-r--r--pkg/sentry/socket/netfilter/ipv6.go265
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go77
-rw-r--r--pkg/sentry/socket/netfilter/targets.go10
-rw-r--r--pkg/sentry/socket/netstack/netstack.go75
-rw-r--r--pkg/tcpip/stack/iptables.go12
-rw-r--r--pkg/tcpip/stack/iptables_types.go5
-rw-r--r--test/syscalls/linux/ip6tables.cc48
10 files changed, 489 insertions, 50 deletions
diff --git a/pkg/abi/linux/netfilter_ipv6.go b/pkg/abi/linux/netfilter_ipv6.go
index 9bb9efb10..f6117024c 100644
--- a/pkg/abi/linux/netfilter_ipv6.go
+++ b/pkg/abi/linux/netfilter_ipv6.go
@@ -290,6 +290,19 @@ type IP6TIP struct {
const SizeOfIP6TIP = 136
+// Flags in IP6TIP.Flags. Corresponding constants are in
+// include/uapi/linux/netfilter_ipv6/ip6_tables.h.
+const (
+ // Whether to check the Protocol field.
+ IP6T_F_PROTO = 0x01
+ // Whether to match the TOS field.
+ IP6T_F_TOS = 0x02
+ // Indicates that the jump target is an aboslute GOTO, not an offset.
+ IP6T_F_GOTO = 0x04
+ // Enables all flags.
+ IP6T_F_MASK = 0x07
+)
+
// Flags in IP6TIP.InverseFlags. Corresponding constants are in
// include/uapi/linux/netfilter_ipv6/ip6_tables.h.
const (
diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD
index 795620589..8aea0200f 100644
--- a/pkg/sentry/socket/netfilter/BUILD
+++ b/pkg/sentry/socket/netfilter/BUILD
@@ -7,6 +7,7 @@ go_library(
srcs = [
"extensions.go",
"ipv4.go",
+ "ipv6.go",
"netfilter.go",
"owner_matcher.go",
"targets.go",
diff --git a/pkg/sentry/socket/netfilter/ipv4.go b/pkg/sentry/socket/netfilter/ipv4.go
index 4fb887e49..e4c55a100 100644
--- a/pkg/sentry/socket/netfilter/ipv4.go
+++ b/pkg/sentry/socket/netfilter/ipv4.go
@@ -36,14 +36,37 @@ var emptyIPv4Filter = stack.IPHeaderFilter{
SrcMask: "\x00\x00\x00\x00",
}
-func getEntries4(table stack.Table, info *linux.IPTGetinfo) linux.KernelIPTGetEntries {
+// convertNetstackToBinary4 converts the iptables as stored in netstack to the
+// format expected by the iptables tool. Linux stores each table as a binary
+// blob that can only be traversed by parsing a little data, reading some
+// offsets, jumping to those offsets, parsing again, etc.
+func convertNetstackToBinary4(stk *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) {
+ // The table name has to fit in the struct.
+ if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
+ return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename)
+ }
+
+ table, ok := stk.IPTables().GetTable(tablename.String(), false)
+ if !ok {
+ return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename)
+ }
+
+ // Setup the info struct.
+ entries, info := getEntries4(table, tablename)
+ return entries, info, nil
+}
+
+func getEntries4(table stack.Table, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo) {
+ var info linux.IPTGetinfo
var entries linux.KernelIPTGetEntries
+ copy(info.Name[:], tablename[:])
copy(entries.Name[:], info.Name[:])
+ info.ValidHooks = table.ValidHooks()
for ruleIdx, rule := range table.Rules {
nflog("convert to binary: current offset: %d", entries.Size)
- setHooksAndUnderflow(info, table, entries.Size, ruleIdx)
+ setHooksAndUnderflow(&info, table, entries.Size, ruleIdx)
// Each rule corresponds to an entry.
entry := linux.KernelIPTEntry{
Entry: linux.IPTEntry{
@@ -100,7 +123,7 @@ func getEntries4(table stack.Table, info *linux.IPTGetinfo) linux.KernelIPTGetEn
info.Size = entries.Size
nflog("convert to binary: finished with an marshalled size of %d", info.Size)
- return entries
+ return entries, info
}
func modifyEntries4(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) {
@@ -205,7 +228,9 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) {
ifnameMask := string(iptip.OutputInterfaceMask[:n])
return stack.IPHeaderFilter{
- Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
+ Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
+ // A Protocol value of 0 indicates all protocols match.
+ CheckProtocol: iptip.Protocol != 0,
Dst: tcpip.Address(iptip.Dst[:]),
DstMask: tcpip.Address(iptip.DstMask[:]),
DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0,
diff --git a/pkg/sentry/socket/netfilter/ipv6.go b/pkg/sentry/socket/netfilter/ipv6.go
new file mode 100644
index 000000000..3b2c1becd
--- /dev/null
+++ b/pkg/sentry/socket/netfilter/ipv6.go
@@ -0,0 +1,265 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netfilter
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// emptyIPv6Filter is for comparison with a rule's filters to determine whether
+// it is also empty. It is immutable.
+var emptyIPv6Filter = stack.IPHeaderFilter{
+ Dst: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ DstMask: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ Src: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ SrcMask: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+}
+
+// convertNetstackToBinary6 converts the ip6tables as stored in netstack to the
+// format expected by the iptables tool. Linux stores each table as a binary
+// blob that can only be traversed by parsing a little data, reading some
+// offsets, jumping to those offsets, parsing again, etc.
+func convertNetstackToBinary6(stk *stack.Stack, tablename linux.TableName) (linux.KernelIP6TGetEntries, linux.IPTGetinfo, error) {
+ // The table name has to fit in the struct.
+ if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
+ return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename)
+ }
+
+ table, ok := stk.IPTables().GetTable(tablename.String(), true)
+ if !ok {
+ return linux.KernelIP6TGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename)
+ }
+
+ // Setup the info struct, which is the same in IPv4 and IPv6.
+ entries, info := getEntries6(table, tablename)
+ return entries, info, nil
+}
+
+func getEntries6(table stack.Table, tablename linux.TableName) (linux.KernelIP6TGetEntries, linux.IPTGetinfo) {
+ var info linux.IPTGetinfo
+ var entries linux.KernelIP6TGetEntries
+ copy(info.Name[:], tablename[:])
+ copy(entries.Name[:], info.Name[:])
+ info.ValidHooks = table.ValidHooks()
+
+ for ruleIdx, rule := range table.Rules {
+ nflog("convert to binary: current offset: %d", entries.Size)
+
+ setHooksAndUnderflow(&info, table, entries.Size, ruleIdx)
+ // Each rule corresponds to an entry.
+ entry := linux.KernelIP6TEntry{
+ Entry: linux.IP6TEntry{
+ IPv6: linux.IP6TIP{
+ Protocol: uint16(rule.Filter.Protocol),
+ },
+ NextOffset: linux.SizeOfIP6TEntry,
+ TargetOffset: linux.SizeOfIP6TEntry,
+ },
+ }
+ copy(entry.Entry.IPv6.Dst[:], rule.Filter.Dst)
+ copy(entry.Entry.IPv6.DstMask[:], rule.Filter.DstMask)
+ copy(entry.Entry.IPv6.Src[:], rule.Filter.Src)
+ copy(entry.Entry.IPv6.SrcMask[:], rule.Filter.SrcMask)
+ copy(entry.Entry.IPv6.OutputInterface[:], rule.Filter.OutputInterface)
+ copy(entry.Entry.IPv6.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask)
+ if rule.Filter.DstInvert {
+ entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_DSTIP
+ }
+ if rule.Filter.SrcInvert {
+ entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_SRCIP
+ }
+ if rule.Filter.OutputInterfaceInvert {
+ entry.Entry.IPv6.InverseFlags |= linux.IP6T_INV_VIA_OUT
+ }
+ if rule.Filter.CheckProtocol {
+ entry.Entry.IPv6.Flags |= linux.IP6T_F_PROTO
+ }
+
+ for _, matcher := range rule.Matchers {
+ // Serialize the matcher and add it to the
+ // entry.
+ serialized := marshalMatcher(matcher)
+ nflog("convert to binary: matcher serialized as: %v", serialized)
+ if len(serialized)%8 != 0 {
+ panic(fmt.Sprintf("matcher %T is not 64-bit aligned", matcher))
+ }
+ entry.Elems = append(entry.Elems, serialized...)
+ entry.Entry.NextOffset += uint16(len(serialized))
+ entry.Entry.TargetOffset += uint16(len(serialized))
+ }
+
+ // Serialize and append the target.
+ serialized := marshalTarget(rule.Target)
+ if len(serialized)%8 != 0 {
+ panic(fmt.Sprintf("target %T is not 64-bit aligned", rule.Target))
+ }
+ entry.Elems = append(entry.Elems, serialized...)
+ entry.Entry.NextOffset += uint16(len(serialized))
+
+ nflog("convert to binary: adding entry: %+v", entry)
+
+ entries.Size += uint32(entry.Entry.NextOffset)
+ entries.Entrytable = append(entries.Entrytable, entry)
+ info.NumEntries++
+ }
+
+ info.Size = entries.Size
+ nflog("convert to binary: finished with an marshalled size of %d", info.Size)
+ return entries, info
+}
+
+func modifyEntries6(stk *stack.Stack, optVal []byte, replace *linux.IPTReplace, table *stack.Table) (map[uint32]int, *syserr.Error) {
+ nflog("set entries: setting entries in table %q", replace.Name.String())
+
+ // Convert input into a list of rules and their offsets.
+ var offset uint32
+ // offsets maps rule byte offsets to their position in table.Rules.
+ offsets := map[uint32]int{}
+ for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ {
+ nflog("set entries: processing entry at offset %d", offset)
+
+ // Get the struct ipt_entry.
+ if len(optVal) < linux.SizeOfIP6TEntry {
+ nflog("optVal has insufficient size for entry %d", len(optVal))
+ return nil, syserr.ErrInvalidArgument
+ }
+ var entry linux.IP6TEntry
+ buf := optVal[:linux.SizeOfIP6TEntry]
+ binary.Unmarshal(buf, usermem.ByteOrder, &entry)
+ initialOptValLen := len(optVal)
+ optVal = optVal[linux.SizeOfIP6TEntry:]
+
+ if entry.TargetOffset < linux.SizeOfIP6TEntry {
+ nflog("entry has too-small target offset %d", entry.TargetOffset)
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ // TODO(gvisor.dev/issue/170): We should support more IPTIP
+ // filtering fields.
+ filter, err := filterFromIP6TIP(entry.IPv6)
+ if err != nil {
+ nflog("bad iptip: %v", err)
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ // TODO(gvisor.dev/issue/170): Matchers and targets can specify
+ // that they only work for certain protocols, hooks, tables.
+ // Get matchers.
+ matchersSize := entry.TargetOffset - linux.SizeOfIP6TEntry
+ if len(optVal) < int(matchersSize) {
+ nflog("entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal))
+ return nil, syserr.ErrInvalidArgument
+ }
+ matchers, err := parseMatchers(filter, optVal[:matchersSize])
+ if err != nil {
+ nflog("failed to parse matchers: %v", err)
+ return nil, syserr.ErrInvalidArgument
+ }
+ optVal = optVal[matchersSize:]
+
+ // Get the target of the rule.
+ targetSize := entry.NextOffset - entry.TargetOffset
+ if len(optVal) < int(targetSize) {
+ nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal))
+ return nil, syserr.ErrInvalidArgument
+ }
+ target, err := parseTarget(filter, optVal[:targetSize])
+ if err != nil {
+ nflog("failed to parse target: %v", err)
+ return nil, syserr.ErrInvalidArgument
+ }
+ optVal = optVal[targetSize:]
+
+ table.Rules = append(table.Rules, stack.Rule{
+ Filter: filter,
+ Target: target,
+ Matchers: matchers,
+ })
+ offsets[offset] = int(entryIdx)
+ offset += uint32(entry.NextOffset)
+
+ if initialOptValLen-len(optVal) != int(entry.NextOffset) {
+ nflog("entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal))
+ return nil, syserr.ErrInvalidArgument
+ }
+ }
+ return offsets, nil
+}
+
+func filterFromIP6TIP(iptip linux.IP6TIP) (stack.IPHeaderFilter, error) {
+ if containsUnsupportedFields6(iptip) {
+ return stack.IPHeaderFilter{}, fmt.Errorf("unsupported fields in struct iptip: %+v", iptip)
+ }
+ if len(iptip.Dst) != header.IPv6AddressSize || len(iptip.DstMask) != header.IPv6AddressSize {
+ return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask))
+ }
+ if len(iptip.Src) != header.IPv6AddressSize || len(iptip.SrcMask) != header.IPv6AddressSize {
+ return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of source (%d) and/or source mask (%d) fields", len(iptip.Src), len(iptip.SrcMask))
+ }
+
+ n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0)
+ if n == -1 {
+ n = len(iptip.OutputInterface)
+ }
+ ifname := string(iptip.OutputInterface[:n])
+
+ n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0)
+ if n == -1 {
+ n = len(iptip.OutputInterfaceMask)
+ }
+ ifnameMask := string(iptip.OutputInterfaceMask[:n])
+
+ return stack.IPHeaderFilter{
+ Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
+ // In ip6tables a flag controls whether to check the protocol.
+ CheckProtocol: iptip.Flags&linux.IP6T_F_PROTO != 0,
+ Dst: tcpip.Address(iptip.Dst[:]),
+ DstMask: tcpip.Address(iptip.DstMask[:]),
+ DstInvert: iptip.InverseFlags&linux.IP6T_INV_DSTIP != 0,
+ Src: tcpip.Address(iptip.Src[:]),
+ SrcMask: tcpip.Address(iptip.SrcMask[:]),
+ SrcInvert: iptip.InverseFlags&linux.IP6T_INV_SRCIP != 0,
+ OutputInterface: ifname,
+ OutputInterfaceMask: ifnameMask,
+ OutputInterfaceInvert: iptip.InverseFlags&linux.IP6T_INV_VIA_OUT != 0,
+ }, nil
+}
+
+func containsUnsupportedFields6(iptip linux.IP6TIP) bool {
+ // The following features are supported:
+ // - Protocol
+ // - Dst and DstMask
+ // - Src and SrcMask
+ // - The inverse destination IP check flag
+ // - OutputInterface, OutputInterfaceMask and its inverse.
+ var emptyInterface = [linux.IFNAMSIZ]byte{}
+ flagMask := uint8(linux.IP6T_F_PROTO)
+ // Disable any supported inverse flags.
+ inverseMask := uint8(linux.IP6T_INV_DSTIP) | uint8(linux.IP6T_INV_SRCIP) | uint8(linux.IP6T_INV_VIA_OUT)
+ return iptip.InputInterface != emptyInterface ||
+ iptip.InputInterfaceMask != emptyInterface ||
+ iptip.Flags&^flagMask != 0 ||
+ iptip.InverseFlags&^inverseMask != 0 ||
+ iptip.TOS != 0
+}
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index df256676f..3e1735079 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -42,14 +42,19 @@ func nflog(format string, args ...interface{}) {
}
// GetInfo returns information about iptables.
-func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPTGetinfo, *syserr.Error) {
+func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, ipv6 bool) (linux.IPTGetinfo, *syserr.Error) {
// Read in the struct and table name.
var info linux.IPTGetinfo
if _, err := info.CopyIn(t, outPtr); err != nil {
return linux.IPTGetinfo{}, syserr.FromError(err)
}
- _, info, err := convertNetstackToBinary(stack, info.Name)
+ var err error
+ if ipv6 {
+ _, info, err = convertNetstackToBinary6(stack, info.Name)
+ } else {
+ _, info, err = convertNetstackToBinary4(stack, info.Name)
+ }
if err != nil {
nflog("couldn't convert iptables: %v", err)
return linux.IPTGetinfo{}, syserr.ErrInvalidArgument
@@ -59,9 +64,9 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT
return info, nil
}
-// GetEntries4 returns netstack's iptables rules encoded for the iptables tool.
+// GetEntries4 returns netstack's iptables rules.
func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
- // Read in the ABI struct.
+ // Read in the struct and table name.
var userEntries linux.IPTGetEntries
if _, err := userEntries.CopyIn(t, outPtr); err != nil {
nflog("couldn't copy in entries %q", userEntries.Name)
@@ -70,7 +75,7 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
// Convert netstack's iptables rules to something that the iptables
// tool can understand.
- entries, _, err := convertNetstackToBinary(stack, userEntries.Name)
+ entries, _, err := convertNetstackToBinary4(stack, userEntries.Name)
if err != nil {
nflog("couldn't read entries: %v", err)
return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
@@ -83,28 +88,29 @@ func GetEntries4(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
return entries, nil
}
-// convertNetstackToBinary converts the iptables as stored in netstack to the
-// format expected by the iptables tool. Linux stores each table as a binary
-// blob that can only be traversed by parsing a bit, reading some offsets,
-// jumping to those offsets, parsing again, etc.
-func convertNetstackToBinary(stk *stack.Stack, tablename linux.TableName) (linux.KernelIPTGetEntries, linux.IPTGetinfo, error) {
- // The table name has to fit in the struct.
- if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
- return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("table name %q too long", tablename)
+// GetEntries6 returns netstack's ip6tables rules.
+func GetEntries6(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIP6TGetEntries, *syserr.Error) {
+ // Read in the struct and table name. IPv4 and IPv6 utilize structs
+ // with the same layout.
+ var userEntries linux.IPTGetEntries
+ if _, err := userEntries.CopyIn(t, outPtr); err != nil {
+ nflog("couldn't copy in entries %q", userEntries.Name)
+ return linux.KernelIP6TGetEntries{}, syserr.FromError(err)
}
- table, ok := stk.IPTables().GetTable(tablename.String())
- if !ok {
- return linux.KernelIPTGetEntries{}, linux.IPTGetinfo{}, fmt.Errorf("couldn't find table %q", tablename)
+ // Convert netstack's iptables rules to something that the iptables
+ // tool can understand.
+ entries, _, err := convertNetstackToBinary6(stack, userEntries.Name)
+ if err != nil {
+ nflog("couldn't read entries: %v", err)
+ return linux.KernelIP6TGetEntries{}, syserr.ErrInvalidArgument
+ }
+ if binary.Size(entries) > uintptr(outLen) {
+ nflog("insufficient GetEntries output size: %d", uintptr(outLen))
+ return linux.KernelIP6TGetEntries{}, syserr.ErrInvalidArgument
}
- // Setup the info struct.
- var info linux.IPTGetinfo
- info.ValidHooks = table.ValidHooks()
- copy(info.Name[:], tablename[:])
-
- entries := getEntries4(table, &info)
- return entries, info, nil
+ return entries, nil
}
// setHooksAndUnderflow checks whether the rule at ruleIdx is a hook entrypoint
@@ -128,7 +134,7 @@ func setHooksAndUnderflow(info *linux.IPTGetinfo, table stack.Table, offset uint
// SetEntries sets iptables rules for a single table. See
// net/ipv4/netfilter/ip_tables.c:translate_table for reference.
-func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
+func SetEntries(stk *stack.Stack, optVal []byte, ipv6 bool) *syserr.Error {
var replace linux.IPTReplace
replaceBuf := optVal[:linux.SizeOfIPTReplace]
optVal = optVal[linux.SizeOfIPTReplace:]
@@ -146,7 +152,13 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
return syserr.ErrInvalidArgument
}
- offsets, err := modifyEntries4(stk, optVal, &replace, &table)
+ var err *syserr.Error
+ var offsets map[uint32]int
+ if ipv6 {
+ offsets, err = modifyEntries6(stk, optVal, &replace, &table)
+ } else {
+ offsets, err = modifyEntries4(stk, optVal, &replace, &table)
+ }
if err != nil {
return err
}
@@ -163,7 +175,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
table.BuiltinChains[hk] = ruleIdx
}
if offset == replace.Underflow[hook] {
- if !validUnderflow(table.Rules[ruleIdx]) {
+ if !validUnderflow(table.Rules[ruleIdx], ipv6) {
nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP", ruleIdx)
return syserr.ErrInvalidArgument
}
@@ -228,7 +240,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
if ruleIdx == stack.HookUnset {
continue
}
- if !isUnconditionalAccept(table.Rules[ruleIdx]) {
+ if !isUnconditionalAccept(table.Rules[ruleIdx], ipv6) {
nflog("hook %d is unsupported.", hook)
return syserr.ErrInvalidArgument
}
@@ -240,7 +252,8 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
// - There are no chains without an unconditional final rule.
// - There are no chains without an unconditional underflow rule.
- return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table))
+ return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table, ipv6))
+
}
// parseMatchers parses 0 or more matchers from optVal. optVal should contain
@@ -286,11 +299,11 @@ func parseMatchers(filter stack.IPHeaderFilter, optVal []byte) ([]stack.Matcher,
return matchers, nil
}
-func validUnderflow(rule stack.Rule) bool {
+func validUnderflow(rule stack.Rule, ipv6 bool) bool {
if len(rule.Matchers) != 0 {
return false
}
- if rule.Filter != emptyIPv4Filter {
+ if (ipv6 && rule.Filter != emptyIPv6Filter) || (!ipv6 && rule.Filter != emptyIPv4Filter) {
return false
}
switch rule.Target.(type) {
@@ -301,8 +314,8 @@ func validUnderflow(rule stack.Rule) bool {
}
}
-func isUnconditionalAccept(rule stack.Rule) bool {
- if !validUnderflow(rule) {
+func isUnconditionalAccept(rule stack.Rule, ipv6 bool) bool {
+ if !validUnderflow(rule, ipv6) {
return false
}
_, ok := rule.Target.(stack.AcceptTarget)
diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go
index 8ebdaff18..87e41abd8 100644
--- a/pkg/sentry/socket/netfilter/targets.go
+++ b/pkg/sentry/socket/netfilter/targets.go
@@ -218,8 +218,8 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro
return nil, fmt.Errorf("netfilter.SetEntries: optVal has insufficient size for redirect target %d", len(optVal))
}
- if filter.Protocol != header.TCPProtocolNumber && filter.Protocol != header.UDPProtocolNumber {
- return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
+ if p := filter.Protocol; p != header.TCPProtocolNumber && p != header.UDPProtocolNumber {
+ return nil, fmt.Errorf("netfilter.SetEntries: bad proto %d", p)
}
var redirectTarget linux.XTRedirectTarget
@@ -232,7 +232,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro
// RangeSize should be 1.
if nfRange.RangeSize != 1 {
- return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
+ return nil, fmt.Errorf("netfilter.SetEntries: bad rangesize %d", nfRange.RangeSize)
}
// TODO(gvisor.dev/issue/170): Check if the flags are valid.
@@ -240,7 +240,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro
// For now, redirect target only supports destination port change.
// Port range and IP range are not supported yet.
if nfRange.RangeIPV4.Flags&linux.NF_NAT_RANGE_PROTO_SPECIFIED == 0 {
- return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
+ return nil, fmt.Errorf("netfilter.SetEntries: invalid range flags %d", nfRange.RangeIPV4.Flags)
}
target.RangeProtoSpecified = true
@@ -249,7 +249,7 @@ func parseTarget(filter stack.IPHeaderFilter, optVal []byte) (stack.Target, erro
// TODO(gvisor.dev/issue/170): Port range is not supported yet.
if nfRange.RangeIPV4.MinPort != nfRange.RangeIPV4.MaxPort {
- return nil, fmt.Errorf("netfilter.SetEntries: invalid argument")
+ return nil, fmt.Errorf("netfilter.SetEntries: minport != maxport (%d, %d)", nfRange.RangeIPV4.MinPort, nfRange.RangeIPV4.MaxPort)
}
// Convert port from big endian to little endian.
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 9e2ebc7d4..2af2d8252 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -997,7 +997,7 @@ func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family in
return getSockOptTCP(t, ep, name, outLen)
case linux.SOL_IPV6:
- return getSockOptIPv6(t, ep, name, outLen)
+ return getSockOptIPv6(t, s, ep, name, outPtr, outLen)
case linux.SOL_IP:
return getSockOptIP(t, s, ep, name, outPtr, outLen, family)
@@ -1455,7 +1455,7 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal
}
// getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6.
-func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal.Marshallable, *syserr.Error) {
+func getSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
switch name {
case linux.IPV6_V6ONLY:
if outLen < sizeOfInt32 {
@@ -1508,10 +1508,50 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marsha
vP := primitive.Int32(boolToInt32(v))
return &vP, nil
- case linux.SO_ORIGINAL_DST:
+ case linux.IP6T_ORIGINAL_DST:
// TODO(gvisor.dev/issue/170): ip6tables.
return nil, syserr.ErrInvalidArgument
+ case linux.IP6T_SO_GET_INFO:
+ if outLen < linux.SizeOfIPTGetinfo {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ // Only valid for raw IPv6 sockets.
+ if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW {
+ return nil, syserr.ErrProtocolNotAvailable
+ }
+
+ stack := inet.StackFromContext(t)
+ if stack == nil {
+ return nil, syserr.ErrNoDevice
+ }
+ info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, true)
+ if err != nil {
+ return nil, err
+ }
+ return &info, nil
+
+ case linux.IP6T_SO_GET_ENTRIES:
+ // IPTGetEntries is reused for IPv6.
+ if outLen < linux.SizeOfIPTGetEntries {
+ return nil, syserr.ErrInvalidArgument
+ }
+ // Only valid for raw IPv6 sockets.
+ if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW {
+ return nil, syserr.ErrProtocolNotAvailable
+ }
+
+ stack := inet.StackFromContext(t)
+ if stack == nil {
+ return nil, syserr.ErrNoDevice
+ }
+ entries, err := netfilter.GetEntries6(t, stack.(*Stack).Stack, outPtr, outLen)
+ if err != nil {
+ return nil, err
+ }
+ return &entries, nil
+
default:
emitUnimplementedEventIPv6(t, name)
}
@@ -1649,7 +1689,7 @@ func getSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
if stack == nil {
return nil, syserr.ErrNoDevice
}
- info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr)
+ info, err := netfilter.GetInfo(t, stack.(*Stack).Stack, outPtr, false)
if err != nil {
return nil, err
}
@@ -1722,7 +1762,7 @@ func SetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, level int
return setSockOptTCP(t, ep, name, optVal)
case linux.SOL_IPV6:
- return setSockOptIPv6(t, ep, name, optVal)
+ return setSockOptIPv6(t, s, ep, name, optVal)
case linux.SOL_IP:
return setSockOptIP(t, s, ep, name, optVal)
@@ -2027,7 +2067,7 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
}
// setSockOptIPv6 implements SetSockOpt when level is SOL_IPV6.
-func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
+func setSockOptIPv6(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
switch name {
case linux.IPV6_V6ONLY:
if len(optVal) < sizeOfInt32 {
@@ -2076,6 +2116,27 @@ func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte)
return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTClassOption, v != 0))
+ case linux.IP6T_SO_SET_REPLACE:
+ if len(optVal) < linux.SizeOfIP6TReplace {
+ return syserr.ErrInvalidArgument
+ }
+
+ // Only valid for raw IPv6 sockets.
+ if family, skType, _ := s.Type(); family != linux.AF_INET6 || skType != linux.SOCK_RAW {
+ return syserr.ErrProtocolNotAvailable
+ }
+
+ stack := inet.StackFromContext(t)
+ if stack == nil {
+ return syserr.ErrNoDevice
+ }
+ // Stack must be a netstack stack.
+ return netfilter.SetEntries(stack.(*Stack).Stack, optVal, true)
+
+ case linux.IP6T_SO_SET_ADD_COUNTERS:
+ // TODO(gvisor.dev/issue/170): Counter support.
+ return nil
+
default:
emitUnimplementedEventIPv6(t, name)
}
@@ -2271,7 +2332,7 @@ func setSockOptIP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name in
return syserr.ErrNoDevice
}
// Stack must be a netstack stack.
- return netfilter.SetEntries(stack.(*Stack).Stack, optVal)
+ return netfilter.SetEntries(stack.(*Stack).Stack, optVal, false)
case linux.IPT_SO_SET_ADD_COUNTERS:
// TODO(gvisor.dev/issue/170): Counter support.
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 41ef4236b..30aa41db2 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -165,7 +165,11 @@ func EmptyNATTable() Table {
}
// GetTable returns a table by name.
-func (it *IPTables) GetTable(name string) (Table, bool) {
+func (it *IPTables) GetTable(name string, ipv6 bool) (Table, bool) {
+ // TODO(gvisor.dev/issue/3549): Enable IPv6.
+ if ipv6 {
+ return Table{}, false
+ }
id, ok := nameToID[name]
if !ok {
return Table{}, false
@@ -176,7 +180,11 @@ func (it *IPTables) GetTable(name string) (Table, bool) {
}
// ReplaceTable replaces or inserts table by name.
-func (it *IPTables) ReplaceTable(name string, table Table) *tcpip.Error {
+func (it *IPTables) ReplaceTable(name string, table Table, ipv6 bool) *tcpip.Error {
+ // TODO(gvisor.dev/issue/3549): Enable IPv6.
+ if ipv6 {
+ return tcpip.ErrInvalidOptionValue
+ }
id, ok := nameToID[name]
if !ok {
return tcpip.ErrInvalidOptionValue
diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go
index 73274ada9..fbbd2f50f 100644
--- a/pkg/tcpip/stack/iptables_types.go
+++ b/pkg/tcpip/stack/iptables_types.go
@@ -155,6 +155,11 @@ type IPHeaderFilter struct {
// Protocol matches the transport protocol.
Protocol tcpip.TransportProtocolNumber
+ // CheckProtocol determines whether the Protocol field should be
+ // checked during matching.
+ // TODO(gvisor.dev/issue/3549): Check this field during matching.
+ CheckProtocol bool
+
// Dst matches the destination IP address.
Dst tcpip.Address
diff --git a/test/syscalls/linux/ip6tables.cc b/test/syscalls/linux/ip6tables.cc
index 685e513f8..78e1fa09d 100644
--- a/test/syscalls/linux/ip6tables.cc
+++ b/test/syscalls/linux/ip6tables.cc
@@ -34,6 +34,54 @@ constexpr size_t kEmptyStandardEntrySize =
constexpr size_t kEmptyErrorEntrySize =
sizeof(struct ip6t_entry) + sizeof(struct xt_error_target);
+TEST(IP6TablesBasic, FailSockoptNonRaw) {
+ // Even if the user has CAP_NET_RAW, they shouldn't be able to use the
+ // ip6tables sockopts with a non-raw socket.
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ int sock;
+ ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds());
+
+ struct ipt_getinfo info = {};
+ snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+ socklen_t info_size = sizeof(info);
+ EXPECT_THAT(getsockopt(sock, SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size),
+ SyscallFailsWithErrno(ENOPROTOOPT));
+
+ EXPECT_THAT(close(sock), SyscallSucceeds());
+}
+
+TEST(IP6TablesBasic, GetInfoErrorPrecedence) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ int sock;
+ ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds());
+
+ // When using the wrong type of socket and a too-short optlen, we should get
+ // EINVAL.
+ struct ipt_getinfo info = {};
+ snprintf(info.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+ socklen_t info_size = sizeof(info) - 1;
+ EXPECT_THAT(getsockopt(sock, SOL_IPV6, IP6T_SO_GET_INFO, &info, &info_size),
+ SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(IP6TablesBasic, GetEntriesErrorPrecedence) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ int sock;
+ ASSERT_THAT(sock = socket(AF_INET6, SOCK_DGRAM, 0), SyscallSucceeds());
+
+ // When using the wrong type of socket and a too-short optlen, we should get
+ // EINVAL.
+ struct ip6t_get_entries entries = {};
+ socklen_t entries_size = sizeof(struct ip6t_get_entries) - 1;
+ snprintf(entries.name, XT_TABLE_MAXNAMELEN, "%s", kNatTablename);
+ EXPECT_THAT(
+ getsockopt(sock, SOL_IPV6, IP6T_SO_GET_ENTRIES, &entries, &entries_size),
+ SyscallFailsWithErrno(EINVAL));
+}
+
// This tests the initial state of a machine with empty ip6tables via
// getsockopt(IP6T_SO_GET_INFO). We don't have a guarantee that the iptables are
// empty when running in native, but we can test that gVisor has the same