summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/socket
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/socket')
-rw-r--r--pkg/sentry/socket/control/BUILD1
-rw-r--r--pkg/sentry/socket/control/control.go71
-rw-r--r--pkg/sentry/socket/hostinet/socket.go11
-rw-r--r--pkg/sentry/socket/netfilter/BUILD5
-rw-r--r--pkg/sentry/socket/netfilter/extensions.go95
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go340
-rw-r--r--pkg/sentry/socket/netfilter/targets.go35
-rw-r--r--pkg/sentry/socket/netfilter/tcp_matcher.go143
-rw-r--r--pkg/sentry/socket/netfilter/udp_matcher.go142
-rw-r--r--pkg/sentry/socket/netlink/message.go15
-rw-r--r--pkg/sentry/socket/netstack/netstack.go64
-rw-r--r--pkg/sentry/socket/netstack/provider.go2
12 files changed, 715 insertions, 209 deletions
diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD
index 79e16d6e8..4d42d29cb 100644
--- a/pkg/sentry/socket/control/BUILD
+++ b/pkg/sentry/socket/control/BUILD
@@ -19,6 +19,7 @@ go_library(
"//pkg/sentry/socket",
"//pkg/sentry/socket/unix/transport",
"//pkg/syserror",
+ "//pkg/tcpip",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go
index 00265f15b..8834a1e1a 100644
--- a/pkg/sentry/socket/control/control.go
+++ b/pkg/sentry/socket/control/control.go
@@ -26,6 +26,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -189,7 +190,7 @@ func putUint32(buf []byte, n uint32) []byte {
// putCmsg writes a control message header and as much data as will fit into
// the unused capacity of a buffer.
func putCmsg(buf []byte, flags int, msgType uint32, align uint, data []int32) ([]byte, int) {
- space := AlignDown(cap(buf)-len(buf), 4)
+ space := binary.AlignDown(cap(buf)-len(buf), 4)
// We can't write to space that doesn't exist, so if we are going to align
// the available space, we must align down.
@@ -282,19 +283,9 @@ func PackCredentials(t *kernel.Task, creds SCMCredentials, buf []byte, flags int
return putCmsg(buf, flags, linux.SCM_CREDENTIALS, align, c)
}
-// AlignUp rounds a length up to an alignment. align must be a power of 2.
-func AlignUp(length int, align uint) int {
- return (length + int(align) - 1) & ^(int(align) - 1)
-}
-
-// AlignDown rounds a down to an alignment. align must be a power of 2.
-func AlignDown(length int, align uint) int {
- return length & ^(int(align) - 1)
-}
-
// alignSlice extends a slice's length (up to the capacity) to align it.
func alignSlice(buf []byte, align uint) []byte {
- aligned := AlignUp(len(buf), align)
+ aligned := binary.AlignUp(len(buf), align)
if aligned > cap(buf) {
// Linux allows unaligned data if there isn't room for alignment.
// Since there isn't room for alignment, there isn't room for any
@@ -338,7 +329,7 @@ func PackTOS(t *kernel.Task, tos uint8, buf []byte) []byte {
}
// PackTClass packs an IPV6_TCLASS socket control message.
-func PackTClass(t *kernel.Task, tClass int32, buf []byte) []byte {
+func PackTClass(t *kernel.Task, tClass uint32, buf []byte) []byte {
return putCmsgStruct(
buf,
linux.SOL_IPV6,
@@ -348,6 +339,22 @@ func PackTClass(t *kernel.Task, tClass int32, buf []byte) []byte {
)
}
+// PackIPPacketInfo packs an IP_PKTINFO socket control message.
+func PackIPPacketInfo(t *kernel.Task, packetInfo tcpip.IPPacketInfo, buf []byte) []byte {
+ var p linux.ControlMessageIPPacketInfo
+ p.NIC = int32(packetInfo.NIC)
+ copy(p.LocalAddr[:], []byte(packetInfo.LocalAddr))
+ copy(p.DestinationAddr[:], []byte(packetInfo.DestinationAddr))
+
+ return putCmsgStruct(
+ buf,
+ linux.SOL_IP,
+ linux.IP_PKTINFO,
+ t.Arch().Width(),
+ p,
+ )
+}
+
// PackControlMessages packs control messages into the given buffer.
//
// We skip control messages specific to Unix domain sockets.
@@ -372,12 +379,16 @@ func PackControlMessages(t *kernel.Task, cmsgs socket.ControlMessages, buf []byt
buf = PackTClass(t, cmsgs.IP.TClass, buf)
}
+ if cmsgs.IP.HasIPPacketInfo {
+ buf = PackIPPacketInfo(t, cmsgs.IP.PacketInfo, buf)
+ }
+
return buf
}
// cmsgSpace is equivalent to CMSG_SPACE in Linux.
func cmsgSpace(t *kernel.Task, dataLen int) int {
- return linux.SizeOfControlMessageHeader + AlignUp(dataLen, t.Arch().Width())
+ return linux.SizeOfControlMessageHeader + binary.AlignUp(dataLen, t.Arch().Width())
}
// CmsgsSpace returns the number of bytes needed to fit the control messages
@@ -404,6 +415,16 @@ func CmsgsSpace(t *kernel.Task, cmsgs socket.ControlMessages) int {
return space
}
+// NewIPPacketInfo returns the IPPacketInfo struct.
+func NewIPPacketInfo(packetInfo linux.ControlMessageIPPacketInfo) tcpip.IPPacketInfo {
+ var p tcpip.IPPacketInfo
+ p.NIC = tcpip.NICID(packetInfo.NIC)
+ copy([]byte(p.LocalAddr), packetInfo.LocalAddr[:])
+ copy([]byte(p.DestinationAddr), packetInfo.DestinationAddr[:])
+
+ return p
+}
+
// Parse parses a raw socket control message into portable objects.
func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.ControlMessages, error) {
var (
@@ -437,7 +458,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con
case linux.SOL_SOCKET:
switch h.Type {
case linux.SCM_RIGHTS:
- rightsSize := AlignDown(length, linux.SizeOfControlMessageRight)
+ rightsSize := binary.AlignDown(length, linux.SizeOfControlMessageRight)
numRights := rightsSize / linux.SizeOfControlMessageRight
if len(fds)+numRights > linux.SCM_MAX_FD {
@@ -448,7 +469,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con
fds = append(fds, int32(usermem.ByteOrder.Uint32(buf[j:j+linux.SizeOfControlMessageRight])))
}
- i += AlignUp(length, width)
+ i += binary.AlignUp(length, width)
case linux.SCM_CREDENTIALS:
if length < linux.SizeOfControlMessageCredentials {
@@ -462,7 +483,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con
return socket.ControlMessages{}, err
}
cmsgs.Unix.Credentials = scmCreds
- i += AlignUp(length, width)
+ i += binary.AlignUp(length, width)
default:
// Unknown message type.
@@ -476,7 +497,19 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con
}
cmsgs.IP.HasTOS = true
binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTOS], usermem.ByteOrder, &cmsgs.IP.TOS)
- i += AlignUp(length, width)
+ i += binary.AlignUp(length, width)
+
+ case linux.IP_PKTINFO:
+ if length < linux.SizeOfControlMessageIPPacketInfo {
+ return socket.ControlMessages{}, syserror.EINVAL
+ }
+
+ cmsgs.IP.HasIPPacketInfo = true
+ var packetInfo linux.ControlMessageIPPacketInfo
+ binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageIPPacketInfo], usermem.ByteOrder, &packetInfo)
+
+ cmsgs.IP.PacketInfo = NewIPPacketInfo(packetInfo)
+ i += binary.AlignUp(length, width)
default:
return socket.ControlMessages{}, syserror.EINVAL
@@ -489,7 +522,7 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con
}
cmsgs.IP.HasTClass = true
binary.Unmarshal(buf[i:i+linux.SizeOfControlMessageTClass], usermem.ByteOrder, &cmsgs.IP.TClass)
- i += AlignUp(length, width)
+ i += binary.AlignUp(length, width)
default:
return socket.ControlMessages{}, syserror.EINVAL
diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go
index de76388ac..22f78d2e2 100644
--- a/pkg/sentry/socket/hostinet/socket.go
+++ b/pkg/sentry/socket/hostinet/socket.go
@@ -289,7 +289,7 @@ func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outPt
switch level {
case linux.SOL_IP:
switch name {
- case linux.IP_TOS, linux.IP_RECVTOS:
+ case linux.IP_TOS, linux.IP_RECVTOS, linux.IP_PKTINFO:
optlen = sizeofInt32
}
case linux.SOL_IPV6:
@@ -336,6 +336,8 @@ func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt [
switch name {
case linux.IP_TOS, linux.IP_RECVTOS:
optlen = sizeofInt32
+ case linux.IP_PKTINFO:
+ optlen = linux.SizeOfControlMessageIPPacketInfo
}
case linux.SOL_IPV6:
switch name {
@@ -473,7 +475,14 @@ func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
case syscall.IP_TOS:
controlMessages.IP.HasTOS = true
binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageTOS], usermem.ByteOrder, &controlMessages.IP.TOS)
+
+ case syscall.IP_PKTINFO:
+ controlMessages.IP.HasIPPacketInfo = true
+ var packetInfo linux.ControlMessageIPPacketInfo
+ binary.Unmarshal(unixCmsg.Data[:linux.SizeOfControlMessageIPPacketInfo], usermem.ByteOrder, &packetInfo)
+ controlMessages.IP.PacketInfo = control.NewIPPacketInfo(packetInfo)
}
+
case syscall.SOL_IPV6:
switch unixCmsg.Header.Type {
case syscall.IPV6_TCLASS:
diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD
index fa2a2cb66..7cd2ce55b 100644
--- a/pkg/sentry/socket/netfilter/BUILD
+++ b/pkg/sentry/socket/netfilter/BUILD
@@ -5,7 +5,11 @@ package(licenses = ["notice"])
go_library(
name = "netfilter",
srcs = [
+ "extensions.go",
"netfilter.go",
+ "targets.go",
+ "tcp_matcher.go",
+ "udp_matcher.go",
],
# This target depends on netstack and should only be used by epsocket,
# which is allowed to depend on netstack.
@@ -17,6 +21,7 @@ go_library(
"//pkg/sentry/kernel",
"//pkg/syserr",
"//pkg/tcpip",
+ "//pkg/tcpip/header",
"//pkg/tcpip/iptables",
"//pkg/tcpip/stack",
"//pkg/usermem",
diff --git a/pkg/sentry/socket/netfilter/extensions.go b/pkg/sentry/socket/netfilter/extensions.go
new file mode 100644
index 000000000..b4b244abf
--- /dev/null
+++ b/pkg/sentry/socket/netfilter/extensions.go
@@ -0,0 +1,95 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netfilter
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/tcpip/iptables"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// TODO(gvisor.dev/issue/170): The following per-matcher params should be
+// supported:
+// - Table name
+// - Match size
+// - User size
+// - Hooks
+// - Proto
+// - Family
+
+// matchMaker knows how to (un)marshal the matcher named name().
+type matchMaker interface {
+ // name is the matcher name as stored in the xt_entry_match struct.
+ name() string
+
+ // marshal converts from an iptables.Matcher to an ABI struct.
+ marshal(matcher iptables.Matcher) []byte
+
+ // unmarshal converts from the ABI matcher struct to an
+ // iptables.Matcher.
+ unmarshal(buf []byte, filter iptables.IPHeaderFilter) (iptables.Matcher, error)
+}
+
+// matchMakers maps the name of supported matchers to the matchMaker that
+// marshals and unmarshals it. It is immutable after package initialization.
+var matchMakers = map[string]matchMaker{}
+
+// registermatchMaker should be called by match extensions to register them
+// with the netfilter package.
+func registerMatchMaker(mm matchMaker) {
+ if _, ok := matchMakers[mm.name()]; ok {
+ panic(fmt.Sprintf("Multiple matches registered with name %q.", mm.name()))
+ }
+ matchMakers[mm.name()] = mm
+}
+
+func marshalMatcher(matcher iptables.Matcher) []byte {
+ matchMaker, ok := matchMakers[matcher.Name()]
+ if !ok {
+ panic(fmt.Sprintf("Unknown matcher of type %T.", matcher))
+ }
+ return matchMaker.marshal(matcher)
+}
+
+// marshalEntryMatch creates a marshalled XTEntryMatch with the given name and
+// data appended at the end.
+func marshalEntryMatch(name string, data []byte) []byte {
+ nflog("marshaling matcher %q", name)
+
+ // We have to pad this struct size to a multiple of 8 bytes.
+ size := binary.AlignUp(linux.SizeOfXTEntryMatch+len(data), 8)
+ matcher := linux.KernelXTEntryMatch{
+ XTEntryMatch: linux.XTEntryMatch{
+ MatchSize: uint16(size),
+ },
+ Data: data,
+ }
+ copy(matcher.Name[:], name)
+
+ buf := make([]byte, 0, size)
+ buf = binary.Marshal(buf, usermem.ByteOrder, matcher)
+ return append(buf, make([]byte, size-len(buf))...)
+}
+
+func unmarshalMatcher(match linux.XTEntryMatch, filter iptables.IPHeaderFilter, buf []byte) (iptables.Matcher, error) {
+ matchMaker, ok := matchMakers[match.Name.String()]
+ if !ok {
+ return nil, fmt.Errorf("unsupported matcher with name %q", match.Name.String())
+ }
+ return matchMaker.unmarshal(buf, filter)
+}
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index 445d08d11..2ec11f6ac 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -17,6 +17,7 @@
package netfilter
import (
+ "errors"
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -34,10 +35,6 @@ import (
// shouldn't be reached - an error has occurred if we fall through to one.
const errorTargetName = "ERROR"
-const (
- matcherNameUDP = "udp"
-)
-
// Metadata is used to verify that we are correctly serializing and
// deserializing iptables into structs consumable by the iptables tool. We save
// a metadata struct when the tables are written, and when they are read out we
@@ -53,7 +50,9 @@ type metadata struct {
// nflog logs messages related to the writing and reading of iptables.
func nflog(format string, args ...interface{}) {
- log.Infof("netfilter: "+format, args...)
+ if log.IsLogging(log.Debug) {
+ log.Debugf("netfilter: "+format, args...)
+ }
}
// GetInfo returns information about iptables.
@@ -67,7 +66,8 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT
// Find the appropriate table.
table, err := findTable(stack, info.Name)
if err != nil {
- return linux.IPTGetinfo{}, err
+ nflog("%v", err)
+ return linux.IPTGetinfo{}, syserr.ErrInvalidArgument
}
// Get the hooks that apply to this table.
@@ -94,39 +94,40 @@ func GetEntries(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen
// Read in the struct and table name.
var userEntries linux.IPTGetEntries
if _, err := t.CopyIn(outPtr, &userEntries); err != nil {
- log.Warningf("netfilter: couldn't copy in entries %q", userEntries.Name)
+ nflog("couldn't copy in entries %q", userEntries.Name)
return linux.KernelIPTGetEntries{}, syserr.FromError(err)
}
// Find the appropriate table.
table, err := findTable(stack, userEntries.Name)
if err != nil {
- log.Warningf("netfilter: couldn't find table %q", userEntries.Name)
- return linux.KernelIPTGetEntries{}, err
+ nflog("%v", err)
+ return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
}
// Convert netstack's iptables rules to something that the iptables
// tool can understand.
entries, meta, err := convertNetstackToBinary(userEntries.Name.String(), table)
if err != nil {
- return linux.KernelIPTGetEntries{}, err
+ nflog("couldn't read entries: %v", err)
+ return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
}
if meta != table.Metadata().(metadata) {
panic(fmt.Sprintf("Table %q metadata changed between writing and reading. Was saved as %+v, but is now %+v", userEntries.Name.String(), table.Metadata().(metadata), meta))
}
if binary.Size(entries) > uintptr(outLen) {
- log.Warningf("Insufficient GetEntries output size: %d", uintptr(outLen))
+ nflog("insufficient GetEntries output size: %d", uintptr(outLen))
return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
}
return entries, nil
}
-func findTable(stack *stack.Stack, tablename linux.TableName) (iptables.Table, *syserr.Error) {
+func findTable(stack *stack.Stack, tablename linux.TableName) (iptables.Table, error) {
ipt := stack.IPTables()
table, ok := ipt.Tables[tablename.String()]
if !ok {
- return iptables.Table{}, syserr.ErrInvalidArgument
+ return iptables.Table{}, fmt.Errorf("couldn't find table %q", tablename)
}
return table, nil
}
@@ -154,15 +155,14 @@ func FillDefaultIPTables(stack *stack.Stack) {
// format expected by the iptables tool. Linux stores each table as a binary
// blob that can only be traversed by parsing a bit, reading some offsets,
// jumping to those offsets, parsing again, etc.
-func convertNetstackToBinary(tablename string, table iptables.Table) (linux.KernelIPTGetEntries, metadata, *syserr.Error) {
+func convertNetstackToBinary(tablename string, table iptables.Table) (linux.KernelIPTGetEntries, metadata, error) {
// Return values.
var entries linux.KernelIPTGetEntries
var meta metadata
// The table name has to fit in the struct.
if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
- log.Warningf("Table name %q too long.", tablename)
- return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument
+ return linux.KernelIPTGetEntries{}, metadata{}, fmt.Errorf("table name %q too long.", tablename)
}
copy(entries.Name[:], tablename)
@@ -228,61 +228,27 @@ func convertNetstackToBinary(tablename string, table iptables.Table) (linux.Kern
return entries, meta, nil
}
-func marshalMatcher(matcher iptables.Matcher) []byte {
- switch m := matcher.(type) {
- case *iptables.UDPMatcher:
- return marshalUDPMatcher(m)
- default:
- // TODO(gvisor.dev/issue/170): Support other matchers.
- panic(fmt.Errorf("unknown matcher of type %T", matcher))
- }
-}
-
-func marshalUDPMatcher(matcher *iptables.UDPMatcher) []byte {
- nflog("convert to binary: marshalling UDP matcher: %+v", matcher)
-
- // We have to pad this struct size to a multiple of 8 bytes.
- size := alignUp(linux.SizeOfXTEntryMatch+linux.SizeOfXTUDP, 8)
-
- linuxMatcher := linux.KernelXTEntryMatch{
- XTEntryMatch: linux.XTEntryMatch{
- MatchSize: uint16(size),
- },
- Data: make([]byte, 0, linux.SizeOfXTUDP),
- }
- copy(linuxMatcher.Name[:], matcherNameUDP)
-
- xtudp := linux.XTUDP{
- SourcePortStart: matcher.Data.SourcePortStart,
- SourcePortEnd: matcher.Data.SourcePortEnd,
- DestinationPortStart: matcher.Data.DestinationPortStart,
- DestinationPortEnd: matcher.Data.DestinationPortEnd,
- InverseFlags: matcher.Data.InverseFlags,
- }
- linuxMatcher.Data = binary.Marshal(linuxMatcher.Data, usermem.ByteOrder, xtudp)
-
- buf := make([]byte, 0, size)
- buf = binary.Marshal(buf, usermem.ByteOrder, linuxMatcher)
- buf = append(buf, make([]byte, size-len(buf))...)
- nflog("convert to binary: marshalled UDP matcher into %v", buf)
- return buf[:]
-}
-
func marshalTarget(target iptables.Target) []byte {
- switch target.(type) {
- case iptables.UnconditionalAcceptTarget:
- return marshalStandardTarget(iptables.Accept)
- case iptables.UnconditionalDropTarget:
- return marshalStandardTarget(iptables.Drop)
+ switch tg := target.(type) {
+ case iptables.AcceptTarget:
+ return marshalStandardTarget(iptables.RuleAccept)
+ case iptables.DropTarget:
+ return marshalStandardTarget(iptables.RuleDrop)
case iptables.ErrorTarget:
- return marshalErrorTarget()
+ return marshalErrorTarget(errorTargetName)
+ case iptables.UserChainTarget:
+ return marshalErrorTarget(tg.Name)
+ case iptables.ReturnTarget:
+ return marshalStandardTarget(iptables.RuleReturn)
+ case JumpTarget:
+ return marshalJumpTarget(tg)
default:
panic(fmt.Errorf("unknown target of type %T", target))
}
}
-func marshalStandardTarget(verdict iptables.Verdict) []byte {
- nflog("convert to binary: marshalling standard target with size %d", linux.SizeOfXTStandardTarget)
+func marshalStandardTarget(verdict iptables.RuleVerdict) []byte {
+ nflog("convert to binary: marshalling standard target")
// The target's name will be the empty string.
target := linux.XTStandardTarget{
@@ -296,56 +262,69 @@ func marshalStandardTarget(verdict iptables.Verdict) []byte {
return binary.Marshal(ret, usermem.ByteOrder, target)
}
-func marshalErrorTarget() []byte {
+func marshalErrorTarget(errorName string) []byte {
// This is an error target named error
target := linux.XTErrorTarget{
Target: linux.XTEntryTarget{
TargetSize: linux.SizeOfXTErrorTarget,
},
}
- copy(target.Name[:], errorTargetName)
+ copy(target.Name[:], errorName)
copy(target.Target.Name[:], errorTargetName)
ret := make([]byte, 0, linux.SizeOfXTErrorTarget)
return binary.Marshal(ret, usermem.ByteOrder, target)
}
+func marshalJumpTarget(jt JumpTarget) []byte {
+ nflog("convert to binary: marshalling jump target")
+
+ // The target's name will be the empty string.
+ target := linux.XTStandardTarget{
+ Target: linux.XTEntryTarget{
+ TargetSize: linux.SizeOfXTStandardTarget,
+ },
+ // Verdict is overloaded by the ABI. When positive, it holds
+ // the jump offset from the start of the table.
+ Verdict: int32(jt.Offset),
+ }
+
+ ret := make([]byte, 0, linux.SizeOfXTStandardTarget)
+ return binary.Marshal(ret, usermem.ByteOrder, target)
+}
+
// translateFromStandardVerdict translates verdicts the same way as the iptables
// tool.
-func translateFromStandardVerdict(verdict iptables.Verdict) int32 {
+func translateFromStandardVerdict(verdict iptables.RuleVerdict) int32 {
switch verdict {
- case iptables.Accept:
+ case iptables.RuleAccept:
return -linux.NF_ACCEPT - 1
- case iptables.Drop:
+ case iptables.RuleDrop:
return -linux.NF_DROP - 1
- case iptables.Queue:
- return -linux.NF_QUEUE - 1
- case iptables.Return:
+ case iptables.RuleReturn:
return linux.NF_RETURN
- case iptables.Jump:
+ default:
// TODO(gvisor.dev/issue/170): Support Jump.
- panic("Jump isn't supported yet")
+ panic(fmt.Sprintf("unknown standard verdict: %d", verdict))
}
- panic(fmt.Sprintf("unknown standard verdict: %d", verdict))
}
-// translateToStandardVerdict translates from the value in a
+// translateToStandardTarget translates from the value in a
// linux.XTStandardTarget to an iptables.Verdict.
-func translateToStandardVerdict(val int32) (iptables.Verdict, *syserr.Error) {
+func translateToStandardTarget(val int32) (iptables.Target, error) {
// TODO(gvisor.dev/issue/170): Support other verdicts.
switch val {
case -linux.NF_ACCEPT - 1:
- return iptables.Accept, nil
+ return iptables.AcceptTarget{}, nil
case -linux.NF_DROP - 1:
- return iptables.Drop, nil
+ return iptables.DropTarget{}, nil
case -linux.NF_QUEUE - 1:
- log.Warningf("Unsupported iptables verdict QUEUE.")
+ return nil, errors.New("unsupported iptables verdict QUEUE")
case linux.NF_RETURN:
- log.Warningf("Unsupported iptables verdict RETURN.")
+ return iptables.ReturnTarget{}, nil
default:
- log.Warningf("Unknown iptables verdict %d.", val)
+ return nil, fmt.Errorf("unknown iptables verdict %d", val)
}
- return iptables.Invalid, syserr.ErrInvalidArgument
}
// SetEntries sets iptables rules for a single table. See
@@ -353,7 +332,7 @@ func translateToStandardVerdict(val int32) (iptables.Verdict, *syserr.Error) {
func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
// Get the basic rules data (struct ipt_replace).
if len(optVal) < linux.SizeOfIPTReplace {
- log.Warningf("netfilter.SetEntries: optVal has insufficient size for replace %d", len(optVal))
+ nflog("optVal has insufficient size for replace %d", len(optVal))
return syserr.ErrInvalidArgument
}
var replace linux.IPTReplace
@@ -367,7 +346,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
case iptables.TablenameFilter:
table = iptables.EmptyFilterTable()
default:
- log.Warningf("We don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String())
+ nflog("we don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String())
return syserr.ErrInvalidArgument
}
@@ -375,13 +354,14 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
// Convert input into a list of rules and their offsets.
var offset uint32
- var offsets []uint32
+ // offsets maps rule byte offsets to their position in table.Rules.
+ offsets := map[uint32]int{}
for entryIdx := uint32(0); entryIdx < replace.NumEntries; entryIdx++ {
nflog("set entries: processing entry at offset %d", offset)
// Get the struct ipt_entry.
if len(optVal) < linux.SizeOfIPTEntry {
- log.Warningf("netfilter: optVal has insufficient size for entry %d", len(optVal))
+ nflog("optVal has insufficient size for entry %d", len(optVal))
return syserr.ErrInvalidArgument
}
var entry linux.IPTEntry
@@ -391,7 +371,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
optVal = optVal[linux.SizeOfIPTEntry:]
if entry.TargetOffset < linux.SizeOfIPTEntry {
- log.Warningf("netfilter: entry has too-small target offset %d", entry.TargetOffset)
+ nflog("entry has too-small target offset %d", entry.TargetOffset)
return syserr.ErrInvalidArgument
}
@@ -399,7 +379,8 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
// filtering fields.
filter, err := filterFromIPTIP(entry.IP)
if err != nil {
- return err
+ nflog("bad iptip: %v", err)
+ return syserr.ErrInvalidArgument
}
// TODO(gvisor.dev/issue/170): Matchers and targets can specify
@@ -407,25 +388,26 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
// Get matchers.
matchersSize := entry.TargetOffset - linux.SizeOfIPTEntry
if len(optVal) < int(matchersSize) {
- log.Warningf("netfilter: entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal))
+ nflog("entry doesn't have enough room for its matchers (only %d bytes remain)", len(optVal))
return syserr.ErrInvalidArgument
}
matchers, err := parseMatchers(filter, optVal[:matchersSize])
if err != nil {
- log.Warningf("netfilter: failed to parse matchers: %v", err)
- return err
+ nflog("failed to parse matchers: %v", err)
+ return syserr.ErrInvalidArgument
}
optVal = optVal[matchersSize:]
// Get the target of the rule.
targetSize := entry.NextOffset - entry.TargetOffset
if len(optVal) < int(targetSize) {
- log.Warningf("netfilter: entry doesn't have enough room for its target (only %d bytes remain)", len(optVal))
+ nflog("entry doesn't have enough room for its target (only %d bytes remain)", len(optVal))
return syserr.ErrInvalidArgument
}
target, err := parseTarget(optVal[:targetSize])
if err != nil {
- return err
+ nflog("failed to parse target: %v", err)
+ return syserr.ErrInvalidArgument
}
optVal = optVal[targetSize:]
@@ -434,11 +416,12 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
Target: target,
Matchers: matchers,
})
- offsets = append(offsets, offset)
+ offsets[offset] = int(entryIdx)
offset += uint32(entry.NextOffset)
if initialOptValLen-len(optVal) != int(entry.NextOffset) {
- log.Warningf("netfilter: entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal))
+ nflog("entry NextOffset is %d, but entry took up %d bytes", entry.NextOffset, initialOptValLen-len(optVal))
+ return syserr.ErrInvalidArgument
}
}
@@ -447,32 +430,77 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
for hook, _ := range replace.HookEntry {
if table.ValidHooks()&(1<<hook) != 0 {
hk := hookFromLinux(hook)
- for ruleIdx, offset := range offsets {
+ for offset, ruleIdx := range offsets {
if offset == replace.HookEntry[hook] {
table.BuiltinChains[hk] = ruleIdx
}
if offset == replace.Underflow[hook] {
+ if !validUnderflow(table.Rules[ruleIdx]) {
+ nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP")
+ return syserr.ErrInvalidArgument
+ }
table.Underflows[hk] = ruleIdx
}
}
if ruleIdx := table.BuiltinChains[hk]; ruleIdx == iptables.HookUnset {
- log.Warningf("Hook %v is unset.", hk)
+ nflog("hook %v is unset.", hk)
return syserr.ErrInvalidArgument
}
if ruleIdx := table.Underflows[hk]; ruleIdx == iptables.HookUnset {
- log.Warningf("Underflow %v is unset.", hk)
+ nflog("underflow %v is unset.", hk)
return syserr.ErrInvalidArgument
}
}
}
+ // Add the user chains.
+ for ruleIdx, rule := range table.Rules {
+ target, ok := rule.Target.(iptables.UserChainTarget)
+ if !ok {
+ continue
+ }
+
+ // We found a user chain. Before inserting it into the table,
+ // check that:
+ // - There's some other rule after it.
+ // - There are no matchers.
+ if ruleIdx == len(table.Rules)-1 {
+ nflog("user chain must have a rule or default policy")
+ return syserr.ErrInvalidArgument
+ }
+ if len(table.Rules[ruleIdx].Matchers) != 0 {
+ nflog("user chain's first node must have no matchers")
+ return syserr.ErrInvalidArgument
+ }
+ table.UserChains[target.Name] = ruleIdx + 1
+ }
+
+ // Set each jump to point to the appropriate rule. Right now they hold byte
+ // offsets.
+ for ruleIdx, rule := range table.Rules {
+ jump, ok := rule.Target.(JumpTarget)
+ if !ok {
+ continue
+ }
+
+ // Find the rule corresponding to the jump rule offset.
+ jumpTo, ok := offsets[jump.Offset]
+ if !ok {
+ nflog("failed to find a rule to jump to")
+ return syserr.ErrInvalidArgument
+ }
+ jump.RuleNum = jumpTo
+ rule.Target = jump
+ table.Rules[ruleIdx] = rule
+ }
+
// TODO(gvisor.dev/issue/170): Support other chains.
// Since we only support modifying the INPUT chain right now, make sure
// all other chains point to ACCEPT rules.
for hook, ruleIdx := range table.BuiltinChains {
if hook != iptables.Input {
- if _, ok := table.Rules[ruleIdx].Target.(iptables.UnconditionalAcceptTarget); !ok {
- log.Warningf("Hook %d is unsupported.", hook)
+ if _, ok := table.Rules[ruleIdx].Target.(iptables.AcceptTarget); !ok {
+ nflog("hook %d is unsupported.", hook)
return syserr.ErrInvalidArgument
}
}
@@ -498,7 +526,7 @@ func SetEntries(stack *stack.Stack, optVal []byte) *syserr.Error {
// parseMatchers parses 0 or more matchers from optVal. optVal should contain
// only the matchers.
-func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Matcher, *syserr.Error) {
+func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Matcher, error) {
nflog("set entries: parsing matchers of size %d", len(optVal))
var matchers []iptables.Matcher
for len(optVal) > 0 {
@@ -506,8 +534,7 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma
// Get the XTEntryMatch.
if len(optVal) < linux.SizeOfXTEntryMatch {
- log.Warningf("netfilter: optVal has insufficient size for entry match: %d", len(optVal))
- return nil, syserr.ErrInvalidArgument
+ return nil, fmt.Errorf("optVal has insufficient size for entry match: %d", len(optVal))
}
var match linux.XTEntryMatch
buf := optVal[:linux.SizeOfXTEntryMatch]
@@ -516,45 +543,18 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma
// Check some invariants.
if match.MatchSize < linux.SizeOfXTEntryMatch {
- log.Warningf("netfilter: match size is too small, must be at least %d", linux.SizeOfXTEntryMatch)
- return nil, syserr.ErrInvalidArgument
+
+ return nil, fmt.Errorf("match size is too small, must be at least %d", linux.SizeOfXTEntryMatch)
}
if len(optVal) < int(match.MatchSize) {
- log.Warningf("netfilter: optVal has insufficient size for match: %d", len(optVal))
- return nil, syserr.ErrInvalidArgument
+ return nil, fmt.Errorf("optVal has insufficient size for match: %d", len(optVal))
}
- buf = optVal[linux.SizeOfXTEntryMatch:match.MatchSize]
- var matcher iptables.Matcher
- var err error
- switch match.Name.String() {
- case matcherNameUDP:
- if len(buf) < linux.SizeOfXTUDP {
- log.Warningf("netfilter: optVal has insufficient size for UDP match: %d", len(optVal))
- return nil, syserr.ErrInvalidArgument
- }
- // For alignment reasons, the match's total size may
- // exceed what's strictly necessary to hold matchData.
- var matchData linux.XTUDP
- binary.Unmarshal(buf[:linux.SizeOfXTUDP], usermem.ByteOrder, &matchData)
- log.Infof("parseMatchers: parsed XTUDP: %+v", matchData)
- matcher, err = iptables.NewUDPMatcher(filter, iptables.UDPMatcherParams{
- SourcePortStart: matchData.SourcePortStart,
- SourcePortEnd: matchData.SourcePortEnd,
- DestinationPortStart: matchData.DestinationPortStart,
- DestinationPortEnd: matchData.DestinationPortEnd,
- InverseFlags: matchData.InverseFlags,
- })
- if err != nil {
- log.Warningf("netfilter: failed to create UDP matcher: %v", err)
- return nil, syserr.ErrInvalidArgument
- }
-
- default:
- log.Warningf("netfilter: unsupported matcher with name %q", match.Name.String())
- return nil, syserr.ErrInvalidArgument
+ // Parse the specific matcher.
+ matcher, err := unmarshalMatcher(match, filter, optVal[linux.SizeOfXTEntryMatch:match.MatchSize])
+ if err != nil {
+ return nil, fmt.Errorf("failed to create matcher: %v", err)
}
-
matchers = append(matchers, matcher)
// TODO(gvisor.dev/issue/170): Check the revision field.
@@ -562,8 +562,7 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma
}
if len(optVal) != 0 {
- log.Warningf("netfilter: optVal should be exhausted after parsing matchers")
- return nil, syserr.ErrInvalidArgument
+ return nil, errors.New("optVal should be exhausted after parsing matchers")
}
return matchers, nil
@@ -571,11 +570,10 @@ func parseMatchers(filter iptables.IPHeaderFilter, optVal []byte) ([]iptables.Ma
// parseTarget parses a target from optVal. optVal should contain only the
// target.
-func parseTarget(optVal []byte) (iptables.Target, *syserr.Error) {
+func parseTarget(optVal []byte) (iptables.Target, error) {
nflog("set entries: parsing target of size %d", len(optVal))
if len(optVal) < linux.SizeOfXTEntryTarget {
- log.Warningf("netfilter: optVal has insufficient size for entry target %d", len(optVal))
- return nil, syserr.ErrInvalidArgument
+ return nil, fmt.Errorf("optVal has insufficient size for entry target %d", len(optVal))
}
var target linux.XTEntryTarget
buf := optVal[:linux.SizeOfXTEntryTarget]
@@ -584,32 +582,23 @@ func parseTarget(optVal []byte) (iptables.Target, *syserr.Error) {
case "":
// Standard target.
if len(optVal) != linux.SizeOfXTStandardTarget {
- log.Warningf("netfilter.SetEntries: optVal has wrong size for standard target %d", len(optVal))
- return nil, syserr.ErrInvalidArgument
+ return nil, fmt.Errorf("optVal has wrong size for standard target %d", len(optVal))
}
var standardTarget linux.XTStandardTarget
buf = optVal[:linux.SizeOfXTStandardTarget]
binary.Unmarshal(buf, usermem.ByteOrder, &standardTarget)
- verdict, err := translateToStandardVerdict(standardTarget.Verdict)
- if err != nil {
- return nil, err
- }
- switch verdict {
- case iptables.Accept:
- return iptables.UnconditionalAcceptTarget{}, nil
- case iptables.Drop:
- return iptables.UnconditionalDropTarget{}, nil
- default:
- log.Warningf("Unknown verdict: %v", verdict)
- return nil, syserr.ErrInvalidArgument
+ if standardTarget.Verdict < 0 {
+ // A Verdict < 0 indicates a non-jump verdict.
+ return translateToStandardTarget(standardTarget.Verdict)
}
+ // A verdict >= 0 indicates a jump.
+ return JumpTarget{Offset: uint32(standardTarget.Verdict)}, nil
case errorTargetName:
// Error target.
if len(optVal) != linux.SizeOfXTErrorTarget {
- log.Infof("netfilter.SetEntries: optVal has insufficient size for error target %d", len(optVal))
- return nil, syserr.ErrInvalidArgument
+ return nil, fmt.Errorf("optVal has insufficient size for error target %d", len(optVal))
}
var errorTarget linux.XTErrorTarget
buf = optVal[:linux.SizeOfXTErrorTarget]
@@ -622,24 +611,24 @@ func parseTarget(optVal []byte) (iptables.Target, *syserr.Error) {
// somehow fall through every rule.
// * To mark the start of a user defined chain. These
// rules have an error with the name of the chain.
- switch errorTarget.Name.String() {
+ switch name := errorTarget.Name.String(); name {
case errorTargetName:
+ nflog("set entries: error target")
return iptables.ErrorTarget{}, nil
default:
- log.Infof("Unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String())
- return nil, syserr.ErrInvalidArgument
+ // User defined chain.
+ nflog("set entries: user-defined target %q", name)
+ return iptables.UserChainTarget{Name: name}, nil
}
}
// Unknown target.
- log.Infof("Unknown target %q doesn't exist or isn't supported yet.", target.Name.String())
- return nil, syserr.ErrInvalidArgument
+ return nil, fmt.Errorf("unknown target %q doesn't exist or isn't supported yet.", target.Name.String())
}
-func filterFromIPTIP(iptip linux.IPTIP) (iptables.IPHeaderFilter, *syserr.Error) {
+func filterFromIPTIP(iptip linux.IPTIP) (iptables.IPHeaderFilter, error) {
if containsUnsupportedFields(iptip) {
- log.Warningf("netfilter: unsupported fields in struct iptip: %+v", iptip)
- return iptables.IPHeaderFilter{}, syserr.ErrInvalidArgument
+ return iptables.IPHeaderFilter{}, fmt.Errorf("unsupported fields in struct iptip: %+v", iptip)
}
return iptables.IPHeaderFilter{
Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
@@ -662,6 +651,18 @@ func containsUnsupportedFields(iptip linux.IPTIP) bool {
iptip.InverseFlags != 0
}
+func validUnderflow(rule iptables.Rule) bool {
+ if len(rule.Matchers) != 0 {
+ return false
+ }
+ switch rule.Target.(type) {
+ case iptables.AcceptTarget, iptables.DropTarget:
+ return true
+ default:
+ return false
+ }
+}
+
func hookFromLinux(hook int) iptables.Hook {
switch hook {
case linux.NF_INET_PRE_ROUTING:
@@ -677,8 +678,3 @@ func hookFromLinux(hook int) iptables.Hook {
}
panic(fmt.Sprintf("Unknown hook %d does not correspond to a builtin chain", hook))
}
-
-// alignUp rounds a length up to an alignment. align must be a power of 2.
-func alignUp(length int, align uint) int {
- return (length + int(align) - 1) & ^(int(align) - 1)
-}
diff --git a/pkg/sentry/socket/netfilter/targets.go b/pkg/sentry/socket/netfilter/targets.go
new file mode 100644
index 000000000..c421b87cf
--- /dev/null
+++ b/pkg/sentry/socket/netfilter/targets.go
@@ -0,0 +1,35 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netfilter
+
+import (
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/iptables"
+)
+
+// JumpTarget implements iptables.Target.
+type JumpTarget struct {
+ // Offset is the byte offset of the rule to jump to. It is used for
+ // marshaling and unmarshaling.
+ Offset uint32
+
+ // RuleNum is the rule to jump to.
+ RuleNum int
+}
+
+// Action implements iptables.Target.Action.
+func (jt JumpTarget) Action(tcpip.PacketBuffer) (iptables.RuleVerdict, int) {
+ return iptables.RuleJump, jt.RuleNum
+}
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
new file mode 100644
index 000000000..f9945e214
--- /dev/null
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -0,0 +1,143 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netfilter
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/iptables"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+const matcherNameTCP = "tcp"
+
+func init() {
+ registerMatchMaker(tcpMarshaler{})
+}
+
+// tcpMarshaler implements matchMaker for TCP matching.
+type tcpMarshaler struct{}
+
+// name implements matchMaker.name.
+func (tcpMarshaler) name() string {
+ return matcherNameTCP
+}
+
+// marshal implements matchMaker.marshal.
+func (tcpMarshaler) marshal(mr iptables.Matcher) []byte {
+ matcher := mr.(*TCPMatcher)
+ xttcp := linux.XTTCP{
+ SourcePortStart: matcher.sourcePortStart,
+ SourcePortEnd: matcher.sourcePortEnd,
+ DestinationPortStart: matcher.destinationPortStart,
+ DestinationPortEnd: matcher.destinationPortEnd,
+ }
+ buf := make([]byte, 0, linux.SizeOfXTTCP)
+ return marshalEntryMatch(matcherNameTCP, binary.Marshal(buf, usermem.ByteOrder, xttcp))
+}
+
+// unmarshal implements matchMaker.unmarshal.
+func (tcpMarshaler) unmarshal(buf []byte, filter iptables.IPHeaderFilter) (iptables.Matcher, error) {
+ if len(buf) < linux.SizeOfXTTCP {
+ return nil, fmt.Errorf("buf has insufficient size for TCP match: %d", len(buf))
+ }
+
+ // For alignment reasons, the match's total size may
+ // exceed what's strictly necessary to hold matchData.
+ var matchData linux.XTTCP
+ binary.Unmarshal(buf[:linux.SizeOfXTTCP], usermem.ByteOrder, &matchData)
+ nflog("parseMatchers: parsed XTTCP: %+v", matchData)
+
+ if matchData.Option != 0 ||
+ matchData.FlagMask != 0 ||
+ matchData.FlagCompare != 0 ||
+ matchData.InverseFlags != 0 {
+ return nil, fmt.Errorf("unsupported TCP matcher flags set")
+ }
+
+ if filter.Protocol != header.TCPProtocolNumber {
+ return nil, fmt.Errorf("TCP matching is only valid for protocol %d.", header.TCPProtocolNumber)
+ }
+
+ return &TCPMatcher{
+ sourcePortStart: matchData.SourcePortStart,
+ sourcePortEnd: matchData.SourcePortEnd,
+ destinationPortStart: matchData.DestinationPortStart,
+ destinationPortEnd: matchData.DestinationPortEnd,
+ }, nil
+}
+
+// TCPMatcher matches TCP packets and their headers. It implements Matcher.
+type TCPMatcher struct {
+ sourcePortStart uint16
+ sourcePortEnd uint16
+ destinationPortStart uint16
+ destinationPortEnd uint16
+}
+
+// Name implements Matcher.Name.
+func (*TCPMatcher) Name() string {
+ return matcherNameTCP
+}
+
+// Match implements Matcher.Match.
+func (tm *TCPMatcher) Match(hook iptables.Hook, pkt tcpip.PacketBuffer, interfaceName string) (bool, bool) {
+ netHeader := header.IPv4(pkt.NetworkHeader)
+
+ if netHeader.TransportProtocol() != header.TCPProtocolNumber {
+ return false, false
+ }
+
+ // We dont't match fragments.
+ if frag := netHeader.FragmentOffset(); frag != 0 {
+ if frag == 1 {
+ return false, true
+ }
+ return false, false
+ }
+
+ // Now we need the transport header. However, this may not have been set
+ // yet.
+ // TODO(gvisor.dev/issue/170): Parsing the transport header should
+ // ultimately be moved into the iptables.Check codepath as matchers are
+ // added.
+ var tcpHeader header.TCP
+ if pkt.TransportHeader != nil {
+ tcpHeader = header.TCP(pkt.TransportHeader)
+ } else {
+ // The TCP header hasn't been parsed yet. We have to do it here.
+ if len(pkt.Data.First()) < header.TCPMinimumSize {
+ // There's no valid TCP header here, so we hotdrop the
+ // packet.
+ return false, true
+ }
+ tcpHeader = header.TCP(pkt.Data.First())
+ }
+
+ // Check whether the source and destination ports are within the
+ // matching range.
+ if sourcePort := tcpHeader.SourcePort(); sourcePort < tm.sourcePortStart || tm.sourcePortEnd < sourcePort {
+ return false, false
+ }
+ if destinationPort := tcpHeader.DestinationPort(); destinationPort < tm.destinationPortStart || tm.destinationPortEnd < destinationPort {
+ return false, false
+ }
+
+ return true, false
+}
diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go
new file mode 100644
index 000000000..86aa11696
--- /dev/null
+++ b/pkg/sentry/socket/netfilter/udp_matcher.go
@@ -0,0 +1,142 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netfilter
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/iptables"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+const matcherNameUDP = "udp"
+
+func init() {
+ registerMatchMaker(udpMarshaler{})
+}
+
+// udpMarshaler implements matchMaker for UDP matching.
+type udpMarshaler struct{}
+
+// name implements matchMaker.name.
+func (udpMarshaler) name() string {
+ return matcherNameUDP
+}
+
+// marshal implements matchMaker.marshal.
+func (udpMarshaler) marshal(mr iptables.Matcher) []byte {
+ matcher := mr.(*UDPMatcher)
+ xtudp := linux.XTUDP{
+ SourcePortStart: matcher.sourcePortStart,
+ SourcePortEnd: matcher.sourcePortEnd,
+ DestinationPortStart: matcher.destinationPortStart,
+ DestinationPortEnd: matcher.destinationPortEnd,
+ }
+ buf := make([]byte, 0, linux.SizeOfXTUDP)
+ return marshalEntryMatch(matcherNameUDP, binary.Marshal(buf, usermem.ByteOrder, xtudp))
+}
+
+// unmarshal implements matchMaker.unmarshal.
+func (udpMarshaler) unmarshal(buf []byte, filter iptables.IPHeaderFilter) (iptables.Matcher, error) {
+ if len(buf) < linux.SizeOfXTUDP {
+ return nil, fmt.Errorf("buf has insufficient size for UDP match: %d", len(buf))
+ }
+
+ // For alignment reasons, the match's total size may exceed what's
+ // strictly necessary to hold matchData.
+ var matchData linux.XTUDP
+ binary.Unmarshal(buf[:linux.SizeOfXTUDP], usermem.ByteOrder, &matchData)
+ nflog("parseMatchers: parsed XTUDP: %+v", matchData)
+
+ if matchData.InverseFlags != 0 {
+ return nil, fmt.Errorf("unsupported UDP matcher inverse flags set")
+ }
+
+ if filter.Protocol != header.UDPProtocolNumber {
+ return nil, fmt.Errorf("UDP matching is only valid for protocol %d.", header.UDPProtocolNumber)
+ }
+
+ return &UDPMatcher{
+ sourcePortStart: matchData.SourcePortStart,
+ sourcePortEnd: matchData.SourcePortEnd,
+ destinationPortStart: matchData.DestinationPortStart,
+ destinationPortEnd: matchData.DestinationPortEnd,
+ }, nil
+}
+
+// UDPMatcher matches UDP packets and their headers. It implements Matcher.
+type UDPMatcher struct {
+ sourcePortStart uint16
+ sourcePortEnd uint16
+ destinationPortStart uint16
+ destinationPortEnd uint16
+}
+
+// Name implements Matcher.Name.
+func (*UDPMatcher) Name() string {
+ return matcherNameUDP
+}
+
+// Match implements Matcher.Match.
+func (um *UDPMatcher) Match(hook iptables.Hook, pkt tcpip.PacketBuffer, interfaceName string) (bool, bool) {
+ netHeader := header.IPv4(pkt.NetworkHeader)
+
+ // TODO(gvisor.dev/issue/170): Proto checks should ultimately be moved
+ // into the iptables.Check codepath as matchers are added.
+ if netHeader.TransportProtocol() != header.UDPProtocolNumber {
+ return false, false
+ }
+
+ // We dont't match fragments.
+ if frag := netHeader.FragmentOffset(); frag != 0 {
+ if frag == 1 {
+ return false, true
+ }
+ return false, false
+ }
+
+ // Now we need the transport header. However, this may not have been set
+ // yet.
+ // TODO(gvisor.dev/issue/170): Parsing the transport header should
+ // ultimately be moved into the iptables.Check codepath as matchers are
+ // added.
+ var udpHeader header.UDP
+ if pkt.TransportHeader != nil {
+ udpHeader = header.UDP(pkt.TransportHeader)
+ } else {
+ // The UDP header hasn't been parsed yet. We have to do it here.
+ if len(pkt.Data.First()) < header.UDPMinimumSize {
+ // There's no valid UDP header here, so we hotdrop the
+ // packet.
+ return false, true
+ }
+ udpHeader = header.UDP(pkt.Data.First())
+ }
+
+ // Check whether the source and destination ports are within the
+ // matching range.
+ if sourcePort := udpHeader.SourcePort(); sourcePort < um.sourcePortStart || um.sourcePortEnd < sourcePort {
+ return false, false
+ }
+ if destinationPort := udpHeader.DestinationPort(); destinationPort < um.destinationPortStart || um.destinationPortEnd < destinationPort {
+ return false, false
+ }
+
+ return true, false
+}
diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go
index 4ea252ccb..0899c61d1 100644
--- a/pkg/sentry/socket/netlink/message.go
+++ b/pkg/sentry/socket/netlink/message.go
@@ -23,18 +23,11 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-// alignUp rounds a length up to an alignment.
-//
-// Preconditions: align is a power of two.
-func alignUp(length int, align uint) int {
- return (length + int(align) - 1) &^ (int(align) - 1)
-}
-
// alignPad returns the length of padding required for alignment.
//
// Preconditions: align is a power of two.
func alignPad(length int, align uint) int {
- return alignUp(length, align) - length
+ return binary.AlignUp(length, align) - length
}
// Message contains a complete serialized netlink message.
@@ -138,7 +131,7 @@ func (m *Message) Finalize() []byte {
// Align the message. Note that the message length in the header (set
// above) is the useful length of the message, not the total aligned
// length. See net/netlink/af_netlink.c:__nlmsg_put.
- aligned := alignUp(len(m.buf), linux.NLMSG_ALIGNTO)
+ aligned := binary.AlignUp(len(m.buf), linux.NLMSG_ALIGNTO)
m.putZeros(aligned - len(m.buf))
return m.buf
}
@@ -173,7 +166,7 @@ func (m *Message) PutAttr(atype uint16, v interface{}) {
m.Put(v)
// Align the attribute.
- aligned := alignUp(l, linux.NLA_ALIGNTO)
+ aligned := binary.AlignUp(l, linux.NLA_ALIGNTO)
m.putZeros(aligned - l)
}
@@ -190,7 +183,7 @@ func (m *Message) PutAttrString(atype uint16, s string) {
m.putZeros(1)
// Align the attribute.
- aligned := alignUp(l, linux.NLA_ALIGNTO)
+ aligned := binary.AlignUp(l, linux.NLA_ALIGNTO)
m.putZeros(aligned - l)
}
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index ed2fbcceb..e187276c5 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -1318,6 +1318,22 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf
}
return ib, nil
+ case linux.IPV6_RECVTCLASS:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ v, err := ep.GetSockOptBool(tcpip.ReceiveTClassOption)
+ if err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ var o int32
+ if v {
+ o = 1
+ }
+ return o, nil
+
default:
emitUnimplementedEventIPv6(t, name)
}
@@ -1414,6 +1430,21 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
}
return o, nil
+ case linux.IP_PKTINFO:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ v, err := ep.GetSockOptBool(tcpip.ReceiveIPPacketInfoOption)
+ if err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+ var o int32
+ if v {
+ o = 1
+ }
+ return o, nil
+
default:
emitUnimplementedEventIP(t, name)
}
@@ -1762,6 +1793,7 @@ func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte)
linux.IPV6_IPSEC_POLICY,
linux.IPV6_JOIN_ANYCAST,
linux.IPV6_LEAVE_ANYCAST,
+ // TODO(b/148887420): Add support for IPV6_PKTINFO.
linux.IPV6_PKTINFO,
linux.IPV6_ROUTER_ALERT,
linux.IPV6_XFRM_POLICY,
@@ -1787,6 +1819,14 @@ func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte)
}
return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.IPv6TrafficClassOption(v)))
+ case linux.IPV6_RECVTCLASS:
+ v, err := parseIntOrChar(optVal)
+ if err != nil {
+ return err
+ }
+
+ return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTClassOption, v != 0))
+
default:
emitUnimplementedEventIPv6(t, name)
}
@@ -1949,6 +1989,16 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
}
return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveTOSOption, v != 0))
+ case linux.IP_PKTINFO:
+ if len(optVal) == 0 {
+ return nil
+ }
+ v, err := parseIntOrChar(optVal)
+ if err != nil {
+ return err
+ }
+ return syserr.TranslateNetstackError(ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, v != 0))
+
case linux.IP_ADD_SOURCE_MEMBERSHIP,
linux.IP_BIND_ADDRESS_NO_PORT,
linux.IP_BLOCK_SOURCE,
@@ -1964,7 +2014,6 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
linux.IP_NODEFRAG,
linux.IP_OPTIONS,
linux.IP_PASSSEC,
- linux.IP_PKTINFO,
linux.IP_RECVERR,
linux.IP_RECVFRAGSIZE,
linux.IP_RECVOPTS,
@@ -2061,7 +2110,6 @@ func emitUnimplementedEventIPv6(t *kernel.Task, name int) {
linux.IPV6_RECVPATHMTU,
linux.IPV6_RECVPKTINFO,
linux.IPV6_RECVRTHDR,
- linux.IPV6_RECVTCLASS,
linux.IPV6_RTHDR,
linux.IPV6_RTHDRDSTOPTS,
linux.IPV6_TCLASS,
@@ -2395,10 +2443,14 @@ func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSe
func (s *SocketOperations) controlMessages() socket.ControlMessages {
return socket.ControlMessages{
IP: tcpip.ControlMessages{
- HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp,
- Timestamp: s.readCM.Timestamp,
- HasTOS: s.readCM.HasTOS,
- TOS: s.readCM.TOS,
+ HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp,
+ Timestamp: s.readCM.Timestamp,
+ HasTOS: s.readCM.HasTOS,
+ TOS: s.readCM.TOS,
+ HasTClass: s.readCM.HasTClass,
+ TClass: s.readCM.TClass,
+ HasIPPacketInfo: s.readCM.HasIPPacketInfo,
+ PacketInfo: s.readCM.PacketInfo,
},
}
}
diff --git a/pkg/sentry/socket/netstack/provider.go b/pkg/sentry/socket/netstack/provider.go
index 5afff2564..5f181f017 100644
--- a/pkg/sentry/socket/netstack/provider.go
+++ b/pkg/sentry/socket/netstack/provider.go
@@ -75,6 +75,8 @@ func getTransportProtocol(ctx context.Context, stype linux.SockType, protocol in
switch protocol {
case syscall.IPPROTO_ICMP:
return header.ICMPv4ProtocolNumber, true, nil
+ case syscall.IPPROTO_ICMPV6:
+ return header.ICMPv6ProtocolNumber, true, nil
case syscall.IPPROTO_UDP:
return header.UDPProtocolNumber, true, nil
case syscall.IPPROTO_TCP: