summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/BUILD8
-rw-r--r--pkg/sentry/arch/BUILD1
-rw-r--r--pkg/sentry/arch/arch_amd64.s7
-rw-r--r--pkg/sentry/arch/arch_state_x86.go4
-rw-r--r--pkg/sentry/arch/arch_x86.go15
-rw-r--r--pkg/sentry/arch/arch_x86_impl.go43
-rw-r--r--pkg/sentry/fs/copy_up.go2
-rw-r--r--pkg/sentry/fs/fsutil/inode.go20
-rw-r--r--pkg/sentry/fs/gofer/attr.go6
-rw-r--r--pkg/sentry/fs/gofer/context_file.go14
-rw-r--r--pkg/sentry/fs/gofer/inode.go13
-rw-r--r--pkg/sentry/fs/inode.go14
-rw-r--r--pkg/sentry/fs/inode_operations.go13
-rw-r--r--pkg/sentry/fs/inode_overlay.go18
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go9
-rw-r--r--pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go3
-rw-r--r--pkg/sentry/fsimpl/gofer/directory.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go8
-rw-r--r--pkg/sentry/socket/netfilter/extensions.go4
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go9
-rw-r--r--pkg/sentry/socket/netfilter/tcp_matcher.go2
-rw-r--r--pkg/sentry/strace/linux64_amd64.go4
-rw-r--r--pkg/sentry/strace/socket.go215
-rw-r--r--pkg/sentry/strace/strace.go22
-rw-r--r--pkg/sentry/strace/syscalls.go22
-rw-r--r--pkg/sentry/syscalls/linux/linux64_amd64.go27
-rw-r--r--pkg/sentry/syscalls/linux/linux64_arm64.go37
-rw-r--r--pkg/sentry/syscalls/linux/sys_xattr.go244
-rw-r--r--pkg/sentry/usage/memory.go4
29 files changed, 665 insertions, 127 deletions
diff --git a/pkg/sentry/BUILD b/pkg/sentry/BUILD
index e8b794179..e759dc36f 100644
--- a/pkg/sentry/BUILD
+++ b/pkg/sentry/BUILD
@@ -1,13 +1,11 @@
-# This BUILD file defines a package_group that allows for interdependencies for
-# sentry-internal packages.
-
package(licenses = ["notice"])
+# The "internal" package_group should be used as much as possible by packages
+# that should remain Sentry-internal (i.e. not be exposed directly to command
+# line tooling or APIs).
package_group(
name = "internal",
packages = [
- "//cloud/gvisor/gopkg/sentry/...",
- "//cloud/gvisor/sentry/...",
"//pkg/sentry/...",
"//runsc/...",
# Code generated by go_marshal relies on go_marshal libraries.
diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD
index 34c0a867d..e27f21e5e 100644
--- a/pkg/sentry/arch/BUILD
+++ b/pkg/sentry/arch/BUILD
@@ -14,6 +14,7 @@ go_library(
"arch_state_aarch64.go",
"arch_state_x86.go",
"arch_x86.go",
+ "arch_x86_impl.go",
"auxv.go",
"signal.go",
"signal_act.go",
diff --git a/pkg/sentry/arch/arch_amd64.s b/pkg/sentry/arch/arch_amd64.s
index bd61402cf..6c10336e7 100644
--- a/pkg/sentry/arch/arch_amd64.s
+++ b/pkg/sentry/arch/arch_amd64.s
@@ -26,10 +26,11 @@
//
// func initX86FPState(data *FloatingPointData, useXsave bool)
//
-// We need to clear out and initialize an empty fp state area since the sentry
-// may have left sensitive information in the floating point registers.
+// We need to clear out and initialize an empty fp state area since the sentry,
+// or any previous loader, may have left sensitive information in the floating
+// point registers.
//
-// Preconditions: data is zeroed
+// Preconditions: data is zeroed.
TEXT ·initX86FPState(SB), $24-16
// Save MXCSR (callee-save)
STMXCSR mxcsr-8(SP)
diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go
index d388ee9cf..e35c9214a 100644
--- a/pkg/sentry/arch/arch_state_x86.go
+++ b/pkg/sentry/arch/arch_state_x86.go
@@ -43,8 +43,8 @@ func (e ErrFloatingPoint) Error() string {
// and SSE state, so this is the equivalent XSTATE_BV value.
const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE
-// afterLoad is invoked by stateify.
-func (s *State) afterLoad() {
+// afterLoadFPState is invoked by afterLoad.
+func (s *State) afterLoadFPState() {
old := s.x86FPState
// Recreate the slice. This is done to ensure that it is aligned
diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go
index 3db8bd34b..88b40a9d1 100644
--- a/pkg/sentry/arch/arch_x86.go
+++ b/pkg/sentry/arch/arch_x86.go
@@ -155,21 +155,6 @@ func NewFloatingPointData() *FloatingPointData {
return (*FloatingPointData)(&(newX86FPState()[0]))
}
-// State contains the common architecture bits for X86 (the build tag of this
-// file ensures it's only built on x86).
-//
-// +stateify savable
-type State struct {
- // The system registers.
- Regs syscall.PtraceRegs `state:".(syscallPtraceRegs)"`
-
- // Our floating point state.
- x86FPState `state:"wait"`
-
- // FeatureSet is a pointer to the currently active feature set.
- FeatureSet *cpuid.FeatureSet
-}
-
// Proto returns a protobuf representation of the system registers in State.
func (s State) Proto() *rpb.Registers {
regs := &rpb.AMD64Registers{
diff --git a/pkg/sentry/arch/arch_x86_impl.go b/pkg/sentry/arch/arch_x86_impl.go
new file mode 100644
index 000000000..04ac283c6
--- /dev/null
+++ b/pkg/sentry/arch/arch_x86_impl.go
@@ -0,0 +1,43 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64 i386
+
+package arch
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/cpuid"
+)
+
+// State contains the common architecture bits for X86 (the build tag of this
+// file ensures it's only built on x86).
+//
+// +stateify savable
+type State struct {
+ // The system registers.
+ Regs syscall.PtraceRegs `state:".(syscallPtraceRegs)"`
+
+ // Our floating point state.
+ x86FPState `state:"wait"`
+
+ // FeatureSet is a pointer to the currently active feature set.
+ FeatureSet *cpuid.FeatureSet
+}
+
+// afterLoad is invoked by stateify.
+func (s *State) afterLoad() {
+ s.afterLoadFPState()
+}
diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go
index f6c79e51b..b060a12ff 100644
--- a/pkg/sentry/fs/copy_up.go
+++ b/pkg/sentry/fs/copy_up.go
@@ -401,7 +401,7 @@ func copyAttributesLocked(ctx context.Context, upper *Inode, lower *Inode) error
if err != nil {
return err
}
- lowerXattr, err := lower.ListXattr(ctx)
+ lowerXattr, err := lower.ListXattr(ctx, linux.XATTR_SIZE_MAX)
if err != nil && err != syserror.EOPNOTSUPP {
return err
}
diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go
index 252830572..daecc4ffe 100644
--- a/pkg/sentry/fs/fsutil/inode.go
+++ b/pkg/sentry/fs/fsutil/inode.go
@@ -247,7 +247,7 @@ func (i *InodeSimpleExtendedAttributes) SetXattr(_ context.Context, _ *fs.Inode,
}
// ListXattr implements fs.InodeOperations.ListXattr.
-func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) {
+func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) {
i.mu.RLock()
names := make(map[string]struct{}, len(i.xattrs))
for name := range i.xattrs {
@@ -257,6 +257,17 @@ func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (m
return names, nil
}
+// RemoveXattr implements fs.InodeOperations.RemoveXattr.
+func (i *InodeSimpleExtendedAttributes) RemoveXattr(_ context.Context, _ *fs.Inode, name string) error {
+ i.mu.RLock()
+ defer i.mu.RUnlock()
+ if _, ok := i.xattrs[name]; ok {
+ delete(i.xattrs, name)
+ return nil
+ }
+ return syserror.ENOATTR
+}
+
// staticFile is a file with static contents. It is returned by
// InodeStaticFileGetter.GetFile.
//
@@ -460,10 +471,15 @@ func (InodeNoExtendedAttributes) SetXattr(context.Context, *fs.Inode, string, st
}
// ListXattr implements fs.InodeOperations.ListXattr.
-func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) {
+func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) {
return nil, syserror.EOPNOTSUPP
}
+// RemoveXattr implements fs.InodeOperations.RemoveXattr.
+func (InodeNoExtendedAttributes) RemoveXattr(context.Context, *fs.Inode, string) error {
+ return syserror.EOPNOTSUPP
+}
+
// InodeNoopRelease implements fs.InodeOperations.Release as a noop.
type InodeNoopRelease struct{}
diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go
index 71cccdc34..6db4b762d 100644
--- a/pkg/sentry/fs/gofer/attr.go
+++ b/pkg/sentry/fs/gofer/attr.go
@@ -88,8 +88,9 @@ func bsize(pattr p9.Attr) int64 {
if pattr.BlockSize > 0 {
return int64(pattr.BlockSize)
}
- // Some files may have no clue of their block size. Better not to report
- // something misleading or buggy and have a safe default.
+ // Some files, particularly those that are not on a local file system,
+ // may have no clue of their block size. Better not to report something
+ // misleading or buggy and have a safe default.
return usermem.PageSize
}
@@ -149,6 +150,7 @@ func links(valid p9.AttrMask, pattr p9.Attr) uint64 {
}
// This node is likely backed by a file system that doesn't support links.
+ //
// We could readdir() and count children directories to provide an accurate
// link count. However this may be expensive since the gofer may be backed by remote
// storage. Instead, simply return 2 links for directories and 1 for everything else
diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go
index 3da818aed..125907d70 100644
--- a/pkg/sentry/fs/gofer/context_file.go
+++ b/pkg/sentry/fs/gofer/context_file.go
@@ -73,6 +73,20 @@ func (c *contextFile) setXattr(ctx context.Context, name, value string, flags ui
return err
}
+func (c *contextFile) listXattr(ctx context.Context, size uint64) (map[string]struct{}, error) {
+ ctx.UninterruptibleSleepStart(false)
+ xattrs, err := c.file.ListXattr(size)
+ ctx.UninterruptibleSleepFinish(false)
+ return xattrs, err
+}
+
+func (c *contextFile) removeXattr(ctx context.Context, name string) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := c.file.RemoveXattr(name)
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
func (c *contextFile) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error {
ctx.UninterruptibleSleepStart(false)
err := c.file.Allocate(mode, offset, length)
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
index ac28174d2..1c934981b 100644
--- a/pkg/sentry/fs/gofer/inode.go
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -604,18 +604,23 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length
}
// GetXattr implements fs.InodeOperations.GetXattr.
-func (i *inodeOperations) GetXattr(ctx context.Context, inode *fs.Inode, name string, size uint64) (string, error) {
+func (i *inodeOperations) GetXattr(ctx context.Context, _ *fs.Inode, name string, size uint64) (string, error) {
return i.fileState.file.getXattr(ctx, name, size)
}
// SetXattr implements fs.InodeOperations.SetXattr.
-func (i *inodeOperations) SetXattr(ctx context.Context, inode *fs.Inode, name string, value string, flags uint32) error {
+func (i *inodeOperations) SetXattr(ctx context.Context, _ *fs.Inode, name string, value string, flags uint32) error {
return i.fileState.file.setXattr(ctx, name, value, flags)
}
// ListXattr implements fs.InodeOperations.ListXattr.
-func (i *inodeOperations) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) {
- return nil, syscall.EOPNOTSUPP
+func (i *inodeOperations) ListXattr(ctx context.Context, _ *fs.Inode, size uint64) (map[string]struct{}, error) {
+ return i.fileState.file.listXattr(ctx, size)
+}
+
+// RemoveXattr implements fs.InodeOperations.RemoveXattr.
+func (i *inodeOperations) RemoveXattr(ctx context.Context, _ *fs.Inode, name string) error {
+ return i.fileState.file.removeXattr(ctx, name)
}
// Allocate implements fs.InodeOperations.Allocate.
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index b66c091ab..55fb71c16 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -278,11 +278,19 @@ func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, fla
}
// ListXattr calls i.InodeOperations.ListXattr with i as the Inode.
-func (i *Inode) ListXattr(ctx context.Context) (map[string]struct{}, error) {
+func (i *Inode) ListXattr(ctx context.Context, size uint64) (map[string]struct{}, error) {
if i.overlay != nil {
- return overlayListXattr(ctx, i.overlay)
+ return overlayListXattr(ctx, i.overlay, size)
}
- return i.InodeOperations.ListXattr(ctx, i)
+ return i.InodeOperations.ListXattr(ctx, i, size)
+}
+
+// RemoveXattr calls i.InodeOperations.RemoveXattr with i as the Inode.
+func (i *Inode) RemoveXattr(ctx context.Context, d *Dirent, name string) error {
+ if i.overlay != nil {
+ return overlayRemoveXattr(ctx, i.overlay, d, name)
+ }
+ return i.InodeOperations.RemoveXattr(ctx, i, name)
}
// CheckPermission will check if the caller may access this file in the
diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go
index 70f2eae96..2bbfb72ef 100644
--- a/pkg/sentry/fs/inode_operations.go
+++ b/pkg/sentry/fs/inode_operations.go
@@ -190,7 +190,18 @@ type InodeOperations interface {
// ListXattr returns the set of all extended attributes names that
// have values. Inodes that do not support extended attributes return
// EOPNOTSUPP.
- ListXattr(ctx context.Context, inode *Inode) (map[string]struct{}, error)
+ //
+ // If this is called through the listxattr(2) syscall, size indicates the
+ // size of the buffer that the application has allocated to hold the
+ // attribute list. If the list would be larger than size, implementations may
+ // return ERANGE to indicate that the buffer is too small, but they are also
+ // free to ignore the hint entirely. All size checking is done independently
+ // at the syscall layer.
+ ListXattr(ctx context.Context, inode *Inode, size uint64) (map[string]struct{}, error)
+
+ // RemoveXattr removes an extended attribute specified by name. Inodes that
+ // do not support extended attributes return EOPNOTSUPP.
+ RemoveXattr(ctx context.Context, inode *Inode, name string) error
// Check determines whether an Inode can be accessed with the
// requested permission mask using the context (which gives access
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 4729b4aac..5ada33a32 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -564,15 +564,15 @@ func overlaySetxattr(ctx context.Context, o *overlayEntry, d *Dirent, name, valu
return o.upper.SetXattr(ctx, d, name, value, flags)
}
-func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{}, error) {
+func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[string]struct{}, error) {
o.copyMu.RLock()
defer o.copyMu.RUnlock()
var names map[string]struct{}
var err error
if o.upper != nil {
- names, err = o.upper.ListXattr(ctx)
+ names, err = o.upper.ListXattr(ctx, size)
} else {
- names, err = o.lower.ListXattr(ctx)
+ names, err = o.lower.ListXattr(ctx, size)
}
for name := range names {
// Same as overlayGetXattr, we shouldn't forward along
@@ -584,6 +584,18 @@ func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{}
return names, err
}
+func overlayRemoveXattr(ctx context.Context, o *overlayEntry, d *Dirent, name string) error {
+ // Don't allow changes to overlay xattrs through a removexattr syscall.
+ if strings.HasPrefix(XattrOverlayPrefix, name) {
+ return syserror.EPERM
+ }
+
+ if err := copyUp(ctx, d); err != nil {
+ return err
+ }
+ return o.upper.RemoveXattr(ctx, d, name)
+}
+
func overlayCheck(ctx context.Context, o *overlayEntry, p PermMask) error {
o.copyMu.RLock()
// Hot path. Avoid defers.
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index c00cef0a5..3c2b583ae 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -159,8 +159,13 @@ func (d *Dir) SetXattr(ctx context.Context, i *fs.Inode, name, value string, fla
}
// ListXattr implements fs.InodeOperations.ListXattr.
-func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode) (map[string]struct{}, error) {
- return d.ramfsDir.ListXattr(ctx, i)
+func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode, size uint64) (map[string]struct{}, error) {
+ return d.ramfsDir.ListXattr(ctx, i, size)
+}
+
+// RemoveXattr implements fs.InodeOperations.RemoveXattr.
+func (d *Dir) RemoveXattr(ctx context.Context, i *fs.Inode, name string) error {
+ return d.ramfsDir.RemoveXattr(ctx, i, name)
}
// Lookup implements fs.InodeOperations.Lookup.
diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
index d1436b943..2015a8871 100644
--- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
+++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
@@ -15,6 +15,9 @@
// These benchmarks emulate memfs benchmarks. Ext4 images must be created
// before this benchmark is run using the `make_deep_ext4.sh` script at
// /tmp/image-{depth}.ext4 for all the depths tested below.
+//
+// The benchmark itself cannot run the script because the script requires
+// sudo privileges to create the file system images.
package benchmark_test
import (
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index baa2cdd8e..6d4ebc2bf 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -87,6 +87,10 @@ func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
// to assume that directory fids have the correct semantics, and translates
// struct file_operations::readdir calls directly to readdir RPCs), but is
// consistent with VFS1.
+ //
+ // NOTE(b/135560623): In particular, some gofer implementations may not
+ // retain state between calls to Readdir, so may not provide a coherent
+ // directory stream across in the presence of mutation.
d.fs.renameMu.RLock()
defer d.fs.renameMu.RUnlock()
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 9d65d0179..e49303c26 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -111,10 +111,10 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir
// Dentry isn't cached; it either doesn't exist or failed
// revalidation. Attempt to resolve it via Lookup.
//
- // FIXME(b/144498111): Inode.Lookup() should return *(kernfs.)Dentry,
- // not *vfs.Dentry, since (kernfs.)Filesystem assumes that all dentries
- // in the filesystem are (kernfs.)Dentry and performs vfs.DentryImpl
- // casts accordingly.
+ // FIXME(gvisor.dev/issue/1193): Inode.Lookup() should return
+ // *(kernfs.)Dentry, not *vfs.Dentry, since (kernfs.)Filesystem assumes
+ // that all dentries in the filesystem are (kernfs.)Dentry and performs
+ // vfs.DentryImpl casts accordingly.
var err error
childVFSD, err = parent.inode.Lookup(ctx, name)
if err != nil {
diff --git a/pkg/sentry/socket/netfilter/extensions.go b/pkg/sentry/socket/netfilter/extensions.go
index 3082976cd..22fd0ebe7 100644
--- a/pkg/sentry/socket/netfilter/extensions.go
+++ b/pkg/sentry/socket/netfilter/extensions.go
@@ -45,6 +45,8 @@ type matchMaker interface {
unmarshal(buf []byte, filter iptables.IPHeaderFilter) (iptables.Matcher, error)
}
+// matchMakers maps the name of supported matchers to the matchMaker that
+// marshals and unmarshals it. It is immutable after package initialization.
var matchMakers = map[string]matchMaker{}
// registermatchMaker should be called by match extensions to register them
@@ -59,7 +61,7 @@ func registerMatchMaker(mm matchMaker) {
func marshalMatcher(matcher iptables.Matcher) []byte {
matchMaker, ok := matchMakers[matcher.Name()]
if !ok {
- panic(fmt.Errorf("Unknown matcher of type %T.", matcher))
+ panic(fmt.Sprintf("Unknown matcher of type %T.", matcher))
}
return matchMaker.marshal(matcher)
}
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index ea43a0ce3..ea02627de 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -149,7 +149,6 @@ func FillDefaultIPTables(stack *stack.Stack) {
stack.SetIPTables(ipt)
}
-// TODO: Return proto.
// convertNetstackToBinary converts the iptables as stored in netstack to the
// format expected by the iptables tool. Linux stores each table as a binary
// blob that can only be traversed by parsing a bit, reading some offsets,
@@ -161,7 +160,7 @@ func convertNetstackToBinary(tablename string, table iptables.Table) (linux.Kern
// The table name has to fit in the struct.
if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
- return linux.KernelIPTGetEntries{}, metadata{}, fmt.Errorf("Table name %q too long.", tablename)
+ return linux.KernelIPTGetEntries{}, metadata{}, fmt.Errorf("table name %q too long.", tablename)
}
copy(entries.Name[:], tablename)
@@ -302,7 +301,7 @@ func translateToStandardVerdict(val int32) (iptables.Verdict, error) {
case linux.NF_RETURN:
return iptables.Invalid, errors.New("unsupported iptables verdict RETURN")
default:
- return iptables.Invalid, fmt.Errorf("unknown iptables verdict %d.", val)
+ return iptables.Invalid, fmt.Errorf("unknown iptables verdict %d", val)
}
}
@@ -553,12 +552,12 @@ func parseTarget(optVal []byte) (iptables.Target, error) {
case errorTargetName:
return iptables.ErrorTarget{}, nil
default:
- return nil, fmt.Errorf("Unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String())
+ return nil, fmt.Errorf("unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String())
}
}
// Unknown target.
- return nil, fmt.Errorf("Unknown target %q doesn't exist or isn't supported yet.", target.Name.String())
+ return nil, fmt.Errorf("unknown target %q doesn't exist or isn't supported yet.", target.Name.String())
}
func filterFromIPTIP(iptip linux.IPTIP) (iptables.IPHeaderFilter, error) {
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
index 6b2f4c31a..f9945e214 100644
--- a/pkg/sentry/socket/netfilter/tcp_matcher.go
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -48,7 +48,7 @@ func (tcpMarshaler) marshal(mr iptables.Matcher) []byte {
DestinationPortStart: matcher.destinationPortStart,
DestinationPortEnd: matcher.destinationPortEnd,
}
- buf := make([]byte, 0, linux.SizeOfXTUDP)
+ buf := make([]byte, 0, linux.SizeOfXTTCP)
return marshalEntryMatch(matcherNameTCP, binary.Marshal(buf, usermem.ByteOrder, xttcp))
}
diff --git a/pkg/sentry/strace/linux64_amd64.go b/pkg/sentry/strace/linux64_amd64.go
index 85ec66fd3..a4de545e9 100644
--- a/pkg/sentry/strace/linux64_amd64.go
+++ b/pkg/sentry/strace/linux64_amd64.go
@@ -78,8 +78,8 @@ var linuxAMD64 = SyscallMap{
51: makeSyscallInfo("getsockname", FD, PostSockAddr, SockLen),
52: makeSyscallInfo("getpeername", FD, PostSockAddr, SockLen),
53: makeSyscallInfo("socketpair", SockFamily, SockType, SockProtocol, Hex),
- 54: makeSyscallInfo("setsockopt", FD, Hex, Hex, Hex, Hex),
- 55: makeSyscallInfo("getsockopt", FD, Hex, Hex, Hex, Hex),
+ 54: makeSyscallInfo("setsockopt", FD, SockOptLevel, SockOptName, SetSockOptVal, Hex /* length by value, not a pointer */),
+ 55: makeSyscallInfo("getsockopt", FD, SockOptLevel, SockOptName, GetSockOptVal, SockLen),
56: makeSyscallInfo("clone", CloneFlags, Hex, Hex, Hex, Hex),
57: makeSyscallInfo("fork"),
58: makeSyscallInfo("vfork"),
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index d2079c85f..f7ff4573e 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -419,3 +419,218 @@ func sockFlags(flags int32) string {
}
return SocketFlagSet.Parse(uint64(flags))
}
+
+func getSockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen usermem.Addr, maximumBlobSize uint, rval uintptr) string {
+ if int(rval) < 0 {
+ return hexNum(uint64(optVal))
+ }
+ if optVal == 0 {
+ return "null"
+ }
+ l, err := copySockLen(t, optLen)
+ if err != nil {
+ return fmt.Sprintf("%#x {error reading length: %v}", optLen, err)
+ }
+ return sockOptVal(t, level, optname, optVal, uint64(l), maximumBlobSize)
+}
+
+func sockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen uint64, maximumBlobSize uint) string {
+ switch optLen {
+ case 1:
+ var v uint8
+ _, err := t.CopyIn(optVal, &v)
+ if err != nil {
+ return fmt.Sprintf("%#x {error reading optval: %v}", optVal, err)
+ }
+ return fmt.Sprintf("%#x {value=%v}", optVal, v)
+ case 2:
+ var v uint16
+ _, err := t.CopyIn(optVal, &v)
+ if err != nil {
+ return fmt.Sprintf("%#x {error reading optval: %v}", optVal, err)
+ }
+ return fmt.Sprintf("%#x {value=%v}", optVal, v)
+ case 4:
+ var v uint32
+ _, err := t.CopyIn(optVal, &v)
+ if err != nil {
+ return fmt.Sprintf("%#x {error reading optval: %v}", optVal, err)
+ }
+ return fmt.Sprintf("%#x {value=%v}", optVal, v)
+ default:
+ return dump(t, optVal, uint(optLen), maximumBlobSize)
+ }
+}
+
+var sockOptLevels = abi.ValueSet{
+ linux.SOL_IP: "SOL_IP",
+ linux.SOL_SOCKET: "SOL_SOCKET",
+ linux.SOL_TCP: "SOL_TCP",
+ linux.SOL_UDP: "SOL_UDP",
+ linux.SOL_IPV6: "SOL_IPV6",
+ linux.SOL_ICMPV6: "SOL_ICMPV6",
+ linux.SOL_RAW: "SOL_RAW",
+ linux.SOL_PACKET: "SOL_PACKET",
+ linux.SOL_NETLINK: "SOL_NETLINK",
+}
+
+var sockOptNames = map[uint64]abi.ValueSet{
+ linux.SOL_IP: {
+ linux.IP_TTL: "IP_TTL",
+ linux.IP_MULTICAST_TTL: "IP_MULTICAST_TTL",
+ linux.IP_MULTICAST_IF: "IP_MULTICAST_IF",
+ linux.IP_MULTICAST_LOOP: "IP_MULTICAST_LOOP",
+ linux.IP_TOS: "IP_TOS",
+ linux.IP_RECVTOS: "IP_RECVTOS",
+ linux.IPT_SO_GET_INFO: "IPT_SO_GET_INFO",
+ linux.IPT_SO_GET_ENTRIES: "IPT_SO_GET_ENTRIES",
+ linux.IP_ADD_MEMBERSHIP: "IP_ADD_MEMBERSHIP",
+ linux.IP_DROP_MEMBERSHIP: "IP_DROP_MEMBERSHIP",
+ linux.MCAST_JOIN_GROUP: "MCAST_JOIN_GROUP",
+ linux.IP_ADD_SOURCE_MEMBERSHIP: "IP_ADD_SOURCE_MEMBERSHIP",
+ linux.IP_BIND_ADDRESS_NO_PORT: "IP_BIND_ADDRESS_NO_PORT",
+ linux.IP_BLOCK_SOURCE: "IP_BLOCK_SOURCE",
+ linux.IP_CHECKSUM: "IP_CHECKSUM",
+ linux.IP_DROP_SOURCE_MEMBERSHIP: "IP_DROP_SOURCE_MEMBERSHIP",
+ linux.IP_FREEBIND: "IP_FREEBIND",
+ linux.IP_HDRINCL: "IP_HDRINCL",
+ linux.IP_IPSEC_POLICY: "IP_IPSEC_POLICY",
+ linux.IP_MINTTL: "IP_MINTTL",
+ linux.IP_MSFILTER: "IP_MSFILTER",
+ linux.IP_MTU_DISCOVER: "IP_MTU_DISCOVER",
+ linux.IP_MULTICAST_ALL: "IP_MULTICAST_ALL",
+ linux.IP_NODEFRAG: "IP_NODEFRAG",
+ linux.IP_OPTIONS: "IP_OPTIONS",
+ linux.IP_PASSSEC: "IP_PASSSEC",
+ linux.IP_PKTINFO: "IP_PKTINFO",
+ linux.IP_RECVERR: "IP_RECVERR",
+ linux.IP_RECVFRAGSIZE: "IP_RECVFRAGSIZE",
+ linux.IP_RECVOPTS: "IP_RECVOPTS",
+ linux.IP_RECVORIGDSTADDR: "IP_RECVORIGDSTADDR",
+ linux.IP_RECVTTL: "IP_RECVTTL",
+ linux.IP_RETOPTS: "IP_RETOPTS",
+ linux.IP_TRANSPARENT: "IP_TRANSPARENT",
+ linux.IP_UNBLOCK_SOURCE: "IP_UNBLOCK_SOURCE",
+ linux.IP_UNICAST_IF: "IP_UNICAST_IF",
+ linux.IP_XFRM_POLICY: "IP_XFRM_POLICY",
+ linux.MCAST_BLOCK_SOURCE: "MCAST_BLOCK_SOURCE",
+ linux.MCAST_JOIN_SOURCE_GROUP: "MCAST_JOIN_SOURCE_GROUP",
+ linux.MCAST_LEAVE_GROUP: "MCAST_LEAVE_GROUP",
+ linux.MCAST_LEAVE_SOURCE_GROUP: "MCAST_LEAVE_SOURCE_GROUP",
+ linux.MCAST_MSFILTER: "MCAST_MSFILTER",
+ linux.MCAST_UNBLOCK_SOURCE: "MCAST_UNBLOCK_SOURCE",
+ linux.IP_ROUTER_ALERT: "IP_ROUTER_ALERT",
+ linux.IP_PKTOPTIONS: "IP_PKTOPTIONS",
+ linux.IP_MTU: "IP_MTU",
+ },
+ linux.SOL_SOCKET: {
+ linux.SO_ERROR: "SO_ERROR",
+ linux.SO_PEERCRED: "SO_PEERCRED",
+ linux.SO_PASSCRED: "SO_PASSCRED",
+ linux.SO_SNDBUF: "SO_SNDBUF",
+ linux.SO_RCVBUF: "SO_RCVBUF",
+ linux.SO_REUSEADDR: "SO_REUSEADDR",
+ linux.SO_REUSEPORT: "SO_REUSEPORT",
+ linux.SO_BINDTODEVICE: "SO_BINDTODEVICE",
+ linux.SO_BROADCAST: "SO_BROADCAST",
+ linux.SO_KEEPALIVE: "SO_KEEPALIVE",
+ linux.SO_LINGER: "SO_LINGER",
+ linux.SO_SNDTIMEO: "SO_SNDTIMEO",
+ linux.SO_RCVTIMEO: "SO_RCVTIMEO",
+ linux.SO_OOBINLINE: "SO_OOBINLINE",
+ linux.SO_TIMESTAMP: "SO_TIMESTAMP",
+ },
+ linux.SOL_TCP: {
+ linux.TCP_NODELAY: "TCP_NODELAY",
+ linux.TCP_CORK: "TCP_CORK",
+ linux.TCP_QUICKACK: "TCP_QUICKACK",
+ linux.TCP_MAXSEG: "TCP_MAXSEG",
+ linux.TCP_KEEPIDLE: "TCP_KEEPIDLE",
+ linux.TCP_KEEPINTVL: "TCP_KEEPINTVL",
+ linux.TCP_USER_TIMEOUT: "TCP_USER_TIMEOUT",
+ linux.TCP_INFO: "TCP_INFO",
+ linux.TCP_CC_INFO: "TCP_CC_INFO",
+ linux.TCP_NOTSENT_LOWAT: "TCP_NOTSENT_LOWAT",
+ linux.TCP_ZEROCOPY_RECEIVE: "TCP_ZEROCOPY_RECEIVE",
+ linux.TCP_CONGESTION: "TCP_CONGESTION",
+ linux.TCP_LINGER2: "TCP_LINGER2",
+ linux.TCP_DEFER_ACCEPT: "TCP_DEFER_ACCEPT",
+ linux.TCP_REPAIR_OPTIONS: "TCP_REPAIR_OPTIONS",
+ linux.TCP_INQ: "TCP_INQ",
+ linux.TCP_FASTOPEN: "TCP_FASTOPEN",
+ linux.TCP_FASTOPEN_CONNECT: "TCP_FASTOPEN_CONNECT",
+ linux.TCP_FASTOPEN_KEY: "TCP_FASTOPEN_KEY",
+ linux.TCP_FASTOPEN_NO_COOKIE: "TCP_FASTOPEN_NO_COOKIE",
+ linux.TCP_KEEPCNT: "TCP_KEEPCNT",
+ linux.TCP_QUEUE_SEQ: "TCP_QUEUE_SEQ",
+ linux.TCP_REPAIR: "TCP_REPAIR",
+ linux.TCP_REPAIR_QUEUE: "TCP_REPAIR_QUEUE",
+ linux.TCP_REPAIR_WINDOW: "TCP_REPAIR_WINDOW",
+ linux.TCP_SAVED_SYN: "TCP_SAVED_SYN",
+ linux.TCP_SAVE_SYN: "TCP_SAVE_SYN",
+ linux.TCP_SYNCNT: "TCP_SYNCNT",
+ linux.TCP_THIN_DUPACK: "TCP_THIN_DUPACK",
+ linux.TCP_THIN_LINEAR_TIMEOUTS: "TCP_THIN_LINEAR_TIMEOUTS",
+ linux.TCP_TIMESTAMP: "TCP_TIMESTAMP",
+ linux.TCP_ULP: "TCP_ULP",
+ linux.TCP_WINDOW_CLAMP: "TCP_WINDOW_CLAMP",
+ },
+ linux.SOL_IPV6: {
+ linux.IPV6_V6ONLY: "IPV6_V6ONLY",
+ linux.IPV6_PATHMTU: "IPV6_PATHMTU",
+ linux.IPV6_TCLASS: "IPV6_TCLASS",
+ linux.IPV6_ADD_MEMBERSHIP: "IPV6_ADD_MEMBERSHIP",
+ linux.IPV6_DROP_MEMBERSHIP: "IPV6_DROP_MEMBERSHIP",
+ linux.IPV6_IPSEC_POLICY: "IPV6_IPSEC_POLICY",
+ linux.IPV6_JOIN_ANYCAST: "IPV6_JOIN_ANYCAST",
+ linux.IPV6_LEAVE_ANYCAST: "IPV6_LEAVE_ANYCAST",
+ linux.IPV6_PKTINFO: "IPV6_PKTINFO",
+ linux.IPV6_ROUTER_ALERT: "IPV6_ROUTER_ALERT",
+ linux.IPV6_XFRM_POLICY: "IPV6_XFRM_POLICY",
+ linux.MCAST_BLOCK_SOURCE: "MCAST_BLOCK_SOURCE",
+ linux.MCAST_JOIN_GROUP: "MCAST_JOIN_GROUP",
+ linux.MCAST_JOIN_SOURCE_GROUP: "MCAST_JOIN_SOURCE_GROUP",
+ linux.MCAST_LEAVE_GROUP: "MCAST_LEAVE_GROUP",
+ linux.MCAST_LEAVE_SOURCE_GROUP: "MCAST_LEAVE_SOURCE_GROUP",
+ linux.MCAST_UNBLOCK_SOURCE: "MCAST_UNBLOCK_SOURCE",
+ linux.IPV6_2292DSTOPTS: "IPV6_2292DSTOPTS",
+ linux.IPV6_2292HOPLIMIT: "IPV6_2292HOPLIMIT",
+ linux.IPV6_2292HOPOPTS: "IPV6_2292HOPOPTS",
+ linux.IPV6_2292PKTINFO: "IPV6_2292PKTINFO",
+ linux.IPV6_2292PKTOPTIONS: "IPV6_2292PKTOPTIONS",
+ linux.IPV6_2292RTHDR: "IPV6_2292RTHDR",
+ linux.IPV6_ADDR_PREFERENCES: "IPV6_ADDR_PREFERENCES",
+ linux.IPV6_AUTOFLOWLABEL: "IPV6_AUTOFLOWLABEL",
+ linux.IPV6_DONTFRAG: "IPV6_DONTFRAG",
+ linux.IPV6_DSTOPTS: "IPV6_DSTOPTS",
+ linux.IPV6_FLOWINFO: "IPV6_FLOWINFO",
+ linux.IPV6_FLOWINFO_SEND: "IPV6_FLOWINFO_SEND",
+ linux.IPV6_FLOWLABEL_MGR: "IPV6_FLOWLABEL_MGR",
+ linux.IPV6_FREEBIND: "IPV6_FREEBIND",
+ linux.IPV6_HOPOPTS: "IPV6_HOPOPTS",
+ linux.IPV6_MINHOPCOUNT: "IPV6_MINHOPCOUNT",
+ linux.IPV6_MTU: "IPV6_MTU",
+ linux.IPV6_MTU_DISCOVER: "IPV6_MTU_DISCOVER",
+ linux.IPV6_MULTICAST_ALL: "IPV6_MULTICAST_ALL",
+ linux.IPV6_MULTICAST_HOPS: "IPV6_MULTICAST_HOPS",
+ linux.IPV6_MULTICAST_IF: "IPV6_MULTICAST_IF",
+ linux.IPV6_MULTICAST_LOOP: "IPV6_MULTICAST_LOOP",
+ linux.IPV6_RECVDSTOPTS: "IPV6_RECVDSTOPTS",
+ linux.IPV6_RECVERR: "IPV6_RECVERR",
+ linux.IPV6_RECVFRAGSIZE: "IPV6_RECVFRAGSIZE",
+ linux.IPV6_RECVHOPLIMIT: "IPV6_RECVHOPLIMIT",
+ linux.IPV6_RECVHOPOPTS: "IPV6_RECVHOPOPTS",
+ linux.IPV6_RECVORIGDSTADDR: "IPV6_RECVORIGDSTADDR",
+ linux.IPV6_RECVPATHMTU: "IPV6_RECVPATHMTU",
+ linux.IPV6_RECVPKTINFO: "IPV6_RECVPKTINFO",
+ linux.IPV6_RECVRTHDR: "IPV6_RECVRTHDR",
+ linux.IPV6_RECVTCLASS: "IPV6_RECVTCLASS",
+ linux.IPV6_RTHDR: "IPV6_RTHDR",
+ linux.IPV6_RTHDRDSTOPTS: "IPV6_RTHDRDSTOPTS",
+ linux.IPV6_TRANSPARENT: "IPV6_TRANSPARENT",
+ linux.IPV6_UNICAST_HOPS: "IPV6_UNICAST_HOPS",
+ linux.IPV6_UNICAST_IF: "IPV6_UNICAST_IF",
+ linux.MCAST_MSFILTER: "MCAST_MSFILTER",
+ linux.IPV6_ADDRFORM: "IPV6_ADDRFORM",
+ },
+}
diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go
index 3fc4a47fc..a796b2396 100644
--- a/pkg/sentry/strace/strace.go
+++ b/pkg/sentry/strace/strace.go
@@ -55,6 +55,14 @@ var ItimerTypes = abi.ValueSet{
linux.ITIMER_PROF: "ITIMER_PROF",
}
+func hexNum(num uint64) string {
+ return "0x" + strconv.FormatUint(num, 16)
+}
+
+func hexArg(arg arch.SyscallArgument) string {
+ return hexNum(arg.Uint64())
+}
+
func iovecs(t *kernel.Task, addr usermem.Addr, iovcnt int, printContent bool, maxBytes uint64) string {
if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV {
return fmt.Sprintf("%#x (error decoding iovecs: invalid iovcnt)", addr)
@@ -389,6 +397,12 @@ func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlo
output = append(output, path(t, args[arg].Pointer()))
case ExecveStringVector:
output = append(output, stringVector(t, args[arg].Pointer()))
+ case SetSockOptVal:
+ output = append(output, sockOptVal(t, args[arg-2].Uint64() /* level */, args[arg-1].Uint64() /* optName */, args[arg].Pointer() /* optVal */, args[arg+1].Uint64() /* optLen */, maximumBlobSize))
+ case SockOptLevel:
+ output = append(output, sockOptLevels.Parse(args[arg].Uint64()))
+ case SockOptName:
+ output = append(output, sockOptNames[args[arg-1].Uint64() /* level */].Parse(args[arg].Uint64()))
case SockAddr:
output = append(output, sockAddr(t, args[arg].Pointer(), uint32(args[arg+1].Uint64())))
case SockLen:
@@ -446,7 +460,7 @@ func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlo
case Hex:
fallthrough
default:
- output = append(output, "0x"+strconv.FormatUint(args[arg].Uint64(), 16))
+ output = append(output, hexArg(args[arg]))
}
}
@@ -507,6 +521,12 @@ func (i *SyscallInfo) post(t *kernel.Task, args arch.SyscallArguments, rval uint
output[arg] = capData(t, args[arg-1].Pointer(), args[arg].Pointer())
case PollFDs:
output[arg] = pollFDs(t, args[arg].Pointer(), uint(args[arg+1].Uint()), true)
+ case GetSockOptVal:
+ output[arg] = getSockOptVal(t, args[arg-2].Uint64() /* level */, args[arg-1].Uint64() /* optName */, args[arg].Pointer() /* optVal */, args[arg+1].Pointer() /* optLen */, maximumBlobSize, rval)
+ case SetSockOptVal:
+ // No need to print the value again. While it usually
+ // isn't, the string version of this arg can be long.
+ output[arg] = hexArg(args[arg])
}
}
}
diff --git a/pkg/sentry/strace/syscalls.go b/pkg/sentry/strace/syscalls.go
index 24e29a2ba..446d1e0f6 100644
--- a/pkg/sentry/strace/syscalls.go
+++ b/pkg/sentry/strace/syscalls.go
@@ -207,9 +207,27 @@ const (
// array is in the next argument.
PollFDs
- // SelectFDSet is an fd_set argument in select(2)/pselect(2). The number of
- // fds represented must be the first argument.
+ // SelectFDSet is an fd_set argument in select(2)/pselect(2). The
+ // number of FDs represented must be the first argument.
SelectFDSet
+
+ // GetSockOptVal is the optval argument in getsockopt(2).
+ //
+ // Formatted after syscall execution.
+ GetSockOptVal
+
+ // SetSockOptVal is the optval argument in setsockopt(2).
+ //
+ // Contents omitted after syscall execution.
+ SetSockOptVal
+
+ // SockOptLevel is the level argument in getsockopt(2) and
+ // setsockopt(2).
+ SockOptLevel
+
+ // SockOptLevel is the optname argument in getsockopt(2) and
+ // setsockopt(2).
+ SockOptName
)
// defaultFormat is the syscall argument format to use if the actual format is
diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go
index 588f8b087..79066ad2a 100644
--- a/pkg/sentry/syscalls/linux/linux64_amd64.go
+++ b/pkg/sentry/syscalls/linux/linux64_amd64.go
@@ -228,21 +228,18 @@ var AMD64 = &kernel.SyscallTable{
185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil),
186: syscalls.Supported("gettid", Gettid),
187: syscalls.Supported("readahead", Readahead),
- // TODO(b/148303075): Enable set/getxattr (in their various
- // forms) once we also have list and removexattr. The JVM
- // assumes that if get/set exist, then list and remove do too.
- 188: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 189: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 190: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil),
+ 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil),
+ 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil),
+ 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil),
+ 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil),
+ 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil),
+ 194: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil),
+ 195: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil),
+ 196: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil),
+ 197: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil),
+ 198: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil),
+ 199: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil),
200: syscalls.Supported("tkill", Tkill),
201: syscalls.Supported("time", Time),
202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil),
diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go
index 06e5ee401..7421619de 100644
--- a/pkg/sentry/syscalls/linux/linux64_arm64.go
+++ b/pkg/sentry/syscalls/linux/linux64_arm64.go
@@ -36,26 +36,23 @@ var ARM64 = &kernel.SyscallTable{
},
AuditNumber: linux.AUDIT_ARCH_AARCH64,
Table: map[uintptr]kernel.Syscall{
- 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- // TODO(b/148303075): Enable set/getxattr (in their various
- // forms) once we also have list and removexattr. The JVM
- // assumes that if get/set exist, then list and remove do too.
- 5: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 6: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 7: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 13: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil),
+ 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil),
+ 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil),
+ 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil),
+ 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil),
+ 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil),
+ 11: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil),
+ 12: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil),
+ 13: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil),
+ 14: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil),
+ 15: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil),
+ 16: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil),
17: syscalls.Supported("getcwd", Getcwd),
18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
19: syscalls.Supported("eventfd2", Eventfd2),
diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go
index efb95555c..9d8140b8a 100644
--- a/pkg/sentry/syscalls/linux/sys_xattr.go
+++ b/pkg/sentry/syscalls/linux/sys_xattr.go
@@ -49,14 +49,11 @@ func FGetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
defer f.DecRef()
- n, value, err := getXattr(t, f.Dirent, nameAddr, size)
+ n, err := getXattr(t, f.Dirent, nameAddr, valueAddr, size)
if err != nil {
return 0, nil, err
}
- if _, err := t.CopyOutBytes(valueAddr, []byte(value)); err != nil {
- return 0, nil, err
- }
return uintptr(n), nil, nil
}
@@ -71,41 +68,36 @@ func getXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink
return 0, nil, err
}
- valueLen := 0
- err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
+ n := 0
+ err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error {
if dirPath && !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
- n, value, err := getXattr(t, d, nameAddr, size)
- valueLen = n
- if err != nil {
- return err
- }
-
- _, err = t.CopyOutBytes(valueAddr, []byte(value))
+ n, err = getXattr(t, d, nameAddr, valueAddr, size)
return err
})
if err != nil {
return 0, nil, err
}
- return uintptr(valueLen), nil, nil
+
+ return uintptr(n), nil, nil
}
// getXattr implements getxattr(2) from the given *fs.Dirent.
-func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr, size uint64) (int, string, error) {
- if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Read: true}); err != nil {
- return 0, "", err
- }
-
+func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, size uint64) (int, error) {
name, err := copyInXattrName(t, nameAddr)
if err != nil {
- return 0, "", err
+ return 0, err
+ }
+
+ if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Read: true}); err != nil {
+ return 0, err
}
// TODO(b/148380782): Support xattrs in namespaces other than "user".
if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
- return 0, "", syserror.EOPNOTSUPP
+ return 0, syserror.EOPNOTSUPP
}
// If getxattr(2) is called with size 0, the size of the value will be
@@ -118,18 +110,22 @@ func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr, size uint64)
value, err := d.Inode.GetXattr(t, name, requestedSize)
if err != nil {
- return 0, "", err
+ return 0, err
}
n := len(value)
if uint64(n) > requestedSize {
- return 0, "", syserror.ERANGE
+ return 0, syserror.ERANGE
}
// Don't copy out the attribute value if size is 0.
if size == 0 {
- return n, "", nil
+ return n, nil
+ }
+
+ if _, err = t.CopyOutBytes(valueAddr, []byte(value)); err != nil {
+ return 0, err
}
- return n, value, nil
+ return n, nil
}
// SetXattr implements linux syscall setxattr(2).
@@ -172,7 +168,7 @@ func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink
return 0, nil, err
}
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error {
if dirPath && !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -187,12 +183,12 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si
return syserror.EINVAL
}
- if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil {
+ name, err := copyInXattrName(t, nameAddr)
+ if err != nil {
return err
}
- name, err := copyInXattrName(t, nameAddr)
- if err != nil {
+ if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil {
return err
}
@@ -226,12 +222,18 @@ func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) {
return name, nil
}
+// Restrict xattrs to regular files and directories.
+//
+// TODO(b/148380782): In Linux, this restriction technically only applies to
+// xattrs in the "user.*" namespace. Make file type checks specific to the
+// namespace once we allow other xattr prefixes.
+func xattrFileTypeOk(i *fs.Inode) bool {
+ return fs.IsRegular(i.StableAttr) || fs.IsDir(i.StableAttr)
+}
+
func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error {
// Restrict xattrs to regular files and directories.
- //
- // In Linux, this restriction technically only applies to xattrs in the
- // "user.*" namespace, but we don't allow any other xattr prefixes anyway.
- if !fs.IsRegular(i.StableAttr) && !fs.IsDir(i.StableAttr) {
+ if !xattrFileTypeOk(i) {
if perms.Write {
return syserror.EPERM
}
@@ -240,3 +242,179 @@ func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error
return i.CheckPermission(t, perms)
}
+
+// ListXattr implements linux syscall listxattr(2).
+func ListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return listXattrFromPath(t, args, true)
+}
+
+// LListXattr implements linux syscall llistxattr(2).
+func LListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return listXattrFromPath(t, args, false)
+}
+
+// FListXattr implements linux syscall flistxattr(2).
+func FListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ listAddr := args[1].Pointer()
+ size := uint64(args[2].SizeT())
+
+ // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH.
+ f := t.GetFile(fd)
+ if f == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer f.DecRef()
+
+ n, err := listXattr(t, f.Dirent, listAddr, size)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ return uintptr(n), nil, nil
+}
+
+func listXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) {
+ pathAddr := args[0].Pointer()
+ listAddr := args[1].Pointer()
+ size := uint64(args[2].SizeT())
+
+ path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ n := 0
+ err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error {
+ if dirPath && !fs.IsDir(d.Inode.StableAttr) {
+ return syserror.ENOTDIR
+ }
+
+ n, err = listXattr(t, d, listAddr, size)
+ return err
+ })
+ if err != nil {
+ return 0, nil, err
+ }
+
+ return uintptr(n), nil, nil
+}
+
+func listXattr(t *kernel.Task, d *fs.Dirent, addr usermem.Addr, size uint64) (int, error) {
+ if !xattrFileTypeOk(d.Inode) {
+ return 0, nil
+ }
+
+ // If listxattr(2) is called with size 0, the buffer size needed to contain
+ // the xattr list will be returned successfully even if it is nonzero. In
+ // that case, we need to retrieve the entire list so we can compute and
+ // return the correct size.
+ requestedSize := size
+ if size == 0 || size > linux.XATTR_SIZE_MAX {
+ requestedSize = linux.XATTR_SIZE_MAX
+ }
+ xattrs, err := d.Inode.ListXattr(t, requestedSize)
+ if err != nil {
+ return 0, err
+ }
+
+ // TODO(b/148380782): support namespaces other than "user".
+ for x := range xattrs {
+ if !strings.HasPrefix(x, linux.XATTR_USER_PREFIX) {
+ delete(xattrs, x)
+ }
+ }
+
+ listSize := xattrListSize(xattrs)
+ if listSize > linux.XATTR_SIZE_MAX {
+ return 0, syserror.E2BIG
+ }
+ if uint64(listSize) > requestedSize {
+ return 0, syserror.ERANGE
+ }
+
+ // Don't copy out the attributes if size is 0.
+ if size == 0 {
+ return listSize, nil
+ }
+
+ buf := make([]byte, 0, listSize)
+ for x := range xattrs {
+ buf = append(buf, []byte(x)...)
+ buf = append(buf, 0)
+ }
+ if _, err := t.CopyOutBytes(addr, buf); err != nil {
+ return 0, err
+ }
+
+ return len(buf), nil
+}
+
+func xattrListSize(xattrs map[string]struct{}) int {
+ size := 0
+ for x := range xattrs {
+ size += len(x) + 1
+ }
+ return size
+}
+
+// RemoveXattr implements linux syscall removexattr(2).
+func RemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return removeXattrFromPath(t, args, true)
+}
+
+// LRemoveXattr implements linux syscall lremovexattr(2).
+func LRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return removeXattrFromPath(t, args, false)
+}
+
+// FRemoveXattr implements linux syscall fremovexattr(2).
+func FRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ nameAddr := args[1].Pointer()
+
+ // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH.
+ f := t.GetFile(fd)
+ if f == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer f.DecRef()
+
+ return 0, nil, removeXattr(t, f.Dirent, nameAddr)
+}
+
+func removeXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) {
+ pathAddr := args[0].Pointer()
+ nameAddr := args[1].Pointer()
+
+ path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error {
+ if dirPath && !fs.IsDir(d.Inode.StableAttr) {
+ return syserror.ENOTDIR
+ }
+
+ return removeXattr(t, d, nameAddr)
+ })
+}
+
+// removeXattr implements removexattr(2) from the given *fs.Dirent.
+func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error {
+ name, err := copyInXattrName(t, nameAddr)
+ if err != nil {
+ return err
+ }
+
+ if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil {
+ return err
+ }
+
+ if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
+ return syserror.EOPNOTSUPP
+ }
+
+ return d.Inode.RemoveXattr(t, d, name)
+}
diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go
index 538c645eb..4320ad17f 100644
--- a/pkg/sentry/usage/memory.go
+++ b/pkg/sentry/usage/memory.go
@@ -253,6 +253,10 @@ func (m *MemoryLocked) Copy() (MemoryStats, uint64) {
}
// MinimumTotalMemoryBytes is the minimum reported total system memory.
+//
+// This can be configured through options provided to the Sentry at start.
+// This number is purely synthetic. This is only set before the application
+// starts executing, and must not be modified.
var MinimumTotalMemoryBytes uint64 = 2 << 30 // 2 GB
// TotalMemory returns the "total usable memory" available.