diff options
Diffstat (limited to 'pkg/sentry')
29 files changed, 665 insertions, 127 deletions
diff --git a/pkg/sentry/BUILD b/pkg/sentry/BUILD index e8b794179..e759dc36f 100644 --- a/pkg/sentry/BUILD +++ b/pkg/sentry/BUILD @@ -1,13 +1,11 @@ -# This BUILD file defines a package_group that allows for interdependencies for -# sentry-internal packages. - package(licenses = ["notice"]) +# The "internal" package_group should be used as much as possible by packages +# that should remain Sentry-internal (i.e. not be exposed directly to command +# line tooling or APIs). package_group( name = "internal", packages = [ - "//cloud/gvisor/gopkg/sentry/...", - "//cloud/gvisor/sentry/...", "//pkg/sentry/...", "//runsc/...", # Code generated by go_marshal relies on go_marshal libraries. diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index 34c0a867d..e27f21e5e 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -14,6 +14,7 @@ go_library( "arch_state_aarch64.go", "arch_state_x86.go", "arch_x86.go", + "arch_x86_impl.go", "auxv.go", "signal.go", "signal_act.go", diff --git a/pkg/sentry/arch/arch_amd64.s b/pkg/sentry/arch/arch_amd64.s index bd61402cf..6c10336e7 100644 --- a/pkg/sentry/arch/arch_amd64.s +++ b/pkg/sentry/arch/arch_amd64.s @@ -26,10 +26,11 @@ // // func initX86FPState(data *FloatingPointData, useXsave bool) // -// We need to clear out and initialize an empty fp state area since the sentry -// may have left sensitive information in the floating point registers. +// We need to clear out and initialize an empty fp state area since the sentry, +// or any previous loader, may have left sensitive information in the floating +// point registers. // -// Preconditions: data is zeroed +// Preconditions: data is zeroed. 
TEXT ·initX86FPState(SB), $24-16 // Save MXCSR (callee-save) STMXCSR mxcsr-8(SP) diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go index d388ee9cf..e35c9214a 100644 --- a/pkg/sentry/arch/arch_state_x86.go +++ b/pkg/sentry/arch/arch_state_x86.go @@ -43,8 +43,8 @@ func (e ErrFloatingPoint) Error() string { // and SSE state, so this is the equivalent XSTATE_BV value. const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE -// afterLoad is invoked by stateify. -func (s *State) afterLoad() { +// afterLoadFPState is invoked by afterLoad. +func (s *State) afterLoadFPState() { old := s.x86FPState // Recreate the slice. This is done to ensure that it is aligned diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go index 3db8bd34b..88b40a9d1 100644 --- a/pkg/sentry/arch/arch_x86.go +++ b/pkg/sentry/arch/arch_x86.go @@ -155,21 +155,6 @@ func NewFloatingPointData() *FloatingPointData { return (*FloatingPointData)(&(newX86FPState()[0])) } -// State contains the common architecture bits for X86 (the build tag of this -// file ensures it's only built on x86). -// -// +stateify savable -type State struct { - // The system registers. - Regs syscall.PtraceRegs `state:".(syscallPtraceRegs)"` - - // Our floating point state. - x86FPState `state:"wait"` - - // FeatureSet is a pointer to the currently active feature set. - FeatureSet *cpuid.FeatureSet -} - // Proto returns a protobuf representation of the system registers in State. func (s State) Proto() *rpb.Registers { regs := &rpb.AMD64Registers{ diff --git a/pkg/sentry/arch/arch_x86_impl.go b/pkg/sentry/arch/arch_x86_impl.go new file mode 100644 index 000000000..04ac283c6 --- /dev/null +++ b/pkg/sentry/arch/arch_x86_impl.go @@ -0,0 +1,43 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 i386 + +package arch + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/cpuid" +) + +// State contains the common architecture bits for X86 (the build tag of this +// file ensures it's only built on x86). +// +// +stateify savable +type State struct { + // The system registers. + Regs syscall.PtraceRegs `state:".(syscallPtraceRegs)"` + + // Our floating point state. + x86FPState `state:"wait"` + + // FeatureSet is a pointer to the currently active feature set. + FeatureSet *cpuid.FeatureSet +} + +// afterLoad is invoked by stateify. +func (s *State) afterLoad() { + s.afterLoadFPState() +} diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index f6c79e51b..b060a12ff 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -401,7 +401,7 @@ func copyAttributesLocked(ctx context.Context, upper *Inode, lower *Inode) error if err != nil { return err } - lowerXattr, err := lower.ListXattr(ctx) + lowerXattr, err := lower.ListXattr(ctx, linux.XATTR_SIZE_MAX) if err != nil && err != syserror.EOPNOTSUPP { return err } diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index 252830572..daecc4ffe 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -247,7 +247,7 @@ func (i *InodeSimpleExtendedAttributes) SetXattr(_ context.Context, _ *fs.Inode, } // ListXattr implements fs.InodeOperations.ListXattr. 
-func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { +func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) { i.mu.RLock() names := make(map[string]struct{}, len(i.xattrs)) for name := range i.xattrs { @@ -257,6 +257,17 @@ func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (m return names, nil } +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (i *InodeSimpleExtendedAttributes) RemoveXattr(_ context.Context, _ *fs.Inode, name string) error { + i.mu.RLock() + defer i.mu.RUnlock() + if _, ok := i.xattrs[name]; ok { + delete(i.xattrs, name) + return nil + } + return syserror.ENOATTR +} + // staticFile is a file with static contents. It is returned by // InodeStaticFileGetter.GetFile. // @@ -460,10 +471,15 @@ func (InodeNoExtendedAttributes) SetXattr(context.Context, *fs.Inode, string, st } // ListXattr implements fs.InodeOperations.ListXattr. -func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { +func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) { return nil, syserror.EOPNOTSUPP } +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (InodeNoExtendedAttributes) RemoveXattr(context.Context, *fs.Inode, string) error { + return syserror.EOPNOTSUPP +} + // InodeNoopRelease implements fs.InodeOperations.Release as a noop. type InodeNoopRelease struct{} diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go index 71cccdc34..6db4b762d 100644 --- a/pkg/sentry/fs/gofer/attr.go +++ b/pkg/sentry/fs/gofer/attr.go @@ -88,8 +88,9 @@ func bsize(pattr p9.Attr) int64 { if pattr.BlockSize > 0 { return int64(pattr.BlockSize) } - // Some files may have no clue of their block size. Better not to report - // something misleading or buggy and have a safe default. 
+ // Some files, particularly those that are not on a local file system, + // may have no clue of their block size. Better not to report something + // misleading or buggy and have a safe default. return usermem.PageSize } @@ -149,6 +150,7 @@ func links(valid p9.AttrMask, pattr p9.Attr) uint64 { } // This node is likely backed by a file system that doesn't support links. + // // We could readdir() and count children directories to provide an accurate // link count. However this may be expensive since the gofer may be backed by remote // storage. Instead, simply return 2 links for directories and 1 for everything else diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go index 3da818aed..125907d70 100644 --- a/pkg/sentry/fs/gofer/context_file.go +++ b/pkg/sentry/fs/gofer/context_file.go @@ -73,6 +73,20 @@ func (c *contextFile) setXattr(ctx context.Context, name, value string, flags ui return err } +func (c *contextFile) listXattr(ctx context.Context, size uint64) (map[string]struct{}, error) { + ctx.UninterruptibleSleepStart(false) + xattrs, err := c.file.ListXattr(size) + ctx.UninterruptibleSleepFinish(false) + return xattrs, err +} + +func (c *contextFile) removeXattr(ctx context.Context, name string) error { + ctx.UninterruptibleSleepStart(false) + err := c.file.RemoveXattr(name) + ctx.UninterruptibleSleepFinish(false) + return err +} + func (c *contextFile) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error { ctx.UninterruptibleSleepStart(false) err := c.file.Allocate(mode, offset, length) diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index ac28174d2..1c934981b 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -604,18 +604,23 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length } // GetXattr implements fs.InodeOperations.GetXattr. 
-func (i *inodeOperations) GetXattr(ctx context.Context, inode *fs.Inode, name string, size uint64) (string, error) { +func (i *inodeOperations) GetXattr(ctx context.Context, _ *fs.Inode, name string, size uint64) (string, error) { return i.fileState.file.getXattr(ctx, name, size) } // SetXattr implements fs.InodeOperations.SetXattr. -func (i *inodeOperations) SetXattr(ctx context.Context, inode *fs.Inode, name string, value string, flags uint32) error { +func (i *inodeOperations) SetXattr(ctx context.Context, _ *fs.Inode, name string, value string, flags uint32) error { return i.fileState.file.setXattr(ctx, name, value, flags) } // ListXattr implements fs.InodeOperations.ListXattr. -func (i *inodeOperations) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { - return nil, syscall.EOPNOTSUPP +func (i *inodeOperations) ListXattr(ctx context.Context, _ *fs.Inode, size uint64) (map[string]struct{}, error) { + return i.fileState.file.listXattr(ctx, size) +} + +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (i *inodeOperations) RemoveXattr(ctx context.Context, _ *fs.Inode, name string) error { + return i.fileState.file.removeXattr(ctx, name) } // Allocate implements fs.InodeOperations.Allocate. diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index b66c091ab..55fb71c16 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -278,11 +278,19 @@ func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, fla } // ListXattr calls i.InodeOperations.ListXattr with i as the Inode. 
-func (i *Inode) ListXattr(ctx context.Context) (map[string]struct{}, error) { +func (i *Inode) ListXattr(ctx context.Context, size uint64) (map[string]struct{}, error) { if i.overlay != nil { - return overlayListXattr(ctx, i.overlay) + return overlayListXattr(ctx, i.overlay, size) } - return i.InodeOperations.ListXattr(ctx, i) + return i.InodeOperations.ListXattr(ctx, i, size) +} + +// RemoveXattr calls i.InodeOperations.RemoveXattr with i as the Inode. +func (i *Inode) RemoveXattr(ctx context.Context, d *Dirent, name string) error { + if i.overlay != nil { + return overlayRemoveXattr(ctx, i.overlay, d, name) + } + return i.InodeOperations.RemoveXattr(ctx, i, name) } // CheckPermission will check if the caller may access this file in the diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index 70f2eae96..2bbfb72ef 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -190,7 +190,18 @@ type InodeOperations interface { // ListXattr returns the set of all extended attributes names that // have values. Inodes that do not support extended attributes return // EOPNOTSUPP. - ListXattr(ctx context.Context, inode *Inode) (map[string]struct{}, error) + // + // If this is called through the listxattr(2) syscall, size indicates the + // size of the buffer that the application has allocated to hold the + // attribute list. If the list would be larger than size, implementations may + // return ERANGE to indicate that the buffer is too small, but they are also + // free to ignore the hint entirely. All size checking is done independently + // at the syscall layer. + ListXattr(ctx context.Context, inode *Inode, size uint64) (map[string]struct{}, error) + + // RemoveXattr removes an extended attribute specified by name. Inodes that + // do not support extended attributes return EOPNOTSUPP. 
+ RemoveXattr(ctx context.Context, inode *Inode, name string) error // Check determines whether an Inode can be accessed with the // requested permission mask using the context (which gives access diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 4729b4aac..5ada33a32 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -564,15 +564,15 @@ func overlaySetxattr(ctx context.Context, o *overlayEntry, d *Dirent, name, valu return o.upper.SetXattr(ctx, d, name, value, flags) } -func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{}, error) { +func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[string]struct{}, error) { o.copyMu.RLock() defer o.copyMu.RUnlock() var names map[string]struct{} var err error if o.upper != nil { - names, err = o.upper.ListXattr(ctx) + names, err = o.upper.ListXattr(ctx, size) } else { - names, err = o.lower.ListXattr(ctx) + names, err = o.lower.ListXattr(ctx, size) } for name := range names { // Same as overlayGetXattr, we shouldn't forward along @@ -584,6 +584,18 @@ func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{} return names, err } +func overlayRemoveXattr(ctx context.Context, o *overlayEntry, d *Dirent, name string) error { + // Don't allow changes to overlay xattrs through a removexattr syscall. + if strings.HasPrefix(XattrOverlayPrefix, name) { + return syserror.EPERM + } + + if err := copyUp(ctx, d); err != nil { + return err + } + return o.upper.RemoveXattr(ctx, d, name) +} + func overlayCheck(ctx context.Context, o *overlayEntry, p PermMask) error { o.copyMu.RLock() // Hot path. Avoid defers. 
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index c00cef0a5..3c2b583ae 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -159,8 +159,13 @@ func (d *Dir) SetXattr(ctx context.Context, i *fs.Inode, name, value string, fla } // ListXattr implements fs.InodeOperations.ListXattr. -func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode) (map[string]struct{}, error) { - return d.ramfsDir.ListXattr(ctx, i) +func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode, size uint64) (map[string]struct{}, error) { + return d.ramfsDir.ListXattr(ctx, i, size) +} + +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (d *Dir) RemoveXattr(ctx context.Context, i *fs.Inode, name string) error { + return d.ramfsDir.RemoveXattr(ctx, i, name) } // Lookup implements fs.InodeOperations.Lookup. diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go index d1436b943..2015a8871 100644 --- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go +++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go @@ -15,6 +15,9 @@ // These benchmarks emulate memfs benchmarks. Ext4 images must be created // before this benchmark is run using the `make_deep_ext4.sh` script at // /tmp/image-{depth}.ext4 for all the depths tested below. +// +// The benchmark itself cannot run the script because the script requires +// sudo privileges to create the file system images. package benchmark_test import ( diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go index baa2cdd8e..6d4ebc2bf 100644 --- a/pkg/sentry/fsimpl/gofer/directory.go +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -87,6 +87,10 @@ func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) { // to assume that directory fids have the correct semantics, and translates // struct file_operations::readdir calls directly to readdir RPCs), but is // consistent with VFS1. 
+ // + // NOTE(b/135560623): In particular, some gofer implementations may not + // retain state between calls to Readdir, so may not provide a coherent + // directory stream in the presence of mutation. d.fs.renameMu.RLock() defer d.fs.renameMu.RUnlock() diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 9d65d0179..e49303c26 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -111,10 +111,10 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir // Dentry isn't cached; it either doesn't exist or failed // revalidation. Attempt to resolve it via Lookup. // - // FIXME(b/144498111): Inode.Lookup() should return *(kernfs.)Dentry, - // not *vfs.Dentry, since (kernfs.)Filesystem assumes that all dentries - // in the filesystem are (kernfs.)Dentry and performs vfs.DentryImpl - // casts accordingly. + // FIXME(gvisor.dev/issue/1193): Inode.Lookup() should return + // *(kernfs.)Dentry, not *vfs.Dentry, since (kernfs.)Filesystem assumes + // that all dentries in the filesystem are (kernfs.)Dentry and performs + // vfs.DentryImpl casts accordingly. var err error childVFSD, err = parent.inode.Lookup(ctx, name) if err != nil { diff --git a/pkg/sentry/socket/netfilter/extensions.go b/pkg/sentry/socket/netfilter/extensions.go index 3082976cd..22fd0ebe7 100644 --- a/pkg/sentry/socket/netfilter/extensions.go +++ b/pkg/sentry/socket/netfilter/extensions.go @@ -45,6 +45,8 @@ type matchMaker interface { unmarshal(buf []byte, filter iptables.IPHeaderFilter) (iptables.Matcher, error) } +// matchMakers maps the name of supported matchers to the matchMaker that +// marshals and unmarshals it. It is immutable after package initialization. 
var matchMakers = map[string]matchMaker{} // registermatchMaker should be called by match extensions to register them @@ -59,7 +61,7 @@ func registerMatchMaker(mm matchMaker) { func marshalMatcher(matcher iptables.Matcher) []byte { matchMaker, ok := matchMakers[matcher.Name()] if !ok { - panic(fmt.Errorf("Unknown matcher of type %T.", matcher)) + panic(fmt.Sprintf("Unknown matcher of type %T.", matcher)) } return matchMaker.marshal(matcher) } diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index ea43a0ce3..ea02627de 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -149,7 +149,6 @@ func FillDefaultIPTables(stack *stack.Stack) { stack.SetIPTables(ipt) } -// TODO: Return proto. // convertNetstackToBinary converts the iptables as stored in netstack to the // format expected by the iptables tool. Linux stores each table as a binary // blob that can only be traversed by parsing a bit, reading some offsets, @@ -161,7 +160,7 @@ func convertNetstackToBinary(tablename string, table iptables.Table) (linux.Kern // The table name has to fit in the struct. 
if linux.XT_TABLE_MAXNAMELEN < len(tablename) { - return linux.KernelIPTGetEntries{}, metadata{}, fmt.Errorf("Table name %q too long.", tablename) + return linux.KernelIPTGetEntries{}, metadata{}, fmt.Errorf("table name %q too long.", tablename) } copy(entries.Name[:], tablename) @@ -302,7 +301,7 @@ func translateToStandardVerdict(val int32) (iptables.Verdict, error) { case linux.NF_RETURN: return iptables.Invalid, errors.New("unsupported iptables verdict RETURN") default: - return iptables.Invalid, fmt.Errorf("unknown iptables verdict %d.", val) + return iptables.Invalid, fmt.Errorf("unknown iptables verdict %d", val) } } @@ -553,12 +552,12 @@ func parseTarget(optVal []byte) (iptables.Target, error) { case errorTargetName: return iptables.ErrorTarget{}, nil default: - return nil, fmt.Errorf("Unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String()) + return nil, fmt.Errorf("unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String()) } } // Unknown target. 
- return nil, fmt.Errorf("Unknown target %q doesn't exist or isn't supported yet.", target.Name.String()) + return nil, fmt.Errorf("unknown target %q doesn't exist or isn't supported yet.", target.Name.String()) } func filterFromIPTIP(iptip linux.IPTIP) (iptables.IPHeaderFilter, error) { diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go index 6b2f4c31a..f9945e214 100644 --- a/pkg/sentry/socket/netfilter/tcp_matcher.go +++ b/pkg/sentry/socket/netfilter/tcp_matcher.go @@ -48,7 +48,7 @@ func (tcpMarshaler) marshal(mr iptables.Matcher) []byte { DestinationPortStart: matcher.destinationPortStart, DestinationPortEnd: matcher.destinationPortEnd, } - buf := make([]byte, 0, linux.SizeOfXTUDP) + buf := make([]byte, 0, linux.SizeOfXTTCP) return marshalEntryMatch(matcherNameTCP, binary.Marshal(buf, usermem.ByteOrder, xttcp)) } diff --git a/pkg/sentry/strace/linux64_amd64.go b/pkg/sentry/strace/linux64_amd64.go index 85ec66fd3..a4de545e9 100644 --- a/pkg/sentry/strace/linux64_amd64.go +++ b/pkg/sentry/strace/linux64_amd64.go @@ -78,8 +78,8 @@ var linuxAMD64 = SyscallMap{ 51: makeSyscallInfo("getsockname", FD, PostSockAddr, SockLen), 52: makeSyscallInfo("getpeername", FD, PostSockAddr, SockLen), 53: makeSyscallInfo("socketpair", SockFamily, SockType, SockProtocol, Hex), - 54: makeSyscallInfo("setsockopt", FD, Hex, Hex, Hex, Hex), - 55: makeSyscallInfo("getsockopt", FD, Hex, Hex, Hex, Hex), + 54: makeSyscallInfo("setsockopt", FD, SockOptLevel, SockOptName, SetSockOptVal, Hex /* length by value, not a pointer */), + 55: makeSyscallInfo("getsockopt", FD, SockOptLevel, SockOptName, GetSockOptVal, SockLen), 56: makeSyscallInfo("clone", CloneFlags, Hex, Hex, Hex, Hex), 57: makeSyscallInfo("fork"), 58: makeSyscallInfo("vfork"), diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go index d2079c85f..f7ff4573e 100644 --- a/pkg/sentry/strace/socket.go +++ b/pkg/sentry/strace/socket.go @@ -419,3 +419,218 @@ func 
sockFlags(flags int32) string { } return SocketFlagSet.Parse(uint64(flags)) } + +func getSockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen usermem.Addr, maximumBlobSize uint, rval uintptr) string { + if int(rval) < 0 { + return hexNum(uint64(optVal)) + } + if optVal == 0 { + return "null" + } + l, err := copySockLen(t, optLen) + if err != nil { + return fmt.Sprintf("%#x {error reading length: %v}", optLen, err) + } + return sockOptVal(t, level, optname, optVal, uint64(l), maximumBlobSize) +} + +func sockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen uint64, maximumBlobSize uint) string { + switch optLen { + case 1: + var v uint8 + _, err := t.CopyIn(optVal, &v) + if err != nil { + return fmt.Sprintf("%#x {error reading optval: %v}", optVal, err) + } + return fmt.Sprintf("%#x {value=%v}", optVal, v) + case 2: + var v uint16 + _, err := t.CopyIn(optVal, &v) + if err != nil { + return fmt.Sprintf("%#x {error reading optval: %v}", optVal, err) + } + return fmt.Sprintf("%#x {value=%v}", optVal, v) + case 4: + var v uint32 + _, err := t.CopyIn(optVal, &v) + if err != nil { + return fmt.Sprintf("%#x {error reading optval: %v}", optVal, err) + } + return fmt.Sprintf("%#x {value=%v}", optVal, v) + default: + return dump(t, optVal, uint(optLen), maximumBlobSize) + } +} + +var sockOptLevels = abi.ValueSet{ + linux.SOL_IP: "SOL_IP", + linux.SOL_SOCKET: "SOL_SOCKET", + linux.SOL_TCP: "SOL_TCP", + linux.SOL_UDP: "SOL_UDP", + linux.SOL_IPV6: "SOL_IPV6", + linux.SOL_ICMPV6: "SOL_ICMPV6", + linux.SOL_RAW: "SOL_RAW", + linux.SOL_PACKET: "SOL_PACKET", + linux.SOL_NETLINK: "SOL_NETLINK", +} + +var sockOptNames = map[uint64]abi.ValueSet{ + linux.SOL_IP: { + linux.IP_TTL: "IP_TTL", + linux.IP_MULTICAST_TTL: "IP_MULTICAST_TTL", + linux.IP_MULTICAST_IF: "IP_MULTICAST_IF", + linux.IP_MULTICAST_LOOP: "IP_MULTICAST_LOOP", + linux.IP_TOS: "IP_TOS", + linux.IP_RECVTOS: "IP_RECVTOS", + linux.IPT_SO_GET_INFO: "IPT_SO_GET_INFO", + 
linux.IPT_SO_GET_ENTRIES: "IPT_SO_GET_ENTRIES", + linux.IP_ADD_MEMBERSHIP: "IP_ADD_MEMBERSHIP", + linux.IP_DROP_MEMBERSHIP: "IP_DROP_MEMBERSHIP", + linux.MCAST_JOIN_GROUP: "MCAST_JOIN_GROUP", + linux.IP_ADD_SOURCE_MEMBERSHIP: "IP_ADD_SOURCE_MEMBERSHIP", + linux.IP_BIND_ADDRESS_NO_PORT: "IP_BIND_ADDRESS_NO_PORT", + linux.IP_BLOCK_SOURCE: "IP_BLOCK_SOURCE", + linux.IP_CHECKSUM: "IP_CHECKSUM", + linux.IP_DROP_SOURCE_MEMBERSHIP: "IP_DROP_SOURCE_MEMBERSHIP", + linux.IP_FREEBIND: "IP_FREEBIND", + linux.IP_HDRINCL: "IP_HDRINCL", + linux.IP_IPSEC_POLICY: "IP_IPSEC_POLICY", + linux.IP_MINTTL: "IP_MINTTL", + linux.IP_MSFILTER: "IP_MSFILTER", + linux.IP_MTU_DISCOVER: "IP_MTU_DISCOVER", + linux.IP_MULTICAST_ALL: "IP_MULTICAST_ALL", + linux.IP_NODEFRAG: "IP_NODEFRAG", + linux.IP_OPTIONS: "IP_OPTIONS", + linux.IP_PASSSEC: "IP_PASSSEC", + linux.IP_PKTINFO: "IP_PKTINFO", + linux.IP_RECVERR: "IP_RECVERR", + linux.IP_RECVFRAGSIZE: "IP_RECVFRAGSIZE", + linux.IP_RECVOPTS: "IP_RECVOPTS", + linux.IP_RECVORIGDSTADDR: "IP_RECVORIGDSTADDR", + linux.IP_RECVTTL: "IP_RECVTTL", + linux.IP_RETOPTS: "IP_RETOPTS", + linux.IP_TRANSPARENT: "IP_TRANSPARENT", + linux.IP_UNBLOCK_SOURCE: "IP_UNBLOCK_SOURCE", + linux.IP_UNICAST_IF: "IP_UNICAST_IF", + linux.IP_XFRM_POLICY: "IP_XFRM_POLICY", + linux.MCAST_BLOCK_SOURCE: "MCAST_BLOCK_SOURCE", + linux.MCAST_JOIN_SOURCE_GROUP: "MCAST_JOIN_SOURCE_GROUP", + linux.MCAST_LEAVE_GROUP: "MCAST_LEAVE_GROUP", + linux.MCAST_LEAVE_SOURCE_GROUP: "MCAST_LEAVE_SOURCE_GROUP", + linux.MCAST_MSFILTER: "MCAST_MSFILTER", + linux.MCAST_UNBLOCK_SOURCE: "MCAST_UNBLOCK_SOURCE", + linux.IP_ROUTER_ALERT: "IP_ROUTER_ALERT", + linux.IP_PKTOPTIONS: "IP_PKTOPTIONS", + linux.IP_MTU: "IP_MTU", + }, + linux.SOL_SOCKET: { + linux.SO_ERROR: "SO_ERROR", + linux.SO_PEERCRED: "SO_PEERCRED", + linux.SO_PASSCRED: "SO_PASSCRED", + linux.SO_SNDBUF: "SO_SNDBUF", + linux.SO_RCVBUF: "SO_RCVBUF", + linux.SO_REUSEADDR: "SO_REUSEADDR", + linux.SO_REUSEPORT: "SO_REUSEPORT", + linux.SO_BINDTODEVICE: 
"SO_BINDTODEVICE", + linux.SO_BROADCAST: "SO_BROADCAST", + linux.SO_KEEPALIVE: "SO_KEEPALIVE", + linux.SO_LINGER: "SO_LINGER", + linux.SO_SNDTIMEO: "SO_SNDTIMEO", + linux.SO_RCVTIMEO: "SO_RCVTIMEO", + linux.SO_OOBINLINE: "SO_OOBINLINE", + linux.SO_TIMESTAMP: "SO_TIMESTAMP", + }, + linux.SOL_TCP: { + linux.TCP_NODELAY: "TCP_NODELAY", + linux.TCP_CORK: "TCP_CORK", + linux.TCP_QUICKACK: "TCP_QUICKACK", + linux.TCP_MAXSEG: "TCP_MAXSEG", + linux.TCP_KEEPIDLE: "TCP_KEEPIDLE", + linux.TCP_KEEPINTVL: "TCP_KEEPINTVL", + linux.TCP_USER_TIMEOUT: "TCP_USER_TIMEOUT", + linux.TCP_INFO: "TCP_INFO", + linux.TCP_CC_INFO: "TCP_CC_INFO", + linux.TCP_NOTSENT_LOWAT: "TCP_NOTSENT_LOWAT", + linux.TCP_ZEROCOPY_RECEIVE: "TCP_ZEROCOPY_RECEIVE", + linux.TCP_CONGESTION: "TCP_CONGESTION", + linux.TCP_LINGER2: "TCP_LINGER2", + linux.TCP_DEFER_ACCEPT: "TCP_DEFER_ACCEPT", + linux.TCP_REPAIR_OPTIONS: "TCP_REPAIR_OPTIONS", + linux.TCP_INQ: "TCP_INQ", + linux.TCP_FASTOPEN: "TCP_FASTOPEN", + linux.TCP_FASTOPEN_CONNECT: "TCP_FASTOPEN_CONNECT", + linux.TCP_FASTOPEN_KEY: "TCP_FASTOPEN_KEY", + linux.TCP_FASTOPEN_NO_COOKIE: "TCP_FASTOPEN_NO_COOKIE", + linux.TCP_KEEPCNT: "TCP_KEEPCNT", + linux.TCP_QUEUE_SEQ: "TCP_QUEUE_SEQ", + linux.TCP_REPAIR: "TCP_REPAIR", + linux.TCP_REPAIR_QUEUE: "TCP_REPAIR_QUEUE", + linux.TCP_REPAIR_WINDOW: "TCP_REPAIR_WINDOW", + linux.TCP_SAVED_SYN: "TCP_SAVED_SYN", + linux.TCP_SAVE_SYN: "TCP_SAVE_SYN", + linux.TCP_SYNCNT: "TCP_SYNCNT", + linux.TCP_THIN_DUPACK: "TCP_THIN_DUPACK", + linux.TCP_THIN_LINEAR_TIMEOUTS: "TCP_THIN_LINEAR_TIMEOUTS", + linux.TCP_TIMESTAMP: "TCP_TIMESTAMP", + linux.TCP_ULP: "TCP_ULP", + linux.TCP_WINDOW_CLAMP: "TCP_WINDOW_CLAMP", + }, + linux.SOL_IPV6: { + linux.IPV6_V6ONLY: "IPV6_V6ONLY", + linux.IPV6_PATHMTU: "IPV6_PATHMTU", + linux.IPV6_TCLASS: "IPV6_TCLASS", + linux.IPV6_ADD_MEMBERSHIP: "IPV6_ADD_MEMBERSHIP", + linux.IPV6_DROP_MEMBERSHIP: "IPV6_DROP_MEMBERSHIP", + linux.IPV6_IPSEC_POLICY: "IPV6_IPSEC_POLICY", + linux.IPV6_JOIN_ANYCAST: "IPV6_JOIN_ANYCAST", 
+ linux.IPV6_LEAVE_ANYCAST: "IPV6_LEAVE_ANYCAST", + linux.IPV6_PKTINFO: "IPV6_PKTINFO", + linux.IPV6_ROUTER_ALERT: "IPV6_ROUTER_ALERT", + linux.IPV6_XFRM_POLICY: "IPV6_XFRM_POLICY", + linux.MCAST_BLOCK_SOURCE: "MCAST_BLOCK_SOURCE", + linux.MCAST_JOIN_GROUP: "MCAST_JOIN_GROUP", + linux.MCAST_JOIN_SOURCE_GROUP: "MCAST_JOIN_SOURCE_GROUP", + linux.MCAST_LEAVE_GROUP: "MCAST_LEAVE_GROUP", + linux.MCAST_LEAVE_SOURCE_GROUP: "MCAST_LEAVE_SOURCE_GROUP", + linux.MCAST_UNBLOCK_SOURCE: "MCAST_UNBLOCK_SOURCE", + linux.IPV6_2292DSTOPTS: "IPV6_2292DSTOPTS", + linux.IPV6_2292HOPLIMIT: "IPV6_2292HOPLIMIT", + linux.IPV6_2292HOPOPTS: "IPV6_2292HOPOPTS", + linux.IPV6_2292PKTINFO: "IPV6_2292PKTINFO", + linux.IPV6_2292PKTOPTIONS: "IPV6_2292PKTOPTIONS", + linux.IPV6_2292RTHDR: "IPV6_2292RTHDR", + linux.IPV6_ADDR_PREFERENCES: "IPV6_ADDR_PREFERENCES", + linux.IPV6_AUTOFLOWLABEL: "IPV6_AUTOFLOWLABEL", + linux.IPV6_DONTFRAG: "IPV6_DONTFRAG", + linux.IPV6_DSTOPTS: "IPV6_DSTOPTS", + linux.IPV6_FLOWINFO: "IPV6_FLOWINFO", + linux.IPV6_FLOWINFO_SEND: "IPV6_FLOWINFO_SEND", + linux.IPV6_FLOWLABEL_MGR: "IPV6_FLOWLABEL_MGR", + linux.IPV6_FREEBIND: "IPV6_FREEBIND", + linux.IPV6_HOPOPTS: "IPV6_HOPOPTS", + linux.IPV6_MINHOPCOUNT: "IPV6_MINHOPCOUNT", + linux.IPV6_MTU: "IPV6_MTU", + linux.IPV6_MTU_DISCOVER: "IPV6_MTU_DISCOVER", + linux.IPV6_MULTICAST_ALL: "IPV6_MULTICAST_ALL", + linux.IPV6_MULTICAST_HOPS: "IPV6_MULTICAST_HOPS", + linux.IPV6_MULTICAST_IF: "IPV6_MULTICAST_IF", + linux.IPV6_MULTICAST_LOOP: "IPV6_MULTICAST_LOOP", + linux.IPV6_RECVDSTOPTS: "IPV6_RECVDSTOPTS", + linux.IPV6_RECVERR: "IPV6_RECVERR", + linux.IPV6_RECVFRAGSIZE: "IPV6_RECVFRAGSIZE", + linux.IPV6_RECVHOPLIMIT: "IPV6_RECVHOPLIMIT", + linux.IPV6_RECVHOPOPTS: "IPV6_RECVHOPOPTS", + linux.IPV6_RECVORIGDSTADDR: "IPV6_RECVORIGDSTADDR", + linux.IPV6_RECVPATHMTU: "IPV6_RECVPATHMTU", + linux.IPV6_RECVPKTINFO: "IPV6_RECVPKTINFO", + linux.IPV6_RECVRTHDR: "IPV6_RECVRTHDR", + linux.IPV6_RECVTCLASS: "IPV6_RECVTCLASS", + linux.IPV6_RTHDR: 
"IPV6_RTHDR", + linux.IPV6_RTHDRDSTOPTS: "IPV6_RTHDRDSTOPTS", + linux.IPV6_TRANSPARENT: "IPV6_TRANSPARENT", + linux.IPV6_UNICAST_HOPS: "IPV6_UNICAST_HOPS", + linux.IPV6_UNICAST_IF: "IPV6_UNICAST_IF", + linux.MCAST_MSFILTER: "MCAST_MSFILTER", + linux.IPV6_ADDRFORM: "IPV6_ADDRFORM", + }, +} diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go index 3fc4a47fc..a796b2396 100644 --- a/pkg/sentry/strace/strace.go +++ b/pkg/sentry/strace/strace.go @@ -55,6 +55,14 @@ var ItimerTypes = abi.ValueSet{ linux.ITIMER_PROF: "ITIMER_PROF", } +func hexNum(num uint64) string { + return "0x" + strconv.FormatUint(num, 16) +} + +func hexArg(arg arch.SyscallArgument) string { + return hexNum(arg.Uint64()) +} + func iovecs(t *kernel.Task, addr usermem.Addr, iovcnt int, printContent bool, maxBytes uint64) string { if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV { return fmt.Sprintf("%#x (error decoding iovecs: invalid iovcnt)", addr) @@ -389,6 +397,12 @@ func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlo output = append(output, path(t, args[arg].Pointer())) case ExecveStringVector: output = append(output, stringVector(t, args[arg].Pointer())) + case SetSockOptVal: + output = append(output, sockOptVal(t, args[arg-2].Uint64() /* level */, args[arg-1].Uint64() /* optName */, args[arg].Pointer() /* optVal */, args[arg+1].Uint64() /* optLen */, maximumBlobSize)) + case SockOptLevel: + output = append(output, sockOptLevels.Parse(args[arg].Uint64())) + case SockOptName: + output = append(output, sockOptNames[args[arg-1].Uint64() /* level */].Parse(args[arg].Uint64())) case SockAddr: output = append(output, sockAddr(t, args[arg].Pointer(), uint32(args[arg+1].Uint64()))) case SockLen: @@ -446,7 +460,7 @@ func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlo case Hex: fallthrough default: - output = append(output, "0x"+strconv.FormatUint(args[arg].Uint64(), 16)) + output = append(output, hexArg(args[arg])) } } @@ -507,6 +521,12 
@@ func (i *SyscallInfo) post(t *kernel.Task, args arch.SyscallArguments, rval uint output[arg] = capData(t, args[arg-1].Pointer(), args[arg].Pointer()) case PollFDs: output[arg] = pollFDs(t, args[arg].Pointer(), uint(args[arg+1].Uint()), true) + case GetSockOptVal: + output[arg] = getSockOptVal(t, args[arg-2].Uint64() /* level */, args[arg-1].Uint64() /* optName */, args[arg].Pointer() /* optVal */, args[arg+1].Pointer() /* optLen */, maximumBlobSize, rval) + case SetSockOptVal: + // No need to print the value again. While it usually + // isn't, the string version of this arg can be long. + output[arg] = hexArg(args[arg]) } } } diff --git a/pkg/sentry/strace/syscalls.go b/pkg/sentry/strace/syscalls.go index 24e29a2ba..446d1e0f6 100644 --- a/pkg/sentry/strace/syscalls.go +++ b/pkg/sentry/strace/syscalls.go @@ -207,9 +207,27 @@ const ( // array is in the next argument. PollFDs - // SelectFDSet is an fd_set argument in select(2)/pselect(2). The number of - // fds represented must be the first argument. + // SelectFDSet is an fd_set argument in select(2)/pselect(2). The + // number of FDs represented must be the first argument. SelectFDSet + + // GetSockOptVal is the optval argument in getsockopt(2). + // + // Formatted after syscall execution. + GetSockOptVal + + // SetSockOptVal is the optval argument in setsockopt(2). + // + // Contents omitted after syscall execution. + SetSockOptVal + + // SockOptLevel is the level argument in getsockopt(2) and + // setsockopt(2). + SockOptLevel + + // SockOptName is the optname argument in getsockopt(2) and + // setsockopt(2).
+ SockOptName ) // defaultFormat is the syscall argument format to use if the actual format is diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 588f8b087..79066ad2a 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -228,21 +228,18 @@ var AMD64 = &kernel.SyscallTable{ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), - // TODO(b/148303075): Enable set/getxattr (in their various - // forms) once we also have list and removexattr. The JVM - // assumes that if get/set exist, then list and remove do too. - 188: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 189: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 190: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires 
filesystem support.", []string{"gvisor.dev/issue/1636"}), - 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), + 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), + 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), + 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), + 194: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), + 195: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), + 196: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil), + 197: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), + 198: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), + 199: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), 200: syscalls.Supported("tkill", Tkill), 201: syscalls.Supported("time", Time), 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index 06e5ee401..7421619de 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -36,26 +36,23 @@ var ARM64 = &kernel.SyscallTable{ }, AuditNumber: linux.AUDIT_ARCH_AARCH64, Table: map[uintptr]kernel.Syscall{ - 0: 
syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - // TODO(b/148303075): Enable set/getxattr (in their various - // forms) once we also have list and removexattr. The JVM - // assumes that if get/set exist, then list and remove do too. 
- 5: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 6: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 7: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 13: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), + 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), + 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), + 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), + 11: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), + 12: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), + 13: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil), + 14: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), + 15: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), + 16: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), 17: syscalls.Supported("getcwd", Getcwd), 18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), 19: syscalls.Supported("eventfd2", Eventfd2), diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index efb95555c..9d8140b8a 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -49,14 +49,11 @@ func FGetXattr(t *kernel.Task, args 
arch.SyscallArguments) (uintptr, *kernel.Sys } defer f.DecRef() - n, value, err := getXattr(t, f.Dirent, nameAddr, size) + n, err := getXattr(t, f.Dirent, nameAddr, valueAddr, size) if err != nil { return 0, nil, err } - if _, err := t.CopyOutBytes(valueAddr, []byte(value)); err != nil { - return 0, nil, err - } return uintptr(n), nil, nil } @@ -71,41 +68,36 @@ func getXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink return 0, nil, err } - valueLen := 0 - err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + n := 0 + err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } - n, value, err := getXattr(t, d, nameAddr, size) - valueLen = n - if err != nil { - return err - } - - _, err = t.CopyOutBytes(valueAddr, []byte(value)) + n, err = getXattr(t, d, nameAddr, valueAddr, size) return err }) if err != nil { return 0, nil, err } - return uintptr(valueLen), nil, nil + + return uintptr(n), nil, nil } // getXattr implements getxattr(2) from the given *fs.Dirent. -func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr, size uint64) (int, string, error) { - if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Read: true}); err != nil { - return 0, "", err - } - +func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, size uint64) (int, error) { name, err := copyInXattrName(t, nameAddr) if err != nil { - return 0, "", err + return 0, err + } + + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Read: true}); err != nil { + return 0, err } // TODO(b/148380782): Support xattrs in namespaces other than "user". 
if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { - return 0, "", syserror.EOPNOTSUPP + return 0, syserror.EOPNOTSUPP } // If getxattr(2) is called with size 0, the size of the value will be @@ -118,18 +110,22 @@ func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr, size uint64) value, err := d.Inode.GetXattr(t, name, requestedSize) if err != nil { - return 0, "", err + return 0, err } n := len(value) if uint64(n) > requestedSize { - return 0, "", syserror.ERANGE + return 0, syserror.ERANGE } // Don't copy out the attribute value if size is 0. if size == 0 { - return n, "", nil + return n, nil + } + + if _, err = t.CopyOutBytes(valueAddr, []byte(value)); err != nil { + return 0, err } - return n, value, nil + return n, nil } // SetXattr implements linux syscall setxattr(2). @@ -172,7 +168,7 @@ func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -187,12 +183,12 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si return syserror.EINVAL } - if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { + name, err := copyInXattrName(t, nameAddr) + if err != nil { return err } - name, err := copyInXattrName(t, nameAddr) - if err != nil { + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { return err } @@ -226,12 +222,18 @@ func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { return name, nil } +// Restrict xattrs to regular files and directories. +// +// TODO(b/148380782): In Linux, this restriction technically only applies to +// xattrs in the "user.*" namespace. 
Make file type checks specific to the +// namespace once we allow other xattr prefixes. +func xattrFileTypeOk(i *fs.Inode) bool { + return fs.IsRegular(i.StableAttr) || fs.IsDir(i.StableAttr) +} + func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error { // Restrict xattrs to regular files and directories. - // - // In Linux, this restriction technically only applies to xattrs in the - // "user.*" namespace, but we don't allow any other xattr prefixes anyway. - if !fs.IsRegular(i.StableAttr) && !fs.IsDir(i.StableAttr) { + if !xattrFileTypeOk(i) { if perms.Write { return syserror.EPERM } @@ -240,3 +242,179 @@ func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error return i.CheckPermission(t, perms) } + +// ListXattr implements linux syscall listxattr(2). +func ListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return listXattrFromPath(t, args, true) +} + +// LListXattr implements linux syscall llistxattr(2). +func LListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return listXattrFromPath(t, args, false) +} + +// FListXattr implements linux syscall flistxattr(2). +func FListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + listAddr := args[1].Pointer() + size := uint64(args[2].SizeT()) + + // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. 
+ f := t.GetFile(fd) + if f == nil { + return 0, nil, syserror.EBADF + } + defer f.DecRef() + + n, err := listXattr(t, f.Dirent, listAddr, size) + if err != nil { + return 0, nil, err + } + + return uintptr(n), nil, nil +} + +func listXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + listAddr := args[1].Pointer() + size := uint64(args[2].SizeT()) + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + n := 0 + err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + + n, err = listXattr(t, d, listAddr, size) + return err + }) + if err != nil { + return 0, nil, err + } + + return uintptr(n), nil, nil +} + +func listXattr(t *kernel.Task, d *fs.Dirent, addr usermem.Addr, size uint64) (int, error) { + if !xattrFileTypeOk(d.Inode) { + return 0, nil + } + + // If listxattr(2) is called with size 0, the buffer size needed to contain + // the xattr list will be returned successfully even if it is nonzero. In + // that case, we need to retrieve the entire list so we can compute and + // return the correct size. + requestedSize := size + if size == 0 || size > linux.XATTR_SIZE_MAX { + requestedSize = linux.XATTR_SIZE_MAX + } + xattrs, err := d.Inode.ListXattr(t, requestedSize) + if err != nil { + return 0, err + } + + // TODO(b/148380782): support namespaces other than "user". + for x := range xattrs { + if !strings.HasPrefix(x, linux.XATTR_USER_PREFIX) { + delete(xattrs, x) + } + } + + listSize := xattrListSize(xattrs) + if listSize > linux.XATTR_SIZE_MAX { + return 0, syserror.E2BIG + } + if uint64(listSize) > requestedSize { + return 0, syserror.ERANGE + } + + // Don't copy out the attributes if size is 0. 
+ if size == 0 { + return listSize, nil + } + + buf := make([]byte, 0, listSize) + for x := range xattrs { + buf = append(buf, []byte(x)...) + buf = append(buf, 0) + } + if _, err := t.CopyOutBytes(addr, buf); err != nil { + return 0, err + } + + return len(buf), nil +} + +func xattrListSize(xattrs map[string]struct{}) int { + size := 0 + for x := range xattrs { + size += len(x) + 1 + } + return size +} + +// RemoveXattr implements linux syscall removexattr(2). +func RemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return removeXattrFromPath(t, args, true) +} + +// LRemoveXattr implements linux syscall lremovexattr(2). +func LRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return removeXattrFromPath(t, args, false) +} + +// FRemoveXattr implements linux syscall fremovexattr(2). +func FRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + nameAddr := args[1].Pointer() + + // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. + f := t.GetFile(fd) + if f == nil { + return 0, nil, syserror.EBADF + } + defer f.DecRef() + + return 0, nil, removeXattr(t, f.Dirent, nameAddr) +} + +func removeXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + nameAddr := args[1].Pointer() + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + + return removeXattr(t, d, nameAddr) + }) +} + +// removeXattr implements removexattr(2) from the given *fs.Dirent. 
+func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error { + name, err := copyInXattrName(t, nameAddr) + if err != nil { + return err + } + + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { + return err + } + + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + + return d.Inode.RemoveXattr(t, d, name) +} diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go index 538c645eb..4320ad17f 100644 --- a/pkg/sentry/usage/memory.go +++ b/pkg/sentry/usage/memory.go @@ -253,6 +253,10 @@ func (m *MemoryLocked) Copy() (MemoryStats, uint64) { } // MinimumTotalMemoryBytes is the minimum reported total system memory. +// +// This can be configured through options provided to the Sentry at start. +// This number is purely synthetic. This is only set before the application +// starts executing, and must not be modified. var MinimumTotalMemoryBytes uint64 = 2 << 30 // 2 GB // TotalMemory returns the "total usable memory" available. |