summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/fs/fsutil/host_mappable.go2
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached.go22
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached_test.go4
-rw-r--r--pkg/sentry/fs/gofer/inode.go15
-rw-r--r--pkg/sentry/fs/host/inode.go4
-rw-r--r--pkg/sentry/fs/host/tty.go15
-rw-r--r--pkg/sentry/fs/inode_overlay.go6
-rw-r--r--pkg/sentry/fs/proc/net.go135
-rw-r--r--pkg/sentry/fs/proc/proc.go2
-rw-r--r--pkg/sentry/fs/splice.go4
-rw-r--r--pkg/sentry/fs/timerfd/timerfd.go3
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go2
-rw-r--r--pkg/sentry/fsimpl/memfs/directory.go2
-rw-r--r--pkg/sentry/fsimpl/memfs/memfs.go2
-rw-r--r--pkg/sentry/fsimpl/memfs/regular_file.go2
-rw-r--r--pkg/sentry/kernel/kernel.go129
-rw-r--r--pkg/sentry/kernel/pipe/node_test.go4
-rw-r--r--pkg/sentry/kernel/pipe/pipe.go4
-rw-r--r--pkg/sentry/kernel/posixtimer.go8
-rw-r--r--pkg/sentry/kernel/task_identity.go4
-rw-r--r--pkg/sentry/kernel/task_sched.go33
-rw-r--r--pkg/sentry/kernel/thread_group.go3
-rw-r--r--pkg/sentry/kernel/time/time.go27
-rw-r--r--pkg/sentry/loader/elf.go18
-rw-r--r--pkg/sentry/loader/loader.go3
-rw-r--r--pkg/sentry/platform/ptrace/subprocess.go14
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_linux.go4
-rw-r--r--pkg/sentry/sighandling/sighandling_unsafe.go2
-rw-r--r--pkg/sentry/socket/netstack/BUILD (renamed from pkg/sentry/socket/epsocket/BUILD)6
-rw-r--r--pkg/sentry/socket/netstack/device.go (renamed from pkg/sentry/socket/epsocket/device.go)6
-rw-r--r--pkg/sentry/socket/netstack/netstack.go (renamed from pkg/sentry/socket/epsocket/epsocket.go)80
-rw-r--r--pkg/sentry/socket/netstack/provider.go (renamed from pkg/sentry/socket/epsocket/provider.go)4
-rw-r--r--pkg/sentry/socket/netstack/save_restore.go (renamed from pkg/sentry/socket/epsocket/save_restore.go)2
-rw-r--r--pkg/sentry/socket/netstack/stack.go (renamed from pkg/sentry/socket/epsocket/stack.go)2
-rw-r--r--pkg/sentry/socket/unix/BUILD2
-rw-r--r--pkg/sentry/socket/unix/unix.go24
-rw-r--r--pkg/sentry/strace/BUILD2
-rw-r--r--pkg/sentry/strace/socket.go4
-rw-r--r--pkg/sentry/syscalls/linux/BUILD2
-rw-r--r--pkg/sentry/syscalls/linux/linux64.go379
-rw-r--r--pkg/sentry/syscalls/linux/linux64_amd64.go386
-rw-r--r--pkg/sentry/syscalls/linux/linux64_arm64.go313
-rw-r--r--pkg/sentry/syscalls/linux/sys_read.go33
-rw-r--r--pkg/sentry/syscalls/linux/sys_socket.go2
-rw-r--r--pkg/sentry/syscalls/linux/sys_splice.go24
-rw-r--r--pkg/sentry/syscalls/linux/sys_time.go39
-rw-r--r--pkg/sentry/syscalls/linux/sys_utsname.go6
-rw-r--r--pkg/sentry/usermem/usermem.go8
-rw-r--r--pkg/sentry/vfs/options.go6
49 files changed, 1241 insertions, 562 deletions
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
index d2495cb83..693625ddc 100644
--- a/pkg/sentry/fs/fsutil/host_mappable.go
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -144,7 +144,7 @@ func (h *HostMappable) Truncate(ctx context.Context, newSize int64) error {
mask := fs.AttrMask{Size: true}
attr := fs.UnstableAttr{Size: newSize}
- if err := h.backingFile.SetMaskedAttributes(ctx, mask, attr); err != nil {
+ if err := h.backingFile.SetMaskedAttributes(ctx, mask, attr, false); err != nil {
return err
}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index d404a79d4..dd80757dc 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -140,12 +140,16 @@ type CachedFileObject interface {
// WriteFromBlocksAt may return a partial write without an error.
WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)
- // SetMaskedAttributes sets the attributes in attr that are true in mask
- // on the backing file.
+ // SetMaskedAttributes sets the attributes in attr that are true in
+ // mask on the backing file. If the mask contains only ATime or MTime
+ // and the CachedFileObject has an FD to the file, then this operation
+ // is a noop unless forceSetTimestamps is true. This avoids an extra
+ // RPC to the gofer in the open-read/write-close case, when the
+ // timestamps on the file will be updated by the host kernel for us.
//
// SetMaskedAttributes may be called at any point, regardless of whether
// the file was opened.
- SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error
+ SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr, forceSetTimestamps bool) error
// Allocate allows the caller to reserve disk space for the inode.
// It's equivalent to fallocate(2) with 'mode=0'.
@@ -224,7 +228,7 @@ func (c *CachingInodeOperations) SetPermissions(ctx context.Context, inode *fs.I
now := ktime.NowFromContext(ctx)
masked := fs.AttrMask{Perms: true}
- if err := c.backingFile.SetMaskedAttributes(ctx, masked, fs.UnstableAttr{Perms: perms}); err != nil {
+ if err := c.backingFile.SetMaskedAttributes(ctx, masked, fs.UnstableAttr{Perms: perms}, false); err != nil {
return false
}
c.attr.Perms = perms
@@ -246,7 +250,7 @@ func (c *CachingInodeOperations) SetOwner(ctx context.Context, inode *fs.Inode,
UID: owner.UID.Ok(),
GID: owner.GID.Ok(),
}
- if err := c.backingFile.SetMaskedAttributes(ctx, masked, fs.UnstableAttr{Owner: owner}); err != nil {
+ if err := c.backingFile.SetMaskedAttributes(ctx, masked, fs.UnstableAttr{Owner: owner}, false); err != nil {
return err
}
if owner.UID.Ok() {
@@ -282,7 +286,9 @@ func (c *CachingInodeOperations) SetTimestamps(ctx context.Context, inode *fs.In
AccessTime: !ts.ATimeOmit,
ModificationTime: !ts.MTimeOmit,
}
- if err := c.backingFile.SetMaskedAttributes(ctx, masked, fs.UnstableAttr{AccessTime: ts.ATime, ModificationTime: ts.MTime}); err != nil {
+ // Call SetMaskedAttributes with forceSetTimestamps = true to make sure
+ // the timestamp is updated.
+ if err := c.backingFile.SetMaskedAttributes(ctx, masked, fs.UnstableAttr{AccessTime: ts.ATime, ModificationTime: ts.MTime}, true); err != nil {
return err
}
if !ts.ATimeOmit {
@@ -305,7 +311,7 @@ func (c *CachingInodeOperations) Truncate(ctx context.Context, inode *fs.Inode,
now := ktime.NowFromContext(ctx)
masked := fs.AttrMask{Size: true}
attr := fs.UnstableAttr{Size: size}
- if err := c.backingFile.SetMaskedAttributes(ctx, masked, attr); err != nil {
+ if err := c.backingFile.SetMaskedAttributes(ctx, masked, attr, false); err != nil {
c.dataMu.Unlock()
return err
}
@@ -394,7 +400,7 @@ func (c *CachingInodeOperations) WriteOut(ctx context.Context, inode *fs.Inode)
c.dirtyAttr.Size = false
// Write out cached attributes.
- if err := c.backingFile.SetMaskedAttributes(ctx, c.dirtyAttr, c.attr); err != nil {
+ if err := c.backingFile.SetMaskedAttributes(ctx, c.dirtyAttr, c.attr, false); err != nil {
c.attrMu.Unlock()
return err
}
diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go
index eb5730c35..129f314c8 100644
--- a/pkg/sentry/fs/fsutil/inode_cached_test.go
+++ b/pkg/sentry/fs/fsutil/inode_cached_test.go
@@ -39,7 +39,7 @@ func (noopBackingFile) WriteFromBlocksAt(ctx context.Context, srcs safemem.Block
return srcs.NumBytes(), nil
}
-func (noopBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr) error {
+func (noopBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr, bool) error {
return nil
}
@@ -230,7 +230,7 @@ func (f *sliceBackingFile) WriteFromBlocksAt(ctx context.Context, srcs safemem.B
return w.WriteFromBlocks(srcs)
}
-func (*sliceBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr) error {
+func (*sliceBackingFile) SetMaskedAttributes(context.Context, fs.AttrMask, fs.UnstableAttr, bool) error {
return nil
}
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
index 95b064aea..d918d6620 100644
--- a/pkg/sentry/fs/gofer/inode.go
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -215,8 +215,8 @@ func (i *inodeFileState) WriteFromBlocksAt(ctx context.Context, srcs safemem.Blo
}
// SetMaskedAttributes implements fsutil.CachedFileObject.SetMaskedAttributes.
-func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error {
- if i.skipSetAttr(mask) {
+func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr, forceSetTimestamps bool) error {
+ if i.skipSetAttr(mask, forceSetTimestamps) {
return nil
}
as, ans := attr.AccessTime.Unix()
@@ -251,13 +251,14 @@ func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMa
// when:
// - Mask is empty
// - Mask contains only attributes that cannot be set in the gofer
-// - Mask contains only atime and/or mtime, and host FD exists
+// - forceSetTimestamps is false and mask contains only atime and/or mtime
+// and host FD exists
//
// Updates to atime and mtime can be skipped because cached value will be
// "close enough" to host value, given that operation went directly to host FD.
// Skipping atime updates is particularly important to reduce the number of
// operations sent to the Gofer for readonly files.
-func (i *inodeFileState) skipSetAttr(mask fs.AttrMask) bool {
+func (i *inodeFileState) skipSetAttr(mask fs.AttrMask, forceSetTimestamps bool) bool {
// First remove attributes that cannot be updated.
cpy := mask
cpy.Type = false
@@ -277,6 +278,12 @@ func (i *inodeFileState) skipSetAttr(mask fs.AttrMask) bool {
return false
}
+ // If forceSetTimestamps was passed, then we cannot skip.
+ if forceSetTimestamps {
+ return false
+ }
+
+ // Skip if we have a host FD.
i.handlesMu.RLock()
defer i.handlesMu.RUnlock()
return (i.readHandles != nil && i.readHandles.Host != nil) ||
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 894ab01f0..a6e4a09e3 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -114,7 +114,7 @@ func (i *inodeFileState) WriteFromBlocksAt(ctx context.Context, srcs safemem.Blo
}
// SetMaskedAttributes implements fsutil.CachedFileObject.SetMaskedAttributes.
-func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error {
+func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr, _ bool) error {
if mask.Empty() {
return nil
}
@@ -163,7 +163,7 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err
return unstableAttr(i.mops, &s), nil
}
-// SetMaskedAttributes implements fsutil.CachedFileObject.SetMaskedAttributes.
+// Allocate implements fsutil.CachedFileObject.Allocate.
func (i *inodeFileState) Allocate(_ context.Context, offset, length int64) error {
return syscall.Fallocate(i.FD(), 0, offset, length)
}
diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
index 2526412a4..90331e3b2 100644
--- a/pkg/sentry/fs/host/tty.go
+++ b/pkg/sentry/fs/host/tty.go
@@ -43,12 +43,15 @@ type TTYFileOperations struct {
// fgProcessGroup is the foreground process group that is currently
// connected to this TTY.
fgProcessGroup *kernel.ProcessGroup
+
+ termios linux.KernelTermios
}
// newTTYFile returns a new fs.File that wraps a TTY FD.
func newTTYFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags, iops *inodeOperations) *fs.File {
return fs.NewFile(ctx, dirent, flags, &TTYFileOperations{
fileOperations: fileOperations{iops: iops},
+ termios: linux.DefaultSlaveTermios,
})
}
@@ -97,9 +100,12 @@ func (t *TTYFileOperations) Write(ctx context.Context, file *fs.File, src userme
t.mu.Lock()
defer t.mu.Unlock()
- // Are we allowed to do the write?
- if err := t.checkChange(ctx, linux.SIGTTOU); err != nil {
- return 0, err
+ // Check whether TOSTOP is enabled. This corresponds to the check in
+ // drivers/tty/n_tty.c:n_tty_write().
+ if t.termios.LEnabled(linux.TOSTOP) {
+ if err := t.checkChange(ctx, linux.SIGTTOU); err != nil {
+ return 0, err
+ }
}
return t.fileOperations.Write(ctx, file, src, offset)
}
@@ -144,6 +150,9 @@ func (t *TTYFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO
return 0, err
}
err := ioctlSetTermios(fd, ioctl, &termios)
+ if err == nil {
+ t.termios.FromTermios(termios)
+ }
return 0, err
case linux.TIOCGPGRP:
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 246b97161..5a388dad1 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -15,6 +15,7 @@
package fs
import (
+ "fmt"
"strings"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -207,6 +208,11 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name
}
func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name string, flags FileFlags, perm FilePermissions) (*File, error) {
+ // Sanity check.
+ if parent.Inode.overlay == nil {
+ panic(fmt.Sprintf("overlayCreate called with non-overlay parent inode (parent InodeOperations type is %T)", parent.Inode.InodeOperations))
+ }
+
// Dirent.Create takes renameMu if the Inode is an overlay Inode.
if err := copyUpLockedForRename(ctx, parent); err != nil {
return nil, err
diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go
index 5e28982c5..f70239449 100644
--- a/pkg/sentry/fs/proc/net.go
+++ b/pkg/sentry/fs/proc/net.go
@@ -66,7 +66,7 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo
if s.SupportsIPv6() {
contents["if_inet6"] = seqfile.NewSeqFileInode(ctx, &ifinet6{s: s}, msrc)
contents["ipv6_route"] = newStaticProcInode(ctx, msrc, []byte(""))
- contents["tcp6"] = newStaticProcInode(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode"))
+ contents["tcp6"] = seqfile.NewSeqFileInode(ctx, &netTCP6{k: k}, msrc)
contents["udp6"] = newStaticProcInode(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode"))
}
}
@@ -310,44 +310,51 @@ func networkToHost16(n uint16) uint16 {
return usermem.ByteOrder.Uint16(buf[:])
}
-func writeInetAddr(w io.Writer, a linux.SockAddrInet) {
- // linux.SockAddrInet.Port is stored in the network byte order and is
- // printed like a number in host byte order. Note that all numbers in host
- // byte order are printed with the most-significant byte first when
- // formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux.
- port := networkToHost16(a.Port)
-
- // linux.SockAddrInet.Addr is stored as a byte slice in big-endian order
- // (i.e. most-significant byte in index 0). Linux represents this as a
- // __be32 which is a typedef for an unsigned int, and is printed with
- // %X. This means that for a little-endian machine, Linux prints the
- // least-significant byte of the address first. To emulate this, we first
- // invert the byte order for the address using usermem.ByteOrder.Uint32,
- // which makes it have the equivalent encoding to a __be32 on a little
- // endian machine. Note that this operation is a no-op on a big endian
- // machine. Then similar to Linux, we format it with %X, which will print
- // the most-significant byte of the __be32 address first, which is now
- // actually the least-significant byte of the original address in
- // linux.SockAddrInet.Addr on little endian machines, due to the conversion.
- addr := usermem.ByteOrder.Uint32(a.Addr[:])
-
- fmt.Fprintf(w, "%08X:%04X ", addr, port)
-}
+func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
+ switch family {
+ case linux.AF_INET:
+ var a linux.SockAddrInet
+ if i != nil {
+ a = *i.(*linux.SockAddrInet)
+ }
-// netTCP implements seqfile.SeqSource for /proc/net/tcp.
-//
-// +stateify savable
-type netTCP struct {
- k *kernel.Kernel
-}
+ // linux.SockAddrInet.Port is stored in the network byte order and is
+ // printed like a number in host byte order. Note that all numbers in host
+ // byte order are printed with the most-significant byte first when
+ // formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux.
+ port := networkToHost16(a.Port)
+
+ // linux.SockAddrInet.Addr is stored as a byte slice in big-endian order
+ // (i.e. most-significant byte in index 0). Linux represents this as a
+ // __be32 which is a typedef for an unsigned int, and is printed with
+ // %X. This means that for a little-endian machine, Linux prints the
+ // least-significant byte of the address first. To emulate this, we first
+ // invert the byte order for the address using usermem.ByteOrder.Uint32,
+ // which makes it have the equivalent encoding to a __be32 on a little
+ // endian machine. Note that this operation is a no-op on a big endian
+ // machine. Then similar to Linux, we format it with %X, which will print
+ // the most-significant byte of the __be32 address first, which is now
+ // actually the least-significant byte of the original address in
+ // linux.SockAddrInet.Addr on little endian machines, due to the conversion.
+ addr := usermem.ByteOrder.Uint32(a.Addr[:])
+
+ fmt.Fprintf(w, "%08X:%04X ", addr, port)
+ case linux.AF_INET6:
+ var a linux.SockAddrInet6
+ if i != nil {
+ a = *i.(*linux.SockAddrInet6)
+ }
-// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
-func (*netTCP) NeedsUpdate(generation int64) bool {
- return true
+ port := networkToHost16(a.Port)
+ addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4])
+ addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8])
+ addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12])
+ addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16])
+ fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port)
+ }
}
-// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
-func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+func commonReadSeqFileDataTCP(ctx context.Context, n seqfile.SeqHandle, k *kernel.Kernel, h seqfile.SeqHandle, fa int, header []byte) ([]seqfile.SeqData, int64) {
// t may be nil here if our caller is not part of a task goroutine. This can
// happen for example if we're here for "sentryctl cat". When t is nil,
// degrade gracefully and retrieve what we can.
@@ -358,7 +365,7 @@ func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
}
var buf bytes.Buffer
- for _, se := range n.k.ListSockets() {
+ for _, se := range k.ListSockets() {
s := se.Sock.Get()
if s == nil {
log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID)
@@ -369,7 +376,7 @@ func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
if !ok {
panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
}
- if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) {
+ if family, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) {
s.DecRef()
// Not tcp4 sockets.
continue
@@ -384,22 +391,22 @@ func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
fmt.Fprintf(&buf, "%4d: ", se.ID)
// Field: local_adddress.
- var localAddr linux.SockAddrInet
+ var localAddr linux.SockAddr
if t != nil {
if local, _, err := sops.GetSockName(t); err == nil {
- localAddr = *local.(*linux.SockAddrInet)
+ localAddr = local
}
}
- writeInetAddr(&buf, localAddr)
+ writeInetAddr(&buf, fa, localAddr)
// Field: rem_address.
- var remoteAddr linux.SockAddrInet
+ var remoteAddr linux.SockAddr
if t != nil {
if remote, _, err := sops.GetPeerName(t); err == nil {
- remoteAddr = *remote.(*linux.SockAddrInet)
+ remoteAddr = remote
}
}
- writeInetAddr(&buf, remoteAddr)
+ writeInetAddr(&buf, fa, remoteAddr)
// Field: state; socket state.
fmt.Fprintf(&buf, "%02X ", sops.State())
@@ -465,7 +472,7 @@ func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
data := []seqfile.SeqData{
{
- Buf: []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n"),
+ Buf: header,
Handle: n,
},
{
@@ -476,6 +483,42 @@ func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
return data, 0
}
+// netTCP implements seqfile.SeqSource for /proc/net/tcp.
+//
+// +stateify savable
+type netTCP struct {
+ k *kernel.Kernel
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
+func (*netTCP) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ header := []byte(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n")
+ return commonReadSeqFileDataTCP(ctx, n, n.k, h, linux.AF_INET, header)
+}
+
+// netTCP6 implements seqfile.SeqSource for /proc/net/tcp6.
+//
+// +stateify savable
+type netTCP6 struct {
+ k *kernel.Kernel
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
+func (*netTCP6) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+func (n *netTCP6) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ header := []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n")
+ return commonReadSeqFileDataTCP(ctx, n, n.k, h, linux.AF_INET6, header)
+}
+
// netUDP implements seqfile.SeqSource for /proc/net/udp.
//
// +stateify savable
@@ -529,7 +572,7 @@ func (n *netUDP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
localAddr = *local.(*linux.SockAddrInet)
}
}
- writeInetAddr(&buf, localAddr)
+ writeInetAddr(&buf, linux.AF_INET, &localAddr)
// Field: rem_address.
var remoteAddr linux.SockAddrInet
@@ -538,7 +581,7 @@ func (n *netUDP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
remoteAddr = *remote.(*linux.SockAddrInet)
}
}
- writeInetAddr(&buf, remoteAddr)
+ writeInetAddr(&buf, linux.AF_INET, &remoteAddr)
// Field: state; socket state.
fmt.Fprintf(&buf, "%02X ", sops.State())
diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go
index 0ef13f2f5..56e92721e 100644
--- a/pkg/sentry/fs/proc/proc.go
+++ b/pkg/sentry/fs/proc/proc.go
@@ -230,7 +230,7 @@ func (rpf *rootProcFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dent
// But for whatever crazy reason, you can still walk to the given node.
for _, tg := range rpf.iops.pidns.ThreadGroups() {
if leader := tg.Leader(); leader != nil {
- name := strconv.FormatUint(uint64(tg.ID()), 10)
+ name := strconv.FormatUint(uint64(rpf.iops.pidns.IDOfThreadGroup(tg)), 10)
m[name] = fs.GenericDentAttr(fs.SpecialDirectory, device.ProcDevice)
names = append(names, name)
}
diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go
index b03b7f836..311798811 100644
--- a/pkg/sentry/fs/splice.go
+++ b/pkg/sentry/fs/splice.go
@@ -139,7 +139,7 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
// Attempt to do a WriteTo; this is likely the most efficient.
n, err := src.FileOperations.WriteTo(ctx, src, w, opts.Length, opts.Dup)
- if n == 0 && err != nil && err != syserror.ErrWouldBlock && !opts.Dup {
+ if n == 0 && err == syserror.ENOSYS && !opts.Dup {
// Attempt as a ReadFrom. If a WriteTo, a ReadFrom may also be
// more efficient than a copy if buffers are cached or readily
// available. (It's unlikely that they can actually be donated).
@@ -151,7 +151,7 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
// if we block at some point, we could lose data. If the source is
// not a pipe then reading is not destructive; if the destination
// is a regular file, then it is guaranteed not to block writing.
- if n == 0 && err != nil && err != syserror.ErrWouldBlock && !opts.Dup && (!dstPipe || !srcPipe) {
+ if n == 0 && err == syserror.ENOSYS && !opts.Dup && (!dstPipe || !srcPipe) {
// Fallback to an in-kernel copy.
n, err = io.Copy(w, &io.LimitedReader{
R: r,
diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go
index 59403d9db..f8bf663bb 100644
--- a/pkg/sentry/fs/timerfd/timerfd.go
+++ b/pkg/sentry/fs/timerfd/timerfd.go
@@ -141,9 +141,10 @@ func (t *TimerOperations) Write(context.Context, *fs.File, usermem.IOSequence, i
}
// Notify implements ktime.TimerListener.Notify.
-func (t *TimerOperations) Notify(exp uint64) {
+func (t *TimerOperations) Notify(exp uint64, setting ktime.Setting) (ktime.Setting, bool) {
atomic.AddUint64(&t.val, exp)
t.events.Notify(waiter.EventIn)
+ return ktime.Setting{}, false
}
// Destroy implements ktime.TimerListener.Destroy.
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index 159fb7c08..69089c8a8 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -324,7 +324,7 @@ type Fifo struct {
// NewFifo creates a new named pipe.
func NewFifo(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions, msrc *fs.MountSource) *fs.Inode {
// First create a pipe.
- p := pipe.NewPipe(ctx, true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize)
+ p := pipe.NewPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize)
// Build pipe InodeOperations.
iops := pipe.NewInodeOperations(ctx, perms, p)
diff --git a/pkg/sentry/fsimpl/memfs/directory.go b/pkg/sentry/fsimpl/memfs/directory.go
index c620227c9..0bd82e480 100644
--- a/pkg/sentry/fsimpl/memfs/directory.go
+++ b/pkg/sentry/fsimpl/memfs/directory.go
@@ -32,7 +32,7 @@ type directory struct {
childList dentryList
}
-func (fs *filesystem) newDirectory(creds *auth.Credentials, mode uint16) *inode {
+func (fs *filesystem) newDirectory(creds *auth.Credentials, mode linux.FileMode) *inode {
dir := &directory{}
dir.inode.init(dir, fs, creds, mode)
dir.inode.nlink = 2 // from "." and parent directory or ".." for root
diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go
index 45cd42b3e..b78471c0f 100644
--- a/pkg/sentry/fsimpl/memfs/memfs.go
+++ b/pkg/sentry/fsimpl/memfs/memfs.go
@@ -137,7 +137,7 @@ type inode struct {
impl interface{} // immutable
}
-func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode uint16) {
+func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) {
i.refs = 1
i.mode = uint32(mode)
i.uid = uint32(creds.EffectiveKUID)
diff --git a/pkg/sentry/fsimpl/memfs/regular_file.go b/pkg/sentry/fsimpl/memfs/regular_file.go
index 55f869798..b7f4853b3 100644
--- a/pkg/sentry/fsimpl/memfs/regular_file.go
+++ b/pkg/sentry/fsimpl/memfs/regular_file.go
@@ -37,7 +37,7 @@ type regularFile struct {
dataLen int64
}
-func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode uint16) *inode {
+func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode {
file := &regularFile{}
file.inode.init(file, fs, creds, mode)
file.inode.nlink = 1 // from parent directory
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 8c1f79ab5..3cda03891 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -24,6 +24,7 @@
// TaskSet.mu
// SignalHandlers.mu
// Task.mu
+// runningTasksMu
//
// Locking SignalHandlers.mu in multiple SignalHandlers requires locking
// TaskSet.mu exclusively first. Locking Task.mu in multiple Tasks at the same
@@ -135,6 +136,22 @@ type Kernel struct {
// syslog is the kernel log.
syslog syslog
+ // runningTasksMu synchronizes disable/enable of cpuClockTicker when
+ // the kernel is idle (runningTasks == 0).
+ //
+ // runningTasksMu is used to exclude critical sections when the timer
+ // disables itself and when the first active task enables the timer,
+ // ensuring that tasks always see a valid cpuClock value.
+ runningTasksMu sync.Mutex `state:"nosave"`
+
+ // runningTasks is the total count of tasks currently in
+ // TaskGoroutineRunningSys or TaskGoroutineRunningApp. i.e., they are
+ // not blocked or stopped.
+ //
+ // runningTasks must be accessed atomically. Increments from 0 to 1 are
+ // further protected by runningTasksMu (see incRunningTasks).
+ runningTasks int64
+
// cpuClock is incremented every linux.ClockTick. cpuClock is used to
// measure task CPU usage, since sampling monotonicClock twice on every
// syscall turns out to be unreasonably expensive. This is similar to how
@@ -150,6 +167,22 @@ type Kernel struct {
// cpuClockTicker increments cpuClock.
cpuClockTicker *ktime.Timer `state:"nosave"`
+ // cpuClockTickerDisabled indicates that cpuClockTicker has been
+ // disabled because no tasks are running.
+ //
+ // cpuClockTickerDisabled is protected by runningTasksMu.
+ cpuClockTickerDisabled bool
+
+ // cpuClockTickerSetting is the ktime.Setting of cpuClockTicker at the
+ // point it was disabled. It is cached here to avoid a lock ordering
+ // violation with cpuClockTicker.mu when runningTaskMu is held.
+ //
+ // cpuClockTickerSetting is only valid when cpuClockTickerDisabled is
+ // true.
+ //
+ // cpuClockTickerSetting is protected by runningTasksMu.
+ cpuClockTickerSetting ktime.Setting
+
// fdMapUids is an ever-increasing counter for generating FDTable uids.
//
// fdMapUids is mutable, and is accessed using atomic memory operations.
@@ -912,6 +945,102 @@ func (k *Kernel) resumeTimeLocked() {
}
}
+func (k *Kernel) incRunningTasks() {
+ for {
+ tasks := atomic.LoadInt64(&k.runningTasks)
+ if tasks != 0 {
+ // Standard case. Simply increment.
+ if !atomic.CompareAndSwapInt64(&k.runningTasks, tasks, tasks+1) {
+ continue
+ }
+ return
+ }
+
+ // Transition from 0 -> 1. Synchronize with other transitions and timer.
+ k.runningTasksMu.Lock()
+ tasks = atomic.LoadInt64(&k.runningTasks)
+ if tasks != 0 {
+ // We're no longer the first task, no need to
+ // re-enable.
+ atomic.AddInt64(&k.runningTasks, 1)
+ k.runningTasksMu.Unlock()
+ return
+ }
+
+ if !k.cpuClockTickerDisabled {
+ // Timer was never disabled.
+ atomic.StoreInt64(&k.runningTasks, 1)
+ k.runningTasksMu.Unlock()
+ return
+ }
+
+ // We need to update cpuClock for all of the ticks missed while we
+ // slept, and then re-enable the timer.
+ //
+ // The Notify in Swap isn't sufficient. kernelCPUClockTicker.Notify
+ // always increments cpuClock by 1 regardless of the number of
+ // expirations as a heuristic to avoid over-accounting in cases of CPU
+ // throttling.
+ //
+ // We want to cover the normal case, when all time should be accounted,
+ // so we increment for all expirations. Throttling is less concerning
+ // here because the ticker is only disabled from Notify. This means
+ // that Notify must schedule and compensate for the throttled period
+ // before the timer is disabled. Throttling while the timer is disabled
+ // doesn't matter, as nothing is running or reading cpuClock anyways.
+ //
+ // S/R also adds complication, as there are two cases. Recall that
+ // monotonicClock will jump forward on restore.
+ //
+ // 1. If the ticker is enabled during save, then on Restore Notify is
+ // called with many expirations, covering the time jump, but cpuClock
+ // is only incremented by 1.
+ //
+ // 2. If the ticker is disabled during save, then after Restore the
+ // first wakeup will call this function and cpuClock will be
+ // incremented by the number of expirations across the S/R.
+ //
+ // These cause very different value of cpuClock. But again, since
+ // nothing was running while the ticker was disabled, those differences
+ // don't matter.
+ setting, exp := k.cpuClockTickerSetting.At(k.monotonicClock.Now())
+ if exp > 0 {
+ atomic.AddUint64(&k.cpuClock, exp)
+ }
+
+ // Now that cpuClock is updated it is safe to allow other tasks to
+ // transition to running.
+ atomic.StoreInt64(&k.runningTasks, 1)
+
+ // N.B. we must unlock before calling Swap to maintain lock ordering.
+ //
+ // cpuClockTickerDisabled need not wait until after Swap to become
+ // true. It is sufficient that the timer *will* be enabled.
+ k.cpuClockTickerDisabled = false
+ k.runningTasksMu.Unlock()
+
+ // This won't call Notify (unless it's been ClockTick since setting.At
+ // above). This means we skip the thread group work in Notify. However,
+ // since nothing was running while we were disabled, none of the timers
+ // could have expired.
+ k.cpuClockTicker.Swap(setting)
+
+ return
+ }
+}
+
+func (k *Kernel) decRunningTasks() {
+ tasks := atomic.AddInt64(&k.runningTasks, -1)
+ if tasks < 0 {
+ panic(fmt.Sprintf("Invalid running count %d", tasks))
+ }
+
+ // Nothing to do. The next CPU clock tick will disable the timer if
+ // there is still nothing running. This provides approximately one tick
+ // of slack in which we can switch back and forth between idle and
+ // active without an expensive transition.
+}
+
// WaitExited blocks until all tasks in k have exited.
func (k *Kernel) WaitExited() {
k.tasks.liveGoroutines.Wait()
diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go
index adbad7764..16fa80abe 100644
--- a/pkg/sentry/kernel/pipe/node_test.go
+++ b/pkg/sentry/kernel/pipe/node_test.go
@@ -85,11 +85,11 @@ func testOpen(ctx context.Context, t *testing.T, n fs.InodeOperations, flags fs.
}
func newNamedPipe(t *testing.T) *Pipe {
- return NewPipe(contexttest.Context(t), true, DefaultPipeSize, usermem.PageSize)
+ return NewPipe(true, DefaultPipeSize, usermem.PageSize)
}
func newAnonPipe(t *testing.T) *Pipe {
- return NewPipe(contexttest.Context(t), false, DefaultPipeSize, usermem.PageSize)
+ return NewPipe(false, DefaultPipeSize, usermem.PageSize)
}
// assertRecvBlocks ensures that a recv attempt on c blocks for at least
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index 93b50669f..8e4e8e82e 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -98,7 +98,7 @@ type Pipe struct {
// NewPipe initializes and returns a pipe.
//
// N.B. The size and atomicIOBytes will be bounded.
-func NewPipe(ctx context.Context, isNamed bool, sizeBytes, atomicIOBytes int64) *Pipe {
+func NewPipe(isNamed bool, sizeBytes, atomicIOBytes int64) *Pipe {
if sizeBytes < MinimumPipeSize {
sizeBytes = MinimumPipeSize
}
@@ -121,7 +121,7 @@ func NewPipe(ctx context.Context, isNamed bool, sizeBytes, atomicIOBytes int64)
// NewConnectedPipe initializes a pipe and returns a pair of objects
// representing the read and write ends of the pipe.
func NewConnectedPipe(ctx context.Context, sizeBytes, atomicIOBytes int64) (*fs.File, *fs.File) {
- p := NewPipe(ctx, false /* isNamed */, sizeBytes, atomicIOBytes)
+ p := NewPipe(false /* isNamed */, sizeBytes, atomicIOBytes)
// Build an fs.Dirent for the pipe which will be shared by both
// returned files.
diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go
index c5d095af7..2e861a5a8 100644
--- a/pkg/sentry/kernel/posixtimer.go
+++ b/pkg/sentry/kernel/posixtimer.go
@@ -117,9 +117,9 @@ func (it *IntervalTimer) signalRejectedLocked() {
}
// Notify implements ktime.TimerListener.Notify.
-func (it *IntervalTimer) Notify(exp uint64) {
+func (it *IntervalTimer) Notify(exp uint64, setting ktime.Setting) (ktime.Setting, bool) {
if it.target == nil {
- return
+ return ktime.Setting{}, false
}
it.target.tg.pidns.owner.mu.RLock()
@@ -129,7 +129,7 @@ func (it *IntervalTimer) Notify(exp uint64) {
if it.sigpending {
it.overrunCur += exp
- return
+ return ktime.Setting{}, false
}
// sigpending must be set before sendSignalTimerLocked() so that it can be
@@ -148,6 +148,8 @@ func (it *IntervalTimer) Notify(exp uint64) {
if err := it.target.sendSignalTimerLocked(si, it.group, it); err != nil {
it.signalRejectedLocked()
}
+
+ return ktime.Setting{}, false
}
// Destroy implements ktime.TimerListener.Destroy. Users of Timer should call
diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go
index 78ff14b20..ce3e6ef28 100644
--- a/pkg/sentry/kernel/task_identity.go
+++ b/pkg/sentry/kernel/task_identity.go
@@ -465,8 +465,8 @@ func (t *Task) SetKeepCaps(k bool) {
// disables the features we don't support anyway, is always set. This
// drastically simplifies this function.
//
-// - We don't implement AT_SECURE, because no_new_privs always being set means
-// that the conditions that require AT_SECURE never arise. (Compare Linux's
+// - We don't set AT_SECURE = 1, because no_new_privs always being set means
+// that the conditions that require AT_SECURE = 1 never arise. (Compare Linux's
// security/commoncap.c:cap_bprm_set_creds() and cap_bprm_secureexec().)
//
// - We don't check for CAP_SYS_ADMIN in prctl(PR_SET_SECCOMP), since
diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go
index e76c069b0..8b148db35 100644
--- a/pkg/sentry/kernel/task_sched.go
+++ b/pkg/sentry/kernel/task_sched.go
@@ -126,12 +126,22 @@ func (t *Task) accountTaskGoroutineEnter(state TaskGoroutineState) {
t.gosched.Timestamp = now
t.gosched.State = state
t.goschedSeq.EndWrite()
+
+ if state != TaskGoroutineRunningApp {
+ // Task is blocking/stopping.
+ t.k.decRunningTasks()
+ }
}
// Preconditions: The caller must be running on the task goroutine, and leaving
// a state indicated by a previous call to
// t.accountTaskGoroutineEnter(state).
func (t *Task) accountTaskGoroutineLeave(state TaskGoroutineState) {
+ if state != TaskGoroutineRunningApp {
+ // Task is unblocking/continuing.
+ t.k.incRunningTasks()
+ }
+
now := t.k.CPUClockNow()
if t.gosched.State != state {
panic(fmt.Sprintf("Task goroutine switching from state %v (expected %v) to %v", t.gosched.State, state, TaskGoroutineRunningSys))
@@ -330,7 +340,7 @@ func newKernelCPUClockTicker(k *Kernel) *kernelCPUClockTicker {
}
// Notify implements ktime.TimerListener.Notify.
-func (ticker *kernelCPUClockTicker) Notify(exp uint64) {
+func (ticker *kernelCPUClockTicker) Notify(exp uint64, setting ktime.Setting) (ktime.Setting, bool) {
// Only increment cpuClock by 1 regardless of the number of expirations.
// This approximately compensates for cases where thread throttling or bad
// Go runtime scheduling prevents the kernelCPUClockTicker goroutine, and
@@ -426,6 +436,27 @@ func (ticker *kernelCPUClockTicker) Notify(exp uint64) {
tgs[i] = nil
}
ticker.tgs = tgs[:0]
+
+ // If nothing is running, we can disable the timer.
+ tasks := atomic.LoadInt64(&ticker.k.runningTasks)
+ if tasks == 0 {
+ ticker.k.runningTasksMu.Lock()
+ defer ticker.k.runningTasksMu.Unlock()
+ tasks := atomic.LoadInt64(&ticker.k.runningTasks)
+ if tasks != 0 {
+ // Raced with a 0 -> 1 transition.
+ return setting, false
+ }
+
+ // Stop the timer. We must cache the current setting so the
+ // kernel can access it without violating the lock order.
+ ticker.k.cpuClockTickerSetting = setting
+ ticker.k.cpuClockTickerDisabled = true
+ setting.Enabled = false
+ return setting, true
+ }
+
+ return setting, false
}
// Destroy implements ktime.TimerListener.Destroy.
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 0eef24bfb..72568d296 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -511,8 +511,9 @@ type itimerRealListener struct {
}
// Notify implements ktime.TimerListener.Notify.
-func (l *itimerRealListener) Notify(exp uint64) {
+func (l *itimerRealListener) Notify(exp uint64, setting ktime.Setting) (ktime.Setting, bool) {
l.tg.SendSignal(SignalInfoPriv(linux.SIGALRM))
+ return ktime.Setting{}, false
}
// Destroy implements ktime.TimerListener.Destroy.
diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go
index aa6c75d25..107394183 100644
--- a/pkg/sentry/kernel/time/time.go
+++ b/pkg/sentry/kernel/time/time.go
@@ -280,13 +280,16 @@ func (ClockEventsQueue) Readiness(mask waiter.EventMask) waiter.EventMask {
// A TimerListener receives expirations from a Timer.
type TimerListener interface {
// Notify is called when its associated Timer expires. exp is the number of
- // expirations.
+ // expirations. setting is the next timer Setting.
//
// Notify is called with the associated Timer's mutex locked, so Notify
// must not take any locks that precede Timer.mu in lock order.
//
+ // If Notify returns true, the timer will use the returned setting
+ // rather than the passed one.
+ //
// Preconditions: exp > 0.
- Notify(exp uint64)
+ Notify(exp uint64, setting Setting) (newSetting Setting, update bool)
// Destroy is called when the timer is destroyed.
Destroy()
@@ -533,7 +536,9 @@ func (t *Timer) Tick() {
s, exp := t.setting.At(now)
t.setting = s
if exp > 0 {
- t.listener.Notify(exp)
+ if newS, ok := t.listener.Notify(exp, t.setting); ok {
+ t.setting = newS
+ }
}
t.resetKickerLocked(now)
}
@@ -588,7 +593,9 @@ func (t *Timer) Get() (Time, Setting) {
s, exp := t.setting.At(now)
t.setting = s
if exp > 0 {
- t.listener.Notify(exp)
+ if newS, ok := t.listener.Notify(exp, t.setting); ok {
+ t.setting = newS
+ }
}
t.resetKickerLocked(now)
return now, s
@@ -620,7 +627,9 @@ func (t *Timer) SwapAnd(s Setting, f func()) (Time, Setting) {
}
oldS, oldExp := t.setting.At(now)
if oldExp > 0 {
- t.listener.Notify(oldExp)
+ t.listener.Notify(oldExp, oldS)
+ // N.B. The returned Setting doesn't matter because we're about
+ // to overwrite.
}
if f != nil {
f()
@@ -628,7 +637,9 @@ func (t *Timer) SwapAnd(s Setting, f func()) (Time, Setting) {
newS, newExp := s.At(now)
t.setting = newS
if newExp > 0 {
- t.listener.Notify(newExp)
+ if newS, ok := t.listener.Notify(newExp, t.setting); ok {
+ t.setting = newS
+ }
}
t.resetKickerLocked(now)
return now, oldS
@@ -683,11 +694,13 @@ func NewChannelNotifier() (TimerListener, <-chan struct{}) {
}
// Notify implements ktime.TimerListener.Notify.
-func (c *ChannelNotifier) Notify(uint64) {
+func (c *ChannelNotifier) Notify(uint64, Setting) (Setting, bool) {
select {
case c.tchan <- struct{}{}:
default:
}
+
+ return Setting{}, false
}
// Destroy implements ktime.TimerListener.Destroy and will close the channel.
diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go
index ba9c9ce12..2d9251e92 100644
--- a/pkg/sentry/loader/elf.go
+++ b/pkg/sentry/loader/elf.go
@@ -323,18 +323,22 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f *fs.File, phdr *elf.
return syserror.ENOEXEC
}
+ // N.B. Linux uses vm_brk_flags to map these pages, which only
+ // honors the X bit, always mapping at least RW. ignoring These
+ // pages are not included in the final brk region.
+ prot := usermem.ReadWrite
+ if phdr.Flags&elf.PF_X == elf.PF_X {
+ prot.Execute = true
+ }
+
if _, err := m.MMap(ctx, memmap.MMapOpts{
Length: uint64(anonSize),
Addr: anonAddr,
// Fixed without Unmap will fail the mmap if something is
// already at addr.
- Fixed: true,
- Private: true,
- // N.B. Linux uses vm_brk to map these pages, ignoring
- // the segment protections, instead always mapping RW.
- // These pages are not included in the final brk
- // region.
- Perms: usermem.ReadWrite,
+ Fixed: true,
+ Private: true,
+ Perms: prot,
MaxPerms: usermem.AnyAccess,
}); err != nil {
ctx.Infof("Error mapping PT_LOAD segment %v anonymous memory: %v", phdr, err)
diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go
index f6f1ae762..089d1635b 100644
--- a/pkg/sentry/loader/loader.go
+++ b/pkg/sentry/loader/loader.go
@@ -308,6 +308,9 @@ func Load(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace, r
arch.AuxEntry{linux.AT_EUID, usermem.Addr(c.EffectiveKUID.In(c.UserNamespace).OrOverflow())},
arch.AuxEntry{linux.AT_GID, usermem.Addr(c.RealKGID.In(c.UserNamespace).OrOverflow())},
arch.AuxEntry{linux.AT_EGID, usermem.Addr(c.EffectiveKGID.In(c.UserNamespace).OrOverflow())},
+ // The conditions that require AT_SECURE = 1 never arise. See
+ // kernel.Task.updateCredsForExecLocked.
+ arch.AuxEntry{linux.AT_SECURE, 0},
arch.AuxEntry{linux.AT_CLKTCK, linux.CLOCKS_PER_SEC},
arch.AuxEntry{linux.AT_EXECFN, execfn},
arch.AuxEntry{linux.AT_RANDOM, random},
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index 4f8f9c5d9..9f0ecfbe4 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -267,7 +267,7 @@ func (s *subprocess) newThread() *thread {
// attach attaches to the thread.
func (t *thread) attach() {
- if _, _, errno := syscall.RawSyscall(syscall.SYS_PTRACE, syscall.PTRACE_ATTACH, uintptr(t.tid), 0); errno != 0 {
+ if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_ATTACH, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("unable to attach: %v", errno))
}
@@ -417,7 +417,7 @@ func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) {
for {
// Execute the syscall instruction.
- if _, _, errno := syscall.RawSyscall(syscall.SYS_PTRACE, syscall.PTRACE_SYSCALL, uintptr(t.tid), 0); errno != 0 {
+ if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_SYSCALL, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
}
@@ -435,7 +435,7 @@ func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) {
}
// Complete the actual system call.
- if _, _, errno := syscall.RawSyscall(syscall.SYS_PTRACE, syscall.PTRACE_SYSCALL, uintptr(t.tid), 0); errno != 0 {
+ if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_SYSCALL, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
}
@@ -526,17 +526,17 @@ func (s *subprocess) switchToApp(c *context, ac arch.Context) bool {
for {
// Start running until the next system call.
if isSingleStepping(regs) {
- if _, _, errno := syscall.RawSyscall(
+ if _, _, errno := syscall.RawSyscall6(
syscall.SYS_PTRACE,
syscall.PTRACE_SYSEMU_SINGLESTEP,
- uintptr(t.tid), 0); errno != 0 {
+ uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("ptrace sysemu failed: %v", errno))
}
} else {
- if _, _, errno := syscall.RawSyscall(
+ if _, _, errno := syscall.RawSyscall6(
syscall.SYS_PTRACE,
syscall.PTRACE_SYSEMU,
- uintptr(t.tid), 0); errno != 0 {
+ uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("ptrace sysemu failed: %v", errno))
}
}
diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go
index f09b0b3d0..c075b5f91 100644
--- a/pkg/sentry/platform/ptrace/subprocess_linux.go
+++ b/pkg/sentry/platform/ptrace/subprocess_linux.go
@@ -53,7 +53,7 @@ func probeSeccomp() bool {
for {
// Attempt an emulation.
- if _, _, errno := syscall.RawSyscall(syscall.SYS_PTRACE, syscall.PTRACE_SYSEMU, uintptr(t.tid), 0); errno != 0 {
+ if _, _, errno := syscall.RawSyscall6(syscall.SYS_PTRACE, syscall.PTRACE_SYSEMU, uintptr(t.tid), 0, 0, 0, 0); errno != 0 {
panic(fmt.Sprintf("ptrace syscall-enter failed: %v", errno))
}
@@ -266,7 +266,7 @@ func attachedThread(flags uintptr, defaultAction linux.BPFAction) (*thread, erro
// Enable cpuid-faulting; this may fail on older kernels or hardware,
// so we just disregard the result. Host CPUID will be enabled.
- syscall.RawSyscall(syscall.SYS_ARCH_PRCTL, linux.ARCH_SET_CPUID, 0, 0)
+ syscall.RawSyscall6(syscall.SYS_ARCH_PRCTL, linux.ARCH_SET_CPUID, 0, 0, 0, 0, 0)
// Call the stub; should not return.
stubCall(stubStart, ppid)
diff --git a/pkg/sentry/sighandling/sighandling_unsafe.go b/pkg/sentry/sighandling/sighandling_unsafe.go
index eace3766d..c303435d5 100644
--- a/pkg/sentry/sighandling/sighandling_unsafe.go
+++ b/pkg/sentry/sighandling/sighandling_unsafe.go
@@ -23,7 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
)
-// TODO(b/34161764): Move to pkg/abi/linux along with definitions in
+// FIXME(gvisor.dev/issue/214): Move to pkg/abi/linux along with definitions in
// pkg/sentry/arch.
type sigaction struct {
handler uintptr
diff --git a/pkg/sentry/socket/epsocket/BUILD b/pkg/sentry/socket/netstack/BUILD
index e927821e1..60523f79a 100644
--- a/pkg/sentry/socket/epsocket/BUILD
+++ b/pkg/sentry/socket/netstack/BUILD
@@ -3,15 +3,15 @@ package(licenses = ["notice"])
load("//tools/go_stateify:defs.bzl", "go_library")
go_library(
- name = "epsocket",
+ name = "netstack",
srcs = [
"device.go",
- "epsocket.go",
+ "netstack.go",
"provider.go",
"save_restore.go",
"stack.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/socket/epsocket",
+ importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netstack",
visibility = [
"//pkg/sentry:internal",
],
diff --git a/pkg/sentry/socket/epsocket/device.go b/pkg/sentry/socket/netstack/device.go
index 85484d5b1..fbeb89fb8 100644
--- a/pkg/sentry/socket/epsocket/device.go
+++ b/pkg/sentry/socket/netstack/device.go
@@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package epsocket
+package netstack
import "gvisor.dev/gvisor/pkg/sentry/device"
-// epsocketDevice is the endpoint socket virtual device.
-var epsocketDevice = device.NewAnonDevice()
+// netstackDevice is the endpoint socket virtual device.
+var netstackDevice = device.NewAnonDevice()
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/netstack/netstack.go
index 3e66f9cbb..b3546471e 100644
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package epsocket provides an implementation of the socket.Socket interface
+// Package netstack provides an implementation of the socket.Socket interface
// that is backed by a tcpip.Endpoint.
//
// It does not depend on any particular endpoint implementation, and thus can
@@ -22,7 +22,7 @@
// Lock ordering: netstack => mm: ioSequencePayload copies user memory inside
// tcpip.Endpoint.Write(). Netstack is allowed to (and does) hold locks during
// this operation.
-package epsocket
+package netstack
import (
"bytes"
@@ -174,9 +174,13 @@ var Metrics = tcpip.Stats{
},
}
+// DefaultTTL is linux's default TTL. All network protocols in all stacks used
+// with this package must have this value set as their default TTL.
+const DefaultTTL = 64
+
const sizeOfInt32 int = 4
-var errStackType = syserr.New("expected but did not receive an epsocket.Stack", linux.EINVAL)
+var errStackType = syserr.New("expected but did not receive a netstack.Stack", linux.EINVAL)
// ntohs converts a 16-bit number from network byte order to host byte order. It
// assumes that the host is little endian.
@@ -262,8 +266,8 @@ type SocketOperations struct {
// valid when timestampValid is true. It is protected by readMu.
timestampNS int64
- // sockOptInq corresponds to TCP_INQ. It is implemented on the epsocket
- // level, because it takes into account data from readView.
+ // sockOptInq corresponds to TCP_INQ. It is implemented at this level
+ // because it takes into account data from readView.
sockOptInq bool
}
@@ -275,7 +279,7 @@ func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue
}
}
- dirent := socket.NewDirent(t, epsocketDevice)
+ dirent := socket.NewDirent(t, netstackDevice)
defer dirent.DecRef()
return fs.NewFile(t, dirent, fs.FileFlags{Read: true, Write: true, NonSeekable: true}, &SocketOperations{
Queue: queue,
@@ -760,7 +764,7 @@ func (s *SocketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error {
// tcpip.Endpoint.
func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
// TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is
- // implemented specifically for epsocket.SocketOperations rather than
+ // implemented specifically for netstack.SocketOperations rather than
// commonEndpoint. commonEndpoint should be extended to support socket
// options where the implementation is not shared, as unix sockets need
// their own support for SO_TIMESTAMP.
@@ -833,7 +837,7 @@ func GetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int,
return getSockOptIPv6(t, ep, name, outLen)
case linux.SOL_IP:
- return getSockOptIP(t, ep, name, outLen)
+ return getSockOptIP(t, ep, name, outLen, family)
case linux.SOL_UDP,
linux.SOL_ICMPV6,
@@ -942,6 +946,19 @@ func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family
return int32(v), nil
+ case linux.SO_BINDTODEVICE:
+ var v tcpip.BindToDeviceOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+ if len(v) == 0 {
+ return []byte{}, nil
+ }
+ if outLen < linux.IFNAMSIZ {
+ return nil, syserr.ErrInvalidArgument
+ }
+ return append([]byte(v), 0), nil
+
case linux.SO_BROADCAST:
if outLen < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
@@ -1163,8 +1180,25 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf
}
// getSockOptIP implements GetSockOpt when level is SOL_IP.
-func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) {
+func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family int) (interface{}, *syserr.Error) {
switch name {
+ case linux.IP_TTL:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ var v tcpip.TTLOption
+ if err := ep.GetSockOpt(&v); err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ // Fill in the default value, if needed.
+ if v == 0 {
+ v = DefaultTTL
+ }
+
+ return int32(v), nil
+
case linux.IP_MULTICAST_TTL:
if outLen < sizeOfInt32 {
return nil, syserr.ErrInvalidArgument
@@ -1216,7 +1250,7 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfac
// tcpip.Endpoint.
func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error {
// TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is
- // implemented specifically for epsocket.SocketOperations rather than
+ // implemented specifically for netstack.SocketOperations rather than
// commonEndpoint. commonEndpoint should be extended to support socket
// options where the implementation is not shared, as unix sockets need
// their own support for SO_TIMESTAMP.
@@ -1305,6 +1339,13 @@ func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i
v := usermem.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.ReusePortOption(v)))
+ case linux.SO_BINDTODEVICE:
+ n := bytes.IndexByte(optVal, 0)
+ if n == -1 {
+ n = len(optVal)
+ }
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.BindToDeviceOption(optVal[:n])))
+
case linux.SO_BROADCAST:
if len(optVal) < sizeOfInt32 {
return syserr.ErrInvalidArgument
@@ -1628,6 +1669,20 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
t.Kernel().EmitUnimplementedEvent(t)
return syserr.ErrInvalidArgument
+ case linux.IP_TTL:
+ v, err := parseIntOrChar(optVal)
+ if err != nil {
+ return err
+ }
+
+ // -1 means default TTL.
+ if v == -1 {
+ v = 0
+ } else if v < 1 || v > 255 {
+ return syserr.ErrInvalidArgument
+ }
+ return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TTLOption(v)))
+
case linux.IP_ADD_SOURCE_MEMBERSHIP,
linux.IP_BIND_ADDRESS_NO_PORT,
linux.IP_BLOCK_SOURCE,
@@ -1653,7 +1708,6 @@ func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *s
linux.IP_RETOPTS,
linux.IP_TOS,
linux.IP_TRANSPARENT,
- linux.IP_TTL,
linux.IP_UNBLOCK_SOURCE,
linux.IP_UNICAST_IF,
linux.IP_XFRM_POLICY,
@@ -2215,7 +2269,7 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
// Ioctl implements fs.FileOperations.Ioctl.
func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- // SIOCGSTAMP is implemented by epsocket rather than all commonEndpoint
+ // SIOCGSTAMP is implemented by netstack rather than all commonEndpoint
// sockets.
// TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP.
switch args[1].Int() {
@@ -2518,7 +2572,7 @@ func ifconfIoctl(ctx context.Context, io usermem.IO, ifc *linux.IFConf) error {
// Flag values and meanings are described in greater detail in netdevice(7) in
// the SIOCGIFFLAGS section.
func interfaceStatusFlags(stack inet.Stack, name string) (uint32, *syserr.Error) {
- // epsocket should only ever be passed an epsocket.Stack.
+ // We should only ever be passed a netstack.Stack.
epstack, ok := stack.(*Stack)
if !ok {
return 0, errStackType
diff --git a/pkg/sentry/socket/epsocket/provider.go b/pkg/sentry/socket/netstack/provider.go
index 421f93dc4..357a664cc 100644
--- a/pkg/sentry/socket/epsocket/provider.go
+++ b/pkg/sentry/socket/netstack/provider.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package epsocket
+package netstack
import (
"syscall"
@@ -65,7 +65,7 @@ func getTransportProtocol(ctx context.Context, stype linux.SockType, protocol in
// Raw sockets require CAP_NET_RAW.
creds := auth.CredentialsFromContext(ctx)
if !creds.HasCapability(linux.CAP_NET_RAW) {
- return 0, true, syserr.ErrPermissionDenied
+ return 0, true, syserr.ErrNotPermitted
}
switch protocol {
diff --git a/pkg/sentry/socket/epsocket/save_restore.go b/pkg/sentry/socket/netstack/save_restore.go
index f7b8c10cc..c7aaf722a 100644
--- a/pkg/sentry/socket/epsocket/save_restore.go
+++ b/pkg/sentry/socket/netstack/save_restore.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package epsocket
+package netstack
import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
diff --git a/pkg/sentry/socket/epsocket/stack.go b/pkg/sentry/socket/netstack/stack.go
index 7cf7ff735..fda0156e5 100644
--- a/pkg/sentry/socket/epsocket/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package epsocket
+package netstack
import (
"gvisor.dev/gvisor/pkg/abi/linux"
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index da9977fde..830f4da10 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -24,7 +24,7 @@ go_library(
"//pkg/sentry/safemem",
"//pkg/sentry/socket",
"//pkg/sentry/socket/control",
- "//pkg/sentry/socket/epsocket",
+ "//pkg/sentry/socket/netstack",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/usermem",
"//pkg/syserr",
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 0d0cb68df..50c308134 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -31,7 +31,7 @@ import (
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/sentry/socket/control"
- "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netstack"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/syserr"
@@ -40,8 +40,8 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
-// SocketOperations is a Unix socket. It is similar to an epsocket, except it
-// is backed by a transport.Endpoint instead of a tcpip.Endpoint.
+// SocketOperations is a Unix socket. It is similar to a netstack socket,
+// except it is backed by a transport.Endpoint instead of a tcpip.Endpoint.
//
// +stateify savable
type SocketOperations struct {
@@ -116,7 +116,7 @@ func (s *SocketOperations) Endpoint() transport.Endpoint {
// extractPath extracts and validates the address.
func extractPath(sockaddr []byte) (string, *syserr.Error) {
- addr, _, err := epsocket.AddressAndFamily(linux.AF_UNIX, sockaddr, true /* strict */)
+ addr, _, err := netstack.AddressAndFamily(linux.AF_UNIX, sockaddr, true /* strict */)
if err != nil {
return "", err
}
@@ -143,7 +143,7 @@ func (s *SocketOperations) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32,
return nil, 0, syserr.TranslateNetstackError(err)
}
- a, l := epsocket.ConvertAddress(linux.AF_UNIX, addr)
+ a, l := netstack.ConvertAddress(linux.AF_UNIX, addr)
return a, l, nil
}
@@ -155,19 +155,19 @@ func (s *SocketOperations) GetSockName(t *kernel.Task) (linux.SockAddr, uint32,
return nil, 0, syserr.TranslateNetstackError(err)
}
- a, l := epsocket.ConvertAddress(linux.AF_UNIX, addr)
+ a, l := netstack.ConvertAddress(linux.AF_UNIX, addr)
return a, l, nil
}
// Ioctl implements fs.FileOperations.Ioctl.
func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- return epsocket.Ioctl(ctx, s.ep, io, args)
+ return netstack.Ioctl(ctx, s.ep, io, args)
}
// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
// a transport.Endpoint.
func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
- return epsocket.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outLen)
+ return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outLen)
}
// Listen implements the linux syscall listen(2) for sockets backed by
@@ -474,13 +474,13 @@ func (s *SocketOperations) EventUnregister(e *waiter.Entry) {
// SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by
// a transport.Endpoint.
func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error {
- return epsocket.SetSockOpt(t, s, s.ep, level, name, optVal)
+ return netstack.SetSockOpt(t, s, s.ep, level, name, optVal)
}
// Shutdown implements the linux syscall shutdown(2) for sockets backed by
// a transport.Endpoint.
func (s *SocketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error {
- f, err := epsocket.ConvertShutdown(how)
+ f, err := netstack.ConvertShutdown(how)
if err != nil {
return err
}
@@ -546,7 +546,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
var from linux.SockAddr
var fromLen uint32
if r.From != nil && len([]byte(r.From.Addr)) != 0 {
- from, fromLen = epsocket.ConvertAddress(linux.AF_UNIX, *r.From)
+ from, fromLen = netstack.ConvertAddress(linux.AF_UNIX, *r.From)
}
if r.ControlTrunc {
@@ -581,7 +581,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
var from linux.SockAddr
var fromLen uint32
if r.From != nil {
- from, fromLen = epsocket.ConvertAddress(linux.AF_UNIX, *r.From)
+ from, fromLen = netstack.ConvertAddress(linux.AF_UNIX, *r.From)
}
if r.ControlTrunc {
diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD
index 7d7b42eba..72ebf766d 100644
--- a/pkg/sentry/strace/BUILD
+++ b/pkg/sentry/strace/BUILD
@@ -32,8 +32,8 @@ go_library(
"//pkg/sentry/arch",
"//pkg/sentry/kernel",
"//pkg/sentry/socket/control",
- "//pkg/sentry/socket/epsocket",
"//pkg/sentry/socket/netlink",
+ "//pkg/sentry/socket/netstack",
"//pkg/sentry/syscalls/linux",
"//pkg/sentry/usermem",
],
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index f779186ad..94334f6d2 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -23,8 +23,8 @@ import (
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket/control"
- "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
"gvisor.dev/gvisor/pkg/sentry/socket/netlink"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netstack"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/sentry/usermem"
)
@@ -332,7 +332,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string {
switch family {
case linux.AF_INET, linux.AF_INET6, linux.AF_UNIX:
- fa, _, err := epsocket.AddressAndFamily(int(family), b, true /* strict */)
+ fa, _, err := netstack.AddressAndFamily(int(family), b, true /* strict */)
if err != nil {
return fmt.Sprintf("%#x {Family: %s, error extracting address: %v}", addr, familyStr, err)
}
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index e76ee27d2..cf2a56bed 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -8,6 +8,8 @@ go_library(
"error.go",
"flags.go",
"linux64.go",
+ "linux64_amd64.go",
+ "linux64_arm64.go",
"sigset.go",
"sys_aio.go",
"sys_capability.go",
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 18d24ab61..b64c49ff5 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -1,4 +1,4 @@
-// Copyright 2018 The gVisor Authors.
+// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -15,377 +15,8 @@
// Package linux provides syscall tables for amd64 Linux.
package linux
-import (
- "gvisor.dev/gvisor/pkg/abi"
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/syscalls"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
- "gvisor.dev/gvisor/pkg/syserror"
+const (
+ _LINUX_SYSNAME = "Linux"
+ _LINUX_RELEASE = "4.4"
+ _LINUX_VERSION = "#1 SMP Sun Jan 10 15:06:54 PST 2016"
)
-
-// AUDIT_ARCH_X86_64 identifies the Linux syscall API on AMD64, and is taken
-// from <linux/audit.h>.
-const _AUDIT_ARCH_X86_64 = 0xc000003e
-
-// AMD64 is a table of Linux amd64 syscall API with the corresponding syscall
-// numbers from Linux 4.4.
-var AMD64 = &kernel.SyscallTable{
- OS: abi.Linux,
- Arch: arch.AMD64,
- Version: kernel.Version{
- // Version 4.4 is chosen as a stable, longterm version of Linux, which
- // guides the interface provided by this syscall table. The build
- // version is that for a clean build with default kernel config, at 5
- // minutes after v4.4 was tagged.
- Sysname: "Linux",
- Release: "4.4",
- Version: "#1 SMP Sun Jan 10 15:06:54 PST 2016",
- },
- AuditNumber: _AUDIT_ARCH_X86_64,
- Table: map[uintptr]kernel.Syscall{
- 0: syscalls.Supported("read", Read),
- 1: syscalls.Supported("write", Write),
- 2: syscalls.PartiallySupported("open", Open, "Options O_DIRECT, O_NOATIME, O_PATH, O_TMPFILE, O_SYNC are not supported.", nil),
- 3: syscalls.Supported("close", Close),
- 4: syscalls.Supported("stat", Stat),
- 5: syscalls.Supported("fstat", Fstat),
- 6: syscalls.Supported("lstat", Lstat),
- 7: syscalls.Supported("poll", Poll),
- 8: syscalls.Supported("lseek", Lseek),
- 9: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil),
- 10: syscalls.Supported("mprotect", Mprotect),
- 11: syscalls.Supported("munmap", Munmap),
- 12: syscalls.Supported("brk", Brk),
- 13: syscalls.Supported("rt_sigaction", RtSigaction),
- 14: syscalls.Supported("rt_sigprocmask", RtSigprocmask),
- 15: syscalls.Supported("rt_sigreturn", RtSigreturn),
- 16: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil),
- 17: syscalls.Supported("pread64", Pread64),
- 18: syscalls.Supported("pwrite64", Pwrite64),
- 19: syscalls.Supported("readv", Readv),
- 20: syscalls.Supported("writev", Writev),
- 21: syscalls.Supported("access", Access),
- 22: syscalls.Supported("pipe", Pipe),
- 23: syscalls.Supported("select", Select),
- 24: syscalls.Supported("sched_yield", SchedYield),
- 25: syscalls.Supported("mremap", Mremap),
- 26: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil),
- 27: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. Reports all mapped pages are resident.", nil),
- 28: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil),
- 29: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil),
- 30: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil),
- 31: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil),
- 32: syscalls.Supported("dup", Dup),
- 33: syscalls.Supported("dup2", Dup2),
- 34: syscalls.Supported("pause", Pause),
- 35: syscalls.Supported("nanosleep", Nanosleep),
- 36: syscalls.Supported("getitimer", Getitimer),
- 37: syscalls.Supported("alarm", Alarm),
- 38: syscalls.Supported("setitimer", Setitimer),
- 39: syscalls.Supported("getpid", Getpid),
- 40: syscalls.Supported("sendfile", Sendfile),
- 41: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. Limited support for SOCK_RAW.", nil),
- 42: syscalls.Supported("connect", Connect),
- 43: syscalls.Supported("accept", Accept),
- 44: syscalls.Supported("sendto", SendTo),
- 45: syscalls.Supported("recvfrom", RecvFrom),
- 46: syscalls.Supported("sendmsg", SendMsg),
- 47: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil),
- 48: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil),
- 49: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil),
- 50: syscalls.Supported("listen", Listen),
- 51: syscalls.Supported("getsockname", GetSockName),
- 52: syscalls.Supported("getpeername", GetPeerName),
- 53: syscalls.Supported("socketpair", SocketPair),
- 54: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil),
- 55: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options are supported.", nil),
- 56: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil),
- 57: syscalls.Supported("fork", Fork),
- 58: syscalls.Supported("vfork", Vfork),
- 59: syscalls.Supported("execve", Execve),
- 60: syscalls.Supported("exit", Exit),
- 61: syscalls.Supported("wait4", Wait4),
- 62: syscalls.Supported("kill", Kill),
- 63: syscalls.Supported("uname", Uname),
- 64: syscalls.Supported("semget", Semget),
- 65: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil),
- 66: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil),
- 67: syscalls.Supported("shmdt", Shmdt),
- 68: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
- 69: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
- 70: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
- 71: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
- 72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil),
- 73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil),
- 74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil),
- 75: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil),
- 76: syscalls.Supported("truncate", Truncate),
- 77: syscalls.Supported("ftruncate", Ftruncate),
- 78: syscalls.Supported("getdents", Getdents),
- 79: syscalls.Supported("getcwd", Getcwd),
- 80: syscalls.Supported("chdir", Chdir),
- 81: syscalls.Supported("fchdir", Fchdir),
- 82: syscalls.Supported("rename", Rename),
- 83: syscalls.Supported("mkdir", Mkdir),
- 84: syscalls.Supported("rmdir", Rmdir),
- 85: syscalls.Supported("creat", Creat),
- 86: syscalls.Supported("link", Link),
- 87: syscalls.Supported("unlink", Unlink),
- 88: syscalls.Supported("symlink", Symlink),
- 89: syscalls.Supported("readlink", Readlink),
- 90: syscalls.Supported("chmod", Chmod),
- 91: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil),
- 92: syscalls.Supported("chown", Chown),
- 93: syscalls.Supported("fchown", Fchown),
- 94: syscalls.Supported("lchown", Lchown),
- 95: syscalls.Supported("umask", Umask),
- 96: syscalls.Supported("gettimeofday", Gettimeofday),
- 97: syscalls.Supported("getrlimit", Getrlimit),
- 98: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. Fields ru_utime and ru_stime have low precision.", nil),
- 99: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil),
- 100: syscalls.Supported("times", Times),
- 101: syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil),
- 102: syscalls.Supported("getuid", Getuid),
- 103: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil),
- 104: syscalls.Supported("getgid", Getgid),
- 105: syscalls.Supported("setuid", Setuid),
- 106: syscalls.Supported("setgid", Setgid),
- 107: syscalls.Supported("geteuid", Geteuid),
- 108: syscalls.Supported("getegid", Getegid),
- 109: syscalls.Supported("setpgid", Setpgid),
- 110: syscalls.Supported("getppid", Getppid),
- 111: syscalls.Supported("getpgrp", Getpgrp),
- 112: syscalls.Supported("setsid", Setsid),
- 113: syscalls.Supported("setreuid", Setreuid),
- 114: syscalls.Supported("setregid", Setregid),
- 115: syscalls.Supported("getgroups", Getgroups),
- 116: syscalls.Supported("setgroups", Setgroups),
- 117: syscalls.Supported("setresuid", Setresuid),
- 118: syscalls.Supported("getresuid", Getresuid),
- 119: syscalls.Supported("setresgid", Setresgid),
- 120: syscalls.Supported("getresgid", Getresgid),
- 121: syscalls.Supported("getpgid", Getpgid),
- 122: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
- 123: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
- 124: syscalls.Supported("getsid", Getsid),
- 125: syscalls.Supported("capget", Capget),
- 126: syscalls.Supported("capset", Capset),
- 127: syscalls.Supported("rt_sigpending", RtSigpending),
- 128: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait),
- 129: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo),
- 130: syscalls.Supported("rt_sigsuspend", RtSigsuspend),
- 131: syscalls.Supported("sigaltstack", Sigaltstack),
- 132: syscalls.Supported("utime", Utime),
- 133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. Only regular file and FIFO creation are supported.", nil),
- 134: syscalls.Error("uselib", syserror.ENOSYS, "Obsolete", nil),
- 135: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil),
- 136: syscalls.ErrorWithEvent("ustat", syserror.ENOSYS, "Needs filesystem support.", nil),
- 137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil),
- 138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil),
- 139: syscalls.ErrorWithEvent("sysfs", syserror.ENOSYS, "", []string{"gvisor.dev/issue/165"}),
- 140: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil),
- 141: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil),
- 142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil),
- 143: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil),
- 144: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil),
- 145: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil),
- 146: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil),
- 147: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil),
- 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil),
- 149: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
- 150: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
- 151: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
- 152: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
- 153: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil),
- 154: syscalls.Error("modify_ldt", syserror.EPERM, "", nil),
- 155: syscalls.Error("pivot_root", syserror.EPERM, "", nil),
- 156: syscalls.Error("sysctl", syserror.EPERM, "Deprecated. Use /proc/sys instead.", nil),
- 157: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil),
- 158: syscalls.PartiallySupported("arch_prctl", ArchPrctl, "Options ARCH_GET_GS, ARCH_SET_GS not supported.", nil),
- 159: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil),
- 160: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil),
- 161: syscalls.Supported("chroot", Chroot),
- 162: syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil),
- 163: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil),
- 164: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil),
- 165: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil),
- 166: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil),
- 167: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil),
- 168: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil),
- 169: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil),
- 170: syscalls.Supported("sethostname", Sethostname),
- 171: syscalls.Supported("setdomainname", Setdomainname),
- 172: syscalls.CapError("iopl", linux.CAP_SYS_RAWIO, "", nil),
- 173: syscalls.CapError("ioperm", linux.CAP_SYS_RAWIO, "", nil),
- 174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil),
- 175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil),
- 176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil),
- 177: syscalls.Error("get_kernel_syms", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil),
- 178: syscalls.Error("query_module", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil),
- 179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations
- 180: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil),
- 181: syscalls.Error("getpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil),
- 182: syscalls.Error("putpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil),
- 183: syscalls.Error("afs_syscall", syserror.ENOSYS, "Not implemented in Linux.", nil),
- 184: syscalls.Error("tuxcall", syserror.ENOSYS, "Not implemented in Linux.", nil),
- 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil),
- 186: syscalls.Supported("gettid", Gettid),
- 187: syscalls.ErrorWithEvent("readahead", syserror.ENOSYS, "", []string{"gvisor.dev/issue/261"}), // TODO(b/29351341)
- 188: syscalls.Error("setxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 189: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 190: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
- 200: syscalls.Supported("tkill", Tkill),
- 201: syscalls.Supported("time", Time),
- 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil),
- 203: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil),
- 204: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil),
- 205: syscalls.Error("set_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
- 206: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 207: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 208: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 209: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 210: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 211: syscalls.Error("get_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
- 212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
- 213: syscalls.Supported("epoll_create", EpollCreate),
- 214: syscalls.ErrorWithEvent("epoll_ctl_old", syserror.ENOSYS, "Deprecated.", nil),
- 215: syscalls.ErrorWithEvent("epoll_wait_old", syserror.ENOSYS, "Deprecated.", nil),
- 216: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil),
- 217: syscalls.Supported("getdents64", Getdents64),
- 218: syscalls.Supported("set_tid_address", SetTidAddress),
- 219: syscalls.Supported("restart_syscall", RestartSyscall),
- 220: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920)
- 221: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil),
- 222: syscalls.Supported("timer_create", TimerCreate),
- 223: syscalls.Supported("timer_settime", TimerSettime),
- 224: syscalls.Supported("timer_gettime", TimerGettime),
- 225: syscalls.Supported("timer_getoverrun", TimerGetoverrun),
- 226: syscalls.Supported("timer_delete", TimerDelete),
- 227: syscalls.Supported("clock_settime", ClockSettime),
- 228: syscalls.Supported("clock_gettime", ClockGettime),
- 229: syscalls.Supported("clock_getres", ClockGetres),
- 230: syscalls.Supported("clock_nanosleep", ClockNanosleep),
- 231: syscalls.Supported("exit_group", ExitGroup),
- 232: syscalls.Supported("epoll_wait", EpollWait),
- 233: syscalls.Supported("epoll_ctl", EpollCtl),
- 234: syscalls.Supported("tgkill", Tgkill),
- 235: syscalls.Supported("utimes", Utimes),
- 236: syscalls.Error("vserver", syserror.ENOSYS, "Not implemented by Linux", nil),
- 237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}),
- 238: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil),
- 239: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil),
- 240: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 241: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 242: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 243: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 244: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 245: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
- 246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil),
- 247: syscalls.Supported("waitid", Waitid),
- 248: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil),
- 249: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil),
- 250: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil),
- 251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
- 252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
- 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil),
- 254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil),
- 255: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil),
- 256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil),
- 257: syscalls.Supported("openat", Openat),
- 258: syscalls.Supported("mkdirat", Mkdirat),
- 259: syscalls.Supported("mknodat", Mknodat),
- 260: syscalls.Supported("fchownat", Fchownat),
- 261: syscalls.Supported("futimesat", Futimesat),
- 262: syscalls.Supported("fstatat", Fstatat),
- 263: syscalls.Supported("unlinkat", Unlinkat),
- 264: syscalls.Supported("renameat", Renameat),
- 265: syscalls.Supported("linkat", Linkat),
- 266: syscalls.Supported("symlinkat", Symlinkat),
- 267: syscalls.Supported("readlinkat", Readlinkat),
- 268: syscalls.Supported("fchmodat", Fchmodat),
- 269: syscalls.Supported("faccessat", Faccessat),
- 270: syscalls.Supported("pselect", Pselect),
- 271: syscalls.Supported("ppoll", Ppoll),
- 272: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil),
- 273: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil),
- 274: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil),
- 275: syscalls.Supported("splice", Splice),
- 276: syscalls.Supported("tee", Tee),
- 277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil),
- 278: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
- 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly)
- 280: syscalls.Supported("utimensat", Utimensat),
- 281: syscalls.Supported("epoll_pwait", EpollPwait),
- 282: syscalls.PartiallySupported("signalfd", Signalfd, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}),
- 283: syscalls.Supported("timerfd_create", TimerfdCreate),
- 284: syscalls.Supported("eventfd", Eventfd),
- 285: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil),
- 286: syscalls.Supported("timerfd_settime", TimerfdSettime),
- 287: syscalls.Supported("timerfd_gettime", TimerfdGettime),
- 288: syscalls.Supported("accept4", Accept4),
- 289: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}),
- 290: syscalls.Supported("eventfd2", Eventfd2),
- 291: syscalls.Supported("epoll_create1", EpollCreate1),
- 292: syscalls.Supported("dup3", Dup3),
- 293: syscalls.Supported("pipe2", Pipe2),
- 294: syscalls.Supported("inotify_init1", InotifyInit1),
- 295: syscalls.Supported("preadv", Preadv),
- 296: syscalls.Supported("pwritev", Pwritev),
- 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo),
- 298: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil),
- 299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil),
- 300: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
- 301: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
- 302: syscalls.Supported("prlimit64", Prlimit64),
- 303: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
- 304: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
- 305: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil),
- 306: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil),
- 307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil),
- 308: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995)
- 309: syscalls.Supported("getcpu", Getcpu),
- 310: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
- 311: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
- 312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil),
- 313: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil),
- 314: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
- 315: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
- 316: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
- 317: syscalls.Supported("seccomp", Seccomp),
- 318: syscalls.Supported("getrandom", GetRandom),
- 319: syscalls.Supported("memfd_create", MemfdCreate),
- 320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil),
- 321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
- 322: syscalls.ErrorWithEvent("execveat", syserror.ENOSYS, "", []string{"gvisor.dev/issue/265"}), // TODO(b/118901836)
- 323: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
- 324: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(b/118904897)
- 325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
-
- // Syscalls after 325 are "backports" from versions of Linux after 4.4.
- 326: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil),
- 327: syscalls.Supported("preadv2", Preadv2),
- 328: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil),
- 332: syscalls.Supported("statx", Statx),
- },
-
- Emulate: map[usermem.Addr]uintptr{
- 0xffffffffff600000: 96, // vsyscall gettimeofday(2)
- 0xffffffffff600400: 201, // vsyscall time(2)
- 0xffffffffff600800: 309, // vsyscall getcpu(2)
- },
- Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
- t.Kernel().EmitUnimplementedEvent(t)
- return 0, syserror.ENOSYS
- },
-}
diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go
new file mode 100644
index 000000000..e215ac049
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/linux64_amd64.go
@@ -0,0 +1,386 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/syscalls"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// AMD64 is a table of Linux amd64 syscall API with the corresponding syscall
+// numbers from Linux 4.4.
+var AMD64 = &kernel.SyscallTable{
+ OS: abi.Linux,
+ Arch: arch.AMD64,
+ Version: kernel.Version{
+ // Version 4.4 is chosen as a stable, longterm version of Linux, which
+ // guides the interface provided by this syscall table. The build
+ // version is that for a clean build with default kernel config, at 5
+ // minutes after v4.4 was tagged.
+ Sysname: _LINUX_SYSNAME,
+ Release: _LINUX_RELEASE,
+ Version: _LINUX_VERSION,
+ },
+ AuditNumber: linux.AUDIT_ARCH_X86_64,
+ Table: map[uintptr]kernel.Syscall{
+ 0: syscalls.Supported("read", Read),
+ 1: syscalls.Supported("write", Write),
+ 2: syscalls.PartiallySupported("open", Open, "Options O_DIRECT, O_NOATIME, O_PATH, O_TMPFILE, O_SYNC are not supported.", nil),
+ 3: syscalls.Supported("close", Close),
+ 4: syscalls.Supported("stat", Stat),
+ 5: syscalls.Supported("fstat", Fstat),
+ 6: syscalls.Supported("lstat", Lstat),
+ 7: syscalls.Supported("poll", Poll),
+ 8: syscalls.Supported("lseek", Lseek),
+ 9: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil),
+ 10: syscalls.Supported("mprotect", Mprotect),
+ 11: syscalls.Supported("munmap", Munmap),
+ 12: syscalls.Supported("brk", Brk),
+ 13: syscalls.Supported("rt_sigaction", RtSigaction),
+ 14: syscalls.Supported("rt_sigprocmask", RtSigprocmask),
+ 15: syscalls.Supported("rt_sigreturn", RtSigreturn),
+ 16: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil),
+ 17: syscalls.Supported("pread64", Pread64),
+ 18: syscalls.Supported("pwrite64", Pwrite64),
+ 19: syscalls.Supported("readv", Readv),
+ 20: syscalls.Supported("writev", Writev),
+ 21: syscalls.Supported("access", Access),
+ 22: syscalls.Supported("pipe", Pipe),
+ 23: syscalls.Supported("select", Select),
+ 24: syscalls.Supported("sched_yield", SchedYield),
+ 25: syscalls.Supported("mremap", Mremap),
+ 26: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil),
+ 27: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. Reports all mapped pages are resident.", nil),
+ 28: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil),
+ 29: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil),
+ 30: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil),
+ 31: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil),
+ 32: syscalls.Supported("dup", Dup),
+ 33: syscalls.Supported("dup2", Dup2),
+ 34: syscalls.Supported("pause", Pause),
+ 35: syscalls.Supported("nanosleep", Nanosleep),
+ 36: syscalls.Supported("getitimer", Getitimer),
+ 37: syscalls.Supported("alarm", Alarm),
+ 38: syscalls.Supported("setitimer", Setitimer),
+ 39: syscalls.Supported("getpid", Getpid),
+ 40: syscalls.Supported("sendfile", Sendfile),
+ 41: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. Limited support for SOCK_RAW.", nil),
+ 42: syscalls.Supported("connect", Connect),
+ 43: syscalls.Supported("accept", Accept),
+ 44: syscalls.Supported("sendto", SendTo),
+ 45: syscalls.Supported("recvfrom", RecvFrom),
+ 46: syscalls.Supported("sendmsg", SendMsg),
+ 47: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil),
+ 48: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil),
+ 49: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil),
+ 50: syscalls.Supported("listen", Listen),
+ 51: syscalls.Supported("getsockname", GetSockName),
+ 52: syscalls.Supported("getpeername", GetPeerName),
+ 53: syscalls.Supported("socketpair", SocketPair),
+ 54: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil),
+ 55: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options are supported.", nil),
+ 56: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil),
+ 57: syscalls.Supported("fork", Fork),
+ 58: syscalls.Supported("vfork", Vfork),
+ 59: syscalls.Supported("execve", Execve),
+ 60: syscalls.Supported("exit", Exit),
+ 61: syscalls.Supported("wait4", Wait4),
+ 62: syscalls.Supported("kill", Kill),
+ 63: syscalls.Supported("uname", Uname),
+ 64: syscalls.Supported("semget", Semget),
+ 65: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil),
+ 66: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil),
+ 67: syscalls.Supported("shmdt", Shmdt),
+ 68: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 69: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 70: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 71: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil),
+ 73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil),
+ 74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil),
+ 75: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil),
+ 76: syscalls.Supported("truncate", Truncate),
+ 77: syscalls.Supported("ftruncate", Ftruncate),
+ 78: syscalls.Supported("getdents", Getdents),
+ 79: syscalls.Supported("getcwd", Getcwd),
+ 80: syscalls.Supported("chdir", Chdir),
+ 81: syscalls.Supported("fchdir", Fchdir),
+ 82: syscalls.Supported("rename", Rename),
+ 83: syscalls.Supported("mkdir", Mkdir),
+ 84: syscalls.Supported("rmdir", Rmdir),
+ 85: syscalls.Supported("creat", Creat),
+ 86: syscalls.Supported("link", Link),
+ 87: syscalls.Supported("unlink", Unlink),
+ 88: syscalls.Supported("symlink", Symlink),
+ 89: syscalls.Supported("readlink", Readlink),
+ 90: syscalls.Supported("chmod", Chmod),
+ 91: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil),
+ 92: syscalls.Supported("chown", Chown),
+ 93: syscalls.Supported("fchown", Fchown),
+ 94: syscalls.Supported("lchown", Lchown),
+ 95: syscalls.Supported("umask", Umask),
+ 96: syscalls.Supported("gettimeofday", Gettimeofday),
+ 97: syscalls.Supported("getrlimit", Getrlimit),
+ 98: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. Fields ru_utime and ru_stime have low precision.", nil),
+ 99: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil),
+ 100: syscalls.Supported("times", Times),
+ 101: syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil),
+ 102: syscalls.Supported("getuid", Getuid),
+ 103: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil),
+ 104: syscalls.Supported("getgid", Getgid),
+ 105: syscalls.Supported("setuid", Setuid),
+ 106: syscalls.Supported("setgid", Setgid),
+ 107: syscalls.Supported("geteuid", Geteuid),
+ 108: syscalls.Supported("getegid", Getegid),
+ 109: syscalls.Supported("setpgid", Setpgid),
+ 110: syscalls.Supported("getppid", Getppid),
+ 111: syscalls.Supported("getpgrp", Getpgrp),
+ 112: syscalls.Supported("setsid", Setsid),
+ 113: syscalls.Supported("setreuid", Setreuid),
+ 114: syscalls.Supported("setregid", Setregid),
+ 115: syscalls.Supported("getgroups", Getgroups),
+ 116: syscalls.Supported("setgroups", Setgroups),
+ 117: syscalls.Supported("setresuid", Setresuid),
+ 118: syscalls.Supported("getresuid", Getresuid),
+ 119: syscalls.Supported("setresgid", Setresgid),
+ 120: syscalls.Supported("getresgid", Getresgid),
+ 121: syscalls.Supported("getpgid", Getpgid),
+ 122: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 123: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 124: syscalls.Supported("getsid", Getsid),
+ 125: syscalls.Supported("capget", Capget),
+ 126: syscalls.Supported("capset", Capset),
+ 127: syscalls.Supported("rt_sigpending", RtSigpending),
+ 128: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait),
+ 129: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo),
+ 130: syscalls.Supported("rt_sigsuspend", RtSigsuspend),
+ 131: syscalls.Supported("sigaltstack", Sigaltstack),
+ 132: syscalls.Supported("utime", Utime),
+ 133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. Only regular file and FIFO creation are supported.", nil),
+ 134: syscalls.Error("uselib", syserror.ENOSYS, "Obsolete", nil),
+ 135: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil),
+ 136: syscalls.ErrorWithEvent("ustat", syserror.ENOSYS, "Needs filesystem support.", nil),
+ 137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil),
+ 138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil),
+ 139: syscalls.ErrorWithEvent("sysfs", syserror.ENOSYS, "", []string{"gvisor.dev/issue/165"}),
+ 140: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil),
+ 141: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil),
+ 142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil),
+ 143: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil),
+ 144: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil),
+ 145: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil),
+ 146: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil),
+ 147: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil),
+ 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil),
+ 149: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 150: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 151: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 152: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 153: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil),
+ 154: syscalls.Error("modify_ldt", syserror.EPERM, "", nil),
+ 155: syscalls.Error("pivot_root", syserror.EPERM, "", nil),
+ 156: syscalls.Error("sysctl", syserror.EPERM, "Deprecated. Use /proc/sys instead.", nil),
+ 157: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil),
+ 158: syscalls.PartiallySupported("arch_prctl", ArchPrctl, "Options ARCH_GET_GS, ARCH_SET_GS not supported.", nil),
+ 159: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil),
+ 160: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil),
+ 161: syscalls.Supported("chroot", Chroot),
+ 162: syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil),
+ 163: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil),
+ 164: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil),
+ 165: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil),
+ 166: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil),
+ 167: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil),
+ 168: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil),
+ 169: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil),
+ 170: syscalls.Supported("sethostname", Sethostname),
+ 171: syscalls.Supported("setdomainname", Setdomainname),
+ 172: syscalls.CapError("iopl", linux.CAP_SYS_RAWIO, "", nil),
+ 173: syscalls.CapError("ioperm", linux.CAP_SYS_RAWIO, "", nil),
+ 174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil),
+ 175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil),
+ 176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil),
+ 177: syscalls.Error("get_kernel_syms", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil),
+ 178: syscalls.Error("query_module", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil),
+ 179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations
+ 180: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil),
+ 181: syscalls.Error("getpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 182: syscalls.Error("putpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 183: syscalls.Error("afs_syscall", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 184: syscalls.Error("tuxcall", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil),
+ 186: syscalls.Supported("gettid", Gettid),
+ 187: syscalls.Supported("readahead", Readahead),
+ 188: syscalls.Error("setxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 189: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 190: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 200: syscalls.Supported("tkill", Tkill),
+ 201: syscalls.Supported("time", Time),
+ 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil),
+ 203: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil),
+ 204: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil),
+ 205: syscalls.Error("set_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
+ 206: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 207: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 208: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 209: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 210: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 211: syscalls.Error("get_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
+ 212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
+ 213: syscalls.Supported("epoll_create", EpollCreate),
+ 214: syscalls.ErrorWithEvent("epoll_ctl_old", syserror.ENOSYS, "Deprecated.", nil),
+ 215: syscalls.ErrorWithEvent("epoll_wait_old", syserror.ENOSYS, "Deprecated.", nil),
+ 216: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil),
+ 217: syscalls.Supported("getdents64", Getdents64),
+ 218: syscalls.Supported("set_tid_address", SetTidAddress),
+ 219: syscalls.Supported("restart_syscall", RestartSyscall),
+ 220: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920)
+ 221: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil),
+ 222: syscalls.Supported("timer_create", TimerCreate),
+ 223: syscalls.Supported("timer_settime", TimerSettime),
+ 224: syscalls.Supported("timer_gettime", TimerGettime),
+ 225: syscalls.Supported("timer_getoverrun", TimerGetoverrun),
+ 226: syscalls.Supported("timer_delete", TimerDelete),
+ 227: syscalls.Supported("clock_settime", ClockSettime),
+ 228: syscalls.Supported("clock_gettime", ClockGettime),
+ 229: syscalls.Supported("clock_getres", ClockGetres),
+ 230: syscalls.Supported("clock_nanosleep", ClockNanosleep),
+ 231: syscalls.Supported("exit_group", ExitGroup),
+ 232: syscalls.Supported("epoll_wait", EpollWait),
+ 233: syscalls.Supported("epoll_ctl", EpollCtl),
+ 234: syscalls.Supported("tgkill", Tgkill),
+ 235: syscalls.Supported("utimes", Utimes),
+ 236: syscalls.Error("vserver", syserror.ENOSYS, "Not implemented by Linux", nil),
+ 237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}),
+ 238: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil),
+ 239: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil),
+ 240: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 241: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 242: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 243: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 244: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 245: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil),
+ 247: syscalls.Supported("waitid", Waitid),
+ 248: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil),
+ 249: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil),
+ 250: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil),
+ 251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
+ 252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
+ 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil),
+ 254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil),
+ 255: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil),
+ 256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil),
+ 257: syscalls.Supported("openat", Openat),
+ 258: syscalls.Supported("mkdirat", Mkdirat),
+ 259: syscalls.Supported("mknodat", Mknodat),
+ 260: syscalls.Supported("fchownat", Fchownat),
+ 261: syscalls.Supported("futimesat", Futimesat),
+ 262: syscalls.Supported("fstatat", Fstatat),
+ 263: syscalls.Supported("unlinkat", Unlinkat),
+ 264: syscalls.Supported("renameat", Renameat),
+ 265: syscalls.Supported("linkat", Linkat),
+ 266: syscalls.Supported("symlinkat", Symlinkat),
+ 267: syscalls.Supported("readlinkat", Readlinkat),
+ 268: syscalls.Supported("fchmodat", Fchmodat),
+ 269: syscalls.Supported("faccessat", Faccessat),
+ 270: syscalls.Supported("pselect", Pselect),
+ 271: syscalls.Supported("ppoll", Ppoll),
+ 272: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil),
+ 273: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil),
+ 274: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil),
+ 275: syscalls.Supported("splice", Splice),
+ 276: syscalls.Supported("tee", Tee),
+ 277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil),
+ 278: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly)
+ 280: syscalls.Supported("utimensat", Utimensat),
+ 281: syscalls.Supported("epoll_pwait", EpollPwait),
+ 282: syscalls.PartiallySupported("signalfd", Signalfd, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}),
+ 283: syscalls.Supported("timerfd_create", TimerfdCreate),
+ 284: syscalls.Supported("eventfd", Eventfd),
+ 285: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil),
+ 286: syscalls.Supported("timerfd_settime", TimerfdSettime),
+ 287: syscalls.Supported("timerfd_gettime", TimerfdGettime),
+ 288: syscalls.Supported("accept4", Accept4),
+ 289: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}),
+ 290: syscalls.Supported("eventfd2", Eventfd2),
+ 291: syscalls.Supported("epoll_create1", EpollCreate1),
+ 292: syscalls.Supported("dup3", Dup3),
+ 293: syscalls.Supported("pipe2", Pipe2),
+ 294: syscalls.Supported("inotify_init1", InotifyInit1),
+ 295: syscalls.Supported("preadv", Preadv),
+ 296: syscalls.Supported("pwritev", Pwritev),
+ 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo),
+ 298: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil),
+ 299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil),
+ 300: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 301: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 302: syscalls.Supported("prlimit64", Prlimit64),
+ 303: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
+ 304: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
+ 305: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil),
+ 306: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil),
+ 307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil),
+ 308: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995)
+ 309: syscalls.Supported("getcpu", Getcpu),
+ 310: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 311: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil),
+ 313: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil),
+ 314: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 315: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 316: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
+ 317: syscalls.Supported("seccomp", Seccomp),
+ 318: syscalls.Supported("getrandom", GetRandom),
+ 319: syscalls.Supported("memfd_create", MemfdCreate),
+ 320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil),
+ 321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
+ 322: syscalls.ErrorWithEvent("execveat", syserror.ENOSYS, "", []string{"gvisor.dev/issue/265"}), // TODO(b/118901836)
+ 323: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
+ 324: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(b/118904897)
+ 325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+
+ // Syscalls after 325 are "backports" from versions of Linux after 4.4.
+ 326: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil),
+ 327: syscalls.Supported("preadv2", Preadv2),
+ 328: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil),
+ 332: syscalls.Supported("statx", Statx),
+ },
+
+ Emulate: map[usermem.Addr]uintptr{
+ 0xffffffffff600000: 96, // vsyscall gettimeofday(2)
+ 0xffffffffff600400: 201, // vsyscall time(2)
+ 0xffffffffff600800: 309, // vsyscall getcpu(2)
+ },
+ Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
+ t.Kernel().EmitUnimplementedEvent(t)
+ return 0, syserror.ENOSYS
+ },
+}
diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go
new file mode 100644
index 000000000..1d3b63020
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/linux64_arm64.go
@@ -0,0 +1,313 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/syscalls"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// ARM64 is a table of Linux arm64 syscall API with the corresponding syscall
+// numbers from Linux 4.4.
+var ARM64 = &kernel.SyscallTable{
+ OS: abi.Linux,
+ Arch: arch.ARM64,
+ Version: kernel.Version{
+ Sysname: _LINUX_SYSNAME,
+ Release: _LINUX_RELEASE,
+ Version: _LINUX_VERSION,
+ },
+ AuditNumber: linux.AUDIT_ARCH_AARCH64,
+ Table: map[uintptr]kernel.Syscall{
+ 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 5: syscalls.Error("setxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 6: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 7: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 12: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", nil),
+ 17: syscalls.Supported("getcwd", Getcwd),
+ 18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
+ 19: syscalls.Supported("eventfd2", Eventfd2),
+ 20: syscalls.Supported("epoll_create1", EpollCreate1),
+ 21: syscalls.Supported("epoll_ctl", EpollCtl),
+ 22: syscalls.Supported("epoll_pwait", EpollPwait),
+ 23: syscalls.Supported("dup", Dup),
+ 24: syscalls.Supported("dup3", Dup3),
+ 26: syscalls.Supported("inotify_init1", InotifyInit1),
+ 27: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil),
+ 28: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil),
+ 29: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil),
+ 30: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
+ 31: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
+ 32: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil),
+ 33: syscalls.Supported("mknodat", Mknodat),
+ 34: syscalls.Supported("mkdirat", Mkdirat),
+ 35: syscalls.Supported("unlinkat", Unlinkat),
+ 36: syscalls.Supported("symlinkat", Symlinkat),
+ 37: syscalls.Supported("linkat", Linkat),
+ 38: syscalls.Supported("renameat", Renameat),
+ 39: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil),
+ 40: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil),
+ 41: syscalls.Error("pivot_root", syserror.EPERM, "", nil),
+ 42: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil),
+ 44: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil),
+ 46: syscalls.Supported("ftruncate", Ftruncate),
+ 47: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil),
+ 48: syscalls.Supported("faccessat", Faccessat),
+ 49: syscalls.Supported("chdir", Chdir),
+ 50: syscalls.Supported("fchdir", Fchdir),
+ 51: syscalls.Supported("chroot", Chroot),
+ 52: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil),
+ 53: syscalls.Supported("fchmodat", Fchmodat),
+ 54: syscalls.Supported("fchownat", Fchownat),
+ 55: syscalls.Supported("fchown", Fchown),
+ 56: syscalls.Supported("openat", Openat),
+ 57: syscalls.Supported("close", Close),
+ 58: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil),
+ 59: syscalls.Supported("pipe2", Pipe2),
+ 60: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations
+ 61: syscalls.Supported("getdents64", Getdents64),
+ 62: syscalls.Supported("lseek", Lseek),
+ 63: syscalls.Supported("read", Read),
+ 64: syscalls.Supported("write", Write),
+ 65: syscalls.Supported("readv", Readv),
+ 66: syscalls.Supported("writev", Writev),
+ 67: syscalls.Supported("pread64", Pread64),
+ 68: syscalls.Supported("pwrite64", Pwrite64),
+ 69: syscalls.Supported("preadv", Preadv),
+ 70: syscalls.Supported("pwritev", Pwritev),
+ 71: syscalls.Supported("sendfile", Sendfile),
+ 72: syscalls.Supported("pselect", Pselect),
+ 73: syscalls.Supported("ppoll", Ppoll),
+ 74: syscalls.ErrorWithEvent("signalfd4", syserror.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426)
+ 75: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 76: syscalls.PartiallySupported("splice", Splice, "Stub implementation.", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 77: syscalls.ErrorWithEvent("tee", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 78: syscalls.Supported("readlinkat", Readlinkat),
+ 80: syscalls.Supported("fstat", Fstat),
+ 81: syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil),
+ 82: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil),
+ 83: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil),
+ 84: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil),
+ 85: syscalls.Supported("timerfd_create", TimerfdCreate),
+ 86: syscalls.Supported("timerfd_settime", TimerfdSettime),
+ 87: syscalls.Supported("timerfd_gettime", TimerfdGettime),
+ 88: syscalls.Supported("utimensat", Utimensat),
+ 89: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil),
+ 90: syscalls.Supported("capget", Capget),
+ 91: syscalls.Supported("capset", Capset),
+ 92: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil),
+ 93: syscalls.Supported("exit", Exit),
+ 94: syscalls.Supported("exit_group", ExitGroup),
+ 95: syscalls.Supported("waitid", Waitid),
+ 96: syscalls.Supported("set_tid_address", SetTidAddress),
+ 97: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil),
+ 98: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil),
+ 99: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil),
+ 100: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil),
+ 101: syscalls.Supported("nanosleep", Nanosleep),
+ 102: syscalls.Supported("getitimer", Getitimer),
+ 103: syscalls.Supported("setitimer", Setitimer),
+ 104: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil),
+ 105: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil),
+ 106: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil),
+ 107: syscalls.Supported("timer_create", TimerCreate),
+ 108: syscalls.Supported("timer_gettime", TimerGettime),
+ 109: syscalls.Supported("timer_getoverrun", TimerGetoverrun),
+ 110: syscalls.Supported("timer_settime", TimerSettime),
+ 111: syscalls.Supported("timer_delete", TimerDelete),
+ 112: syscalls.Supported("clock_settime", ClockSettime),
+ 113: syscalls.Supported("clock_gettime", ClockGettime),
+ 114: syscalls.Supported("clock_getres", ClockGetres),
+ 115: syscalls.Supported("clock_nanosleep", ClockNanosleep),
+ 116: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil),
+ 117: syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil),
+ 118: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil),
+ 119: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil),
+ 120: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil),
+ 121: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil),
+ 122: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil),
+ 123: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil),
+ 124: syscalls.Supported("sched_yield", SchedYield),
+ 125: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil),
+ 126: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil),
+ 127: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil),
+ 128: syscalls.Supported("restart_syscall", RestartSyscall),
+ 129: syscalls.Supported("kill", Kill),
+ 130: syscalls.Supported("tkill", Tkill),
+ 131: syscalls.Supported("tgkill", Tgkill),
+ 132: syscalls.Supported("sigaltstack", Sigaltstack),
+ 133: syscalls.Supported("rt_sigsuspend", RtSigsuspend),
+ 134: syscalls.Supported("rt_sigaction", RtSigaction),
+ 135: syscalls.Supported("rt_sigprocmask", RtSigprocmask),
+ 136: syscalls.Supported("rt_sigpending", RtSigpending),
+ 137: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait),
+ 138: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo),
+ 139: syscalls.Supported("rt_sigreturn", RtSigreturn),
+ 140: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil),
+ 141: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil),
+ 142: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil),
+ 143: syscalls.Supported("setregid", Setregid),
+ 144: syscalls.Supported("setgid", Setgid),
+ 145: syscalls.Supported("setreuid", Setreuid),
+ 146: syscalls.Supported("setuid", Setuid),
+ 147: syscalls.Supported("setresuid", Setresuid),
+ 148: syscalls.Supported("getresuid", Getresuid),
+ 149: syscalls.Supported("setresgid", Setresgid),
+ 150: syscalls.Supported("getresgid", Getresgid),
+ 151: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 152: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 153: syscalls.Supported("times", Times),
+ 154: syscalls.Supported("setpgid", Setpgid),
+ 155: syscalls.Supported("getpgid", Getpgid),
+ 156: syscalls.Supported("getsid", Getsid),
+ 157: syscalls.Supported("setsid", Setsid),
+ 158: syscalls.Supported("getgroups", Getgroups),
+ 159: syscalls.Supported("setgroups", Setgroups),
+ 160: syscalls.Supported("uname", Uname),
+ 161: syscalls.Supported("sethostname", Sethostname),
+ 162: syscalls.Supported("setdomainname", Setdomainname),
+ 163: syscalls.Supported("getrlimit", Getrlimit),
+ 164: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil),
+ 165: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. Fields ru_utime and ru_stime have low precision.", nil),
+ 166: syscalls.Supported("umask", Umask),
+ 167: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil),
+ 168: syscalls.Supported("getcpu", Getcpu),
+ 169: syscalls.Supported("gettimeofday", Gettimeofday),
+ 170: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil),
+ 171: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil),
+ 172: syscalls.Supported("getpid", Getpid),
+ 173: syscalls.Supported("getppid", Getppid),
+ 174: syscalls.Supported("getuid", Getuid),
+ 175: syscalls.Supported("geteuid", Geteuid),
+ 176: syscalls.Supported("getgid", Getgid),
+ 177: syscalls.Supported("getegid", Getegid),
+ 178: syscalls.Supported("gettid", Gettid),
+ 179: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil),
+ 180: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 181: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 182: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 183: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 184: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 185: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 186: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 187: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 188: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 189: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 190: syscalls.Supported("semget", Semget),
+ 191: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil),
+ 192: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920)
+ 193: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil),
+ 194: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil),
+ 195: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil),
+ 196: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil),
+ 197: syscalls.Supported("shmdt", Shmdt),
+ 198: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. Limited support for SOCK_RAW.", nil),
+ 199: syscalls.Supported("socketpair", SocketPair),
+ 200: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil),
+ 201: syscalls.Supported("listen", Listen),
+ 202: syscalls.Supported("accept", Accept),
+ 203: syscalls.Supported("connect", Connect),
+ 204: syscalls.Supported("getsockname", GetSockName),
+ 205: syscalls.Supported("getpeername", GetPeerName),
+ 206: syscalls.Supported("sendto", SendTo),
+ 207: syscalls.Supported("recvfrom", RecvFrom),
+ 208: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil),
+ 209: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options are supported.", nil),
+ 210: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil),
+ 211: syscalls.Supported("sendmsg", SendMsg),
+ 212: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil),
+ 213: syscalls.ErrorWithEvent("readahead", syserror.ENOSYS, "", []string{"gvisor.dev/issue/261"}), // TODO(b/29351341)
+ 214: syscalls.Supported("brk", Brk),
+ 215: syscalls.Supported("munmap", Munmap),
+ 216: syscalls.Supported("mremap", Mremap),
+ 217: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil),
+ 218: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil),
+ 219: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil),
+ 220: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil),
+ 221: syscalls.Supported("execve", Execve),
+ 224: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil),
+ 225: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil),
+ 226: syscalls.Supported("mprotect", Mprotect),
+ 227: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil),
+ 228: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 229: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 230: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 231: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 232: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. Reports all mapped pages are resident.", nil),
+ 233: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil),
+ 234: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil),
+ 235: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}),
+ 236: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil),
+ 237: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil),
+ 238: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil),
+ 239: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly)
+ 240: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo),
+ 241: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil),
+ 242: syscalls.Supported("accept4", Accept4),
+ 243: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil),
+ 260: syscalls.Supported("wait4", Wait4),
+ 261: syscalls.Supported("prlimit64", Prlimit64),
+ 262: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 263: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 264: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
+ 265: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil),
+ 266: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil),
+ 267: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil),
+ 268: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995)
+ 269: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil),
+ 270: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 271: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 272: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil),
+ 273: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil),
+ 274: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 275: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 276: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
+ 277: syscalls.Supported("seccomp", Seccomp),
+ 278: syscalls.Supported("getrandom", GetRandom),
+ 279: syscalls.Supported("memfd_create", MemfdCreate),
+ 280: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
+ 281: syscalls.ErrorWithEvent("execveat", syserror.ENOSYS, "", []string{"gvisor.dev/issue/265"}), // TODO(b/118901836)
+ 282: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
+ 283: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(b/118904897)
+ 284: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
+ 285: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil),
+ 286: syscalls.Supported("preadv2", Preadv2),
+ 287: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil),
+ 291: syscalls.Supported("statx", Statx),
+ },
+ Emulate: map[usermem.Addr]uintptr{},
+
+ Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
+ t.Kernel().EmitUnimplementedEvent(t)
+ return 0, syserror.ENOSYS
+ },
+}
diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go
index 3ab54271c..cd31e0649 100644
--- a/pkg/sentry/syscalls/linux/sys_read.go
+++ b/pkg/sentry/syscalls/linux/sys_read.go
@@ -72,6 +72,39 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "read", file)
}
+// Readahead implements readahead(2).
+func Readahead(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ offset := args[1].Int64()
+ size := args[2].SizeT()
+
+ file := t.GetFile(fd)
+ if file == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer file.DecRef()
+
+ // Check that the file is readable.
+ if !file.Flags().Read {
+ return 0, nil, syserror.EBADF
+ }
+
+ // Check that the size is valid.
+ if int(size) < 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ // Check that the offset is legitimate.
+ if offset < 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ // Return EINVAL; if the underlying file type does not support readahead,
+ // then Linux will return EINVAL to indicate as much. In the future, we
+ // may extend this function to actually support readahead hints.
+ return 0, nil, syserror.EINVAL
+}
+
// Pread64 implements linux syscall pread64(2).
func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
fd := args[0].Int()
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go
index 3bac4d90d..b5a72ce63 100644
--- a/pkg/sentry/syscalls/linux/sys_socket.go
+++ b/pkg/sentry/syscalls/linux/sys_socket.go
@@ -531,7 +531,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
return 0, nil, syserror.ENOTSOCK
}
- if optLen <= 0 {
+ if optLen < 0 {
return 0, nil, syserror.EINVAL
}
if optLen > maxOptLen {
diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go
index f0a292f2f..dd3a5807f 100644
--- a/pkg/sentry/syscalls/linux/sys_splice.go
+++ b/pkg/sentry/syscalls/linux/sys_splice.go
@@ -159,9 +159,14 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}, outFile.Flags().NonBlocking)
}
+ // Sendfile can't lose any data because inFD is always a regual file.
+ if n != 0 {
+ err = nil
+ }
+
// We can only pass a single file to handleIOError, so pick inFile
// arbitrarily. This is used only for debugging purposes.
- return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "sendfile", inFile)
+ return uintptr(n), nil, handleIOError(t, false, err, kernel.ERESTARTSYS, "sendfile", inFile)
}
// Splice implements splice(2).
@@ -245,12 +250,12 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
if inOffset != 0 || outOffset != 0 {
return 0, nil, syserror.ESPIPE
}
- default:
- return 0, nil, syserror.EINVAL
- }
- // We may not refer to the same pipe; otherwise it's a continuous loop.
- if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID {
+ // We may not refer to the same pipe; otherwise it's a continuous loop.
+ if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID {
+ return 0, nil, syserror.EINVAL
+ }
+ default:
return 0, nil, syserror.EINVAL
}
@@ -305,6 +310,11 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
Dup: true,
}, nonBlock)
+ // Tee doesn't change a state of inFD, so it can't lose any data.
+ if n != 0 {
+ err = nil
+ }
+
// See above; inFile is chosen arbitrarily here.
- return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "tee", inFile)
+ return uintptr(n), nil, handleIOError(t, false, err, kernel.ERESTARTSYS, "tee", inFile)
}
diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go
index 4b3f043a2..b887fa9d7 100644
--- a/pkg/sentry/syscalls/linux/sys_time.go
+++ b/pkg/sentry/syscalls/linux/sys_time.go
@@ -15,6 +15,7 @@
package linux
import (
+ "fmt"
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -228,41 +229,35 @@ func clockNanosleepFor(t *kernel.Task, c ktime.Clock, dur time.Duration, rem use
timer.Destroy()
- var remaining time.Duration
- // Did we just block for the entire duration?
- if err == syserror.ETIMEDOUT {
- remaining = 0
- } else {
- remaining = dur - after.Sub(start)
+ switch err {
+ case syserror.ETIMEDOUT:
+ // Slept for entire timeout.
+ return nil
+ case syserror.ErrInterrupted:
+ // Interrupted.
+ remaining := dur - after.Sub(start)
if remaining < 0 {
remaining = time.Duration(0)
}
- }
- // Copy out remaining time.
- if err != nil && rem != usermem.Addr(0) {
- timeleft := linux.NsecToTimespec(remaining.Nanoseconds())
- if err := copyTimespecOut(t, rem, &timeleft); err != nil {
- return err
+ // Copy out remaining time.
+ if rem != 0 {
+ timeleft := linux.NsecToTimespec(remaining.Nanoseconds())
+ if err := copyTimespecOut(t, rem, &timeleft); err != nil {
+ return err
+ }
}
- }
-
- // Did we just block for the entire duration?
- if err == syserror.ETIMEDOUT {
- return nil
- }
- // If interrupted, arrange for a restart with the remaining duration.
- if err == syserror.ErrInterrupted {
+ // Arrange for a restart with the remaining duration.
t.SetSyscallRestartBlock(&clockNanosleepRestartBlock{
c: c,
duration: remaining,
rem: rem,
})
return kernel.ERESTART_RESTARTBLOCK
+ default:
+ panic(fmt.Sprintf("Impossible BlockWithTimer error %v", err))
}
-
- return err
}
// Nanosleep implements linux syscall Nanosleep(2).
diff --git a/pkg/sentry/syscalls/linux/sys_utsname.go b/pkg/sentry/syscalls/linux/sys_utsname.go
index 271ace08e..748e8dd8d 100644
--- a/pkg/sentry/syscalls/linux/sys_utsname.go
+++ b/pkg/sentry/syscalls/linux/sys_utsname.go
@@ -79,11 +79,11 @@ func Sethostname(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
return 0, nil, syserror.EINVAL
}
- name, err := t.CopyInString(nameAddr, int(size))
- if err != nil {
+ name := make([]byte, size)
+ if _, err := t.CopyInBytes(nameAddr, name); err != nil {
return 0, nil, err
}
- utsns.SetHostName(name)
+ utsns.SetHostName(string(name))
return 0, nil, nil
}
diff --git a/pkg/sentry/usermem/usermem.go b/pkg/sentry/usermem/usermem.go
index 6eced660a..7b1f312b1 100644
--- a/pkg/sentry/usermem/usermem.go
+++ b/pkg/sentry/usermem/usermem.go
@@ -16,6 +16,7 @@
package usermem
import (
+ "bytes"
"errors"
"io"
"strconv"
@@ -270,11 +271,10 @@ func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpt
n, err := uio.CopyIn(ctx, addr, buf[done:done+readlen], opts)
// Look for the terminating zero byte, which may have occurred before
// hitting err.
- for i, c := range buf[done : done+n] {
- if c == 0 {
- return stringFromImmutableBytes(buf[:done+i]), nil
- }
+ if i := bytes.IndexByte(buf[done:done+n], byte(0)); i >= 0 {
+ return stringFromImmutableBytes(buf[:done+i]), nil
}
+
done += n
if err != nil {
return stringFromImmutableBytes(buf[:done]), err
diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go
index 187e5410c..3aa73d911 100644
--- a/pkg/sentry/vfs/options.go
+++ b/pkg/sentry/vfs/options.go
@@ -31,14 +31,14 @@ type GetDentryOptions struct {
// FilesystemImpl.MkdirAt().
type MkdirOptions struct {
// Mode is the file mode bits for the created directory.
- Mode uint16
+ Mode linux.FileMode
}
// MknodOptions contains options to VirtualFilesystem.MknodAt() and
// FilesystemImpl.MknodAt().
type MknodOptions struct {
// Mode is the file type and mode bits for the created file.
- Mode uint16
+ Mode linux.FileMode
// If Mode specifies a character or block device special file, DevMajor and
// DevMinor are the major and minor device numbers for the created device.
@@ -61,7 +61,7 @@ type OpenOptions struct {
// If FilesystemImpl.OpenAt() creates a file, Mode is the file mode for the
// created file.
- Mode uint16
+ Mode linux.FileMode
}
// ReadOptions contains options to FileDescription.PRead(),