diff options
Diffstat (limited to 'pkg')
32 files changed, 1122 insertions, 1058 deletions
diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go index 529980267..343f81f59 100644 --- a/pkg/sentry/arch/arch_aarch64.go +++ b/pkg/sentry/arch/arch_aarch64.go @@ -274,7 +274,7 @@ const ( func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, error) { switch regset { case _NT_PRSTATUS: - if maxlen < ptraceRegsSize { + if maxlen < registersSize { return 0, syserror.EFAULT } return s.PtraceGetRegs(dst) @@ -287,7 +287,7 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, error) { switch regset { case _NT_PRSTATUS: - if maxlen < ptraceRegsSize { + if maxlen < registersSize { return 0, syserror.EFAULT } return s.PtraceSetRegs(src) diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go index 465b47da9..91617267d 100644 --- a/pkg/sentry/fs/proc/meminfo.go +++ b/pkg/sentry/fs/proc/meminfo.go @@ -58,12 +58,16 @@ func (d *meminfoData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) var buf bytes.Buffer fmt.Fprintf(&buf, "MemTotal: %8d kB\n", totalSize/1024) - memFree := (totalSize - totalUsage) / 1024 + memFree := totalSize - totalUsage + if memFree > totalSize { + // Underflow. + memFree = 0 + } // We use MemFree as MemAvailable because we don't swap. // TODO(rahat): When reclaim is implemented the value of MemAvailable // should change. - fmt.Fprintf(&buf, "MemFree: %8d kB\n", memFree) - fmt.Fprintf(&buf, "MemAvailable: %8d kB\n", memFree) + fmt.Fprintf(&buf, "MemFree: %8d kB\n", memFree/1024) + fmt.Fprintf(&buf, "MemAvailable: %8d kB\n", memFree/1024) fmt.Fprintf(&buf, "Buffers: 0 kB\n") // memory usage by block devices fmt.Fprintf(&buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) // Emulate a system with no swap, which disables inactivation of anon pages. 
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 4a8411371..4a32821bd 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -686,6 +686,8 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error { creds := rp.Credentials() _, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + // If the gofer does not allow creating a socket or pipe, create a + // synthetic one, i.e. one that is kept entirely in memory. if err == syserror.EPERM { switch opts.Mode.FileType() { case linux.S_IFSOCK: diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 1d9caf127..9ab8fdc65 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -496,10 +496,8 @@ type dentry struct { // file is the unopened p9.File that backs this dentry. file is immutable. // // If file.isNil(), this dentry represents a synthetic file, i.e. a file - // that does not exist on the remote filesystem. As of this writing, this - // is only possible for a directory created with - // MkdirOptions.ForSyntheticMountpoint == true. - // TODO(gvisor.dev/issue/1476): Support synthetic sockets (and pipes). + // that does not exist on the remote filesystem. As of this writing, the + // only files that can be synthetic are sockets, pipes, and directories. 
file p9file // If deleted is non-zero, the file represented by this dentry has been diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index 92007df81..e5f13b69e 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -272,12 +272,16 @@ func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { inactiveFile := file - activeFile fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024) - memFree := (totalSize - totalUsage) / 1024 + memFree := totalSize - totalUsage + if memFree > totalSize { + // Underflow. + memFree = 0 + } // We use MemFree as MemAvailable because we don't swap. // TODO(rahat): When reclaim is implemented the value of MemAvailable // should change. - fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree) - fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree) + fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree/1024) + fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree/1024) fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) // Emulate a system with no swap, which disables inactivation of anon pages. diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go index 3f085d3ca..239a9f4b4 100644 --- a/pkg/sentry/fsimpl/sockfs/sockfs.go +++ b/pkg/sentry/fsimpl/sockfs/sockfs.go @@ -53,9 +53,7 @@ func NewFilesystem(vfsObj *vfs.VirtualFilesystem) *vfs.Filesystem { // inode implements kernfs.Inode. // -// TODO(gvisor.dev/issue/1476): Add device numbers to this inode (which are -// not included in InodeAttrs) to store the numbers of the appropriate -// socket device. Override InodeAttrs.Stat() accordingly. +// TODO(gvisor.dev/issue/1193): Device numbers. 
type inode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink @@ -69,11 +67,6 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr } // NewDentry constructs and returns a sockfs dentry. -// -// TODO(gvisor.dev/issue/1476): Currently, we are using -// sockfs.filesystem.NextIno() to get inode numbers. We should use -// device-specific numbers, so that we are not using the same generator for -// netstack, unix, etc. func NewDentry(creds *auth.Credentials, ino uint64) *vfs.Dentry { // File mode matches net/socket.c:sock_alloc. filemode := linux.FileMode(linux.S_IFSOCK | 0600) diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go index 84156d5a1..413111faf 100644 --- a/pkg/sentry/kernel/syscalls.go +++ b/pkg/sentry/kernel/syscalls.go @@ -29,7 +29,7 @@ import ( // // The types below create fast lookup slices for all syscalls. This maximum // serves as a sanity check that we don't allocate huge slices for a very large -// syscall. +// syscall. This is checked during registration. const maxSyscallNum = 2000 // SyscallSupportLevel is a syscall support levels. @@ -266,6 +266,16 @@ type SyscallTable struct { FeatureEnable SyscallFlagsTable } +// MaxSysno returns the largest system call number. +func (s *SyscallTable) MaxSysno() (max uintptr) { + for num := range s.Table { + if num > max { + max = num + } + } + return max +} + // allSyscallTables contains all known tables. var allSyscallTables []*SyscallTable @@ -286,6 +296,20 @@ func LookupSyscallTable(os abi.OS, a arch.Arch) (*SyscallTable, bool) { // RegisterSyscallTable registers a new syscall table for use by a Kernel. 
func RegisterSyscallTable(s *SyscallTable) { + if max := s.MaxSysno(); max > maxSyscallNum { + panic(fmt.Sprintf("SyscallTable %+v contains too large syscall number %d", s, max)) + } + if _, ok := LookupSyscallTable(s.OS, s.Arch); ok { + panic(fmt.Sprintf("Duplicate SyscallTable registered for OS %v Arch %v", s.OS, s.Arch)) + } + allSyscallTables = append(allSyscallTables, s) + s.Init() +} + +// Init initializes the system call table. +// +// This should normally be called only during registration. +func (s *SyscallTable) Init() { if s.Table == nil { // Ensure non-nil lookup table. s.Table = make(map[uintptr]Syscall) @@ -295,42 +319,16 @@ func RegisterSyscallTable(s *SyscallTable) { s.Emulate = make(map[usermem.Addr]uintptr) } - var max uintptr - for num := range s.Table { - if num > max { - max = num - } - } - - if max > maxSyscallNum { - panic(fmt.Sprintf("SyscallTable %+v contains too large syscall number %d", s, max)) - } - - s.lookup = make([]SyscallFn, max+1) + max := s.MaxSysno() // Checked during RegisterSyscallTable. // Initialize the fast-lookup table. + s.lookup = make([]SyscallFn, max+1) for num, sc := range s.Table { s.lookup[num] = sc.Fn } + // Initialize all features. s.FeatureEnable.init(s.Table, max) - - if _, ok := LookupSyscallTable(s.OS, s.Arch); ok { - panic(fmt.Sprintf("Duplicate SyscallTable registered for OS %v Arch %v", s.OS, s.Arch)) - } - - // Save a reference to this table. - // - // This is required for a Kernel to find the table and for save/restore - // operations below. - allSyscallTables = append(allSyscallTables, s) -} - -// FlushSyscallTablesTestOnly flushes the syscall tables for tests. Used for -// parameterized VFSv2 tests. -// TODO(gvisor.dv/issue/1624): Remove when VFS1 is no longer supported. -func FlushSyscallTablesTestOnly() { - allSyscallTables = nil } // Lookup returns the syscall implementation, if one exists. 
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index 577e9306a..2b11ea4ae 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -180,6 +180,11 @@ type MemoryFileOpts struct { // notifications to determine when eviction is necessary. This option has // no effect unless DelayedEviction is DelayedEvictionEnabled. UseHostMemcgPressure bool + + // If ManualZeroing is true, MemoryFile must not assume that new pages + // obtained from the host are zero-filled, such that MemoryFile must manually + // zero newly-allocated pages. + ManualZeroing bool } // DelayedEvictionType is the type of MemoryFileOpts.DelayedEviction. @@ -432,6 +437,15 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi // Mark selected pages as in use. fr := platform.FileRange{start, end} + if f.opts.ManualZeroing { + if err := f.forEachMappingSlice(fr, func(bs []byte) { + for i := range bs { + bs[i] = 0 + } + }); err != nil { + return platform.FileRange{}, err + } + } if !f.usage.Add(fr, usageInfo{ kind: kind, refs: 1, diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go index b03ca2f26..a8278bffc 100644 --- a/pkg/sentry/socket/hostinet/socket_vfs2.go +++ b/pkg/sentry/socket/hostinet/socket_vfs2.go @@ -36,8 +36,11 @@ import ( type socketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl - // TODO(gvisor.dev/issue/1484): VFS1 stores internal metadata for hostinet. - // We should perhaps rely on the host, much like in hostfs. + + // We store metadata for hostinet sockets internally. Technically, we should + // access metadata (e.g. through stat, chmod) on the host for correctness, + // but this is not very useful for inet socket fds, which do not belong to a + // concrete file anyway. 
vfs.DentryMetadataFileDescriptionImpl socketOpsCommon diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index eec71035d..f7d9b2ff4 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" @@ -41,6 +42,8 @@ type SocketVFS2 struct { socketOpsCommon } +var _ = socket.SocketVFS2(&SocketVFS2{}) + // NewVFS2 creates a new endpoint socket. func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, queue *waiter.Queue, endpoint tcpip.Endpoint) (*vfs.FileDescription, *syserr.Error) { if skType == linux.SOCK_STREAM { diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 5edc3cdf4..06d838868 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/control" "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" @@ -43,6 +44,8 @@ type SocketVFS2 struct { socketOpsCommon } +var _ = socket.SocketVFS2(&SocketVFS2{}) + // NewSockfsFile creates a new socket file in the global sockfs mount and // returns a corresponding file description. 
func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) { diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 0d24fd3c4..245e8fe1e 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -8,8 +8,6 @@ go_library( "error.go", "flags.go", "linux64.go", - "linux64_amd64.go", - "linux64_arm64.go", "sigset.go", "sys_aio.go", "sys_capability.go", diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 68589a377..ea4f9b1a7 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -15,6 +15,16 @@ // Package linux provides syscall tables for amd64 Linux. package linux +import ( + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/syscalls" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + const ( // LinuxSysname is the OS name advertised by gVisor. LinuxSysname = "Linux" @@ -25,3 +35,702 @@ const ( // LinuxVersion is the version info advertised by gVisor. LinuxVersion = "#1 SMP Sun Jan 10 15:06:54 PST 2016" ) + +// AMD64 is a table of Linux amd64 syscall API with the corresponding syscall +// numbers from Linux 4.4. +var AMD64 = &kernel.SyscallTable{ + OS: abi.Linux, + Arch: arch.AMD64, + Version: kernel.Version{ + // Version 4.4 is chosen as a stable, longterm version of Linux, which + // guides the interface provided by this syscall table. The build + // version is that for a clean build with default kernel config, at 5 + // minutes after v4.4 was tagged. 
+ Sysname: LinuxSysname, + Release: LinuxRelease, + Version: LinuxVersion, + }, + AuditNumber: linux.AUDIT_ARCH_X86_64, + Table: map[uintptr]kernel.Syscall{ + 0: syscalls.Supported("read", Read), + 1: syscalls.Supported("write", Write), + 2: syscalls.PartiallySupported("open", Open, "Options O_DIRECT, O_NOATIME, O_PATH, O_TMPFILE, O_SYNC are not supported.", nil), + 3: syscalls.Supported("close", Close), + 4: syscalls.Supported("stat", Stat), + 5: syscalls.Supported("fstat", Fstat), + 6: syscalls.Supported("lstat", Lstat), + 7: syscalls.Supported("poll", Poll), + 8: syscalls.Supported("lseek", Lseek), + 9: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil), + 10: syscalls.Supported("mprotect", Mprotect), + 11: syscalls.Supported("munmap", Munmap), + 12: syscalls.Supported("brk", Brk), + 13: syscalls.Supported("rt_sigaction", RtSigaction), + 14: syscalls.Supported("rt_sigprocmask", RtSigprocmask), + 15: syscalls.Supported("rt_sigreturn", RtSigreturn), + 16: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil), + 17: syscalls.Supported("pread64", Pread64), + 18: syscalls.Supported("pwrite64", Pwrite64), + 19: syscalls.Supported("readv", Readv), + 20: syscalls.Supported("writev", Writev), + 21: syscalls.Supported("access", Access), + 22: syscalls.Supported("pipe", Pipe), + 23: syscalls.Supported("select", Select), + 24: syscalls.Supported("sched_yield", SchedYield), + 25: syscalls.Supported("mremap", Mremap), + 26: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil), + 27: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. 
Reports all mapped pages are resident.", nil), + 28: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil), + 29: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil), + 30: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil), + 31: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil), + 32: syscalls.Supported("dup", Dup), + 33: syscalls.Supported("dup2", Dup2), + 34: syscalls.Supported("pause", Pause), + 35: syscalls.Supported("nanosleep", Nanosleep), + 36: syscalls.Supported("getitimer", Getitimer), + 37: syscalls.Supported("alarm", Alarm), + 38: syscalls.Supported("setitimer", Setitimer), + 39: syscalls.Supported("getpid", Getpid), + 40: syscalls.Supported("sendfile", Sendfile), + 41: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. Limited support for SOCK_RAW.", nil), + 42: syscalls.Supported("connect", Connect), + 43: syscalls.Supported("accept", Accept), + 44: syscalls.Supported("sendto", SendTo), + 45: syscalls.Supported("recvfrom", RecvFrom), + 46: syscalls.Supported("sendmsg", SendMsg), + 47: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil), + 48: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil), + 49: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil), + 50: syscalls.Supported("listen", Listen), + 51: syscalls.Supported("getsockname", GetSockName), + 52: syscalls.Supported("getpeername", GetPeerName), + 53: syscalls.Supported("socketpair", SocketPair), + 54: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil), + 55: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options 
are supported.", nil), + 56: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil), + 57: syscalls.Supported("fork", Fork), + 58: syscalls.Supported("vfork", Vfork), + 59: syscalls.Supported("execve", Execve), + 60: syscalls.Supported("exit", Exit), + 61: syscalls.Supported("wait4", Wait4), + 62: syscalls.Supported("kill", Kill), + 63: syscalls.Supported("uname", Uname), + 64: syscalls.Supported("semget", Semget), + 65: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil), + 66: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil), + 67: syscalls.Supported("shmdt", Shmdt), + 68: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 69: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 70: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 71: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil), + 73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil), + 74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil), + 75: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil), + 76: syscalls.Supported("truncate", Truncate), + 77: syscalls.Supported("ftruncate", Ftruncate), + 78: syscalls.Supported("getdents", Getdents), + 79: syscalls.Supported("getcwd", Getcwd), + 80: syscalls.Supported("chdir", Chdir), + 81: syscalls.Supported("fchdir", Fchdir), + 82: syscalls.Supported("rename", Rename), + 83: 
syscalls.Supported("mkdir", Mkdir), + 84: syscalls.Supported("rmdir", Rmdir), + 85: syscalls.Supported("creat", Creat), + 86: syscalls.Supported("link", Link), + 87: syscalls.Supported("unlink", Unlink), + 88: syscalls.Supported("symlink", Symlink), + 89: syscalls.Supported("readlink", Readlink), + 90: syscalls.Supported("chmod", Chmod), + 91: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil), + 92: syscalls.Supported("chown", Chown), + 93: syscalls.Supported("fchown", Fchown), + 94: syscalls.Supported("lchown", Lchown), + 95: syscalls.Supported("umask", Umask), + 96: syscalls.Supported("gettimeofday", Gettimeofday), + 97: syscalls.Supported("getrlimit", Getrlimit), + 98: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. Fields ru_utime and ru_stime have low precision.", nil), + 99: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil), + 100: syscalls.Supported("times", Times), + 101: syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil), + 102: syscalls.Supported("getuid", Getuid), + 103: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil), + 104: syscalls.Supported("getgid", Getgid), + 105: syscalls.Supported("setuid", Setuid), + 106: syscalls.Supported("setgid", Setgid), + 107: syscalls.Supported("geteuid", Geteuid), + 108: syscalls.Supported("getegid", Getegid), + 109: syscalls.Supported("setpgid", Setpgid), + 110: syscalls.Supported("getppid", Getppid), + 111: syscalls.Supported("getpgrp", Getpgrp), + 112: syscalls.Supported("setsid", Setsid), + 113: syscalls.Supported("setreuid", Setreuid), + 114: syscalls.Supported("setregid", Setregid), + 115: syscalls.Supported("getgroups", Getgroups), + 116: 
syscalls.Supported("setgroups", Setgroups), + 117: syscalls.Supported("setresuid", Setresuid), + 118: syscalls.Supported("getresuid", Getresuid), + 119: syscalls.Supported("setresgid", Setresgid), + 120: syscalls.Supported("getresgid", Getresgid), + 121: syscalls.Supported("getpgid", Getpgid), + 122: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) + 123: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) + 124: syscalls.Supported("getsid", Getsid), + 125: syscalls.Supported("capget", Capget), + 126: syscalls.Supported("capset", Capset), + 127: syscalls.Supported("rt_sigpending", RtSigpending), + 128: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait), + 129: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo), + 130: syscalls.Supported("rt_sigsuspend", RtSigsuspend), + 131: syscalls.Supported("sigaltstack", Sigaltstack), + 132: syscalls.Supported("utime", Utime), + 133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. 
Only regular file and FIFO creation are supported.", nil), + 134: syscalls.Error("uselib", syserror.ENOSYS, "Obsolete", nil), + 135: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil), + 136: syscalls.ErrorWithEvent("ustat", syserror.ENOSYS, "Needs filesystem support.", nil), + 137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), + 138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), + 139: syscalls.ErrorWithEvent("sysfs", syserror.ENOSYS, "", []string{"gvisor.dev/issue/165"}), + 140: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil), + 141: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil), + 142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil), + 143: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil), + 144: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil), + 145: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil), + 146: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil), + 147: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil), + 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil), + 149: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 150: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 151: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 152: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. 
The sandbox lacks appropriate permissions.", nil), + 153: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil), + 154: syscalls.Error("modify_ldt", syserror.EPERM, "", nil), + 155: syscalls.Error("pivot_root", syserror.EPERM, "", nil), + 156: syscalls.Error("sysctl", syserror.EPERM, "Deprecated. Use /proc/sys instead.", nil), + 157: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil), + 158: syscalls.PartiallySupported("arch_prctl", ArchPrctl, "Options ARCH_GET_GS, ARCH_SET_GS not supported.", nil), + 159: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil), + 160: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil), + 161: syscalls.Supported("chroot", Chroot), + 162: syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil), + 163: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil), + 164: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil), + 165: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil), + 166: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil), + 167: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil), + 168: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil), + 169: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil), + 170: syscalls.Supported("sethostname", Sethostname), + 171: syscalls.Supported("setdomainname", Setdomainname), + 172: syscalls.CapError("iopl", linux.CAP_SYS_RAWIO, "", nil), + 173: syscalls.CapError("ioperm", linux.CAP_SYS_RAWIO, "", nil), + 174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil), + 175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil), + 176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil), + 177: syscalls.Error("get_kernel_syms", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil), + 178: 
syscalls.Error("query_module", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil), + 179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations + 180: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil), + 181: syscalls.Error("getpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil), + 182: syscalls.Error("putpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil), + 183: syscalls.Error("afs_syscall", syserror.ENOSYS, "Not implemented in Linux.", nil), + 184: syscalls.Error("tuxcall", syserror.ENOSYS, "Not implemented in Linux.", nil), + 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), + 186: syscalls.Supported("gettid", Gettid), + 187: syscalls.Supported("readahead", Readahead), + 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), + 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), + 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), + 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), + 194: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), + 195: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), + 196: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil), + 197: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), + 198: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), + 199: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), + 200: syscalls.Supported("tkill", Tkill), + 
201: syscalls.Supported("time", Time), + 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), + 203: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil), + 204: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil), + 205: syscalls.Error("set_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), + 206: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 207: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 208: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 209: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 210: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 211: syscalls.Error("get_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), + 212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), + 213: syscalls.Supported("epoll_create", EpollCreate), + 214: syscalls.ErrorWithEvent("epoll_ctl_old", syserror.ENOSYS, "Deprecated.", nil), + 215: syscalls.ErrorWithEvent("epoll_wait_old", syserror.ENOSYS, "Deprecated.", nil), + 216: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil), + 217: syscalls.Supported("getdents64", Getdents64), + 218: syscalls.Supported("set_tid_address", SetTidAddress), + 219: syscalls.Supported("restart_syscall", RestartSyscall), + 220: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), + 221: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil), + 222: syscalls.Supported("timer_create", TimerCreate), + 223: syscalls.Supported("timer_settime", TimerSettime), + 224: syscalls.Supported("timer_gettime", TimerGettime), + 225: syscalls.Supported("timer_getoverrun", TimerGetoverrun), + 226: syscalls.Supported("timer_delete", TimerDelete), + 227: syscalls.Supported("clock_settime", ClockSettime), + 228: syscalls.Supported("clock_gettime", ClockGettime), + 229: syscalls.Supported("clock_getres", ClockGetres), + 230: syscalls.Supported("clock_nanosleep", ClockNanosleep), + 231: syscalls.Supported("exit_group", ExitGroup), + 232: syscalls.Supported("epoll_wait", EpollWait), + 233: syscalls.Supported("epoll_ctl", EpollCtl), + 234: syscalls.Supported("tgkill", Tgkill), + 235: syscalls.Supported("utimes", Utimes), + 236: syscalls.Error("vserver", syserror.ENOSYS, "Not implemented by Linux", nil), + 237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. 
Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}), + 238: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil), + 239: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil), + 240: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 241: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 242: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 243: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 244: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 245: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil), + 247: syscalls.Supported("waitid", Waitid), + 248: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil), + 249: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil), + 250: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil), + 251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) + 252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) + 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil), + 254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil), + 255: 
syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil), + 256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil), + 257: syscalls.Supported("openat", Openat), + 258: syscalls.Supported("mkdirat", Mkdirat), + 259: syscalls.Supported("mknodat", Mknodat), + 260: syscalls.Supported("fchownat", Fchownat), + 261: syscalls.Supported("futimesat", Futimesat), + 262: syscalls.Supported("fstatat", Fstatat), + 263: syscalls.Supported("unlinkat", Unlinkat), + 264: syscalls.Supported("renameat", Renameat), + 265: syscalls.Supported("linkat", Linkat), + 266: syscalls.Supported("symlinkat", Symlinkat), + 267: syscalls.Supported("readlinkat", Readlinkat), + 268: syscalls.Supported("fchmodat", Fchmodat), + 269: syscalls.Supported("faccessat", Faccessat), + 270: syscalls.Supported("pselect", Pselect), + 271: syscalls.Supported("ppoll", Ppoll), + 272: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. 
Network namespaces supported but must be empty.", nil), + 273: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil), + 274: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil), + 275: syscalls.Supported("splice", Splice), + 276: syscalls.Supported("tee", Tee), + 277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil), + 278: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly) + 280: syscalls.Supported("utimensat", Utimensat), + 281: syscalls.Supported("epoll_pwait", EpollPwait), + 282: syscalls.PartiallySupported("signalfd", Signalfd, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}), + 283: syscalls.Supported("timerfd_create", TimerfdCreate), + 284: syscalls.Supported("eventfd", Eventfd), + 285: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil), + 286: syscalls.Supported("timerfd_settime", TimerfdSettime), + 287: syscalls.Supported("timerfd_gettime", TimerfdGettime), + 288: syscalls.Supported("accept4", Accept4), + 289: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}), + 290: syscalls.Supported("eventfd2", Eventfd2), + 291: syscalls.Supported("epoll_create1", EpollCreate1), + 292: syscalls.Supported("dup3", Dup3), + 293: syscalls.Supported("pipe2", Pipe2), + 294: syscalls.Supported("inotify_init1", InotifyInit1), + 295: syscalls.Supported("preadv", Preadv), + 296: syscalls.Supported("pwritev", Pwritev), + 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), + 298: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil), + 299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are 
supported.", nil), + 300: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), + 301: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), + 302: syscalls.Supported("prlimit64", Prlimit64), + 303: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 304: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 305: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil), + 306: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil), + 307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), + 308: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) + 309: syscalls.Supported("getcpu", Getcpu), + 310: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), + 311: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), + 312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil), + 313: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil), + 314: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) + 315: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) + 316: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) + 317: syscalls.Supported("seccomp", Seccomp), + 318: syscalls.Supported("getrandom", GetRandom), + 319: syscalls.Supported("memfd_create", MemfdCreate), + 320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil), + 321: 
syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), + 322: syscalls.Supported("execveat", Execveat), + 323: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) + 324: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(gvisor.dev/issue/267) + 325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + + // Syscalls implemented after 325 are "backports" from versions + // of Linux after 4.4. + 326: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil), + 327: syscalls.Supported("preadv2", Preadv2), + 328: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil), + 329: syscalls.ErrorWithEvent("pkey_mprotect", syserror.ENOSYS, "", nil), + 330: syscalls.ErrorWithEvent("pkey_alloc", syserror.ENOSYS, "", nil), + 331: syscalls.ErrorWithEvent("pkey_free", syserror.ENOSYS, "", nil), + 332: syscalls.Supported("statx", Statx), + 333: syscalls.ErrorWithEvent("io_pgetevents", syserror.ENOSYS, "", nil), + 334: syscalls.PartiallySupported("rseq", RSeq, "Not supported on all platforms.", nil), + + // Linux skips ahead to syscall 424 to sync numbers between arches. 
+ 424: syscalls.ErrorWithEvent("pidfd_send_signal", syserror.ENOSYS, "", nil), + 425: syscalls.ErrorWithEvent("io_uring_setup", syserror.ENOSYS, "", nil), + 426: syscalls.ErrorWithEvent("io_uring_enter", syserror.ENOSYS, "", nil), + 427: syscalls.ErrorWithEvent("io_uring_register", syserror.ENOSYS, "", nil), + 428: syscalls.ErrorWithEvent("open_tree", syserror.ENOSYS, "", nil), + 429: syscalls.ErrorWithEvent("move_mount", syserror.ENOSYS, "", nil), + 430: syscalls.ErrorWithEvent("fsopen", syserror.ENOSYS, "", nil), + 431: syscalls.ErrorWithEvent("fsconfig", syserror.ENOSYS, "", nil), + 432: syscalls.ErrorWithEvent("fsmount", syserror.ENOSYS, "", nil), + 433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil), + 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil), + 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil), + }, + Emulate: map[usermem.Addr]uintptr{ + 0xffffffffff600000: 96, // vsyscall gettimeofday(2) + 0xffffffffff600400: 201, // vsyscall time(2) + 0xffffffffff600800: 309, // vsyscall getcpu(2) + }, + Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { + t.Kernel().EmitUnimplementedEvent(t) + return 0, syserror.ENOSYS + }, +} + +// ARM64 is a table of Linux arm64 syscall API with the corresponding syscall +// numbers from Linux 4.4. +var ARM64 = &kernel.SyscallTable{ + OS: abi.Linux, + Arch: arch.ARM64, + Version: kernel.Version{ + Sysname: LinuxSysname, + Release: LinuxRelease, + Version: LinuxVersion, + }, + AuditNumber: linux.AUDIT_ARCH_AARCH64, + Table: map[uintptr]kernel.Syscall{ + 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), + 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), + 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), + 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), + 11: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), + 12: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), + 13: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil), + 14: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), + 15: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), + 16: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), + 17: syscalls.Supported("getcwd", Getcwd), + 18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), + 19: syscalls.Supported("eventfd2", Eventfd2), + 20: syscalls.Supported("epoll_create1", EpollCreate1), + 21: syscalls.Supported("epoll_ctl", 
EpollCtl), + 22: syscalls.Supported("epoll_pwait", EpollPwait), + 23: syscalls.Supported("dup", Dup), + 24: syscalls.Supported("dup3", Dup3), + 25: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil), + 26: syscalls.Supported("inotify_init1", InotifyInit1), + 27: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil), + 28: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil), + 29: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil), + 30: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) + 31: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) + 32: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil), + 33: syscalls.Supported("mknodat", Mknodat), + 34: syscalls.Supported("mkdirat", Mkdirat), + 35: syscalls.Supported("unlinkat", Unlinkat), + 36: syscalls.Supported("symlinkat", Symlinkat), + 37: syscalls.Supported("linkat", Linkat), + 38: syscalls.Supported("renameat", Renameat), + 39: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil), + 40: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil), + 41: syscalls.Error("pivot_root", syserror.EPERM, "", nil), + 42: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil), + 43: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), + 44: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), + 45: syscalls.Supported("truncate", Truncate), + 46: syscalls.Supported("ftruncate", 
Ftruncate), + 47: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil), + 48: syscalls.Supported("faccessat", Faccessat), + 49: syscalls.Supported("chdir", Chdir), + 50: syscalls.Supported("fchdir", Fchdir), + 51: syscalls.Supported("chroot", Chroot), + 52: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil), + 53: syscalls.Supported("fchmodat", Fchmodat), + 54: syscalls.Supported("fchownat", Fchownat), + 55: syscalls.Supported("fchown", Fchown), + 56: syscalls.Supported("openat", Openat), + 57: syscalls.Supported("close", Close), + 58: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil), + 59: syscalls.Supported("pipe2", Pipe2), + 60: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations + 61: syscalls.Supported("getdents64", Getdents64), + 62: syscalls.Supported("lseek", Lseek), + 63: syscalls.Supported("read", Read), + 64: syscalls.Supported("write", Write), + 65: syscalls.Supported("readv", Readv), + 66: syscalls.Supported("writev", Writev), + 67: syscalls.Supported("pread64", Pread64), + 68: syscalls.Supported("pwrite64", Pwrite64), + 69: syscalls.Supported("preadv", Preadv), + 70: syscalls.Supported("pwritev", Pwritev), + 71: syscalls.Supported("sendfile", Sendfile), + 72: syscalls.Supported("pselect", Pselect), + 73: syscalls.Supported("ppoll", Ppoll), + 74: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}), + 75: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 76: syscalls.PartiallySupported("splice", Splice, "Stub implementation.", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 77: syscalls.Supported("tee", Tee), + 78: syscalls.Supported("readlinkat", Readlinkat), + 79: syscalls.Supported("fstatat", Fstatat), + 80: syscalls.Supported("fstat", Fstat), + 81: 
syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil), + 82: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil), + 83: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil), + 84: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil), + 85: syscalls.Supported("timerfd_create", TimerfdCreate), + 86: syscalls.Supported("timerfd_settime", TimerfdSettime), + 87: syscalls.Supported("timerfd_gettime", TimerfdGettime), + 88: syscalls.Supported("utimensat", Utimensat), + 89: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil), + 90: syscalls.Supported("capget", Capget), + 91: syscalls.Supported("capset", Capset), + 92: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil), + 93: syscalls.Supported("exit", Exit), + 94: syscalls.Supported("exit_group", ExitGroup), + 95: syscalls.Supported("waitid", Waitid), + 96: syscalls.Supported("set_tid_address", SetTidAddress), + 97: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. 
Network namespaces supported but must be empty.", nil), + 98: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), + 99: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil), + 100: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil), + 101: syscalls.Supported("nanosleep", Nanosleep), + 102: syscalls.Supported("getitimer", Getitimer), + 103: syscalls.Supported("setitimer", Setitimer), + 104: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil), + 105: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil), + 106: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil), + 107: syscalls.Supported("timer_create", TimerCreate), + 108: syscalls.Supported("timer_gettime", TimerGettime), + 109: syscalls.Supported("timer_getoverrun", TimerGetoverrun), + 110: syscalls.Supported("timer_settime", TimerSettime), + 111: syscalls.Supported("timer_delete", TimerDelete), + 112: syscalls.Supported("clock_settime", ClockSettime), + 113: syscalls.Supported("clock_gettime", ClockGettime), + 114: syscalls.Supported("clock_getres", ClockGetres), + 115: syscalls.Supported("clock_nanosleep", ClockNanosleep), + 116: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil), + 117: syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil), + 118: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil), + 119: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil), + 120: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil), + 121: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil), + 122: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil), + 123: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub 
implementation.", nil), + 124: syscalls.Supported("sched_yield", SchedYield), + 125: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil), + 126: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil), + 127: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil), + 128: syscalls.Supported("restart_syscall", RestartSyscall), + 129: syscalls.Supported("kill", Kill), + 130: syscalls.Supported("tkill", Tkill), + 131: syscalls.Supported("tgkill", Tgkill), + 132: syscalls.Supported("sigaltstack", Sigaltstack), + 133: syscalls.Supported("rt_sigsuspend", RtSigsuspend), + 134: syscalls.Supported("rt_sigaction", RtSigaction), + 135: syscalls.Supported("rt_sigprocmask", RtSigprocmask), + 136: syscalls.Supported("rt_sigpending", RtSigpending), + 137: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait), + 138: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo), + 139: syscalls.Supported("rt_sigreturn", RtSigreturn), + 140: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil), + 141: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil), + 142: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil), + 143: syscalls.Supported("setregid", Setregid), + 144: syscalls.Supported("setgid", Setgid), + 145: syscalls.Supported("setreuid", Setreuid), + 146: syscalls.Supported("setuid", Setuid), + 147: syscalls.Supported("setresuid", Setresuid), + 148: syscalls.Supported("getresuid", Getresuid), + 149: syscalls.Supported("setresgid", Setresgid), + 150: syscalls.Supported("getresgid", Getresgid), + 151: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) + 152: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) + 153: syscalls.Supported("times", Times), + 154: 
syscalls.Supported("setpgid", Setpgid), + 155: syscalls.Supported("getpgid", Getpgid), + 156: syscalls.Supported("getsid", Getsid), + 157: syscalls.Supported("setsid", Setsid), + 158: syscalls.Supported("getgroups", Getgroups), + 159: syscalls.Supported("setgroups", Setgroups), + 160: syscalls.Supported("uname", Uname), + 161: syscalls.Supported("sethostname", Sethostname), + 162: syscalls.Supported("setdomainname", Setdomainname), + 163: syscalls.Supported("getrlimit", Getrlimit), + 164: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil), + 165: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. Fields ru_utime and ru_stime have low precision.", nil), + 166: syscalls.Supported("umask", Umask), + 167: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil), + 168: syscalls.Supported("getcpu", Getcpu), + 169: syscalls.Supported("gettimeofday", Gettimeofday), + 170: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil), + 171: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil), + 172: syscalls.Supported("getpid", Getpid), + 173: syscalls.Supported("getppid", Getppid), + 174: syscalls.Supported("getuid", Getuid), + 175: syscalls.Supported("geteuid", Geteuid), + 176: syscalls.Supported("getgid", Getgid), + 177: syscalls.Supported("getegid", Getegid), + 178: syscalls.Supported("gettid", Gettid), + 179: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil), + 180: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 181: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 182: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 
183: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 184: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 185: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) + 186: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 187: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 188: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 189: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) + 190: syscalls.Supported("semget", Semget), + 191: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil), + 192: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), + 193: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil), + 194: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil), + 195: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil), + 196: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil), + 197: syscalls.Supported("shmdt", Shmdt), + 198: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. 
Limited support for SOCK_RAW.", nil), + 199: syscalls.Supported("socketpair", SocketPair), + 200: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil), + 201: syscalls.Supported("listen", Listen), + 202: syscalls.Supported("accept", Accept), + 203: syscalls.Supported("connect", Connect), + 204: syscalls.Supported("getsockname", GetSockName), + 205: syscalls.Supported("getpeername", GetPeerName), + 206: syscalls.Supported("sendto", SendTo), + 207: syscalls.Supported("recvfrom", RecvFrom), + 208: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil), + 209: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options are supported.", nil), + 210: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil), + 211: syscalls.Supported("sendmsg", SendMsg), + 212: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil), + 213: syscalls.Supported("readahead", Readahead), + 214: syscalls.Supported("brk", Brk), + 215: syscalls.Supported("munmap", Munmap), + 216: syscalls.Supported("mremap", Mremap), + 217: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil), + 218: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil), + 219: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil), + 220: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil), + 221: syscalls.Supported("execve", Execve), + 222: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. 
Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil), + 223: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil), + 224: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil), + 225: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil), + 226: syscalls.Supported("mprotect", Mprotect), + 227: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil), + 228: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 229: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 230: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 231: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 232: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. Reports all mapped pages are resident.", nil), + 233: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil), + 234: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil), + 235: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. 
Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}), + 236: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil), + 237: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil), + 238: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil), + 239: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly) + 240: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), + 241: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil), + 242: syscalls.Supported("accept4", Accept4), + 243: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil), + 260: syscalls.Supported("wait4", Wait4), + 261: syscalls.Supported("prlimit64", Prlimit64), + 262: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), + 263: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), + 264: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 265: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 266: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil), + 267: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil), + 268: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) + 269: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), + 270: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), + 271: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", 
[]string{"gvisor.dev/issue/158"}), + 272: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil), + 273: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil), + 274: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) + 275: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) + 276: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) + 277: syscalls.Supported("seccomp", Seccomp), + 278: syscalls.Supported("getrandom", GetRandom), + 279: syscalls.Supported("memfd_create", MemfdCreate), + 280: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), + 281: syscalls.Supported("execveat", Execveat), + 282: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) + 283: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(gvisor.dev/issue/267) + 284: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + + // Syscalls after 284 are "backports" from versions of Linux after 4.4. 
+ 285: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil), + 286: syscalls.Supported("preadv2", Preadv2), + 287: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil), + 288: syscalls.ErrorWithEvent("pkey_mprotect", syserror.ENOSYS, "", nil), + 289: syscalls.ErrorWithEvent("pkey_alloc", syserror.ENOSYS, "", nil), + 290: syscalls.ErrorWithEvent("pkey_free", syserror.ENOSYS, "", nil), + 291: syscalls.Supported("statx", Statx), + 292: syscalls.ErrorWithEvent("io_pgetevents", syserror.ENOSYS, "", nil), + 293: syscalls.PartiallySupported("rseq", RSeq, "Not supported on all platforms.", nil), + + // Linux skips ahead to syscall 424 to sync numbers between arches. + 424: syscalls.ErrorWithEvent("pidfd_send_signal", syserror.ENOSYS, "", nil), + 425: syscalls.ErrorWithEvent("io_uring_setup", syserror.ENOSYS, "", nil), + 426: syscalls.ErrorWithEvent("io_uring_enter", syserror.ENOSYS, "", nil), + 427: syscalls.ErrorWithEvent("io_uring_register", syserror.ENOSYS, "", nil), + 428: syscalls.ErrorWithEvent("open_tree", syserror.ENOSYS, "", nil), + 429: syscalls.ErrorWithEvent("move_mount", syserror.ENOSYS, "", nil), + 430: syscalls.ErrorWithEvent("fsopen", syserror.ENOSYS, "", nil), + 431: syscalls.ErrorWithEvent("fsconfig", syserror.ENOSYS, "", nil), + 432: syscalls.ErrorWithEvent("fsmount", syserror.ENOSYS, "", nil), + 433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil), + 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil), + 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil), + }, + Emulate: map[usermem.Addr]uintptr{}, + Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { + t.Kernel().EmitUnimplementedEvent(t) + return 0, syserror.ENOSYS + }, +} + +func init() { + kernel.RegisterSyscallTable(AMD64) + kernel.RegisterSyscallTable(ARM64) +} diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go 
deleted file mode 100644 index 79066ad2a..000000000 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ /dev/null @@ -1,406 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package linux - -import ( - "gvisor.dev/gvisor/pkg/abi" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/syscalls" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// AMD64 is a table of Linux amd64 syscall API with the corresponding syscall -// numbers from Linux 4.4. -var AMD64 = &kernel.SyscallTable{ - OS: abi.Linux, - Arch: arch.AMD64, - Version: kernel.Version{ - // Version 4.4 is chosen as a stable, longterm version of Linux, which - // guides the interface provided by this syscall table. The build - // version is that for a clean build with default kernel config, at 5 - // minutes after v4.4 was tagged. 
- Sysname: LinuxSysname, - Release: LinuxRelease, - Version: LinuxVersion, - }, - AuditNumber: linux.AUDIT_ARCH_X86_64, - Table: map[uintptr]kernel.Syscall{ - 0: syscalls.Supported("read", Read), - 1: syscalls.Supported("write", Write), - 2: syscalls.PartiallySupported("open", Open, "Options O_DIRECT, O_NOATIME, O_PATH, O_TMPFILE, O_SYNC are not supported.", nil), - 3: syscalls.Supported("close", Close), - 4: syscalls.Supported("stat", Stat), - 5: syscalls.Supported("fstat", Fstat), - 6: syscalls.Supported("lstat", Lstat), - 7: syscalls.Supported("poll", Poll), - 8: syscalls.Supported("lseek", Lseek), - 9: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil), - 10: syscalls.Supported("mprotect", Mprotect), - 11: syscalls.Supported("munmap", Munmap), - 12: syscalls.Supported("brk", Brk), - 13: syscalls.Supported("rt_sigaction", RtSigaction), - 14: syscalls.Supported("rt_sigprocmask", RtSigprocmask), - 15: syscalls.Supported("rt_sigreturn", RtSigreturn), - 16: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil), - 17: syscalls.Supported("pread64", Pread64), - 18: syscalls.Supported("pwrite64", Pwrite64), - 19: syscalls.Supported("readv", Readv), - 20: syscalls.Supported("writev", Writev), - 21: syscalls.Supported("access", Access), - 22: syscalls.Supported("pipe", Pipe), - 23: syscalls.Supported("select", Select), - 24: syscalls.Supported("sched_yield", SchedYield), - 25: syscalls.Supported("mremap", Mremap), - 26: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil), - 27: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. 
Reports all mapped pages are resident.", nil), - 28: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil), - 29: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil), - 30: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil), - 31: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil), - 32: syscalls.Supported("dup", Dup), - 33: syscalls.Supported("dup2", Dup2), - 34: syscalls.Supported("pause", Pause), - 35: syscalls.Supported("nanosleep", Nanosleep), - 36: syscalls.Supported("getitimer", Getitimer), - 37: syscalls.Supported("alarm", Alarm), - 38: syscalls.Supported("setitimer", Setitimer), - 39: syscalls.Supported("getpid", Getpid), - 40: syscalls.Supported("sendfile", Sendfile), - 41: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. Limited support for SOCK_RAW.", nil), - 42: syscalls.Supported("connect", Connect), - 43: syscalls.Supported("accept", Accept), - 44: syscalls.Supported("sendto", SendTo), - 45: syscalls.Supported("recvfrom", RecvFrom), - 46: syscalls.Supported("sendmsg", SendMsg), - 47: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil), - 48: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil), - 49: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil), - 50: syscalls.Supported("listen", Listen), - 51: syscalls.Supported("getsockname", GetSockName), - 52: syscalls.Supported("getpeername", GetPeerName), - 53: syscalls.Supported("socketpair", SocketPair), - 54: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil), - 55: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options 
are supported.", nil), - 56: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil), - 57: syscalls.Supported("fork", Fork), - 58: syscalls.Supported("vfork", Vfork), - 59: syscalls.Supported("execve", Execve), - 60: syscalls.Supported("exit", Exit), - 61: syscalls.Supported("wait4", Wait4), - 62: syscalls.Supported("kill", Kill), - 63: syscalls.Supported("uname", Uname), - 64: syscalls.Supported("semget", Semget), - 65: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil), - 66: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil), - 67: syscalls.Supported("shmdt", Shmdt), - 68: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 69: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 70: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 71: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil), - 73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil), - 74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil), - 75: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil), - 76: syscalls.Supported("truncate", Truncate), - 77: syscalls.Supported("ftruncate", Ftruncate), - 78: syscalls.Supported("getdents", Getdents), - 79: syscalls.Supported("getcwd", Getcwd), - 80: syscalls.Supported("chdir", Chdir), - 81: syscalls.Supported("fchdir", Fchdir), - 82: syscalls.Supported("rename", Rename), - 83: 
syscalls.Supported("mkdir", Mkdir), - 84: syscalls.Supported("rmdir", Rmdir), - 85: syscalls.Supported("creat", Creat), - 86: syscalls.Supported("link", Link), - 87: syscalls.Supported("unlink", Unlink), - 88: syscalls.Supported("symlink", Symlink), - 89: syscalls.Supported("readlink", Readlink), - 90: syscalls.Supported("chmod", Chmod), - 91: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil), - 92: syscalls.Supported("chown", Chown), - 93: syscalls.Supported("fchown", Fchown), - 94: syscalls.Supported("lchown", Lchown), - 95: syscalls.Supported("umask", Umask), - 96: syscalls.Supported("gettimeofday", Gettimeofday), - 97: syscalls.Supported("getrlimit", Getrlimit), - 98: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. Fields ru_utime and ru_stime have low precision.", nil), - 99: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil), - 100: syscalls.Supported("times", Times), - 101: syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil), - 102: syscalls.Supported("getuid", Getuid), - 103: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil), - 104: syscalls.Supported("getgid", Getgid), - 105: syscalls.Supported("setuid", Setuid), - 106: syscalls.Supported("setgid", Setgid), - 107: syscalls.Supported("geteuid", Geteuid), - 108: syscalls.Supported("getegid", Getegid), - 109: syscalls.Supported("setpgid", Setpgid), - 110: syscalls.Supported("getppid", Getppid), - 111: syscalls.Supported("getpgrp", Getpgrp), - 112: syscalls.Supported("setsid", Setsid), - 113: syscalls.Supported("setreuid", Setreuid), - 114: syscalls.Supported("setregid", Setregid), - 115: syscalls.Supported("getgroups", Getgroups), - 116: 
syscalls.Supported("setgroups", Setgroups), - 117: syscalls.Supported("setresuid", Setresuid), - 118: syscalls.Supported("getresuid", Getresuid), - 119: syscalls.Supported("setresgid", Setresgid), - 120: syscalls.Supported("getresgid", Getresgid), - 121: syscalls.Supported("getpgid", Getpgid), - 122: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) - 123: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) - 124: syscalls.Supported("getsid", Getsid), - 125: syscalls.Supported("capget", Capget), - 126: syscalls.Supported("capset", Capset), - 127: syscalls.Supported("rt_sigpending", RtSigpending), - 128: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait), - 129: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo), - 130: syscalls.Supported("rt_sigsuspend", RtSigsuspend), - 131: syscalls.Supported("sigaltstack", Sigaltstack), - 132: syscalls.Supported("utime", Utime), - 133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. 
Only regular file and FIFO creation are supported.", nil), - 134: syscalls.Error("uselib", syserror.ENOSYS, "Obsolete", nil), - 135: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil), - 136: syscalls.ErrorWithEvent("ustat", syserror.ENOSYS, "Needs filesystem support.", nil), - 137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), - 138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), - 139: syscalls.ErrorWithEvent("sysfs", syserror.ENOSYS, "", []string{"gvisor.dev/issue/165"}), - 140: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil), - 141: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil), - 142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil), - 143: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil), - 144: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil), - 145: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil), - 146: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil), - 147: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil), - 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil), - 149: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), - 150: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), - 151: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), - 152: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. 
The sandbox lacks appropriate permissions.", nil), - 153: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil), - 154: syscalls.Error("modify_ldt", syserror.EPERM, "", nil), - 155: syscalls.Error("pivot_root", syserror.EPERM, "", nil), - 156: syscalls.Error("sysctl", syserror.EPERM, "Deprecated. Use /proc/sys instead.", nil), - 157: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil), - 158: syscalls.PartiallySupported("arch_prctl", ArchPrctl, "Options ARCH_GET_GS, ARCH_SET_GS not supported.", nil), - 159: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil), - 160: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil), - 161: syscalls.Supported("chroot", Chroot), - 162: syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil), - 163: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil), - 164: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil), - 165: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil), - 166: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil), - 167: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil), - 168: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil), - 169: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil), - 170: syscalls.Supported("sethostname", Sethostname), - 171: syscalls.Supported("setdomainname", Setdomainname), - 172: syscalls.CapError("iopl", linux.CAP_SYS_RAWIO, "", nil), - 173: syscalls.CapError("ioperm", linux.CAP_SYS_RAWIO, "", nil), - 174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil), - 175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil), - 176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil), - 177: syscalls.Error("get_kernel_syms", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil), - 178: 
syscalls.Error("query_module", syserror.ENOSYS, "Not supported in Linux > 2.6.", nil), - 179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations - 180: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil), - 181: syscalls.Error("getpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil), - 182: syscalls.Error("putpmsg", syserror.ENOSYS, "Not implemented in Linux.", nil), - 183: syscalls.Error("afs_syscall", syserror.ENOSYS, "Not implemented in Linux.", nil), - 184: syscalls.Error("tuxcall", syserror.ENOSYS, "Not implemented in Linux.", nil), - 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), - 186: syscalls.Supported("gettid", Gettid), - 187: syscalls.Supported("readahead", Readahead), - 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), - 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), - 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), - 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), - 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), - 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), - 194: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), - 195: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), - 196: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil), - 197: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), - 198: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), - 199: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), - 200: syscalls.Supported("tkill", Tkill), - 
201: syscalls.Supported("time", Time), - 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), - 203: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil), - 204: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil), - 205: syscalls.Error("set_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), - 206: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 207: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 208: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 209: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 210: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 211: syscalls.Error("get_thread_area", syserror.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), - 212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), - 213: syscalls.Supported("epoll_create", EpollCreate), - 214: syscalls.ErrorWithEvent("epoll_ctl_old", syserror.ENOSYS, "Deprecated.", nil), - 215: syscalls.ErrorWithEvent("epoll_wait_old", syserror.ENOSYS, "Deprecated.", nil), - 216: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil), - 217: syscalls.Supported("getdents64", Getdents64), - 218: syscalls.Supported("set_tid_address", SetTidAddress), - 219: syscalls.Supported("restart_syscall", RestartSyscall), - 220: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), - 221: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil), - 222: syscalls.Supported("timer_create", TimerCreate), - 223: syscalls.Supported("timer_settime", TimerSettime), - 224: syscalls.Supported("timer_gettime", TimerGettime), - 225: syscalls.Supported("timer_getoverrun", TimerGetoverrun), - 226: syscalls.Supported("timer_delete", TimerDelete), - 227: syscalls.Supported("clock_settime", ClockSettime), - 228: syscalls.Supported("clock_gettime", ClockGettime), - 229: syscalls.Supported("clock_getres", ClockGetres), - 230: syscalls.Supported("clock_nanosleep", ClockNanosleep), - 231: syscalls.Supported("exit_group", ExitGroup), - 232: syscalls.Supported("epoll_wait", EpollWait), - 233: syscalls.Supported("epoll_ctl", EpollCtl), - 234: syscalls.Supported("tgkill", Tgkill), - 235: syscalls.Supported("utimes", Utimes), - 236: syscalls.Error("vserver", syserror.ENOSYS, "Not implemented by Linux", nil), - 237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. 
Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}), - 238: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil), - 239: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil), - 240: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 241: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 242: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 243: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 244: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 245: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil), - 247: syscalls.Supported("waitid", Waitid), - 248: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil), - 249: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil), - 250: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil), - 251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) - 252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) - 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil), - 254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil), - 255: 
syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil), - 256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil), - 257: syscalls.Supported("openat", Openat), - 258: syscalls.Supported("mkdirat", Mkdirat), - 259: syscalls.Supported("mknodat", Mknodat), - 260: syscalls.Supported("fchownat", Fchownat), - 261: syscalls.Supported("futimesat", Futimesat), - 262: syscalls.Supported("fstatat", Fstatat), - 263: syscalls.Supported("unlinkat", Unlinkat), - 264: syscalls.Supported("renameat", Renameat), - 265: syscalls.Supported("linkat", Linkat), - 266: syscalls.Supported("symlinkat", Symlinkat), - 267: syscalls.Supported("readlinkat", Readlinkat), - 268: syscalls.Supported("fchmodat", Fchmodat), - 269: syscalls.Supported("faccessat", Faccessat), - 270: syscalls.Supported("pselect", Pselect), - 271: syscalls.Supported("ppoll", Ppoll), - 272: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. 
Network namespaces supported but must be empty.", nil), - 273: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil), - 274: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil), - 275: syscalls.Supported("splice", Splice), - 276: syscalls.Supported("tee", Tee), - 277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil), - 278: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) - 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly) - 280: syscalls.Supported("utimensat", Utimensat), - 281: syscalls.Supported("epoll_pwait", EpollPwait), - 282: syscalls.PartiallySupported("signalfd", Signalfd, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}), - 283: syscalls.Supported("timerfd_create", TimerfdCreate), - 284: syscalls.Supported("eventfd", Eventfd), - 285: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil), - 286: syscalls.Supported("timerfd_settime", TimerfdSettime), - 287: syscalls.Supported("timerfd_gettime", TimerfdGettime), - 288: syscalls.Supported("accept4", Accept4), - 289: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}), - 290: syscalls.Supported("eventfd2", Eventfd2), - 291: syscalls.Supported("epoll_create1", EpollCreate1), - 292: syscalls.Supported("dup3", Dup3), - 293: syscalls.Supported("pipe2", Pipe2), - 294: syscalls.Supported("inotify_init1", InotifyInit1), - 295: syscalls.Supported("preadv", Preadv), - 296: syscalls.Supported("pwritev", Pwritev), - 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), - 298: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil), - 299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are 
supported.", nil), - 300: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), - 301: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), - 302: syscalls.Supported("prlimit64", Prlimit64), - 303: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), - 304: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), - 305: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil), - 306: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil), - 307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), - 308: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) - 309: syscalls.Supported("getcpu", Getcpu), - 310: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), - 311: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), - 312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil), - 313: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil), - 314: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) - 315: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) - 316: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) - 317: syscalls.Supported("seccomp", Seccomp), - 318: syscalls.Supported("getrandom", GetRandom), - 319: syscalls.Supported("memfd_create", MemfdCreate), - 320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil), - 321: 
syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), - 322: syscalls.Supported("execveat", Execveat), - 323: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) - 324: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(gvisor.dev/issue/267) - 325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), - - // Syscalls implemented after 325 are "backports" from versions - // of Linux after 4.4. - 326: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil), - 327: syscalls.Supported("preadv2", Preadv2), - 328: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil), - 329: syscalls.ErrorWithEvent("pkey_mprotect", syserror.ENOSYS, "", nil), - 330: syscalls.ErrorWithEvent("pkey_alloc", syserror.ENOSYS, "", nil), - 331: syscalls.ErrorWithEvent("pkey_free", syserror.ENOSYS, "", nil), - 332: syscalls.Supported("statx", Statx), - 333: syscalls.ErrorWithEvent("io_pgetevents", syserror.ENOSYS, "", nil), - 334: syscalls.PartiallySupported("rseq", RSeq, "Not supported on all platforms.", nil), - - // Linux skips ahead to syscall 424 to sync numbers between arches. 
- 424: syscalls.ErrorWithEvent("pidfd_send_signal", syserror.ENOSYS, "", nil), - 425: syscalls.ErrorWithEvent("io_uring_setup", syserror.ENOSYS, "", nil), - 426: syscalls.ErrorWithEvent("io_uring_enter", syserror.ENOSYS, "", nil), - 427: syscalls.ErrorWithEvent("io_uring_register", syserror.ENOSYS, "", nil), - 428: syscalls.ErrorWithEvent("open_tree", syserror.ENOSYS, "", nil), - 429: syscalls.ErrorWithEvent("move_mount", syserror.ENOSYS, "", nil), - 430: syscalls.ErrorWithEvent("fsopen", syserror.ENOSYS, "", nil), - 431: syscalls.ErrorWithEvent("fsconfig", syserror.ENOSYS, "", nil), - 432: syscalls.ErrorWithEvent("fsmount", syserror.ENOSYS, "", nil), - 433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil), - 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil), - 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil), - }, - - Emulate: map[usermem.Addr]uintptr{ - 0xffffffffff600000: 96, // vsyscall gettimeofday(2) - 0xffffffffff600400: 201, // vsyscall time(2) - 0xffffffffff600800: 309, // vsyscall getcpu(2) - }, - Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { - t.Kernel().EmitUnimplementedEvent(t) - return 0, syserror.ENOSYS - }, -} diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go deleted file mode 100644 index 7421619de..000000000 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ /dev/null @@ -1,340 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package linux - -import ( - "gvisor.dev/gvisor/pkg/abi" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/syscalls" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// ARM64 is a table of Linux arm64 syscall API with the corresponding syscall -// numbers from Linux 4.4. -var ARM64 = &kernel.SyscallTable{ - OS: abi.Linux, - Arch: arch.ARM64, - Version: kernel.Version{ - Sysname: LinuxSysname, - Release: LinuxRelease, - Version: LinuxVersion, - }, - AuditNumber: linux.AUDIT_ARCH_AARCH64, - Table: map[uintptr]kernel.Syscall{ - 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. 
User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), - 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), - 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), - 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), - 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), - 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), - 11: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), - 12: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), - 13: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil), - 14: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), - 15: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), - 16: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), - 17: syscalls.Supported("getcwd", Getcwd), - 18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), - 19: syscalls.Supported("eventfd2", Eventfd2), - 20: syscalls.Supported("epoll_create1", EpollCreate1), - 21: syscalls.Supported("epoll_ctl", EpollCtl), - 22: syscalls.Supported("epoll_pwait", EpollPwait), - 23: syscalls.Supported("dup", Dup), - 24: syscalls.Supported("dup3", Dup3), - 25: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil), - 26: syscalls.Supported("inotify_init1", InotifyInit1), - 27: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil), - 28: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside 
the sandbox.", nil), - 29: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil), - 30: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) - 31: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) - 32: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil), - 33: syscalls.Supported("mknodat", Mknodat), - 34: syscalls.Supported("mkdirat", Mkdirat), - 35: syscalls.Supported("unlinkat", Unlinkat), - 36: syscalls.Supported("symlinkat", Symlinkat), - 37: syscalls.Supported("linkat", Linkat), - 38: syscalls.Supported("renameat", Renameat), - 39: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil), - 40: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil), - 41: syscalls.Error("pivot_root", syserror.EPERM, "", nil), - 42: syscalls.Error("nfsservctl", syserror.ENOSYS, "Removed after Linux 3.1.", nil), - 43: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), - 44: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), - 45: syscalls.Supported("truncate", Truncate), - 46: syscalls.Supported("ftruncate", Ftruncate), - 47: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil), - 48: syscalls.Supported("faccessat", Faccessat), - 49: syscalls.Supported("chdir", Chdir), - 50: syscalls.Supported("fchdir", Fchdir), - 51: syscalls.Supported("chroot", Chroot), - 52: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil), - 53: syscalls.Supported("fchmodat", Fchmodat), - 54: syscalls.Supported("fchownat", Fchownat), - 55: syscalls.Supported("fchown", Fchown), - 56: 
syscalls.Supported("openat", Openat), - 57: syscalls.Supported("close", Close), - 58: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil), - 59: syscalls.Supported("pipe2", Pipe2), - 60: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations - 61: syscalls.Supported("getdents64", Getdents64), - 62: syscalls.Supported("lseek", Lseek), - 63: syscalls.Supported("read", Read), - 64: syscalls.Supported("write", Write), - 65: syscalls.Supported("readv", Readv), - 66: syscalls.Supported("writev", Writev), - 67: syscalls.Supported("pread64", Pread64), - 68: syscalls.Supported("pwrite64", Pwrite64), - 69: syscalls.Supported("preadv", Preadv), - 70: syscalls.Supported("pwritev", Pwritev), - 71: syscalls.Supported("sendfile", Sendfile), - 72: syscalls.Supported("pselect", Pselect), - 73: syscalls.Supported("ppoll", Ppoll), - 74: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}), - 75: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) - 76: syscalls.PartiallySupported("splice", Splice, "Stub implementation.", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) - 77: syscalls.Supported("tee", Tee), - 78: syscalls.Supported("readlinkat", Readlinkat), - 79: syscalls.Supported("fstatat", Fstatat), - 80: syscalls.Supported("fstat", Fstat), - 81: syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil), - 82: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil), - 83: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil), - 84: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil), - 85: syscalls.Supported("timerfd_create", TimerfdCreate), - 86: syscalls.Supported("timerfd_settime", 
TimerfdSettime), - 87: syscalls.Supported("timerfd_gettime", TimerfdGettime), - 88: syscalls.Supported("utimensat", Utimensat), - 89: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil), - 90: syscalls.Supported("capget", Capget), - 91: syscalls.Supported("capset", Capset), - 92: syscalls.ErrorWithEvent("personality", syserror.EINVAL, "Unable to change personality.", nil), - 93: syscalls.Supported("exit", Exit), - 94: syscalls.Supported("exit_group", ExitGroup), - 95: syscalls.Supported("waitid", Waitid), - 96: syscalls.Supported("set_tid_address", SetTidAddress), - 97: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil), - 98: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), - 99: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil), - 100: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil), - 101: syscalls.Supported("nanosleep", Nanosleep), - 102: syscalls.Supported("getitimer", Getitimer), - 103: syscalls.Supported("setitimer", Setitimer), - 104: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil), - 105: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil), - 106: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil), - 107: syscalls.Supported("timer_create", TimerCreate), - 108: syscalls.Supported("timer_gettime", TimerGettime), - 109: syscalls.Supported("timer_getoverrun", TimerGetoverrun), - 110: syscalls.Supported("timer_settime", TimerSettime), - 111: syscalls.Supported("timer_delete", TimerDelete), - 112: syscalls.Supported("clock_settime", ClockSettime), - 113: syscalls.Supported("clock_gettime", ClockGettime), - 114: syscalls.Supported("clock_getres", ClockGetres), - 115: syscalls.Supported("clock_nanosleep", ClockNanosleep), - 116: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil), - 117: 
syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil), - 118: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil), - 119: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil), - 120: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil), - 121: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil), - 122: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil), - 123: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil), - 124: syscalls.Supported("sched_yield", SchedYield), - 125: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil), - 126: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil), - 127: syscalls.ErrorWithEvent("sched_rr_get_interval", syserror.EPERM, "", nil), - 128: syscalls.Supported("restart_syscall", RestartSyscall), - 129: syscalls.Supported("kill", Kill), - 130: syscalls.Supported("tkill", Tkill), - 131: syscalls.Supported("tgkill", Tgkill), - 132: syscalls.Supported("sigaltstack", Sigaltstack), - 133: syscalls.Supported("rt_sigsuspend", RtSigsuspend), - 134: syscalls.Supported("rt_sigaction", RtSigaction), - 135: syscalls.Supported("rt_sigprocmask", RtSigprocmask), - 136: syscalls.Supported("rt_sigpending", RtSigpending), - 137: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait), - 138: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo), - 139: syscalls.Supported("rt_sigreturn", RtSigreturn), - 140: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil), - 141: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil), - 142: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil), - 143: 
syscalls.Supported("setregid", Setregid), - 144: syscalls.Supported("setgid", Setgid), - 145: syscalls.Supported("setreuid", Setreuid), - 146: syscalls.Supported("setuid", Setuid), - 147: syscalls.Supported("setresuid", Setresuid), - 148: syscalls.Supported("getresuid", Getresuid), - 149: syscalls.Supported("setresgid", Setresgid), - 150: syscalls.Supported("getresgid", Getresgid), - 151: syscalls.ErrorWithEvent("setfsuid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) - 152: syscalls.ErrorWithEvent("setfsgid", syserror.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) - 153: syscalls.Supported("times", Times), - 154: syscalls.Supported("setpgid", Setpgid), - 155: syscalls.Supported("getpgid", Getpgid), - 156: syscalls.Supported("getsid", Getsid), - 157: syscalls.Supported("setsid", Setsid), - 158: syscalls.Supported("getgroups", Getgroups), - 159: syscalls.Supported("setgroups", Setgroups), - 160: syscalls.Supported("uname", Uname), - 161: syscalls.Supported("sethostname", Sethostname), - 162: syscalls.Supported("setdomainname", Setdomainname), - 163: syscalls.Supported("getrlimit", Getrlimit), - 164: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil), - 165: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. 
Fields ru_utime and ru_stime have low precision.", nil), - 166: syscalls.Supported("umask", Umask), - 167: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil), - 168: syscalls.Supported("getcpu", Getcpu), - 169: syscalls.Supported("gettimeofday", Gettimeofday), - 170: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil), - 171: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil), - 172: syscalls.Supported("getpid", Getpid), - 173: syscalls.Supported("getppid", Getppid), - 174: syscalls.Supported("getuid", Getuid), - 175: syscalls.Supported("geteuid", Geteuid), - 176: syscalls.Supported("getgid", Getgid), - 177: syscalls.Supported("getegid", Getegid), - 178: syscalls.Supported("gettid", Gettid), - 179: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil), - 180: syscalls.ErrorWithEvent("mq_open", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 181: syscalls.ErrorWithEvent("mq_unlink", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 182: syscalls.ErrorWithEvent("mq_timedsend", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 183: syscalls.ErrorWithEvent("mq_timedreceive", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 184: syscalls.ErrorWithEvent("mq_notify", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 185: syscalls.ErrorWithEvent("mq_getsetattr", syserror.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) - 186: syscalls.ErrorWithEvent("msgget", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 187: syscalls.ErrorWithEvent("msgctl", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 188: syscalls.ErrorWithEvent("msgrcv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 189: 
syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 190: syscalls.Supported("semget", Semget), - 191: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil), - 192: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), - 193: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil), - 194: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil), - 195: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil), - 196: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil), - 197: syscalls.Supported("shmdt", Shmdt), - 198: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. Limited support for SOCK_RAW.", nil), - 199: syscalls.Supported("socketpair", SocketPair), - 200: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil), - 201: syscalls.Supported("listen", Listen), - 202: syscalls.Supported("accept", Accept), - 203: syscalls.Supported("connect", Connect), - 204: syscalls.Supported("getsockname", GetSockName), - 205: syscalls.Supported("getpeername", GetPeerName), - 206: syscalls.Supported("sendto", SendTo), - 207: syscalls.Supported("recvfrom", RecvFrom), - 208: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil), - 209: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options are supported.", nil), - 210: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil), - 211: syscalls.Supported("sendmsg", SendMsg), - 212: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil), - 213: 
syscalls.Supported("readahead", Readahead), - 214: syscalls.Supported("brk", Brk), - 215: syscalls.Supported("munmap", Munmap), - 216: syscalls.Supported("mremap", Mremap), - 217: syscalls.Error("add_key", syserror.EACCES, "Not available to user.", nil), - 218: syscalls.Error("request_key", syserror.EACCES, "Not available to user.", nil), - 219: syscalls.Error("keyctl", syserror.EACCES, "Not available to user.", nil), - 220: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil), - 221: syscalls.Supported("execve", Execve), - 222: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil), - 223: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil), - 224: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil), - 225: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil), - 226: syscalls.Supported("mprotect", Mprotect), - 227: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil), - 228: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), - 229: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), - 230: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), - 231: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), - 232: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. 
Reports all mapped pages are resident.", nil), - 233: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil), - 234: syscalls.ErrorWithEvent("remap_file_pages", syserror.ENOSYS, "Deprecated since Linux 3.16.", nil), - 235: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}), - 236: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil), - 237: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil), - 238: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil), - 239: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly) - 240: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), - 241: syscalls.ErrorWithEvent("perf_event_open", syserror.ENODEV, "No support for perf counters", nil), - 242: syscalls.Supported("accept4", Accept4), - 243: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil), - 260: syscalls.Supported("wait4", Wait4), - 261: syscalls.Supported("prlimit64", Prlimit64), - 262: syscalls.ErrorWithEvent("fanotify_init", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), - 263: syscalls.ErrorWithEvent("fanotify_mark", syserror.ENOSYS, "Needs CONFIG_FANOTIFY", nil), - 264: syscalls.Error("name_to_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), - 265: syscalls.Error("open_by_handle_at", syserror.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), - 266: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil), - 267: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil), - 268: syscalls.ErrorWithEvent("setns", syserror.EOPNOTSUPP, "Needs filesystem 
support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) - 269: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), - 270: syscalls.ErrorWithEvent("process_vm_readv", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), - 271: syscalls.ErrorWithEvent("process_vm_writev", syserror.ENOSYS, "", []string{"gvisor.dev/issue/158"}), - 272: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil), - 273: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil), - 274: syscalls.ErrorWithEvent("sched_setattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) - 275: syscalls.ErrorWithEvent("sched_getattr", syserror.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) - 276: syscalls.ErrorWithEvent("renameat2", syserror.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) - 277: syscalls.Supported("seccomp", Seccomp), - 278: syscalls.Supported("getrandom", GetRandom), - 279: syscalls.Supported("memfd_create", MemfdCreate), - 280: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), - 281: syscalls.Supported("execveat", Execveat), - 282: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) - 283: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(gvisor.dev/issue/267) - 284: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), - - // Syscalls after 284 are "backports" from versions of Linux after 4.4. 
- 285: syscalls.ErrorWithEvent("copy_file_range", syserror.ENOSYS, "", nil), - 286: syscalls.Supported("preadv2", Preadv2), - 287: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil), - 288: syscalls.ErrorWithEvent("pkey_mprotect", syserror.ENOSYS, "", nil), - 289: syscalls.ErrorWithEvent("pkey_alloc", syserror.ENOSYS, "", nil), - 290: syscalls.ErrorWithEvent("pkey_free", syserror.ENOSYS, "", nil), - 291: syscalls.Supported("statx", Statx), - 292: syscalls.ErrorWithEvent("io_pgetevents", syserror.ENOSYS, "", nil), - 293: syscalls.PartiallySupported("rseq", RSeq, "Not supported on all platforms.", nil), - - // Linux skips ahead to syscall 424 to sync numbers between arches. - 424: syscalls.ErrorWithEvent("pidfd_send_signal", syserror.ENOSYS, "", nil), - 425: syscalls.ErrorWithEvent("io_uring_setup", syserror.ENOSYS, "", nil), - 426: syscalls.ErrorWithEvent("io_uring_enter", syserror.ENOSYS, "", nil), - 427: syscalls.ErrorWithEvent("io_uring_register", syserror.ENOSYS, "", nil), - 428: syscalls.ErrorWithEvent("open_tree", syserror.ENOSYS, "", nil), - 429: syscalls.ErrorWithEvent("move_mount", syserror.ENOSYS, "", nil), - 430: syscalls.ErrorWithEvent("fsopen", syserror.ENOSYS, "", nil), - 431: syscalls.ErrorWithEvent("fsconfig", syserror.ENOSYS, "", nil), - 432: syscalls.ErrorWithEvent("fsmount", syserror.ENOSYS, "", nil), - 433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil), - 434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil), - 435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil), - }, - Emulate: map[usermem.Addr]uintptr{}, - - Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { - t.Kernel().EmitUnimplementedEvent(t) - return 0, syserror.ENOSYS - }, -} diff --git a/pkg/sentry/syscalls/linux/sys_sysinfo.go b/pkg/sentry/syscalls/linux/sys_sysinfo.go index a65b560c8..297de052a 100644 --- a/pkg/sentry/syscalls/linux/sys_sysinfo.go +++ 
b/pkg/sentry/syscalls/linux/sys_sysinfo.go @@ -29,13 +29,18 @@ func Sysinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca mf.UpdateUsage() _, totalUsage := usage.MemoryAccounting.Copy() totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage) + memFree := totalSize - totalUsage + if memFree > totalSize { + // Underflow. + memFree = 0 + } // Only a subset of the fields in sysinfo_t make sense to return. si := linux.Sysinfo{ Procs: uint16(len(t.PIDNamespace().Tasks())), Uptime: t.Kernel().MonotonicClock().Now().Seconds(), TotalRAM: totalSize, - FreeRAM: totalSize - totalUsage, + FreeRAM: memFree, Unit: 1, } _, err := t.CopyOut(addr, si) diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index ffca627d4..4c7b8f819 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -13,9 +13,6 @@ go_library( "fscontext.go", "getdents.go", "ioctl.go", - "linux64.go", - "linux64_override_amd64.go", - "linux64_override_arm64.go", "mmap.go", "path.go", "pipe.go", @@ -28,6 +25,7 @@ go_library( "stat_arm64.go", "sync.go", "timerfd.go", + "vfs2.go", "xattr.go", ], marshal = True, diff --git a/pkg/sentry/syscalls/linux/vfs2/linux64.go b/pkg/sentry/syscalls/linux/vfs2/linux64.go deleted file mode 100644 index 19ee36081..000000000 --- a/pkg/sentry/syscalls/linux/vfs2/linux64.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -// Package vfs2 provides syscall implementations that use VFS2. -package vfs2 diff --git a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go b/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go deleted file mode 100644 index 074f58e5d..000000000 --- a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build amd64 - -package vfs2 - -import ( - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/syscalls" -) - -// Override syscall table to add syscalls implementations from this package. 
-func Override(table map[uintptr]kernel.Syscall) { - table[0] = syscalls.Supported("read", Read) - table[1] = syscalls.Supported("write", Write) - table[2] = syscalls.Supported("open", Open) - table[3] = syscalls.Supported("close", Close) - table[4] = syscalls.Supported("stat", Stat) - table[5] = syscalls.Supported("fstat", Fstat) - table[6] = syscalls.Supported("lstat", Lstat) - table[7] = syscalls.Supported("poll", Poll) - table[8] = syscalls.Supported("lseek", Lseek) - table[9] = syscalls.Supported("mmap", Mmap) - table[16] = syscalls.Supported("ioctl", Ioctl) - table[17] = syscalls.Supported("pread64", Pread64) - table[18] = syscalls.Supported("pwrite64", Pwrite64) - table[19] = syscalls.Supported("readv", Readv) - table[20] = syscalls.Supported("writev", Writev) - table[21] = syscalls.Supported("access", Access) - table[22] = syscalls.Supported("pipe", Pipe) - table[23] = syscalls.Supported("select", Select) - table[32] = syscalls.Supported("dup", Dup) - table[33] = syscalls.Supported("dup2", Dup2) - delete(table, 40) // sendfile - // TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2. 
- table[41] = syscalls.PartiallySupported("socket", Socket, "In process of porting socket syscalls to VFS2.", nil) - table[42] = syscalls.PartiallySupported("connect", Connect, "In process of porting socket syscalls to VFS2.", nil) - table[43] = syscalls.PartiallySupported("accept", Accept, "In process of porting socket syscalls to VFS2.", nil) - table[44] = syscalls.PartiallySupported("sendto", SendTo, "In process of porting socket syscalls to VFS2.", nil) - table[45] = syscalls.PartiallySupported("recvfrom", RecvFrom, "In process of porting socket syscalls to VFS2.", nil) - table[46] = syscalls.PartiallySupported("sendmsg", SendMsg, "In process of porting socket syscalls to VFS2.", nil) - table[47] = syscalls.PartiallySupported("recvmsg", RecvMsg, "In process of porting socket syscalls to VFS2.", nil) - table[48] = syscalls.PartiallySupported("shutdown", Shutdown, "In process of porting socket syscalls to VFS2.", nil) - table[49] = syscalls.PartiallySupported("bind", Bind, "In process of porting socket syscalls to VFS2.", nil) - table[50] = syscalls.PartiallySupported("listen", Listen, "In process of porting socket syscalls to VFS2.", nil) - table[51] = syscalls.PartiallySupported("getsockname", GetSockName, "In process of porting socket syscalls to VFS2.", nil) - table[52] = syscalls.PartiallySupported("getpeername", GetPeerName, "In process of porting socket syscalls to VFS2.", nil) - table[53] = syscalls.PartiallySupported("socketpair", SocketPair, "In process of porting socket syscalls to VFS2.", nil) - table[54] = syscalls.PartiallySupported("setsockopt", SetSockOpt, "In process of porting socket syscalls to VFS2.", nil) - table[55] = syscalls.PartiallySupported("getsockopt", GetSockOpt, "In process of porting socket syscalls to VFS2.", nil) - table[59] = syscalls.Supported("execve", Execve) - table[72] = syscalls.Supported("fcntl", Fcntl) - delete(table, 73) // flock - table[74] = syscalls.Supported("fsync", Fsync) - table[75] = 
syscalls.Supported("fdatasync", Fdatasync) - table[76] = syscalls.Supported("truncate", Truncate) - table[77] = syscalls.Supported("ftruncate", Ftruncate) - table[78] = syscalls.Supported("getdents", Getdents) - table[79] = syscalls.Supported("getcwd", Getcwd) - table[80] = syscalls.Supported("chdir", Chdir) - table[81] = syscalls.Supported("fchdir", Fchdir) - table[82] = syscalls.Supported("rename", Rename) - table[83] = syscalls.Supported("mkdir", Mkdir) - table[84] = syscalls.Supported("rmdir", Rmdir) - table[85] = syscalls.Supported("creat", Creat) - table[86] = syscalls.Supported("link", Link) - table[87] = syscalls.Supported("unlink", Unlink) - table[88] = syscalls.Supported("symlink", Symlink) - table[89] = syscalls.Supported("readlink", Readlink) - table[90] = syscalls.Supported("chmod", Chmod) - table[91] = syscalls.Supported("fchmod", Fchmod) - table[92] = syscalls.Supported("chown", Chown) - table[93] = syscalls.Supported("fchown", Fchown) - table[94] = syscalls.Supported("lchown", Lchown) - table[132] = syscalls.Supported("utime", Utime) - table[133] = syscalls.Supported("mknod", Mknod) - table[137] = syscalls.Supported("statfs", Statfs) - table[138] = syscalls.Supported("fstatfs", Fstatfs) - table[161] = syscalls.Supported("chroot", Chroot) - table[162] = syscalls.Supported("sync", Sync) - delete(table, 165) // mount - delete(table, 166) // umount2 - delete(table, 187) // readahead - table[188] = syscalls.Supported("setxattr", Setxattr) - table[189] = syscalls.Supported("lsetxattr", Lsetxattr) - table[190] = syscalls.Supported("fsetxattr", Fsetxattr) - table[191] = syscalls.Supported("getxattr", Getxattr) - table[192] = syscalls.Supported("lgetxattr", Lgetxattr) - table[193] = syscalls.Supported("fgetxattr", Fgetxattr) - table[194] = syscalls.Supported("listxattr", Listxattr) - table[195] = syscalls.Supported("llistxattr", Llistxattr) - table[196] = syscalls.Supported("flistxattr", Flistxattr) - table[197] = syscalls.Supported("removexattr", 
Removexattr) - table[198] = syscalls.Supported("lremovexattr", Lremovexattr) - table[199] = syscalls.Supported("fremovexattr", Fremovexattr) - delete(table, 206) // io_setup - delete(table, 207) // io_destroy - delete(table, 208) // io_getevents - delete(table, 209) // io_submit - delete(table, 210) // io_cancel - table[213] = syscalls.Supported("epoll_create", EpollCreate) - table[217] = syscalls.Supported("getdents64", Getdents64) - delete(table, 221) // fdavise64 - table[232] = syscalls.Supported("epoll_wait", EpollWait) - table[233] = syscalls.Supported("epoll_ctl", EpollCtl) - table[235] = syscalls.Supported("utimes", Utimes) - delete(table, 253) // inotify_init - delete(table, 254) // inotify_add_watch - delete(table, 255) // inotify_rm_watch - table[257] = syscalls.Supported("openat", Openat) - table[258] = syscalls.Supported("mkdirat", Mkdirat) - table[259] = syscalls.Supported("mknodat", Mknodat) - table[260] = syscalls.Supported("fchownat", Fchownat) - table[261] = syscalls.Supported("futimens", Futimens) - table[262] = syscalls.Supported("newfstatat", Newfstatat) - table[263] = syscalls.Supported("unlinkat", Unlinkat) - table[264] = syscalls.Supported("renameat", Renameat) - table[265] = syscalls.Supported("linkat", Linkat) - table[266] = syscalls.Supported("symlinkat", Symlinkat) - table[267] = syscalls.Supported("readlinkat", Readlinkat) - table[268] = syscalls.Supported("fchmodat", Fchmodat) - table[269] = syscalls.Supported("faccessat", Faccessat) - table[270] = syscalls.Supported("pselect", Pselect) - table[271] = syscalls.Supported("ppoll", Ppoll) - delete(table, 275) // splice - delete(table, 276) // tee - table[277] = syscalls.Supported("sync_file_range", SyncFileRange) - table[280] = syscalls.Supported("utimensat", Utimensat) - table[281] = syscalls.Supported("epoll_pwait", EpollPwait) - delete(table, 282) // signalfd - table[283] = syscalls.Supported("timerfd_create", TimerfdCreate) - table[284] = syscalls.Supported("eventfd", Eventfd) - 
delete(table, 285) // fallocate - table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime) - table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime) - // TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2. - table[288] = syscalls.PartiallySupported("accept4", Accept4, "In process of porting socket syscalls to VFS2.", nil) - delete(table, 289) // signalfd4 - table[290] = syscalls.Supported("eventfd2", Eventfd2) - table[291] = syscalls.Supported("epoll_create1", EpollCreate1) - table[292] = syscalls.Supported("dup3", Dup3) - table[293] = syscalls.Supported("pipe2", Pipe2) - delete(table, 294) // inotify_init1 - table[295] = syscalls.Supported("preadv", Preadv) - table[296] = syscalls.Supported("pwritev", Pwritev) - // TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2. - table[299] = syscalls.PartiallySupported("recvmmsg", RecvMMsg, "In process of porting socket syscalls to VFS2.", nil) - table[306] = syscalls.Supported("syncfs", Syncfs) - // TODO(gvisor.dev/issue/1485): Port all socket variants to VFS2. - table[307] = syscalls.PartiallySupported("sendmmsg", SendMMsg, "In process of porting socket syscalls to VFS2.", nil) - table[316] = syscalls.Supported("renameat2", Renameat2) - delete(table, 319) // memfd_create - table[322] = syscalls.Supported("execveat", Execveat) - table[327] = syscalls.Supported("preadv2", Preadv2) - table[328] = syscalls.Supported("pwritev2", Pwritev2) - table[332] = syscalls.Supported("statx", Statx) -} diff --git a/pkg/sentry/syscalls/linux/vfs2/linux64_override_arm64.go b/pkg/sentry/syscalls/linux/vfs2/linux64_override_arm64.go deleted file mode 100644 index a6b367468..000000000 --- a/pkg/sentry/syscalls/linux/vfs2/linux64_override_arm64.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build arm64 - -package vfs2 - -import ( - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/syscalls" -) - -// Override syscall table to add syscalls implementations from this package. -func Override(table map[uintptr]kernel.Syscall) { - table[63] = syscalls.Supported("read", Read) -} diff --git a/pkg/sentry/syscalls/linux/vfs2/timerfd.go b/pkg/sentry/syscalls/linux/vfs2/timerfd.go index 7938a5249..839a07db1 100644 --- a/pkg/sentry/syscalls/linux/vfs2/timerfd.go +++ b/pkg/sentry/syscalls/linux/vfs2/timerfd.go @@ -46,7 +46,10 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel default: return 0, nil, syserror.EINVAL } - file, err := t.Kernel().VFS().NewTimerFD(clock, fileFlags) + // Timerfds aren't writable per se (their implementation of Write just + // returns EINVAL), but they are "opened for writing", which is necessary + // to actually reach said implementation of Write. + file, err := t.Kernel().VFS().NewTimerFD(clock, linux.O_RDWR|fileFlags) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go new file mode 100644 index 000000000..f1b697844 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -0,0 +1,172 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package vfs2 provides syscall implementations that use VFS2. +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/sentry/syscalls" + "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" +) + +// Override syscall table to add syscalls implementations from this package. +func Override() { + // Override AMD64. + s := linux.AMD64 + s.Table[0] = syscalls.Supported("read", Read) + s.Table[1] = syscalls.Supported("write", Write) + s.Table[2] = syscalls.Supported("open", Open) + s.Table[3] = syscalls.Supported("close", Close) + s.Table[4] = syscalls.Supported("stat", Stat) + s.Table[5] = syscalls.Supported("fstat", Fstat) + s.Table[6] = syscalls.Supported("lstat", Lstat) + s.Table[7] = syscalls.Supported("poll", Poll) + s.Table[8] = syscalls.Supported("lseek", Lseek) + s.Table[9] = syscalls.Supported("mmap", Mmap) + s.Table[16] = syscalls.Supported("ioctl", Ioctl) + s.Table[17] = syscalls.Supported("pread64", Pread64) + s.Table[18] = syscalls.Supported("pwrite64", Pwrite64) + s.Table[19] = syscalls.Supported("readv", Readv) + s.Table[20] = syscalls.Supported("writev", Writev) + s.Table[21] = syscalls.Supported("access", Access) + s.Table[22] = syscalls.Supported("pipe", Pipe) + s.Table[23] = syscalls.Supported("select", Select) + s.Table[32] = syscalls.Supported("dup", Dup) + s.Table[33] = syscalls.Supported("dup2", Dup2) + delete(s.Table, 40) // sendfile + s.Table[41] = syscalls.Supported("socket", Socket) + s.Table[42] = syscalls.Supported("connect", Connect) + s.Table[43] = syscalls.Supported("accept", Accept) + s.Table[44] = syscalls.Supported("sendto", 
SendTo) + s.Table[45] = syscalls.Supported("recvfrom", RecvFrom) + s.Table[46] = syscalls.Supported("sendmsg", SendMsg) + s.Table[47] = syscalls.Supported("recvmsg", RecvMsg) + s.Table[48] = syscalls.Supported("shutdown", Shutdown) + s.Table[49] = syscalls.Supported("bind", Bind) + s.Table[50] = syscalls.Supported("listen", Listen) + s.Table[51] = syscalls.Supported("getsockname", GetSockName) + s.Table[52] = syscalls.Supported("getpeername", GetPeerName) + s.Table[53] = syscalls.Supported("socketpair", SocketPair) + s.Table[54] = syscalls.Supported("setsockopt", SetSockOpt) + s.Table[55] = syscalls.Supported("getsockopt", GetSockOpt) + s.Table[59] = syscalls.Supported("execve", Execve) + s.Table[72] = syscalls.Supported("fcntl", Fcntl) + delete(s.Table, 73) // flock + s.Table[74] = syscalls.Supported("fsync", Fsync) + s.Table[75] = syscalls.Supported("fdatasync", Fdatasync) + s.Table[76] = syscalls.Supported("truncate", Truncate) + s.Table[77] = syscalls.Supported("ftruncate", Ftruncate) + s.Table[78] = syscalls.Supported("getdents", Getdents) + s.Table[79] = syscalls.Supported("getcwd", Getcwd) + s.Table[80] = syscalls.Supported("chdir", Chdir) + s.Table[81] = syscalls.Supported("fchdir", Fchdir) + s.Table[82] = syscalls.Supported("rename", Rename) + s.Table[83] = syscalls.Supported("mkdir", Mkdir) + s.Table[84] = syscalls.Supported("rmdir", Rmdir) + s.Table[85] = syscalls.Supported("creat", Creat) + s.Table[86] = syscalls.Supported("link", Link) + s.Table[87] = syscalls.Supported("unlink", Unlink) + s.Table[88] = syscalls.Supported("symlink", Symlink) + s.Table[89] = syscalls.Supported("readlink", Readlink) + s.Table[90] = syscalls.Supported("chmod", Chmod) + s.Table[91] = syscalls.Supported("fchmod", Fchmod) + s.Table[92] = syscalls.Supported("chown", Chown) + s.Table[93] = syscalls.Supported("fchown", Fchown) + s.Table[94] = syscalls.Supported("lchown", Lchown) + s.Table[132] = syscalls.Supported("utime", Utime) + s.Table[133] = syscalls.Supported("mknod", 
Mknod) + s.Table[137] = syscalls.Supported("statfs", Statfs) + s.Table[138] = syscalls.Supported("fstatfs", Fstatfs) + s.Table[161] = syscalls.Supported("chroot", Chroot) + s.Table[162] = syscalls.Supported("sync", Sync) + delete(s.Table, 165) // mount + delete(s.Table, 166) // umount2 + delete(s.Table, 187) // readahead + s.Table[188] = syscalls.Supported("setxattr", Setxattr) + s.Table[189] = syscalls.Supported("lsetxattr", Lsetxattr) + s.Table[190] = syscalls.Supported("fsetxattr", Fsetxattr) + s.Table[191] = syscalls.Supported("getxattr", Getxattr) + s.Table[192] = syscalls.Supported("lgetxattr", Lgetxattr) + s.Table[193] = syscalls.Supported("fgetxattr", Fgetxattr) + s.Table[194] = syscalls.Supported("listxattr", Listxattr) + s.Table[195] = syscalls.Supported("llistxattr", Llistxattr) + s.Table[196] = syscalls.Supported("flistxattr", Flistxattr) + s.Table[197] = syscalls.Supported("removexattr", Removexattr) + s.Table[198] = syscalls.Supported("lremovexattr", Lremovexattr) + s.Table[199] = syscalls.Supported("fremovexattr", Fremovexattr) + delete(s.Table, 206) // io_setup + delete(s.Table, 207) // io_destroy + delete(s.Table, 208) // io_getevents + delete(s.Table, 209) // io_submit + delete(s.Table, 210) // io_cancel + s.Table[213] = syscalls.Supported("epoll_create", EpollCreate) + s.Table[217] = syscalls.Supported("getdents64", Getdents64) + delete(s.Table, 221) // fdavise64 + s.Table[232] = syscalls.Supported("epoll_wait", EpollWait) + s.Table[233] = syscalls.Supported("epoll_ctl", EpollCtl) + s.Table[235] = syscalls.Supported("utimes", Utimes) + delete(s.Table, 253) // inotify_init + delete(s.Table, 254) // inotify_add_watch + delete(s.Table, 255) // inotify_rm_watch + s.Table[257] = syscalls.Supported("openat", Openat) + s.Table[258] = syscalls.Supported("mkdirat", Mkdirat) + s.Table[259] = syscalls.Supported("mknodat", Mknodat) + s.Table[260] = syscalls.Supported("fchownat", Fchownat) + s.Table[261] = syscalls.Supported("futimens", Futimens) + 
s.Table[262] = syscalls.Supported("newfstatat", Newfstatat) + s.Table[263] = syscalls.Supported("unlinkat", Unlinkat) + s.Table[264] = syscalls.Supported("renameat", Renameat) + s.Table[265] = syscalls.Supported("linkat", Linkat) + s.Table[266] = syscalls.Supported("symlinkat", Symlinkat) + s.Table[267] = syscalls.Supported("readlinkat", Readlinkat) + s.Table[268] = syscalls.Supported("fchmodat", Fchmodat) + s.Table[269] = syscalls.Supported("faccessat", Faccessat) + s.Table[270] = syscalls.Supported("pselect", Pselect) + s.Table[271] = syscalls.Supported("ppoll", Ppoll) + delete(s.Table, 275) // splice + delete(s.Table, 276) // tee + s.Table[277] = syscalls.Supported("sync_file_range", SyncFileRange) + s.Table[280] = syscalls.Supported("utimensat", Utimensat) + s.Table[281] = syscalls.Supported("epoll_pwait", EpollPwait) + delete(s.Table, 282) // signalfd + s.Table[283] = syscalls.Supported("timerfd_create", TimerfdCreate) + s.Table[284] = syscalls.Supported("eventfd", Eventfd) + delete(s.Table, 285) // fallocate + s.Table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime) + s.Table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime) + s.Table[288] = syscalls.Supported("accept4", Accept4) + delete(s.Table, 289) // signalfd4 + s.Table[290] = syscalls.Supported("eventfd2", Eventfd2) + s.Table[291] = syscalls.Supported("epoll_create1", EpollCreate1) + s.Table[292] = syscalls.Supported("dup3", Dup3) + s.Table[293] = syscalls.Supported("pipe2", Pipe2) + delete(s.Table, 294) // inotify_init1 + s.Table[295] = syscalls.Supported("preadv", Preadv) + s.Table[296] = syscalls.Supported("pwritev", Pwritev) + s.Table[299] = syscalls.Supported("recvmmsg", RecvMMsg) + s.Table[306] = syscalls.Supported("syncfs", Syncfs) + s.Table[307] = syscalls.Supported("sendmmsg", SendMMsg) + s.Table[316] = syscalls.Supported("renameat2", Renameat2) + delete(s.Table, 319) // memfd_create + s.Table[322] = syscalls.Supported("execveat", Execveat) + s.Table[327] = 
syscalls.Supported("preadv2", Preadv2) + s.Table[328] = syscalls.Supported("pwritev2", Pwritev2) + s.Table[332] = syscalls.Supported("statx", Statx) + s.Init() + + // Override ARM64. + s = linux.ARM64 + s.Table[63] = syscalls.Supported("read", Read) + s.Init() +} diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go index 4320ad17f..ab1d140d2 100644 --- a/pkg/sentry/usage/memory.go +++ b/pkg/sentry/usage/memory.go @@ -252,18 +252,23 @@ func (m *MemoryLocked) Copy() (MemoryStats, uint64) { return ms, m.totalLocked() } -// MinimumTotalMemoryBytes is the minimum reported total system memory. -// -// This can be configured through options provided to the Sentry at start. -// This number is purely synthetic. This is only set before the application -// starts executing, and must not be modified. -var MinimumTotalMemoryBytes uint64 = 2 << 30 // 2 GB +// These options control how much total memory is reported to the application. +// They may only be set before the application starts executing, and must not +// be modified. +var ( + // MinimumTotalMemoryBytes is the minimum reported total system memory. + MinimumTotalMemoryBytes uint64 = 2 << 30 // 2 GB + + // MaximumTotalMemoryBytes is the maximum reported total system memory. + // The 0 value indicates no maximum. + MaximumTotalMemoryBytes uint64 +) // TotalMemory returns the "total usable memory" available. // // This number doesn't really have a true value so it's based on the following -// inputs and further bounded to be above some minimum guaranteed value (2GB), -// additionally ensuring that total memory reported is always less than used. +// inputs and further bounded to be above MinimumTotalMemoryBytes and below +// MaximumTotalMemoryBytes.
// // memSize should be the platform.Memory size reported by platform.Memory.TotalSize() // used is the total memory reported by MemoryLocked.Total() @@ -279,5 +284,8 @@ func TotalMemory(memSize, used uint64) uint64 { memSize = uint64(1) << (uint(msb) + 1) } } + if MaximumTotalMemoryBytes > 0 && memSize > MaximumTotalMemoryBytes { + memSize = MaximumTotalMemoryBytes + } return memSize } diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index 981bd8caa..adebaeefb 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -91,8 +91,6 @@ func (fs *anonFilesystem) Sync(ctx context.Context) error { } // AccessAt implements vfs.Filesystem.Impl.AccessAt. -// -// TODO(gvisor.dev/issue/1965): Implement access permissions. func (fs *anonFilesystem) AccessAt(ctx context.Context, rp *ResolvingPath, creds *auth.Credentials, ats AccessTypes) error { if !rp.Done() { return syserror.ENOTDIR diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 418d69b96..cfabd936c 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -91,10 +91,6 @@ type FileDescriptionOptions struct { // ESPIPE. DenyPWrite bool - // if InvalidWrite is true, calls to FileDescription.Write() return - // EINVAL. - InvalidWrite bool - // If UseDentryMetadata is true, calls to FileDescription methods that // interact with file and filesystem metadata (Stat, SetStat, StatFS, // Listxattr, Getxattr, Setxattr, Removexattr) are implemented by calling @@ -570,9 +566,6 @@ func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, o // Write is similar to PWrite, but does not specify an offset. 
func (fd *FileDescription) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { - if fd.opts.InvalidWrite { - return 0, syserror.EINVAL - } if !fd.writable { return 0, syserror.EBADF } diff --git a/pkg/sentry/vfs/timerfd.go b/pkg/sentry/vfs/timerfd.go index 42b880656..cc536ceaf 100644 --- a/pkg/sentry/vfs/timerfd.go +++ b/pkg/sentry/vfs/timerfd.go @@ -53,7 +53,6 @@ func (vfs *VirtualFilesystem) NewTimerFD(clock ktime.Clock, flags uint32) (*File UseDentryMetadata: true, DenyPRead: true, DenyPWrite: true, - InvalidWrite: true, }); err != nil { return nil, err } diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go index 0799c8f4d..da1c520ae 100644 --- a/pkg/tcpip/link/sniffer/sniffer.go +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -391,7 +391,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P break } udp := header.UDP(hdr) - if fragmentOffset == 0 && len(udp) >= header.UDPMinimumSize { + if fragmentOffset == 0 { srcPort = udp.SourcePort() dstPort = udp.DestinationPort() details = fmt.Sprintf("xsum: 0x%x", udp.Checksum()) @@ -405,14 +405,14 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P break } tcp := header.TCP(hdr) - if fragmentOffset == 0 && len(tcp) >= header.TCPMinimumSize { + if fragmentOffset == 0 { offset := int(tcp.DataOffset()) if offset < header.TCPMinimumSize { details += fmt.Sprintf("invalid packet: tcp data offset too small %d", offset) break } - if offset > len(tcp) && !moreFragments { - details += fmt.Sprintf("invalid packet: tcp data offset %d larger than packet buffer length %d", offset, len(tcp)) + if offset > vv.Size() && !moreFragments { + details += fmt.Sprintf("invalid packet: tcp data offset %d larger than packet buffer length %d", offset, vv.Size()) break } diff --git a/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go b/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go index 8b4213eec..d199ded6a 100644 
--- a/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go +++ b/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Code generated by "stringer -type=DHCPv6ConfigurationFromNDPRA"; DO NOT EDIT. +// Code generated by "stringer -type DHCPv6ConfigurationFromNDPRA"; DO NOT EDIT. package stack @@ -22,9 +22,9 @@ func _() { // An "invalid array index" compiler error signifies that the constant values have changed. // Re-run the stringer command to generate them again. var x [1]struct{} - _ = x[DHCPv6NoConfiguration-0] - _ = x[DHCPv6ManagedAddress-1] - _ = x[DHCPv6OtherConfigurations-2] + _ = x[DHCPv6NoConfiguration-1] + _ = x[DHCPv6ManagedAddress-2] + _ = x[DHCPv6OtherConfigurations-3] } const _DHCPv6ConfigurationFromNDPRA_name = "DHCPv6NoConfigurationDHCPv6ManagedAddressDHCPv6OtherConfigurations" @@ -32,8 +32,9 @@ const _DHCPv6ConfigurationFromNDPRA_name = "DHCPv6NoConfigurationDHCPv6ManagedAd var _DHCPv6ConfigurationFromNDPRA_index = [...]uint8{0, 21, 41, 66} func (i DHCPv6ConfigurationFromNDPRA) String() string { + i -= 1 if i < 0 || i >= DHCPv6ConfigurationFromNDPRA(len(_DHCPv6ConfigurationFromNDPRA_index)-1) { - return "DHCPv6ConfigurationFromNDPRA(" + strconv.FormatInt(int64(i), 10) + ")" + return "DHCPv6ConfigurationFromNDPRA(" + strconv.FormatInt(int64(i+1), 10) + ")" } return _DHCPv6ConfigurationFromNDPRA_name[_DHCPv6ConfigurationFromNDPRA_index[i]:_DHCPv6ConfigurationFromNDPRA_index[i+1]] } diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go index 15343acbc..526c7d6ff 100644 --- a/pkg/tcpip/stack/ndp.go +++ b/pkg/tcpip/stack/ndp.go @@ -199,9 +199,11 @@ var ( type DHCPv6ConfigurationFromNDPRA int const ( + _ DHCPv6ConfigurationFromNDPRA = iota + // DHCPv6NoConfiguration indicates that no configurations are available via // DHCPv6. 
- DHCPv6NoConfiguration DHCPv6ConfigurationFromNDPRA = iota + DHCPv6NoConfiguration // DHCPv6ManagedAddress indicates that addresses are available via DHCPv6. // @@ -315,9 +317,6 @@ type NDPDispatcher interface { // OnDHCPv6Configuration will be called with an updated configuration that is // available via DHCPv6 for a specified NIC. // - // NDPDispatcher assumes that the initial configuration available by DHCPv6 is - // DHCPv6NoConfiguration. - // // This function is not permitted to block indefinitely. It must not // call functions on the stack itself. OnDHCPv6Configuration(tcpip.NICID, DHCPv6ConfigurationFromNDPRA) @@ -1808,6 +1807,8 @@ func (ndp *ndpState) cleanupState(hostOnly bool) { if got := len(ndp.defaultRouters); got != 0 { panic(fmt.Sprintf("ndp: still have discovered default routers after cleaning up; found = %d", got)) } + + ndp.dhcpv6Configuration = 0 } // startSolicitingRouters starts soliciting routers, as per RFC 4861 section diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index 67f012840..b3d174cdd 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -4888,7 +4888,12 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) { } } - // The initial DHCPv6 configuration should be stack.DHCPv6NoConfiguration. + // Even if the first RA reports no DHCPv6 configurations are available, the + // dispatcher should get an event. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, false)) + expectDHCPv6Event(stack.DHCPv6NoConfiguration) + // Receiving the same update again should not result in an event to the + // dispatcher. e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, false)) expectNoDHCPv6Event() @@ -4896,8 +4901,6 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) { // Configurations. 
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true)) expectDHCPv6Event(stack.DHCPv6OtherConfigurations) - // Receiving the same update again should not result in an event to the - // NDPDispatcher. e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true)) expectNoDHCPv6Event() @@ -4933,6 +4936,21 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) { expectDHCPv6Event(stack.DHCPv6OtherConfigurations) e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true)) expectNoDHCPv6Event() + + // Cycling the NIC should cause the last DHCPv6 configuration to be cleared. + if err := s.DisableNIC(nicID); err != nil { + t.Fatalf("s.DisableNIC(%d): %s", nicID, err) + } + if err := s.EnableNIC(nicID); err != nil { + t.Fatalf("s.EnableNIC(%d): %s", nicID, err) + } + + // Receive an RA that updates the DHCPv6 configuration to Other + // Configurations. + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true)) + expectDHCPv6Event(stack.DHCPv6OtherConfigurations) + e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true)) + expectNoDHCPv6Event() } // TestRouterSolicitation tests the initial Router Solicitations that are sent diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index d8cfe3115..a3018914b 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -41,6 +41,10 @@ const ( // nDupAckThreshold is the number of duplicate ACK's required // before fast-retransmit is entered. nDupAckThreshold = 3 + + // MaxRetries is the maximum number of probe retries sender does + // before timing out the connection, Linux default TCP_RETR2. + MaxRetries = 15 ) // ccState indicates the current congestion control state for this sender. @@ -138,6 +142,14 @@ type sender struct { // the first segment that was retransmitted due to RTO expiration. 
firstRetransmittedSegXmitTime time.Time `state:".(unixTime)"` + // zeroWindowProbing is set if the sender is currently probing + // for zero receive window. + zeroWindowProbing bool `state:"nosave"` + + // unackZeroWindowProbes is the number of unacknowledged zero + // window probes. + unackZeroWindowProbes uint32 `state:"nosave"` + closed bool writeNext *segment writeList segmentList @@ -479,10 +491,24 @@ func (s *sender) retransmitTimerExpired() bool { remaining = uto - elapsed } - if remaining <= 0 || s.rto >= MaxRTO { + // Always honor the user-timeout irrespective of whether the zero + // window probes were acknowledged. + // net/ipv4/tcp_timer.c::tcp_probe_timer() + if remaining <= 0 || s.unackZeroWindowProbes >= MaxRetries { return false } + if s.rto >= MaxRTO { + // RFC 1122 section: 4.2.2.17 + // A TCP MAY keep its offered receive window closed + // indefinitely. As long as the receiving TCP continues to + // send acknowledgments in response to the probe segments, the + // sending TCP MUST allow the connection to stay open. + if !(s.zeroWindowProbing && s.unackZeroWindowProbes == 0) { + return false + } + } + // Set new timeout. The timer will be restarted by the call to sendData // below. s.rto *= 2 @@ -533,6 +559,15 @@ func (s *sender) retransmitTimerExpired() bool { // information is usable after an RTO. s.ep.scoreboard.Reset() s.writeNext = s.writeList.Front() + + // RFC 1122 4.2.2.17: Start sending zero window probes when we still see a + // zero receive window after retransmission interval and we have data to + // send. + if s.zeroWindowProbing { + s.sendZeroWindowProbe() + return true + } + s.sendData() return true @@ -827,6 +862,34 @@ func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) return dataSent } +func (s *sender) sendZeroWindowProbe() { + ack, win := s.ep.rcv.getSendParams() + s.unackZeroWindowProbes++ + // Send a zero window probe with sequence number pointing to + // the last acknowledged byte. 
+ s.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck, s.sndUna-1, ack, win) + // Rearm the timer to continue probing. + s.resendTimer.enable(s.rto) +} + +func (s *sender) enableZeroWindowProbing() { + s.zeroWindowProbing = true + // We piggyback the probing on the retransmit timer with the + // current retransmission interval, as we may start probing during + // segment retransmissions. + if s.firstRetransmittedSegXmitTime.IsZero() { + s.firstRetransmittedSegXmitTime = time.Now() + } + s.resendTimer.enable(s.rto) +} + +func (s *sender) disableZeroWindowProbing() { + s.zeroWindowProbing = false + s.unackZeroWindowProbes = 0 + s.firstRetransmittedSegXmitTime = time.Time{} + s.resendTimer.disable() +} + // sendData sends new data segments. It is called when data becomes available or // when the send window opens up. func (s *sender) sendData() { @@ -875,6 +938,13 @@ func (s *sender) sendData() { s.ep.disableKeepaliveTimer() } + // If the sender has advertised zero receive window and we have + // data to be sent out, start zero window probing to query the + // remote for its receive window size. + if s.writeNext != nil && s.sndWnd == 0 { + s.enableZeroWindowProbing() + } + // Enable the timer if we have pending data and it's not enabled yet. if !s.resendTimer.enabled() && s.sndUna != s.sndNxt { s.resendTimer.enable(s.rto) @@ -1122,8 +1192,26 @@ func (s *sender) handleRcvdSegment(seg *segment) { // Stash away the current window size. s.sndWnd = seg.window - // Ignore ack if it doesn't acknowledge any new data. ack := seg.ackNumber + + // Disable zero window probing if remote advertises a non-zero receive + // window. This can be with an ACK to the zero window probe (where the + // acknumber refers to the already acknowledged byte) OR to any previously + // unacknowledged segment.
+ if s.zeroWindowProbing && seg.window > 0 && + (ack == s.sndUna || (ack-1).InRange(s.sndUna, s.sndNxt)) { + s.disableZeroWindowProbing() + } + + // On receiving the ACK for the zero window probe, account for it and + // skip trying to send any segment as we are still probing for + // receive window to become non-zero. + if s.zeroWindowProbing && s.unackZeroWindowProbes > 0 && ack == s.sndUna { + s.unackZeroWindowProbes-- + return + } + + // Ignore ack if it doesn't acknowledge any new data. if (ack - 1).InRange(s.sndUna, s.sndNxt) { s.dupAckCount = 0 @@ -1143,7 +1231,7 @@ func (s *sender) handleRcvdSegment(seg *segment) { } // When an ack is received we must rearm the timer. - // RFC 6298 5.2 + // RFC 6298 5.3 s.resendTimer.enable(s.rto) // Remove all acknowledged data from the write list. diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 33e2b9a09..49e4ba214 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -1900,7 +1900,7 @@ func TestZeroWindowSend(t *testing.T) { c := context.New(t, defaultMTU) defer c.Cleanup() - c.CreateConnected(789, 0, -1 /* epRcvBuf */) + c.CreateConnected(789 /* iss */, 0 /* rcvWnd */, -1 /* epRcvBuf */) data := []byte{1, 2, 3} view := buffer.NewView(len(data)) @@ -1911,8 +1911,17 @@ func TestZeroWindowSend(t *testing.T) { t.Fatalf("Write failed: %v", err) } - // Since the window is currently zero, check that no packet is received. - c.CheckNoPacket("Packet received when window is zero") + // Check if we got a zero-window probe. + b := c.GetPacket() + checker.IPv4(t, b, + checker.PayloadLen(header.TCPMinimumSize), + checker.TCP( + checker.DstPort(context.TestPort), + checker.SeqNum(uint32(c.IRS)), + checker.AckNum(790), + checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)), + ), + ) // Open up the window. Data should be received now. 
c.SendPacket(nil, &context.Headers{ @@ -1925,7 +1934,7 @@ func TestZeroWindowSend(t *testing.T) { }) // Check that data is received. - b := c.GetPacket() + b = c.GetPacket() checker.IPv4(t, b, checker.PayloadLen(len(data)+header.TCPMinimumSize), checker.TCP( |