summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/syscalls
diff options
context:
space:
mode:
authorKevin Krakauer <krakauer@google.com>2019-06-12 15:21:22 -0700
committerKevin Krakauer <krakauer@google.com>2019-06-12 15:21:22 -0700
commit0bbbcafd68154e7c7b46692b84a39fb6bb5f1568 (patch)
treed8fba01ad76900715665b0418a786de2d77e2a05 /pkg/sentry/syscalls
parent06a83df533244dc2b3b8adfc1bf0608d3753c1d9 (diff)
parent70578806e8d3e01fae2249b3e602cd5b05d378a0 (diff)
Merge branch 'master' into iptables-1-pkg
Change-Id: I7457a11de4725e1bf3811420c505d225b1cb6943
Diffstat (limited to 'pkg/sentry/syscalls')
-rw-r--r--pkg/sentry/syscalls/linux/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/error.go4
-rw-r--r--pkg/sentry/syscalls/linux/linux64.go756
-rw-r--r--pkg/sentry/syscalls/linux/sys_mempolicy.go312
-rw-r--r--pkg/sentry/syscalls/linux/sys_mmap.go145
-rw-r--r--pkg/sentry/syscalls/linux/sys_prctl.go33
-rw-r--r--pkg/sentry/syscalls/linux/sys_socket.go4
-rw-r--r--pkg/sentry/syscalls/syscalls.go88
8 files changed, 754 insertions, 589 deletions
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index f76989ae2..1c057526b 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -19,6 +19,7 @@ go_library(
"sys_identity.go",
"sys_inotify.go",
"sys_lseek.go",
+ "sys_mempolicy.go",
"sys_mmap.go",
"sys_mount.go",
"sys_pipe.go",
diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go
index 1ba3695fb..72146ea63 100644
--- a/pkg/sentry/syscalls/linux/error.go
+++ b/pkg/sentry/syscalls/linux/error.go
@@ -92,6 +92,10 @@ func handleIOError(t *kernel.Task, partialResult bool, err, intr error, op strin
// TODO(gvisor.dev/issue/161): In some cases SIGPIPE should
// also be sent to the application.
return nil
+ case syserror.ECONNRESET:
+ // For TCP sendfile connections, we may have a reset. But we
+ // should just return n as the result.
+ return nil
case syserror.ErrWouldBlock:
// Syscall would block, but completed a partial read/write.
// This case should only be returned by IssueIO for nonblocking
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 3e4d312af..5251c2463 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -34,33 +34,6 @@ const _AUDIT_ARCH_X86_64 = 0xc000003e
// AMD64 is a table of Linux amd64 syscall API with the corresponding syscall
// numbers from Linux 4.4. The entries commented out are those syscalls we
// don't currently support.
-//
-// Syscall support is documented as annotations in Go comments of the form:
-// @Syscall(<name>, <key:value>, ...)
-//
-// Supported args and values are:
-//
-// - arg: A syscall option. This entry only applies to the syscall when given
-// this option.
-// - support: Indicates support level
-// - UNIMPLEMENTED: Unimplemented (default, implies returns:ENOSYS)
-// - PARTIAL: Partial support. Details should be provided in note.
-// - FULL: Full support
-// - returns: Indicates a known return value. Values are syscall errors. This
-// is treated as a string so you can use something like
-// "returns:EPERM or ENOSYS".
-// - issue: A Github issue number.
-// - note: A note
-//
-// Example:
-// // @Syscall(mmap, arg:MAP_PRIVATE, support:FULL, note:Private memory fully supported)
-// // @Syscall(mmap, arg:MAP_SHARED, issue:123, note:Shared memory not supported)
-// // @Syscall(setxattr, returns:ENOTSUP, note:Requires file system support)
-//
-// Annotations should be placed as close to their implementation as possible
-// (preferrably as part of a supporting function's Godoc) and should be
-// updated as syscall support changes. Unimplemented syscalls are documented
-// here due to their lack of a supporting function or method.
var AMD64 = &kernel.SyscallTable{
OS: abi.Linux,
Arch: arch.AMD64,
@@ -74,405 +47,338 @@ var AMD64 = &kernel.SyscallTable{
Version: "#1 SMP Sun Jan 10 15:06:54 PST 2016",
},
AuditNumber: _AUDIT_ARCH_X86_64,
- Table: map[uintptr]kernel.SyscallFn{
- 0: Read,
- 1: Write,
- 2: Open,
- 3: Close,
- 4: Stat,
- 5: Fstat,
- 6: Lstat,
- 7: Poll,
- 8: Lseek,
- 9: Mmap,
- 10: Mprotect,
- 11: Munmap,
- 12: Brk,
- 13: RtSigaction,
- 14: RtSigprocmask,
- 15: RtSigreturn,
- 16: Ioctl,
- 17: Pread64,
- 18: Pwrite64,
- 19: Readv,
- 20: Writev,
- 21: Access,
- 22: Pipe,
- 23: Select,
- 24: SchedYield,
- 25: Mremap,
- 26: Msync,
- 27: Mincore,
- 28: Madvise,
- 29: Shmget,
- 30: Shmat,
- 31: Shmctl,
- 32: Dup,
- 33: Dup2,
- 34: Pause,
- 35: Nanosleep,
- 36: Getitimer,
- 37: Alarm,
- 38: Setitimer,
- 39: Getpid,
- 40: Sendfile,
- 41: Socket,
- 42: Connect,
- 43: Accept,
- 44: SendTo,
- 45: RecvFrom,
- 46: SendMsg,
- 47: RecvMsg,
- 48: Shutdown,
- 49: Bind,
- 50: Listen,
- 51: GetSockName,
- 52: GetPeerName,
- 53: SocketPair,
- 54: SetSockOpt,
- 55: GetSockOpt,
- 56: Clone,
- 57: Fork,
- 58: Vfork,
- 59: Execve,
- 60: Exit,
- 61: Wait4,
- 62: Kill,
- 63: Uname,
- 64: Semget,
- 65: Semop,
- 66: Semctl,
- 67: Shmdt,
- // 68: @Syscall(Msgget), TODO(b/29354921)
- // 69: @Syscall(Msgsnd), TODO(b/29354921)
- // 70: @Syscall(Msgrcv), TODO(b/29354921)
- // 71: @Syscall(Msgctl), TODO(b/29354921)
- 72: Fcntl,
- 73: Flock,
- 74: Fsync,
- 75: Fdatasync,
- 76: Truncate,
- 77: Ftruncate,
- 78: Getdents,
- 79: Getcwd,
- 80: Chdir,
- 81: Fchdir,
- 82: Rename,
- 83: Mkdir,
- 84: Rmdir,
- 85: Creat,
- 86: Link,
- 87: Unlink,
- 88: Symlink,
- 89: Readlink,
- 90: Chmod,
- 91: Fchmod,
- 92: Chown,
- 93: Fchown,
- 94: Lchown,
- 95: Umask,
- 96: Gettimeofday,
- 97: Getrlimit,
- 98: Getrusage,
- 99: Sysinfo,
- 100: Times,
- 101: Ptrace,
- 102: Getuid,
- 103: Syslog,
- 104: Getgid,
- 105: Setuid,
- 106: Setgid,
- 107: Geteuid,
- 108: Getegid,
- 109: Setpgid,
- 110: Getppid,
- 111: Getpgrp,
- 112: Setsid,
- 113: Setreuid,
- 114: Setregid,
- 115: Getgroups,
- 116: Setgroups,
- 117: Setresuid,
- 118: Getresuid,
- 119: Setresgid,
- 120: Getresgid,
- 121: Getpgid,
- // 122: @Syscall(Setfsuid), TODO(b/112851702)
- // 123: @Syscall(Setfsgid), TODO(b/112851702)
- 124: Getsid,
- 125: Capget,
- 126: Capset,
- 127: RtSigpending,
- 128: RtSigtimedwait,
- 129: RtSigqueueinfo,
- 130: RtSigsuspend,
- 131: Sigaltstack,
- 132: Utime,
- 133: Mknod,
- // @Syscall(Uselib, note:Obsolete)
- 134: syscalls.Error(syscall.ENOSYS),
- // @Syscall(SetPersonality, returns:EINVAL, note:Unable to change personality)
- 135: syscalls.ErrorWithEvent(syscall.EINVAL),
- // @Syscall(Ustat, note:Needs filesystem support)
- 136: syscalls.ErrorWithEvent(syscall.ENOSYS),
- 137: Statfs,
- 138: Fstatfs,
- // 139: @Syscall(Sysfs), TODO(gvisor.dev/issue/165)
- 140: Getpriority,
- 141: Setpriority,
- // @Syscall(SchedSetparam, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_nice; ENOSYS otherwise)
- 142: syscalls.CapError(linux.CAP_SYS_NICE), // requires cap_sys_nice
- 143: SchedGetparam,
- 144: SchedSetscheduler,
- 145: SchedGetscheduler,
- 146: SchedGetPriorityMax,
- 147: SchedGetPriorityMin,
- // @Syscall(SchedRrGetInterval, returns:EPERM)
- 148: syscalls.ErrorWithEvent(syscall.EPERM),
- 149: Mlock,
- 150: Munlock,
- 151: Mlockall,
- 152: Munlockall,
- // @Syscall(Vhangup, returns:EPERM)
- 153: syscalls.CapError(linux.CAP_SYS_TTY_CONFIG),
- // @Syscall(ModifyLdt, returns:EPERM)
- 154: syscalls.Error(syscall.EPERM),
- // @Syscall(PivotRoot, returns:EPERM)
- 155: syscalls.Error(syscall.EPERM),
- // @Syscall(Sysctl, returns:EPERM)
- 156: syscalls.Error(syscall.EPERM), // syscall is "worthless"
- 157: Prctl,
- 158: ArchPrctl,
- // @Syscall(Adjtimex, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_time; ENOSYS otherwise)
- 159: syscalls.CapError(linux.CAP_SYS_TIME), // requires cap_sys_time
- 160: Setrlimit,
- 161: Chroot,
- 162: Sync,
- // @Syscall(Acct, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_pacct; ENOSYS otherwise)
- 163: syscalls.CapError(linux.CAP_SYS_PACCT), // requires cap_sys_pacct
- // @Syscall(Settimeofday, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_time; ENOSYS otherwise)
- 164: syscalls.CapError(linux.CAP_SYS_TIME), // requires cap_sys_time
- 165: Mount,
- 166: Umount2,
- // @Syscall(Swapon, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_admin; ENOSYS otherwise)
- 167: syscalls.CapError(linux.CAP_SYS_ADMIN), // requires cap_sys_admin
- // @Syscall(Swapoff, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_admin; ENOSYS otherwise)
- 168: syscalls.CapError(linux.CAP_SYS_ADMIN), // requires cap_sys_admin
- // @Syscall(Reboot, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_boot; ENOSYS otherwise)
- 169: syscalls.CapError(linux.CAP_SYS_BOOT), // requires cap_sys_boot
- 170: Sethostname,
- 171: Setdomainname,
- // @Syscall(Iopl, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_rawio; ENOSYS otherwise)
- 172: syscalls.CapError(linux.CAP_SYS_RAWIO), // requires cap_sys_rawio
- // @Syscall(Ioperm, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_rawio; ENOSYS otherwise)
- 173: syscalls.CapError(linux.CAP_SYS_RAWIO), // requires cap_sys_rawio
- // @Syscall(CreateModule, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_module; ENOSYS otherwise)
- 174: syscalls.CapError(linux.CAP_SYS_MODULE), // CreateModule, requires cap_sys_module
- // @Syscall(InitModule, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_module; ENOSYS otherwise)
- 175: syscalls.CapError(linux.CAP_SYS_MODULE), // requires cap_sys_module
- // @Syscall(DeleteModule, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_module; ENOSYS otherwise)
- 176: syscalls.CapError(linux.CAP_SYS_MODULE), // requires cap_sys_module
- // @Syscall(GetKernelSyms, note:Not supported in > 2.6)
- 177: syscalls.Error(syscall.ENOSYS),
- // @Syscall(QueryModule, note:Not supported in > 2.6)
- 178: syscalls.Error(syscall.ENOSYS),
- // @Syscall(Quotactl, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_admin; ENOSYS otherwise)
- 179: syscalls.CapError(linux.CAP_SYS_ADMIN), // requires cap_sys_admin (most operations)
- // @Syscall(Nfsservctl, note:Does not exist > 3.1)
- 180: syscalls.Error(syscall.ENOSYS),
- // @Syscall(Getpmsg, note:Not implemented in Linux)
- 181: syscalls.Error(syscall.ENOSYS),
- // @Syscall(Putpmsg, note:Not implemented in Linux)
- 182: syscalls.Error(syscall.ENOSYS),
- // @Syscall(AfsSyscall, note:Not implemented in Linux)
- 183: syscalls.Error(syscall.ENOSYS),
- // @Syscall(Tuxcall, note:Not implemented in Linux)
- 184: syscalls.Error(syscall.ENOSYS),
- // @Syscall(Security, note:Not implemented in Linux)
- 185: syscalls.Error(syscall.ENOSYS),
- 186: Gettid,
- 187: nil, // @Syscall(Readahead), TODO(b/29351341)
- // @Syscall(Setxattr, returns:ENOTSUP, note:Requires filesystem support)
- 188: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Lsetxattr, returns:ENOTSUP, note:Requires filesystem support)
- 189: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Fsetxattr, returns:ENOTSUP, note:Requires filesystem support)
- 190: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Getxattr, returns:ENOTSUP, note:Requires filesystem support)
- 191: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Lgetxattr, returns:ENOTSUP, note:Requires filesystem support)
- 192: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Fgetxattr, returns:ENOTSUP, note:Requires filesystem support)
- 193: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Listxattr, returns:ENOTSUP, note:Requires filesystem support)
- 194: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Llistxattr, returns:ENOTSUP, note:Requires filesystem support)
- 195: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Flistxattr, returns:ENOTSUP, note:Requires filesystem support)
- 196: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Removexattr, returns:ENOTSUP, note:Requires filesystem support)
- 197: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Lremovexattr, returns:ENOTSUP, note:Requires filesystem support)
- 198: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- // @Syscall(Fremovexattr, returns:ENOTSUP, note:Requires filesystem support)
- 199: syscalls.ErrorWithEvent(syscall.ENOTSUP),
- 200: Tkill,
- 201: Time,
- 202: Futex,
- 203: SchedSetaffinity,
- 204: SchedGetaffinity,
- // @Syscall(SetThreadArea, note:Expected to return ENOSYS on 64-bit)
- 205: syscalls.Error(syscall.ENOSYS),
- 206: IoSetup,
- 207: IoDestroy,
- 208: IoGetevents,
- 209: IoSubmit,
- 210: IoCancel,
- // @Syscall(GetThreadArea, note:Expected to return ENOSYS on 64-bit)
- 211: syscalls.Error(syscall.ENOSYS),
- // @Syscall(LookupDcookie, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_admin; ENOSYS otherwise)
- 212: syscalls.CapError(linux.CAP_SYS_ADMIN), // requires cap_sys_admin
- 213: EpollCreate,
- // @Syscall(EpollCtlOld, note:Deprecated)
- 214: syscalls.ErrorWithEvent(syscall.ENOSYS), // deprecated (afaik, unused)
- // @Syscall(EpollWaitOld, note:Deprecated)
- 215: syscalls.ErrorWithEvent(syscall.ENOSYS), // deprecated (afaik, unused)
- // @Syscall(RemapFilePages, note:Deprecated)
- 216: syscalls.ErrorWithEvent(syscall.ENOSYS), // deprecated since 3.16
- 217: Getdents64,
- 218: SetTidAddress,
- 219: RestartSyscall,
- // 220: @Syscall(Semtimedop), TODO(b/29354920)
- 221: Fadvise64,
- 222: TimerCreate,
- 223: TimerSettime,
- 224: TimerGettime,
- 225: TimerGetoverrun,
- 226: TimerDelete,
- 227: ClockSettime,
- 228: ClockGettime,
- 229: ClockGetres,
- 230: ClockNanosleep,
- 231: ExitGroup,
- 232: EpollWait,
- 233: EpollCtl,
- 234: Tgkill,
- 235: Utimes,
- // @Syscall(Vserver, note:Not implemented by Linux)
- 236: syscalls.Error(syscall.ENOSYS), // Vserver, not implemented by Linux
- // @Syscall(Mbind, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_nice; ENOSYS otherwise), TODO(b/117792295)
- 237: syscalls.CapError(linux.CAP_SYS_NICE), // may require cap_sys_nice
- 238: SetMempolicy,
- 239: GetMempolicy,
- // 240: @Syscall(MqOpen), TODO(b/29354921)
- // 241: @Syscall(MqUnlink), TODO(b/29354921)
- // 242: @Syscall(MqTimedsend), TODO(b/29354921)
- // 243: @Syscall(MqTimedreceive), TODO(b/29354921)
- // 244: @Syscall(MqNotify), TODO(b/29354921)
- // 245: @Syscall(MqGetsetattr), TODO(b/29354921)
- 246: syscalls.CapError(linux.CAP_SYS_BOOT), // kexec_load, requires cap_sys_boot
- 247: Waitid,
- // @Syscall(AddKey, returns:EACCES, note:Not available to user)
- 248: syscalls.Error(syscall.EACCES),
- // @Syscall(RequestKey, returns:EACCES, note:Not available to user)
- 249: syscalls.Error(syscall.EACCES),
- // @Syscall(Keyctl, returns:EACCES, note:Not available to user)
- 250: syscalls.Error(syscall.EACCES),
- // @Syscall(IoprioSet, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_admin; ENOSYS otherwise)
- 251: syscalls.CapError(linux.CAP_SYS_ADMIN), // requires cap_sys_nice or cap_sys_admin (depending)
- // @Syscall(IoprioGet, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_admin; ENOSYS otherwise)
- 252: syscalls.CapError(linux.CAP_SYS_ADMIN), // requires cap_sys_nice or cap_sys_admin (depending)
- 253: InotifyInit,
- 254: InotifyAddWatch,
- 255: InotifyRmWatch,
- // @Syscall(MigratePages, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_nice; ENOSYS otherwise)
- 256: syscalls.CapError(linux.CAP_SYS_NICE),
- 257: Openat,
- 258: Mkdirat,
- 259: Mknodat,
- 260: Fchownat,
- 261: Futimesat,
- 262: Fstatat,
- 263: Unlinkat,
- 264: Renameat,
- 265: Linkat,
- 266: Symlinkat,
- 267: Readlinkat,
- 268: Fchmodat,
- 269: Faccessat,
- 270: Pselect,
- 271: Ppoll,
- 272: Unshare,
- // @Syscall(SetRobustList, note:Obsolete)
- 273: syscalls.Error(syscall.ENOSYS),
- // @Syscall(GetRobustList, note:Obsolete)
- 274: syscalls.Error(syscall.ENOSYS),
- 275: Splice,
- // 276: @Syscall(Tee), TODO(b/29354098)
- 277: SyncFileRange,
- // 278: @Syscall(Vmsplice), TODO(b/29354098)
- // @Syscall(MovePages, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_nice; ENOSYS otherwise)
- 279: syscalls.CapError(linux.CAP_SYS_NICE), // requires cap_sys_nice (mostly)
- 280: Utimensat,
- 281: EpollPwait,
- // 282: @Syscall(Signalfd), TODO(b/19846426)
- 283: TimerfdCreate,
- 284: Eventfd,
- 285: Fallocate,
- 286: TimerfdSettime,
- 287: TimerfdGettime,
- 288: Accept4,
- // 289: @Syscall(Signalfd4), TODO(b/19846426)
- 290: Eventfd2,
- 291: EpollCreate1,
- 292: Dup3,
- 293: Pipe2,
- 294: InotifyInit1,
- 295: Preadv,
- 296: Pwritev,
- 297: RtTgsigqueueinfo,
- // @Syscall(PerfEventOpen, returns:ENODEV, note:No support for perf counters)
- 298: syscalls.ErrorWithEvent(syscall.ENODEV),
- 299: RecvMMsg,
- // @Syscall(FanotifyInit, note:Needs CONFIG_FANOTIFY)
- 300: syscalls.ErrorWithEvent(syscall.ENOSYS),
- // @Syscall(FanotifyMark, note:Needs CONFIG_FANOTIFY)
- 301: syscalls.ErrorWithEvent(syscall.ENOSYS),
- 302: Prlimit64,
- // @Syscall(NameToHandleAt, returns:EOPNOTSUPP, note:Needs filesystem support)
- 303: syscalls.ErrorWithEvent(syscall.EOPNOTSUPP),
- // @Syscall(OpenByHandleAt, returns:EOPNOTSUPP, note:Needs filesystem support)
- 304: syscalls.ErrorWithEvent(syscall.EOPNOTSUPP),
- // @Syscall(ClockAdjtime, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_module; ENOSYS otherwise)
- 305: syscalls.CapError(linux.CAP_SYS_TIME), // requires cap_sys_time
- 306: Syncfs,
- 307: SendMMsg,
- // 308: @Syscall(Setns), TODO(b/29354995)
- 309: Getcpu,
- // 310: @Syscall(ProcessVmReadv), TODO(gvisor.dev/issue/158) may require cap_sys_ptrace
- // 311: @Syscall(ProcessVmWritev), TODO(gvisor.dev/issue/158) may require cap_sys_ptrace
- // @Syscall(Kcmp, returns:EPERM or ENOSYS, note:Requires cap_sys_ptrace)
- 312: syscalls.CapError(linux.CAP_SYS_PTRACE),
- // @Syscall(FinitModule, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_module; ENOSYS otherwise)
- 313: syscalls.CapError(linux.CAP_SYS_MODULE),
- // 314: @Syscall(SchedSetattr), TODO(b/118902272), we have no scheduler
- // 315: @Syscall(SchedGetattr), TODO(b/118902272), we have no scheduler
- // 316: @Syscall(Renameat2), TODO(b/118902772)
- 317: Seccomp,
- 318: GetRandom,
- 319: MemfdCreate,
- // @Syscall(KexecFileLoad, EPERM or ENOSYS, note:Infeasible to support. Returns EPERM if the process does not have cap_sys_boot; ENOSYS otherwise)
- 320: syscalls.CapError(linux.CAP_SYS_BOOT),
- // @Syscall(Bpf, returns:EPERM or ENOSYS, note:Returns EPERM if the process does not have cap_sys_boot; ENOSYS otherwise)
- 321: syscalls.CapError(linux.CAP_SYS_ADMIN), // requires cap_sys_admin for all commands
- // 322: @Syscall(Execveat), TODO(b/118901836)
- // 323: @Syscall(Userfaultfd), TODO(b/118906345)
- // 324: @Syscall(Membarrier), TODO(b/118904897)
- 325: Mlock2,
+ Table: map[uintptr]kernel.Syscall{
+ 0: syscalls.Supported("read", Read),
+ 1: syscalls.Supported("write", Write),
+ 2: syscalls.Supported("open", Open),
+ 3: syscalls.Supported("close", Close),
+ 4: syscalls.Undocumented("stat", Stat),
+ 5: syscalls.Undocumented("fstat", Fstat),
+ 6: syscalls.Undocumented("lstat", Lstat),
+ 7: syscalls.Undocumented("poll", Poll),
+ 8: syscalls.Undocumented("lseek", Lseek),
+ 9: syscalls.Undocumented("mmap", Mmap),
+ 10: syscalls.Undocumented("mprotect", Mprotect),
+ 11: syscalls.Undocumented("munmap", Munmap),
+ 12: syscalls.Undocumented("brk", Brk),
+ 13: syscalls.Undocumented("rt_sigaction", RtSigaction),
+ 14: syscalls.Undocumented("rt_sigprocmask", RtSigprocmask),
+ 15: syscalls.Undocumented("rt_sigreturn", RtSigreturn),
+ 16: syscalls.Undocumented("ioctl", Ioctl),
+ 17: syscalls.Undocumented("pread64", Pread64),
+ 18: syscalls.Undocumented("pwrite64", Pwrite64),
+ 19: syscalls.Undocumented("readv", Readv),
+ 20: syscalls.Undocumented("writev", Writev),
+ 21: syscalls.Undocumented("access", Access),
+ 22: syscalls.Undocumented("pipe", Pipe),
+ 23: syscalls.Undocumented("select", Select),
+ 24: syscalls.Undocumented("sched_yield", SchedYield),
+ 25: syscalls.Undocumented("mremap", Mremap),
+ 26: syscalls.Undocumented("msync", Msync),
+ 27: syscalls.Undocumented("mincore", Mincore),
+ 28: syscalls.Undocumented("madvise", Madvise),
+ 29: syscalls.Undocumented("shmget", Shmget),
+ 30: syscalls.Undocumented("shmat", Shmat),
+ 31: syscalls.Undocumented("shmctl", Shmctl),
+ 32: syscalls.Undocumented("dup", Dup),
+ 33: syscalls.Undocumented("dup2", Dup2),
+ 34: syscalls.Undocumented("pause", Pause),
+ 35: syscalls.Undocumented("nanosleep", Nanosleep),
+ 36: syscalls.Undocumented("getitimer", Getitimer),
+ 37: syscalls.Undocumented("alarm", Alarm),
+ 38: syscalls.Undocumented("setitimer", Setitimer),
+ 39: syscalls.Undocumented("getpid", Getpid),
+ 40: syscalls.Undocumented("sendfile", Sendfile),
+ 41: syscalls.Undocumented("socket", Socket),
+ 42: syscalls.Undocumented("connect", Connect),
+ 43: syscalls.Undocumented("accept", Accept),
+ 44: syscalls.Undocumented("sendto", SendTo),
+ 45: syscalls.Undocumented("recvfrom", RecvFrom),
+ 46: syscalls.Undocumented("sendmsg", SendMsg),
+ 47: syscalls.Undocumented("recvmsg", RecvMsg),
+ 48: syscalls.Undocumented("shutdown", Shutdown),
+ 49: syscalls.Undocumented("bind", Bind),
+ 50: syscalls.Undocumented("listen", Listen),
+ 51: syscalls.Undocumented("getsockname", GetSockName),
+ 52: syscalls.Undocumented("getpeername", GetPeerName),
+ 53: syscalls.Undocumented("socketpair", SocketPair),
+ 54: syscalls.Undocumented("setsockopt", SetSockOpt),
+ 55: syscalls.Undocumented("getsockopt", GetSockOpt),
+ 56: syscalls.Undocumented("clone", Clone),
+ 57: syscalls.Undocumented("fork", Fork),
+ 58: syscalls.Undocumented("vfork", Vfork),
+ 59: syscalls.Undocumented("execve", Execve),
+ 60: syscalls.Undocumented("exit", Exit),
+ 61: syscalls.Undocumented("wait4", Wait4),
+ 62: syscalls.Undocumented("kill", Kill),
+ 63: syscalls.Undocumented("uname", Uname),
+ 64: syscalls.Undocumented("semget", Semget),
+ 65: syscalls.Undocumented("semop", Semop),
+ 66: syscalls.Undocumented("semctl", Semctl),
+ 67: syscalls.Undocumented("shmdt", Shmdt),
+ 68: syscalls.ErrorWithEvent("msgget", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 69: syscalls.ErrorWithEvent("msgsnd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 70: syscalls.ErrorWithEvent("msgrcv", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 71: syscalls.ErrorWithEvent("msgctl", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921)
+ 72: syscalls.Undocumented("fcntl", Fcntl),
+ 73: syscalls.Undocumented("flock", Flock),
+ 74: syscalls.Undocumented("fsync", Fsync),
+ 75: syscalls.Undocumented("fdatasync", Fdatasync),
+ 76: syscalls.Undocumented("truncate", Truncate),
+ 77: syscalls.Undocumented("ftruncate", Ftruncate),
+ 78: syscalls.Undocumented("getdents", Getdents),
+ 79: syscalls.Undocumented("getcwd", Getcwd),
+ 80: syscalls.Undocumented("chdir", Chdir),
+ 81: syscalls.Undocumented("fchdir", Fchdir),
+ 82: syscalls.Undocumented("rename", Rename),
+ 83: syscalls.Undocumented("mkdir", Mkdir),
+ 84: syscalls.Undocumented("rmdir", Rmdir),
+ 85: syscalls.Undocumented("creat", Creat),
+ 86: syscalls.Undocumented("link", Link),
+ 87: syscalls.Undocumented("link", Unlink),
+ 88: syscalls.Undocumented("symlink", Symlink),
+ 89: syscalls.Undocumented("readlink", Readlink),
+ 90: syscalls.Undocumented("chmod", Chmod),
+ 91: syscalls.Undocumented("fchmod", Fchmod),
+ 92: syscalls.Undocumented("chown", Chown),
+ 93: syscalls.Undocumented("fchown", Fchown),
+ 94: syscalls.Undocumented("lchown", Lchown),
+ 95: syscalls.Undocumented("umask", Umask),
+ 96: syscalls.Undocumented("gettimeofday", Gettimeofday),
+ 97: syscalls.Undocumented("getrlimit", Getrlimit),
+ 98: syscalls.Undocumented("getrusage", Getrusage),
+ 99: syscalls.Undocumented("sysinfo", Sysinfo),
+ 100: syscalls.Undocumented("times", Times),
+ 101: syscalls.Undocumented("ptrace", Ptrace),
+ 102: syscalls.Undocumented("getuid", Getuid),
+ 103: syscalls.Undocumented("syslog", Syslog),
+ 104: syscalls.Undocumented("getgid", Getgid),
+ 105: syscalls.Undocumented("setuid", Setuid),
+ 106: syscalls.Undocumented("setgid", Setgid),
+ 107: syscalls.Undocumented("geteuid", Geteuid),
+ 108: syscalls.Undocumented("getegid", Getegid),
+ 109: syscalls.Undocumented("setpgid", Setpgid),
+ 110: syscalls.Undocumented("getppid", Getppid),
+ 111: syscalls.Undocumented("getpgrp", Getpgrp),
+ 112: syscalls.Undocumented("setsid", Setsid),
+ 113: syscalls.Undocumented("setreuid", Setreuid),
+ 114: syscalls.Undocumented("setregid", Setregid),
+ 115: syscalls.Undocumented("getgroups", Getgroups),
+ 116: syscalls.Undocumented("setgroups", Setgroups),
+ 117: syscalls.Undocumented("setresuid", Setresuid),
+ 118: syscalls.Undocumented("getresuid", Getresuid),
+ 119: syscalls.Undocumented("setresgid", Setresgid),
+ 120: syscalls.Undocumented("setresgid", Getresgid),
+ 121: syscalls.Undocumented("getpgid", Getpgid),
+ 122: syscalls.ErrorWithEvent("setfsuid", syscall.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 123: syscalls.ErrorWithEvent("setfsgid", syscall.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702)
+ 124: syscalls.Undocumented("getsid", Getsid),
+ 125: syscalls.Undocumented("capget", Capget),
+ 126: syscalls.Undocumented("capset", Capset),
+ 127: syscalls.Undocumented("rt_sigpending", RtSigpending),
+ 128: syscalls.Undocumented("rt_sigtimedwait", RtSigtimedwait),
+ 129: syscalls.Undocumented("rt_sigqueueinfo", RtSigqueueinfo),
+ 130: syscalls.Undocumented("rt_sigsuspend", RtSigsuspend),
+ 131: syscalls.Undocumented("sigaltstack", Sigaltstack),
+ 132: syscalls.Undocumented("utime", Utime),
+ 133: syscalls.Undocumented("mknod", Mknod),
+ 134: syscalls.Error("uselib", syscall.ENOSYS, "Obsolete", nil),
+ 135: syscalls.ErrorWithEvent("personality", syscall.EINVAL, "Unable to change personality.", nil),
+ 136: syscalls.ErrorWithEvent("ustat", syscall.ENOSYS, "Needs filesystem support.", nil),
+ 137: syscalls.Undocumented("statfs", Statfs),
+ 138: syscalls.Undocumented("fstatfs", Fstatfs),
+ 139: syscalls.ErrorWithEvent("sysfs", syscall.ENOSYS, "", []string{"gvisor.dev/issue/165"}),
+ 140: syscalls.Undocumented("getpriority", Getpriority),
+ 141: syscalls.Undocumented("setpriority", Setpriority),
+ 142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil),
+ 143: syscalls.Undocumented("sched_getparam", SchedGetparam),
+ 144: syscalls.Undocumented("sched_setscheduler", SchedSetscheduler),
+ 145: syscalls.Undocumented("sched_getscheduler", SchedGetscheduler),
+ 146: syscalls.Undocumented("sched_get_priority_max", SchedGetPriorityMax),
+ 147: syscalls.Undocumented("sched_get_priority_min", SchedGetPriorityMin),
+ 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syscall.EPERM, "", nil),
+ 149: syscalls.Undocumented("mlock", Mlock),
+ 150: syscalls.Undocumented("munlock", Munlock),
+ 151: syscalls.Undocumented("mlockall", Mlockall),
+ 152: syscalls.Undocumented("munlockall", Munlockall),
+ 153: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil),
+ 154: syscalls.Error("modify_ldt", syscall.EPERM, "", nil),
+ 155: syscalls.Error("pivot_root", syscall.EPERM, "", nil),
+ 156: syscalls.Error("sysctl", syscall.EPERM, `syscall is "worthless"`, nil),
+ 157: syscalls.Undocumented("prctl", Prctl),
+ 158: syscalls.Undocumented("arch_prctl", ArchPrctl),
+ 159: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil),
+ 160: syscalls.Undocumented("setrlimit", Setrlimit),
+ 161: syscalls.Undocumented("chroot", Chroot),
+ 162: syscalls.Undocumented("sync", Sync),
+ 163: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil),
+ 164: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil),
+ 165: syscalls.Undocumented("mount", Mount),
+ 166: syscalls.Undocumented("umount2", Umount2),
+ 167: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil),
+ 168: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil),
+ 169: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil),
+ 170: syscalls.Undocumented("sethostname", Sethostname),
+ 171: syscalls.Undocumented("setdomainname", Setdomainname),
+ 172: syscalls.CapError("iopl", linux.CAP_SYS_RAWIO, "", nil),
+ 173: syscalls.CapError("ioperm", linux.CAP_SYS_RAWIO, "", nil),
+ 174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil),
+ 175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil),
+ 176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil),
+ 177: syscalls.Error("get_kernel_syms", syscall.ENOSYS, "Not supported in > 2.6", nil),
+ 178: syscalls.Error("query_module", syscall.ENOSYS, "Not supported in > 2.6", nil),
+ 179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations
+ 180: syscalls.Error("nfsservctl", syscall.ENOSYS, "Does not exist > 3.1", nil),
+ 181: syscalls.Error("getpmsg", syscall.ENOSYS, "Not implemented in Linux", nil),
+ 182: syscalls.Error("putpmsg", syscall.ENOSYS, "Not implemented in Linux", nil),
+ 183: syscalls.Error("afs_syscall", syscall.ENOSYS, "Not implemented in Linux", nil),
+ 184: syscalls.Error("tuxcall", syscall.ENOSYS, "Not implemented in Linux", nil),
+ 185: syscalls.Error("security", syscall.ENOSYS, "Not implemented in Linux", nil),
+ 186: syscalls.Undocumented("gettid", Gettid),
+ 187: syscalls.ErrorWithEvent("readahead", syscall.ENOSYS, "", []string{"gvisor.dev/issue/261"}), // TODO(b/29351341)
+ 188: syscalls.ErrorWithEvent("setxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 189: syscalls.ErrorWithEvent("lsetxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 190: syscalls.ErrorWithEvent("fsetxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 191: syscalls.ErrorWithEvent("getxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 192: syscalls.ErrorWithEvent("lgetxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 193: syscalls.ErrorWithEvent("fgetxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 194: syscalls.ErrorWithEvent("listxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 195: syscalls.ErrorWithEvent("llistxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 196: syscalls.ErrorWithEvent("flistxattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 197: syscalls.ErrorWithEvent("removexattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 198: syscalls.ErrorWithEvent("lremovexattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 199: syscalls.ErrorWithEvent("fremovexattr", syscall.ENOTSUP, "Requires filesystem support", nil),
+ 200: syscalls.Undocumented("tkill", Tkill),
+ 201: syscalls.Undocumented("time", Time),
+ 202: syscalls.Undocumented("futex", Futex),
+ 203: syscalls.Undocumented("sched_setaffinity", SchedSetaffinity),
+ 204: syscalls.Undocumented("sched_getaffinity", SchedGetaffinity),
+ 205: syscalls.Error("set_thread_area", syscall.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
+ 206: syscalls.Undocumented("io_setup", IoSetup),
+ 207: syscalls.Undocumented("io_destroy", IoDestroy),
+ 208: syscalls.Undocumented("io_getevents", IoGetevents),
+ 209: syscalls.Undocumented("io_submit", IoSubmit),
+ 210: syscalls.Undocumented("io_cancel", IoCancel),
+ 211: syscalls.Error("get_thread_area", syscall.ENOSYS, "Expected to return ENOSYS on 64-bit", nil),
+ 212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
+ 213: syscalls.Undocumented("epoll_create", EpollCreate),
+ 214: syscalls.ErrorWithEvent("epoll_ctl_old", syscall.ENOSYS, "Deprecated", nil),
+ 215: syscalls.ErrorWithEvent("epoll_wait_old", syscall.ENOSYS, "Deprecated", nil),
+ 216: syscalls.ErrorWithEvent("remap_file_pages", syscall.ENOSYS, "Deprecated since 3.16", nil),
+ 217: syscalls.Undocumented("getdents64", Getdents64),
+ 218: syscalls.Undocumented("set_tid_address", SetTidAddress),
+ 219: syscalls.Undocumented("restart_syscall", RestartSyscall),
+ 220: syscalls.ErrorWithEvent("semtimedop", syscall.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920)
+ 221: syscalls.Undocumented("fadvise64", Fadvise64),
+ 222: syscalls.Undocumented("timer_create", TimerCreate),
+ 223: syscalls.Undocumented("timer_settime", TimerSettime),
+ 224: syscalls.Undocumented("timer_gettime", TimerGettime),
+ 225: syscalls.Undocumented("timer_getoverrun", TimerGetoverrun),
+ 226: syscalls.Undocumented("timer_delete", TimerDelete),
+ 227: syscalls.Undocumented("clock_settime", ClockSettime),
+ 228: syscalls.Undocumented("clock_gettime", ClockGettime),
+ 229: syscalls.Undocumented("clock_getres", ClockGetres),
+ 230: syscalls.Undocumented("clock_nanosleep", ClockNanosleep),
+ 231: syscalls.Undocumented("exit_group", ExitGroup),
+ 232: syscalls.Undocumented("epoll_wait", EpollWait),
+ 233: syscalls.Undocumented("epoll_ctl", EpollCtl),
+ 234: syscalls.Undocumented("tgkill", Tgkill),
+ 235: syscalls.Undocumented("utimes", Utimes),
+ 236: syscalls.Error("vserver", syscall.ENOSYS, "Not implemented by Linux", nil),
+ 237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}),
+ 238: syscalls.Undocumented("set_mempolicy", SetMempolicy),
+ 239: syscalls.Undocumented("get_mempolicy", GetMempolicy),
+ 240: syscalls.ErrorWithEvent("mq_open", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 241: syscalls.ErrorWithEvent("mq_unlink", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 242: syscalls.ErrorWithEvent("mq_timedsend", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 243: syscalls.ErrorWithEvent("mq_timedreceive", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 244: syscalls.ErrorWithEvent("mq_notify", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 245: syscalls.ErrorWithEvent("mq_getsetattr", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921)
+ 246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil),
+ 247: syscalls.Undocumented("waitid", Waitid),
+ 248: syscalls.Error("add_key", syscall.EACCES, "Not available to user", nil),
+ 249: syscalls.Error("request_key", syscall.EACCES, "Not available to user", nil),
+ 250: syscalls.Error("keyctl", syscall.EACCES, "Not available to user", nil),
+ 251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
+ 252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending)
+ 253: syscalls.Undocumented("inotify_init", InotifyInit),
+ 254: syscalls.Undocumented("inotify_add_watch", InotifyAddWatch),
+ 255: syscalls.Undocumented("inotify_rm_watch", InotifyRmWatch),
+ 256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil),
+ 257: syscalls.Undocumented("openat", Openat),
+ 258: syscalls.Undocumented("mkdirat", Mkdirat),
+ 259: syscalls.Undocumented("mknodat", Mknodat),
+ 260: syscalls.Undocumented("fchownat", Fchownat),
+ 261: syscalls.Undocumented("futimesat", Futimesat),
+ 262: syscalls.Undocumented("fstatat", Fstatat),
+ 263: syscalls.Undocumented("unlinkat", Unlinkat),
+ 264: syscalls.Undocumented("renameat", Renameat),
+ 265: syscalls.Undocumented("linkat", Linkat),
+ 266: syscalls.Undocumented("symlinkat", Symlinkat),
+ 267: syscalls.Undocumented("readlinkat", Readlinkat),
+ 268: syscalls.Undocumented("fchmodat", Fchmodat),
+ 269: syscalls.Undocumented("faccessat", Faccessat),
+ 270: syscalls.Undocumented("pselect", Pselect),
+ 271: syscalls.Undocumented("ppoll", Ppoll),
+ 272: syscalls.Undocumented("unshare", Unshare),
+ 273: syscalls.Error("set_robust_list", syscall.ENOSYS, "Obsolete", nil),
+ 274: syscalls.Error("get_robust_list", syscall.ENOSYS, "Obsolete", nil),
+ 275: syscalls.PartiallySupported("splice", Splice, "Stub implementation", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 276: syscalls.ErrorWithEvent("tee", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 277: syscalls.Undocumented("sync_file_range", SyncFileRange),
+ 278: syscalls.ErrorWithEvent("vmsplice", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098)
+ 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly)
+ 280: syscalls.Undocumented("utimensat", Utimensat),
+ 281: syscalls.Undocumented("epoll_pwait", EpollPwait),
+ 282: syscalls.ErrorWithEvent("signalfd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426)
+ 283: syscalls.Undocumented("timerfd_create", TimerfdCreate),
+ 284: syscalls.Undocumented("eventfd", Eventfd),
+ 285: syscalls.Undocumented("fallocate", Fallocate),
+ 286: syscalls.Undocumented("timerfd_settime", TimerfdSettime),
+ 287: syscalls.Undocumented("timerfd_gettime", TimerfdGettime),
+ 288: syscalls.Undocumented("accept4", Accept4),
+ 289: syscalls.ErrorWithEvent("signalfd4", syscall.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426)
+ 290: syscalls.Undocumented("eventfd2", Eventfd2),
+ 291: syscalls.Undocumented("epoll_create1", EpollCreate1),
+ 292: syscalls.Undocumented("dup3", Dup3),
+ 293: syscalls.Undocumented("pipe2", Pipe2),
+ 294: syscalls.Undocumented("inotify_init1", InotifyInit1),
+ 295: syscalls.Undocumented("preadv", Preadv),
+ 296: syscalls.Undocumented("pwritev", Pwritev),
+ 297: syscalls.Undocumented("rt_tgsigqueueinfo", RtTgsigqueueinfo),
+ 298: syscalls.ErrorWithEvent("perf_event_open", syscall.ENODEV, "No support for perf counters", nil),
+ 299: syscalls.Undocumented("recvmmsg", RecvMMsg),
+ 300: syscalls.ErrorWithEvent("fanotify_init", syscall.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 301: syscalls.ErrorWithEvent("fanotify_mark", syscall.ENOSYS, "Needs CONFIG_FANOTIFY", nil),
+ 302: syscalls.Undocumented("prlimit64", Prlimit64),
+ 303: syscalls.ErrorWithEvent("name_to_handle_at", syscall.EOPNOTSUPP, "Needs filesystem support", nil),
+ 304: syscalls.ErrorWithEvent("open_by_handle_at", syscall.EOPNOTSUPP, "Needs filesystem support", nil),
+ 305: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil),
+ 306: syscalls.Undocumented("syncfs", Syncfs),
+ 307: syscalls.Undocumented("sendmmsg", SendMMsg),
+ 308: syscalls.ErrorWithEvent("setns", syscall.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995)
+ 309: syscalls.Undocumented("getcpu", Getcpu),
+ 310: syscalls.ErrorWithEvent("process_vm_readv", syscall.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 311: syscalls.ErrorWithEvent("process_vm_writev", syscall.ENOSYS, "", []string{"gvisor.dev/issue/158"}),
+ 312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil),
+ 313: syscalls.CapError("finit_module", linux.CAP_SYS_MODULE, "", nil),
+ 314: syscalls.ErrorWithEvent("sched_setattr", syscall.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 315: syscalls.ErrorWithEvent("sched_getattr", syscall.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272)
+ 316: syscalls.ErrorWithEvent("renameat2", syscall.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772)
+ 317: syscalls.Undocumented("seccomp", Seccomp),
+ 318: syscalls.Undocumented("getrandom", GetRandom),
+ 319: syscalls.Undocumented("memfd_create", MemfdCreate),
+ 320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil),
+ 321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
+ 322: syscalls.ErrorWithEvent("execveat", syscall.ENOSYS, "", []string{"gvisor.dev/issue/265"}), // TODO(b/118901836)
+ 323: syscalls.ErrorWithEvent("userfaultfd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
+ 324: syscalls.ErrorWithEvent("membarrier", syscall.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(b/118904897)
+ 325: syscalls.Undocumented("mlock2", Mlock2),
+
// Syscalls after 325 are "backports" from versions of Linux after 4.4.
- // 326: @Syscall(CopyFileRange),
- 327: Preadv2,
- 328: Pwritev2,
+ 326: syscalls.ErrorWithEvent("copy_file_range", syscall.ENOSYS, "", nil),
+ 327: syscalls.Undocumented("preadv2", Preadv2),
+ 328: syscalls.Undocumented("pwritev2", Pwritev2),
},
Emulate: map[usermem.Addr]uintptr{
diff --git a/pkg/sentry/syscalls/linux/sys_mempolicy.go b/pkg/sentry/syscalls/linux/sys_mempolicy.go
new file mode 100644
index 000000000..652b2c206
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/sys_mempolicy.go
@@ -0,0 +1,312 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "fmt"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// We unconditionally report a single NUMA node. This also means that our
+// "nodemask_t" is a single unsigned long (uint64).
+const (
+ maxNodes = 1
+ allowedNodemask = (1 << maxNodes) - 1
+)
+
+func copyInNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32) (uint64, error) {
+ // "nodemask points to a bit mask of node IDs that contains up to maxnode
+ // bits. The bit mask size is rounded to the next multiple of
+ // sizeof(unsigned long), but the kernel will use bits only up to maxnode.
+ // A NULL value of nodemask or a maxnode value of zero specifies the empty
+ // set of nodes. If the value of maxnode is zero, the nodemask argument is
+ // ignored." - set_mempolicy(2). Unfortunately, most of this is inaccurate
+ // because of what appears to be a bug: mm/mempolicy.c:get_nodes() uses
+ // maxnode-1, not maxnode, as the number of bits.
+ bits := maxnode - 1
+ if bits > usermem.PageSize*8 { // also handles overflow from maxnode == 0
+ return 0, syserror.EINVAL
+ }
+ if bits == 0 {
+ return 0, nil
+ }
+ // Copy in the whole nodemask.
+ numUint64 := (bits + 63) / 64
+ buf := t.CopyScratchBuffer(int(numUint64) * 8)
+ if _, err := t.CopyInBytes(addr, buf); err != nil {
+ return 0, err
+ }
+ val := usermem.ByteOrder.Uint64(buf)
+ // Check that only allowed bits in the first unsigned long in the nodemask
+ // are set.
+ if val&^allowedNodemask != 0 {
+ return 0, syserror.EINVAL
+ }
+ // Check that all remaining bits in the nodemask are 0.
+ for i := 8; i < len(buf); i++ {
+ if buf[i] != 0 {
+ return 0, syserror.EINVAL
+ }
+ }
+ return val, nil
+}
+
+func copyOutNodemask(t *kernel.Task, addr usermem.Addr, maxnode uint32, val uint64) error {
+ // mm/mempolicy.c:copy_nodes_to_user() also uses maxnode-1 as the number of
+ // bits.
+ bits := maxnode - 1
+ if bits > usermem.PageSize*8 { // also handles overflow from maxnode == 0
+ return syserror.EINVAL
+ }
+ if bits == 0 {
+ return nil
+ }
+ // Copy out the first unsigned long in the nodemask.
+ buf := t.CopyScratchBuffer(8)
+ usermem.ByteOrder.PutUint64(buf, val)
+ if _, err := t.CopyOutBytes(addr, buf); err != nil {
+ return err
+ }
+ // Zero out remaining unsigned longs in the nodemask.
+ if bits > 64 {
+ remAddr, ok := addr.AddLength(8)
+ if !ok {
+ return syserror.EFAULT
+ }
+ remUint64 := (bits - 1) / 64
+ if _, err := t.MemoryManager().ZeroOut(t, remAddr, int64(remUint64)*8, usermem.IOOpts{
+ AddressSpaceActive: true,
+ }); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// GetMempolicy implements the syscall get_mempolicy(2).
+func GetMempolicy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ mode := args[0].Pointer()
+ nodemask := args[1].Pointer()
+ maxnode := args[2].Uint()
+ addr := args[3].Pointer()
+ flags := args[4].Uint()
+
+ if flags&^(linux.MPOL_F_NODE|linux.MPOL_F_ADDR|linux.MPOL_F_MEMS_ALLOWED) != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ nodeFlag := flags&linux.MPOL_F_NODE != 0
+ addrFlag := flags&linux.MPOL_F_ADDR != 0
+ memsAllowed := flags&linux.MPOL_F_MEMS_ALLOWED != 0
+
+ // "EINVAL: The value specified by maxnode is less than the number of node
+ // IDs supported by the system." - get_mempolicy(2)
+ if nodemask != 0 && maxnode < maxNodes {
+ return 0, nil, syserror.EINVAL
+ }
+
+ // "If flags specifies MPOL_F_MEMS_ALLOWED [...], the mode argument is
+ // ignored and the set of nodes (memories) that the thread is allowed to
+ // specify in subsequent calls to mbind(2) or set_mempolicy(2) (in the
+ // absence of any mode flags) is returned in nodemask."
+ if memsAllowed {
+ // "It is not permitted to combine MPOL_F_MEMS_ALLOWED with either
+ // MPOL_F_ADDR or MPOL_F_NODE."
+ if nodeFlag || addrFlag {
+ return 0, nil, syserror.EINVAL
+ }
+ if err := copyOutNodemask(t, nodemask, maxnode, allowedNodemask); err != nil {
+ return 0, nil, err
+ }
+ return 0, nil, nil
+ }
+
+ // "If flags specifies MPOL_F_ADDR, then information is returned about the
+ // policy governing the memory address given in addr. ... If the mode
+ // argument is not NULL, then get_mempolicy() will store the policy mode
+ // and any optional mode flags of the requested NUMA policy in the location
+ // pointed to by this argument. If nodemask is not NULL, then the nodemask
+ // associated with the policy will be stored in the location pointed to by
+ // this argument."
+ if addrFlag {
+ policy, nodemaskVal, err := t.MemoryManager().NumaPolicy(addr)
+ if err != nil {
+ return 0, nil, err
+ }
+ if nodeFlag {
+ // "If flags specifies both MPOL_F_NODE and MPOL_F_ADDR,
+ // get_mempolicy() will return the node ID of the node on which the
+ // address addr is allocated into the location pointed to by mode.
+ // If no page has yet been allocated for the specified address,
+ // get_mempolicy() will allocate a page as if the thread had
+ // performed a read (load) access to that address, and return the
+ // ID of the node where that page was allocated."
+ buf := t.CopyScratchBuffer(1)
+ _, err := t.CopyInBytes(addr, buf)
+ if err != nil {
+ return 0, nil, err
+ }
+ policy = 0 // maxNodes == 1
+ }
+ if mode != 0 {
+ if _, err := t.CopyOut(mode, policy); err != nil {
+ return 0, nil, err
+ }
+ }
+ if nodemask != 0 {
+ if err := copyOutNodemask(t, nodemask, maxnode, nodemaskVal); err != nil {
+ return 0, nil, err
+ }
+ }
+ return 0, nil, nil
+ }
+
+ // "EINVAL: ... flags specified MPOL_F_ADDR and addr is NULL, or flags did
+ // not specify MPOL_F_ADDR and addr is not NULL." This is partially
+ // inaccurate: if flags specifies MPOL_F_ADDR,
+ // mm/mempolicy.c:do_get_mempolicy() doesn't special-case NULL; it will
+ // just (usually) fail to find a VMA at address 0 and return EFAULT.
+ if addr != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ // "If flags is specified as 0, then information about the calling thread's
+ // default policy (as set by set_mempolicy(2)) is returned, in the buffers
+ // pointed to by mode and nodemask. ... If flags specifies MPOL_F_NODE, but
+ // not MPOL_F_ADDR, and the thread's current policy is MPOL_INTERLEAVE,
+ // then get_mempolicy() will return in the location pointed to by a
+ // non-NULL mode argument, the node ID of the next node that will be used
+ // for interleaving of internal kernel pages allocated on behalf of the
+ // thread."
+ policy, nodemaskVal := t.NumaPolicy()
+ if nodeFlag {
+ if policy&^linux.MPOL_MODE_FLAGS != linux.MPOL_INTERLEAVE {
+ return 0, nil, syserror.EINVAL
+ }
+ policy = 0 // maxNodes == 1
+ }
+ if mode != 0 {
+ if _, err := t.CopyOut(mode, policy); err != nil {
+ return 0, nil, err
+ }
+ }
+ if nodemask != 0 {
+ if err := copyOutNodemask(t, nodemask, maxnode, nodemaskVal); err != nil {
+ return 0, nil, err
+ }
+ }
+ return 0, nil, nil
+}
+
+// SetMempolicy implements the syscall set_mempolicy(2).
+func SetMempolicy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ modeWithFlags := args[0].Int()
+ nodemask := args[1].Pointer()
+ maxnode := args[2].Uint()
+
+ modeWithFlags, nodemaskVal, err := copyInMempolicyNodemask(t, modeWithFlags, nodemask, maxnode)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ t.SetNumaPolicy(modeWithFlags, nodemaskVal)
+ return 0, nil, nil
+}
+
+// Mbind implements the syscall mbind(2).
+func Mbind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ addr := args[0].Pointer()
+ length := args[1].Uint64()
+ mode := args[2].Int()
+ nodemask := args[3].Pointer()
+ maxnode := args[4].Uint()
+ flags := args[5].Uint()
+
+ if flags&^linux.MPOL_MF_VALID != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ // "If MPOL_MF_MOVE_ALL is passed in flags ... [the] calling thread must be
+ // privileged (CAP_SYS_NICE) to use this flag." - mbind(2)
+ if flags&linux.MPOL_MF_MOVE_ALL != 0 && !t.HasCapability(linux.CAP_SYS_NICE) {
+ return 0, nil, syserror.EPERM
+ }
+
+ mode, nodemaskVal, err := copyInMempolicyNodemask(t, mode, nodemask, maxnode)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // Since we claim to have only a single node, all flags can be ignored
+ // (since all pages must already be on that single node).
+ err = t.MemoryManager().SetNumaPolicy(addr, length, mode, nodemaskVal)
+ return 0, nil, err
+}
+
+func copyInMempolicyNodemask(t *kernel.Task, modeWithFlags int32, nodemask usermem.Addr, maxnode uint32) (int32, uint64, error) {
+ flags := modeWithFlags & linux.MPOL_MODE_FLAGS
+ mode := modeWithFlags &^ linux.MPOL_MODE_FLAGS
+ if flags == linux.MPOL_MODE_FLAGS {
+ // Can't specify both mode flags simultaneously.
+ return 0, 0, syserror.EINVAL
+ }
+ if mode < 0 || mode >= linux.MPOL_MAX {
+ // Must specify a valid mode.
+ return 0, 0, syserror.EINVAL
+ }
+
+ var nodemaskVal uint64
+ if nodemask != 0 {
+ var err error
+ nodemaskVal, err = copyInNodemask(t, nodemask, maxnode)
+ if err != nil {
+ return 0, 0, err
+ }
+ }
+
+ switch mode {
+ case linux.MPOL_DEFAULT:
+ // "nodemask must be specified as NULL." - set_mempolicy(2). This is inaccurate;
+ // Linux allows a nodemask to be specified, as long as it is empty.
+ if nodemaskVal != 0 {
+ return 0, 0, syserror.EINVAL
+ }
+ case linux.MPOL_BIND, linux.MPOL_INTERLEAVE:
+ // These require a non-empty nodemask.
+ if nodemaskVal == 0 {
+ return 0, 0, syserror.EINVAL
+ }
+ case linux.MPOL_PREFERRED:
+ // This permits an empty nodemask, as long as no flags are set.
+ if nodemaskVal == 0 && flags != 0 {
+ return 0, 0, syserror.EINVAL
+ }
+ case linux.MPOL_LOCAL:
+ // This requires an empty nodemask and no flags set ...
+ if nodemaskVal != 0 || flags != 0 {
+ return 0, 0, syserror.EINVAL
+ }
+ // ... and is implemented as MPOL_PREFERRED.
+ mode = linux.MPOL_PREFERRED
+ default:
+ // Unknown mode, which we should have rejected above.
+ panic(fmt.Sprintf("unknown mode: %v", mode))
+ }
+
+ return mode | flags, nodemaskVal, nil
+}
diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go
index 64a6e639c..9926f0ac5 100644
--- a/pkg/sentry/syscalls/linux/sys_mmap.go
+++ b/pkg/sentry/syscalls/linux/sys_mmap.go
@@ -204,151 +204,6 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
}
}
-func copyOutIfNotNull(t *kernel.Task, ptr usermem.Addr, val interface{}) (int, error) {
- if ptr != 0 {
- return t.CopyOut(ptr, val)
- }
- return 0, nil
-}
-
-// GetMempolicy implements the syscall get_mempolicy(2).
-func GetMempolicy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- mode := args[0].Pointer()
- nodemask := args[1].Pointer()
- maxnode := args[2].Uint()
- addr := args[3].Pointer()
- flags := args[4].Uint()
-
- memsAllowed := flags&linux.MPOL_F_MEMS_ALLOWED != 0
- nodeFlag := flags&linux.MPOL_F_NODE != 0
- addrFlag := flags&linux.MPOL_F_ADDR != 0
-
- // TODO(rahat): Once sysfs is implemented, report a single numa node in
- // /sys/devices/system/node.
- if nodemask != 0 && maxnode < 1 {
- return 0, nil, syserror.EINVAL
- }
-
- // 'addr' provided iff 'addrFlag' set.
- if addrFlag == (addr == 0) {
- return 0, nil, syserror.EINVAL
- }
-
- // Default policy for the thread.
- if flags == 0 {
- policy, nodemaskVal := t.NumaPolicy()
- if _, err := copyOutIfNotNull(t, mode, policy); err != nil {
- return 0, nil, syserror.EFAULT
- }
- if _, err := copyOutIfNotNull(t, nodemask, nodemaskVal); err != nil {
- return 0, nil, syserror.EFAULT
- }
- return 0, nil, nil
- }
-
- // Report all nodes available to caller.
- if memsAllowed {
- // MPOL_F_NODE and MPOL_F_ADDR not allowed with MPOL_F_MEMS_ALLOWED.
- if nodeFlag || addrFlag {
- return 0, nil, syserror.EINVAL
- }
-
- // Report a single numa node.
- if _, err := copyOutIfNotNull(t, nodemask, uint32(0x1)); err != nil {
- return 0, nil, syserror.EFAULT
- }
- return 0, nil, nil
- }
-
- if addrFlag {
- if nodeFlag {
- // Return the id for the node where 'addr' resides, via 'mode'.
- //
- // The real get_mempolicy(2) allocates the page referenced by 'addr'
- // by simulating a read, if it is unallocated before the call. It
- // then returns the node the page is allocated on through the mode
- // pointer.
- b := t.CopyScratchBuffer(1)
- _, err := t.CopyInBytes(addr, b)
- if err != nil {
- return 0, nil, syserror.EFAULT
- }
- if _, err := copyOutIfNotNull(t, mode, int32(0)); err != nil {
- return 0, nil, syserror.EFAULT
- }
- } else {
- storedPolicy, _ := t.NumaPolicy()
- // Return the policy governing the memory referenced by 'addr'.
- if _, err := copyOutIfNotNull(t, mode, int32(storedPolicy)); err != nil {
- return 0, nil, syserror.EFAULT
- }
- }
- return 0, nil, nil
- }
-
- storedPolicy, _ := t.NumaPolicy()
- if nodeFlag && (storedPolicy&^linux.MPOL_MODE_FLAGS == linux.MPOL_INTERLEAVE) {
- // Policy for current thread is to interleave memory between
- // nodes. Return the next node we'll allocate on. Since we only have a
- // single node, this is always node 0.
- if _, err := copyOutIfNotNull(t, mode, int32(0)); err != nil {
- return 0, nil, syserror.EFAULT
- }
- return 0, nil, nil
- }
-
- return 0, nil, syserror.EINVAL
-}
-
-func allowedNodesMask() uint32 {
- const maxNodes = 1
- return ^uint32((1 << maxNodes) - 1)
-}
-
-// SetMempolicy implements the syscall set_mempolicy(2).
-func SetMempolicy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- modeWithFlags := args[0].Int()
- nodemask := args[1].Pointer()
- maxnode := args[2].Uint()
-
- if nodemask != 0 && maxnode < 1 {
- return 0, nil, syserror.EINVAL
- }
-
- if modeWithFlags&linux.MPOL_MODE_FLAGS == linux.MPOL_MODE_FLAGS {
- // Can't specify multiple modes simultaneously.
- return 0, nil, syserror.EINVAL
- }
-
- mode := modeWithFlags &^ linux.MPOL_MODE_FLAGS
- if mode < 0 || mode >= linux.MPOL_MAX {
- // Must specify a valid mode.
- return 0, nil, syserror.EINVAL
- }
-
- var nodemaskVal uint32
- // Nodemask may be empty for some policy modes.
- if nodemask != 0 && maxnode > 0 {
- if _, err := t.CopyIn(nodemask, &nodemaskVal); err != nil {
- return 0, nil, syserror.EFAULT
- }
- }
-
- if (mode == linux.MPOL_INTERLEAVE || mode == linux.MPOL_BIND) && nodemaskVal == 0 {
- // Mode requires a non-empty nodemask, but got an empty nodemask.
- return 0, nil, syserror.EINVAL
- }
-
- if nodemaskVal&allowedNodesMask() != 0 {
- // Invalid node specified.
- return 0, nil, syserror.EINVAL
- }
-
- t.SetNumaPolicy(int32(modeWithFlags), nodemaskVal)
-
- return 0, nil, nil
-}
-
// Mincore implements the syscall mincore(2).
func Mincore(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
addr := args[0].Pointer()
diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go
index 117ae1a0e..1b7e5616b 100644
--- a/pkg/sentry/syscalls/linux/sys_prctl.go
+++ b/pkg/sentry/syscalls/linux/sys_prctl.go
@@ -15,6 +15,7 @@
package linux
import (
+ "fmt"
"syscall"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
@@ -23,6 +24,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
)
// Prctl implements linux syscall prctl(2).
@@ -44,6 +46,33 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
_, err := t.CopyOut(args[1].Pointer(), int32(t.ParentDeathSignal()))
return 0, nil, err
+ case linux.PR_GET_DUMPABLE:
+ d := t.MemoryManager().Dumpability()
+ switch d {
+ case mm.NotDumpable:
+ return linux.SUID_DUMP_DISABLE, nil, nil
+ case mm.UserDumpable:
+ return linux.SUID_DUMP_USER, nil, nil
+ case mm.RootDumpable:
+ return linux.SUID_DUMP_ROOT, nil, nil
+ default:
+ panic(fmt.Sprintf("Unknown dumpability %v", d))
+ }
+
+ case linux.PR_SET_DUMPABLE:
+ var d mm.Dumpability
+ switch args[1].Int() {
+ case linux.SUID_DUMP_DISABLE:
+ d = mm.NotDumpable
+ case linux.SUID_DUMP_USER:
+ d = mm.UserDumpable
+ default:
+ // N.B. Userspace may not pass SUID_DUMP_ROOT.
+ return 0, nil, syscall.EINVAL
+ }
+ t.MemoryManager().SetDumpability(d)
+ return 0, nil, nil
+
case linux.PR_GET_KEEPCAPS:
if t.Credentials().KeepCaps {
return 1, nil, nil
@@ -171,9 +200,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
return 0, nil, t.DropBoundingCapability(cp)
- case linux.PR_GET_DUMPABLE,
- linux.PR_SET_DUMPABLE,
- linux.PR_GET_TIMING,
+ case linux.PR_GET_TIMING,
linux.PR_SET_TIMING,
linux.PR_GET_TSC,
linux.PR_SET_TSC,
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go
index 8f4dbf3bc..31295a6a9 100644
--- a/pkg/sentry/syscalls/linux/sys_socket.go
+++ b/pkg/sentry/syscalls/linux/sys_socket.go
@@ -188,7 +188,7 @@ func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
}
// Create the new socket.
- s, e := socket.New(t, domain, transport.SockType(stype&0xf), protocol)
+ s, e := socket.New(t, domain, linux.SockType(stype&0xf), protocol)
if e != nil {
return 0, nil, e.ToError()
}
@@ -227,7 +227,7 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
}
// Create the socket pair.
- s1, s2, e := socket.Pair(t, domain, transport.SockType(stype&0xf), protocol)
+ s1, s2, e := socket.Pair(t, domain, linux.SockType(stype&0xf), protocol)
if e != nil {
return 0, nil, e.ToError()
}
diff --git a/pkg/sentry/syscalls/syscalls.go b/pkg/sentry/syscalls/syscalls.go
index 5d10b3824..48c114232 100644
--- a/pkg/sentry/syscalls/syscalls.go
+++ b/pkg/sentry/syscalls/syscalls.go
@@ -25,37 +25,97 @@
package syscalls
import (
+ "fmt"
+ "syscall"
+
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/syserror"
)
+// Supported returns a syscall that is fully supported.
+func Supported(name string, fn kernel.SyscallFn) kernel.Syscall {
+ return kernel.Syscall{
+ Name: name,
+ Fn: fn,
+ SupportLevel: kernel.SupportFull,
+ Note: "Full Support",
+ }
+}
+
+// Undocumented returns a syscall that is undocumented.
+func Undocumented(name string, fn kernel.SyscallFn) kernel.Syscall {
+ return kernel.Syscall{
+ Name: name,
+ Fn: fn,
+ SupportLevel: kernel.SupportUndocumented,
+ }
+}
+
+// PartiallySupported returns a syscall that has a partial implementation.
+func PartiallySupported(name string, fn kernel.SyscallFn, note string, urls []string) kernel.Syscall {
+ return kernel.Syscall{
+ Name: name,
+ Fn: fn,
+ SupportLevel: kernel.SupportPartial,
+ Note: note,
+ URLs: urls,
+ }
+}
+
// Error returns a syscall handler that will always give the passed error.
-func Error(err error) kernel.SyscallFn {
- return func(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- return 0, nil, err
+func Error(name string, err syscall.Errno, note string, urls []string) kernel.Syscall {
+ if note != "" {
+ note = note + "; "
+ }
+ return kernel.Syscall{
+ Name: name,
+ Fn: func(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return 0, nil, err
+ },
+ SupportLevel: kernel.SupportUnimplemented,
+ Note: fmt.Sprintf("%sReturns %q", note, err.Error()),
+ URLs: urls,
}
}
// ErrorWithEvent gives a syscall function that sends an unimplemented
// syscall event via the event channel and returns the passed error.
-func ErrorWithEvent(err error) kernel.SyscallFn {
- return func(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- t.Kernel().EmitUnimplementedEvent(t)
- return 0, nil, err
+func ErrorWithEvent(name string, err syscall.Errno, note string, urls []string) kernel.Syscall {
+ if note != "" {
+ note = note + "; "
+ }
+ return kernel.Syscall{
+ Name: name,
+ Fn: func(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ t.Kernel().EmitUnimplementedEvent(t)
+ return 0, nil, err
+ },
+ SupportLevel: kernel.SupportUnimplemented,
+ Note: fmt.Sprintf("%sReturns %q", note, err.Error()),
+ URLs: urls,
}
}
// CapError gives a syscall function that checks for capability c. If the task
// has the capability, it returns ENOSYS, otherwise EPERM. To unprivileged
// tasks, it will seem like there is an implementation.
-func CapError(c linux.Capability) kernel.SyscallFn {
- return func(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- if !t.HasCapability(c) {
- return 0, nil, syserror.EPERM
- }
- t.Kernel().EmitUnimplementedEvent(t)
- return 0, nil, syserror.ENOSYS
+func CapError(name string, c linux.Capability, note string, urls []string) kernel.Syscall {
+ if note != "" {
+ note = note + "; "
+ }
+ return kernel.Syscall{
+ Name: name,
+ Fn: func(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ if !t.HasCapability(c) {
+ return 0, nil, syserror.EPERM
+ }
+ t.Kernel().EmitUnimplementedEvent(t)
+ return 0, nil, syserror.ENOSYS
+ },
+ SupportLevel: kernel.SupportUnimplemented,
+ Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise", note, syserror.EPERM, c.String(), syserror.ENOSYS),
+ URLs: urls,
}
}