diff options
Diffstat (limited to 'pkg/sentry/syscalls')
47 files changed, 1021 insertions, 885 deletions
diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD index 877318fa9..79d972202 100644 --- a/pkg/sentry/syscalls/BUILD +++ b/pkg/sentry/syscalls/BUILD @@ -8,14 +8,13 @@ go_library( "epoll.go", "syscalls.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls", + importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", "//pkg/sentry/arch", "//pkg/sentry/kernel", "//pkg/sentry/kernel/epoll", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/time", "//pkg/syserror", "//pkg/waiter", diff --git a/pkg/sentry/syscalls/epoll.go b/pkg/sentry/syscalls/epoll.go index ec1eab331..11850dd0f 100644 --- a/pkg/sentry/syscalls/epoll.go +++ b/pkg/sentry/syscalls/epoll.go @@ -18,22 +18,20 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/waiter" ) // CreateEpoll implements the epoll_create(2) linux syscall. -func CreateEpoll(t *kernel.Task, closeOnExec bool) (kdefs.FD, error) { +func CreateEpoll(t *kernel.Task, closeOnExec bool) (int32, error) { file := epoll.NewEventPoll(t) defer file.DecRef() - flags := kernel.FDFlags{ + fd, err := t.NewFDFrom(0, file, kernel.FDFlags{ CloseOnExec: closeOnExec, - } - fd, err := t.FDMap().NewFDFrom(0, file, flags, t.ThreadGroup().Limits()) + }) if err != nil { return 0, err } @@ -42,16 +40,16 @@ func CreateEpoll(t *kernel.Task, closeOnExec bool) (kdefs.FD, error) { } // AddEpoll implements the epoll_ctl(2) linux syscall when op is EPOLL_CTL_ADD. -func AddEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error { +func AddEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error { // Get epoll from the file descriptor. - epollfile := t.FDMap().GetFile(epfd) + epollfile := t.GetFile(epfd) if epollfile == nil { return syscall.EBADF } defer epollfile.DecRef() // Get the target file id. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return syscall.EBADF } @@ -68,16 +66,16 @@ func AddEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFlags } // UpdateEpoll implements the epoll_ctl(2) linux syscall when op is EPOLL_CTL_MOD. -func UpdateEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error { +func UpdateEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, mask waiter.EventMask, userData [2]int32) error { // Get epoll from the file descriptor. - epollfile := t.FDMap().GetFile(epfd) + epollfile := t.GetFile(epfd) if epollfile == nil { return syscall.EBADF } defer epollfile.DecRef() // Get the target file id. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return syscall.EBADF } @@ -94,16 +92,16 @@ func UpdateEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD, flags epoll.EntryFl } // RemoveEpoll implements the epoll_ctl(2) linux syscall when op is EPOLL_CTL_DEL. -func RemoveEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD) error { +func RemoveEpoll(t *kernel.Task, epfd int32, fd int32) error { // Get epoll from the file descriptor. - epollfile := t.FDMap().GetFile(epfd) + epollfile := t.GetFile(epfd) if epollfile == nil { return syscall.EBADF } defer epollfile.DecRef() // Get the target file id. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return syscall.EBADF } @@ -120,9 +118,9 @@ func RemoveEpoll(t *kernel.Task, epfd kdefs.FD, fd kdefs.FD) error { } // WaitEpoll implements the epoll_wait(2) linux syscall. -func WaitEpoll(t *kernel.Task, fd kdefs.FD, max int, timeout int) ([]epoll.Event, error) { +func WaitEpoll(t *kernel.Task, fd int32, max int, timeout int) ([]epoll.Event, error) { // Get epoll from the file descriptor. - epollfile := t.FDMap().GetFile(fd) + epollfile := t.GetFile(fd) if epollfile == nil { return nil, syscall.EBADF } diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 1c057526b..33a40b9c6 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -49,7 +49,7 @@ go_library( "sys_write.go", "timespec.go", ], - importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls/linux", + importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls/linux", visibility = ["//:sandbox"], deps = [ "//pkg/abi", @@ -71,7 +71,6 @@ go_library( "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/eventfd", "//pkg/sentry/kernel/fasync", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/sched", "//pkg/sentry/kernel/shm", @@ -86,6 +85,7 @@ go_library( "//pkg/sentry/syscalls", "//pkg/sentry/usage", "//pkg/sentry/usermem", + "//pkg/syserr", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go index 72146ea63..ac3905c5c 100644 --- a/pkg/sentry/syscalls/linux/error.go +++ b/pkg/sentry/syscalls/linux/error.go @@ -19,12 +19,12 @@ import ( "sync" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/metric" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) var ( diff --git a/pkg/sentry/syscalls/linux/flags.go b/pkg/sentry/syscalls/linux/flags.go index d83e12971..0c1b5ec27 100644 --- a/pkg/sentry/syscalls/linux/flags.go +++ b/pkg/sentry/syscalls/linux/flags.go @@ -15,8 +15,8 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fs" ) // flagsToPermissions returns a Permissions object from Linux flags. diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 5251c2463..7bbbb5008 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -18,13 +18,13 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/syscalls" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // AUDIT_ARCH_X86_64 identifies the Linux syscall API on AMD64, and is taken @@ -50,244 +50,244 @@ var AMD64 = &kernel.SyscallTable{ Table: map[uintptr]kernel.Syscall{ 0: syscalls.Supported("read", Read), 1: syscalls.Supported("write", Write), - 2: syscalls.Supported("open", Open), + 2: syscalls.PartiallySupported("open", Open, "Options O_DIRECT, O_NOATIME, O_PATH, O_TMPFILE, O_SYNC are not supported.", nil), 3: syscalls.Supported("close", Close), - 4: syscalls.Undocumented("stat", Stat), - 5: syscalls.Undocumented("fstat", Fstat), - 6: syscalls.Undocumented("lstat", Lstat), - 7: syscalls.Undocumented("poll", Poll), - 8: syscalls.Undocumented("lseek", Lseek), - 9: syscalls.Undocumented("mmap", Mmap), - 10: syscalls.Undocumented("mprotect", Mprotect), - 11: syscalls.Undocumented("munmap", Munmap), - 12: syscalls.Undocumented("brk", Brk), - 13: syscalls.Undocumented("rt_sigaction", RtSigaction), - 14: syscalls.Undocumented("rt_sigprocmask", RtSigprocmask), - 15: syscalls.Undocumented("rt_sigreturn", RtSigreturn), - 16: syscalls.Undocumented("ioctl", Ioctl), - 17: syscalls.Undocumented("pread64", Pread64), - 18: syscalls.Undocumented("pwrite64", Pwrite64), - 19: syscalls.Undocumented("readv", Readv), - 20: syscalls.Undocumented("writev", Writev), - 21: syscalls.Undocumented("access", Access), - 22: syscalls.Undocumented("pipe", Pipe), - 23: syscalls.Undocumented("select", Select), - 24: syscalls.Undocumented("sched_yield", SchedYield), - 25: syscalls.Undocumented("mremap", Mremap), - 26: syscalls.Undocumented("msync", Msync), - 27: syscalls.Undocumented("mincore", Mincore), - 28: syscalls.Undocumented("madvise", Madvise), - 29: syscalls.Undocumented("shmget", Shmget), - 30: syscalls.Undocumented("shmat", Shmat), - 31: syscalls.Undocumented("shmctl", Shmctl), - 32: syscalls.Undocumented("dup", Dup), - 33: syscalls.Undocumented("dup2", Dup2), - 34: syscalls.Undocumented("pause", Pause), - 35: syscalls.Undocumented("nanosleep", Nanosleep), - 36: syscalls.Undocumented("getitimer", Getitimer), - 37: syscalls.Undocumented("alarm", Alarm), - 38: syscalls.Undocumented("setitimer", Setitimer), - 39: syscalls.Undocumented("getpid", Getpid), - 40: syscalls.Undocumented("sendfile", Sendfile), - 41: syscalls.Undocumented("socket", Socket), - 42: syscalls.Undocumented("connect", Connect), - 43: syscalls.Undocumented("accept", Accept), - 44: syscalls.Undocumented("sendto", SendTo), - 45: syscalls.Undocumented("recvfrom", RecvFrom), - 46: syscalls.Undocumented("sendmsg", SendMsg), - 47: syscalls.Undocumented("recvmsg", RecvMsg), - 48: syscalls.Undocumented("shutdown", Shutdown), - 49: syscalls.Undocumented("bind", Bind), - 50: syscalls.Undocumented("listen", Listen), - 51: syscalls.Undocumented("getsockname", GetSockName), - 52: syscalls.Undocumented("getpeername", GetPeerName), - 53: syscalls.Undocumented("socketpair", SocketPair), - 54: syscalls.Undocumented("setsockopt", SetSockOpt), - 55: syscalls.Undocumented("getsockopt", GetSockOpt), - 56: syscalls.Undocumented("clone", Clone), - 57: syscalls.Undocumented("fork", Fork), - 58: syscalls.Undocumented("vfork", Vfork), - 59: syscalls.Undocumented("execve", Execve), - 60: syscalls.Undocumented("exit", Exit), - 61: syscalls.Undocumented("wait4", Wait4), - 62: syscalls.Undocumented("kill", Kill), - 63: syscalls.Undocumented("uname", Uname), - 64: syscalls.Undocumented("semget", Semget), - 65: syscalls.Undocumented("semop", Semop), - 66: syscalls.Undocumented("semctl", Semctl), - 67: syscalls.Undocumented("shmdt", Shmdt), + 4: syscalls.Supported("stat", Stat), + 5: syscalls.Supported("fstat", Fstat), + 6: syscalls.Supported("lstat", Lstat), + 7: syscalls.Supported("poll", Poll), + 8: syscalls.Supported("lseek", Lseek), + 9: syscalls.PartiallySupported("mmap", Mmap, "Generally supported with exceptions. Options MAP_FIXED_NOREPLACE, MAP_SHARED_VALIDATE, MAP_SYNC MAP_GROWSDOWN, MAP_HUGETLB are not supported.", nil), + 10: syscalls.Supported("mprotect", Mprotect), + 11: syscalls.Supported("munmap", Munmap), + 12: syscalls.Supported("brk", Brk), + 13: syscalls.Supported("rt_sigaction", RtSigaction), + 14: syscalls.Supported("rt_sigprocmask", RtSigprocmask), + 15: syscalls.Supported("rt_sigreturn", RtSigreturn), + 16: syscalls.PartiallySupported("ioctl", Ioctl, "Only a few ioctls are implemented for backing devices and file systems.", nil), + 17: syscalls.Supported("pread64", Pread64), + 18: syscalls.Supported("pwrite64", Pwrite64), + 19: syscalls.Supported("readv", Readv), + 20: syscalls.Supported("writev", Writev), + 21: syscalls.Supported("access", Access), + 22: syscalls.Supported("pipe", Pipe), + 23: syscalls.Supported("select", Select), + 24: syscalls.Supported("sched_yield", SchedYield), + 25: syscalls.Supported("mremap", Mremap), + 26: syscalls.PartiallySupported("msync", Msync, "Full data flush is not guaranteed at this time.", nil), + 27: syscalls.PartiallySupported("mincore", Mincore, "Stub implementation. The sandbox does not have access to this information. Reports all mapped pages are resident.", nil), + 28: syscalls.PartiallySupported("madvise", Madvise, "Options MADV_DONTNEED, MADV_DONTFORK are supported. Other advice is ignored.", nil), + 29: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil), + 30: syscalls.PartiallySupported("shmat", Shmat, "Option SHM_RND is not supported.", nil), + 31: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil), + 32: syscalls.Supported("dup", Dup), + 33: syscalls.Supported("dup2", Dup2), + 34: syscalls.Supported("pause", Pause), + 35: syscalls.Supported("nanosleep", Nanosleep), + 36: syscalls.Supported("getitimer", Getitimer), + 37: syscalls.Supported("alarm", Alarm), + 38: syscalls.Supported("setitimer", Setitimer), + 39: syscalls.Supported("getpid", Getpid), + 40: syscalls.Supported("sendfile", Sendfile), + 41: syscalls.PartiallySupported("socket", Socket, "Limited support for AF_NETLINK, NETLINK_ROUTE sockets. Limited support for SOCK_RAW.", nil), + 42: syscalls.Supported("connect", Connect), + 43: syscalls.Supported("accept", Accept), + 44: syscalls.Supported("sendto", SendTo), + 45: syscalls.Supported("recvfrom", RecvFrom), + 46: syscalls.Supported("sendmsg", SendMsg), + 47: syscalls.PartiallySupported("recvmsg", RecvMsg, "Not all flags and control messages are supported.", nil), + 48: syscalls.PartiallySupported("shutdown", Shutdown, "Not all flags and control messages are supported.", nil), + 49: syscalls.PartiallySupported("bind", Bind, "Autobind for abstract Unix sockets is not supported.", nil), + 50: syscalls.Supported("listen", Listen), + 51: syscalls.Supported("getsockname", GetSockName), + 52: syscalls.Supported("getpeername", GetPeerName), + 53: syscalls.Supported("socketpair", SocketPair), + 54: syscalls.PartiallySupported("setsockopt", SetSockOpt, "Not all socket options are supported.", nil), + 55: syscalls.PartiallySupported("getsockopt", GetSockOpt, "Not all socket options are supported.", nil), + 56: syscalls.PartiallySupported("clone", Clone, "Mount namespace (CLONE_NEWNS) not supported. Options CLONE_PARENT, CLONE_SYSVSEM not supported.", nil), + 57: syscalls.Supported("fork", Fork), + 58: syscalls.Supported("vfork", Vfork), + 59: syscalls.Supported("execve", Execve), + 60: syscalls.Supported("exit", Exit), + 61: syscalls.Supported("wait4", Wait4), + 62: syscalls.Supported("kill", Kill), + 63: syscalls.Supported("uname", Uname), + 64: syscalls.Supported("semget", Semget), + 65: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil), + 66: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil), + 67: syscalls.Supported("shmdt", Shmdt), 68: syscalls.ErrorWithEvent("msgget", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) 69: syscalls.ErrorWithEvent("msgsnd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) 70: syscalls.ErrorWithEvent("msgrcv", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) 71: syscalls.ErrorWithEvent("msgctl", syscall.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) - 72: syscalls.Undocumented("fcntl", Fcntl), - 73: syscalls.Undocumented("flock", Flock), - 74: syscalls.Undocumented("fsync", Fsync), - 75: syscalls.Undocumented("fdatasync", Fdatasync), - 76: syscalls.Undocumented("truncate", Truncate), - 77: syscalls.Undocumented("ftruncate", Ftruncate), - 78: syscalls.Undocumented("getdents", Getdents), - 79: syscalls.Undocumented("getcwd", Getcwd), - 80: syscalls.Undocumented("chdir", Chdir), - 81: syscalls.Undocumented("fchdir", Fchdir), - 82: syscalls.Undocumented("rename", Rename), - 83: syscalls.Undocumented("mkdir", Mkdir), - 84: syscalls.Undocumented("rmdir", Rmdir), - 85: syscalls.Undocumented("creat", Creat), - 86: syscalls.Undocumented("link", Link), - 87: syscalls.Undocumented("link", Unlink), - 88: syscalls.Undocumented("symlink", Symlink), - 89: syscalls.Undocumented("readlink", Readlink), - 90: syscalls.Undocumented("chmod", Chmod), - 91: syscalls.Undocumented("fchmod", Fchmod), - 92: syscalls.Undocumented("chown", Chown), - 93: syscalls.Undocumented("fchown", Fchown), - 94: syscalls.Undocumented("lchown", Lchown), - 95: syscalls.Undocumented("umask", Umask), - 96: syscalls.Undocumented("gettimeofday", Gettimeofday), - 97: syscalls.Undocumented("getrlimit", Getrlimit), - 98: syscalls.Undocumented("getrusage", Getrusage), - 99: syscalls.Undocumented("sysinfo", Sysinfo), - 100: syscalls.Undocumented("times", Times), - 101: syscalls.Undocumented("ptrace", Ptrace), - 102: syscalls.Undocumented("getuid", Getuid), - 103: syscalls.Undocumented("syslog", Syslog), - 104: syscalls.Undocumented("getgid", Getgid), - 105: syscalls.Undocumented("setuid", Setuid), - 106: syscalls.Undocumented("setgid", Setgid), - 107: syscalls.Undocumented("geteuid", Geteuid), - 108: syscalls.Undocumented("getegid", Getegid), - 109: syscalls.Undocumented("setpgid", Setpgid), - 110: syscalls.Undocumented("getppid", Getppid), - 111: syscalls.Undocumented("getpgrp", Getpgrp), - 112: syscalls.Undocumented("setsid", Setsid), - 113: syscalls.Undocumented("setreuid", Setreuid), - 114: syscalls.Undocumented("setregid", Setregid), - 115: syscalls.Undocumented("getgroups", Getgroups), - 116: syscalls.Undocumented("setgroups", Setgroups), - 117: syscalls.Undocumented("setresuid", Setresuid), - 118: syscalls.Undocumented("getresuid", Getresuid), - 119: syscalls.Undocumented("setresgid", Setresgid), - 120: syscalls.Undocumented("setresgid", Getresgid), - 121: syscalls.Undocumented("getpgid", Getpgid), + 72: syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil), + 73: syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil), + 74: syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil), + 75: syscalls.PartiallySupported("fdatasync", Fdatasync, "Full data flush is not guaranteed at this time.", nil), + 76: syscalls.Supported("truncate", Truncate), + 77: syscalls.Supported("ftruncate", Ftruncate), + 78: syscalls.Supported("getdents", Getdents), + 79: syscalls.Supported("getcwd", Getcwd), + 80: syscalls.Supported("chdir", Chdir), + 81: syscalls.Supported("fchdir", Fchdir), + 82: syscalls.Supported("rename", Rename), + 83: syscalls.Supported("mkdir", Mkdir), + 84: syscalls.Supported("rmdir", Rmdir), + 85: syscalls.Supported("creat", Creat), + 86: syscalls.Supported("link", Link), + 87: syscalls.Supported("unlink", Unlink), + 88: syscalls.Supported("symlink", Symlink), + 89: syscalls.Supported("readlink", Readlink), + 90: syscalls.Supported("chmod", Chmod), + 91: syscalls.PartiallySupported("fchmod", Fchmod, "Options S_ISUID and S_ISGID not supported.", nil), + 92: syscalls.Supported("chown", Chown), + 93: syscalls.Supported("fchown", Fchown), + 94: syscalls.Supported("lchown", Lchown), + 95: syscalls.Supported("umask", Umask), + 96: syscalls.Supported("gettimeofday", Gettimeofday), + 97: syscalls.Supported("getrlimit", Getrlimit), + 98: syscalls.PartiallySupported("getrusage", Getrusage, "Fields ru_maxrss, ru_minflt, ru_majflt, ru_inblock, ru_oublock are not supported. Fields ru_utime and ru_stime have low precision.", nil), + 99: syscalls.PartiallySupported("sysinfo", Sysinfo, "Fields loads, sharedram, bufferram, totalswap, freeswap, totalhigh, freehigh not supported.", nil), + 100: syscalls.Supported("times", Times), + 101: syscalls.PartiallySupported("ptrace", Ptrace, "Options PTRACE_PEEKSIGINFO, PTRACE_SECCOMP_GET_FILTER not supported.", nil), + 102: syscalls.Supported("getuid", Getuid), + 103: syscalls.PartiallySupported("syslog", Syslog, "Outputs a dummy message for security reasons.", nil), + 104: syscalls.Supported("getgid", Getgid), + 105: syscalls.Supported("setuid", Setuid), + 106: syscalls.Supported("setgid", Setgid), + 107: syscalls.Supported("geteuid", Geteuid), + 108: syscalls.Supported("getegid", Getegid), + 109: syscalls.Supported("setpgid", Setpgid), + 110: syscalls.Supported("getppid", Getppid), + 111: syscalls.Supported("getpgrp", Getpgrp), + 112: syscalls.Supported("setsid", Setsid), + 113: syscalls.Supported("setreuid", Setreuid), + 114: syscalls.Supported("setregid", Setregid), + 115: syscalls.Supported("getgroups", Getgroups), + 116: syscalls.Supported("setgroups", Setgroups), + 117: syscalls.Supported("setresuid", Setresuid), + 118: syscalls.Supported("getresuid", Getresuid), + 119: syscalls.Supported("setresgid", Setresgid), + 120: syscalls.Supported("getresgid", Getresgid), + 121: syscalls.Supported("getpgid", Getpgid), 122: syscalls.ErrorWithEvent("setfsuid", syscall.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) 123: syscalls.ErrorWithEvent("setfsgid", syscall.ENOSYS, "", []string{"gvisor.dev/issue/260"}), // TODO(b/112851702) - 124: syscalls.Undocumented("getsid", Getsid), - 125: syscalls.Undocumented("capget", Capget), - 126: syscalls.Undocumented("capset", Capset), - 127: syscalls.Undocumented("rt_sigpending", RtSigpending), - 128: syscalls.Undocumented("rt_sigtimedwait", RtSigtimedwait), - 129: syscalls.Undocumented("rt_sigqueueinfo", RtSigqueueinfo), - 130: syscalls.Undocumented("rt_sigsuspend", RtSigsuspend), - 131: syscalls.Undocumented("sigaltstack", Sigaltstack), - 132: syscalls.Undocumented("utime", Utime), - 133: syscalls.Undocumented("mknod", Mknod), + 124: syscalls.Supported("getsid", Getsid), + 125: syscalls.Supported("capget", Capget), + 126: syscalls.Supported("capset", Capset), + 127: syscalls.Supported("rt_sigpending", RtSigpending), + 128: syscalls.Supported("rt_sigtimedwait", RtSigtimedwait), + 129: syscalls.Supported("rt_sigqueueinfo", RtSigqueueinfo), + 130: syscalls.Supported("rt_sigsuspend", RtSigsuspend), + 131: syscalls.Supported("sigaltstack", Sigaltstack), + 132: syscalls.Supported("utime", Utime), + 133: syscalls.PartiallySupported("mknod", Mknod, "Device creation is not generally supported. Only regular file and FIFO creation are supported.", nil), 134: syscalls.Error("uselib", syscall.ENOSYS, "Obsolete", nil), 135: syscalls.ErrorWithEvent("personality", syscall.EINVAL, "Unable to change personality.", nil), 136: syscalls.ErrorWithEvent("ustat", syscall.ENOSYS, "Needs filesystem support.", nil), - 137: syscalls.Undocumented("statfs", Statfs), - 138: syscalls.Undocumented("fstatfs", Fstatfs), + 137: syscalls.PartiallySupported("statfs", Statfs, "Depends on the backing file system implementation.", nil), + 138: syscalls.PartiallySupported("fstatfs", Fstatfs, "Depends on the backing file system implementation.", nil), 139: syscalls.ErrorWithEvent("sysfs", syscall.ENOSYS, "", []string{"gvisor.dev/issue/165"}), - 140: syscalls.Undocumented("getpriority", Getpriority), - 141: syscalls.Undocumented("setpriority", Setpriority), + 140: syscalls.PartiallySupported("getpriority", Getpriority, "Stub implementation.", nil), + 141: syscalls.PartiallySupported("setpriority", Setpriority, "Stub implementation.", nil), 142: syscalls.CapError("sched_setparam", linux.CAP_SYS_NICE, "", nil), - 143: syscalls.Undocumented("sched_getparam", SchedGetparam), - 144: syscalls.Undocumented("sched_setscheduler", SchedSetscheduler), - 145: syscalls.Undocumented("sched_getscheduler", SchedGetscheduler), - 146: syscalls.Undocumented("sched_get_priority_max", SchedGetPriorityMax), - 147: syscalls.Undocumented("sched_get_priority_min", SchedGetPriorityMin), + 143: syscalls.PartiallySupported("sched_getparam", SchedGetparam, "Stub implementation.", nil), + 144: syscalls.PartiallySupported("sched_setscheduler", SchedSetscheduler, "Stub implementation.", nil), + 145: syscalls.PartiallySupported("sched_getscheduler", SchedGetscheduler, "Stub implementation.", nil), + 146: syscalls.PartiallySupported("sched_get_priority_max", SchedGetPriorityMax, "Stub implementation.", nil), + 147: syscalls.PartiallySupported("sched_get_priority_min", SchedGetPriorityMin, "Stub implementation.", nil), 148: syscalls.ErrorWithEvent("sched_rr_get_interval", syscall.EPERM, "", nil), - 149: syscalls.Undocumented("mlock", Mlock), - 150: syscalls.Undocumented("munlock", Munlock), - 151: syscalls.Undocumented("mlockall", Mlockall), - 152: syscalls.Undocumented("munlockall", Munlockall), + 149: syscalls.PartiallySupported("mlock", Mlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 150: syscalls.PartiallySupported("munlock", Munlock, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 151: syscalls.PartiallySupported("mlockall", Mlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), + 152: syscalls.PartiallySupported("munlockall", Munlockall, "Stub implementation. The sandbox lacks appropriate permissions.", nil), 153: syscalls.CapError("vhangup", linux.CAP_SYS_TTY_CONFIG, "", nil), 154: syscalls.Error("modify_ldt", syscall.EPERM, "", nil), 155: syscalls.Error("pivot_root", syscall.EPERM, "", nil), - 156: syscalls.Error("sysctl", syscall.EPERM, `syscall is "worthless"`, nil), - 157: syscalls.Undocumented("prctl", Prctl), - 158: syscalls.Undocumented("arch_prctl", ArchPrctl), + 156: syscalls.Error("sysctl", syscall.EPERM, "Deprecated. Use /proc/sys instead.", nil), + 157: syscalls.PartiallySupported("prctl", Prctl, "Not all options are supported.", nil), + 158: syscalls.PartiallySupported("arch_prctl", ArchPrctl, "Options ARCH_GET_GS, ARCH_SET_GS not supported.", nil), 159: syscalls.CapError("adjtimex", linux.CAP_SYS_TIME, "", nil), - 160: syscalls.Undocumented("setrlimit", Setrlimit), - 161: syscalls.Undocumented("chroot", Chroot), - 162: syscalls.Undocumented("sync", Sync), + 160: syscalls.PartiallySupported("setrlimit", Setrlimit, "Not all rlimits are enforced.", nil), + 161: syscalls.Supported("chroot", Chroot), + 162: syscalls.PartiallySupported("sync", Sync, "Full data flush is not guaranteed at this time.", nil), 163: syscalls.CapError("acct", linux.CAP_SYS_PACCT, "", nil), 164: syscalls.CapError("settimeofday", linux.CAP_SYS_TIME, "", nil), - 165: syscalls.Undocumented("mount", Mount), - 166: syscalls.Undocumented("umount2", Umount2), + 165: syscalls.PartiallySupported("mount", Mount, "Not all options or file systems are supported.", nil), + 166: syscalls.PartiallySupported("umount2", Umount2, "Not all options or file systems are supported.", nil), 167: syscalls.CapError("swapon", linux.CAP_SYS_ADMIN, "", nil), 168: syscalls.CapError("swapoff", linux.CAP_SYS_ADMIN, "", nil), 169: syscalls.CapError("reboot", linux.CAP_SYS_BOOT, "", nil), - 170: syscalls.Undocumented("sethostname", Sethostname), - 171: syscalls.Undocumented("setdomainname", Setdomainname), + 170: syscalls.Supported("sethostname", Sethostname), + 171: syscalls.Supported("setdomainname", Setdomainname), 172: syscalls.CapError("iopl", linux.CAP_SYS_RAWIO, "", nil), 173: syscalls.CapError("ioperm", linux.CAP_SYS_RAWIO, "", nil), 174: syscalls.CapError("create_module", linux.CAP_SYS_MODULE, "", nil), 175: syscalls.CapError("init_module", linux.CAP_SYS_MODULE, "", nil), 176: syscalls.CapError("delete_module", linux.CAP_SYS_MODULE, "", nil), - 177: syscalls.Error("get_kernel_syms", syscall.ENOSYS, "Not supported in > 2.6", nil), - 178: syscalls.Error("query_module", syscall.ENOSYS, "Not supported in > 2.6", nil), + 177: syscalls.Error("get_kernel_syms", syscall.ENOSYS, "Not supported in Linux > 2.6.", nil), + 178: syscalls.Error("query_module", syscall.ENOSYS, "Not supported in Linux > 2.6.", nil), 179: syscalls.CapError("quotactl", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_admin for most operations - 180: syscalls.Error("nfsservctl", syscall.ENOSYS, "Does not exist > 3.1", nil), - 181: syscalls.Error("getpmsg", syscall.ENOSYS, "Not implemented in Linux", nil), - 182: syscalls.Error("putpmsg", syscall.ENOSYS, "Not implemented in Linux", nil), - 183: syscalls.Error("afs_syscall", syscall.ENOSYS, "Not implemented in Linux", nil), - 184: syscalls.Error("tuxcall", syscall.ENOSYS, "Not implemented in Linux", nil), - 185: syscalls.Error("security", syscall.ENOSYS, "Not implemented in Linux", nil), - 186: syscalls.Undocumented("gettid", Gettid), + 180: syscalls.Error("nfsservctl", syscall.ENOSYS, "Removed after Linux 3.1.", nil), + 181: syscalls.Error("getpmsg", syscall.ENOSYS, "Not implemented in Linux.", nil), + 182: syscalls.Error("putpmsg", syscall.ENOSYS, "Not implemented in Linux.", nil), + 183: syscalls.Error("afs_syscall", syscall.ENOSYS, "Not implemented in Linux.", nil), + 184: syscalls.Error("tuxcall", syscall.ENOSYS, "Not implemented in Linux.", nil), + 185: syscalls.Error("security", syscall.ENOSYS, "Not implemented in Linux.", nil), + 186: syscalls.Supported("gettid", Gettid), 187: syscalls.ErrorWithEvent("readahead", syscall.ENOSYS, "", []string{"gvisor.dev/issue/261"}), // TODO(b/29351341) - 188: syscalls.ErrorWithEvent("setxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 189: syscalls.ErrorWithEvent("lsetxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 190: syscalls.ErrorWithEvent("fsetxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 191: syscalls.ErrorWithEvent("getxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 192: syscalls.ErrorWithEvent("lgetxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 193: syscalls.ErrorWithEvent("fgetxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 194: syscalls.ErrorWithEvent("listxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 195: syscalls.ErrorWithEvent("llistxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 196: syscalls.ErrorWithEvent("flistxattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 197: syscalls.ErrorWithEvent("removexattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 198: syscalls.ErrorWithEvent("lremovexattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 199: syscalls.ErrorWithEvent("fremovexattr", syscall.ENOTSUP, "Requires filesystem support", nil), - 200: syscalls.Undocumented("tkill", Tkill), - 201: syscalls.Undocumented("time", Time), - 202: syscalls.Undocumented("futex", Futex), - 203: syscalls.Undocumented("sched_setaffinity", SchedSetaffinity), - 204: syscalls.Undocumented("sched_getaffinity", SchedGetaffinity), + 188: syscalls.ErrorWithEvent("setxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 189: syscalls.ErrorWithEvent("lsetxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 190: syscalls.ErrorWithEvent("fsetxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 191: syscalls.ErrorWithEvent("getxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 192: syscalls.ErrorWithEvent("lgetxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 193: syscalls.ErrorWithEvent("fgetxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 194: syscalls.ErrorWithEvent("listxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 195: syscalls.ErrorWithEvent("llistxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 196: syscalls.ErrorWithEvent("flistxattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 197: syscalls.ErrorWithEvent("removexattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 198: syscalls.ErrorWithEvent("lremovexattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 199: syscalls.ErrorWithEvent("fremovexattr", syscall.ENOTSUP, "Requires filesystem support.", nil), + 200: syscalls.Supported("tkill", Tkill), + 201: syscalls.Supported("time", Time), + 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), + 203: syscalls.PartiallySupported("sched_setaffinity", SchedSetaffinity, "Stub implementation.", nil), + 204: syscalls.PartiallySupported("sched_getaffinity", SchedGetaffinity, "Stub implementation.", nil), 205: syscalls.Error("set_thread_area", syscall.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), - 206: syscalls.Undocumented("io_setup", IoSetup), - 207: syscalls.Undocumented("io_destroy", IoDestroy), - 208: syscalls.Undocumented("io_getevents", IoGetevents), - 209: syscalls.Undocumented("io_submit", IoSubmit), - 210: syscalls.Undocumented("io_cancel", IoCancel), + 206: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 207: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 208: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 209: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 210: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 211: syscalls.Error("get_thread_area", syscall.ENOSYS, "Expected to return ENOSYS on 64-bit", nil), 212: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), - 213: syscalls.Undocumented("epoll_create", EpollCreate), - 214: syscalls.ErrorWithEvent("epoll_ctl_old", syscall.ENOSYS, "Deprecated", nil), - 215: syscalls.ErrorWithEvent("epoll_wait_old", syscall.ENOSYS, "Deprecated", nil), - 216: syscalls.ErrorWithEvent("remap_file_pages", syscall.ENOSYS, "Deprecated since 3.16", nil), - 217: syscalls.Undocumented("getdents64", Getdents64), - 218: syscalls.Undocumented("set_tid_address", SetTidAddress), - 219: syscalls.Undocumented("restart_syscall", RestartSyscall), + 213: syscalls.Supported("epoll_create", EpollCreate), + 214: syscalls.ErrorWithEvent("epoll_ctl_old", syscall.ENOSYS, "Deprecated.", nil), + 215: syscalls.ErrorWithEvent("epoll_wait_old", syscall.ENOSYS, "Deprecated.", nil), + 216: syscalls.ErrorWithEvent("remap_file_pages", syscall.ENOSYS, "Deprecated since Linux 3.16.", nil), + 217: syscalls.Supported("getdents64", Getdents64), + 218: syscalls.Supported("set_tid_address", SetTidAddress), + 219: syscalls.Supported("restart_syscall", RestartSyscall), 220: syscalls.ErrorWithEvent("semtimedop", syscall.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920) - 221: syscalls.Undocumented("fadvise64", Fadvise64), - 222: syscalls.Undocumented("timer_create", TimerCreate), - 223: syscalls.Undocumented("timer_settime", TimerSettime), - 224: syscalls.Undocumented("timer_gettime", TimerGettime), - 225: syscalls.Undocumented("timer_getoverrun", TimerGetoverrun), - 226: syscalls.Undocumented("timer_delete", TimerDelete), - 227: syscalls.Undocumented("clock_settime", ClockSettime), - 228: syscalls.Undocumented("clock_gettime", ClockGettime), - 229: syscalls.Undocumented("clock_getres", ClockGetres), - 230: syscalls.Undocumented("clock_nanosleep", ClockNanosleep), - 231: syscalls.Undocumented("exit_group", ExitGroup), - 232: syscalls.Undocumented("epoll_wait", EpollWait), - 233: syscalls.Undocumented("epoll_ctl", EpollCtl), - 234: syscalls.Undocumented("tgkill", Tgkill), - 235: syscalls.Undocumented("utimes", Utimes), + 221: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil), + 222: syscalls.Supported("timer_create", TimerCreate), + 223: syscalls.Supported("timer_settime", TimerSettime), + 224: syscalls.Supported("timer_gettime", TimerGettime), + 225: syscalls.Supported("timer_getoverrun", TimerGetoverrun), + 226: syscalls.Supported("timer_delete", TimerDelete), + 227: syscalls.Supported("clock_settime", ClockSettime), + 228: syscalls.Supported("clock_gettime", ClockGettime), + 229: syscalls.Supported("clock_getres", ClockGetres), + 230: syscalls.Supported("clock_nanosleep", ClockNanosleep), + 231: syscalls.Supported("exit_group", ExitGroup), + 232: syscalls.Supported("epoll_wait", EpollWait), + 233: syscalls.Supported("epoll_ctl", EpollCtl), + 234: syscalls.Supported("tgkill", Tgkill), + 235: syscalls.Supported("utimes", Utimes), 236: syscalls.Error("vserver", syscall.ENOSYS, "Not implemented by Linux", nil), 237: syscalls.PartiallySupported("mbind", Mbind, "Stub implementation. Only a single NUMA node is advertised, and mempolicy is ignored accordingly, but mbind() will succeed and has effects reflected by get_mempolicy.", []string{"gvisor.dev/issue/262"}), - 238: syscalls.Undocumented("set_mempolicy", SetMempolicy), - 239: syscalls.Undocumented("get_mempolicy", GetMempolicy), + 238: syscalls.PartiallySupported("set_mempolicy", SetMempolicy, "Stub implementation.", nil), + 239: syscalls.PartiallySupported("get_mempolicy", GetMempolicy, "Stub implementation.", nil), 240: syscalls.ErrorWithEvent("mq_open", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 241: syscalls.ErrorWithEvent("mq_unlink", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 242: syscalls.ErrorWithEvent("mq_timedsend", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) @@ -295,69 +295,69 @@ var AMD64 = &kernel.SyscallTable{ 244: syscalls.ErrorWithEvent("mq_notify", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 245: syscalls.ErrorWithEvent("mq_getsetattr", syscall.ENOSYS, "", []string{"gvisor.dev/issue/136"}), // TODO(b/29354921) 246: syscalls.CapError("kexec_load", linux.CAP_SYS_BOOT, "", nil), - 247: syscalls.Undocumented("waitid", Waitid), - 248: syscalls.Error("add_key", syscall.EACCES, "Not available to user", nil), - 249: syscalls.Error("request_key", syscall.EACCES, "Not available to user", nil), - 250: syscalls.Error("keyctl", syscall.EACCES, "Not available to user", nil), + 247: syscalls.Supported("waitid", Waitid), + 248: syscalls.Error("add_key", syscall.EACCES, "Not available to user.", nil), + 249: syscalls.Error("request_key", syscall.EACCES, "Not available to user.", nil), + 250: syscalls.Error("keyctl", syscall.EACCES, "Not available to user.", nil), 251: syscalls.CapError("ioprio_set", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) 252: syscalls.CapError("ioprio_get", linux.CAP_SYS_ADMIN, "", nil), // requires cap_sys_nice or cap_sys_admin (depending) - 253: syscalls.Undocumented("inotify_init", InotifyInit), - 254: syscalls.Undocumented("inotify_add_watch", InotifyAddWatch), - 255: syscalls.Undocumented("inotify_rm_watch", InotifyRmWatch), + 253: syscalls.PartiallySupported("inotify_init", InotifyInit, "inotify events are only available inside the sandbox.", nil), + 254: syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil), + 255: syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil), 256: syscalls.CapError("migrate_pages", linux.CAP_SYS_NICE, "", nil), - 257: syscalls.Undocumented("openat", Openat), - 258: syscalls.Undocumented("mkdirat", Mkdirat), - 259: syscalls.Undocumented("mknodat", Mknodat), - 260: syscalls.Undocumented("fchownat", Fchownat), - 261: syscalls.Undocumented("futimesat", Futimesat), - 262: syscalls.Undocumented("fstatat", Fstatat), - 263: syscalls.Undocumented("unlinkat", Unlinkat), - 264: syscalls.Undocumented("renameat", Renameat), - 265: syscalls.Undocumented("linkat", Linkat), - 266: syscalls.Undocumented("symlinkat", Symlinkat), - 267: syscalls.Undocumented("readlinkat", Readlinkat), - 268: syscalls.Undocumented("fchmodat", Fchmodat), - 269: syscalls.Undocumented("faccessat", Faccessat), - 270: syscalls.Undocumented("pselect", Pselect), - 271: syscalls.Undocumented("ppoll", Ppoll), - 272: syscalls.Undocumented("unshare", Unshare), - 273: syscalls.Error("set_robust_list", syscall.ENOSYS, "Obsolete", nil), - 274: syscalls.Error("get_robust_list", syscall.ENOSYS, "Obsolete", nil), - 275: syscalls.PartiallySupported("splice", Splice, "Stub implementation", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) - 276: syscalls.ErrorWithEvent("tee", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) - 277: syscalls.Undocumented("sync_file_range", SyncFileRange), + 257: syscalls.Supported("openat", Openat), + 258: syscalls.Supported("mkdirat", Mkdirat), + 259: syscalls.Supported("mknodat", Mknodat), + 260: syscalls.Supported("fchownat", Fchownat), + 261: syscalls.Supported("futimesat", Futimesat), + 262: syscalls.Supported("fstatat", Fstatat), + 263: syscalls.Supported("unlinkat", Unlinkat), + 264: syscalls.Supported("renameat", Renameat), + 265: syscalls.Supported("linkat", Linkat), + 266: syscalls.Supported("symlinkat", Symlinkat), + 267: syscalls.Supported("readlinkat", Readlinkat), + 268: syscalls.Supported("fchmodat", Fchmodat), + 269: syscalls.Supported("faccessat", Faccessat), + 270: syscalls.Supported("pselect", Pselect), + 271: syscalls.Supported("ppoll", Ppoll), + 272: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil), + 273: syscalls.Error("set_robust_list", syscall.ENOSYS, "Obsolete.", nil), + 274: syscalls.Error("get_robust_list", syscall.ENOSYS, "Obsolete.", nil), + 275: syscalls.PartiallySupported("splice", Splice, "Stub implementation.", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 276: syscalls.ErrorWithEvent("tee", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil), 278: syscalls.ErrorWithEvent("vmsplice", syscall.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) 279: syscalls.CapError("move_pages", linux.CAP_SYS_NICE, "", nil), // requires cap_sys_nice (mostly) - 280: syscalls.Undocumented("utimensat", Utimensat), - 281: syscalls.Undocumented("epoll_pwait", EpollPwait), + 280: syscalls.Supported("utimensat", Utimensat), + 281: syscalls.Supported("epoll_pwait", EpollPwait), 282: syscalls.ErrorWithEvent("signalfd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426) - 283: syscalls.Undocumented("timerfd_create", TimerfdCreate), - 284: syscalls.Undocumented("eventfd", Eventfd), - 285: syscalls.Undocumented("fallocate", Fallocate), - 286: syscalls.Undocumented("timerfd_settime", TimerfdSettime), - 287: syscalls.Undocumented("timerfd_gettime", TimerfdGettime), - 288: syscalls.Undocumented("accept4", Accept4), + 283: syscalls.Supported("timerfd_create", TimerfdCreate), + 284: syscalls.Supported("eventfd", Eventfd), + 285: syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil), + 286: syscalls.Supported("timerfd_settime", TimerfdSettime), + 287: syscalls.Supported("timerfd_gettime", TimerfdGettime), + 288: syscalls.Supported("accept4", Accept4), 289: syscalls.ErrorWithEvent("signalfd4", syscall.ENOSYS, "", []string{"gvisor.dev/issue/139"}), // TODO(b/19846426) - 290: syscalls.Undocumented("eventfd2", Eventfd2), - 291: syscalls.Undocumented("epoll_create1", EpollCreate1), - 292: syscalls.Undocumented("dup3", Dup3), - 293: syscalls.Undocumented("pipe2", Pipe2), - 294: syscalls.Undocumented("inotify_init1", InotifyInit1), - 295: syscalls.Undocumented("preadv", Preadv), - 296: syscalls.Undocumented("pwritev", Pwritev), - 297: syscalls.Undocumented("rt_tgsigqueueinfo", RtTgsigqueueinfo), + 290: syscalls.Supported("eventfd2", Eventfd2), + 291: syscalls.Supported("epoll_create1", EpollCreate1), + 292: syscalls.Supported("dup3", Dup3), + 293: syscalls.Supported("pipe2", Pipe2), + 294: syscalls.Supported("inotify_init1", InotifyInit1), + 295: syscalls.Supported("preadv", Preadv), + 296: syscalls.Supported("pwritev", Pwritev), + 297: syscalls.Supported("rt_tgsigqueueinfo", RtTgsigqueueinfo), 298: syscalls.ErrorWithEvent("perf_event_open", syscall.ENODEV, "No support for perf counters", nil), - 299: syscalls.Undocumented("recvmmsg", RecvMMsg), + 299: syscalls.PartiallySupported("recvmmsg", RecvMMsg, "Not all flags and control messages are supported.", nil), 300: syscalls.ErrorWithEvent("fanotify_init", syscall.ENOSYS, "Needs CONFIG_FANOTIFY", nil), 301: syscalls.ErrorWithEvent("fanotify_mark", syscall.ENOSYS, "Needs CONFIG_FANOTIFY", nil), - 302: syscalls.Undocumented("prlimit64", Prlimit64), - 303: syscalls.ErrorWithEvent("name_to_handle_at", syscall.EOPNOTSUPP, "Needs filesystem support", nil), - 304: syscalls.ErrorWithEvent("open_by_handle_at", syscall.EOPNOTSUPP, "Needs filesystem support", nil), + 302: syscalls.Supported("prlimit64", Prlimit64), + 303: syscalls.Error("name_to_handle_at", syscall.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), + 304: syscalls.Error("open_by_handle_at", syscall.EOPNOTSUPP, "Not supported by gVisor filesystems", nil), 305: syscalls.CapError("clock_adjtime", linux.CAP_SYS_TIME, "", nil), - 306: syscalls.Undocumented("syncfs", Syncfs), - 307: syscalls.Undocumented("sendmmsg", SendMMsg), + 306: syscalls.PartiallySupported("syncfs", Syncfs, "Depends on backing file system.", nil), + 307: syscalls.PartiallySupported("sendmmsg", SendMMsg, "Not all flags and control messages are supported.", nil), 308: syscalls.ErrorWithEvent("setns", syscall.EOPNOTSUPP, "Needs filesystem support", []string{"gvisor.dev/issue/140"}), // TODO(b/29354995) - 309: syscalls.Undocumented("getcpu", Getcpu), + 309: syscalls.Supported("getcpu", Getcpu), 310: syscalls.ErrorWithEvent("process_vm_readv", syscall.ENOSYS, "", []string{"gvisor.dev/issue/158"}), 311: syscalls.ErrorWithEvent("process_vm_writev", syscall.ENOSYS, "", []string{"gvisor.dev/issue/158"}), 312: syscalls.CapError("kcmp", linux.CAP_SYS_PTRACE, "", nil), @@ -365,20 +365,21 @@ var AMD64 = &kernel.SyscallTable{ 314: syscalls.ErrorWithEvent("sched_setattr", syscall.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) 315: syscalls.ErrorWithEvent("sched_getattr", syscall.ENOSYS, "gVisor does not implement a scheduler.", []string{"gvisor.dev/issue/264"}), // TODO(b/118902272) 316: syscalls.ErrorWithEvent("renameat2", syscall.ENOSYS, "", []string{"gvisor.dev/issue/263"}), // TODO(b/118902772) - 317: syscalls.Undocumented("seccomp", Seccomp), - 318: syscalls.Undocumented("getrandom", GetRandom), - 319: syscalls.Undocumented("memfd_create", MemfdCreate), + 317: syscalls.Supported("seccomp", Seccomp), + 318: syscalls.Supported("getrandom", GetRandom), + 319: syscalls.Supported("memfd_create", MemfdCreate), 320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil), 321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), 322: syscalls.ErrorWithEvent("execveat", syscall.ENOSYS, "", []string{"gvisor.dev/issue/265"}), // TODO(b/118901836) 323: syscalls.ErrorWithEvent("userfaultfd", syscall.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) 324: syscalls.ErrorWithEvent("membarrier", syscall.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(b/118904897) - 325: syscalls.Undocumented("mlock2", Mlock2), + 325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), // Syscalls after 325 are "backports" from versions of Linux after 4.4. 326: syscalls.ErrorWithEvent("copy_file_range", syscall.ENOSYS, "", nil), - 327: syscalls.Undocumented("preadv2", Preadv2), - 328: syscalls.Undocumented("pwritev2", Pwritev2), + 327: syscalls.Supported("preadv2", Preadv2), + 328: syscalls.PartiallySupported("pwritev2", Pwritev2, "Flag RWF_HIPRI is not supported.", nil), + 332: syscalls.Supported("statx", Statx), }, Emulate: map[usermem.Addr]uintptr{ diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go index 5438b664b..00b7e7cf2 100644 --- a/pkg/sentry/syscalls/linux/sigset.go +++ b/pkg/sentry/syscalls/linux/sigset.go @@ -17,10 +17,10 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // copyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go index 1b27b2415..f56411bfe 100644 --- a/pkg/sentry/syscalls/linux/sys_aio.go +++ b/pkg/sentry/syscalls/linux/sys_aio.go @@ -17,15 +17,14 @@ package linux import ( "encoding/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // I/O commands. @@ -55,7 +54,7 @@ type ioCallback struct { OpCode uint16 ReqPrio int16 - FD uint32 + FD int32 Buf uint64 Bytes uint64 @@ -65,7 +64,7 @@ type ioCallback struct { Flags uint32 // eventfd to signal if IOCB_FLAG_RESFD is set in flags. - ResFD uint32 + ResFD int32 } // ioEvent describes an I/O result. @@ -292,7 +291,7 @@ func performCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *ioC // submitCallback processes a single callback. func submitCallback(t *kernel.Task, id uint64, cb *ioCallback, cbAddr usermem.Addr) error { - file := t.FDMap().GetFile(kdefs.FD(cb.FD)) + file := t.GetFile(cb.FD) if file == nil { // File not found. return syserror.EBADF @@ -302,7 +301,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *ioCallback, cbAddr usermem.Ad // Was there an eventFD? Extract it. var eventFile *fs.File if cb.Flags&_IOCB_FLAG_RESFD != 0 { - eventFile = t.FDMap().GetFile(kdefs.FD(cb.ResFD)) + eventFile = t.GetFile(cb.ResFD) if eventFile == nil { // Bad FD. return syserror.EBADF diff --git a/pkg/sentry/syscalls/linux/sys_capability.go b/pkg/sentry/syscalls/linux/sys_capability.go index 622cb8d0d..adf5ea5f2 100644 --- a/pkg/sentry/syscalls/linux/sys_capability.go +++ b/pkg/sentry/syscalls/linux/sys_capability.go @@ -15,11 +15,11 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" ) func lookupCaps(t *kernel.Task, tid kernel.ThreadID) (permitted, inheritable, effective auth.CapabilitySet, err error) { diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go index 1467feb4e..a0dd4d4e3 100644 --- a/pkg/sentry/syscalls/linux/sys_epoll.go +++ b/pkg/sentry/syscalls/linux/sys_epoll.go @@ -17,14 +17,13 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" + "gvisor.dev/gvisor/pkg/sentry/syscalls" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // EpollCreate1 implements the epoll_create1(2) linux syscall. @@ -61,9 +60,9 @@ func EpollCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // EpollCtl implements the epoll_ctl(2) linux syscall. func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - epfd := kdefs.FD(args[0].Int()) + epfd := args[0].Int() op := args[1].Int() - fd := kdefs.FD(args[2].Int()) + fd := args[2].Int() eventAddr := args[3].Pointer() // Capture the event state if needed. @@ -132,7 +131,7 @@ func copyOutEvents(t *kernel.Task, addr usermem.Addr, e []epoll.Event) error { // EpollWait implements the epoll_wait(2) linux syscall. func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - epfd := kdefs.FD(args[0].Int()) + epfd := args[0].Int() eventsAddr := args[1].Pointer() maxEvents := int(args[2].Int()) timeout := int(args[3].Int()) diff --git a/pkg/sentry/syscalls/linux/sys_eventfd.go b/pkg/sentry/syscalls/linux/sys_eventfd.go index ca4ead488..5cfc2c3c8 100644 --- a/pkg/sentry/syscalls/linux/sys_eventfd.go +++ b/pkg/sentry/syscalls/linux/sys_eventfd.go @@ -17,10 +17,10 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd" ) const ( @@ -47,10 +47,9 @@ func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc }) defer event.DecRef() - fd, err := t.FDMap().NewFDFrom(0, event, kernel.FDFlags{ + fd, err := t.NewFDFrom(0, event, kernel.FDFlags{ CloseOnExec: flags&EFD_CLOEXEC != 0, - }, - t.ThreadGroup().Limits()) + }) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 19f579930..eb6f5648f 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -17,31 +17,29 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/lock" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/fasync" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/fasync" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // fileOpAt performs an operation on the second last component in the path. -func fileOpAt(t *kernel.Task, dirFD kdefs.FD, path string, fn func(root *fs.Dirent, d *fs.Dirent, name string) error) error { +func fileOpAt(t *kernel.Task, dirFD int32, path string, fn func(root *fs.Dirent, d *fs.Dirent, name string, remainingTraversals uint) error) error { // Extract the last component. dir, name := fs.SplitLast(path) if dir == "/" { // Common case: we are accessing a file in the root. root := t.FSContext().RootDirectory() - err := fn(root, root, name) + err := fn(root, root, name, linux.MaxSymlinkTraversals) root.DecRef() return err } else if dir == "." && dirFD == linux.AT_FDCWD { @@ -49,19 +47,19 @@ func fileOpAt(t *kernel.Task, dirFD kdefs.FD, path string, fn func(root *fs.Dire // working directory; skip the look-up. wd := t.FSContext().WorkingDirectory() root := t.FSContext().RootDirectory() - err := fn(root, wd, name) + err := fn(root, wd, name, linux.MaxSymlinkTraversals) wd.DecRef() root.DecRef() return err } - return fileOpOn(t, dirFD, dir, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - return fn(root, d, name) + return fileOpOn(t, dirFD, dir, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, remainingTraversals uint) error { + return fn(root, d, name, remainingTraversals) }) } // fileOpOn performs an operation on the last entry of the path. -func fileOpOn(t *kernel.Task, dirFD kdefs.FD, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error { +func fileOpOn(t *kernel.Task, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent, remainingTraversals uint) error) error { var ( d *fs.Dirent // The file. wd *fs.Dirent // The working directory (if required.) @@ -79,7 +77,7 @@ func fileOpOn(t *kernel.Task, dirFD kdefs.FD, path string, resolve bool, fn func rel = wd } else { // Need to extract the given FD. - f = t.FDMap().GetFile(dirFD) + f = t.GetFile(dirFD) if f == nil { return syserror.EBADF } @@ -110,7 +108,7 @@ func fileOpOn(t *kernel.Task, dirFD kdefs.FD, path string, resolve bool, fn func return err } - err = fn(root, d) + err = fn(root, d, remainingTraversals) d.DecRef() return err } @@ -132,14 +130,14 @@ func copyInPath(t *kernel.Task, addr usermem.Addr, allowEmpty bool) (path string return path, dirPath, nil } -func openAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint) (fd uintptr, err error) { +func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uintptr, err error) { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return 0, err } resolve := flags&linux.O_NOFOLLOW == 0 - err = fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + err = fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // First check a few things about the filesystem before trying to get the file // reference. // @@ -185,8 +183,9 @@ func openAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint) (fd u defer file.DecRef() // Success. - fdFlags := kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0} - newFD, err := t.FDMap().NewFDFrom(0, file, fdFlags, t.ThreadGroup().Limits()) + newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{ + CloseOnExec: flags&linux.O_CLOEXEC != 0, + }) if err != nil { return err } @@ -202,7 +201,7 @@ func openAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint) (fd u return fd, err // Use result in frame. } -func mknodAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, mode linux.FileMode) error { +func mknodAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -211,7 +210,7 @@ func mknodAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, mode linux.FileM return syserror.ENOENT } - return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -286,7 +285,7 @@ func Mknod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Mknodat implements the linux syscall mknodat(2). func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() path := args[1].Pointer() mode := linux.FileMode(args[2].ModeT()) // We don't need this argument until we support creation of device nodes. @@ -295,7 +294,7 @@ func Mknodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca return 0, nil, mknodAt(t, dirFD, path, mode) } -func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) { +func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode linux.FileMode) (fd uintptr, err error) { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return 0, err @@ -304,45 +303,105 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod return 0, syserror.ENOENT } - err = fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { - if !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR - } - - fileFlags := linuxToFlags(flags) - // Linux always adds the O_LARGEFILE flag when running in 64-bit mode. - fileFlags.LargeFile = true + fileFlags := linuxToFlags(flags) + // Linux always adds the O_LARGEFILE flag when running in 64-bit mode. + fileFlags.LargeFile = true + + err = fileOpAt(t, dirFD, path, func(root *fs.Dirent, parent *fs.Dirent, name string, remainingTraversals uint) error { + // Resolve the name to see if it exists, and follow any + // symlinks along the way. We must do the symlink resolution + // manually because if the symlink target does not exist, we + // must create the target (and not the symlink itself). + var ( + found *fs.Dirent + err error + ) + for { + if !fs.IsDir(parent.Inode.StableAttr) { + return syserror.ENOTDIR + } - // Does this file exist already? - remainingTraversals := uint(linux.MaxSymlinkTraversals) - targetDirent, err := t.MountNamespace().FindInode(t, root, d, name, &remainingTraversals) - var newFile *fs.File - switch err { - case nil: - // The file existed. - defer targetDirent.DecRef() + // Start by looking up the dirent at 'name'. + found, err = t.MountNamespace().FindLink(t, root, parent, name, &remainingTraversals) + if err != nil { + break + } + defer found.DecRef() - // Check if we wanted to create. + // We found something (possibly a symlink). If the + // O_EXCL flag was passed, then we can immediately + // return EEXIST. if flags&linux.O_EXCL != 0 { return syserror.EEXIST } + // If we have a non-symlink, then we can proceed. + if !fs.IsSymlink(found.Inode.StableAttr) { + break + } + + // If O_NOFOLLOW was passed, then don't try to resolve + // anything. + if flags&linux.O_NOFOLLOW != 0 { + return syserror.ELOOP + } + + // Try to resolve the symlink directly to a Dirent. + var resolved *fs.Dirent + resolved, err = found.Inode.Getlink(t) + if err == nil { + // No more resolution necessary. + defer resolved.DecRef() + break + } else if err != fs.ErrResolveViaReadlink { + return err + } + + // Are we able to resolve further? + if remainingTraversals == 0 { + return syscall.ELOOP + } + + // Resolve the symlink to a path via Readlink. + path, err := found.Inode.Readlink(t) + if err != nil { + break + } + remainingTraversals-- + + // Get the new parent from the target path. + newParentPath, newName := fs.SplitLast(path) + newParent, err := t.MountNamespace().FindInode(t, root, parent, newParentPath, &remainingTraversals) + if err != nil { + break + } + defer newParent.DecRef() + + // Repeat the process with the parent and name of the + // symlink target. + parent = newParent + name = newName + } + + var newFile *fs.File + switch err { + case nil: // Like sys_open, check for a few things about the // filesystem before trying to get a reference to the // fs.File. The same constraints on Check apply. - if err := targetDirent.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil { + if err := found.Inode.CheckPermission(t, flagsToPermissions(flags)); err != nil { return err } // Should we truncate the file? if flags&linux.O_TRUNC != 0 { - if err := targetDirent.Inode.Truncate(t, targetDirent, 0); err != nil { + if err := found.Inode.Truncate(t, found, 0); err != nil { return err } } // Create a new fs.File. - newFile, err = targetDirent.Inode.GetFile(t, targetDirent, fileFlags) + newFile, err = found.Inode.GetFile(t, found, fileFlags) if err != nil { return syserror.ConvertIntr(err, kernel.ERESTARTSYS) } @@ -351,26 +410,27 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod // File does not exist. Proceed with creation. // Do we have write permissions on the parent? - if err := d.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { + if err := parent.Inode.CheckPermission(t, fs.PermMask{Write: true, Execute: true}); err != nil { return err } // Attempt a creation. perms := fs.FilePermsFromMode(mode &^ linux.FileMode(t.FSContext().Umask())) - newFile, err = d.Create(t, root, name, fileFlags, perms) + newFile, err = parent.Create(t, root, name, fileFlags, perms) if err != nil { // No luck, bail. return err } defer newFile.DecRef() - targetDirent = newFile.Dirent + found = newFile.Dirent default: return err } // Success. - fdFlags := kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0} - newFD, err := t.FDMap().NewFDFrom(0, newFile, fdFlags, t.ThreadGroup().Limits()) + newFD, err := t.NewFDFrom(0, newFile, kernel.FDFlags{ + CloseOnExec: flags&linux.O_CLOEXEC != 0, + }) if err != nil { return err } @@ -379,10 +439,10 @@ func createAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, flags uint, mod fd = uintptr(newFD) // Queue the open inotify event. The creation event is - // automatically queued when the dirent is targetDirent. The - // open events are implemented at the syscall layer so we need - // to manually queue one here. - targetDirent.InotifyEvent(linux.IN_OPEN, 0) + // automatically queued when the dirent is found. The open + // events are implemented at the syscall layer so we need to + // manually queue one here. + found.InotifyEvent(linux.IN_OPEN, 0) return nil }) @@ -404,7 +464,7 @@ func Open(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Openat implements linux syscall openat(2). func Openat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() flags := uint(args[2].Uint()) if flags&linux.O_CREAT != 0 { @@ -443,7 +503,7 @@ func (ac accessContext) Value(key interface{}) interface{} { } } -func accessAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, resolve bool, mode uint) error { +func accessAt(t *kernel.Task, dirFD int32, addr usermem.Addr, resolve bool, mode uint) error { const rOK = 4 const wOK = 2 const xOK = 1 @@ -458,7 +518,7 @@ func accessAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, resolve bool, m return syserror.EINVAL } - return fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return fileOpOn(t, dirFD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // access(2) and faccessat(2) check permissions using real // UID/GID, not effective UID/GID. // @@ -498,7 +558,7 @@ func Access(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Faccessat implements linux syscall faccessat(2). func Faccessat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() mode := args[2].ModeT() flags := args[3].Int() @@ -508,10 +568,10 @@ func Faccessat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Ioctl implements linux syscall ioctl(2). func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() request := int(args[1].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -520,12 +580,12 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Shared flags between file and socket. switch request { case linux.FIONCLEX: - t.FDMap().SetFlags(fd, kernel.FDFlags{ + t.FDTable().SetFlags(fd, kernel.FDFlags{ CloseOnExec: false, }) return 0, nil, nil case linux.FIOCLEX: - t.FDMap().SetFlags(fd, kernel.FDFlags{ + t.FDTable().SetFlags(fd, kernel.FDFlags{ CloseOnExec: true, }) return 0, nil, nil @@ -572,7 +632,7 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err default: - ret, err := file.FileOperations.Ioctl(t, t.MemoryManager(), args) + ret, err := file.FileOperations.Ioctl(t, file, t.MemoryManager(), args) if err != nil { return 0, nil, err } @@ -626,7 +686,7 @@ func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Is it a directory? if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR @@ -651,7 +711,7 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Is it a directory? if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR @@ -669,9 +729,9 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Fchdir implements the linux syscall fchdir(2). func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -693,10 +753,13 @@ func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Close implements linux syscall close(2). func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file, ok := t.FDMap().Remove(fd) - if !ok { + // Note that Remove provides a reference on the file that we may use to + // flush. It is still active until we drop the final reference below + // (and other reference-holding operations complete). + file := t.FDTable().Remove(fd) + if file == nil { return 0, nil, syserror.EBADF } defer file.DecRef() @@ -707,30 +770,30 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Dup implements linux syscall dup(2). func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } defer file.DecRef() - newfd, err := t.FDMap().NewFDFrom(0, file, kernel.FDFlags{}, t.ThreadGroup().Limits()) + newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{}) if err != nil { return 0, nil, syserror.EMFILE } - return uintptr(newfd), nil, nil + return uintptr(newFD), nil, nil } // Dup2 implements linux syscall dup2(2). func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - oldfd := kdefs.FD(args[0].Int()) - newfd := kdefs.FD(args[1].Int()) + oldfd := args[0].Int() + newfd := args[1].Int() // If oldfd is a valid file descriptor, and newfd has the same value as oldfd, // then dup2() does nothing, and returns newfd. if oldfd == newfd { - oldFile := t.FDMap().GetFile(oldfd) + oldFile := t.GetFile(oldfd) if oldFile == nil { return 0, nil, syserror.EBADF } @@ -746,21 +809,21 @@ func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Dup3 implements linux syscall dup3(2). func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - oldfd := kdefs.FD(args[0].Int()) - newfd := kdefs.FD(args[1].Int()) + oldfd := args[0].Int() + newfd := args[1].Int() flags := args[2].Uint() if oldfd == newfd { return 0, nil, syserror.EINVAL } - oldFile := t.FDMap().GetFile(oldfd) + oldFile := t.GetFile(oldfd) if oldFile == nil { return 0, nil, syserror.EBADF } defer oldFile.DecRef() - err := t.FDMap().NewFDAt(newfd, oldFile, kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0}, t.ThreadGroup().Limits()) + err := t.NewFDAt(newfd, oldFile, kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0}) if err != nil { return 0, nil, err } @@ -803,10 +866,10 @@ func fSetOwn(t *kernel.Task, file *fs.File, who int32) { // Fcntl implements linux syscall fcntl(2). func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() cmd := args[1].Int() - file, flags := t.FDMap().GetDescriptor(fd) + file, flags := t.FDTable().Get(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -814,9 +877,10 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall switch cmd { case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC: - from := kdefs.FD(args[2].Int()) - fdFlags := kernel.FDFlags{CloseOnExec: cmd == linux.F_DUPFD_CLOEXEC} - fd, err := t.FDMap().NewFDFrom(from, file, fdFlags, t.ThreadGroup().Limits()) + from := args[2].Int() + fd, err := t.NewFDFrom(from, file, kernel.FDFlags{ + CloseOnExec: cmd == linux.F_DUPFD_CLOEXEC, + }) if err != nil { return 0, nil, err } @@ -825,7 +889,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return uintptr(flags.ToLinuxFDFlags()), nil, nil case linux.F_SETFD: flags := args[2].Uint() - t.FDMap().SetFlags(fd, kernel.FDFlags{ + t.FDTable().SetFlags(fd, kernel.FDFlags{ CloseOnExec: flags&linux.FD_CLOEXEC != 0, }) case linux.F_GETFL: @@ -886,8 +950,8 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } - // The lock uid is that of the Task's FDMap. - lockUniqueID := lock.UniqueID(t.FDMap().ID()) + // The lock uid is that of the Task's FDTable. + lockUniqueID := lock.UniqueID(t.FDTable().ID()) // These locks don't block; execute the non-blocking operation using the inode's lock // context directly. @@ -945,17 +1009,18 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall err := tmpfs.AddSeals(file.Dirent.Inode, args[2].Uint()) return 0, nil, err case linux.F_GETPIPE_SZ: - sz, ok := file.FileOperations.(pipe.Sizer) + sz, ok := file.FileOperations.(fs.FifoSizer) if !ok { return 0, nil, syserror.EINVAL } - return uintptr(sz.PipeSize()), nil, nil + size, err := sz.FifoSize(t, file) + return uintptr(size), nil, err case linux.F_SETPIPE_SZ: - sz, ok := file.FileOperations.(pipe.Sizer) + sz, ok := file.FileOperations.(fs.FifoSizer) if !ok { return 0, nil, syserror.EINVAL } - n, err := sz.SetPipeSize(int64(args[2].Int())) + n, err := sz.SetFifoSize(int64(args[2].Int())) return uintptr(n), nil, err default: // Everything else is not yet supported. @@ -976,7 +1041,7 @@ const ( // Fadvise64 implements linux syscall fadvise64(2). // This implementation currently ignores the provided advice. func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() length := args[2].Int64() advice := args[3].Int() @@ -985,7 +1050,7 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, syserror.EINVAL } - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1011,13 +1076,13 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, nil } -func mkdirAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, mode linux.FileMode) error { +func mkdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err } - return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1056,14 +1121,14 @@ func Mkdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Mkdirat implements linux syscall mkdirat(2). func Mkdirat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() mode := linux.FileMode(args[2].ModeT()) return 0, nil, mkdirAt(t, dirFD, addr, mode) } -func rmdirAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error { +func rmdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -1074,7 +1139,7 @@ func rmdirAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error { return syserror.EBUSY } - return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1103,7 +1168,7 @@ func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, rmdirAt(t, linux.AT_FDCWD, addr) } -func symlinkAt(t *kernel.Task, dirFD kdefs.FD, newAddr usermem.Addr, oldAddr usermem.Addr) error { +func symlinkAt(t *kernel.Task, dirFD int32, newAddr usermem.Addr, oldAddr usermem.Addr) error { newPath, dirPath, err := copyInPath(t, newAddr, false /* allowEmpty */) if err != nil { return err @@ -1122,7 +1187,7 @@ func symlinkAt(t *kernel.Task, dirFD kdefs.FD, newAddr usermem.Addr, oldAddr use return syserror.ENOENT } - return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, newPath, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1146,7 +1211,7 @@ func Symlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Symlinkat implements linux syscall symlinkat(2). func Symlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { oldAddr := args[0].Pointer() - dirFD := kdefs.FD(args[1].Int()) + dirFD := args[1].Int() newAddr := args[2].Pointer() return 0, nil, symlinkAt(t, dirFD, newAddr, oldAddr) @@ -1188,7 +1253,7 @@ func mayLinkAt(t *kernel.Task, target *fs.Inode) error { // linkAt creates a hard link to the target specified by oldDirFD and oldAddr, // specified by newDirFD and newAddr. If resolve is true, then the symlinks // will be followed when evaluating the target. -func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kdefs.FD, newAddr usermem.Addr, resolve, allowEmpty bool) error { +func linkAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr, resolve, allowEmpty bool) error { oldPath, _, err := copyInPath(t, oldAddr, allowEmpty) if err != nil { return err @@ -1202,7 +1267,7 @@ func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kd } if allowEmpty && oldPath == "" { - target := t.FDMap().GetFile(oldDirFD) + target := t.GetFile(oldDirFD) if target == nil { return syserror.EBADF } @@ -1212,7 +1277,7 @@ func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kd } // Resolve the target directory. - return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string) error { + return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { if !fs.IsDir(newParent.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1227,13 +1292,13 @@ func linkAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kd // Resolve oldDirFD and oldAddr to a dirent. The "resolve" argument // only applies to this name. - return fileOpOn(t, oldDirFD, oldPath, resolve, func(root *fs.Dirent, target *fs.Dirent) error { + return fileOpOn(t, oldDirFD, oldPath, resolve, func(root *fs.Dirent, target *fs.Dirent, _ uint) error { if err := mayLinkAt(t, target.Inode); err != nil { return err } // Next resolve newDirFD and newAddr to the parent dirent and name. - return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string) error { + return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { if !fs.IsDir(newParent.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1264,9 +1329,9 @@ func Link(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Linkat implements linux syscall linkat(2). func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - oldDirFD := kdefs.FD(args[0].Int()) + oldDirFD := args[0].Int() oldAddr := args[1].Pointer() - newDirFD := kdefs.FD(args[2].Int()) + newDirFD := args[2].Int() newAddr := args[3].Pointer() // man linkat(2): @@ -1291,7 +1356,7 @@ func Linkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, linkAt(t, oldDirFD, oldAddr, newDirFD, newAddr, resolve, allowEmpty) } -func readlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, bufAddr usermem.Addr, size uint) (copied uintptr, err error) { +func readlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr, bufAddr usermem.Addr, size uint) (copied uintptr, err error) { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return 0, err @@ -1300,7 +1365,7 @@ func readlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, bufAddr userm return 0, syserror.ENOENT } - err = fileOpOn(t, dirFD, path, false /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + err = fileOpOn(t, dirFD, path, false /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Check for Read permission. if err := d.Inode.CheckPermission(t, fs.PermMask{Read: true}); err != nil { return err @@ -1341,7 +1406,7 @@ func Readlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Readlinkat implements linux syscall readlinkat(2). func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() bufAddr := args[2].Pointer() size := args[3].SizeT() @@ -1350,7 +1415,7 @@ func Readlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return n, nil, err } -func unlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error { +func unlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error { path, dirPath, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err @@ -1359,7 +1424,7 @@ func unlinkAt(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr) error { return syserror.ENOENT } - return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string) error { + return fileOpAt(t, dirFD, path, func(root *fs.Dirent, d *fs.Dirent, name string, _ uint) error { if !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1380,7 +1445,7 @@ func Unlink(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Unlinkat implements linux syscall unlinkat(2). func Unlinkat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() flags := args[2].Uint() if flags&linux.AT_REMOVEDIR != 0 { @@ -1414,7 +1479,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, syserror.EFBIG } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { if fs.IsDir(d.Inode.StableAttr) { return syserror.EISDIR } @@ -1441,10 +1506,10 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Ftruncate implements linux syscall ftruncate(2). func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() length := args[1].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1559,7 +1624,7 @@ func chown(t *kernel.Task, d *fs.Dirent, uid auth.UID, gid auth.GID) error { return nil } -func chownAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error { +func chownAt(t *kernel.Task, fd int32, addr usermem.Addr, resolve, allowEmpty bool, uid auth.UID, gid auth.GID) error { path, _, err := copyInPath(t, addr, allowEmpty) if err != nil { return err @@ -1567,7 +1632,7 @@ func chownAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, resolve, allowEmpty if path == "" { // Annoying. What's wrong with fchown? - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return syserror.EBADF } @@ -1576,7 +1641,7 @@ func chownAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, resolve, allowEmpty return chown(t, file.Dirent, uid, gid) } - return fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return chown(t, d, uid, gid) }) } @@ -1601,11 +1666,11 @@ func Lchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Fchown implements linux syscall fchown(2). func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() uid := auth.UID(args[1].Uint()) gid := auth.GID(args[2].Uint()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1616,7 +1681,7 @@ func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Fchownat implements Linux syscall fchownat(2). func Fchownat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() addr := args[1].Pointer() uid := auth.UID(args[2].Uint()) gid := auth.GID(args[3].Uint()) @@ -1646,13 +1711,13 @@ func chmod(t *kernel.Task, d *fs.Dirent, mode linux.FileMode) error { return nil } -func chmodAt(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, mode linux.FileMode) error { +func chmodAt(t *kernel.Task, fd int32, addr usermem.Addr, mode linux.FileMode) error { path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { return err } - return fileOpOn(t, fd, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return fileOpOn(t, fd, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return chmod(t, d, mode) }) } @@ -1667,10 +1732,10 @@ func Chmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Fchmod implements linux syscall fchmod(2). func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() mode := linux.FileMode(args[1].ModeT()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1681,7 +1746,7 @@ func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Fchmodat implements linux syscall fchmodat(2). func Fchmodat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() mode := linux.FileMode(args[2].ModeT()) @@ -1697,8 +1762,8 @@ func defaultSetToSystemTimeSpec() fs.TimeSpec { } } -func utimes(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, ts fs.TimeSpec, resolve bool) error { - setTimestamp := func(root *fs.Dirent, d *fs.Dirent) error { +func utimes(t *kernel.Task, dirFD int32, addr usermem.Addr, ts fs.TimeSpec, resolve bool) error { + setTimestamp := func(root *fs.Dirent, d *fs.Dirent, _ uint) error { // Does the task own the file? if !d.Inode.CheckOwnership(t) { // Trying to set a specific time? Must be owner. @@ -1730,7 +1795,7 @@ func utimes(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, ts fs.TimeSpec, r // Linux returns EINVAL in this case. See utimes.c. return syserror.EINVAL } - f := t.FDMap().GetFile(dirFD) + f := t.GetFile(dirFD) if f == nil { return syserror.EBADF } @@ -1739,7 +1804,7 @@ func utimes(t *kernel.Task, dirFD kdefs.FD, addr usermem.Addr, ts fs.TimeSpec, r root := t.FSContext().RootDirectory() defer root.DecRef() - return setTimestamp(root, f.Dirent) + return setTimestamp(root, f.Dirent, linux.MaxSymlinkTraversals) } path, _, err := copyInPath(t, addr, false /* allowEmpty */) @@ -1798,7 +1863,7 @@ func timespecIsValid(ts linux.Timespec) bool { // Utimensat implements linux syscall utimensat(2). func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() pathnameAddr := args[1].Pointer() timesAddr := args[2].Pointer() flags := args[3].Int() @@ -1833,7 +1898,7 @@ func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Futimesat implements linux syscall futimesat(2). func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - dirFD := kdefs.FD(args[0].Int()) + dirFD := args[0].Int() pathnameAddr := args[1].Pointer() timesAddr := args[2].Pointer() @@ -1857,7 +1922,7 @@ func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, utimes(t, dirFD, pathnameAddr, ts, true) } -func renameAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD kdefs.FD, newAddr usermem.Addr) error { +func renameAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32, newAddr usermem.Addr) error { newPath, _, err := copyInPath(t, newAddr, false /* allowEmpty */) if err != nil { return err @@ -1867,7 +1932,7 @@ func renameAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD return err } - return fileOpAt(t, oldDirFD, oldPath, func(root *fs.Dirent, oldParent *fs.Dirent, oldName string) error { + return fileOpAt(t, oldDirFD, oldPath, func(root *fs.Dirent, oldParent *fs.Dirent, oldName string, _ uint) error { if !fs.IsDir(oldParent.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1879,7 +1944,7 @@ func renameAt(t *kernel.Task, oldDirFD kdefs.FD, oldAddr usermem.Addr, newDirFD return syserror.EBUSY } - return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string) error { + return fileOpAt(t, newDirFD, newPath, func(root *fs.Dirent, newParent *fs.Dirent, newName string, _ uint) error { if !fs.IsDir(newParent.Inode.StableAttr) { return syserror.ENOTDIR } @@ -1905,21 +1970,21 @@ func Rename(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Renameat implements linux syscall renameat(2). func Renameat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - oldDirFD := kdefs.FD(args[0].Int()) + oldDirFD := args[0].Int() oldPathAddr := args[1].Pointer() - newDirFD := kdefs.FD(args[2].Int()) + newDirFD := args[2].Int() newPathAddr := args[3].Pointer() return 0, nil, renameAt(t, oldDirFD, oldPathAddr, newDirFD, newPathAddr) } // Fallocate implements linux system call fallocate(2). func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() mode := args[1].Int64() offset := args[2].Int64() length := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -1968,10 +2033,10 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Flock implements linux syscall flock(2). func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() operation := args[1].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { // flock(2): EBADF fd is not an open file descriptor. return 0, nil, syserror.EBADF @@ -2067,7 +2132,7 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S name = memfdPrefix + name inode := tmpfs.NewMemfdInode(t, allowSeals) - dirent := fs.NewDirent(inode, name) + dirent := fs.NewDirent(t, inode, name) // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with // FMODE_READ | FMODE_WRITE. file, err := inode.GetFile(t, dirent, fs.FileFlags{Read: true, Write: true}) @@ -2078,8 +2143,9 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S defer dirent.DecRef() defer file.DecRef() - fdFlags := kernel.FDFlags{CloseOnExec: cloExec} - newFD, err := t.FDMap().NewFDFrom(0, file, fdFlags, t.ThreadGroup().Limits()) + newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{ + CloseOnExec: cloExec, + }) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go index 7cef4b50c..b9bd25464 100644 --- a/pkg/sentry/syscalls/linux/sys_futex.go +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -17,12 +17,12 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // futexWaitRestartBlock encapsulates the state required to restart futex(2) diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go index 1b597d5bc..7a2beda23 100644 --- a/pkg/sentry/syscalls/linux/sys_getdents.go +++ b/pkg/sentry/syscalls/linux/sys_getdents.go @@ -19,18 +19,17 @@ import ( "io" "syscall" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Getdents implements linux syscall getdents(2) for 64bit systems. func Getdents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := int(args[2].Uint()) @@ -46,7 +45,7 @@ func Getdents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Getdents64 implements linux syscall getdents64(2). func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := int(args[2].Uint()) @@ -62,8 +61,8 @@ func Getdents64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // getdents implements the core of getdents(2)/getdents64(2). // f is the syscall implementation dirent serialization function. -func getdents(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) { - dir := t.FDMap().GetFile(fd) +func getdents(t *kernel.Task, fd int32, addr usermem.Addr, size int, f func(*dirent, io.Writer) (int, error)) (uintptr, error) { + dir := t.GetFile(fd) if dir == nil { return 0, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_identity.go b/pkg/sentry/syscalls/linux/sys_identity.go index 27e765a2d..715ac45e6 100644 --- a/pkg/sentry/syscalls/linux/sys_identity.go +++ b/pkg/sentry/syscalls/linux/sys_identity.go @@ -15,10 +15,10 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_inotify.go b/pkg/sentry/syscalls/linux/sys_inotify.go index 20269a769..a4f36e01f 100644 --- a/pkg/sentry/syscalls/linux/sys_inotify.go +++ b/pkg/sentry/syscalls/linux/sys_inotify.go @@ -17,12 +17,11 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/anon" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) const allFlags = int(linux.IN_NONBLOCK | linux.IN_CLOEXEC) @@ -35,7 +34,7 @@ func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. return 0, nil, syscall.EINVAL } - dirent := fs.NewDirent(anon.NewInode(t), "inotify") + dirent := fs.NewDirent(t, anon.NewInode(t), "inotify") fileFlags := fs.FileFlags{ Read: true, Write: true, @@ -44,9 +43,9 @@ func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. n := fs.NewFile(t, dirent, fileFlags, fs.NewInotify(t)) defer n.DecRef() - fd, err := t.FDMap().NewFDFrom(0, n, kernel.FDFlags{ + fd, err := t.NewFDFrom(0, n, kernel.FDFlags{ CloseOnExec: flags&linux.IN_CLOEXEC != 0, - }, t.ThreadGroup().Limits()) + }) if err != nil { return 0, nil, err @@ -63,8 +62,8 @@ func InotifyInit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // fdToInotify resolves an fd to an inotify object. If successful, the file will // have an extra ref and the caller is responsible for releasing the ref. -func fdToInotify(t *kernel.Task, fd kdefs.FD) (*fs.Inotify, *fs.File, error) { - file := t.FDMap().GetFile(fd) +func fdToInotify(t *kernel.Task, fd int32) (*fs.Inotify, *fs.File, error) { + file := t.GetFile(fd) if file == nil { // Invalid fd. return nil, nil, syscall.EBADF @@ -82,7 +81,7 @@ func fdToInotify(t *kernel.Task, fd kdefs.FD) (*fs.Inotify, *fs.File, error) { // InotifyAddWatch implements the inotify_add_watch() syscall. func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() mask := args[2].Uint() @@ -107,14 +106,14 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern return 0, nil, err } - err = fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, dirent *fs.Dirent) error { + err = fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, dirent *fs.Dirent, _ uint) error { // "IN_ONLYDIR: Only watch pathname if it is a directory." -- inotify(7) if onlyDir := mask&linux.IN_ONLYDIR != 0; onlyDir && !fs.IsDir(dirent.Inode.StableAttr) { return syscall.ENOTDIR } // Copy out to the return frame. - fd = kdefs.FD(ino.AddWatch(dirent, mask)) + fd = ino.AddWatch(dirent, mask) return nil }) @@ -123,7 +122,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern // InotifyRmWatch implements the inotify_rm_watch() syscall. func InotifyRmWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() wd := args[1].Int() ino, file, err := fdToInotify(t, fd) diff --git a/pkg/sentry/syscalls/linux/sys_lseek.go b/pkg/sentry/syscalls/linux/sys_lseek.go index 8aadc6d8c..297e920c4 100644 --- a/pkg/sentry/syscalls/linux/sys_lseek.go +++ b/pkg/sentry/syscalls/linux/sys_lseek.go @@ -15,20 +15,19 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // Lseek implements linux syscall lseek(2). func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() offset := args[1].Int64() whence := args[2].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_mempolicy.go b/pkg/sentry/syscalls/linux/sys_mempolicy.go index 652b2c206..f5a519d8a 100644 --- a/pkg/sentry/syscalls/linux/sys_mempolicy.go +++ b/pkg/sentry/syscalls/linux/sys_mempolicy.go @@ -17,11 +17,11 @@ package linux import ( "fmt" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // We unconditionally report a single NUMA node. This also means that our diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go index 9926f0ac5..58a05b5bb 100644 --- a/pkg/sentry/syscalls/linux/sys_mmap.go +++ b/pkg/sentry/syscalls/linux/sys_mmap.go @@ -17,14 +17,13 @@ package linux import ( "bytes" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Brk implements linux syscall brk(2). @@ -40,7 +39,7 @@ func Brk(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { prot := args[2].Int() flags := args[3].Int() - fd := kdefs.FD(args[4].Int()) + fd := args[4].Int() fixed := flags&linux.MAP_FIXED != 0 private := flags&linux.MAP_PRIVATE != 0 shared := flags&linux.MAP_SHARED != 0 @@ -80,7 +79,7 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if !anon { // Convert the passed FD to a file reference. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -180,6 +179,10 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca switch adv { case linux.MADV_DONTNEED: return 0, nil, t.MemoryManager().Decommit(addr, length) + case linux.MADV_DOFORK: + return 0, nil, t.MemoryManager().SetDontFork(addr, length, false) + case linux.MADV_DONTFORK: + return 0, nil, t.MemoryManager().SetDontFork(addr, length, true) case linux.MADV_HUGEPAGE, linux.MADV_NOHUGEPAGE: fallthrough case linux.MADV_MERGEABLE, linux.MADV_UNMERGEABLE: @@ -191,7 +194,7 @@ func Madvise(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca case linux.MADV_NORMAL, linux.MADV_RANDOM, linux.MADV_SEQUENTIAL, linux.MADV_WILLNEED: // Do nothing, we totally ignore the suggestions above. return 0, nil, nil - case linux.MADV_REMOVE, linux.MADV_DOFORK, linux.MADV_DONTFORK: + case linux.MADV_REMOVE: // These "suggestions" have application-visible side effects, so we // have to indicate that we don't support them. return 0, nil, syserror.ENOSYS diff --git a/pkg/sentry/syscalls/linux/sys_mount.go b/pkg/sentry/syscalls/linux/sys_mount.go index cf613bad0..9080a10c3 100644 --- a/pkg/sentry/syscalls/linux/sys_mount.go +++ b/pkg/sentry/syscalls/linux/sys_mount.go @@ -15,12 +15,12 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Mount implements Linux syscall mount(2). @@ -109,7 +109,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, syserror.EINVAL } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, targetPath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, targetPath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return t.MountNamespace().Mount(t, d, rootInode) }) } @@ -140,7 +140,7 @@ func Umount2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca resolve := flags&linux.UMOUNT_NOFOLLOW != linux.UMOUNT_NOFOLLOW detachOnly := flags&linux.MNT_DETACH == linux.MNT_DETACH - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return t.MountNamespace().Unmount(t, d, detachOnly) }) } diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go index 036845c13..576022dd5 100644 --- a/pkg/sentry/syscalls/linux/sys_pipe.go +++ b/pkg/sentry/syscalls/linux/sys_pipe.go @@ -17,12 +17,12 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // pipe2 implements the actual system call with flags. @@ -38,24 +38,17 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) { w.SetFlags(linuxToFlags(flags).Settable()) defer w.DecRef() - rfd, err := t.FDMap().NewFDFrom(0, r, kernel.FDFlags{ - CloseOnExec: flags&linux.O_CLOEXEC != 0}, - t.ThreadGroup().Limits()) + fds, err := t.NewFDs(0, []*fs.File{r, w}, kernel.FDFlags{ + CloseOnExec: flags&linux.O_CLOEXEC != 0, + }) if err != nil { return 0, err } - wfd, err := t.FDMap().NewFDFrom(0, w, kernel.FDFlags{ - CloseOnExec: flags&linux.O_CLOEXEC != 0}, - t.ThreadGroup().Limits()) - if err != nil { - t.FDMap().Remove(rfd) - return 0, err - } - - if _, err := t.CopyOut(addr, []kdefs.FD{rfd, wfd}); err != nil { - t.FDMap().Remove(rfd) - t.FDMap().Remove(wfd) + if _, err := t.CopyOut(addr, fds); err != nil { + // The files are not closed in this case, the exact semantics + // of this error case are not well defined, but they could have + // already been observed by user space. return 0, syscall.EFAULT } return 0, nil diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index e32099dd4..7a13beac2 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -17,16 +17,15 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // fileCap is the maximum allowable files for poll & select. @@ -64,7 +63,7 @@ func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan return } - file := t.FDMap().GetFile(kdefs.FD(pfd.FD)) + file := t.GetFile(pfd.FD) if file == nil { pfd.REvents = linux.POLLNVAL return @@ -265,7 +264,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add // immediately to ensure we don't leak. Note, another thread // might be about to close fd. This is racy, but that's // OK. Linux is racy in the same way. - file := t.FDMap().GetFile(kdefs.FD(fd)) + file := t.GetFile(fd) if file == nil { return 0, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index 1b7e5616b..5329023e6 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -18,13 +18,12 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/mm" ) // Prctl implements linux syscall prctl(2). @@ -122,9 +121,9 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall switch args[1].Int() { case linux.PR_SET_MM_EXE_FILE: - fd := kdefs.FD(args[2].Int()) + fd := args[2].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_random.go b/pkg/sentry/syscalls/linux/sys_random.go index fc3959a7e..bc4c588bf 100644 --- a/pkg/sentry/syscalls/linux/sys_random.go +++ b/pkg/sentry/syscalls/linux/sys_random.go @@ -18,12 +18,12 @@ import ( "io" "math" - "gvisor.googlesource.com/gvisor/pkg/rand" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go index 48b0fd49d..b2474e60d 100644 --- a/pkg/sentry/syscalls/linux/sys_read.go +++ b/pkg/sentry/syscalls/linux/sys_read.go @@ -17,16 +17,15 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -39,11 +38,11 @@ const ( // they can do large reads all at once. Bug for bug. Same for other read // calls below. func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := args[2].SizeT() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -75,12 +74,12 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Pread64 implements linux syscall pread64(2). func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := args[2].SizeT() offset := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -122,11 +121,11 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Readv implements linux syscall readv(2). func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -152,12 +151,12 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Preadv implements linux syscall preadv(2). func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) offset := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -201,13 +200,13 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // splits the offset argument into a high/low value for compatibility with // 32-bit architectures. The flags argument is the 5th argument. - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) offset := args[3].Int64() flags := int(args[5].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go index 8b0379779..51e3f836b 100644 --- a/pkg/sentry/syscalls/linux/sys_rlimit.go +++ b/pkg/sentry/syscalls/linux/sys_rlimit.go @@ -15,12 +15,12 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // rlimit describes an implementation of 'struct rlimit', which may vary from diff --git a/pkg/sentry/syscalls/linux/sys_rusage.go b/pkg/sentry/syscalls/linux/sys_rusage.go index 003d718da..1674c7445 100644 --- a/pkg/sentry/syscalls/linux/sys_rusage.go +++ b/pkg/sentry/syscalls/linux/sys_rusage.go @@ -15,12 +15,12 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/syserror" ) func getrusage(t *kernel.Task, which int32) linux.Rusage { diff --git a/pkg/sentry/syscalls/linux/sys_sched.go b/pkg/sentry/syscalls/linux/sys_sched.go index 8aea03abe..434bbb322 100644 --- a/pkg/sentry/syscalls/linux/sys_sched.go +++ b/pkg/sentry/syscalls/linux/sys_sched.go @@ -17,9 +17,9 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_seccomp.go b/pkg/sentry/syscalls/linux/sys_seccomp.go index b4262162a..4885b5e40 100644 --- a/pkg/sentry/syscalls/linux/sys_seccomp.go +++ b/pkg/sentry/syscalls/linux/sys_seccomp.go @@ -17,11 +17,11 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/bpf" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) // userSockFprog is equivalent to Linux's struct sock_fprog on amd64. diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go index 5bd61ab87..cde3b54e7 100644 --- a/pkg/sentry/syscalls/linux/sys_sem.go +++ b/pkg/sentry/syscalls/linux/sys_sem.go @@ -17,13 +17,13 @@ package linux import ( "math" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const opsMax = 500 // SEMOPM diff --git a/pkg/sentry/syscalls/linux/sys_shm.go b/pkg/sentry/syscalls/linux/sys_shm.go index d0eceac7c..d57ffb3a1 100644 --- a/pkg/sentry/syscalls/linux/sys_shm.go +++ b/pkg/sentry/syscalls/linux/sys_shm.go @@ -15,11 +15,11 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/shm" + "gvisor.dev/gvisor/pkg/syserror" ) // Shmget implements shmget(2). diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go index 7fbeb4fcd..0104a94c0 100644 --- a/pkg/sentry/syscalls/linux/sys_signal.go +++ b/pkg/sentry/syscalls/linux/sys_signal.go @@ -18,10 +18,10 @@ import ( "math" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // "For a process to have permission to send a signal it must diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index 31295a6a9..d1165a57a 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -18,18 +18,18 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/control" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/control" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" ) // minListenBacklog is the minimum reasonable backlog for listening sockets. @@ -61,6 +61,8 @@ const controlLenOffset = 40 // to the Flags field. const flagsOffset = 48 +const sizeOfInt32 = 4 + // messageHeader64Len is the length of a MessageHeader64 struct. var messageHeader64Len = uint64(binary.Size(MessageHeader64{})) @@ -197,9 +199,9 @@ func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal }) defer s.DecRef() - fd, err := t.FDMap().NewFDFrom(0, s, kernel.FDFlags{ + fd, err := t.NewFDFrom(0, s, kernel.FDFlags{ CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, - }, t.ThreadGroup().Limits()) + }) if err != nil { return 0, nil, err } @@ -222,9 +224,6 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy fileFlags := fs.SettableFileFlags{ NonBlocking: stype&linux.SOCK_NONBLOCK != 0, } - fdFlags := kernel.FDFlags{ - CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, - } // Create the socket pair. s1, s2, e := socket.Pair(t, domain, linux.SockType(stype&0xf), protocol) @@ -237,20 +236,16 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy defer s2.DecRef() // Create the FDs for the sockets. - fd1, err := t.FDMap().NewFDFrom(0, s1, fdFlags, t.ThreadGroup().Limits()) - if err != nil { - return 0, nil, err - } - fd2, err := t.FDMap().NewFDFrom(0, s2, fdFlags, t.ThreadGroup().Limits()) + fds, err := t.NewFDs(0, []*fs.File{s1, s2}, kernel.FDFlags{ + CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, + }) if err != nil { - t.FDMap().Remove(fd1) return 0, nil, err } // Copy the file descriptors out. - if _, err := t.CopyOut(socks, []int32{int32(fd1), int32(fd2)}); err != nil { - t.FDMap().Remove(fd1) - t.FDMap().Remove(fd2) + if _, err := t.CopyOut(socks, fds); err != nil { + // Note that we don't close files here; see pipe(2) also. return 0, nil, err } @@ -259,12 +254,12 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // Connect implements the linux syscall connect(2). func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Uint() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -288,14 +283,14 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // accept is the implementation of the accept syscall. It is called by accept // and accept4 syscall handlers. -func accept(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) { +func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, flags int) (uintptr, error) { // Check that no unsupported flags are passed in. if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { return 0, syscall.EINVAL } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, syscall.EBADF } @@ -328,7 +323,7 @@ func accept(t *kernel.Task, fd kdefs.FD, addr usermem.Addr, addrLen usermem.Addr // Accept4 implements the linux syscall accept4(2). func Accept4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Pointer() flags := int(args[3].Int()) @@ -339,7 +334,7 @@ func Accept4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // Accept implements the linux syscall accept(2). func Accept(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Pointer() @@ -349,12 +344,12 @@ func Accept(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Bind implements the linux syscall bind(2). func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Uint() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -377,11 +372,11 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Listen implements the linux syscall listen(2). func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() backlog := args[1].Int() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -406,11 +401,11 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Shutdown implements the linux syscall shutdown(2). func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() how := args[1].Int() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -434,14 +429,14 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // GetSockOpt implements the linux syscall getsockopt(2). func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() level := args[1].Int() name := args[2].Int() optValAddr := args[3].Pointer() optLenAddr := args[4].Pointer() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -466,7 +461,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy } // Call syscall implementation then copy both value and value len out. - v, e := s.GetSockOpt(t, int(level), int(name), int(optLen)) + v, e := getSockOpt(t, s, int(level), int(name), int(optLen)) if e != nil { return 0, nil, e.ToError() } @@ -487,18 +482,45 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return 0, nil, nil } +// getSockOpt tries to handle common socket options, or dispatches to a specific +// socket implementation. +func getSockOpt(t *kernel.Task, s socket.Socket, level, name, len int) (interface{}, *syserr.Error) { + if level == linux.SOL_SOCKET { + switch name { + case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL: + if len < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + } + + switch name { + case linux.SO_TYPE: + _, skType, _ := s.Type() + return int32(skType), nil + case linux.SO_DOMAIN: + family, _, _ := s.Type() + return int32(family), nil + case linux.SO_PROTOCOL: + _, _, protocol := s.Type() + return int32(protocol), nil + } + } + + return s.GetSockOpt(t, level, name, len) +} + // SetSockOpt implements the linux syscall setsockopt(2). // // Note that unlike Linux, enabling SO_PASSCRED does not autobind the socket. func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() level := args[1].Int() name := args[2].Int() optValAddr := args[3].Pointer() optLen := args[4].Int() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -531,12 +553,12 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy // GetSockName implements the linux syscall getsockname(2). func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Pointer() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -559,12 +581,12 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // GetPeerName implements the linux syscall getpeername(2). func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() addrlen := args[2].Pointer() // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -587,7 +609,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S // RecvMsg implements the linux syscall recvmsg(2). func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() msgPtr := args[1].Pointer() flags := args[2].Int() @@ -597,7 +619,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -633,7 +655,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // RecvMMsg implements the linux syscall recvmmsg(2). func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() msgPtr := args[1].Pointer() vlen := args[2].Uint() flags := args[3].Int() @@ -650,7 +672,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -812,7 +834,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i // recvFrom is the implementation of the recvfrom syscall. It is called by // recvfrom and recv syscall handlers. -func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) { +func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLenPtr usermem.Addr) (uintptr, error) { if int(bufLen) < 0 { return 0, syscall.EINVAL } @@ -823,7 +845,7 @@ func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, f } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, syscall.EBADF } @@ -873,7 +895,7 @@ func recvFrom(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, f // RecvFrom implements the linux syscall recvfrom(2). func RecvFrom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() bufPtr := args[1].Pointer() bufLen := args[2].Uint64() flags := args[3].Int() @@ -886,7 +908,7 @@ func RecvFrom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // SendMsg implements the linux syscall sendmsg(2). func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() msgPtr := args[1].Pointer() flags := args[2].Int() @@ -896,7 +918,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -923,7 +945,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // SendMMsg implements the linux syscall sendmmsg(2). func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() msgPtr := args[1].Pointer() vlen := args[2].Uint() flags := args[3].Int() @@ -934,7 +956,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syscall.EBADF } @@ -1049,14 +1071,14 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme // sendTo is the implementation of the sendto syscall. It is called by sendto // and send syscall handlers. -func sendTo(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) { +func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags int32, namePtr usermem.Addr, nameLen uint32) (uintptr, error) { bl := int(bufLen) if bl < 0 { return 0, syscall.EINVAL } // Get socket from the file descriptor. - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, syscall.EBADF } @@ -1105,7 +1127,7 @@ func sendTo(t *kernel.Task, fd kdefs.FD, bufPtr usermem.Addr, bufLen uint64, fla // SendTo implements the linux syscall sendto(2). func SendTo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() bufPtr := args[1].Pointer() bufLen := args[2].Uint64() flags := args[3].Int() diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index 37303606f..a7c98efcb 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -15,13 +15,12 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) // doSplice implements a blocking splice operation. @@ -48,12 +47,12 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB if ch == nil { ch = make(chan struct{}, 1) } - if !inW && inFile.Readiness(EventMaskRead) == 0 && !inFile.Flags().NonBlocking { + if !inW && !inFile.Flags().NonBlocking { w, _ := waiter.NewChannelEntry(ch) inFile.EventRegister(&w, EventMaskRead) defer inFile.EventUnregister(&w) inW = true // Registered. - } else if !outW && outFile.Readiness(EventMaskWrite) == 0 && !outFile.Flags().NonBlocking { + } else if !outW && !outFile.Flags().NonBlocking { w, _ := waiter.NewChannelEntry(ch) outFile.EventRegister(&w, EventMaskWrite) defer outFile.EventUnregister(&w) @@ -65,6 +64,11 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB break } + if (!inW || inFile.Readiness(EventMaskRead) != 0) && (!outW || outFile.Readiness(EventMaskWrite) != 0) { + // Something became ready, try again without blocking. + continue + } + // Block until there's data. if err = t.Block(ch); err != nil { break @@ -76,8 +80,8 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB // Sendfile implements linux system call sendfile(2). func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - outFD := kdefs.FD(args[0].Int()) - inFD := kdefs.FD(args[1].Int()) + outFD := args[0].Int() + inFD := args[1].Int() offsetAddr := args[2].Pointer() count := int64(args[3].SizeT()) @@ -87,13 +91,13 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } // Get files. - outFile := t.FDMap().GetFile(outFD) + outFile := t.GetFile(outFD) if outFile == nil { return 0, nil, syserror.EBADF } defer outFile.DecRef() - inFile := t.FDMap().GetFile(inFD) + inFile := t.GetFile(inFD) if inFile == nil { return 0, nil, syserror.EBADF } @@ -158,9 +162,9 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Splice implements splice(2). func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - inFD := kdefs.FD(args[0].Int()) + inFD := args[0].Int() inOffset := args[1].Pointer() - outFD := kdefs.FD(args[2].Int()) + outFD := args[2].Int() outOffset := args[3].Pointer() count := int64(args[4].SizeT()) flags := args[5].Int() @@ -177,13 +181,13 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0 // Get files. - outFile := t.FDMap().GetFile(outFD) + outFile := t.GetFile(outFD) if outFile == nil { return 0, nil, syserror.EBADF } defer outFile.DecRef() - inFile := t.FDMap().GetFile(inFD) + inFile := t.GetFile(inFD) if inFile == nil { return 0, nil, syserror.EBADF } @@ -246,8 +250,8 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Tee imlements tee(2). func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - inFD := kdefs.FD(args[0].Int()) - outFD := kdefs.FD(args[1].Int()) + inFD := args[0].Int() + outFD := args[1].Int() count := int64(args[2].SizeT()) flags := args[3].Int() @@ -260,13 +264,13 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0 // Get files. - outFile := t.FDMap().GetFile(outFD) + outFile := t.GetFile(outFD) if outFile == nil { return 0, nil, syserror.EBADF } defer outFile.DecRef() - inFile := t.FDMap().GetFile(inFD) + inFile := t.GetFile(inFD) if inFile == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go index 10fc201ef..5556bc276 100644 --- a/pkg/sentry/syscalls/linux/sys_stat.go +++ b/pkg/sentry/syscalls/linux/sys_stat.go @@ -15,14 +15,13 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/binary" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // Stat implements linux syscall stat(2). @@ -35,14 +34,14 @@ func Stat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return stat(t, d, dirPath, statAddr) }) } // Fstatat implements linux syscall newfstatat, i.e. fstatat(2). func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() statAddr := args[2].Pointer() flags := args[3].Int() @@ -54,7 +53,7 @@ func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if path == "" { // Annoying. What's wrong with fstat? - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -67,7 +66,7 @@ func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca // then we must resolve the final component. resolve := dirPath || flags&linux.AT_SYMLINK_NOFOLLOW == 0 - return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return stat(t, d, dirPath, statAddr) }) } @@ -86,17 +85,17 @@ func Lstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // want to resolve the final component. resolve := dirPath - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return stat(t, d, dirPath, statAddr) }) } // Fstat implements linux syscall fstat(2). func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() statAddr := args[1].Pointer() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -132,24 +131,6 @@ func fstat(t *kernel.Task, f *fs.File, statAddr usermem.Addr) error { // t.CopyObjectOut has noticeable performance impact due to its many slice // allocations and use of reflection. func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs.UnstableAttr) error { - var mode uint32 - switch sattr.Type { - case fs.RegularFile, fs.SpecialFile: - mode |= linux.ModeRegular - case fs.Symlink: - mode |= linux.ModeSymlink - case fs.Directory, fs.SpecialDirectory: - mode |= linux.ModeDirectory - case fs.Pipe: - mode |= linux.ModeNamedPipe - case fs.CharacterDevice: - mode |= linux.ModeCharacterDevice - case fs.BlockDevice: - mode |= linux.ModeBlockDevice - case fs.Socket: - mode |= linux.ModeSocket - } - b := t.CopyScratchBuffer(int(linux.SizeOfStat))[:0] // Dev (uint64) @@ -159,7 +140,7 @@ func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs // Nlink (uint64) b = binary.AppendUint64(b, usermem.ByteOrder, uattr.Links) // Mode (uint32) - b = binary.AppendUint32(b, usermem.ByteOrder, mode|uint32(uattr.Perms.LinuxMode())) + b = binary.AppendUint32(b, usermem.ByteOrder, sattr.Type.LinuxType()|uint32(uattr.Perms.LinuxMode())) // UID (uint32) b = binary.AppendUint32(b, usermem.ByteOrder, uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow())) // GID (uint32) @@ -194,6 +175,98 @@ func copyOutStat(t *kernel.Task, dst usermem.Addr, sattr fs.StableAttr, uattr fs return err } +// Statx implements linux syscall statx(2). +func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + pathAddr := args[1].Pointer() + flags := args[2].Int() + mask := args[3].Uint() + statxAddr := args[4].Pointer() + + if mask&linux.STATX__RESERVED > 0 { + return 0, nil, syserror.EINVAL + } + if flags&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { + return 0, nil, syserror.EINVAL + } + + path, dirPath, err := copyInPath(t, pathAddr, flags&linux.AT_EMPTY_PATH != 0) + if err != nil { + return 0, nil, err + } + + if path == "" { + file := t.GetFile(fd) + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + uattr, err := file.UnstableAttr(t) + if err != nil { + return 0, nil, err + } + return 0, nil, statx(t, file.Dirent.Inode.StableAttr, uattr, statxAddr) + } + + resolve := dirPath || flags&linux.AT_SYMLINK_NOFOLLOW == 0 + + return 0, nil, fileOpOn(t, fd, path, resolve, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + uattr, err := d.Inode.UnstableAttr(t) + if err != nil { + return err + } + return statx(t, d.Inode.StableAttr, uattr, statxAddr) + }) +} + +func statx(t *kernel.Task, sattr fs.StableAttr, uattr fs.UnstableAttr, statxAddr usermem.Addr) error { + // "[T]he kernel may return fields that weren't requested and may fail to + // return fields that were requested, depending on what the backing + // filesystem supports. + // [...] + // A filesystem may also fill in fields that the caller didn't ask for + // if it has values for them available and the information is available + // at no extra cost. If this happens, the corresponding bits will be + // set in stx_mask." -- statx(2) + // + // We fill in all the values we have (which currently does not include + // btime, see b/135608823), regardless of what the user asked for. The + // STATX_BASIC_STATS mask indicates that all fields are present except + // for btime. + + devMajor, devMinor := linux.DecodeDeviceID(uint32(sattr.DeviceID)) + s := linux.Statx{ + // TODO(b/135608823): Support btime, and then change this to + // STATX_ALL to indicate presence of btime. + Mask: linux.STATX_BASIC_STATS, + + // No attributes, and none supported. + Attributes: 0, + AttributesMask: 0, + + Blksize: uint32(sattr.BlockSize), + Nlink: uint32(uattr.Links), + UID: uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()), + GID: uint32(uattr.Owner.GID.In(t.UserNamespace()).OrOverflow()), + Mode: uint16(sattr.Type.LinuxType()) | uint16(uattr.Perms.LinuxMode()), + Ino: sattr.InodeID, + Size: uint64(uattr.Size), + Blocks: uint64(uattr.Usage) / 512, + Atime: uattr.AccessTime.StatxTimestamp(), + Ctime: uattr.StatusChangeTime.StatxTimestamp(), + Mtime: uattr.ModificationTime.StatxTimestamp(), + RdevMajor: uint32(sattr.DeviceFileMajor), + RdevMinor: sattr.DeviceFileMinor, + DevMajor: uint32(devMajor), + DevMinor: devMinor, + } + _, err := t.CopyOut(statxAddr, &s) + return err +} + // Statfs implements linux syscall statfs(2). func Statfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { addr := args[0].Pointer() @@ -204,17 +277,17 @@ func Statfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { return statfsImpl(t, d, statfsAddr) }) } // Fstatfs implements linux syscall fstatfs(2). func Fstatfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() statfsAddr := args[1].Pointer() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -252,8 +325,6 @@ func statfsImpl(t *kernel.Task, d *fs.Dirent, addr usermem.Addr) error { FragmentSize: d.Inode.StableAttr.BlockSize, // Leave other fields 0 like simple_statfs does. } - if _, err := t.CopyOut(addr, &statfs); err != nil { - return err - } - return nil + _, err = t.CopyOut(addr, &statfs) + return err } diff --git a/pkg/sentry/syscalls/linux/sys_sync.go b/pkg/sentry/syscalls/linux/sys_sync.go index 4352482fb..3e55235bd 100644 --- a/pkg/sentry/syscalls/linux/sys_sync.go +++ b/pkg/sentry/syscalls/linux/sys_sync.go @@ -15,12 +15,11 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // Sync implements linux system call sync(2). @@ -32,9 +31,9 @@ func Sync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC // Syncfs implements linux system call syncfs(2). func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -47,9 +46,9 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Fsync implements linux syscall fsync(2). func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -63,9 +62,9 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // // At the moment, it just calls Fsync, which is a big hammer, but correct. func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -79,6 +78,7 @@ func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { var err error + fd := args[0].Int() offset := args[1].Int64() nbytes := args[2].Int64() uflags := args[3].Uint() @@ -97,8 +97,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel nbytes = fs.FileMaxOffset } - fd := kdefs.FD(args[0].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_sysinfo.go b/pkg/sentry/syscalls/linux/sys_sysinfo.go index ecf88edc1..a65b560c8 100644 --- a/pkg/sentry/syscalls/linux/sys_sysinfo.go +++ b/pkg/sentry/syscalls/linux/sys_sysinfo.go @@ -15,10 +15,10 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usage" ) // Sysinfo implements the sysinfo syscall as described in man 2 sysinfo. diff --git a/pkg/sentry/syscalls/linux/sys_syslog.go b/pkg/sentry/syscalls/linux/sys_syslog.go index 9efc58d34..40c8bb061 100644 --- a/pkg/sentry/syscalls/linux/sys_syslog.go +++ b/pkg/sentry/syscalls/linux/sys_syslog.go @@ -15,9 +15,9 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 26f7e8ead..9e037bd7b 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -17,12 +17,12 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/sched" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go index b4f2609c0..fe8725191 100644 --- a/pkg/sentry/syscalls/linux/sys_time.go +++ b/pkg/sentry/syscalls/linux/sys_time.go @@ -17,12 +17,12 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // The most significant 29 bits hold either a pid or a file descriptor. diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go index 04ea7a4e9..ca5ccb7c3 100644 --- a/pkg/sentry/syscalls/linux/sys_timer.go +++ b/pkg/sentry/syscalls/linux/sys_timer.go @@ -18,10 +18,10 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) const nsecPerSec = int64(time.Second) diff --git a/pkg/sentry/syscalls/linux/sys_timerfd.go b/pkg/sentry/syscalls/linux/sys_timerfd.go index ec0155cbb..1ce5ce4c3 100644 --- a/pkg/sentry/syscalls/linux/sys_timerfd.go +++ b/pkg/sentry/syscalls/linux/sys_timerfd.go @@ -15,14 +15,13 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/timerfd" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/syserror" ) // TimerfdCreate implements Linux syscall timerfd_create(2). @@ -49,9 +48,9 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel NonBlocking: flags&linux.TFD_NONBLOCK != 0, }) - fd, err := t.FDMap().NewFDFrom(0, f, kernel.FDFlags{ + fd, err := t.NewFDFrom(0, f, kernel.FDFlags{ CloseOnExec: flags&linux.TFD_CLOEXEC != 0, - }, t.ThreadGroup().Limits()) + }) if err != nil { return 0, nil, err } @@ -61,7 +60,7 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel // TimerfdSettime implements Linux syscall timerfd_settime(2). func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() flags := args[1].Int() newValAddr := args[2].Pointer() oldValAddr := args[3].Pointer() @@ -70,7 +69,7 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne return 0, nil, syserror.EINVAL } - f := t.FDMap().GetFile(fd) + f := t.GetFile(fd) if f == nil { return 0, nil, syserror.EBADF } @@ -101,10 +100,10 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne // TimerfdGettime implements Linux syscall timerfd_gettime(2). func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() curValAddr := args[1].Pointer() - f := t.FDMap().GetFile(fd) + f := t.GetFile(fd) if f == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/sys_tls.go b/pkg/sentry/syscalls/linux/sys_tls.go index 1e8312e00..e3d1c6201 100644 --- a/pkg/sentry/syscalls/linux/sys_tls.go +++ b/pkg/sentry/syscalls/linux/sys_tls.go @@ -19,9 +19,9 @@ package linux import ( "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" ) // ArchPrctl implements linux syscall arch_prctl(2). diff --git a/pkg/sentry/syscalls/linux/sys_utsname.go b/pkg/sentry/syscalls/linux/sys_utsname.go index fa81fe10e..271ace08e 100644 --- a/pkg/sentry/syscalls/linux/sys_utsname.go +++ b/pkg/sentry/syscalls/linux/sys_utsname.go @@ -17,10 +17,10 @@ package linux import ( - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // Uname implements linux syscall uname. diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go index 1da72d606..5278c96a6 100644 --- a/pkg/sentry/syscalls/linux/sys_write.go +++ b/pkg/sentry/syscalls/linux/sys_write.go @@ -17,16 +17,15 @@ package linux import ( "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" - "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" ) const ( @@ -39,11 +38,11 @@ const ( // Write implements linux syscall write(2). func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := args[2].SizeT() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -75,12 +74,12 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // Pwrite64 implements linux syscall pwrite64(2). func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() size := args[2].SizeT() offset := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -122,11 +121,11 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // Writev implements linux syscall writev(2). func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -152,12 +151,12 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Pwritev implements linux syscall pwritev(2). func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) offset := args[3].Int64() - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } @@ -202,7 +201,7 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc // splits the offset argument into a high/low value for compatibility with // 32-bit architectures. The flags argument is the 5th argument. - fd := kdefs.FD(args[0].Int()) + fd := args[0].Int() addr := args[1].Pointer() iovcnt := int(args[2].Int()) offset := args[3].Int64() @@ -212,7 +211,7 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, syserror.EACCES } - file := t.FDMap().GetFile(fd) + file := t.GetFile(fd) if file == nil { return 0, nil, syserror.EBADF } diff --git a/pkg/sentry/syscalls/linux/timespec.go b/pkg/sentry/syscalls/linux/timespec.go index fa6fcdc0b..9ba0eba7a 100644 --- a/pkg/sentry/syscalls/linux/timespec.go +++ b/pkg/sentry/syscalls/linux/timespec.go @@ -18,10 +18,10 @@ import ( "syscall" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" ) // copyTimespecIn copies a Timespec from the untrusted app range to the kernel. diff --git a/pkg/sentry/syscalls/syscalls.go b/pkg/sentry/syscalls/syscalls.go index 48c114232..94d52d5d5 100644 --- a/pkg/sentry/syscalls/syscalls.go +++ b/pkg/sentry/syscalls/syscalls.go @@ -28,10 +28,10 @@ import ( "fmt" "syscall" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" ) // Supported returns a syscall that is fully supported. @@ -40,16 +40,7 @@ func Supported(name string, fn kernel.SyscallFn) kernel.Syscall { Name: name, Fn: fn, SupportLevel: kernel.SupportFull, - Note: "Full Support", - } -} - -// Undocumented returns a syscall that is undocumented. -func Undocumented(name string, fn kernel.SyscallFn) kernel.Syscall { - return kernel.Syscall{ - Name: name, - Fn: fn, - SupportLevel: kernel.SupportUndocumented, + Note: "Fully Supported.", } } @@ -75,7 +66,7 @@ func Error(name string, err syscall.Errno, note string, urls []string) kernel.Sy return 0, nil, err }, SupportLevel: kernel.SupportUnimplemented, - Note: fmt.Sprintf("%sReturns %q", note, err.Error()), + Note: fmt.Sprintf("%sReturns %q.", note, err.Error()), URLs: urls, } } @@ -93,7 +84,7 @@ func ErrorWithEvent(name string, err syscall.Errno, note string, urls []string) return 0, nil, err }, SupportLevel: kernel.SupportUnimplemented, - Note: fmt.Sprintf("%sReturns %q", note, err.Error()), + Note: fmt.Sprintf("%sReturns %q.", note, err.Error()), URLs: urls, } } @@ -115,7 +106,7 @@ func CapError(name string, c linux.Capability, note string, urls []string) kerne return 0, nil, syserror.ENOSYS }, SupportLevel: kernel.SupportUnimplemented, - Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise", note, syserror.EPERM, c.String(), syserror.ENOSYS), + Note: fmt.Sprintf("%sReturns %q if the process does not have %s; %q otherwise.", note, syserror.EPERM, c.String(), syserror.ENOSYS), URLs: urls, } } |