From 4fdd69d681bb3abb68a043377a2fb0ec8a031d54 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Mon, 4 Nov 2019 10:56:13 -0800 Subject: Check that a file is a regular file with open(O_TRUNC). It was possible to panic the sentry by opening a cache revalidating folder with O_TRUNC|O_CREAT. PiperOrigin-RevId: 278417533 --- pkg/sentry/fs/inode_operations.go | 2 ++ 1 file changed, 2 insertions(+) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index 5cde9d215..d6c35c2dc 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -221,6 +221,8 @@ type InodeOperations interface { // sys_ftruncate. // // Implementations need not check that length >= 0. + // + // Truncate must only be called on regular files. Truncate(ctx context.Context, inode *Inode, size int64) error // Allocate allows the caller to reserve disk space for the inode. -- cgit v1.2.3 From af58a4e3bb0ba81c103429317eb0c2735450136c Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Fri, 8 Nov 2019 12:18:40 -0800 Subject: Automated rollback of changelist 278417533 PiperOrigin-RevId: 279365629 --- pkg/sentry/fs/inode_operations.go | 2 -- pkg/sentry/syscalls/linux/sys_file.go | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index d6c35c2dc..5cde9d215 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -221,8 +221,6 @@ type InodeOperations interface { // sys_ftruncate. // // Implementations need not check that length >= 0. - // - // Truncate must only be called on regular files. Truncate(ctx context.Context, inode *Inode, size int64) error // Allocate allows the caller to reserve disk space for the inode. diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index c9f57fe27..b9a8e3e21 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -169,7 +169,7 @@ func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uint if dirPath { return syserror.ENOTDIR } - if flags&linux.O_TRUNC != 0 && fs.IsRegular(d.Inode.StableAttr) { + if flags&linux.O_TRUNC != 0 { if err := d.Inode.Truncate(t, d, 0); err != nil { return err } @@ -397,7 +397,7 @@ func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode l } // Should we truncate the file? - if flags&linux.O_TRUNC != 0 && fs.IsRegular(found.Inode.StableAttr) { + if flags&linux.O_TRUNC != 0 { if err := found.Inode.Truncate(t, found, 0); err != nil { return err } @@ -1483,7 +1483,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if fs.IsDir(d.Inode.StableAttr) { return syserror.EISDIR } - if !fs.IsRegular(d.Inode.StableAttr) { + if !fs.IsFile(d.Inode.StableAttr) { return syserror.EINVAL } @@ -1523,7 +1523,7 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // Note that this is different from truncate(2) above, where a // directory returns EISDIR. - if !fs.IsRegular(file.Dirent.Inode.StableAttr) { + if !fs.IsFile(file.Dirent.Inode.StableAttr) { return 0, nil, syserror.EINVAL } -- cgit v1.2.3 From 05871a1cdc73e98df58f56841be23a4eac27225c Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Mon, 11 Nov 2019 08:20:18 +0000 Subject: Enable runsc/boot support on arm64. This patch also include a minor change to replace syscall.Dup2 with syscall.Dup3 which was missed in a previous commit(ref a25a976). Signed-off-by: Haibo Xu Change-Id: I00beb9cc492e44c762ebaa3750201c63c1f7c2f3 --- pkg/seccomp/seccomp_test_victim.go | 2 +- pkg/sentry/fs/gofer/inode.go | 2 +- runsc/boot/BUILD | 2 ++ runsc/boot/filter/BUILD | 2 ++ runsc/boot/filter/config.go | 6 +----- runsc/boot/filter/config_amd64.go | 31 +++++++++++++++++++++++++++++++ runsc/boot/filter/config_arm64.go | 21 +++++++++++++++++++++ runsc/boot/loader.go | 4 ---- runsc/boot/loader_amd64.go | 28 ++++++++++++++++++++++++++++ runsc/boot/loader_arm64.go | 28 ++++++++++++++++++++++++++++ 10 files changed, 115 insertions(+), 11 deletions(-) create mode 100644 runsc/boot/filter/config_amd64.go create mode 100644 runsc/boot/filter/config_arm64.go create mode 100644 runsc/boot/loader_amd64.go create mode 100644 runsc/boot/loader_arm64.go (limited to 'pkg/sentry/fs') diff --git a/pkg/seccomp/seccomp_test_victim.go b/pkg/seccomp/seccomp_test_victim.go index 48413f1fb..da6b9eaaf 100644 --- a/pkg/seccomp/seccomp_test_victim.go +++ b/pkg/seccomp/seccomp_test_victim.go @@ -38,7 +38,7 @@ func main() { syscall.SYS_CLONE: {}, syscall.SYS_CLOSE: {}, syscall.SYS_DUP: {}, - syscall.SYS_DUP2: {}, + syscall.SYS_DUP3: {}, syscall.SYS_EPOLL_CREATE1: {}, syscall.SYS_EPOLL_CTL: {}, syscall.SYS_EPOLL_WAIT: {}, diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 99910388f..54a8ceef8 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -268,7 +268,7 @@ func (i *inodeFileState) recreateReadHandles(ctx context.Context, writer *handle // operations on the old will see the new data. Then, make the new handle take // ownereship of the old FD and mark the old readHandle to not close the FD // when done. - if err := syscall.Dup2(h.Host.FD(), i.readHandles.Host.FD()); err != nil { + if err := syscall.Dup3(h.Host.FD(), i.readHandles.Host.FD(), 0); err != nil { return err } diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 58e86ae7f..847d2f91c 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -15,6 +15,8 @@ go_library( "fs.go", "limits.go", "loader.go", + "loader_amd64.go", + "loader_arm64.go", "network.go", "pprof.go", "strace.go", diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index f5509b6b7..3a9dcfc04 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -6,6 +6,8 @@ go_library( name = "filter", srcs = [ "config.go", + "config_amd64.go", + "config_arm64.go", "extra_filters.go", "extra_filters_msan.go", "extra_filters_race.go", diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 5ad108261..b5bd61a3a 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -26,10 +26,6 @@ import ( // allowedSyscalls is the set of syscalls executed by the Sentry to the host OS. var allowedSyscalls = seccomp.SyscallRules{ - syscall.SYS_ARCH_PRCTL: []seccomp.Rule{ - {seccomp.AllowValue(linux.ARCH_GET_FS)}, - {seccomp.AllowValue(linux.ARCH_SET_FS)}, - }, syscall.SYS_CLOCK_GETTIME: {}, syscall.SYS_CLONE: []seccomp.Rule{ { @@ -44,7 +40,7 @@ var allowedSyscalls = seccomp.SyscallRules{ }, syscall.SYS_CLOSE: {}, syscall.SYS_DUP: {}, - syscall.SYS_DUP2: {}, + syscall.SYS_DUP3: {}, syscall.SYS_EPOLL_CREATE1: {}, syscall.SYS_EPOLL_CTL: {}, syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{ diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go new file mode 100644 index 000000000..058d9c264 --- /dev/null +++ b/runsc/boot/filter/config_amd64.go @@ -0,0 +1,31 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package filter + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/seccomp" +) + +func init() { + allowedSyscalls[syscall.SYS_ARCH_PRCTL] = []seccomp.Rule{ + {seccomp.AllowValue(linux.ARCH_GET_FS)}, + {seccomp.AllowValue(linux.ARCH_SET_FS)}, + } +} diff --git a/runsc/boot/filter/config_arm64.go b/runsc/boot/filter/config_arm64.go new file mode 100644 index 000000000..7fa9bbda3 --- /dev/null +++ b/runsc/boot/filter/config_arm64.go @@ -0,0 +1,21 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package filter + +// Reserve for future customization. +func init() { +} diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index f05d5973f..df6052c88 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -43,7 +43,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/sighandling" - slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/watchdog" @@ -147,9 +146,6 @@ type execProcess struct { func init() { // Initialize the random number generator. mrand.Seed(gtime.Now().UnixNano()) - - // Register the global syscall table. - kernel.RegisterSyscallTable(slinux.AMD64) } // Args are the arguments for New(). diff --git a/runsc/boot/loader_amd64.go b/runsc/boot/loader_amd64.go new file mode 100644 index 000000000..d16d20d89 --- /dev/null +++ b/runsc/boot/loader_amd64.go @@ -0,0 +1,28 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +// Package boot loads the kernel and runs a container. +package boot + +import ( + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" +) + +func init() { + // Register the global syscall table. + kernel.RegisterSyscallTable(linux.AMD64) +} diff --git a/runsc/boot/loader_arm64.go b/runsc/boot/loader_arm64.go new file mode 100644 index 000000000..8712e764a --- /dev/null +++ b/runsc/boot/loader_arm64.go @@ -0,0 +1,28 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +// Package boot loads the kernel and runs a container. +package boot + +import ( + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" +) + +func init() { + // Register the global syscall table. + kernel.RegisterSyscallTable(linux.ARM64) +} -- cgit v1.2.3 From c5d9b5b8816e99507661e1d39ec51033fb69e212 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Mon, 11 Nov 2019 09:15:45 +0000 Subject: Enable sentry/fs/host support on arm64. newfstatat() syscall is not supported on arm64, so we resort to use the fstatat() syscall. Signed-off-by: Haibo Xu Change-Id: Iea95550ea53bcf85c01f7b3b95da70ad0952177d --- pkg/sentry/fs/host/BUILD | 2 ++ pkg/sentry/fs/host/util.go | 2 +- pkg/sentry/fs/host/util_amd64_unsafe.go | 41 +++++++++++++++++++++++++++++++++ pkg/sentry/fs/host/util_arm64_unsafe.go | 41 +++++++++++++++++++++++++++++++++ pkg/sentry/fs/host/util_unsafe.go | 19 --------------- 5 files changed, 85 insertions(+), 20 deletions(-) create mode 100644 pkg/sentry/fs/host/util_amd64_unsafe.go create mode 100644 pkg/sentry/fs/host/util_arm64_unsafe.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 1cbed07ae..23daeb528 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -21,6 +21,8 @@ go_library( "socket_unsafe.go", "tty.go", "util.go", + "util_amd64_unsafe.go", + "util_arm64_unsafe.go", "util_unsafe.go", ], importpath = "gvisor.dev/gvisor/pkg/sentry/fs/host", diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go index bad61a9a1..e37e687c6 100644 --- a/pkg/sentry/fs/host/util.go +++ b/pkg/sentry/fs/host/util.go @@ -155,7 +155,7 @@ func unstableAttr(mo *superOperations, s *syscall.Stat_t) fs.UnstableAttr { AccessTime: ktime.FromUnix(s.Atim.Sec, s.Atim.Nsec), ModificationTime: ktime.FromUnix(s.Mtim.Sec, s.Mtim.Nsec), StatusChangeTime: ktime.FromUnix(s.Ctim.Sec, s.Ctim.Nsec), - Links: s.Nlink, + Links: uint64(s.Nlink), } } diff --git a/pkg/sentry/fs/host/util_amd64_unsafe.go b/pkg/sentry/fs/host/util_amd64_unsafe.go new file mode 100644 index 000000000..66da6e9f5 --- /dev/null +++ b/pkg/sentry/fs/host/util_amd64_unsafe.go @@ -0,0 +1,41 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 + +package host + +import ( + "syscall" + "unsafe" +) + +func fstatat(fd int, name string, flags int) (syscall.Stat_t, error) { + var stat syscall.Stat_t + namePtr, err := syscall.BytePtrFromString(name) + if err != nil { + return stat, err + } + _, _, errno := syscall.Syscall6( + syscall.SYS_NEWFSTATAT, + uintptr(fd), + uintptr(unsafe.Pointer(namePtr)), + uintptr(unsafe.Pointer(&stat)), + uintptr(flags), + 0, 0) + if errno != 0 { + return stat, errno + } + return stat, nil +} diff --git a/pkg/sentry/fs/host/util_arm64_unsafe.go b/pkg/sentry/fs/host/util_arm64_unsafe.go new file mode 100644 index 000000000..e8cb94aeb --- /dev/null +++ b/pkg/sentry/fs/host/util_arm64_unsafe.go @@ -0,0 +1,41 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package host + +import ( + "syscall" + "unsafe" +) + +func fstatat(fd int, name string, flags int) (syscall.Stat_t, error) { + var stat syscall.Stat_t + namePtr, err := syscall.BytePtrFromString(name) + if err != nil { + return stat, err + } + _, _, errno := syscall.Syscall6( + syscall.SYS_FSTATAT, + uintptr(fd), + uintptr(unsafe.Pointer(namePtr)), + uintptr(unsafe.Pointer(&stat)), + uintptr(flags), + 0, 0) + if errno != 0 { + return stat, errno + } + return stat, nil +} diff --git a/pkg/sentry/fs/host/util_unsafe.go b/pkg/sentry/fs/host/util_unsafe.go index 2b76f1065..3ab36b088 100644 --- a/pkg/sentry/fs/host/util_unsafe.go +++ b/pkg/sentry/fs/host/util_unsafe.go @@ -116,22 +116,3 @@ func setTimestamps(fd int, ts fs.TimeSpec) error { } return nil } - -func fstatat(fd int, name string, flags int) (syscall.Stat_t, error) { - var stat syscall.Stat_t - namePtr, err := syscall.BytePtrFromString(name) - if err != nil { - return stat, err - } - _, _, errno := syscall.Syscall6( - syscall.SYS_NEWFSTATAT, - uintptr(fd), - uintptr(unsafe.Pointer(namePtr)), - uintptr(unsafe.Pointer(&stat)), - uintptr(flags), - 0, 0) - if errno != 0 { - return stat, errno - } - return stat, nil -} -- cgit v1.2.3 From c2d3dc0c13e1adfc182c33d57e410c46fe12415f Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Tue, 12 Nov 2019 22:49:22 -0800 Subject: Use overlay MountSource when binding socket in overlay. PiperOrigin-RevId: 280131840 --- pkg/sentry/fs/inode_overlay.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 5a388dad1..a09147080 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -462,7 +462,9 @@ func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name stri inode.DecRef() return nil, err } - return NewDirent(ctx, newOverlayInode(ctx, entry, inode.MountSource), name), nil + // Use the parent's MountSource, since that corresponds to the overlay, + // and not the upper filesystem. + return NewDirent(ctx, newOverlayInode(ctx, entry, parent.Inode.MountSource), name), nil } func overlayBoundEndpoint(o *overlayEntry, path string) transport.BoundEndpoint { -- cgit v1.2.3 From 339536de5eefe782813aabae4aeeb312b3c4dde7 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Thu, 14 Nov 2019 15:55:07 -0800 Subject: Check that a file is a regular file with open(O_TRUNC). It was possible to panic the sentry by opening a cache revalidating folder with O_TRUNC|O_CREAT. Avoids breaking php tests. PiperOrigin-RevId: 280533213 --- pkg/sentry/fs/inode.go | 4 ++++ pkg/sentry/fs/tty/master.go | 1 + pkg/sentry/fs/tty/slave.go | 1 + pkg/sentry/syscalls/linux/sys_file.go | 9 +++++---- test/syscalls/linux/open.cc | 22 ++++++++++++++++++++++ test/syscalls/linux/open_create.cc | 24 ++++++++++++++++++++++++ test/syscalls/linux/pty.cc | 20 +++++++++++++++++++- test/util/pty_util.cc | 10 +++++++++- test/util/pty_util.h | 3 +++ 9 files changed, 88 insertions(+), 6 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index f4ddfa406..2d43dff1d 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -344,6 +344,10 @@ func (i *Inode) SetTimestamps(ctx context.Context, d *Dirent, ts TimeSpec) error // Truncate calls i.InodeOperations.Truncate with i as the Inode. func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error { + if IsDir(i.StableAttr) { + return syserror.EISDIR + } + if i.overlay != nil { return overlayTruncate(ctx, i.overlay, d, size) } diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index 19b7557d5..bc56be696 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -32,6 +32,7 @@ import ( // +stateify savable type masterInodeOperations struct { fsutil.SimpleFileInode + fsutil.InodeNoopTruncate // d is the containing dir. d *dirInodeOperations diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index 944c4ada1..4cbea0367 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -31,6 +31,7 @@ import ( // +stateify savable type slaveInodeOperations struct { fsutil.SimpleFileInode + fsutil.InodeNoopTruncate // d is the containing dir. d *dirInodeOperations diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index b9a8e3e21..167c2b60b 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -169,10 +169,11 @@ func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uint if dirPath { return syserror.ENOTDIR } - if flags&linux.O_TRUNC != 0 { - if err := d.Inode.Truncate(t, d, 0); err != nil { - return err - } + } + + if flags&linux.O_TRUNC != 0 { + if err := d.Inode.Truncate(t, d, 0); err != nil { + return err } } diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index 2b1df52ce..267ae19f6 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -73,6 +73,28 @@ class OpenTest : public FileTest { const std::string test_data_ = "hello world\n"; }; +TEST_F(OpenTest, OTrunc) { + auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_TRUNC, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(OpenTest, OTruncAndReadOnlyDir) { + auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_TRUNC | O_RDONLY, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST_F(OpenTest, OTruncAndReadOnlyFile) { + auto dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncfile"); + const FileDescriptor existing = + ASSERT_NO_ERRNO_AND_VALUE(Open(dirpath.c_str(), O_RDWR | O_CREAT, 0666)); + const FileDescriptor otrunc = ASSERT_NO_ERRNO_AND_VALUE( + Open(dirpath.c_str(), O_TRUNC | O_RDONLY, 0666)); +} + TEST_F(OpenTest, ReadOnly) { char buf; const FileDescriptor ro_file = diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc index e5a85ef9d..431733dbe 100644 --- a/test/syscalls/linux/open_create.cc +++ b/test/syscalls/linux/open_create.cc @@ -88,6 +88,30 @@ TEST(CreateTest, CreateExclusively) { SyscallFailsWithErrno(EEXIST)); } +TEST(CreateTeast, CreatWithOTrunc) { + std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST(CreateTeast, CreatDirWithOTruncAndReadOnly) { + std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncd"); + ASSERT_THAT(mkdir(dirpath.c_str(), 0777), SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0666), + SyscallFailsWithErrno(EISDIR)); +} + +TEST(CreateTeast, CreatFileWithOTruncAndReadOnly) { + std::string dirpath = JoinPath(GetAbsoluteTestTmpdir(), "truncfile"); + int dirfd; + ASSERT_THAT(dirfd = open(dirpath.c_str(), O_RDWR | O_CREAT, 0666), + SyscallSucceeds()); + ASSERT_THAT(open(dirpath.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0666), + SyscallSucceeds()); + ASSERT_THAT(close(dirfd), SyscallSucceeds()); +} + TEST(CreateTest, CreateFailsOnUnpermittedDir) { // Make sure we don't have CAP_DAC_OVERRIDE, since that allows the user to // always override directory permissions. diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc index 99a0df235..dafe64d20 100644 --- a/test/syscalls/linux/pty.cc +++ b/test/syscalls/linux/pty.cc @@ -70,6 +70,8 @@ constexpr absl::Duration kTimeout = absl::Seconds(20); // The maximum line size in bytes returned per read from a pty file. constexpr int kMaxLineSize = 4096; +constexpr char kMasterPath[] = "/dev/ptmx"; + // glibc defines its own, different, version of struct termios. We care about // what the kernel does, not glibc. #define KERNEL_NCCS 19 @@ -376,9 +378,25 @@ PosixErrorOr PollAndReadFd(int fd, void* buf, size_t count, return PosixError(ETIMEDOUT, "Poll timed out"); } +TEST(PtyTrunc, Truncate) { + // Opening PTYs with O_TRUNC shouldn't cause an error, but calls to + // (f)truncate should. + FileDescriptor master = + ASSERT_NO_ERRNO_AND_VALUE(Open(kMasterPath, O_RDWR | O_TRUNC)); + int n = ASSERT_NO_ERRNO_AND_VALUE(SlaveID(master)); + std::string spath = absl::StrCat("/dev/pts/", n); + FileDescriptor slave = + ASSERT_NO_ERRNO_AND_VALUE(Open(spath, O_RDWR | O_NONBLOCK | O_TRUNC)); + + EXPECT_THAT(truncate(kMasterPath, 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(truncate(spath.c_str(), 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(ftruncate(master.get(), 0), SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(ftruncate(slave.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + TEST(BasicPtyTest, StatUnopenedMaster) { struct stat s; - ASSERT_THAT(stat("/dev/ptmx", &s), SyscallSucceeds()); + ASSERT_THAT(stat(kMasterPath, &s), SyscallSucceeds()); EXPECT_EQ(s.st_rdev, makedev(TTYAUX_MAJOR, kPtmxMinor)); EXPECT_EQ(s.st_size, 0); diff --git a/test/util/pty_util.cc b/test/util/pty_util.cc index c0fd9a095..c01f916aa 100644 --- a/test/util/pty_util.cc +++ b/test/util/pty_util.cc @@ -24,6 +24,14 @@ namespace gvisor { namespace testing { PosixErrorOr OpenSlave(const FileDescriptor& master) { + PosixErrorOr n = SlaveID(master); + if (!n.ok()) { + return PosixErrorOr(n.error()); + } + return Open(absl::StrCat("/dev/pts/", n.ValueOrDie()), O_RDWR | O_NONBLOCK); +} + +PosixErrorOr SlaveID(const FileDescriptor& master) { // Get pty index. int n; int ret = ioctl(master.get(), TIOCGPTN, &n); @@ -38,7 +46,7 @@ PosixErrorOr OpenSlave(const FileDescriptor& master) { return PosixError(errno, "ioctl(TIOSPTLCK) failed"); } - return Open(absl::StrCat("/dev/pts/", n), O_RDWR | O_NONBLOCK); + return n; } } // namespace testing diff --git a/test/util/pty_util.h b/test/util/pty_util.h index 367b14f15..0722da379 100644 --- a/test/util/pty_util.h +++ b/test/util/pty_util.h @@ -24,6 +24,9 @@ namespace testing { // Opens the slave end of the passed master as R/W and nonblocking. PosixErrorOr OpenSlave(const FileDescriptor& master); +// Get the number of the slave end of the master. +PosixErrorOr SlaveID(const FileDescriptor& master); + } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 012102eefd2b145ddee774cba28e4fa889fadd49 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 20 Nov 2019 14:54:03 -0800 Subject: Pass OpenTruncate to gofer in Open call when opening file with O_TRUNC. Note that the Sentry still calls Truncate() on the file before calling Open. A new p9 version check was added to ensure that the p9 server can handle the the OpenTruncate flag. If not, then the flag is stripped before sending. PiperOrigin-RevId: 281609112 --- pkg/p9/handlers.go | 18 ++++----- pkg/p9/p9.go | 4 -- pkg/p9/p9test/client_test.go | 78 +++++++++++++++++++++++--------------- pkg/p9/version.go | 8 +++- pkg/sentry/fs/flags.go | 7 ++++ pkg/sentry/fs/gofer/file_state.go | 8 +++- pkg/sentry/fs/gofer/handles.go | 5 ++- pkg/sentry/fs/gofer/inode.go | 18 ++++++--- pkg/sentry/syscalls/linux/flags.go | 1 + 9 files changed, 95 insertions(+), 52 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go index ba9a55d6d..51869c7d6 100644 --- a/pkg/p9/handlers.go +++ b/pkg/p9/handlers.go @@ -272,15 +272,15 @@ func (t *Tlopen) handle(cs *connState) message { return newErr(syscall.EINVAL) } - // Are flags valid? - flags := t.Flags &^ OpenFlagsIgnoreMask - if flags&^OpenFlagsModeMask != 0 { - return newErr(syscall.EINVAL) - } - - // Is this an attempt to open a directory as writable? Don't accept. - if ref.mode.IsDir() && flags != ReadOnly { - return newErr(syscall.EINVAL) + if ref.mode.IsDir() { + // Directory must be opened ReadOnly. + if t.Flags&OpenFlagsModeMask != ReadOnly { + return newErr(syscall.EISDIR) + } + // Directory not truncatable. + if t.Flags&OpenTruncate != 0 { + return newErr(syscall.EISDIR) + } } var ( diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go index 415200d60..d3090535a 100644 --- a/pkg/p9/p9.go +++ b/pkg/p9/p9.go @@ -47,10 +47,6 @@ const ( // OpenTruncate is a Tlopen flag indicating that the opened file should be // truncated. OpenTruncate OpenFlags = 01000 - - // OpenFlagsIgnoreMask is a list of OpenFlags mode bits that are ignored for Tlopen. - // Note that syscall.O_LARGEFILE is set to zero, use value from Linux fcntl.h. - OpenFlagsIgnoreMask OpenFlags = syscall.O_DIRECTORY | syscall.O_NOATIME | 0100000 ) // ConnectFlags is the mode passed to Connect operations. diff --git a/pkg/p9/p9test/client_test.go b/pkg/p9/p9test/client_test.go index 8bbdb2488..6e758148d 100644 --- a/pkg/p9/p9test/client_test.go +++ b/pkg/p9/p9test/client_test.go @@ -1044,11 +1044,11 @@ func TestReaddir(t *testing.T) { if _, err := f.Readdir(0, 1); err != syscall.EINVAL { t.Errorf("readdir got %v, wanted EINVAL", err) } - if _, _, _, err := f.Open(p9.ReadWrite); err != syscall.EINVAL { - t.Errorf("readdir got %v, wanted EINVAL", err) + if _, _, _, err := f.Open(p9.ReadWrite); err != syscall.EISDIR { + t.Errorf("readdir got %v, wanted EISDIR", err) } - if _, _, _, err := f.Open(p9.WriteOnly); err != syscall.EINVAL { - t.Errorf("readdir got %v, wanted EINVAL", err) + if _, _, _, err := f.Open(p9.WriteOnly); err != syscall.EISDIR { + t.Errorf("readdir got %v, wanted EISDIR", err) } backend.EXPECT().Open(p9.ReadOnly).Times(1) if _, _, _, err := f.Open(p9.ReadOnly); err != nil { @@ -1065,75 +1065,93 @@ func TestReaddir(t *testing.T) { func TestOpen(t *testing.T) { type openTest struct { name string - mode p9.OpenFlags + flags p9.OpenFlags err error match func(p9.FileMode) bool } cases := []openTest{ - { - name: "invalid", - mode: ^p9.OpenFlagsModeMask, - err: syscall.EINVAL, - match: func(p9.FileMode) bool { return true }, - }, { name: "not-openable-read-only", - mode: p9.ReadOnly, + flags: p9.ReadOnly, err: syscall.EINVAL, match: func(mode p9.FileMode) bool { return !p9.CanOpen(mode) }, }, { name: "not-openable-write-only", - mode: p9.WriteOnly, + flags: p9.WriteOnly, err: syscall.EINVAL, match: func(mode p9.FileMode) bool { return !p9.CanOpen(mode) }, }, { name: "not-openable-read-write", - mode: p9.ReadWrite, + flags: p9.ReadWrite, err: syscall.EINVAL, match: func(mode p9.FileMode) bool { return !p9.CanOpen(mode) }, }, { name: "directory-read-only", - mode: p9.ReadOnly, + flags: p9.ReadOnly, err: nil, match: func(mode p9.FileMode) bool { return mode.IsDir() }, }, { name: "directory-read-write", - mode: p9.ReadWrite, - err: syscall.EINVAL, + flags: p9.ReadWrite, + err: syscall.EISDIR, match: func(mode p9.FileMode) bool { return mode.IsDir() }, }, { name: "directory-write-only", - mode: p9.WriteOnly, - err: syscall.EINVAL, + flags: p9.WriteOnly, + err: syscall.EISDIR, match: func(mode p9.FileMode) bool { return mode.IsDir() }, }, { name: "read-only", - mode: p9.ReadOnly, + flags: p9.ReadOnly, err: nil, match: func(mode p9.FileMode) bool { return p9.CanOpen(mode) }, }, { name: "write-only", - mode: p9.WriteOnly, + flags: p9.WriteOnly, err: nil, match: func(mode p9.FileMode) bool { return p9.CanOpen(mode) && !mode.IsDir() }, }, { name: "read-write", - mode: p9.ReadWrite, + flags: p9.ReadWrite, + err: nil, + match: func(mode p9.FileMode) bool { return p9.CanOpen(mode) && !mode.IsDir() }, + }, + { + name: "directory-read-only-truncate", + flags: p9.ReadOnly | p9.OpenTruncate, + err: syscall.EISDIR, + match: func(mode p9.FileMode) bool { return mode.IsDir() }, + }, + { + name: "read-only-truncate", + flags: p9.ReadOnly | p9.OpenTruncate, + err: nil, + match: func(mode p9.FileMode) bool { return p9.CanOpen(mode) && !mode.IsDir() }, + }, + { + name: "write-only-truncate", + flags: p9.WriteOnly | p9.OpenTruncate, + err: nil, + match: func(mode p9.FileMode) bool { return p9.CanOpen(mode) && !mode.IsDir() }, + }, + { + name: "read-write-truncate", + flags: p9.ReadWrite | p9.OpenTruncate, err: nil, match: func(mode p9.FileMode) bool { return p9.CanOpen(mode) && !mode.IsDir() }, }, } - // Open(mode OpenFlags) (*fd.FD, QID, uint32, error) + // Open(flags OpenFlags) (*fd.FD, QID, uint32, error) // - only works on Regular, NamedPipe, BLockDevice, CharacterDevice // - returning a file works as expected for name := range newTypeMap(nil) { @@ -1171,25 +1189,25 @@ func TestOpen(t *testing.T) { // Attempt the given open. if tc.err != nil { // We expect an error, just test and return. - if _, _, _, err := f.Open(tc.mode); err != tc.err { - t.Fatalf("open with mode %v got %v, want %v", tc.mode, err, tc.err) + if _, _, _, err := f.Open(tc.flags); err != tc.err { + t.Fatalf("open with flags %v got %v, want %v", tc.flags, err, tc.err) } return } // Run an FD test, since we expect success. fdTest(t, func(send *fd.FD) *fd.FD { - backend.EXPECT().Open(tc.mode).Return(send, p9.QID{}, uint32(0), nil).Times(1) - recv, _, _, err := f.Open(tc.mode) + backend.EXPECT().Open(tc.flags).Return(send, p9.QID{}, uint32(0), nil).Times(1) + recv, _, _, err := f.Open(tc.flags) if err != tc.err { - t.Fatalf("open with mode %v got %v, want %v", tc.mode, err, tc.err) + t.Fatalf("open with flags %v got %v, want %v", tc.flags, err, tc.err) } return recv }) // If the open was successful, attempt another one. - if _, _, _, err := f.Open(tc.mode); err != syscall.EINVAL { - t.Errorf("second open with mode %v got %v, want EINVAL", tc.mode, err) + if _, _, _, err := f.Open(tc.flags); err != syscall.EINVAL { + t.Errorf("second open with flags %v got %v, want EINVAL", tc.flags, err) } // Ensure that all illegal operations fail. diff --git a/pkg/p9/version.go b/pkg/p9/version.go index f1ffdd23a..36a694c58 100644 --- a/pkg/p9/version.go +++ b/pkg/p9/version.go @@ -26,7 +26,7 @@ const ( // // Clients are expected to start requesting this version number and // to continuously decrement it until a Tversion request succeeds. - highestSupportedVersion uint32 = 8 + highestSupportedVersion uint32 = 9 // lowestSupportedVersion is the lowest supported version X in a // version string of the format 9P2000.L.Google.X. @@ -155,3 +155,9 @@ func versionSupportsTallocate(v uint32) bool { func versionSupportsFlipcall(v uint32) bool { return v >= 8 } + +// VersionSupportsOpenTruncateFlag returns true if version v supports +// passing the OpenTruncate flag to Tlopen. +func VersionSupportsOpenTruncateFlag(v uint32) bool { + return v >= 9 +} diff --git a/pkg/sentry/fs/flags.go b/pkg/sentry/fs/flags.go index 0fab876a9..4338ae1fa 100644 --- a/pkg/sentry/fs/flags.go +++ b/pkg/sentry/fs/flags.go @@ -64,6 +64,10 @@ type FileFlags struct { // NonSeekable indicates that file.offset isn't used. NonSeekable bool + + // Truncate indicates that the file should be truncated before opened. + // This is only applicable if the file is regular. + Truncate bool } // SettableFileFlags is a subset of FileFlags above that can be changed @@ -118,6 +122,9 @@ func (f FileFlags) ToLinux() (mask uint) { if f.LargeFile { mask |= linux.O_LARGEFILE } + if f.Truncate { + mask |= linux.O_TRUNC + } switch { case f.Read && f.Write: diff --git a/pkg/sentry/fs/gofer/file_state.go b/pkg/sentry/fs/gofer/file_state.go index c2fbb4be9..bb8312849 100644 --- a/pkg/sentry/fs/gofer/file_state.go +++ b/pkg/sentry/fs/gofer/file_state.go @@ -28,8 +28,14 @@ func (f *fileOperations) afterLoad() { // Manually load the open handles. var err error + + // The file may have been opened with Truncate, but we don't + // want to re-open it with Truncate or we will lose data. + flags := f.flags + flags.Truncate = false + // TODO(b/38173783): Context is not plumbed to save/restore. - f.handles, err = f.inodeOperations.fileState.getHandles(context.Background(), f.flags, f.inodeOperations.cachingInodeOps) + f.handles, err = f.inodeOperations.fileState.getHandles(context.Background(), flags, f.inodeOperations.cachingInodeOps) if err != nil { return fmt.Errorf("failed to re-open handle: %v", err) } diff --git a/pkg/sentry/fs/gofer/handles.go b/pkg/sentry/fs/gofer/handles.go index 39c8ec33d..b86c49b39 100644 --- a/pkg/sentry/fs/gofer/handles.go +++ b/pkg/sentry/fs/gofer/handles.go @@ -64,7 +64,7 @@ func (h *handles) DecRef() { }) } -func newHandles(ctx context.Context, file contextFile, flags fs.FileFlags) (*handles, error) { +func newHandles(ctx context.Context, client *p9.Client, file contextFile, flags fs.FileFlags) (*handles, error) { _, newFile, err := file.walk(ctx, nil) if err != nil { return nil, err @@ -81,6 +81,9 @@ func newHandles(ctx context.Context, file contextFile, flags fs.FileFlags) (*han default: panic("impossible fs.FileFlags") } + if flags.Truncate && p9.VersionSupportsOpenTruncateFlag(client.Version()) { + p9flags |= p9.OpenTruncate + } hostFile, _, _, err := newFile.open(ctx, p9flags) if err != nil { diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 99910388f..4237bf353 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -180,7 +180,7 @@ func (i *inodeFileState) setSharedHandlesLocked(flags fs.FileFlags, h *handles) // given flags. func (i *inodeFileState) getHandles(ctx context.Context, flags fs.FileFlags, cache *fsutil.CachingInodeOperations) (*handles, error) { if !i.canShareHandles() { - return newHandles(ctx, i.file, flags) + return newHandles(ctx, i.s.client, i.file, flags) } i.handlesMu.Lock() @@ -201,19 +201,25 @@ func (i *inodeFileState) getHandles(ctx context.Context, flags fs.FileFlags, cac // whether previously open read handle was recreated. Host mappings must be // invalidated if so. func (i *inodeFileState) getHandlesLocked(ctx context.Context, flags fs.FileFlags) (*handles, bool, error) { - // Do we already have usable shared handles? - if flags.Write { + // Check if we are able to use cached handles. + if flags.Truncate && p9.VersionSupportsOpenTruncateFlag(i.s.client.Version()) { + // If we are truncating (and the gofer supports it), then we + // always need a new handle. Don't return one from the cache. + } else if flags.Write { if i.writeHandles != nil && (i.writeHandlesRW || !flags.Read) { + // File is opened for writing, and we have cached write + // handles that we can use. i.writeHandles.IncRef() return i.writeHandles, false, nil } } else if i.readHandles != nil { + // File is opened for reading and we have cached handles. i.readHandles.IncRef() return i.readHandles, false, nil } - // No; get new handles and cache them for future sharing. - h, err := newHandles(ctx, i.file, flags) + // Get new handles and cache them for future sharing. + h, err := newHandles(ctx, i.s.client, i.file, flags) if err != nil { return nil, false, err } @@ -239,7 +245,7 @@ func (i *inodeFileState) recreateReadHandles(ctx context.Context, writer *handle if !flags.Read { // Writer can't be used for read, must create a new handle. var err error - h, err = newHandles(ctx, i.file, fs.FileFlags{Read: true}) + h, err = newHandles(ctx, i.s.client, i.file, fs.FileFlags{Read: true}) if err != nil { return err } diff --git a/pkg/sentry/syscalls/linux/flags.go b/pkg/sentry/syscalls/linux/flags.go index 444f2b004..07961dad9 100644 --- a/pkg/sentry/syscalls/linux/flags.go +++ b/pkg/sentry/syscalls/linux/flags.go @@ -50,5 +50,6 @@ func linuxToFlags(mask uint) fs.FileFlags { Directory: mask&linux.O_DIRECTORY != 0, Async: mask&linux.O_ASYNC != 0, LargeFile: mask&linux.O_LARGEFILE != 0, + Truncate: mask&linux.O_TRUNC != 0, } } -- cgit v1.2.3 From c0f89eba6ebdec08460bd796fc62d6aef674d141 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 21 Nov 2019 11:29:49 -0800 Subject: Import and structure cleanup. PiperOrigin-RevId: 281795269 --- pkg/eventchannel/BUILD | 1 + pkg/flipcall/BUILD | 2 +- pkg/flipcall/flipcall_unsafe.go | 10 +- pkg/sentry/BUILD | 3 + pkg/sentry/fs/BUILD | 2 +- pkg/sentry/fs/fdpipe/pipe_opener_test.go | 1 + pkg/sentry/fs/overlay.go | 4 +- pkg/sentry/fsimpl/memfs/BUILD | 3 +- pkg/sentry/kernel/BUILD | 4 +- pkg/sentry/kernel/auth/BUILD | 2 +- pkg/sentry/kernel/futex/BUILD | 2 +- pkg/sentry/kernel/signalfd/BUILD | 4 +- pkg/sentry/kernel/task.go | 4 +- pkg/sentry/mm/BUILD | 2 +- pkg/sentry/mm/mm.go | 6 +- pkg/sentry/strace/strace.proto | 3 +- pkg/sentry/time/BUILD | 4 +- pkg/sentry/vfs/BUILD | 2 +- pkg/sentry/vfs/mount_unsafe.go | 4 +- pkg/state/object.proto | 56 ++++---- pkg/syncutil/BUILD | 54 ++++++++ pkg/syncutil/LICENSE | 27 ++++ pkg/syncutil/README.md | 5 + pkg/syncutil/atomicptr_unsafe.go | 47 +++++++ pkg/syncutil/atomicptrtest/BUILD | 29 ++++ pkg/syncutil/atomicptrtest/atomicptr_test.go | 31 +++++ pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go | 21 +++ pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go | 16 +++ pkg/syncutil/downgradable_rwmutex_test.go | 150 ++++++++++++++++++++ pkg/syncutil/downgradable_rwmutex_unsafe.go | 143 +++++++++++++++++++ pkg/syncutil/memmove_unsafe.go | 28 ++++ pkg/syncutil/norace_unsafe.go | 35 +++++ pkg/syncutil/race_unsafe.go | 41 ++++++ pkg/syncutil/seqatomic_unsafe.go | 72 ++++++++++ pkg/syncutil/seqatomictest/BUILD | 35 +++++ pkg/syncutil/seqatomictest/seqatomic_test.go | 132 ++++++++++++++++++ pkg/syncutil/seqcount.go | 149 ++++++++++++++++++++ pkg/syncutil/seqcount_test.go | 153 +++++++++++++++++++++ pkg/syncutil/syncutil.go | 7 + test/syscalls/linux/accept_bind.cc | 2 + test/syscalls/linux/accept_bind_stream.cc | 2 + test/syscalls/linux/chmod.cc | 1 + test/syscalls/linux/chroot.cc | 1 + test/syscalls/linux/clock_gettime.cc | 1 + test/syscalls/linux/concurrency.cc | 1 + test/syscalls/linux/exec_binary.cc | 1 + test/syscalls/linux/file_base.h | 1 + test/syscalls/linux/flock.cc | 1 + test/syscalls/linux/fork.cc | 1 + test/syscalls/linux/getdents.cc | 1 + test/syscalls/linux/ip_socket_test_util.cc | 5 +- test/syscalls/linux/memory_accounting.cc | 1 + test/syscalls/linux/mlock.cc | 1 + test/syscalls/linux/mmap.cc | 1 + test/syscalls/linux/mount.cc | 1 + test/syscalls/linux/read.cc | 1 + test/syscalls/linux/rename.cc | 1 + test/syscalls/linux/seccomp.cc | 1 + test/syscalls/linux/select.cc | 1 + test/syscalls/linux/shm.cc | 1 - test/syscalls/linux/socket_blocking.cc | 1 + test/syscalls/linux/socket_ip_loopback_blocking.cc | 1 + .../linux/socket_ip_tcp_generic_loopback.cc | 1 + .../linux/socket_ip_tcp_loopback_blocking.cc | 1 + .../linux/socket_ip_tcp_loopback_nonblock.cc | 1 + .../socket_ipv4_tcp_unbound_external_networking.cc | 1 + ...et_ipv4_tcp_unbound_external_networking_test.cc | 3 +- ...et_ipv4_udp_unbound_external_networking_test.cc | 3 +- test/syscalls/linux/socket_netlink_util.cc | 4 +- test/syscalls/linux/socket_unix_blocking_local.cc | 3 +- test/syscalls/linux/socket_unix_dgram.cc | 1 + .../linux/socket_unix_dgram_non_blocking.cc | 1 + .../linux/socket_unix_non_stream_blocking_local.cc | 3 +- test/syscalls/linux/socket_unix_seqpacket.cc | 1 + .../linux/socket_unix_stream_blocking_local.cc | 3 +- .../linux/socket_unix_stream_nonblock_local.cc | 3 +- .../syscalls/linux/socket_unix_unbound_abstract.cc | 1 + .../linux/socket_unix_unbound_filesystem.cc | 1 + .../linux/socket_unix_unbound_seqpacket.cc | 1 + test/syscalls/linux/socket_unix_unbound_stream.cc | 1 + test/syscalls/linux/sync.cc | 3 +- test/syscalls/linux/truncate.cc | 1 + .../syscalls/linux/unix_domain_socket_test_util.cc | 1 + test/syscalls/linux/unix_domain_socket_test_util.h | 1 + test/syscalls/linux/utimes.cc | 1 + test/syscalls/linux/vdso_clock_gettime.cc | 1 + test/util/fs_util_test.cc | 4 +- test/util/mount_util.h | 1 + test/util/posix_error_test.cc | 1 + test/util/rlimit_util.cc | 1 + test/util/signal_util.cc | 1 + test/util/signal_util.h | 1 + test/util/temp_path.h | 1 + test/util/test_util_test.cc | 1 + third_party/gvsync/BUILD | 53 ------- third_party/gvsync/LICENSE | 27 ---- third_party/gvsync/README.md | 3 - third_party/gvsync/atomicptr_unsafe.go | 47 ------- third_party/gvsync/atomicptrtest/BUILD | 28 ---- third_party/gvsync/atomicptrtest/atomicptr_test.go | 31 ----- .../gvsync/downgradable_rwmutex_1_12_unsafe.go | 21 --- .../gvsync/downgradable_rwmutex_1_13_unsafe.go | 16 --- third_party/gvsync/downgradable_rwmutex_test.go | 150 -------------------- third_party/gvsync/downgradable_rwmutex_unsafe.go | 143 ------------------- third_party/gvsync/gvsync.go | 7 - third_party/gvsync/memmove_unsafe.go | 28 ---- third_party/gvsync/norace_unsafe.go | 35 ----- third_party/gvsync/race_unsafe.go | 41 ------ third_party/gvsync/seqatomic_unsafe.go | 72 ---------- third_party/gvsync/seqatomictest/BUILD | 34 ----- third_party/gvsync/seqatomictest/seqatomic_test.go | 132 ------------------ third_party/gvsync/seqcount.go | 149 -------------------- third_party/gvsync/seqcount_test.go | 153 --------------------- tools/go_marshal/test/BUILD | 3 +- tools/go_marshal/test/external/BUILD | 4 +- 115 files changed, 1302 insertions(+), 1250 deletions(-) create mode 100644 pkg/syncutil/BUILD create mode 100644 pkg/syncutil/LICENSE create mode 100644 pkg/syncutil/README.md create mode 100644 pkg/syncutil/atomicptr_unsafe.go create mode 100644 pkg/syncutil/atomicptrtest/BUILD create mode 100644 pkg/syncutil/atomicptrtest/atomicptr_test.go create mode 100644 pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go create mode 100644 pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go create mode 100644 pkg/syncutil/downgradable_rwmutex_test.go create mode 100644 pkg/syncutil/downgradable_rwmutex_unsafe.go create mode 100644 pkg/syncutil/memmove_unsafe.go create mode 100644 pkg/syncutil/norace_unsafe.go create mode 100644 pkg/syncutil/race_unsafe.go create mode 100644 pkg/syncutil/seqatomic_unsafe.go create mode 100644 pkg/syncutil/seqatomictest/BUILD create mode 100644 pkg/syncutil/seqatomictest/seqatomic_test.go create mode 100644 pkg/syncutil/seqcount.go create mode 100644 pkg/syncutil/seqcount_test.go create mode 100644 pkg/syncutil/syncutil.go delete mode 100644 third_party/gvsync/BUILD delete mode 100644 third_party/gvsync/LICENSE delete mode 100644 third_party/gvsync/README.md delete mode 100644 third_party/gvsync/atomicptr_unsafe.go delete mode 100644 third_party/gvsync/atomicptrtest/BUILD delete mode 100644 third_party/gvsync/atomicptrtest/atomicptr_test.go delete mode 100644 third_party/gvsync/downgradable_rwmutex_1_12_unsafe.go delete mode 100644 third_party/gvsync/downgradable_rwmutex_1_13_unsafe.go delete mode 100644 third_party/gvsync/downgradable_rwmutex_test.go delete mode 100644 third_party/gvsync/downgradable_rwmutex_unsafe.go delete mode 100644 third_party/gvsync/gvsync.go delete mode 100644 third_party/gvsync/memmove_unsafe.go delete mode 100644 third_party/gvsync/norace_unsafe.go delete mode 100644 third_party/gvsync/race_unsafe.go delete mode 100644 third_party/gvsync/seqatomic_unsafe.go delete mode 100644 third_party/gvsync/seqatomictest/BUILD delete mode 100644 third_party/gvsync/seqatomictest/seqatomic_test.go delete mode 100644 third_party/gvsync/seqcount.go delete mode 100644 third_party/gvsync/seqcount_test.go (limited to 'pkg/sentry/fs') diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD index 71f2abc83..0b4b7cc44 100644 --- a/pkg/eventchannel/BUILD +++ b/pkg/eventchannel/BUILD @@ -25,6 +25,7 @@ go_library( proto_library( name = "eventchannel_proto", srcs = ["event.proto"], + visibility = ["//:sandbox"], ) go_proto_library( diff --git a/pkg/flipcall/BUILD b/pkg/flipcall/BUILD index 5643d5f26..e590a71ba 100644 --- a/pkg/flipcall/BUILD +++ b/pkg/flipcall/BUILD @@ -19,7 +19,7 @@ go_library( "//pkg/abi/linux", "//pkg/log", "//pkg/memutil", - "//third_party/gvsync", + "//pkg/syncutil", ], ) diff --git a/pkg/flipcall/flipcall_unsafe.go b/pkg/flipcall/flipcall_unsafe.go index a37952637..27b8939fc 100644 --- a/pkg/flipcall/flipcall_unsafe.go +++ b/pkg/flipcall/flipcall_unsafe.go @@ -18,7 +18,7 @@ import ( "reflect" "unsafe" - "gvisor.dev/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/pkg/syncutil" ) // Packets consist of a 16-byte header followed by an arbitrarily-sized @@ -75,13 +75,13 @@ func (ep *Endpoint) Data() []byte { var ioSync int64 func raceBecomeActive() { - if gvsync.RaceEnabled { - gvsync.RaceAcquire((unsafe.Pointer)(&ioSync)) + if syncutil.RaceEnabled { + syncutil.RaceAcquire((unsafe.Pointer)(&ioSync)) } } func raceBecomeInactive() { - if gvsync.RaceEnabled { - gvsync.RaceReleaseMerge((unsafe.Pointer)(&ioSync)) + if syncutil.RaceEnabled { + syncutil.RaceReleaseMerge((unsafe.Pointer)(&ioSync)) } } diff --git a/pkg/sentry/BUILD b/pkg/sentry/BUILD index 2d6379c86..2a7122957 100644 --- a/pkg/sentry/BUILD +++ b/pkg/sentry/BUILD @@ -10,5 +10,8 @@ package_group( "//runsc/...", # Code generated by go_marshal relies on go_marshal libraries. "//tools/go_marshal/...", + + # Keep the old paths as a temporary measure. + "//third_party/golang/gvisor/pkg/sentry/...", ], ) diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 378602cc9..c035ffff7 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -68,9 +68,9 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/usermem", "//pkg/state", + "//pkg/syncutil", "//pkg/syserror", "//pkg/waiter", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go index 8e4d839e1..577445148 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go @@ -25,6 +25,7 @@ import ( "time" "github.com/google/uuid" + "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index 1d3ff39e0..25573e986 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syncutil" "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/third_party/gvsync" ) // The virtual filesystem implements an overlay configuration. For a high-level @@ -199,7 +199,7 @@ type overlayEntry struct { upper *Inode // dirCacheMu protects dirCache. - dirCacheMu gvsync.DowngradableRWMutex `state:"nosave"` + dirCacheMu syncutil.DowngradableRWMutex `state:"nosave"` // dirCache is cache of DentAttrs from upper and lower Inodes. dirCache *SortedDentryMap diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD index 04d667273..952b20c51 100644 --- a/pkg/sentry/fsimpl/memfs/BUILD +++ b/pkg/sentry/fsimpl/memfs/BUILD @@ -1,10 +1,9 @@ load("//tools/go_stateify:defs.bzl", "go_library") load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) -load("//tools/go_generics:defs.bzl", "go_template_instance") - go_template_instance( name = "dentry_list", out = "dentry_list.go", diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index e041c51b3..2706927ff 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -35,7 +35,7 @@ go_template_instance( out = "seqatomic_taskgoroutineschedinfo_unsafe.go", package = "kernel", suffix = "TaskGoroutineSchedInfo", - template = "//third_party/gvsync:generic_seqatomic", + template = "//pkg/syncutil:generic_seqatomic", types = { "Value": "TaskGoroutineSchedInfo", }, @@ -209,12 +209,12 @@ go_library( "//pkg/sentry/usermem", "//pkg/state", "//pkg/state/statefile", + "//pkg/syncutil", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/stack", "//pkg/waiter", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index 51de4568a..04c244447 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -8,7 +8,7 @@ go_template_instance( out = "atomicptr_credentials_unsafe.go", package = "auth", suffix = "Credentials", - template = "//third_party/gvsync:generic_atomicptr", + template = "//pkg/syncutil:generic_atomicptr", types = { "Value": "Credentials", }, diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index 34286c7a8..75ec31761 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -9,7 +9,7 @@ go_template_instance( out = "atomicptr_bucket_unsafe.go", package = "futex", suffix = "Bucket", - template = "//third_party/gvsync:generic_atomicptr", + template = "//pkg/syncutil:generic_atomicptr", types = { "Value": "bucket", }, diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD index 50b69d154..9f7e19b4d 100644 --- a/pkg/sentry/kernel/signalfd/BUILD +++ b/pkg/sentry/kernel/signalfd/BUILD @@ -1,7 +1,7 @@ -package(licenses = ["notice"]) - load("//tools/go_stateify:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_library( name = "signalfd", srcs = ["signalfd.go"], diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 9be3dae3c..80c8e5464 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -35,8 +35,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syncutil" "gvisor.dev/gvisor/pkg/waiter" - "gvisor.dev/gvisor/third_party/gvsync" ) // Task represents a thread of execution in the untrusted app. It @@ -83,7 +83,7 @@ type Task struct { // // gosched is protected by goschedSeq. gosched is owned by the task // goroutine. - goschedSeq gvsync.SeqCount `state:"nosave"` + goschedSeq syncutil.SeqCount `state:"nosave"` gosched TaskGoroutineSchedInfo // yieldCount is the number of times the task goroutine has called diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index a804b8b5c..839931f67 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -118,9 +118,9 @@ go_library( "//pkg/sentry/safemem", "//pkg/sentry/usage", "//pkg/sentry/usermem", + "//pkg/syncutil", "//pkg/syserror", "//pkg/tcpip/buffer", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index f350e0109..58a5c186d 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -44,7 +44,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/pkg/syncutil" ) // MemoryManager implements a virtual address space. @@ -82,7 +82,7 @@ type MemoryManager struct { users int32 // mappingMu is analogous to Linux's struct mm_struct::mmap_sem. - mappingMu gvsync.DowngradableRWMutex `state:"nosave"` + mappingMu syncutil.DowngradableRWMutex `state:"nosave"` // vmas stores virtual memory areas. Since vmas are stored by value, // clients should usually use vmaIterator.ValuePtr() instead of @@ -125,7 +125,7 @@ type MemoryManager struct { // activeMu is loosely analogous to Linux's struct // mm_struct::page_table_lock. - activeMu gvsync.DowngradableRWMutex `state:"nosave"` + activeMu syncutil.DowngradableRWMutex `state:"nosave"` // pmas stores platform mapping areas used to implement vmas. Since pmas // are stored by value, clients should usually use pmaIterator.ValuePtr() diff --git a/pkg/sentry/strace/strace.proto b/pkg/sentry/strace/strace.proto index 4b2f73a5f..906c52c51 100644 --- a/pkg/sentry/strace/strace.proto +++ b/pkg/sentry/strace/strace.proto @@ -32,8 +32,7 @@ message Strace { } } -message StraceEnter { -} +message StraceEnter {} message StraceExit { // Return value formatted as string. diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD index d3a4cd943..18e212dff 100644 --- a/pkg/sentry/time/BUILD +++ b/pkg/sentry/time/BUILD @@ -9,7 +9,7 @@ go_template_instance( out = "seqatomic_parameters_unsafe.go", package = "time", suffix = "Parameters", - template = "//third_party/gvsync:generic_seqatomic", + template = "//pkg/syncutil:generic_seqatomic", types = { "Value": "Parameters", }, @@ -36,8 +36,8 @@ go_library( deps = [ "//pkg/log", "//pkg/metric", + "//pkg/syncutil", "//pkg/syserror", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 4f2c2de9f..74a325309 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -33,9 +33,9 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/usermem", + "//pkg/syncutil", "//pkg/syserror", "//pkg/waiter", - "//third_party/gvsync", ], ) diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go index 75e6c7dfa..c98b42f91 100644 --- a/pkg/sentry/vfs/mount_unsafe.go +++ b/pkg/sentry/vfs/mount_unsafe.go @@ -26,7 +26,7 @@ import ( "sync/atomic" "unsafe" - "gvisor.dev/gvisor/third_party/gvsync" + "gvisor.dev/gvisor/pkg/syncutil" ) // mountKey represents the location at which a Mount is mounted. It is @@ -72,7 +72,7 @@ type mountTable struct { // intrinsics and inline assembly, limiting the performance of this // approach.) - seq gvsync.SeqCount + seq syncutil.SeqCount seed uint32 // for hashing keys // size holds both length (number of elements) and capacity (number of diff --git a/pkg/state/object.proto b/pkg/state/object.proto index 952289069..5ebcfb151 100644 --- a/pkg/state/object.proto +++ b/pkg/state/object.proto @@ -18,8 +18,8 @@ package gvisor.state.statefile; // Slice is a slice value. message Slice { - uint32 length = 1; - uint32 capacity = 2; + uint32 length = 1; + uint32 capacity = 2; uint64 ref_value = 3; } @@ -30,13 +30,13 @@ message Array { // Map is a map value. message Map { - repeated Object keys = 1; + repeated Object keys = 1; repeated Object values = 2; } // Interface is an interface value. message Interface { - string type = 1; + string type = 1; Object value = 2; } @@ -47,7 +47,7 @@ message Struct { // Field encodes a single field. message Field { - string name = 1; + string name = 1; Object value = 2; } @@ -113,28 +113,28 @@ message Float32s { // Note that ref_value references an Object.id, below. message Object { oneof value { - bool bool_value = 1; - bytes string_value = 2; - int64 int64_value = 3; - uint64 uint64_value = 4; - double double_value = 5; - uint64 ref_value = 6; - Slice slice_value = 7; - Array array_value = 8; - Interface interface_value = 9; - Struct struct_value = 10; - Map map_value = 11; - bytes byte_array_value = 12; - Uint16s uint16_array_value = 13; - Uint32s uint32_array_value = 14; - Uint64s uint64_array_value = 15; - Uintptrs uintptr_array_value = 16; - Int8s int8_array_value = 17; - Int16s int16_array_value = 18; - Int32s int32_array_value = 19; - Int64s int64_array_value = 20; - Bools bool_array_value = 21; - Float64s float64_array_value = 22; - Float32s float32_array_value = 23; + bool bool_value = 1; + bytes string_value = 2; + int64 int64_value = 3; + uint64 uint64_value = 4; + double double_value = 5; + uint64 ref_value = 6; + Slice slice_value = 7; + Array array_value = 8; + Interface interface_value = 9; + Struct struct_value = 10; + Map map_value = 11; + bytes byte_array_value = 12; + Uint16s uint16_array_value = 13; + Uint32s uint32_array_value = 14; + Uint64s uint64_array_value = 15; + Uintptrs uintptr_array_value = 16; + Int8s int8_array_value = 17; + Int16s int16_array_value = 18; + Int32s int32_array_value = 19; + Int64s int64_array_value = 20; + Bools bool_array_value = 21; + Float64s float64_array_value = 22; + Float32s float32_array_value = 23; } } diff --git a/pkg/syncutil/BUILD b/pkg/syncutil/BUILD new file mode 100644 index 000000000..b06a90bef --- /dev/null +++ b/pkg/syncutil/BUILD @@ -0,0 +1,54 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template") + +package( + default_visibility = ["//:sandbox"], + licenses = ["notice"], +) + +exports_files(["LICENSE"]) + +go_template( + name = "generic_atomicptr", + srcs = ["atomicptr_unsafe.go"], + types = [ + "Value", + ], +) + +go_template( + name = "generic_seqatomic", + srcs = ["seqatomic_unsafe.go"], + types = [ + "Value", + ], + deps = [ + ":sync", + ], +) + +go_library( + name = "syncutil", + srcs = [ + "downgradable_rwmutex_1_12_unsafe.go", + "downgradable_rwmutex_1_13_unsafe.go", + "downgradable_rwmutex_unsafe.go", + "memmove_unsafe.go", + "norace_unsafe.go", + "race_unsafe.go", + "seqcount.go", + "syncutil.go", + ], + importpath = "gvisor.dev/gvisor/pkg/syncutil", +) + +go_test( + name = "syncutil_test", + size = "small", + srcs = [ + "downgradable_rwmutex_test.go", + "seqcount_test.go", + ], + embed = [":syncutil"], +) diff --git a/pkg/syncutil/LICENSE b/pkg/syncutil/LICENSE new file mode 100644 index 000000000..6a66aea5e --- /dev/null +++ b/pkg/syncutil/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pkg/syncutil/README.md b/pkg/syncutil/README.md new file mode 100644 index 000000000..2183c4e20 --- /dev/null +++ b/pkg/syncutil/README.md @@ -0,0 +1,5 @@ +# Syncutil + +This package provides additional synchronization primitives not provided by the +Go stdlib 'sync' package. It is partially derived from the upstream 'sync' +package from go1.10. diff --git a/pkg/syncutil/atomicptr_unsafe.go b/pkg/syncutil/atomicptr_unsafe.go new file mode 100644 index 000000000..525c4beed --- /dev/null +++ b/pkg/syncutil/atomicptr_unsafe.go @@ -0,0 +1,47 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package template doesn't exist. This file must be instantiated using the +// go_template_instance rule in tools/go_generics/defs.bzl. +package template + +import ( + "sync/atomic" + "unsafe" +) + +// Value is a required type parameter. +type Value struct{} + +// An AtomicPtr is a pointer to a value of type Value that can be atomically +// loaded and stored. The zero value of an AtomicPtr represents nil. +// +// Note that copying AtomicPtr by value performs a non-atomic read of the +// stored pointer, which is unsafe if Store() can be called concurrently; in +// this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable +type AtomicPtr struct { + ptr unsafe.Pointer `state:".(*Value)"` +} + +func (p *AtomicPtr) savePtr() *Value { + return p.Load() +} + +func (p *AtomicPtr) loadPtr(v *Value) { + p.Store(v) +} + +// Load returns the value set by the most recent Store. It returns nil if there +// has been no previous call to Store. +func (p *AtomicPtr) Load() *Value { + return (*Value)(atomic.LoadPointer(&p.ptr)) +} + +// Store sets the value returned by Load to x. +func (p *AtomicPtr) Store(x *Value) { + atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) +} diff --git a/pkg/syncutil/atomicptrtest/BUILD b/pkg/syncutil/atomicptrtest/BUILD new file mode 100644 index 000000000..63f411a90 --- /dev/null +++ b/pkg/syncutil/atomicptrtest/BUILD @@ -0,0 +1,29 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "atomicptr_int", + out = "atomicptr_int_unsafe.go", + package = "atomicptr", + suffix = "Int", + template = "//pkg/syncutil:generic_atomicptr", + types = { + "Value": "int", + }, +) + +go_library( + name = "atomicptr", + srcs = ["atomicptr_int_unsafe.go"], + importpath = "gvisor.dev/gvisor/pkg/syncutil/atomicptr", +) + +go_test( + name = "atomicptr_test", + size = "small", + srcs = ["atomicptr_test.go"], + embed = [":atomicptr"], +) diff --git a/pkg/syncutil/atomicptrtest/atomicptr_test.go b/pkg/syncutil/atomicptrtest/atomicptr_test.go new file mode 100644 index 000000000..8fdc5112e --- /dev/null +++ b/pkg/syncutil/atomicptrtest/atomicptr_test.go @@ -0,0 +1,31 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package atomicptr + +import ( + "testing" +) + +func newInt(val int) *int { + return &val +} + +func TestAtomicPtr(t *testing.T) { + var p AtomicPtrInt + if got := p.Load(); got != nil { + t.Errorf("initial value is %p (%v), wanted nil", got, got) + } + want := newInt(42) + p.Store(want) + if got := p.Load(); got != want { + t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) + } + want = newInt(100) + p.Store(want) + if got := p.Load(); got != want { + t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) + } +} diff --git a/pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go b/pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go new file mode 100644 index 000000000..7c6336e62 --- /dev/null +++ b/pkg/syncutil/downgradable_rwmutex_1_12_unsafe.go @@ -0,0 +1,21 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.12 +// +build !go1.13 + +// TODO(b/133868570): Delete once Go 1.12 is no longer supported. + +package syncutil + +import _ "unsafe" + +//go:linkname runtimeSemrelease112 sync.runtime_Semrelease +func runtimeSemrelease112(s *uint32, handoff bool) + +func runtimeSemrelease(s *uint32, handoff bool, skipframes int) { + // 'skipframes' is only available starting from 1.13. + runtimeSemrelease112(s, handoff) +} diff --git a/pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go b/pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go new file mode 100644 index 000000000..3c3673119 --- /dev/null +++ b/pkg/syncutil/downgradable_rwmutex_1_13_unsafe.go @@ -0,0 +1,16 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.13 +// +build !go1.15 + +// Check go:linkname function signatures when updating Go version. + +package syncutil + +import _ "unsafe" + +//go:linkname runtimeSemrelease sync.runtime_Semrelease +func runtimeSemrelease(s *uint32, handoff bool, skipframes int) diff --git a/pkg/syncutil/downgradable_rwmutex_test.go b/pkg/syncutil/downgradable_rwmutex_test.go new file mode 100644 index 000000000..ffaf7ecc7 --- /dev/null +++ b/pkg/syncutil/downgradable_rwmutex_test.go @@ -0,0 +1,150 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// GOMAXPROCS=10 go test + +// Copy/pasted from the standard library's sync/rwmutex_test.go, except for the +// addition of downgradingWriter and the renaming of num_iterations to +// numIterations to shut up Golint. + +package syncutil + +import ( + "fmt" + "runtime" + "sync/atomic" + "testing" +) + +func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { + m.RLock() + clocked <- true + <-cunlock + m.RUnlock() + cdone <- true +} + +func doTestParallelReaders(numReaders, gomaxprocs int) { + runtime.GOMAXPROCS(gomaxprocs) + var m DowngradableRWMutex + clocked := make(chan bool) + cunlock := make(chan bool) + cdone := make(chan bool) + for i := 0; i < numReaders; i++ { + go parallelReader(&m, clocked, cunlock, cdone) + } + // Wait for all parallel RLock()s to succeed. + for i := 0; i < numReaders; i++ { + <-clocked + } + for i := 0; i < numReaders; i++ { + cunlock <- true + } + // Wait for the goroutines to finish. + for i := 0; i < numReaders; i++ { + <-cdone + } +} + +func TestParallelReaders(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) + doTestParallelReaders(1, 4) + doTestParallelReaders(3, 4) + doTestParallelReaders(4, 2) +} + +func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { + for i := 0; i < numIterations; i++ { + rwm.RLock() + n := atomic.AddInt32(activity, 1) + if n < 1 || n >= 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + atomic.AddInt32(activity, -1) + rwm.RUnlock() + } + cdone <- true +} + +func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { + for i := 0; i < numIterations; i++ { + rwm.Lock() + n := atomic.AddInt32(activity, 10000) + if n != 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + atomic.AddInt32(activity, -10000) + rwm.Unlock() + } + cdone <- true +} + +func downgradingWriter(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { + for i := 0; i < numIterations; i++ { + rwm.Lock() + n := atomic.AddInt32(activity, 10000) + if n != 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + atomic.AddInt32(activity, -10000) + rwm.DowngradeLock() + n = atomic.AddInt32(activity, 1) + if n < 1 || n >= 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + n = atomic.AddInt32(activity, -1) + rwm.RUnlock() + } + cdone <- true +} + +func HammerDowngradableRWMutex(gomaxprocs, numReaders, numIterations int) { + runtime.GOMAXPROCS(gomaxprocs) + // Number of active readers + 10000 * number of active writers. + var activity int32 + var rwm DowngradableRWMutex + cdone := make(chan bool) + go writer(&rwm, numIterations, &activity, cdone) + go downgradingWriter(&rwm, numIterations, &activity, cdone) + var i int + for i = 0; i < numReaders/2; i++ { + go reader(&rwm, numIterations, &activity, cdone) + } + go writer(&rwm, numIterations, &activity, cdone) + go downgradingWriter(&rwm, numIterations, &activity, cdone) + for ; i < numReaders; i++ { + go reader(&rwm, numIterations, &activity, cdone) + } + // Wait for the 4 writers and all readers to finish. + for i := 0; i < 4+numReaders; i++ { + <-cdone + } +} + +func TestDowngradableRWMutex(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) + n := 1000 + if testing.Short() { + n = 5 + } + HammerDowngradableRWMutex(1, 1, n) + HammerDowngradableRWMutex(1, 3, n) + HammerDowngradableRWMutex(1, 10, n) + HammerDowngradableRWMutex(4, 1, n) + HammerDowngradableRWMutex(4, 3, n) + HammerDowngradableRWMutex(4, 10, n) + HammerDowngradableRWMutex(10, 1, n) + HammerDowngradableRWMutex(10, 3, n) + HammerDowngradableRWMutex(10, 10, n) + HammerDowngradableRWMutex(10, 5, n) +} diff --git a/pkg/syncutil/downgradable_rwmutex_unsafe.go b/pkg/syncutil/downgradable_rwmutex_unsafe.go new file mode 100644 index 000000000..07feca402 --- /dev/null +++ b/pkg/syncutil/downgradable_rwmutex_unsafe.go @@ -0,0 +1,143 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.12 +// +build !go1.15 + +// Check go:linkname function signatures when updating Go version. + +// This is mostly copied from the standard library's sync/rwmutex.go. +// +// Happens-before relationships indicated to the race detector: +// - Unlock -> Lock (via writerSem) +// - Unlock -> RLock (via readerSem) +// - RUnlock -> Lock (via writerSem) +// - DowngradeLock -> RLock (via readerSem) + +package syncutil + +import ( + "sync" + "sync/atomic" + "unsafe" +) + +//go:linkname runtimeSemacquire sync.runtime_Semacquire +func runtimeSemacquire(s *uint32) + +// DowngradableRWMutex is identical to sync.RWMutex, but adds the DowngradeLock +// method. +type DowngradableRWMutex struct { + w sync.Mutex // held if there are pending writers + writerSem uint32 // semaphore for writers to wait for completing readers + readerSem uint32 // semaphore for readers to wait for completing writers + readerCount int32 // number of pending readers + readerWait int32 // number of departing readers +} + +const rwmutexMaxReaders = 1 << 30 + +// RLock locks rw for reading. +func (rw *DowngradableRWMutex) RLock() { + if RaceEnabled { + RaceDisable() + } + if atomic.AddInt32(&rw.readerCount, 1) < 0 { + // A writer is pending, wait for it. + runtimeSemacquire(&rw.readerSem) + } + if RaceEnabled { + RaceEnable() + RaceAcquire(unsafe.Pointer(&rw.readerSem)) + } +} + +// RUnlock undoes a single RLock call. +func (rw *DowngradableRWMutex) RUnlock() { + if RaceEnabled { + RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) + RaceDisable() + } + if r := atomic.AddInt32(&rw.readerCount, -1); r < 0 { + if r+1 == 0 || r+1 == -rwmutexMaxReaders { + panic("RUnlock of unlocked DowngradableRWMutex") + } + // A writer is pending. + if atomic.AddInt32(&rw.readerWait, -1) == 0 { + // The last reader unblocks the writer. + runtimeSemrelease(&rw.writerSem, false, 0) + } + } + if RaceEnabled { + RaceEnable() + } +} + +// Lock locks rw for writing. +func (rw *DowngradableRWMutex) Lock() { + if RaceEnabled { + RaceDisable() + } + // First, resolve competition with other writers. + rw.w.Lock() + // Announce to readers there is a pending writer. + r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders + // Wait for active readers. + if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 { + runtimeSemacquire(&rw.writerSem) + } + if RaceEnabled { + RaceEnable() + RaceAcquire(unsafe.Pointer(&rw.writerSem)) + } +} + +// Unlock unlocks rw for writing. +func (rw *DowngradableRWMutex) Unlock() { + if RaceEnabled { + RaceRelease(unsafe.Pointer(&rw.writerSem)) + RaceRelease(unsafe.Pointer(&rw.readerSem)) + RaceDisable() + } + // Announce to readers there is no active writer. + r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders) + if r >= rwmutexMaxReaders { + panic("Unlock of unlocked DowngradableRWMutex") + } + // Unblock blocked readers, if any. + for i := 0; i < int(r); i++ { + runtimeSemrelease(&rw.readerSem, false, 0) + } + // Allow other writers to proceed. + rw.w.Unlock() + if RaceEnabled { + RaceEnable() + } +} + +// DowngradeLock atomically unlocks rw for writing and locks it for reading. +func (rw *DowngradableRWMutex) DowngradeLock() { + if RaceEnabled { + RaceRelease(unsafe.Pointer(&rw.readerSem)) + RaceDisable() + } + // Announce to readers there is no active writer and one additional reader. + r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders+1) + if r >= rwmutexMaxReaders+1 { + panic("DowngradeLock of unlocked DowngradableRWMutex") + } + // Unblock blocked readers, if any. Note that this loop starts as 1 since r + // includes this goroutine. + for i := 1; i < int(r); i++ { + runtimeSemrelease(&rw.readerSem, false, 0) + } + // Allow other writers to proceed to rw.w.Lock(). Note that they will still + // block on rw.writerSem since at least this reader exists, such that + // DowngradeLock() is atomic with the previous write lock. + rw.w.Unlock() + if RaceEnabled { + RaceEnable() + } +} diff --git a/pkg/syncutil/memmove_unsafe.go b/pkg/syncutil/memmove_unsafe.go new file mode 100644 index 000000000..348675baa --- /dev/null +++ b/pkg/syncutil/memmove_unsafe.go @@ -0,0 +1,28 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.12 +// +build !go1.15 + +// Check go:linkname function signatures when updating Go version. + +package syncutil + +import ( + "unsafe" +) + +//go:linkname memmove runtime.memmove +//go:noescape +func memmove(to, from unsafe.Pointer, n uintptr) + +// Memmove is exported for SeqAtomicLoad/SeqAtomicTryLoad, which can't +// define it because go_generics can't update the go:linkname annotation. +// Furthermore, go:linkname silently doesn't work if the local name is exported +// (this is of course undocumented), which is why this indirection is +// necessary. +func Memmove(to, from unsafe.Pointer, n uintptr) { + memmove(to, from, n) +} diff --git a/pkg/syncutil/norace_unsafe.go b/pkg/syncutil/norace_unsafe.go new file mode 100644 index 000000000..0a0a9deda --- /dev/null +++ b/pkg/syncutil/norace_unsafe.go @@ -0,0 +1,35 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !race + +package syncutil + +import ( + "unsafe" +) + +// RaceEnabled is true if the Go data race detector is enabled. +const RaceEnabled = false + +// RaceDisable has the same semantics as runtime.RaceDisable. +func RaceDisable() { +} + +// RaceEnable has the same semantics as runtime.RaceEnable. +func RaceEnable() { +} + +// RaceAcquire has the same semantics as runtime.RaceAcquire. +func RaceAcquire(addr unsafe.Pointer) { +} + +// RaceRelease has the same semantics as runtime.RaceRelease. +func RaceRelease(addr unsafe.Pointer) { +} + +// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. +func RaceReleaseMerge(addr unsafe.Pointer) { +} diff --git a/pkg/syncutil/race_unsafe.go b/pkg/syncutil/race_unsafe.go new file mode 100644 index 000000000..206067ec1 --- /dev/null +++ b/pkg/syncutil/race_unsafe.go @@ -0,0 +1,41 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build race + +package syncutil + +import ( + "runtime" + "unsafe" +) + +// RaceEnabled is true if the Go data race detector is enabled. +const RaceEnabled = true + +// RaceDisable has the same semantics as runtime.RaceDisable. +func RaceDisable() { + runtime.RaceDisable() +} + +// RaceEnable has the same semantics as runtime.RaceEnable. +func RaceEnable() { + runtime.RaceEnable() +} + +// RaceAcquire has the same semantics as runtime.RaceAcquire. +func RaceAcquire(addr unsafe.Pointer) { + runtime.RaceAcquire(addr) +} + +// RaceRelease has the same semantics as runtime.RaceRelease. +func RaceRelease(addr unsafe.Pointer) { + runtime.RaceRelease(addr) +} + +// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. +func RaceReleaseMerge(addr unsafe.Pointer) { + runtime.RaceReleaseMerge(addr) +} diff --git a/pkg/syncutil/seqatomic_unsafe.go b/pkg/syncutil/seqatomic_unsafe.go new file mode 100644 index 000000000..cb6d2eb22 --- /dev/null +++ b/pkg/syncutil/seqatomic_unsafe.go @@ -0,0 +1,72 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package template doesn't exist. This file must be instantiated using the +// go_template_instance rule in tools/go_generics/defs.bzl. +package template + +import ( + "fmt" + "reflect" + "strings" + "unsafe" + + "gvisor.dev/gvisor/pkg/syncutil" +) + +// Value is a required type parameter. +// +// Value must not contain any pointers, including interface objects, function +// objects, slices, maps, channels, unsafe.Pointer, and arrays or structs +// containing any of the above. An init() function will panic if this property +// does not hold. +type Value struct{} + +// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race +// with any writer critical sections in sc. +func SeqAtomicLoad(sc *syncutil.SeqCount, ptr *Value) Value { + // This function doesn't use SeqAtomicTryLoad because doing so is + // measurably, significantly (~20%) slower; Go is awful at inlining. + var val Value + for { + epoch := sc.BeginRead() + if syncutil.RaceEnabled { + // runtime.RaceDisable() doesn't actually stop the race detector, + // so it can't help us here. Instead, call runtime.memmove + // directly, which is not instrumented by the race detector. + syncutil.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + // This is ~40% faster for short reads than going through memmove. + val = *ptr + } + if sc.ReadOk(epoch) { + break + } + } + return val +} + +// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section +// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read +// would race with a writer critical section, SeqAtomicTryLoad returns +// (unspecified, false). +func SeqAtomicTryLoad(sc *syncutil.SeqCount, epoch syncutil.SeqCountEpoch, ptr *Value) (Value, bool) { + var val Value + if syncutil.RaceEnabled { + syncutil.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + val = *ptr + } + return val, sc.ReadOk(epoch) +} + +func init() { + var val Value + typ := reflect.TypeOf(val) + name := typ.Name() + if ptrs := syncutil.PointersInType(typ, name); len(ptrs) != 0 { + panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n"))) + } +} diff --git a/pkg/syncutil/seqatomictest/BUILD b/pkg/syncutil/seqatomictest/BUILD new file mode 100644 index 000000000..ba18f3238 --- /dev/null +++ b/pkg/syncutil/seqatomictest/BUILD @@ -0,0 +1,35 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "seqatomic_int", + out = "seqatomic_int_unsafe.go", + package = "seqatomic", + suffix = "Int", + template = "//pkg/syncutil:generic_seqatomic", + types = { + "Value": "int", + }, +) + +go_library( + name = "seqatomic", + srcs = ["seqatomic_int_unsafe.go"], + importpath = "gvisor.dev/gvisor/pkg/syncutil/seqatomic", + deps = [ + "//pkg/syncutil", + ], +) + +go_test( + name = "seqatomic_test", + size = "small", + srcs = ["seqatomic_test.go"], + embed = [":seqatomic"], + deps = [ + "//pkg/syncutil", + ], +) diff --git a/pkg/syncutil/seqatomictest/seqatomic_test.go b/pkg/syncutil/seqatomictest/seqatomic_test.go new file mode 100644 index 000000000..b0db44999 --- /dev/null +++ b/pkg/syncutil/seqatomictest/seqatomic_test.go @@ -0,0 +1,132 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package seqatomic + +import ( + "sync/atomic" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/syncutil" +) + +func TestSeqAtomicLoadUncontended(t *testing.T) { + var seq syncutil.SeqCount + const want = 1 + data := want + if got := SeqAtomicLoadInt(&seq, &data); got != want { + t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } +} + +func TestSeqAtomicLoadAfterWrite(t *testing.T) { + var seq syncutil.SeqCount + var data int + const want = 1 + seq.BeginWrite() + data = want + seq.EndWrite() + if got := SeqAtomicLoadInt(&seq, &data); got != want { + t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } +} + +func TestSeqAtomicLoadDuringWrite(t *testing.T) { + var seq syncutil.SeqCount + var data int + const want = 1 + seq.BeginWrite() + go func() { + time.Sleep(time.Second) + data = want + seq.EndWrite() + }() + if got := SeqAtomicLoadInt(&seq, &data); got != want { + t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } +} + +func TestSeqAtomicTryLoadUncontended(t *testing.T) { + var seq syncutil.SeqCount + const want = 1 + data := want + epoch := seq.BeginRead() + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { + t.Errorf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) + } +} + +func TestSeqAtomicTryLoadDuringWrite(t *testing.T) { + var seq syncutil.SeqCount + var data int + epoch := seq.BeginRead() + seq.BeginWrite() + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { + t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) + } + seq.EndWrite() +} + +func TestSeqAtomicTryLoadAfterWrite(t *testing.T) { + var seq syncutil.SeqCount + var data int + epoch := seq.BeginRead() + seq.BeginWrite() + seq.EndWrite() + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { + t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) + } +} + +func BenchmarkSeqAtomicLoadIntUncontended(b *testing.B) { + var seq syncutil.SeqCount + const want = 42 + data := want + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if got := SeqAtomicLoadInt(&seq, &data); got != want { + b.Fatalf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } + } + }) +} + +func BenchmarkSeqAtomicTryLoadIntUncontended(b *testing.B) { + var seq syncutil.SeqCount + const want = 42 + data := want + b.RunParallel(func(pb *testing.PB) { + epoch := seq.BeginRead() + for pb.Next() { + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { + b.Fatalf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) + } + } + }) +} + +// For comparison: +func BenchmarkAtomicValueLoadIntUncontended(b *testing.B) { + var a atomic.Value + const want = 42 + a.Store(int(want)) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if got := a.Load().(int); got != want { + b.Fatalf("atomic.Value.Load: got %v, wanted %v", got, want) + } + } + }) +} diff --git a/pkg/syncutil/seqcount.go b/pkg/syncutil/seqcount.go new file mode 100644 index 000000000..11d8dbfaa --- /dev/null +++ b/pkg/syncutil/seqcount.go @@ -0,0 +1,149 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syncutil + +import ( + "fmt" + "reflect" + "runtime" + "sync/atomic" +) + +// SeqCount is a synchronization primitive for optimistic reader/writer +// synchronization in cases where readers can work with stale data and +// therefore do not need to block writers. +// +// Compared to sync/atomic.Value: +// +// - Mutation of SeqCount-protected data does not require memory allocation, +// whereas atomic.Value generally does. This is a significant advantage when +// writes are common. +// +// - Atomic reads of SeqCount-protected data require copying. This is a +// disadvantage when atomic reads are common. +// +// - SeqCount may be more flexible: correct use of SeqCount.ReadOk allows other +// operations to be made atomic with reads of SeqCount-protected data. +// +// - SeqCount may be less flexible: as of this writing, SeqCount-protected data +// cannot include pointers. +// +// - SeqCount is more cumbersome to use; atomic reads of SeqCount-protected +// data require instantiating function templates using go_generics (see +// seqatomic.go). +type SeqCount struct { + // epoch is incremented by BeginWrite and EndWrite, such that epoch is odd + // if a writer critical section is active, and a read from data protected + // by this SeqCount is atomic iff epoch is the same even value before and + // after the read. + epoch uint32 +} + +// SeqCountEpoch tracks writer critical sections in a SeqCount. +type SeqCountEpoch struct { + val uint32 +} + +// We assume that: +// +// - All functions in sync/atomic that perform a memory read are at least a +// read fence: memory reads before calls to such functions cannot be reordered +// after the call, and memory reads after calls to such functions cannot be +// reordered before the call, even if those reads do not use sync/atomic. +// +// - All functions in sync/atomic that perform a memory write are at least a +// write fence: memory writes before calls to such functions cannot be +// reordered after the call, and memory writes after calls to such functions +// cannot be reordered before the call, even if those writes do not use +// sync/atomic. +// +// As of this writing, the Go memory model completely fails to describe +// sync/atomic, but these properties are implied by +// https://groups.google.com/forum/#!topic/golang-nuts/7EnEhM3U7B8. + +// BeginRead indicates the beginning of a reader critical section. Reader +// critical sections DO NOT BLOCK writer critical sections, so operations in a +// reader critical section MAY RACE with writer critical sections. Races are +// detected by ReadOk at the end of the reader critical section. Thus, the +// low-level structure of readers is generally: +// +// for { +// epoch := seq.BeginRead() +// // do something idempotent with seq-protected data +// if seq.ReadOk(epoch) { +// break +// } +// } +// +// However, since reader critical sections may race with writer critical +// sections, the Go race detector will (accurately) flag data races in readers +// using this pattern. Most users of SeqCount will need to use the +// SeqAtomicLoad function template in seqatomic.go. +func (s *SeqCount) BeginRead() SeqCountEpoch { + epoch := atomic.LoadUint32(&s.epoch) + for epoch&1 != 0 { + runtime.Gosched() + epoch = atomic.LoadUint32(&s.epoch) + } + return SeqCountEpoch{epoch} +} + +// ReadOk returns true if the reader critical section initiated by a previous +// call to BeginRead() that returned epoch did not race with any writer critical +// sections. +// +// ReadOk may be called any number of times during a reader critical section. +// Reader critical sections do not need to be explicitly terminated; the last +// call to ReadOk is implicitly the end of the reader critical section. +func (s *SeqCount) ReadOk(epoch SeqCountEpoch) bool { + return atomic.LoadUint32(&s.epoch) == epoch.val +} + +// BeginWrite indicates the beginning of a writer critical section. +// +// SeqCount does not support concurrent writer critical sections; clients with +// concurrent writers must synchronize them using e.g. sync.Mutex. +func (s *SeqCount) BeginWrite() { + if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 == 0 { + panic("SeqCount.BeginWrite during writer critical section") + } +} + +// EndWrite ends the effect of a preceding BeginWrite. +func (s *SeqCount) EndWrite() { + if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 != 0 { + panic("SeqCount.EndWrite outside writer critical section") + } +} + +// PointersInType returns a list of pointers reachable from values named +// valName of the given type. +// +// PointersInType is not exhaustive, but it is guaranteed that if typ contains +// at least one pointer, then PointersInTypeOf returns a non-empty list. +func PointersInType(typ reflect.Type, valName string) []string { + switch kind := typ.Kind(); kind { + case reflect.Bool, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + return nil + + case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice, reflect.String, reflect.UnsafePointer: + return []string{valName} + + case reflect.Array: + return PointersInType(typ.Elem(), valName+"[]") + + case reflect.Struct: + var ptrs []string + for i, n := 0, typ.NumField(); i < n; i++ { + field := typ.Field(i) + ptrs = append(ptrs, PointersInType(field.Type, fmt.Sprintf("%s.%s", valName, field.Name))...) + } + return ptrs + + default: + return []string{fmt.Sprintf("%s (of type %s with unknown kind %s)", valName, typ, kind)} + } +} diff --git a/pkg/syncutil/seqcount_test.go b/pkg/syncutil/seqcount_test.go new file mode 100644 index 000000000..14d6aedea --- /dev/null +++ b/pkg/syncutil/seqcount_test.go @@ -0,0 +1,153 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syncutil + +import ( + "reflect" + "testing" + "time" +) + +func TestSeqCountWriteUncontended(t *testing.T) { + var seq SeqCount + seq.BeginWrite() + seq.EndWrite() +} + +func TestSeqCountReadUncontended(t *testing.T) { + var seq SeqCount + epoch := seq.BeginRead() + if !seq.ReadOk(epoch) { + t.Errorf("ReadOk: got false, wanted true") + } +} + +func TestSeqCountBeginReadAfterWrite(t *testing.T) { + var seq SeqCount + var data int32 + const want = 1 + seq.BeginWrite() + data = want + seq.EndWrite() + epoch := seq.BeginRead() + if data != want { + t.Errorf("Reader: got %v, wanted %v", data, want) + } + if !seq.ReadOk(epoch) { + t.Errorf("ReadOk: got false, wanted true") + } +} + +func TestSeqCountBeginReadDuringWrite(t *testing.T) { + var seq SeqCount + var data int + const want = 1 + seq.BeginWrite() + go func() { + time.Sleep(time.Second) + data = want + seq.EndWrite() + }() + epoch := seq.BeginRead() + if data != want { + t.Errorf("Reader: got %v, wanted %v", data, want) + } + if !seq.ReadOk(epoch) { + t.Errorf("ReadOk: got false, wanted true") + } +} + +func TestSeqCountReadOkAfterWrite(t *testing.T) { + var seq SeqCount + epoch := seq.BeginRead() + seq.BeginWrite() + seq.EndWrite() + if seq.ReadOk(epoch) { + t.Errorf("ReadOk: got true, wanted false") + } +} + +func TestSeqCountReadOkDuringWrite(t *testing.T) { + var seq SeqCount + epoch := seq.BeginRead() + seq.BeginWrite() + if seq.ReadOk(epoch) { + t.Errorf("ReadOk: got true, wanted false") + } + seq.EndWrite() +} + +func BenchmarkSeqCountWriteUncontended(b *testing.B) { + var seq SeqCount + for i := 0; i < b.N; i++ { + seq.BeginWrite() + seq.EndWrite() + } +} + +func BenchmarkSeqCountReadUncontended(b *testing.B) { + var seq SeqCount + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + epoch := seq.BeginRead() + if !seq.ReadOk(epoch) { + b.Fatalf("ReadOk: got false, wanted true") + } + } + }) +} + +func TestPointersInType(t *testing.T) { + for _, test := range []struct { + name string // used for both test and value name + val interface{} + ptrs []string + }{ + { + name: "EmptyStruct", + val: struct{}{}, + }, + { + name: "Int", + val: int(0), + }, + { + name: "MixedStruct", + val: struct { + b bool + I int + ExportedPtr *struct{} + unexportedPtr *struct{} + arr [2]int + ptrArr [2]*int + nestedStruct struct { + nestedNonptr int + nestedPtr *int + } + structArr [1]struct { + nonptr int + ptr *int + } + }{}, + ptrs: []string{ + "MixedStruct.ExportedPtr", + "MixedStruct.unexportedPtr", + "MixedStruct.ptrArr[]", + "MixedStruct.nestedStruct.nestedPtr", + "MixedStruct.structArr[].ptr", + }, + }, + } { + t.Run(test.name, func(t *testing.T) { + typ := reflect.TypeOf(test.val) + ptrs := PointersInType(typ, test.name) + t.Logf("Found pointers: %v", ptrs) + if (len(ptrs) != 0 || len(test.ptrs) != 0) && !reflect.DeepEqual(ptrs, test.ptrs) { + t.Errorf("Got %v, wanted %v", ptrs, test.ptrs) + } + }) + } +} diff --git a/pkg/syncutil/syncutil.go b/pkg/syncutil/syncutil.go new file mode 100644 index 000000000..66e750d06 --- /dev/null +++ b/pkg/syncutil/syncutil.go @@ -0,0 +1,7 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package syncutil provides synchronization primitives. +package syncutil diff --git a/test/syscalls/linux/accept_bind.cc b/test/syscalls/linux/accept_bind.cc index 427c42ede..e08c578f0 100644 --- a/test/syscalls/linux/accept_bind.cc +++ b/test/syscalls/linux/accept_bind.cc @@ -14,8 +14,10 @@ #include #include + #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/accept_bind_stream.cc b/test/syscalls/linux/accept_bind_stream.cc index 7bcd91e9e..4857f160b 100644 --- a/test/syscalls/linux/accept_bind_stream.cc +++ b/test/syscalls/linux/accept_bind_stream.cc @@ -14,8 +14,10 @@ #include #include + #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/chmod.cc b/test/syscalls/linux/chmod.cc index 7e918b9b2..a06b5cfd6 100644 --- a/test/syscalls/linux/chmod.cc +++ b/test/syscalls/linux/chmod.cc @@ -16,6 +16,7 @@ #include #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc index de1611c21..04bc2d7b9 100644 --- a/test/syscalls/linux/chroot.cc +++ b/test/syscalls/linux/chroot.cc @@ -19,6 +19,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/clock_gettime.cc b/test/syscalls/linux/clock_gettime.cc index c9e3ed6b2..2aa91691e 100644 --- a/test/syscalls/linux/clock_gettime.cc +++ b/test/syscalls/linux/clock_gettime.cc @@ -14,6 +14,7 @@ #include #include + #include #include #include diff --git a/test/syscalls/linux/concurrency.cc b/test/syscalls/linux/concurrency.cc index 4e0a13f8b..00b96b34a 100644 --- a/test/syscalls/linux/concurrency.cc +++ b/test/syscalls/linux/concurrency.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/exec_binary.cc b/test/syscalls/linux/exec_binary.cc index 0a3931e5a..736452b0c 100644 --- a/test/syscalls/linux/exec_binary.cc +++ b/test/syscalls/linux/exec_binary.cc @@ -20,6 +20,7 @@ #include #include #include + #include #include #include diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h index 4d155b618..4e048320e 100644 --- a/test/syscalls/linux/file_base.h +++ b/test/syscalls/linux/file_base.h @@ -27,6 +27,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc index b4a91455d..3ecb8db8e 100644 --- a/test/syscalls/linux/flock.cc +++ b/test/syscalls/linux/flock.cc @@ -14,6 +14,7 @@ #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index dd6e1a422..371890110 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -20,6 +20,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc index fe9cfafe8..ad2dbacb8 100644 --- a/test/syscalls/linux/getdents.cc +++ b/test/syscalls/linux/getdents.cc @@ -23,6 +23,7 @@ #include #include #include + #include #include #include diff --git a/test/syscalls/linux/ip_socket_test_util.cc b/test/syscalls/linux/ip_socket_test_util.cc index 57e99596f..8398fc95f 100644 --- a/test/syscalls/linux/ip_socket_test_util.cc +++ b/test/syscalls/linux/ip_socket_test_util.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "test/syscalls/linux/ip_socket_test_util.h" + #include #include #include #include -#include -#include "test/syscalls/linux/ip_socket_test_util.h" +#include namespace gvisor { namespace testing { diff --git a/test/syscalls/linux/memory_accounting.cc b/test/syscalls/linux/memory_accounting.cc index ff2f49863..94aea4077 100644 --- a/test/syscalls/linux/memory_accounting.cc +++ b/test/syscalls/linux/memory_accounting.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/mlock.cc b/test/syscalls/linux/mlock.cc index 283c21ed3..620b4f8b4 100644 --- a/test/syscalls/linux/mlock.cc +++ b/test/syscalls/linux/mlock.cc @@ -16,6 +16,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index a112316e9..6f2639d8a 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -28,6 +28,7 @@ #include #include #include + #include #include "gmock/gmock.h" diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc index e35be3cab..a3e9745cf 100644 --- a/test/syscalls/linux/mount.cc +++ b/test/syscalls/linux/mount.cc @@ -18,6 +18,7 @@ #include #include #include + #include #include #include diff --git a/test/syscalls/linux/read.cc b/test/syscalls/linux/read.cc index 4430fa3c2..2633ba31b 100644 --- a/test/syscalls/linux/read.cc +++ b/test/syscalls/linux/read.cc @@ -14,6 +14,7 @@ #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/rename.cc b/test/syscalls/linux/rename.cc index 5b474ff32..833c0dc4f 100644 --- a/test/syscalls/linux/rename.cc +++ b/test/syscalls/linux/rename.cc @@ -14,6 +14,7 @@ #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/seccomp.cc b/test/syscalls/linux/seccomp.cc index e77586852..7e41fe7d8 100644 --- a/test/syscalls/linux/seccomp.cc +++ b/test/syscalls/linux/seccomp.cc @@ -25,6 +25,7 @@ #include #include #include + #include #include "gmock/gmock.h" diff --git a/test/syscalls/linux/select.cc b/test/syscalls/linux/select.cc index e06a2666d..424e2a67f 100644 --- a/test/syscalls/linux/select.cc +++ b/test/syscalls/linux/select.cc @@ -16,6 +16,7 @@ #include #include #include + #include #include #include diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc index eb7a3966f..7ba752599 100644 --- a/test/syscalls/linux/shm.cc +++ b/test/syscalls/linux/shm.cc @@ -13,7 +13,6 @@ // limitations under the License. #include - #include #include #include diff --git a/test/syscalls/linux/socket_blocking.cc b/test/syscalls/linux/socket_blocking.cc index d7ce57566..7e88aa2d9 100644 --- a/test/syscalls/linux/socket_blocking.cc +++ b/test/syscalls/linux/socket_blocking.cc @@ -17,6 +17,7 @@ #include #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/socket_ip_loopback_blocking.cc b/test/syscalls/linux/socket_ip_loopback_blocking.cc index d7fc9715b..e58eedaba 100644 --- a/test/syscalls/linux/socket_ip_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_loopback_blocking.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "test/syscalls/linux/ip_socket_test_util.h" diff --git a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc index 0dc274e2d..d11f7cc23 100644 --- a/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc +++ b/test/syscalls/linux/socket_ip_tcp_generic_loopback.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "test/syscalls/linux/ip_socket_test_util.h" diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc index cd3ad97d0..fcd20102f 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback_blocking.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "test/syscalls/linux/ip_socket_test_util.h" diff --git a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc index 1acdecc17..63a05b799 100644 --- a/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc +++ b/test/syscalls/linux/socket_ip_tcp_loopback_nonblock.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "test/syscalls/linux/ip_socket_test_util.h" diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc index 3c3712b50..80f12b0a9 100644 --- a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc +++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.cc @@ -18,6 +18,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc index 92f03e045..3ac790873 100644 --- a/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc +++ b/test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking_test.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h" + #include #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_ipv4_tcp_unbound_external_networking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc index 9d4e1ab97..8f47952b0 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound_external_networking_test.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h" + #include #include "test/syscalls/linux/ip_socket_test_util.h" -#include "test/syscalls/linux/socket_ipv4_udp_unbound_external_networking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc index 5f05bab10..723f5d728 100644 --- a/test/syscalls/linux/socket_netlink_util.cc +++ b/test/syscalls/linux/socket_netlink_util.cc @@ -12,15 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include "test/syscalls/linux/socket_netlink_util.h" #include #include +#include #include #include "absl/strings/str_cat.h" -#include "test/syscalls/linux/socket_netlink_util.h" #include "test/syscalls/linux/socket_test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/socket_unix_blocking_local.cc b/test/syscalls/linux/socket_unix_blocking_local.cc index 1994139e6..6f84221b2 100644 --- a/test/syscalls/linux/socket_unix_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_blocking_local.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "test/syscalls/linux/socket_blocking.h" - #include +#include "test/syscalls/linux/socket_blocking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_unix_dgram.cc b/test/syscalls/linux/socket_unix_dgram.cc index 3245cf7c9..af0df4fb4 100644 --- a/test/syscalls/linux/socket_unix_dgram.cc +++ b/test/syscalls/linux/socket_unix_dgram.cc @@ -16,6 +16,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc index cd4fba25c..2db8b68d3 100644 --- a/test/syscalls/linux/socket_unix_dgram_non_blocking.cc +++ b/test/syscalls/linux/socket_unix_dgram_non_blocking.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc index da762cd83..8855d5001 100644 --- a/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_non_stream_blocking_local.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "test/syscalls/linux/socket_non_stream_blocking.h" - #include +#include "test/syscalls/linux/socket_non_stream_blocking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_unix_seqpacket.cc b/test/syscalls/linux/socket_unix_seqpacket.cc index 60fa9e38a..84d3a569e 100644 --- a/test/syscalls/linux/socket_unix_seqpacket.cc +++ b/test/syscalls/linux/socket_unix_seqpacket.cc @@ -16,6 +16,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_stream_blocking_local.cc b/test/syscalls/linux/socket_unix_stream_blocking_local.cc index fa0a9d367..08e579ba7 100644 --- a/test/syscalls/linux/socket_unix_stream_blocking_local.cc +++ b/test/syscalls/linux/socket_unix_stream_blocking_local.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "test/syscalls/linux/socket_stream_blocking.h" - #include +#include "test/syscalls/linux/socket_stream_blocking.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc index ec777c59f..1936aa135 100644 --- a/test/syscalls/linux/socket_unix_stream_nonblock_local.cc +++ b/test/syscalls/linux/socket_unix_stream_nonblock_local.cc @@ -11,10 +11,9 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "test/syscalls/linux/socket_stream_nonblock.h" - #include +#include "test/syscalls/linux/socket_stream_nonblock.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include "test/util/test_util.h" diff --git a/test/syscalls/linux/socket_unix_unbound_abstract.cc b/test/syscalls/linux/socket_unix_unbound_abstract.cc index 7f5816ace..8b1762000 100644 --- a/test/syscalls/linux/socket_unix_unbound_abstract.cc +++ b/test/syscalls/linux/socket_unix_unbound_abstract.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_unbound_filesystem.cc b/test/syscalls/linux/socket_unix_unbound_filesystem.cc index b14f24086..cab912152 100644 --- a/test/syscalls/linux/socket_unix_unbound_filesystem.cc +++ b/test/syscalls/linux/socket_unix_unbound_filesystem.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc index 50ffa1d04..cb99030f5 100644 --- a/test/syscalls/linux/socket_unix_unbound_seqpacket.cc +++ b/test/syscalls/linux/socket_unix_unbound_seqpacket.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/socket_unix_unbound_stream.cc b/test/syscalls/linux/socket_unix_unbound_stream.cc index 344918c34..f185dded3 100644 --- a/test/syscalls/linux/socket_unix_unbound_stream.cc +++ b/test/syscalls/linux/socket_unix_unbound_stream.cc @@ -14,6 +14,7 @@ #include #include + #include "gtest/gtest.h" #include "test/syscalls/linux/socket_test_util.h" #include "test/syscalls/linux/unix_domain_socket_test_util.h" diff --git a/test/syscalls/linux/sync.cc b/test/syscalls/linux/sync.cc index fe479390d..8aa2525a9 100644 --- a/test/syscalls/linux/sync.cc +++ b/test/syscalls/linux/sync.cc @@ -14,10 +14,9 @@ #include #include -#include - #include #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/truncate.cc b/test/syscalls/linux/truncate.cc index e5cc5d97c..c988c6380 100644 --- a/test/syscalls/linux/truncate.cc +++ b/test/syscalls/linux/truncate.cc @@ -19,6 +19,7 @@ #include #include #include + #include #include diff --git a/test/syscalls/linux/unix_domain_socket_test_util.cc b/test/syscalls/linux/unix_domain_socket_test_util.cc index 7fb9eed8d..b05ab2900 100644 --- a/test/syscalls/linux/unix_domain_socket_test_util.cc +++ b/test/syscalls/linux/unix_domain_socket_test_util.cc @@ -15,6 +15,7 @@ #include "test/syscalls/linux/unix_domain_socket_test_util.h" #include + #include #include "gtest/gtest.h" diff --git a/test/syscalls/linux/unix_domain_socket_test_util.h b/test/syscalls/linux/unix_domain_socket_test_util.h index 5eca0b7f0..b8073db17 100644 --- a/test/syscalls/linux/unix_domain_socket_test_util.h +++ b/test/syscalls/linux/unix_domain_socket_test_util.h @@ -16,6 +16,7 @@ #define GVISOR_TEST_SYSCALLS_UNIX_DOMAIN_SOCKET_TEST_UTIL_H_ #include + #include "test/syscalls/linux/socket_test_util.h" namespace gvisor { diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc index 80716859a..12b925a51 100644 --- a/test/syscalls/linux/utimes.cc +++ b/test/syscalls/linux/utimes.cc @@ -20,6 +20,7 @@ #include #include #include + #include #include "absl/time/time.h" diff --git a/test/syscalls/linux/vdso_clock_gettime.cc b/test/syscalls/linux/vdso_clock_gettime.cc index 40c0014b9..ce1899f45 100644 --- a/test/syscalls/linux/vdso_clock_gettime.cc +++ b/test/syscalls/linux/vdso_clock_gettime.cc @@ -17,6 +17,7 @@ #include #include #include + #include #include #include diff --git a/test/util/fs_util_test.cc b/test/util/fs_util_test.cc index 2a200320a..657b6a46e 100644 --- a/test/util/fs_util_test.cc +++ b/test/util/fs_util_test.cc @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "test/util/fs_util.h" + #include + #include #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "test/util/fs_util.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" diff --git a/test/util/mount_util.h b/test/util/mount_util.h index 38ec6c8a1..484de560e 100644 --- a/test/util/mount_util.h +++ b/test/util/mount_util.h @@ -17,6 +17,7 @@ #include #include + #include #include diff --git a/test/util/posix_error_test.cc b/test/util/posix_error_test.cc index d67270842..bf9465abb 100644 --- a/test/util/posix_error_test.cc +++ b/test/util/posix_error_test.cc @@ -15,6 +15,7 @@ #include "test/util/posix_error.h" #include + #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/test/util/rlimit_util.cc b/test/util/rlimit_util.cc index 684253f78..d7bfc1606 100644 --- a/test/util/rlimit_util.cc +++ b/test/util/rlimit_util.cc @@ -15,6 +15,7 @@ #include "test/util/rlimit_util.h" #include + #include #include "test/util/cleanup.h" diff --git a/test/util/signal_util.cc b/test/util/signal_util.cc index 26738864f..5ee95ee80 100644 --- a/test/util/signal_util.cc +++ b/test/util/signal_util.cc @@ -15,6 +15,7 @@ #include "test/util/signal_util.h" #include + #include #include "gtest/gtest.h" diff --git a/test/util/signal_util.h b/test/util/signal_util.h index 7fd2af015..bcf85c337 100644 --- a/test/util/signal_util.h +++ b/test/util/signal_util.h @@ -18,6 +18,7 @@ #include #include #include + #include #include "gmock/gmock.h" diff --git a/test/util/temp_path.h b/test/util/temp_path.h index 92d669503..9e5ac11f4 100644 --- a/test/util/temp_path.h +++ b/test/util/temp_path.h @@ -16,6 +16,7 @@ #define GVISOR_TEST_UTIL_TEMP_PATH_H_ #include + #include #include diff --git a/test/util/test_util_test.cc b/test/util/test_util_test.cc index b7300d9e5..f42100374 100644 --- a/test/util/test_util_test.cc +++ b/test/util/test_util_test.cc @@ -15,6 +15,7 @@ #include "test/util/test_util.h" #include + #include #include "gmock/gmock.h" diff --git a/third_party/gvsync/BUILD b/third_party/gvsync/BUILD deleted file mode 100644 index 7d6d59c48..000000000 --- a/third_party/gvsync/BUILD +++ /dev/null @@ -1,53 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template") - -package( - default_visibility = ["//:sandbox"], - licenses = ["notice"], -) - -exports_files(["LICENSE"]) - -go_template( - name = "generic_atomicptr", - srcs = ["atomicptr_unsafe.go"], - types = [ - "Value", - ], -) - -go_template( - name = "generic_seqatomic", - srcs = ["seqatomic_unsafe.go"], - types = [ - "Value", - ], - deps = [ - ":sync", - ], -) - -go_library( - name = "gvsync", - srcs = [ - "downgradable_rwmutex_1_12_unsafe.go", - "downgradable_rwmutex_1_13_unsafe.go", - "downgradable_rwmutex_unsafe.go", - "gvsync.go", - "memmove_unsafe.go", - "norace_unsafe.go", - "race_unsafe.go", - "seqcount.go", - ], - importpath = "gvisor.dev/gvisor/third_party/gvsync", -) - -go_test( - name = "gvsync_test", - size = "small", - srcs = [ - "downgradable_rwmutex_test.go", - "seqcount_test.go", - ], - embed = [":gvsync"], -) diff --git a/third_party/gvsync/LICENSE b/third_party/gvsync/LICENSE deleted file mode 100644 index 6a66aea5e..000000000 --- a/third_party/gvsync/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/gvsync/README.md b/third_party/gvsync/README.md deleted file mode 100644 index fcc7e6f44..000000000 --- a/third_party/gvsync/README.md +++ /dev/null @@ -1,3 +0,0 @@ -This package provides additional synchronization primitives not provided by the -Go stdlib 'sync' package. It is partially derived from the upstream 'sync' -package. diff --git a/third_party/gvsync/atomicptr_unsafe.go b/third_party/gvsync/atomicptr_unsafe.go deleted file mode 100644 index 525c4beed..000000000 --- a/third_party/gvsync/atomicptr_unsafe.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package template doesn't exist. This file must be instantiated using the -// go_template_instance rule in tools/go_generics/defs.bzl. -package template - -import ( - "sync/atomic" - "unsafe" -) - -// Value is a required type parameter. -type Value struct{} - -// An AtomicPtr is a pointer to a value of type Value that can be atomically -// loaded and stored. The zero value of an AtomicPtr represents nil. -// -// Note that copying AtomicPtr by value performs a non-atomic read of the -// stored pointer, which is unsafe if Store() can be called concurrently; in -// this case, do `dst.Store(src.Load())` instead. -// -// +stateify savable -type AtomicPtr struct { - ptr unsafe.Pointer `state:".(*Value)"` -} - -func (p *AtomicPtr) savePtr() *Value { - return p.Load() -} - -func (p *AtomicPtr) loadPtr(v *Value) { - p.Store(v) -} - -// Load returns the value set by the most recent Store. It returns nil if there -// has been no previous call to Store. -func (p *AtomicPtr) Load() *Value { - return (*Value)(atomic.LoadPointer(&p.ptr)) -} - -// Store sets the value returned by Load to x. -func (p *AtomicPtr) Store(x *Value) { - atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) -} diff --git a/third_party/gvsync/atomicptrtest/BUILD b/third_party/gvsync/atomicptrtest/BUILD deleted file mode 100644 index 447ecf96a..000000000 --- a/third_party/gvsync/atomicptrtest/BUILD +++ /dev/null @@ -1,28 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "atomicptr_int", - out = "atomicptr_int_unsafe.go", - package = "atomicptr", - suffix = "Int", - template = "//third_party/gvsync:generic_atomicptr", - types = { - "Value": "int", - }, -) - -go_library( - name = "atomicptr", - srcs = ["atomicptr_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/third_party/gvsync/atomicptr", -) - -go_test( - name = "atomicptr_test", - size = "small", - srcs = ["atomicptr_test.go"], - embed = [":atomicptr"], -) diff --git a/third_party/gvsync/atomicptrtest/atomicptr_test.go b/third_party/gvsync/atomicptrtest/atomicptr_test.go deleted file mode 100644 index 8fdc5112e..000000000 --- a/third_party/gvsync/atomicptrtest/atomicptr_test.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package atomicptr - -import ( - "testing" -) - -func newInt(val int) *int { - return &val -} - -func TestAtomicPtr(t *testing.T) { - var p AtomicPtrInt - if got := p.Load(); got != nil { - t.Errorf("initial value is %p (%v), wanted nil", got, got) - } - want := newInt(42) - p.Store(want) - if got := p.Load(); got != want { - t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) - } - want = newInt(100) - p.Store(want) - if got := p.Load(); got != want { - t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) - } -} diff --git a/third_party/gvsync/downgradable_rwmutex_1_12_unsafe.go b/third_party/gvsync/downgradable_rwmutex_1_12_unsafe.go deleted file mode 100644 index 855b2a2b1..000000000 --- a/third_party/gvsync/downgradable_rwmutex_1_12_unsafe.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.12 -// +build !go1.13 - -// TODO(b/133868570): Delete once Go 1.12 is no longer supported. - -package gvsync - -import _ "unsafe" - -//go:linkname runtimeSemrelease112 sync.runtime_Semrelease -func runtimeSemrelease112(s *uint32, handoff bool) - -func runtimeSemrelease(s *uint32, handoff bool, skipframes int) { - // 'skipframes' is only available starting from 1.13. - runtimeSemrelease112(s, handoff) -} diff --git a/third_party/gvsync/downgradable_rwmutex_1_13_unsafe.go b/third_party/gvsync/downgradable_rwmutex_1_13_unsafe.go deleted file mode 100644 index 3b9346843..000000000 --- a/third_party/gvsync/downgradable_rwmutex_1_13_unsafe.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.13 -// +build !go1.15 - -// Check go:linkname function signatures when updating Go version. - -package gvsync - -import _ "unsafe" - -//go:linkname runtimeSemrelease sync.runtime_Semrelease -func runtimeSemrelease(s *uint32, handoff bool, skipframes int) diff --git a/third_party/gvsync/downgradable_rwmutex_test.go b/third_party/gvsync/downgradable_rwmutex_test.go deleted file mode 100644 index 40c384b8b..000000000 --- a/third_party/gvsync/downgradable_rwmutex_test.go +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// GOMAXPROCS=10 go test - -// Copy/pasted from the standard library's sync/rwmutex_test.go, except for the -// addition of downgradingWriter and the renaming of num_iterations to -// numIterations to shut up Golint. - -package gvsync - -import ( - "fmt" - "runtime" - "sync/atomic" - "testing" -) - -func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { - m.RLock() - clocked <- true - <-cunlock - m.RUnlock() - cdone <- true -} - -func doTestParallelReaders(numReaders, gomaxprocs int) { - runtime.GOMAXPROCS(gomaxprocs) - var m DowngradableRWMutex - clocked := make(chan bool) - cunlock := make(chan bool) - cdone := make(chan bool) - for i := 0; i < numReaders; i++ { - go parallelReader(&m, clocked, cunlock, cdone) - } - // Wait for all parallel RLock()s to succeed. - for i := 0; i < numReaders; i++ { - <-clocked - } - for i := 0; i < numReaders; i++ { - cunlock <- true - } - // Wait for the goroutines to finish. - for i := 0; i < numReaders; i++ { - <-cdone - } -} - -func TestParallelReaders(t *testing.T) { - defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) - doTestParallelReaders(1, 4) - doTestParallelReaders(3, 4) - doTestParallelReaders(4, 2) -} - -func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { - for i := 0; i < numIterations; i++ { - rwm.RLock() - n := atomic.AddInt32(activity, 1) - if n < 1 || n >= 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - atomic.AddInt32(activity, -1) - rwm.RUnlock() - } - cdone <- true -} - -func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { - for i := 0; i < numIterations; i++ { - rwm.Lock() - n := atomic.AddInt32(activity, 10000) - if n != 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - atomic.AddInt32(activity, -10000) - rwm.Unlock() - } - cdone <- true -} - -func downgradingWriter(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { - for i := 0; i < numIterations; i++ { - rwm.Lock() - n := atomic.AddInt32(activity, 10000) - if n != 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - atomic.AddInt32(activity, -10000) - rwm.DowngradeLock() - n = atomic.AddInt32(activity, 1) - if n < 1 || n >= 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - n = atomic.AddInt32(activity, -1) - rwm.RUnlock() - } - cdone <- true -} - -func HammerDowngradableRWMutex(gomaxprocs, numReaders, numIterations int) { - runtime.GOMAXPROCS(gomaxprocs) - // Number of active readers + 10000 * number of active writers. - var activity int32 - var rwm DowngradableRWMutex - cdone := make(chan bool) - go writer(&rwm, numIterations, &activity, cdone) - go downgradingWriter(&rwm, numIterations, &activity, cdone) - var i int - for i = 0; i < numReaders/2; i++ { - go reader(&rwm, numIterations, &activity, cdone) - } - go writer(&rwm, numIterations, &activity, cdone) - go downgradingWriter(&rwm, numIterations, &activity, cdone) - for ; i < numReaders; i++ { - go reader(&rwm, numIterations, &activity, cdone) - } - // Wait for the 4 writers and all readers to finish. - for i := 0; i < 4+numReaders; i++ { - <-cdone - } -} - -func TestDowngradableRWMutex(t *testing.T) { - defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) - n := 1000 - if testing.Short() { - n = 5 - } - HammerDowngradableRWMutex(1, 1, n) - HammerDowngradableRWMutex(1, 3, n) - HammerDowngradableRWMutex(1, 10, n) - HammerDowngradableRWMutex(4, 1, n) - HammerDowngradableRWMutex(4, 3, n) - HammerDowngradableRWMutex(4, 10, n) - HammerDowngradableRWMutex(10, 1, n) - HammerDowngradableRWMutex(10, 3, n) - HammerDowngradableRWMutex(10, 10, n) - HammerDowngradableRWMutex(10, 5, n) -} diff --git a/third_party/gvsync/downgradable_rwmutex_unsafe.go b/third_party/gvsync/downgradable_rwmutex_unsafe.go deleted file mode 100644 index b7862d185..000000000 --- a/third_party/gvsync/downgradable_rwmutex_unsafe.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.12 -// +build !go1.15 - -// Check go:linkname function signatures when updating Go version. - -// This is mostly copied from the standard library's sync/rwmutex.go. -// -// Happens-before relationships indicated to the race detector: -// - Unlock -> Lock (via writerSem) -// - Unlock -> RLock (via readerSem) -// - RUnlock -> Lock (via writerSem) -// - DowngradeLock -> RLock (via readerSem) - -package gvsync - -import ( - "sync" - "sync/atomic" - "unsafe" -) - -//go:linkname runtimeSemacquire sync.runtime_Semacquire -func runtimeSemacquire(s *uint32) - -// DowngradableRWMutex is identical to sync.RWMutex, but adds the DowngradeLock -// method. -type DowngradableRWMutex struct { - w sync.Mutex // held if there are pending writers - writerSem uint32 // semaphore for writers to wait for completing readers - readerSem uint32 // semaphore for readers to wait for completing writers - readerCount int32 // number of pending readers - readerWait int32 // number of departing readers -} - -const rwmutexMaxReaders = 1 << 30 - -// RLock locks rw for reading. -func (rw *DowngradableRWMutex) RLock() { - if RaceEnabled { - RaceDisable() - } - if atomic.AddInt32(&rw.readerCount, 1) < 0 { - // A writer is pending, wait for it. - runtimeSemacquire(&rw.readerSem) - } - if RaceEnabled { - RaceEnable() - RaceAcquire(unsafe.Pointer(&rw.readerSem)) - } -} - -// RUnlock undoes a single RLock call. -func (rw *DowngradableRWMutex) RUnlock() { - if RaceEnabled { - RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) - RaceDisable() - } - if r := atomic.AddInt32(&rw.readerCount, -1); r < 0 { - if r+1 == 0 || r+1 == -rwmutexMaxReaders { - panic("RUnlock of unlocked DowngradableRWMutex") - } - // A writer is pending. - if atomic.AddInt32(&rw.readerWait, -1) == 0 { - // The last reader unblocks the writer. - runtimeSemrelease(&rw.writerSem, false, 0) - } - } - if RaceEnabled { - RaceEnable() - } -} - -// Lock locks rw for writing. -func (rw *DowngradableRWMutex) Lock() { - if RaceEnabled { - RaceDisable() - } - // First, resolve competition with other writers. - rw.w.Lock() - // Announce to readers there is a pending writer. - r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders - // Wait for active readers. - if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 { - runtimeSemacquire(&rw.writerSem) - } - if RaceEnabled { - RaceEnable() - RaceAcquire(unsafe.Pointer(&rw.writerSem)) - } -} - -// Unlock unlocks rw for writing. -func (rw *DowngradableRWMutex) Unlock() { - if RaceEnabled { - RaceRelease(unsafe.Pointer(&rw.writerSem)) - RaceRelease(unsafe.Pointer(&rw.readerSem)) - RaceDisable() - } - // Announce to readers there is no active writer. - r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders) - if r >= rwmutexMaxReaders { - panic("Unlock of unlocked DowngradableRWMutex") - } - // Unblock blocked readers, if any. - for i := 0; i < int(r); i++ { - runtimeSemrelease(&rw.readerSem, false, 0) - } - // Allow other writers to proceed. - rw.w.Unlock() - if RaceEnabled { - RaceEnable() - } -} - -// DowngradeLock atomically unlocks rw for writing and locks it for reading. -func (rw *DowngradableRWMutex) DowngradeLock() { - if RaceEnabled { - RaceRelease(unsafe.Pointer(&rw.readerSem)) - RaceDisable() - } - // Announce to readers there is no active writer and one additional reader. - r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders+1) - if r >= rwmutexMaxReaders+1 { - panic("DowngradeLock of unlocked DowngradableRWMutex") - } - // Unblock blocked readers, if any. Note that this loop starts as 1 since r - // includes this goroutine. - for i := 1; i < int(r); i++ { - runtimeSemrelease(&rw.readerSem, false, 0) - } - // Allow other writers to proceed to rw.w.Lock(). Note that they will still - // block on rw.writerSem since at least this reader exists, such that - // DowngradeLock() is atomic with the previous write lock. - rw.w.Unlock() - if RaceEnabled { - RaceEnable() - } -} diff --git a/third_party/gvsync/gvsync.go b/third_party/gvsync/gvsync.go deleted file mode 100644 index 3bbef13c3..000000000 --- a/third_party/gvsync/gvsync.go +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package gvsync provides synchronization primitives. -package gvsync diff --git a/third_party/gvsync/memmove_unsafe.go b/third_party/gvsync/memmove_unsafe.go deleted file mode 100644 index 9dd1d6142..000000000 --- a/third_party/gvsync/memmove_unsafe.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.12 -// +build !go1.15 - -// Check go:linkname function signatures when updating Go version. - -package gvsync - -import ( - "unsafe" -) - -//go:linkname memmove runtime.memmove -//go:noescape -func memmove(to, from unsafe.Pointer, n uintptr) - -// Memmove is exported for SeqAtomicLoad/SeqAtomicTryLoad, which can't -// define it because go_generics can't update the go:linkname annotation. -// Furthermore, go:linkname silently doesn't work if the local name is exported -// (this is of course undocumented), which is why this indirection is -// necessary. -func Memmove(to, from unsafe.Pointer, n uintptr) { - memmove(to, from, n) -} diff --git a/third_party/gvsync/norace_unsafe.go b/third_party/gvsync/norace_unsafe.go deleted file mode 100644 index e3852db8c..000000000 --- a/third_party/gvsync/norace_unsafe.go +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !race - -package gvsync - -import ( - "unsafe" -) - -// RaceEnabled is true if the Go data race detector is enabled. -const RaceEnabled = false - -// RaceDisable has the same semantics as runtime.RaceDisable. -func RaceDisable() { -} - -// RaceEnable has the same semantics as runtime.RaceEnable. -func RaceEnable() { -} - -// RaceAcquire has the same semantics as runtime.RaceAcquire. -func RaceAcquire(addr unsafe.Pointer) { -} - -// RaceRelease has the same semantics as runtime.RaceRelease. -func RaceRelease(addr unsafe.Pointer) { -} - -// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. -func RaceReleaseMerge(addr unsafe.Pointer) { -} diff --git a/third_party/gvsync/race_unsafe.go b/third_party/gvsync/race_unsafe.go deleted file mode 100644 index 13c02a830..000000000 --- a/third_party/gvsync/race_unsafe.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build race - -package gvsync - -import ( - "runtime" - "unsafe" -) - -// RaceEnabled is true if the Go data race detector is enabled. -const RaceEnabled = true - -// RaceDisable has the same semantics as runtime.RaceDisable. -func RaceDisable() { - runtime.RaceDisable() -} - -// RaceEnable has the same semantics as runtime.RaceEnable. -func RaceEnable() { - runtime.RaceEnable() -} - -// RaceAcquire has the same semantics as runtime.RaceAcquire. -func RaceAcquire(addr unsafe.Pointer) { - runtime.RaceAcquire(addr) -} - -// RaceRelease has the same semantics as runtime.RaceRelease. -func RaceRelease(addr unsafe.Pointer) { - runtime.RaceRelease(addr) -} - -// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. -func RaceReleaseMerge(addr unsafe.Pointer) { - runtime.RaceReleaseMerge(addr) -} diff --git a/third_party/gvsync/seqatomic_unsafe.go b/third_party/gvsync/seqatomic_unsafe.go deleted file mode 100644 index 382eeed43..000000000 --- a/third_party/gvsync/seqatomic_unsafe.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package template doesn't exist. This file must be instantiated using the -// go_template_instance rule in tools/go_generics/defs.bzl. -package template - -import ( - "fmt" - "reflect" - "strings" - "unsafe" - - "gvisor.dev/gvisor/third_party/gvsync" -) - -// Value is a required type parameter. -// -// Value must not contain any pointers, including interface objects, function -// objects, slices, maps, channels, unsafe.Pointer, and arrays or structs -// containing any of the above. An init() function will panic if this property -// does not hold. -type Value struct{} - -// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race -// with any writer critical sections in sc. -func SeqAtomicLoad(sc *gvsync.SeqCount, ptr *Value) Value { - // This function doesn't use SeqAtomicTryLoad because doing so is - // measurably, significantly (~20%) slower; Go is awful at inlining. - var val Value - for { - epoch := sc.BeginRead() - if gvsync.RaceEnabled { - // runtime.RaceDisable() doesn't actually stop the race detector, - // so it can't help us here. Instead, call runtime.memmove - // directly, which is not instrumented by the race detector. - gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) - } else { - // This is ~40% faster for short reads than going through memmove. - val = *ptr - } - if sc.ReadOk(epoch) { - break - } - } - return val -} - -// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section -// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read -// would race with a writer critical section, SeqAtomicTryLoad returns -// (unspecified, false). -func SeqAtomicTryLoad(sc *gvsync.SeqCount, epoch gvsync.SeqCountEpoch, ptr *Value) (Value, bool) { - var val Value - if gvsync.RaceEnabled { - gvsync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) - } else { - val = *ptr - } - return val, sc.ReadOk(epoch) -} - -func init() { - var val Value - typ := reflect.TypeOf(val) - name := typ.Name() - if ptrs := gvsync.PointersInType(typ, name); len(ptrs) != 0 { - panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n"))) - } -} diff --git a/third_party/gvsync/seqatomictest/BUILD b/third_party/gvsync/seqatomictest/BUILD deleted file mode 100644 index c858c20c4..000000000 --- a/third_party/gvsync/seqatomictest/BUILD +++ /dev/null @@ -1,34 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "seqatomic_int", - out = "seqatomic_int_unsafe.go", - package = "seqatomic", - suffix = "Int", - template = "//third_party/gvsync:generic_seqatomic", - types = { - "Value": "int", - }, -) - -go_library( - name = "seqatomic", - srcs = ["seqatomic_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/third_party/gvsync/seqatomic", - deps = [ - "//third_party/gvsync", - ], -) - -go_test( - name = "seqatomic_test", - size = "small", - srcs = ["seqatomic_test.go"], - embed = [":seqatomic"], - deps = [ - "//third_party/gvsync", - ], -) diff --git a/third_party/gvsync/seqatomictest/seqatomic_test.go b/third_party/gvsync/seqatomictest/seqatomic_test.go deleted file mode 100644 index a5447f589..000000000 --- a/third_party/gvsync/seqatomictest/seqatomic_test.go +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package seqatomic - -import ( - "sync/atomic" - "testing" - "time" - - "gvisor.dev/gvisor/third_party/gvsync" -) - -func TestSeqAtomicLoadUncontended(t *testing.T) { - var seq gvsync.SeqCount - const want = 1 - data := want - if got := SeqAtomicLoadInt(&seq, &data); got != want { - t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } -} - -func TestSeqAtomicLoadAfterWrite(t *testing.T) { - var seq gvsync.SeqCount - var data int - const want = 1 - seq.BeginWrite() - data = want - seq.EndWrite() - if got := SeqAtomicLoadInt(&seq, &data); got != want { - t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } -} - -func TestSeqAtomicLoadDuringWrite(t *testing.T) { - var seq gvsync.SeqCount - var data int - const want = 1 - seq.BeginWrite() - go func() { - time.Sleep(time.Second) - data = want - seq.EndWrite() - }() - if got := SeqAtomicLoadInt(&seq, &data); got != want { - t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } -} - -func TestSeqAtomicTryLoadUncontended(t *testing.T) { - var seq gvsync.SeqCount - const want = 1 - data := want - epoch := seq.BeginRead() - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { - t.Errorf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) - } -} - -func TestSeqAtomicTryLoadDuringWrite(t *testing.T) { - var seq gvsync.SeqCount - var data int - epoch := seq.BeginRead() - seq.BeginWrite() - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { - t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) - } - seq.EndWrite() -} - -func TestSeqAtomicTryLoadAfterWrite(t *testing.T) { - var seq gvsync.SeqCount - var data int - epoch := seq.BeginRead() - seq.BeginWrite() - seq.EndWrite() - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { - t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) - } -} - -func BenchmarkSeqAtomicLoadIntUncontended(b *testing.B) { - var seq gvsync.SeqCount - const want = 42 - data := want - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - if got := SeqAtomicLoadInt(&seq, &data); got != want { - b.Fatalf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } - } - }) -} - -func BenchmarkSeqAtomicTryLoadIntUncontended(b *testing.B) { - var seq gvsync.SeqCount - const want = 42 - data := want - b.RunParallel(func(pb *testing.PB) { - epoch := seq.BeginRead() - for pb.Next() { - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { - b.Fatalf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) - } - } - }) -} - -// For comparison: -func BenchmarkAtomicValueLoadIntUncontended(b *testing.B) { - var a atomic.Value - const want = 42 - a.Store(int(want)) - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - if got := a.Load().(int); got != want { - b.Fatalf("atomic.Value.Load: got %v, wanted %v", got, want) - } - } - }) -} diff --git a/third_party/gvsync/seqcount.go b/third_party/gvsync/seqcount.go deleted file mode 100644 index 2c9c2c3d6..000000000 --- a/third_party/gvsync/seqcount.go +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package gvsync - -import ( - "fmt" - "reflect" - "runtime" - "sync/atomic" -) - -// SeqCount is a synchronization primitive for optimistic reader/writer -// synchronization in cases where readers can work with stale data and -// therefore do not need to block writers. -// -// Compared to sync/atomic.Value: -// -// - Mutation of SeqCount-protected data does not require memory allocation, -// whereas atomic.Value generally does. This is a significant advantage when -// writes are common. -// -// - Atomic reads of SeqCount-protected data require copying. This is a -// disadvantage when atomic reads are common. -// -// - SeqCount may be more flexible: correct use of SeqCount.ReadOk allows other -// operations to be made atomic with reads of SeqCount-protected data. -// -// - SeqCount may be less flexible: as of this writing, SeqCount-protected data -// cannot include pointers. -// -// - SeqCount is more cumbersome to use; atomic reads of SeqCount-protected -// data require instantiating function templates using go_generics (see -// seqatomic.go). -type SeqCount struct { - // epoch is incremented by BeginWrite and EndWrite, such that epoch is odd - // if a writer critical section is active, and a read from data protected - // by this SeqCount is atomic iff epoch is the same even value before and - // after the read. - epoch uint32 -} - -// SeqCountEpoch tracks writer critical sections in a SeqCount. -type SeqCountEpoch struct { - val uint32 -} - -// We assume that: -// -// - All functions in sync/atomic that perform a memory read are at least a -// read fence: memory reads before calls to such functions cannot be reordered -// after the call, and memory reads after calls to such functions cannot be -// reordered before the call, even if those reads do not use sync/atomic. -// -// - All functions in sync/atomic that perform a memory write are at least a -// write fence: memory writes before calls to such functions cannot be -// reordered after the call, and memory writes after calls to such functions -// cannot be reordered before the call, even if those writes do not use -// sync/atomic. -// -// As of this writing, the Go memory model completely fails to describe -// sync/atomic, but these properties are implied by -// https://groups.google.com/forum/#!topic/golang-nuts/7EnEhM3U7B8. - -// BeginRead indicates the beginning of a reader critical section. Reader -// critical sections DO NOT BLOCK writer critical sections, so operations in a -// reader critical section MAY RACE with writer critical sections. Races are -// detected by ReadOk at the end of the reader critical section. Thus, the -// low-level structure of readers is generally: -// -// for { -// epoch := seq.BeginRead() -// // do something idempotent with seq-protected data -// if seq.ReadOk(epoch) { -// break -// } -// } -// -// However, since reader critical sections may race with writer critical -// sections, the Go race detector will (accurately) flag data races in readers -// using this pattern. Most users of SeqCount will need to use the -// SeqAtomicLoad function template in seqatomic.go. -func (s *SeqCount) BeginRead() SeqCountEpoch { - epoch := atomic.LoadUint32(&s.epoch) - for epoch&1 != 0 { - runtime.Gosched() - epoch = atomic.LoadUint32(&s.epoch) - } - return SeqCountEpoch{epoch} -} - -// ReadOk returns true if the reader critical section initiated by a previous -// call to BeginRead() that returned epoch did not race with any writer critical -// sections. -// -// ReadOk may be called any number of times during a reader critical section. -// Reader critical sections do not need to be explicitly terminated; the last -// call to ReadOk is implicitly the end of the reader critical section. -func (s *SeqCount) ReadOk(epoch SeqCountEpoch) bool { - return atomic.LoadUint32(&s.epoch) == epoch.val -} - -// BeginWrite indicates the beginning of a writer critical section. -// -// SeqCount does not support concurrent writer critical sections; clients with -// concurrent writers must synchronize them using e.g. sync.Mutex. -func (s *SeqCount) BeginWrite() { - if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 == 0 { - panic("SeqCount.BeginWrite during writer critical section") - } -} - -// EndWrite ends the effect of a preceding BeginWrite. -func (s *SeqCount) EndWrite() { - if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 != 0 { - panic("SeqCount.EndWrite outside writer critical section") - } -} - -// PointersInType returns a list of pointers reachable from values named -// valName of the given type. -// -// PointersInType is not exhaustive, but it is guaranteed that if typ contains -// at least one pointer, then PointersInTypeOf returns a non-empty list. -func PointersInType(typ reflect.Type, valName string) []string { - switch kind := typ.Kind(); kind { - case reflect.Bool, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: - return nil - - case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice, reflect.String, reflect.UnsafePointer: - return []string{valName} - - case reflect.Array: - return PointersInType(typ.Elem(), valName+"[]") - - case reflect.Struct: - var ptrs []string - for i, n := 0, typ.NumField(); i < n; i++ { - field := typ.Field(i) - ptrs = append(ptrs, PointersInType(field.Type, fmt.Sprintf("%s.%s", valName, field.Name))...) - } - return ptrs - - default: - return []string{fmt.Sprintf("%s (of type %s with unknown kind %s)", valName, typ, kind)} - } -} diff --git a/third_party/gvsync/seqcount_test.go b/third_party/gvsync/seqcount_test.go deleted file mode 100644 index 085e574b3..000000000 --- a/third_party/gvsync/seqcount_test.go +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package gvsync - -import ( - "reflect" - "testing" - "time" -) - -func TestSeqCountWriteUncontended(t *testing.T) { - var seq SeqCount - seq.BeginWrite() - seq.EndWrite() -} - -func TestSeqCountReadUncontended(t *testing.T) { - var seq SeqCount - epoch := seq.BeginRead() - if !seq.ReadOk(epoch) { - t.Errorf("ReadOk: got false, wanted true") - } -} - -func TestSeqCountBeginReadAfterWrite(t *testing.T) { - var seq SeqCount - var data int32 - const want = 1 - seq.BeginWrite() - data = want - seq.EndWrite() - epoch := seq.BeginRead() - if data != want { - t.Errorf("Reader: got %v, wanted %v", data, want) - } - if !seq.ReadOk(epoch) { - t.Errorf("ReadOk: got false, wanted true") - } -} - -func TestSeqCountBeginReadDuringWrite(t *testing.T) { - var seq SeqCount - var data int - const want = 1 - seq.BeginWrite() - go func() { - time.Sleep(time.Second) - data = want - seq.EndWrite() - }() - epoch := seq.BeginRead() - if data != want { - t.Errorf("Reader: got %v, wanted %v", data, want) - } - if !seq.ReadOk(epoch) { - t.Errorf("ReadOk: got false, wanted true") - } -} - -func TestSeqCountReadOkAfterWrite(t *testing.T) { - var seq SeqCount - epoch := seq.BeginRead() - seq.BeginWrite() - seq.EndWrite() - if seq.ReadOk(epoch) { - t.Errorf("ReadOk: got true, wanted false") - } -} - -func TestSeqCountReadOkDuringWrite(t *testing.T) { - var seq SeqCount - epoch := seq.BeginRead() - seq.BeginWrite() - if seq.ReadOk(epoch) { - t.Errorf("ReadOk: got true, wanted false") - } - seq.EndWrite() -} - -func BenchmarkSeqCountWriteUncontended(b *testing.B) { - var seq SeqCount - for i := 0; i < b.N; i++ { - seq.BeginWrite() - seq.EndWrite() - } -} - -func BenchmarkSeqCountReadUncontended(b *testing.B) { - var seq SeqCount - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - epoch := seq.BeginRead() - if !seq.ReadOk(epoch) { - b.Fatalf("ReadOk: got false, wanted true") - } - } - }) -} - -func TestPointersInType(t *testing.T) { - for _, test := range []struct { - name string // used for both test and value name - val interface{} - ptrs []string - }{ - { - name: "EmptyStruct", - val: struct{}{}, - }, - { - name: "Int", - val: int(0), - }, - { - name: "MixedStruct", - val: struct { - b bool - I int - ExportedPtr *struct{} - unexportedPtr *struct{} - arr [2]int - ptrArr [2]*int - nestedStruct struct { - nestedNonptr int - nestedPtr *int - } - structArr [1]struct { - nonptr int - ptr *int - } - }{}, - ptrs: []string{ - "MixedStruct.ExportedPtr", - "MixedStruct.unexportedPtr", - "MixedStruct.ptrArr[]", - "MixedStruct.nestedStruct.nestedPtr", - "MixedStruct.structArr[].ptr", - }, - }, - } { - t.Run(test.name, func(t *testing.T) { - typ := reflect.TypeOf(test.val) - ptrs := PointersInType(typ, test.name) - t.Logf("Found pointers: %v", ptrs) - if (len(ptrs) != 0 || len(test.ptrs) != 0) && !reflect.DeepEqual(ptrs, test.ptrs) { - t.Errorf("Got %v, wanted %v", ptrs, test.ptrs) - } - }) - } -} diff --git a/tools/go_marshal/test/BUILD b/tools/go_marshal/test/BUILD index fa82f8e9b..d412e1ccf 100644 --- a/tools/go_marshal/test/BUILD +++ b/tools/go_marshal/test/BUILD @@ -1,9 +1,8 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_marshal:defs.bzl", "go_library") package(licenses = ["notice"]) -load("//tools/go_marshal:defs.bzl", "go_library") - package_group( name = "gomarshal_test", packages = [ diff --git a/tools/go_marshal/test/external/BUILD b/tools/go_marshal/test/external/BUILD index 8fb43179b..9bb89e1da 100644 --- a/tools/go_marshal/test/external/BUILD +++ b/tools/go_marshal/test/external/BUILD @@ -1,7 +1,7 @@ -package(licenses = ["notice"]) - load("//tools/go_marshal:defs.bzl", "go_library") +package(licenses = ["notice"]) + go_library( name = "external", testonly = 1, -- cgit v1.2.3 From 20279c305ece6a458006999c8dafc5672ca92803 Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Tue, 26 Nov 2019 18:19:47 -0800 Subject: Allow open(O_TRUNC) and (f)truncate for proc files. This allows writable proc and devices files to be opened with O_CREAT|O_TRUNC. This is encountered most frequently when interacting with proc or devices files via the command line. e.g. $ echo 8192 1048576 4194304 > /proc/sys/net/ipv4/tcp_rmem Also adds a test to test the behavior of open(O_TRUNC), truncate, and ftruncate on named pipes. Fixes #1116 PiperOrigin-RevId: 282677425 --- pkg/sentry/fs/proc/sys_net.go | 17 ++++++++++++++--- pkg/sentry/fs/tty/master.go | 6 +++++- pkg/sentry/fs/tty/slave.go | 6 +++++- pkg/sentry/syscalls/linux/sys_file.go | 12 ++++++++++-- test/syscalls/linux/pipe.cc | 14 ++++++++++++++ 5 files changed, 48 insertions(+), 7 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index f3b63dfc2..bd93f83fa 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -64,7 +64,7 @@ var _ fs.InodeOperations = (*tcpMemInode)(nil) func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir tcpMemDir) *fs.Inode { tm := &tcpMemInode{ - SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), s: s, dir: dir, } @@ -77,6 +77,11 @@ func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir return fs.NewInode(ctx, tm, msrc, sattr) } +// Truncate implements fs.InodeOperations.Truncate. +func (tcpMemInode) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + // GetFile implements fs.InodeOperations.GetFile. func (m *tcpMemInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { flags.Pread = true @@ -168,14 +173,15 @@ func writeSize(dirType tcpMemDir, s inet.Stack, size inet.TCPBufferSize) error { // +stateify savable type tcpSack struct { + fsutil.SimpleFileInode + stack inet.Stack `state:"wait"` enabled *bool - fsutil.SimpleFileInode } func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { ts := &tcpSack{ - SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC), + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), stack: s, } sattr := fs.StableAttr{ @@ -187,6 +193,11 @@ func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *f return fs.NewInode(ctx, ts, msrc, sattr) } +// Truncate implements fs.InodeOperations.Truncate. +func (tcpSack) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + // GetFile implements fs.InodeOperations.GetFile. func (s *tcpSack) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { flags.Pread = true diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index bc56be696..934828c12 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -32,7 +32,6 @@ import ( // +stateify savable type masterInodeOperations struct { fsutil.SimpleFileInode - fsutil.InodeNoopTruncate // d is the containing dir. d *dirInodeOperations @@ -77,6 +76,11 @@ func newMasterInode(ctx context.Context, d *dirInodeOperations, owner fs.FileOwn func (mi *masterInodeOperations) Release(ctx context.Context) { } +// Truncate implements fs.InodeOperations.Truncate. +func (masterInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + // GetFile implements fs.InodeOperations.GetFile. // // It allocates a new terminal. diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index 4cbea0367..2a51e6bab 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -31,7 +31,6 @@ import ( // +stateify savable type slaveInodeOperations struct { fsutil.SimpleFileInode - fsutil.InodeNoopTruncate // d is the containing dir. d *dirInodeOperations @@ -73,6 +72,11 @@ func (si *slaveInodeOperations) Release(ctx context.Context) { si.t.DecRef() } +// Truncate implements fs.InodeOperations.Truncate. +func (slaveInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + // GetFile implements fs.InodeOperations.GetFile. // // This may race with destruction of the terminal. If the terminal is gone, it diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 167c2b60b..3b9181002 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -171,6 +171,9 @@ func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uint } } + // Truncate is called when O_TRUNC is specified for any kind of + // existing Dirent. Behavior is delegated to the entry's Truncate + // implementation. if flags&linux.O_TRUNC != 0 { if err := d.Inode.Truncate(t, d, 0); err != nil { return err @@ -397,7 +400,9 @@ func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode l return err } - // Should we truncate the file? + // Truncate is called when O_TRUNC is specified for any kind of + // existing Dirent. Behavior is delegated to the entry's Truncate + // implementation. if flags&linux.O_TRUNC != 0 { if err := found.Inode.Truncate(t, found, 0); err != nil { return err @@ -1484,6 +1489,8 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if fs.IsDir(d.Inode.StableAttr) { return syserror.EISDIR } + // In contrast to open(O_TRUNC), truncate(2) is only valid for file + // types. if !fs.IsFile(d.Inode.StableAttr) { return syserror.EINVAL } @@ -1522,7 +1529,8 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, syserror.EINVAL } - // Note that this is different from truncate(2) above, where a + // In contrast to open(O_TRUNC), truncate(2) is only valid for file + // types. Note that this is different from truncate(2) above, where a // directory returns EISDIR. if !fs.IsFile(file.Dirent.Inode.StableAttr) { return 0, nil, syserror.EINVAL diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc index c0b354e65..ac9b21b24 100644 --- a/test/syscalls/linux/pipe.cc +++ b/test/syscalls/linux/pipe.cc @@ -212,6 +212,20 @@ TEST(Pipe2Test, BadOptions) { EXPECT_THAT(pipe2(fds, 0xDEAD), SyscallFailsWithErrno(EINVAL)); } +// Tests that opening named pipes with O_TRUNC shouldn't cause an error, but +// calls to (f)truncate should. +TEST(NamedPipeTest, Truncate) { + const std::string tmp_path = NewTempAbsPath(); + SKIP_IF(mkfifo(tmp_path.c_str(), 0644) != 0); + + ASSERT_THAT(open(tmp_path.c_str(), O_NONBLOCK | O_RDONLY), SyscallSucceeds()); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(tmp_path.c_str(), O_RDWR | O_NONBLOCK | O_TRUNC)); + + ASSERT_THAT(truncate(tmp_path.c_str(), 0), SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(ftruncate(fd.get(), 0), SyscallFailsWithErrno(EINVAL)); +} + TEST_P(PipeTest, Seek) { SKIP_IF(!CreateBlocking()); -- cgit v1.2.3 From 035407153989b189a3ce42df43d6f528fa95444f Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Tue, 3 Dec 2019 15:06:18 -0800 Subject: Fix printing /proc/[pid]/io for /proc/[pid]/task/[tid]/io. PiperOrigin-RevId: 283630669 --- pkg/sentry/fs/proc/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 87184ec67..2a598149d 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -639,7 +639,7 @@ func (i *ioData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se io.Accumulate(i.IOUsage()) var buf bytes.Buffer - fmt.Fprintf(&buf, "char: %d\n", io.CharsRead) + fmt.Fprintf(&buf, "rchar: %d\n", io.CharsRead) fmt.Fprintf(&buf, "wchar: %d\n", io.CharsWritten) fmt.Fprintf(&buf, "syscr: %d\n", io.ReadSyscalls) fmt.Fprintf(&buf, "syscw: %d\n", io.WriteSyscalls) -- cgit v1.2.3 From 0a32c0235744191947a6bf890031026e06788837 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Thu, 5 Dec 2019 13:22:31 -0800 Subject: Create correct file for /proc/[pid]/task/[tid]/io PiperOrigin-RevId: 284038840 --- pkg/sentry/fs/proc/task.go | 32 +++++++++++++++++--------------- test/syscalls/linux/proc.cc | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 15 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 2a598149d..0e46c5fb7 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -67,29 +67,28 @@ type taskDir struct { var _ fs.InodeOperations = (*taskDir)(nil) // newTaskDir creates a new proc task entry. -func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, showSubtasks bool) *fs.Inode { +func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode { contents := map[string]*fs.Inode{ - "auxv": newAuxvec(t, msrc), - "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), - "comm": newComm(t, msrc), - "environ": newExecArgInode(t, msrc, environExecArg), - "exe": newExe(t, msrc), - "fd": newFdDir(t, msrc), - "fdinfo": newFdInfoDir(t, msrc), - "gid_map": newGIDMap(t, msrc), - // FIXME(b/123511468): create the correct io file for threads. - "io": newIO(t, msrc), + "auxv": newAuxvec(t, msrc), + "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), + "comm": newComm(t, msrc), + "environ": newExecArgInode(t, msrc, environExecArg), + "exe": newExe(t, msrc), + "fd": newFdDir(t, msrc), + "fdinfo": newFdInfoDir(t, msrc), + "gid_map": newGIDMap(t, msrc), + "io": newIO(t, msrc, isThreadGroup), "maps": newMaps(t, msrc), "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), "mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), "ns": newNamespaceDir(t, msrc), "smaps": newSmaps(t, msrc), - "stat": newTaskStat(t, msrc, showSubtasks, p.pidns), + "stat": newTaskStat(t, msrc, isThreadGroup, p.pidns), "statm": newStatm(t, msrc), "status": newStatus(t, msrc, p.pidns), "uid_map": newUIDMap(t, msrc), } - if showSubtasks { + if isThreadGroup { contents["task"] = p.newSubtasks(t, msrc) } if len(p.cgroupControllers) > 0 { @@ -619,8 +618,11 @@ type ioData struct { ioUsage } -func newIO(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { - return newProcInode(t, seqfile.NewSeqFile(t, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t) +func newIO(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode { + if isThreadGroup { + return newProcInode(t, seqfile.NewSeqFile(t, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t) + } + return newProcInode(t, seqfile.NewSeqFile(t, &ioData{t}), msrc, fs.SpecialFile, t) } // NeedsUpdate returns whether the generation is old or not. diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 512de5ee0..8cf08991b 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,7 @@ #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" +#include "absl/synchronization/notification.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/util/capability_util.h" @@ -1988,6 +1990,44 @@ TEST(Proc, GetdentsEnoent) { SyscallFailsWithErrno(ENOENT)); } +void CheckSyscwFromIOFile(const std::string& path, const std::string& regex) { + std::string output; + ASSERT_NO_ERRNO(GetContents(path, &output)); + ASSERT_THAT(output, ContainsRegex(absl::StrCat("syscw:\\s+", regex, "\n"))); +} + +// Checks that there is variable accounting of IO between threads/tasks. +TEST(Proc, PidTidIOAccounting) { + absl::Notification notification; + + // Run a thread with a bunch of writes. Check that io account records exactly + // the number of write calls. File open/close is there to prevent buffering. + ScopedThread writer([¬ification] { + const int num_writes = 100; + for (int i = 0; i < num_writes; i++) { + auto path = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + ASSERT_NO_ERRNO(SetContents(path.path(), "a")); + } + notification.Notify(); + const std::string& writer_dir = + absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); + + CheckSyscwFromIOFile(writer_dir, std::to_string(num_writes)); + }); + + // Run a thread and do no writes. Check that no writes are recorded. + ScopedThread noop([¬ification] { + notification.WaitForNotification(); + const std::string& noop_dir = + absl::StrCat("/proc/", getpid(), "/task/", gettid(), "/io"); + + CheckSyscwFromIOFile(noop_dir, "0"); + }); + + writer.Join(); + noop.Join(); +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 663fe840f79ac3d8e2ce1a1f1409d84cf2a9d37e Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Fri, 6 Dec 2019 14:32:53 -0800 Subject: Implement TTY field in control.Processes(). Threadgroups already know their TTY (if they have one), which now contains the TTY Index, and is returned in the Processes() call. PiperOrigin-RevId: 284263850 --- pkg/sentry/control/proc.go | 26 +++++++--- pkg/sentry/control/proc_test.go | 10 ++-- pkg/sentry/fs/tty/terminal.go | 4 +- pkg/sentry/kernel/tty.go | 11 +++++ runsc/container/container_test.go | 95 +++++++++++++++++++++++++++++++++++- runsc/container/test_app/BUILD | 1 + runsc/container/test_app/test_app.go | 40 +++++++++++++++ 7 files changed, 173 insertions(+), 14 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index c35faeb4c..a6f90b2bb 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -268,7 +268,6 @@ func (proc *Proc) Ps(args *PsArgs, out *string) error { } // Process contains information about a single process in a Sandbox. -// TODO(b/117881927): Implement TTY field. type Process struct { UID auth.KUID `json:"uid"` PID kernel.ThreadID `json:"pid"` @@ -276,6 +275,9 @@ type Process struct { PPID kernel.ThreadID `json:"ppid"` // Processor utilization C int32 `json:"c"` + // TTY name of the process. Will be of the form "pts/N" if there is a + // TTY, or "?" if there is not. + TTY string `json:"tty"` // Start time STime string `json:"stime"` // CPU time @@ -285,18 +287,19 @@ type Process struct { } // ProcessListToTable prints a table with the following format: -// UID PID PPID C STIME TIME CMD -// 0 1 0 0 14:04 505262ns tail +// UID PID PPID C TTY STIME TIME CMD +// 0 1 0 0 pty/4 14:04 505262ns tail func ProcessListToTable(pl []*Process) string { var buf bytes.Buffer tw := tabwriter.NewWriter(&buf, 10, 1, 3, ' ', 0) - fmt.Fprint(tw, "UID\tPID\tPPID\tC\tSTIME\tTIME\tCMD") + fmt.Fprint(tw, "UID\tPID\tPPID\tC\tTTY\tSTIME\tTIME\tCMD") for _, d := range pl { - fmt.Fprintf(tw, "\n%d\t%d\t%d\t%d\t%s\t%s\t%s", + fmt.Fprintf(tw, "\n%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s", d.UID, d.PID, d.PPID, d.C, + d.TTY, d.STime, d.Time, d.Cmd) @@ -347,7 +350,7 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error { if p := tg.Leader().Parent(); p != nil { ppid = p.PIDNamespace().IDOfThreadGroup(p.ThreadGroup()) } - *out = append(*out, &Process{ + p := Process{ UID: tg.Leader().Credentials().EffectiveKUID, PID: pid, PPID: ppid, @@ -355,7 +358,9 @@ func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error { C: percentCPU(tg.CPUStats(), tg.Leader().StartTime(), now), Time: tg.CPUStats().SysTime.String(), Cmd: tg.Leader().Name(), - }) + TTY: ttyName(tg.TTY()), + } + *out = append(*out, &p) } sort.Slice(*out, func(i, j int) bool { return (*out)[i].PID < (*out)[j].PID }) return nil @@ -395,3 +400,10 @@ func percentCPU(stats usage.CPUStats, startTime, now ktime.Time) int32 { } return int32(percentCPU) } + +func ttyName(tty *kernel.TTY) string { + if tty == nil { + return "?" + } + return fmt.Sprintf("pts/%d", tty.Index) +} diff --git a/pkg/sentry/control/proc_test.go b/pkg/sentry/control/proc_test.go index d8ada2694..0a88459b2 100644 --- a/pkg/sentry/control/proc_test.go +++ b/pkg/sentry/control/proc_test.go @@ -34,7 +34,7 @@ func TestProcessListTable(t *testing.T) { }{ { pl: []*Process{}, - expected: "UID PID PPID C STIME TIME CMD", + expected: "UID PID PPID C TTY STIME TIME CMD", }, { pl: []*Process{ @@ -43,6 +43,7 @@ func TestProcessListTable(t *testing.T) { PID: 0, PPID: 0, C: 0, + TTY: "?", STime: "0", Time: "0", Cmd: "zero", @@ -52,14 +53,15 @@ func TestProcessListTable(t *testing.T) { PID: 1, PPID: 1, C: 1, + TTY: "pts/4", STime: "1", Time: "1", Cmd: "one", }, }, - expected: `UID PID PPID C STIME TIME CMD -0 0 0 0 0 0 zero -1 1 1 1 1 1 one`, + expected: `UID PID PPID C TTY STIME TIME CMD +0 0 0 0 ? 0 0 zero +1 1 1 1 pts/4 1 1 one`, }, } diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go index ff8138820..917f90cc0 100644 --- a/pkg/sentry/fs/tty/terminal.go +++ b/pkg/sentry/fs/tty/terminal.go @@ -53,8 +53,8 @@ func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal d: d, n: n, ld: newLineDiscipline(termios), - masterKTTY: &kernel.TTY{}, - slaveKTTY: &kernel.TTY{}, + masterKTTY: &kernel.TTY{Index: n}, + slaveKTTY: &kernel.TTY{Index: n}, } t.EnableLeakCheck("tty.Terminal") return &t diff --git a/pkg/sentry/kernel/tty.go b/pkg/sentry/kernel/tty.go index 34f84487a..048de26dc 100644 --- a/pkg/sentry/kernel/tty.go +++ b/pkg/sentry/kernel/tty.go @@ -21,8 +21,19 @@ import "sync" // // +stateify savable type TTY struct { + // Index is the terminal index. It is immutable. + Index uint32 + mu sync.Mutex `state:"nosave"` // tg is protected by mu. tg *ThreadGroup } + +// TTY returns the thread group's controlling terminal. If nil, there is no +// controlling terminal. +func (tg *ThreadGroup) TTY() *TTY { + tg.signalHandlers.mu.Lock() + defer tg.signalHandlers.mu.Unlock() + return tg.tty +} diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 07eacaac0..1d06f2780 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -98,10 +98,14 @@ func procListsEqual(got, want []*control.Process) bool { for i := range got { pd1 := got[i] pd2 := want[i] - // Zero out unimplemented and timing dependant fields. + // Zero out timing dependant fields. pd1.Time = "" pd1.STime = "" pd1.C = 0 + // Ignore TTY field too, since it's not relevant in the cases + // where we use this method. Tests that care about the TTY + // field should check for it themselves. + pd1.TTY = "" if *pd1 != *pd2 { return false } @@ -2112,6 +2116,95 @@ func TestOverlayfsStaleRead(t *testing.T) { } } +// TestTTYField checks TTY field returned by container.Processes(). +func TestTTYField(t *testing.T) { + stop := testutil.StartReaper() + defer stop() + + testApp, err := testutil.FindFile("runsc/container/test_app/test_app") + if err != nil { + t.Fatal("error finding test_app:", err) + } + + testCases := []struct { + name string + useTTY bool + wantTTYField string + }{ + { + name: "no tty", + useTTY: false, + wantTTYField: "?", + }, + { + name: "tty used", + useTTY: true, + wantTTYField: "pts/0", + }, + } + + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + conf := testutil.TestConfig() + + // We will run /bin/sleep, possibly with an open TTY. + cmd := []string{"/bin/sleep", "10000"} + if test.useTTY { + // Run inside the "pty-runner". + cmd = append([]string{testApp, "pty-runner"}, cmd...) + } + + spec := testutil.NewSpecWithArgs(cmd...) + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + // Create and start the container. + args := Args{ + ID: testutil.UniqueContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + c, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + + // Wait for sleep to be running, and check the TTY + // field. + var gotTTYField string + cb := func() error { + ps, err := c.Processes() + if err != nil { + err = fmt.Errorf("error getting process data from container: %v", err) + return &backoff.PermanentError{Err: err} + } + for _, p := range ps { + if strings.Contains(p.Cmd, "sleep") { + gotTTYField = p.TTY + return nil + } + } + return fmt.Errorf("sleep not running") + } + if err := testutil.Poll(cb, 30*time.Second); err != nil { + t.Fatalf("error waiting for sleep process: %v", err) + } + + if gotTTYField != test.wantTTYField { + t.Errorf("tty field got %q, want %q", gotTTYField, test.wantTTYField) + } + }) + } +} + // executeSync synchronously executes a new process. func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) { pid, err := cont.Execute(args) diff --git a/runsc/container/test_app/BUILD b/runsc/container/test_app/BUILD index 9bf9e6e9d..bfd338bb6 100644 --- a/runsc/container/test_app/BUILD +++ b/runsc/container/test_app/BUILD @@ -15,5 +15,6 @@ go_binary( "//pkg/unet", "//runsc/testutil", "@com_github_google_subcommands//:go_default_library", + "@com_github_kr_pty//:go_default_library", ], ) diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go index 913d781c6..a1c8a741a 100644 --- a/runsc/container/test_app/test_app.go +++ b/runsc/container/test_app/test_app.go @@ -19,6 +19,7 @@ package main import ( "context" "fmt" + "io" "io/ioutil" "log" "net" @@ -31,6 +32,7 @@ import ( "flag" "github.com/google/subcommands" + "github.com/kr/pty" "gvisor.dev/gvisor/runsc/testutil" ) @@ -41,6 +43,7 @@ func main() { subcommands.Register(new(fdReceiver), "") subcommands.Register(new(fdSender), "") subcommands.Register(new(forkBomb), "") + subcommands.Register(new(ptyRunner), "") subcommands.Register(new(reaper), "") subcommands.Register(new(syscall), "") subcommands.Register(new(taskTree), "") @@ -352,3 +355,40 @@ func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...inter return subcommands.ExitSuccess } + +type ptyRunner struct{} + +// Name implements subcommands.Command. +func (*ptyRunner) Name() string { + return "pty-runner" +} + +// Synopsis implements subcommands.Command. +func (*ptyRunner) Synopsis() string { + return "runs the given command with an open pty terminal" +} + +// Usage implements subcommands.Command. +func (*ptyRunner) Usage() string { + return "pty-runner [command]" +} + +// SetFlags implements subcommands.Command.SetFlags. +func (*ptyRunner) SetFlags(f *flag.FlagSet) {} + +// Execute implements subcommands.Command. +func (*ptyRunner) Execute(_ context.Context, fs *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus { + c := exec.Command(fs.Args()[0], fs.Args()[1:]...) + f, err := pty.Start(c) + if err != nil { + fmt.Printf("pty.Start failed: %v", err) + return subcommands.ExitFailure + } + defer f.Close() + + // Copy stdout from the command to keep this process alive until the + // subprocess exits. + io.Copy(os.Stdout, f) + + return subcommands.ExitSuccess +} -- cgit v1.2.3 From 898dcc2f839a975a9171271824af32176c2e5c27 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Mon, 9 Dec 2019 12:03:31 -0800 Subject: Redirect TODOs to gvisor.dev PiperOrigin-RevId: 284606233 --- pkg/sentry/fs/gofer/session.go | 6 +++--- pkg/sentry/kernel/semaphore/semaphore.go | 6 +++--- pkg/sentry/syscalls/linux/linux64_amd64.go | 2 +- pkg/sentry/syscalls/linux/linux64_arm64.go | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 0da608548..4e358a46a 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -143,9 +143,9 @@ type session struct { // socket files. This allows unix domain sockets to be used with paths that // belong to a gofer. // - // TODO(b/77154739): there are few possible races with someone stat'ing the - // file and another deleting it concurrently, where the file will not be - // reported as socket file. + // TODO(gvisor.dev/issue/1200): there are few possible races with someone + // stat'ing the file and another deleting it concurrently, where the file + // will not be reported as socket file. endpoints *endpointMaps `state:"wait"` } diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index 93fe68a3e..de9617e9d 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -302,7 +302,7 @@ func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Cred return syserror.ERANGE } - // TODO(b/29354920): Clear undo entries in all processes + // TODO(gvisor.dev/issue/137): Clear undo entries in all processes. sem.value = val sem.pid = pid s.changeTime = ktime.NowFromContext(ctx) @@ -336,7 +336,7 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti for i, val := range vals { sem := &s.sems[i] - // TODO(b/29354920): Clear undo entries in all processes + // TODO(gvisor.dev/issue/137): Clear undo entries in all processes. sem.value = int16(val) sem.pid = pid sem.wakeWaiters() @@ -481,7 +481,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch } // All operations succeeded, apply them. - // TODO(b/29354920): handle undo operations. + // TODO(gvisor.dev/issue/137): handle undo operations. for i, v := range tmpVals { s.sems[i].value = v s.sems[i].wakeWaiters() diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 81e4f93a6..5642d69ea 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -260,7 +260,7 @@ var AMD64 = &kernel.SyscallTable{ 217: syscalls.Supported("getdents64", Getdents64), 218: syscalls.Supported("set_tid_address", SetTidAddress), 219: syscalls.Supported("restart_syscall", RestartSyscall), - 220: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920) + 220: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), 221: syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil), 222: syscalls.Supported("timer_create", TimerCreate), 223: syscalls.Supported("timer_settime", TimerSettime), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index f1dd4b0c0..f897bfff8 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -224,7 +224,7 @@ var ARM64 = &kernel.SyscallTable{ 189: syscalls.ErrorWithEvent("msgsnd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/135"}), // TODO(b/29354921) 190: syscalls.Supported("semget", Semget), 191: syscalls.PartiallySupported("semctl", Semctl, "Options IPC_INFO, SEM_INFO, IPC_STAT, SEM_STAT, SEM_STAT_ANY, GETNCNT, GETZCNT not supported.", nil), - 192: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), // TODO(b/29354920) + 192: syscalls.ErrorWithEvent("semtimedop", syserror.ENOSYS, "", []string{"gvisor.dev/issue/137"}), 193: syscalls.PartiallySupported("semop", Semop, "Option SEM_UNDO not supported.", nil), 194: syscalls.PartiallySupported("shmget", Shmget, "Option SHM_HUGETLB is not supported.", nil), 195: syscalls.PartiallySupported("shmctl", Shmctl, "Options SHM_LOCK, SHM_UNLOCK are not supported.", nil), -- cgit v1.2.3 From 8a46e83111c70c7ee81368dd4e9df53559213fa0 Mon Sep 17 00:00:00 2001 From: Yong He Date: Sun, 15 Dec 2019 11:20:12 +0800 Subject: Fix UDS bind cause fd leak in gofer After the finalizer optimize in 76039f895995c3fe0deef5958f843868685ecc38 commit, clientFile needs to closed before finalizer release it. The clientFile is not closed if it is created via gofer.(*inodeOperations).Bind, this will cause fd leak which is hold by gofer process. Fixes #1396 Signed-off-by: Yong He Signed-off-by: Jianfeng Tan --- pkg/sentry/fs/gofer/path.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index 8c17603f8..c09f3b71c 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -234,6 +234,8 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, if err != nil { return nil, err } + // We're not going to use newFile after return. + defer newFile.close(ctx) // Stabilize the endpoint map while creation is in progress. unlock := i.session().endpoints.lock() @@ -254,7 +256,6 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, // Get the attributes of the file to create inode key. qid, mask, attr, err := getattr(ctx, newFile) if err != nil { - newFile.close(ctx) return nil, err } @@ -270,7 +271,6 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, // cloned and re-opened multiple times after creation. _, unopened, err := i.fileState.file.walk(ctx, []string{name}) if err != nil { - newFile.close(ctx) return nil, err } -- cgit v1.2.3 From bd5c7bf58dd656dc16b920b48a346ef08f1efba8 Mon Sep 17 00:00:00 2001 From: Yong He Date: Mon, 16 Dec 2019 14:04:03 +0800 Subject: Fix deadlock in overlay bind Copy up parent when binding UDS on overlayfs is supported in commit 02ab1f187cd24c67b754b004229421d189cee264. But the using of copyUp in overlayBind will cause sentry stuck, reason is dead lock in renameMu. 1 [Process A] Invoke a Unix socket bind operation renameMu is hold in fs.(*Dirent).genericCreate by process A 2 [Process B] Invoke a read syscall on /proc/task/mounts waitng on Lock of renameMu in fs.(*MountNamespace).FindMount 3 [Process A] Continue Unix socket bind operation wating on RLock of renameMu in fs.copyUp Root cause is recursive reading lock of reanmeMu in bind call trace, if there are writing lock between the two reading lock, then deadlock occured. Fixes #1397 --- pkg/sentry/fs/inode_overlay.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index a09147080..8d4303cc4 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -436,7 +436,7 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena } func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name string, data transport.BoundEndpoint, perm FilePermissions) (*Dirent, error) { - if err := copyUp(ctx, parent); err != nil { + if err := copyUpLockedForRename(ctx, parent); err != nil { return nil, err } -- cgit v1.2.3 From e6f4124afd951c3b089f9c75c499c14f4d90a590 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 16 Dec 2019 13:18:36 -0800 Subject: Implement checks for get/setxattr at the syscall layer. Add checks for input arguments, file type, permissions, etc. that match the Linux implementation. A call to get/setxattr that passes all the checks will still currently return EOPNOTSUPP. Actual support will be added in following commits. Only allow user.* extended attributes for the time being. PiperOrigin-RevId: 285835159 --- pkg/abi/linux/BUILD | 1 + pkg/abi/linux/xattr.go | 27 +++++ pkg/sentry/fs/inode.go | 8 ++ pkg/sentry/fs/inode_overlay.go | 5 + pkg/sentry/syscalls/linux/BUILD | 1 + pkg/sentry/syscalls/linux/linux64_amd64.go | 4 +- pkg/sentry/syscalls/linux/linux64_arm64.go | 4 +- pkg/sentry/syscalls/linux/sys_xattr.go | 169 +++++++++++++++++++++++++++++ test/syscalls/linux/xattr.cc | 79 +++++++------- 9 files changed, 253 insertions(+), 45 deletions(-) create mode 100644 pkg/abi/linux/xattr.go create mode 100644 pkg/sentry/syscalls/linux/sys_xattr.go (limited to 'pkg/sentry/fs') diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index 51774c6b6..9553f164d 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -57,6 +57,7 @@ go_library( "uio.go", "utsname.go", "wait.go", + "xattr.go", ], importpath = "gvisor.dev/gvisor/pkg/abi/linux", visibility = ["//visibility:public"], diff --git a/pkg/abi/linux/xattr.go b/pkg/abi/linux/xattr.go new file mode 100644 index 000000000..a3b6406fa --- /dev/null +++ b/pkg/abi/linux/xattr.go @@ -0,0 +1,27 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Constants for extended attributes. +const ( + XATTR_NAME_MAX = 255 + XATTR_SIZE_MAX = 65536 + + XATTR_CREATE = 1 + XATTR_REPLACE = 2 + + XATTR_USER_PREFIX = "user." + XATTR_USER_PREFIX_LEN = len(XATTR_USER_PREFIX) +) diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 2d43dff1d..91e2fde2f 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -270,6 +270,14 @@ func (i *Inode) Getxattr(name string) (string, error) { return i.InodeOperations.Getxattr(i, name) } +// Setxattr calls i.InodeOperations.Setxattr with i as the Inode. +func (i *Inode) Setxattr(name, value string) error { + if i.overlay != nil { + return overlaySetxattr(i.overlay, name, value) + } + return i.InodeOperations.Setxattr(i, name, value) +} + // Listxattr calls i.InodeOperations.Listxattr with i as the Inode. func (i *Inode) Listxattr() (map[string]struct{}, error) { if i.overlay != nil { diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index a09147080..63a991beb 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -552,6 +552,11 @@ func overlayGetxattr(o *overlayEntry, name string) (string, error) { return s, err } +// TODO(b/146028302): Support setxattr for overlayfs. +func overlaySetxattr(o *overlayEntry, name, value string) error { + return syserror.EOPNOTSUPP +} + func overlayListxattr(o *overlayEntry) (map[string]struct{}, error) { o.copyMu.RLock() defer o.copyMu.RUnlock() diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 4c0bf96e4..6766ba587 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -49,6 +49,7 @@ go_library( "sys_tls.go", "sys_utsname.go", "sys_write.go", + "sys_xattr.go", "timespec.go", ], importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls/linux", diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 797542d28..272ae9991 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -228,10 +228,10 @@ var AMD64 = &kernel.SyscallTable{ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), - 188: syscalls.Error("setxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 188: syscalls.PartiallySupported("setxattr", Setxattr, "Only supported for tmpfs.", nil), 189: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 190: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 191: syscalls.PartiallySupported("getxattr", Getxattr, "Only supported for tmpfs.", nil), 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index 2bc7faff5..3b584eed9 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -41,10 +41,10 @@ var ARM64 = &kernel.SyscallTable{ 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 5: syscalls.Error("setxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 5: syscalls.PartiallySupported("setxattr", Setxattr, "Only supported for tmpfs.", nil), 6: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 7: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), + 8: syscalls.PartiallySupported("getxattr", Getxattr, "Only supported for tmpfs.", nil), 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go new file mode 100644 index 000000000..97d9a65ea --- /dev/null +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -0,0 +1,169 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Getxattr implements linux syscall getxattr(2). +func Getxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + nameAddr := args[1].Pointer() + valueAddr := args[2].Pointer() + size := args[3].SizeT() + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + valueLen := 0 + err = fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + value, err := getxattr(t, d, dirPath, nameAddr) + if err != nil { + return err + } + + valueLen = len(value) + if size == 0 { + return nil + } + if size > linux.XATTR_SIZE_MAX { + size = linux.XATTR_SIZE_MAX + } + if valueLen > int(size) { + return syserror.ERANGE + } + + _, err = t.CopyOutBytes(valueAddr, []byte(value)) + return err + }) + if err != nil { + return 0, nil, err + } + return uintptr(valueLen), nil, nil +} + +// getxattr implements getxattr from the given *fs.Dirent. +func getxattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr usermem.Addr) (string, error) { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return "", syserror.ENOTDIR + } + + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Read: true}); err != nil { + return "", err + } + + name, err := copyInXattrName(t, nameAddr) + if err != nil { + return "", err + } + + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return "", syserror.EOPNOTSUPP + } + + return d.Inode.Getxattr(name) +} + +// Setxattr implements linux syscall setxattr(2). +func Setxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + nameAddr := args[1].Pointer() + valueAddr := args[2].Pointer() + size := args[3].SizeT() + flags := args[4].Uint() + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + if flags&^(linux.XATTR_CREATE|linux.XATTR_REPLACE) != 0 { + return 0, nil, syserror.EINVAL + } + + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + return setxattr(t, d, dirPath, nameAddr, valueAddr, size, flags) + }) +} + +// setxattr implements setxattr from the given *fs.Dirent. +func setxattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr, valueAddr usermem.Addr, size uint, flags uint32) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { + return err + } + + name, err := copyInXattrName(t, nameAddr) + if err != nil { + return err + } + + if size > linux.XATTR_SIZE_MAX { + return syserror.E2BIG + } + buf := make([]byte, size) + if _, err = t.CopyInBytes(valueAddr, buf); err != nil { + return err + } + value := string(buf) + + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + + return d.Inode.Setxattr(name, value) +} + +func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { + name, err := t.CopyInString(nameAddr, linux.XATTR_NAME_MAX+1) + if err != nil { + if err == syserror.ENAMETOOLONG { + return "", syserror.ERANGE + } + return "", err + } + if len(name) == 0 { + return "", syserror.ERANGE + } + return name, nil +} + +func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error { + // Restrict xattrs to regular files and directories. + // + // In Linux, this restriction technically only applies to xattrs in the + // "user.*" namespace, but we don't allow any other xattr prefixes anyway. + if !fs.IsRegular(i.StableAttr) && !fs.IsDir(i.StableAttr) { + if perms.Write { + return syserror.EPERM + } + return syserror.ENODATA + } + + return i.CheckPermission(t, perms) +} diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 3e07b634b..75740238c 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -28,6 +28,7 @@ #include "test/util/capability_util.h" #include "test/util/posix_error.h" #include "test/util/temp_path.h" +#include "test/util/test_util.h" namespace gvisor { namespace testing { @@ -37,9 +38,6 @@ namespace { class XattrTest : public FileTest {}; TEST_F(XattrTest, XattrNullName) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), @@ -49,9 +47,6 @@ TEST_F(XattrTest, XattrNullName) { } TEST_F(XattrTest, XattrEmptyName) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0), @@ -60,16 +55,17 @@ TEST_F(XattrTest, XattrEmptyName) { } TEST_F(XattrTest, XattrLargeName) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); std::string name = "user."; name += std::string(XATTR_NAME_MAX - name.length(), 'a'); - EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), - SyscallSucceeds()); - EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), - SyscallSucceedsWithValue(0)); + + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), + SyscallSucceedsWithValue(0)); + } name += "a"; EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), @@ -79,9 +75,6 @@ TEST_F(XattrTest, XattrLargeName) { } TEST_F(XattrTest, XattrInvalidPrefix) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); std::string name(XATTR_NAME_MAX, 'a'); EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), @@ -91,9 +84,6 @@ TEST_F(XattrTest, XattrInvalidPrefix) { } TEST_F(XattrTest, XattrReadOnly) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -102,22 +92,28 @@ TEST_F(XattrTest, XattrReadOnly) { char name[] = "user.abc"; char val = 'a'; size_t size = sizeof(val); - EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), + SyscallSucceeds()); + } ASSERT_NO_ERRNO(testing::Chmod(test_file_name_, S_IRUSR)); EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallFailsWithErrno(EACCES)); - char buf = '-'; - EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); - EXPECT_EQ(buf, val); + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + char buf = '-'; + EXPECT_THAT(getxattr(path, name, &buf, size), + SyscallSucceedsWithValue(size)); + EXPECT_EQ(buf, val); + } } TEST_F(XattrTest, XattrWriteOnly) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -128,7 +124,12 @@ TEST_F(XattrTest, XattrWriteOnly) { char name[] = "user.abc"; char val = 'a'; size_t size = sizeof(val); - EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); + + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), + SyscallSucceeds()); + } EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(EACCES)); } @@ -172,9 +173,6 @@ TEST_F(XattrTest, XattrOnSymlink) { } TEST_F(XattrTest, XattrOnInvalidFileTypes) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - char name[] = "user.abc"; char char_device[] = "/dev/zero"; @@ -226,9 +224,6 @@ TEST_F(XattrTest, SetxattrZeroSize) { } TEST_F(XattrTest, SetxattrSizeTooLarge) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); char name[] = "user.abc"; @@ -240,19 +235,24 @@ TEST_F(XattrTest, SetxattrSizeTooLarge) { EXPECT_THAT(setxattr(path, name, val.data(), size, /*flags=*/0), SyscallFailsWithErrno(E2BIG)); - EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + EXPECT_THAT(getxattr(path, name, nullptr, 0), + SyscallFailsWithErrno(ENODATA)); + } } TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); char name[] = "user.abc"; EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0), SyscallFailsWithErrno(EFAULT)); - EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + // TODO(b/127675828): Support setxattr and getxattr. + if (!IsRunningOnGvisor()) { + EXPECT_THAT(getxattr(path, name, nullptr, 0), + SyscallFailsWithErrno(ENODATA)); + } } TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { @@ -350,9 +350,6 @@ TEST_F(XattrTest, SetxattrReplaceFlag) { } TEST_F(XattrTest, SetxattrInvalidFlags) { - // TODO(b/127675828): Support setxattr and getxattr. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); int invalid_flags = 0xff; EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags), -- cgit v1.2.3 From 334a513f11f0ecc260abcce549b1f1a74edc2c51 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 18 Dec 2019 12:59:32 -0800 Subject: Add Mems_allowed to /proc/PID/status PiperOrigin-RevId: 286248378 --- pkg/sentry/fs/proc/task.go | 4 ++++ pkg/sentry/fsimpl/proc/task.go | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 0e46c5fb7..9bf4b4527 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -604,6 +604,10 @@ func (s *statusData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ( fmt.Fprintf(&buf, "CapEff:\t%016x\n", creds.EffectiveCaps) fmt.Fprintf(&buf, "CapBnd:\t%016x\n", creds.BoundingCaps) fmt.Fprintf(&buf, "Seccomp:\t%d\n", s.t.SeccompMode()) + // We unconditionally report a single NUMA node. See + // pkg/sentry/syscalls/linux/sys_mempolicy.go. + fmt.Fprintf(&buf, "Mems_allowed:\t1\n") + fmt.Fprintf(&buf, "Mems_allowed_list:\t0\n") return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statusData)(nil)}}, 0 } diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index c46e05c3a..0d87be52b 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -229,6 +229,10 @@ func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error { fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps) fmt.Fprintf(buf, "CapBnd:\t%016x\n", creds.BoundingCaps) fmt.Fprintf(buf, "Seccomp:\t%d\n", s.t.SeccompMode()) + // We unconditionally report a single NUMA node. See + // pkg/sentry/syscalls/linux/sys_mempolicy.go. + fmt.Fprintf(buf, "Mems_allowed:\t1\n") + fmt.Fprintf(buf, "Mems_allowed_list:\t0\n") return nil } -- cgit v1.2.3 From bb00438f36ebd19968246a838c2ddd61b9e14b79 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Thu, 19 Dec 2019 14:29:57 -0800 Subject: Make masterInodeOperations.Truncate take a pointer receiver. Otherwise a copy happens, which triggers a data race when reading masterInodeOperations.SimpleFileOperations.uattr, which must be accessed with a lock held. PiperOrigin-RevId: 286464473 --- pkg/sentry/fs/tty/master.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index 934828c12..6b07f6bf2 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -77,7 +77,7 @@ func (mi *masterInodeOperations) Release(ctx context.Context) { } // Truncate implements fs.InodeOperations.Truncate. -func (masterInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { +func (*masterInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { return nil } -- cgit v1.2.3 From 51f3ab85e024fcd74c49d273ce5202a207577d31 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Mon, 6 Jan 2020 12:51:35 -0800 Subject: Convert memfs into proto-tmpfs. - Renamed memfs to tmpfs. - Copied fileRangeSet bits from fs/fsutil/ to fsimpl/tmpfs/ - Changed tmpfs to be backed by filemem instead of byte slice. - regularFileReadWriter uses a sync.Pool, similar to gofer client. PiperOrigin-RevId: 288356380 --- pkg/sentry/fs/fsutil/BUILD | 2 +- pkg/sentry/fs/fsutil/file_range_set.go | 14 +- pkg/sentry/fsimpl/memfs/BUILD | 80 --- pkg/sentry/fsimpl/memfs/benchmark_test.go | 487 ------------------- pkg/sentry/fsimpl/memfs/directory.go | 187 ------- pkg/sentry/fsimpl/memfs/filesystem.go | 698 --------------------------- pkg/sentry/fsimpl/memfs/memfs.go | 293 ----------- pkg/sentry/fsimpl/memfs/named_pipe.go | 60 --- pkg/sentry/fsimpl/memfs/pipe_test.go | 235 --------- pkg/sentry/fsimpl/memfs/regular_file.go | 154 ------ pkg/sentry/fsimpl/memfs/symlink.go | 36 -- pkg/sentry/fsimpl/tmpfs/BUILD | 92 ++++ pkg/sentry/fsimpl/tmpfs/benchmark_test.go | 487 +++++++++++++++++++ pkg/sentry/fsimpl/tmpfs/directory.go | 187 +++++++ pkg/sentry/fsimpl/tmpfs/filesystem.go | 698 +++++++++++++++++++++++++++ pkg/sentry/fsimpl/tmpfs/named_pipe.go | 60 +++ pkg/sentry/fsimpl/tmpfs/pipe_test.go | 235 +++++++++ pkg/sentry/fsimpl/tmpfs/regular_file.go | 357 ++++++++++++++ pkg/sentry/fsimpl/tmpfs/regular_file_test.go | 224 +++++++++ pkg/sentry/fsimpl/tmpfs/symlink.go | 36 ++ pkg/sentry/fsimpl/tmpfs/tmpfs.go | 299 ++++++++++++ 21 files changed, 2683 insertions(+), 2238 deletions(-) delete mode 100644 pkg/sentry/fsimpl/memfs/BUILD delete mode 100644 pkg/sentry/fsimpl/memfs/benchmark_test.go delete mode 100644 pkg/sentry/fsimpl/memfs/directory.go delete mode 100644 pkg/sentry/fsimpl/memfs/filesystem.go delete mode 100644 pkg/sentry/fsimpl/memfs/memfs.go delete mode 100644 pkg/sentry/fsimpl/memfs/named_pipe.go delete mode 100644 pkg/sentry/fsimpl/memfs/pipe_test.go delete mode 100644 pkg/sentry/fsimpl/memfs/regular_file.go delete mode 100644 pkg/sentry/fsimpl/memfs/symlink.go create mode 100644 pkg/sentry/fsimpl/tmpfs/BUILD create mode 100644 pkg/sentry/fsimpl/tmpfs/benchmark_test.go create mode 100644 pkg/sentry/fsimpl/tmpfs/directory.go create mode 100644 pkg/sentry/fsimpl/tmpfs/filesystem.go create mode 100644 pkg/sentry/fsimpl/tmpfs/named_pipe.go create mode 100644 pkg/sentry/fsimpl/tmpfs/pipe_test.go create mode 100644 pkg/sentry/fsimpl/tmpfs/regular_file.go create mode 100644 pkg/sentry/fsimpl/tmpfs/regular_file_test.go create mode 100644 pkg/sentry/fsimpl/tmpfs/symlink.go create mode 100644 pkg/sentry/fsimpl/tmpfs/tmpfs.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index b2e8d9c77..9ca695a95 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -53,7 +53,7 @@ go_template_instance( "Key": "uint64", "Range": "memmap.MappableRange", "Value": "uint64", - "Functions": "fileRangeSetFunctions", + "Functions": "FileRangeSetFunctions", }, ) diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go index 0a5466b0a..f52d712e3 100644 --- a/pkg/sentry/fs/fsutil/file_range_set.go +++ b/pkg/sentry/fs/fsutil/file_range_set.go @@ -34,25 +34,25 @@ import ( // // type FileRangeSet -// fileRangeSetFunctions implements segment.Functions for FileRangeSet. -type fileRangeSetFunctions struct{} +// FileRangeSetFunctions implements segment.Functions for FileRangeSet. +type FileRangeSetFunctions struct{} // MinKey implements segment.Functions.MinKey. -func (fileRangeSetFunctions) MinKey() uint64 { +func (FileRangeSetFunctions) MinKey() uint64 { return 0 } // MaxKey implements segment.Functions.MaxKey. -func (fileRangeSetFunctions) MaxKey() uint64 { +func (FileRangeSetFunctions) MaxKey() uint64 { return math.MaxUint64 } // ClearValue implements segment.Functions.ClearValue. -func (fileRangeSetFunctions) ClearValue(_ *uint64) { +func (FileRangeSetFunctions) ClearValue(_ *uint64) { } // Merge implements segment.Functions.Merge. -func (fileRangeSetFunctions) Merge(mr1 memmap.MappableRange, frstart1 uint64, _ memmap.MappableRange, frstart2 uint64) (uint64, bool) { +func (FileRangeSetFunctions) Merge(mr1 memmap.MappableRange, frstart1 uint64, _ memmap.MappableRange, frstart2 uint64) (uint64, bool) { if frstart1+mr1.Length() != frstart2 { return 0, false } @@ -60,7 +60,7 @@ func (fileRangeSetFunctions) Merge(mr1 memmap.MappableRange, frstart1 uint64, _ } // Split implements segment.Functions.Split. -func (fileRangeSetFunctions) Split(mr memmap.MappableRange, frstart uint64, split uint64) (uint64, uint64) { +func (FileRangeSetFunctions) Split(mr memmap.MappableRange, frstart uint64, split uint64) (uint64, uint64) { return frstart, frstart + (split - mr.Start) } diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD deleted file mode 100644 index 5689bed3b..000000000 --- a/pkg/sentry/fsimpl/memfs/BUILD +++ /dev/null @@ -1,80 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") - -package(licenses = ["notice"]) - -load("//tools/go_generics:defs.bzl", "go_template_instance") - -go_template_instance( - name = "dentry_list", - out = "dentry_list.go", - package = "memfs", - prefix = "dentry", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*dentry", - "Linker": "*dentry", - }, -) - -go_library( - name = "memfs", - srcs = [ - "dentry_list.go", - "directory.go", - "filesystem.go", - "memfs.go", - "named_pipe.go", - "regular_file.go", - "symlink.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/memfs", - deps = [ - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/fspath", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/usermem", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) - -go_test( - name = "benchmark_test", - size = "small", - srcs = ["benchmark_test.go"], - deps = [ - ":memfs", - "//pkg/abi/linux", - "//pkg/fspath", - "//pkg/refs", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) - -go_test( - name = "memfs_test", - size = "small", - srcs = ["pipe_test.go"], - embed = [":memfs"], - deps = [ - "//pkg/abi/linux", - "//pkg/fspath", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/usermem", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/fsimpl/memfs/benchmark_test.go b/pkg/sentry/fsimpl/memfs/benchmark_test.go deleted file mode 100644 index a27876a4e..000000000 --- a/pkg/sentry/fsimpl/memfs/benchmark_test.go +++ /dev/null @@ -1,487 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package benchmark_test - -import ( - "fmt" - "runtime" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/memfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// Differences from stat_benchmark: -// -// - Syscall interception, CopyInPath, copyOutStat, and overlayfs overheads are -// not included. -// -// - *MountStat benchmarks use a tmpfs root mount and a tmpfs submount at /tmp. -// Non-MountStat benchmarks use a tmpfs root mount and no submounts. -// stat_benchmark uses a varying root mount, a tmpfs submount at /tmp, and a -// subdirectory /tmp/ (assuming TEST_TMPDIR == "/tmp"). Thus -// stat_benchmark at depth 1 does a comparable amount of work to *MountStat -// benchmarks at depth 2, and non-MountStat benchmarks at depth 3. -var depths = []int{1, 2, 3, 8, 64, 100} - -const ( - mountPointName = "tmp" - filename = "gvisor_test_temp_0_1557494568" -) - -// This is copied from syscalls/linux/sys_file.go, with the dependency on -// kernel.Task stripped out. -func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error { - var ( - d *fs.Dirent // The file. - rel *fs.Dirent // The relative directory for search (if required.) - err error - ) - - // Extract the working directory (maybe). - if len(path) > 0 && path[0] == '/' { - // Absolute path; rel can be nil. - } else if dirFD == linux.AT_FDCWD { - // Need to reference the working directory. - rel = wd - } else { - // Need to extract the given FD. - return syserror.EBADF - } - - // Lookup the node. - remainingTraversals := uint(linux.MaxSymlinkTraversals) - if resolve { - d, err = mntns.FindInode(ctx, root, rel, path, &remainingTraversals) - } else { - d, err = mntns.FindLink(ctx, root, rel, path, &remainingTraversals) - } - if err != nil { - return err - } - - err = fn(root, d) - d.DecRef() - return err -} - -func BenchmarkVFS1TmpfsStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - - // Create VFS. - tmpfsFS, ok := fs.FindFilesystem("tmpfs") - if !ok { - b.Fatalf("failed to find tmpfs filesystem type") - } - rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - mntns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - b.Fatalf("failed to create mount namespace: %v", err) - } - defer mntns.DecRef() - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - root := mntns.Root() - defer root.DecRef() - d := root - d.IncRef() - defer d.DecRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - next, err := d.Walk(ctx, root, name) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - d.DecRef() - d = next - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - file.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - dirPath := false - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR - } - uattr, err := d.Inode.UnstableAttr(ctx) - if err != nil { - return err - } - // Sanity check. - if uattr.Perms.User.Execute { - b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) - } - return nil - }) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func BenchmarkVFS2MemfsStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.GetFilesystemOptions{}) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - defer mntns.DecRef(vfsObj) - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - root := mntns.Root() - defer root.DecRef() - vd := root - vd.IncRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - pop := vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(name), - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - vd.DecRef() - vd = nextVD - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(filename), - FollowFinalSymlink: true, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: 0644, - }) - vd.DecRef() - vd = vfs.VirtualDentry{} - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - defer fd.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filePath), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. - if stat.Mode&^linux.S_IFMT != 0644 { - b.Fatalf("got wrong permissions (%0o)", stat.Mode) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func BenchmarkVFS1TmpfsMountStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - - // Create VFS. - tmpfsFS, ok := fs.FindFilesystem("tmpfs") - if !ok { - b.Fatalf("failed to find tmpfs filesystem type") - } - rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - mntns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - b.Fatalf("failed to create mount namespace: %v", err) - } - defer mntns.DecRef() - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create and mount the submount. - root := mntns.Root() - defer root.DecRef() - if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create mount point: %v", err) - } - mountPoint, err := root.Walk(ctx, root, mountPointName) - if err != nil { - b.Fatalf("failed to walk to mount point: %v", err) - } - defer mountPoint.DecRef() - submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs submount: %v", err) - } - if err := mntns.Mount(ctx, mountPoint, submountInode); err != nil { - b.Fatalf("failed to mount tmpfs submount: %v", err) - } - filePathBuilder.WriteString(mountPointName) - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - d, err := root.Walk(ctx, root, mountPointName) - if err != nil { - b.Fatalf("failed to walk to mount root: %v", err) - } - defer d.DecRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - next, err := d.Walk(ctx, root, name) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - d.DecRef() - d = next - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - file.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - dirPath := false - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR - } - uattr, err := d.Inode.UnstableAttr(ctx) - if err != nil { - return err - } - // Sanity check. - if uattr.Perms.User.Execute { - b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) - } - return nil - }) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func BenchmarkVFS2MemfsMountStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.GetFilesystemOptions{}) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - defer mntns.DecRef(vfsObj) - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create the mount point. - root := mntns.Root() - defer root.DecRef() - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(mountPointName), - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create mount point: %v", err) - } - // Save the mount point for later use. - mountPoint, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to mount point: %v", err) - } - defer mountPoint.DecRef() - // Create and mount the submount. - if err := vfsObj.MountAt(ctx, creds, "", &pop, "memfs", &vfs.MountOptions{}); err != nil { - b.Fatalf("failed to mount tmpfs submount: %v", err) - } - filePathBuilder.WriteString(mountPointName) - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - vd, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to mount root: %v", err) - } - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - pop := vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(name), - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - vd.DecRef() - vd = nextVD - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Verify that we didn't create any directories under the mount - // point (i.e. they were all created on the submount). - firstDirName := fmt.Sprintf("%d", depth) - if child := mountPoint.Dentry().Child(firstDirName); child != nil { - b.Fatalf("created directory %q under root mount, not submount", firstDirName) - } - - // Create the file that will be stat'd. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(filename), - FollowFinalSymlink: true, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: 0644, - }) - vd.DecRef() - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - fd.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filePath), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. - if stat.Mode&^linux.S_IFMT != 0644 { - b.Fatalf("got wrong permissions (%0o)", stat.Mode) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func init() { - // Turn off reference leak checking for a fair comparison between vfs1 and - // vfs2. - refs.SetLeakMode(refs.NoLeakChecking) -} diff --git a/pkg/sentry/fsimpl/memfs/directory.go b/pkg/sentry/fsimpl/memfs/directory.go deleted file mode 100644 index 0bd82e480..000000000 --- a/pkg/sentry/fsimpl/memfs/directory.go +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -type directory struct { - inode inode - - // childList is a list containing (1) child Dentries and (2) fake Dentries - // (with inode == nil) that represent the iteration position of - // directoryFDs. childList is used to support directoryFD.IterDirents() - // efficiently. childList is protected by filesystem.mu. - childList dentryList -} - -func (fs *filesystem) newDirectory(creds *auth.Credentials, mode linux.FileMode) *inode { - dir := &directory{} - dir.inode.init(dir, fs, creds, mode) - dir.inode.nlink = 2 // from "." and parent directory or ".." for root - return &dir.inode -} - -func (i *inode) isDir() bool { - _, ok := i.impl.(*directory) - return ok -} - -type directoryFD struct { - fileDescription - vfs.DirectoryFileDescriptionDefaultImpl - - // Protected by filesystem.mu. - iter *dentry - off int64 -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *directoryFD) Release() { - if fd.iter != nil { - fs := fd.filesystem() - dir := fd.inode().impl.(*directory) - fs.mu.Lock() - dir.childList.Remove(fd.iter) - fs.mu.Unlock() - fd.iter = nil - } -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - fs := fd.filesystem() - vfsd := fd.vfsfd.VirtualDentry().Dentry() - - fs.mu.Lock() - defer fs.mu.Unlock() - - if fd.off == 0 { - if !cb.Handle(vfs.Dirent{ - Name: ".", - Type: linux.DT_DIR, - Ino: vfsd.Impl().(*dentry).inode.ino, - NextOff: 1, - }) { - return nil - } - fd.off++ - } - if fd.off == 1 { - parentInode := vfsd.ParentOrSelf().Impl().(*dentry).inode - if !cb.Handle(vfs.Dirent{ - Name: "..", - Type: parentInode.direntType(), - Ino: parentInode.ino, - NextOff: 2, - }) { - return nil - } - fd.off++ - } - - dir := vfsd.Impl().(*dentry).inode.impl.(*directory) - var child *dentry - if fd.iter == nil { - // Start iteration at the beginning of dir. - child = dir.childList.Front() - fd.iter = &dentry{} - } else { - // Continue iteration from where we left off. - child = fd.iter.Next() - dir.childList.Remove(fd.iter) - } - for child != nil { - // Skip other directoryFD iterators. - if child.inode != nil { - if !cb.Handle(vfs.Dirent{ - Name: child.vfsd.Name(), - Type: child.inode.direntType(), - Ino: child.inode.ino, - NextOff: fd.off + 1, - }) { - dir.childList.InsertBefore(child, fd.iter) - return nil - } - fd.off++ - } - child = child.Next() - } - dir.childList.PushBack(fd.iter) - return nil -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fs := fd.filesystem() - fs.mu.Lock() - defer fs.mu.Unlock() - - switch whence { - case linux.SEEK_SET: - // Use offset as given. - case linux.SEEK_CUR: - offset += fd.off - default: - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - - // If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't - // seek even if doing so might reposition the iterator due to concurrent - // mutation of the directory. Compare fs/libfs.c:dcache_dir_lseek(). - if fd.off == offset { - return offset, nil - } - - fd.off = offset - // Compensate for "." and "..". - remChildren := int64(0) - if offset >= 2 { - remChildren = offset - 2 - } - - dir := fd.inode().impl.(*directory) - - // Ensure that fd.iter exists and is not linked into dir.childList. - if fd.iter == nil { - fd.iter = &dentry{} - } else { - dir.childList.Remove(fd.iter) - } - // Insert fd.iter before the remChildren'th child, or at the end of the - // list if remChildren >= number of children. - child := dir.childList.Front() - for child != nil { - // Skip other directoryFD iterators. - if child.inode != nil { - if remChildren == 0 { - dir.childList.InsertBefore(child, fd.iter) - return offset, nil - } - remChildren-- - } - child = child.Next() - } - dir.childList.PushBack(fd.iter) - return offset, nil -} diff --git a/pkg/sentry/fsimpl/memfs/filesystem.go b/pkg/sentry/fsimpl/memfs/filesystem.go deleted file mode 100644 index b063e09a3..000000000 --- a/pkg/sentry/fsimpl/memfs/filesystem.go +++ /dev/null @@ -1,698 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "fmt" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// Sync implements vfs.FilesystemImpl.Sync. -func (fs *filesystem) Sync(ctx context.Context) error { - // All filesystem state is in-memory. - return nil -} - -// stepLocked resolves rp.Component() to an existing file, starting from the -// given directory. -// -// stepLocked is loosely analogous to fs/namei.c:walk_component(). -// -// Preconditions: filesystem.mu must be locked. !rp.Done(). -func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { - if !d.inode.isDir() { - return nil, syserror.ENOTDIR - } - if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { - return nil, err - } -afterSymlink: - nextVFSD, err := rp.ResolveComponent(&d.vfsd) - if err != nil { - return nil, err - } - if nextVFSD == nil { - // Since the Dentry tree is the sole source of truth for memfs, if it's - // not in the Dentry tree, it doesn't exist. - return nil, syserror.ENOENT - } - next := nextVFSD.Impl().(*dentry) - if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO: symlink traversals update access time - if err := rp.HandleSymlink(symlink.target); err != nil { - return nil, err - } - goto afterSymlink // don't check the current directory again - } - rp.Advance() - return next, nil -} - -// walkParentDirLocked resolves all but the last path component of rp to an -// existing directory, starting from the given directory (which is usually -// rp.Start().Impl().(*dentry)). It does not check that the returned directory -// is searchable by the provider of rp. -// -// walkParentDirLocked is loosely analogous to Linux's -// fs/namei.c:path_parentat(). -// -// Preconditions: filesystem.mu must be locked. !rp.Done(). -func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { - for !rp.Final() { - next, err := stepLocked(rp, d) - if err != nil { - return nil, err - } - d = next - } - if !d.inode.isDir() { - return nil, syserror.ENOTDIR - } - return d, nil -} - -// resolveLocked resolves rp to an existing file. -// -// resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). -// -// Preconditions: filesystem.mu must be locked. -func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) { - d := rp.Start().Impl().(*dentry) - for !rp.Done() { - next, err := stepLocked(rp, d) - if err != nil { - return nil, err - } - d = next - } - if rp.MustBeDir() && !d.inode.isDir() { - return nil, syserror.ENOTDIR - } - return d, nil -} - -// doCreateAt checks that creating a file at rp is permitted, then invokes -// create to do so. -// -// doCreateAt is loosely analogous to a conjunction of Linux's -// fs/namei.c:filename_create() and done_path_create(). -// -// Preconditions: !rp.Done(). For the final path component in rp, -// !rp.ShouldFollowSymlink(). -func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error { - fs.mu.Lock() - defer fs.mu.Unlock() - parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return err - } - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - name := rp.Component() - if name == "." || name == ".." { - return syserror.EEXIST - } - // Call parent.vfsd.Child() instead of stepLocked() or rp.ResolveChild(), - // because if the child exists we want to return EEXIST immediately instead - // of attempting symlink/mount traversal. - if parent.vfsd.Child(name) != nil { - return syserror.EEXIST - } - if !dir && rp.MustBeDir() { - return syserror.ENOENT - } - // In memfs, the only way to cause a dentry to be disowned is by removing - // it from the filesystem, so this check is equivalent to checking if - // parent has been removed. - if parent.vfsd.IsDisowned() { - return syserror.ENOENT - } - mnt := rp.Mount() - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - return create(parent, name) -} - -// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. -func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := resolveLocked(rp) - if err != nil { - return nil, err - } - if opts.CheckSearchable { - if !d.inode.isDir() { - return nil, syserror.ENOTDIR - } - if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true /* isDir */); err != nil { - return nil, err - } - } - d.IncRef() - return &d.vfsd, nil -} - -// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. -func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return nil, err - } - d.IncRef() - return &d.vfsd, nil -} - -// LinkAt implements vfs.FilesystemImpl.LinkAt. -func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { - return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { - if rp.Mount() != vd.Mount() { - return syserror.EXDEV - } - d := vd.Dentry().Impl().(*dentry) - if d.inode.isDir() { - return syserror.EPERM - } - if d.inode.nlink == 0 { - return syserror.ENOENT - } - if d.inode.nlink == maxLinks { - return syserror.EMLINK - } - d.inode.incLinksLocked() - child := fs.newDentry(d.inode) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - }) -} - -// MkdirAt implements vfs.FilesystemImpl.MkdirAt. -func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { - return fs.doCreateAt(rp, true /* dir */, func(parent *dentry, name string) error { - if parent.inode.nlink == maxLinks { - return syserror.EMLINK - } - parent.inode.incLinksLocked() // from child's ".." - child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - }) -} - -// MknodAt implements vfs.FilesystemImpl.MknodAt. -func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { - return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { - switch opts.Mode.FileType() { - case 0, linux.S_IFREG: - child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - case linux.S_IFIFO: - child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK: - // Not yet supported. - return syserror.EPERM - default: - return syserror.EINVAL - } - }) -} - -// OpenAt implements vfs.FilesystemImpl.OpenAt. -func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - if opts.Flags&linux.O_TMPFILE != 0 { - // Not yet supported. - return nil, syserror.EOPNOTSUPP - } - - // Handle O_CREAT and !O_CREAT separately, since in the latter case we - // don't need fs.mu for writing. - if opts.Flags&linux.O_CREAT == 0 { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := resolveLocked(rp) - if err != nil { - return nil, err - } - return d.open(ctx, rp, opts.Flags, false /* afterCreate */) - } - - mustCreate := opts.Flags&linux.O_EXCL != 0 - start := rp.Start().Impl().(*dentry) - fs.mu.Lock() - defer fs.mu.Unlock() - if rp.Done() { - // Reject attempts to open directories with O_CREAT. - if rp.MustBeDir() { - return nil, syserror.EISDIR - } - if mustCreate { - return nil, syserror.EEXIST - } - return start.open(ctx, rp, opts.Flags, false /* afterCreate */) - } -afterTrailingSymlink: - parent, err := walkParentDirLocked(rp, start) - if err != nil { - return nil, err - } - // Check for search permission in the parent directory. - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { - return nil, err - } - // Reject attempts to open directories with O_CREAT. - if rp.MustBeDir() { - return nil, syserror.EISDIR - } - name := rp.Component() - if name == "." || name == ".." { - return nil, syserror.EISDIR - } - // Determine whether or not we need to create a file. - child, err := stepLocked(rp, parent) - if err == syserror.ENOENT { - // Already checked for searchability above; now check for writability. - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { - return nil, err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return nil, err - } - defer rp.Mount().EndWrite() - // Create and open the child. - child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return child.open(ctx, rp, opts.Flags, true) - } - if err != nil { - return nil, err - } - // Do we need to resolve a trailing symlink? - if !rp.Done() { - start = parent - goto afterTrailingSymlink - } - // Open existing file. - if mustCreate { - return nil, syserror.EEXIST - } - return child.open(ctx, rp, opts.Flags, false) -} - -func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { - ats := vfs.AccessTypesForOpenFlags(flags) - if !afterCreate { - if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil { - return nil, err - } - } - mnt := rp.Mount() - switch impl := d.inode.impl.(type) { - case *regularFile: - var fd regularFileFD - fd.readable = vfs.MayReadFileWithOpenFlags(flags) - fd.writable = vfs.MayWriteFileWithOpenFlags(flags) - if fd.writable { - if err := mnt.CheckBeginWrite(); err != nil { - return nil, err - } - // mnt.EndWrite() is called by regularFileFD.Release(). - } - fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) - if flags&linux.O_TRUNC != 0 { - impl.mu.Lock() - impl.data = impl.data[:0] - atomic.StoreInt64(&impl.dataLen, 0) - impl.mu.Unlock() - } - return &fd.vfsfd, nil - case *directory: - // Can't open directories writably. - if ats&vfs.MayWrite != 0 { - return nil, syserror.EISDIR - } - var fd directoryFD - fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) - return &fd.vfsfd, nil - case *symlink: - // Can't open symlinks without O_PATH (which is unimplemented). - return nil, syserror.ELOOP - case *namedPipe: - return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags) - default: - panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl)) - } -} - -// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. -func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := resolveLocked(rp) - if err != nil { - return "", err - } - symlink, ok := d.inode.impl.(*symlink) - if !ok { - return "", syserror.EINVAL - } - return symlink.target, nil -} - -// RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { - if opts.Flags != 0 { - // TODO(b/145974740): Support renameat2 flags. - return syserror.EINVAL - } - - // Resolve newParent first to verify that it's on this Mount. - fs.mu.Lock() - defer fs.mu.Unlock() - newParent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return err - } - newName := rp.Component() - if newName == "." || newName == ".." { - return syserror.EBUSY - } - mnt := rp.Mount() - if mnt != oldParentVD.Mount() { - return syserror.EXDEV - } - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - - oldParent := oldParentVD.Dentry().Impl().(*dentry) - if err := oldParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - // Call vfs.Dentry.Child() instead of stepLocked() or rp.ResolveChild(), - // because if the existing child is a symlink or mount point then we want - // to rename over it rather than follow it. - renamedVFSD := oldParent.vfsd.Child(oldName) - if renamedVFSD == nil { - return syserror.ENOENT - } - renamed := renamedVFSD.Impl().(*dentry) - if renamed.inode.isDir() { - if renamed == newParent || renamedVFSD.IsAncestorOf(&newParent.vfsd) { - return syserror.EINVAL - } - if oldParent != newParent { - // Writability is needed to change renamed's "..". - if err := renamed.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true /* isDir */); err != nil { - return err - } - } - } else { - if opts.MustBeDir || rp.MustBeDir() { - return syserror.ENOTDIR - } - } - - if err := newParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - replacedVFSD := newParent.vfsd.Child(newName) - var replaced *dentry - if replacedVFSD != nil { - replaced = replacedVFSD.Impl().(*dentry) - if replaced.inode.isDir() { - if !renamed.inode.isDir() { - return syserror.EISDIR - } - if replaced.vfsd.HasChildren() { - return syserror.ENOTEMPTY - } - } else { - if rp.MustBeDir() { - return syserror.ENOTDIR - } - if renamed.inode.isDir() { - return syserror.ENOTDIR - } - } - } else { - if renamed.inode.isDir() && newParent.inode.nlink == maxLinks { - return syserror.EMLINK - } - } - if newParent.vfsd.IsDisowned() { - return syserror.ENOENT - } - - // Linux places this check before some of those above; we do it here for - // simplicity, under the assumption that applications are not intentionally - // doing noop renames expecting them to succeed where non-noop renames - // would fail. - if renamedVFSD == replacedVFSD { - return nil - } - vfsObj := rp.VirtualFilesystem() - oldParentDir := oldParent.inode.impl.(*directory) - newParentDir := newParent.inode.impl.(*directory) - if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil { - return err - } - if replaced != nil { - newParentDir.childList.Remove(replaced) - if replaced.inode.isDir() { - newParent.inode.decLinksLocked() // from replaced's ".." - } - replaced.inode.decLinksLocked() - } - oldParentDir.childList.Remove(renamed) - newParentDir.childList.PushBack(renamed) - if renamed.inode.isDir() { - oldParent.inode.decLinksLocked() - newParent.inode.incLinksLocked() - } - // TODO: update timestamps and parent directory sizes - vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD) - return nil -} - -// RmdirAt implements vfs.FilesystemImpl.RmdirAt. -func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { - fs.mu.Lock() - defer fs.mu.Unlock() - parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return err - } - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - name := rp.Component() - if name == "." { - return syserror.EINVAL - } - if name == ".." { - return syserror.ENOTEMPTY - } - childVFSD := parent.vfsd.Child(name) - if childVFSD == nil { - return syserror.ENOENT - } - child := childVFSD.Impl().(*dentry) - if !child.inode.isDir() { - return syserror.ENOTDIR - } - if childVFSD.HasChildren() { - return syserror.ENOTEMPTY - } - mnt := rp.Mount() - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - vfsObj := rp.VirtualFilesystem() - if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { - return err - } - parent.inode.impl.(*directory).childList.Remove(child) - parent.inode.decLinksLocked() // from child's ".." - child.inode.decLinksLocked() - vfsObj.CommitDeleteDentry(childVFSD) - return nil -} - -// SetStatAt implements vfs.FilesystemImpl.SetStatAt. -func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return err - } - if opts.Stat.Mask == 0 { - return nil - } - // TODO: implement inode.setStat - return syserror.EPERM -} - -// StatAt implements vfs.FilesystemImpl.StatAt. -func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := resolveLocked(rp) - if err != nil { - return linux.Statx{}, err - } - var stat linux.Statx - d.inode.statTo(&stat) - return stat, nil -} - -// StatFSAt implements vfs.FilesystemImpl.StatFSAt. -func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return linux.Statfs{}, err - } - // TODO: actually implement statfs - return linux.Statfs{}, syserror.ENOSYS -} - -// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. -func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { - return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { - child := fs.newDentry(fs.newSymlink(rp.Credentials(), target)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - }) -} - -// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. -func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { - fs.mu.Lock() - defer fs.mu.Unlock() - parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return err - } - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - name := rp.Component() - if name == "." || name == ".." { - return syserror.EISDIR - } - childVFSD := parent.vfsd.Child(name) - if childVFSD == nil { - return syserror.ENOENT - } - child := childVFSD.Impl().(*dentry) - if child.inode.isDir() { - return syserror.EISDIR - } - if !rp.MustBeDir() { - return syserror.ENOTDIR - } - mnt := rp.Mount() - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - vfsObj := rp.VirtualFilesystem() - if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { - return err - } - parent.inode.impl.(*directory).childList.Remove(child) - child.inode.decLinksLocked() - vfsObj.CommitDeleteDentry(childVFSD) - return nil -} - -// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return nil, err - } - // TODO(b/127675828): support extended attributes - return nil, syserror.ENOTSUP -} - -// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. -func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return "", err - } - // TODO(b/127675828): support extended attributes - return "", syserror.ENOTSUP -} - -// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. -func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return err - } - // TODO(b/127675828): support extended attributes - return syserror.ENOTSUP -} - -// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. -func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return err - } - // TODO(b/127675828): support extended attributes - return syserror.ENOTSUP -} - -// PrependPath implements vfs.FilesystemImpl.PrependPath. -func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - return vfs.GenericPrependPath(vfsroot, vd, b) -} diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go deleted file mode 100644 index 8d0167c93..000000000 --- a/pkg/sentry/fsimpl/memfs/memfs.go +++ /dev/null @@ -1,293 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package memfs provides a filesystem implementation that behaves like tmpfs: -// the Dentry tree is the sole source of truth for the state of the filesystem. -// -// memfs is intended primarily to demonstrate filesystem implementation -// patterns. Real uses cases for an in-memory filesystem should use tmpfs -// instead. -// -// Lock order: -// -// filesystem.mu -// regularFileFD.offMu -// regularFile.mu -// inode.mu -package memfs - -import ( - "fmt" - "math" - "sync" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// FilesystemType implements vfs.FilesystemType. -type FilesystemType struct{} - -// filesystem implements vfs.FilesystemImpl. -type filesystem struct { - vfsfs vfs.Filesystem - - // mu serializes changes to the Dentry tree. - mu sync.RWMutex - - nextInoMinusOne uint64 // accessed using atomic memory operations -} - -// GetFilesystem implements vfs.FilesystemType.GetFilesystem. -func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - var fs filesystem - fs.vfsfs.Init(vfsObj, &fs) - root := fs.newDentry(fs.newDirectory(creds, 01777)) - return &fs.vfsfs, &root.vfsd, nil -} - -// Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { -} - -// dentry implements vfs.DentryImpl. -type dentry struct { - vfsd vfs.Dentry - - // inode is the inode represented by this dentry. Multiple Dentries may - // share a single non-directory inode (with hard links). inode is - // immutable. - inode *inode - - // memfs doesn't count references on dentries; because the dentry tree is - // the sole source of truth, it is by definition always consistent with the - // state of the filesystem. However, it does count references on inodes, - // because inode resources are released when all references are dropped. - // (memfs doesn't really have resources to release, but we implement - // reference counting because tmpfs regular files will.) - - // dentryEntry (ugh) links dentries into their parent directory.childList. - dentryEntry -} - -func (fs *filesystem) newDentry(inode *inode) *dentry { - d := &dentry{ - inode: inode, - } - d.vfsd.Init(d) - return d -} - -// IncRef implements vfs.DentryImpl.IncRef. -func (d *dentry) IncRef() { - d.inode.incRef() -} - -// TryIncRef implements vfs.DentryImpl.TryIncRef. -func (d *dentry) TryIncRef() bool { - return d.inode.tryIncRef() -} - -// DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef() { - d.inode.decRef() -} - -// inode represents a filesystem object. -type inode struct { - // refs is a reference count. refs is accessed using atomic memory - // operations. - // - // A reference is held on all inodes that are reachable in the filesystem - // tree. For non-directories (which may have multiple hard links), this - // means that a reference is dropped when nlink reaches 0. For directories, - // nlink never reaches 0 due to the "." entry; instead, - // filesystem.RmdirAt() drops the reference. - refs int64 - - // Inode metadata; protected by mu and accessed using atomic memory - // operations unless otherwise specified. - mu sync.RWMutex - mode uint32 // excluding file type bits, which are based on impl - nlink uint32 // protected by filesystem.mu instead of inode.mu - uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic - gid uint32 // auth.KGID, but ... - ino uint64 // immutable - - impl interface{} // immutable -} - -const maxLinks = math.MaxUint32 - -func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) { - i.refs = 1 - i.mode = uint32(mode) - i.uid = uint32(creds.EffectiveKUID) - i.gid = uint32(creds.EffectiveKGID) - i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1) - // i.nlink initialized by caller - i.impl = impl -} - -// incLinksLocked increments i's link count. -// -// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. -// i.nlink < maxLinks. -func (i *inode) incLinksLocked() { - if i.nlink == 0 { - panic("memfs.inode.incLinksLocked() called with no existing links") - } - if i.nlink == maxLinks { - panic("memfs.inode.incLinksLocked() called with maximum link count") - } - atomic.AddUint32(&i.nlink, 1) -} - -// decLinksLocked decrements i's link count. -// -// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. -func (i *inode) decLinksLocked() { - if i.nlink == 0 { - panic("memfs.inode.decLinksLocked() called with no existing links") - } - atomic.AddUint32(&i.nlink, ^uint32(0)) -} - -func (i *inode) incRef() { - if atomic.AddInt64(&i.refs, 1) <= 1 { - panic("memfs.inode.incRef() called without holding a reference") - } -} - -func (i *inode) tryIncRef() bool { - for { - refs := atomic.LoadInt64(&i.refs) - if refs == 0 { - return false - } - if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) { - return true - } - } -} - -func (i *inode) decRef() { - if refs := atomic.AddInt64(&i.refs, -1); refs == 0 { - // This is unnecessary; it's mostly to simulate what tmpfs would do. - if regfile, ok := i.impl.(*regularFile); ok { - regfile.mu.Lock() - regfile.data = nil - atomic.StoreInt64(®file.dataLen, 0) - regfile.mu.Unlock() - } - } else if refs < 0 { - panic("memfs.inode.decRef() called without holding a reference") - } -} - -func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error { - return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))) -} - -// Go won't inline this function, and returning linux.Statx (which is quite -// big) means spending a lot of time in runtime.duffcopy(), so instead it's an -// output parameter. -func (i *inode) statTo(stat *linux.Statx) { - stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO - stat.Blksize = 1 // usermem.PageSize in tmpfs - stat.Nlink = atomic.LoadUint32(&i.nlink) - stat.UID = atomic.LoadUint32(&i.uid) - stat.GID = atomic.LoadUint32(&i.gid) - stat.Mode = uint16(atomic.LoadUint32(&i.mode)) - stat.Ino = i.ino - // TODO: device number - switch impl := i.impl.(type) { - case *regularFile: - stat.Mode |= linux.S_IFREG - stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS - stat.Size = uint64(atomic.LoadInt64(&impl.dataLen)) - // In tmpfs, this will be FileRangeSet.Span() / 512 (but also cached in - // a uint64 accessed using atomic memory operations to avoid taking - // locks). - stat.Blocks = allocatedBlocksForSize(stat.Size) - case *directory: - stat.Mode |= linux.S_IFDIR - case *symlink: - stat.Mode |= linux.S_IFLNK - stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS - stat.Size = uint64(len(impl.target)) - stat.Blocks = allocatedBlocksForSize(stat.Size) - case *namedPipe: - stat.Mode |= linux.S_IFIFO - default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) - } -} - -// allocatedBlocksForSize returns the number of 512B blocks needed to -// accommodate the given size in bytes, as appropriate for struct -// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block -// size is independent of the "preferred block size for I/O", struct -// stat::st_blksize and struct statx::stx_blksize.) -func allocatedBlocksForSize(size uint64) uint64 { - return (size + 511) / 512 -} - -func (i *inode) direntType() uint8 { - switch i.impl.(type) { - case *regularFile: - return linux.DT_REG - case *directory: - return linux.DT_DIR - case *symlink: - return linux.DT_LNK - default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) - } -} - -// fileDescription is embedded by memfs implementations of -// vfs.FileDescriptionImpl. -type fileDescription struct { - vfsfd vfs.FileDescription - vfs.FileDescriptionDefaultImpl -} - -func (fd *fileDescription) filesystem() *filesystem { - return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) -} - -func (fd *fileDescription) inode() *inode { - return fd.vfsfd.Dentry().Impl().(*dentry).inode -} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - var stat linux.Statx - fd.inode().statTo(&stat) - return stat, nil -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - if opts.Stat.Mask == 0 { - return nil - } - // TODO: implement inode.setStat - return syserror.EPERM -} diff --git a/pkg/sentry/fsimpl/memfs/named_pipe.go b/pkg/sentry/fsimpl/memfs/named_pipe.go deleted file mode 100644 index b5a204438..000000000 --- a/pkg/sentry/fsimpl/memfs/named_pipe.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -type namedPipe struct { - inode inode - - pipe *pipe.VFSPipe -} - -// Preconditions: -// * fs.mu must be locked. -// * rp.Mount().CheckBeginWrite() has been called successfully. -func (fs *filesystem) newNamedPipe(creds *auth.Credentials, mode linux.FileMode) *inode { - file := &namedPipe{pipe: pipe.NewVFSPipe(pipe.DefaultPipeSize, usermem.PageSize)} - file.inode.init(file, fs, creds, mode) - file.inode.nlink = 1 // Only the parent has a link. - return &file.inode -} - -// namedPipeFD implements vfs.FileDescriptionImpl. Methods are implemented -// entirely via struct embedding. -type namedPipeFD struct { - fileDescription - - *pipe.VFSPipeFD -} - -func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { - var err error - var fd namedPipeFD - fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, rp, vfsd, &fd.vfsfd, flags) - if err != nil { - return nil, err - } - mnt := rp.Mount() - fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) - return &fd.vfsfd, nil -} diff --git a/pkg/sentry/fsimpl/memfs/pipe_test.go b/pkg/sentry/fsimpl/memfs/pipe_test.go deleted file mode 100644 index 807c1af7a..000000000 --- a/pkg/sentry/fsimpl/memfs/pipe_test.go +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "bytes" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -const fileName = "mypipe" - -func TestSeparateFDs(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() - - // Open the read side. This is done in a concurrently because opening - // One end the pipe blocks until the other end is opened. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - rfdchan := make(chan *vfs.FileDescription) - go func() { - openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY} - rfd, _ := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - rfdchan <- rfd - }() - - // Open the write side. - openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY} - wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) - } - defer wfd.DecRef() - - rfd, ok := <-rfdchan - if !ok { - t.Fatalf("failed to open pipe for reading %q", fileName) - } - defer rfd.DecRef() - - const msg = "vamos azul" - checkEmpty(ctx, t, rfd) - checkWrite(ctx, t, wfd, msg) - checkRead(ctx, t, rfd, msg) -} - -func TestNonblockingRead(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() - - // Open the read side as nonblocking. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK} - rfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for reading %q: %v", fileName, err) - } - defer rfd.DecRef() - - // Open the write side. - openOpts = vfs.OpenOptions{Flags: linux.O_WRONLY} - wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) - } - defer wfd.DecRef() - - const msg = "geh blau" - checkEmpty(ctx, t, rfd) - checkWrite(ctx, t, wfd, msg) - checkRead(ctx, t, rfd, msg) -} - -func TestNonblockingWriteError(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() - - // Open the write side as nonblocking, which should return ENXIO. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK} - _, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != syserror.ENXIO { - t.Fatalf("expected ENXIO, but got error: %v", err) - } -} - -func TestSingleFD(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() - - // Open the pipe as readable and writable. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - openOpts := vfs.OpenOptions{Flags: linux.O_RDWR} - fd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) - } - defer fd.DecRef() - - const msg = "forza blu" - checkEmpty(ctx, t, fd) - checkWrite(ctx, t, fd, msg) - checkRead(ctx, t, fd, msg) -} - -// setup creates a VFS with a pipe in the root directory at path fileName. The -// returned VirtualDentry must be DecRef()'d be the caller. It calls t.Fatal -// upon failure. -func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesystem, vfs.VirtualDentry) { - ctx := contexttest.Context(t) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("memfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.GetFilesystemOptions{}) - if err != nil { - t.Fatalf("failed to create tmpfs root mount: %v", err) - } - - // Create the pipe. - root := mntns.Root() - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - } - mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644} - if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil { - t.Fatalf("failed to create file %q: %v", fileName, err) - } - - // Sanity check: the file pipe exists and has the correct mode. - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - t.Fatalf("stat(%q) failed: %v", fileName, err) - } - if stat.Mode&^linux.S_IFMT != 0644 { - t.Errorf("got wrong permissions (%0o)", stat.Mode) - } - if stat.Mode&linux.S_IFMT != linux.ModeNamedPipe { - t.Errorf("got wrong file type (%0o)", stat.Mode) - } - - return ctx, creds, vfsObj, root -} - -// checkEmpty calls t.Fatal if the pipe in fd is not empty. -func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) { - readData := make([]byte, 1) - dst := usermem.BytesIOSequence(readData) - bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{}) - if err != syserror.ErrWouldBlock { - t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err) - } - if bytesRead != 0 { - t.Fatalf("expected to read 0 bytes, but got %d", bytesRead) - } -} - -// checkWrite calls t.Fatal if it fails to write all of msg to fd. -func checkWrite(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) { - writeData := []byte(msg) - src := usermem.BytesIOSequence(writeData) - bytesWritten, err := fd.Write(ctx, src, vfs.WriteOptions{}) - if err != nil { - t.Fatalf("error writing to pipe %q: %v", fileName, err) - } - if bytesWritten != int64(len(writeData)) { - t.Fatalf("expected to write %d bytes, but wrote %d", len(writeData), bytesWritten) - } -} - -// checkRead calls t.Fatal if it fails to read msg from fd. -func checkRead(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) { - readData := make([]byte, len(msg)) - dst := usermem.BytesIOSequence(readData) - bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{}) - if err != nil { - t.Fatalf("error reading from pipe %q: %v", fileName, err) - } - if bytesRead != int64(len(msg)) { - t.Fatalf("expected to read %d bytes, but got %d", len(msg), bytesRead) - } - if !bytes.Equal(readData, []byte(msg)) { - t.Fatalf("expected to read %q from pipe, but got %q", msg, string(readData)) - } -} diff --git a/pkg/sentry/fsimpl/memfs/regular_file.go b/pkg/sentry/fsimpl/memfs/regular_file.go deleted file mode 100644 index b7f4853b3..000000000 --- a/pkg/sentry/fsimpl/memfs/regular_file.go +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "io" - "sync" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -type regularFile struct { - inode inode - - mu sync.RWMutex - data []byte - // dataLen is len(data), but accessed using atomic memory operations to - // avoid locking in inode.stat(). - dataLen int64 -} - -func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode { - file := ®ularFile{} - file.inode.init(file, fs, creds, mode) - file.inode.nlink = 1 // from parent directory - return &file.inode -} - -type regularFileFD struct { - fileDescription - - // These are immutable. - readable bool - writable bool - - // off is the file offset. off is accessed using atomic memory operations. - // offMu serializes operations that may mutate off. - off int64 - offMu sync.Mutex -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *regularFileFD) Release() { - if fd.writable { - fd.vfsfd.VirtualDentry().Mount().EndWrite() - } -} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - if !fd.readable { - return 0, syserror.EINVAL - } - f := fd.inode().impl.(*regularFile) - f.mu.RLock() - if offset >= int64(len(f.data)) { - f.mu.RUnlock() - return 0, io.EOF - } - n, err := dst.CopyOut(ctx, f.data[offset:]) - f.mu.RUnlock() - return int64(n), err -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - fd.offMu.Lock() - n, err := fd.PRead(ctx, dst, fd.off, opts) - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - if !fd.writable { - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - srclen := src.NumBytes() - if srclen == 0 { - return 0, nil - } - f := fd.inode().impl.(*regularFile) - f.mu.Lock() - end := offset + srclen - if end < offset { - // Overflow. - f.mu.Unlock() - return 0, syserror.EFBIG - } - if end > f.dataLen { - f.data = append(f.data, make([]byte, end-f.dataLen)...) - atomic.StoreInt64(&f.dataLen, end) - } - n, err := src.CopyIn(ctx, f.data[offset:end]) - f.mu.Unlock() - return int64(n), err -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - fd.offMu.Lock() - n, err := fd.PWrite(ctx, src, fd.off, opts) - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fd.offMu.Lock() - defer fd.offMu.Unlock() - switch whence { - case linux.SEEK_SET: - // use offset as specified - case linux.SEEK_CUR: - offset += fd.off - case linux.SEEK_END: - offset += atomic.LoadInt64(&fd.inode().impl.(*regularFile).dataLen) - default: - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - fd.off = offset - return offset, nil -} - -// Sync implements vfs.FileDescriptionImpl.Sync. -func (fd *regularFileFD) Sync(ctx context.Context) error { - return nil -} diff --git a/pkg/sentry/fsimpl/memfs/symlink.go b/pkg/sentry/fsimpl/memfs/symlink.go deleted file mode 100644 index b2ac2cbeb..000000000 --- a/pkg/sentry/fsimpl/memfs/symlink.go +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package memfs - -import ( - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" -) - -type symlink struct { - inode inode - target string // immutable -} - -func (fs *filesystem) newSymlink(creds *auth.Credentials, target string) *inode { - link := &symlink{ - target: target, - } - link.inode.init(link, fs, creds, 0777) - link.inode.nlink = 1 // from parent directory - return &link.inode -} - -// O_PATH is unimplemented, so there's no way to get a FileDescription -// representing a symlink yet. diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD new file mode 100644 index 000000000..a5b285987 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -0,0 +1,92 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "dentry_list", + out = "dentry_list.go", + package = "tmpfs", + prefix = "dentry", + template = "//pkg/ilist:generic_list", + types = { + "Element": "*dentry", + "Linker": "*dentry", + }, +) + +go_library( + name = "tmpfs", + srcs = [ + "dentry_list.go", + "directory.go", + "filesystem.go", + "named_pipe.go", + "regular_file.go", + "symlink.go", + "tmpfs.go", + ], + importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs", + deps = [ + "//pkg/abi/linux", + "//pkg/amutex", + "//pkg/fspath", + "//pkg/log", + "//pkg/sentry/arch", + "//pkg/sentry/context", + "//pkg/sentry/fs", + "//pkg/sentry/fs/fsutil", + "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/pipe", + "//pkg/sentry/memmap", + "//pkg/sentry/pgalloc", + "//pkg/sentry/platform", + "//pkg/sentry/safemem", + "//pkg/sentry/usage", + "//pkg/sentry/usermem", + "//pkg/sentry/vfs", + "//pkg/syserror", + ], +) + +go_test( + name = "benchmark_test", + size = "small", + srcs = ["benchmark_test.go"], + deps = [ + ":tmpfs", + "//pkg/abi/linux", + "//pkg/fspath", + "//pkg/refs", + "//pkg/sentry/context", + "//pkg/sentry/context/contexttest", + "//pkg/sentry/fs", + "//pkg/sentry/fs/tmpfs", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/vfs", + "//pkg/syserror", + ], +) + +go_test( + name = "tmpfs_test", + size = "small", + srcs = [ + "pipe_test.go", + "regular_file_test.go", + ], + embed = [":tmpfs"], + deps = [ + "//pkg/abi/linux", + "//pkg/fspath", + "//pkg/sentry/context", + "//pkg/sentry/context/contexttest", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/contexttest", + "//pkg/sentry/usermem", + "//pkg/sentry/vfs", + "//pkg/syserror", + ], +) diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go new file mode 100644 index 000000000..d88c83499 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go @@ -0,0 +1,487 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package benchmark_test + +import ( + "fmt" + "runtime" + "strings" + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/fs" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Differences from stat_benchmark: +// +// - Syscall interception, CopyInPath, copyOutStat, and overlayfs overheads are +// not included. +// +// - *MountStat benchmarks use a tmpfs root mount and a tmpfs submount at /tmp. +// Non-MountStat benchmarks use a tmpfs root mount and no submounts. +// stat_benchmark uses a varying root mount, a tmpfs submount at /tmp, and a +// subdirectory /tmp/ (assuming TEST_TMPDIR == "/tmp"). Thus +// stat_benchmark at depth 1 does a comparable amount of work to *MountStat +// benchmarks at depth 2, and non-MountStat benchmarks at depth 3. +var depths = []int{1, 2, 3, 8, 64, 100} + +const ( + mountPointName = "tmp" + filename = "gvisor_test_temp_0_1557494568" +) + +// This is copied from syscalls/linux/sys_file.go, with the dependency on +// kernel.Task stripped out. +func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error { + var ( + d *fs.Dirent // The file. + rel *fs.Dirent // The relative directory for search (if required.) + err error + ) + + // Extract the working directory (maybe). + if len(path) > 0 && path[0] == '/' { + // Absolute path; rel can be nil. + } else if dirFD == linux.AT_FDCWD { + // Need to reference the working directory. + rel = wd + } else { + // Need to extract the given FD. + return syserror.EBADF + } + + // Lookup the node. + remainingTraversals := uint(linux.MaxSymlinkTraversals) + if resolve { + d, err = mntns.FindInode(ctx, root, rel, path, &remainingTraversals) + } else { + d, err = mntns.FindLink(ctx, root, rel, path, &remainingTraversals) + } + if err != nil { + return err + } + + err = fn(root, d) + d.DecRef() + return err +} + +func BenchmarkVFS1TmpfsStat(b *testing.B) { + for _, depth := range depths { + b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { + ctx := contexttest.Context(b) + + // Create VFS. + tmpfsFS, ok := fs.FindFilesystem("tmpfs") + if !ok { + b.Fatalf("failed to find tmpfs filesystem type") + } + rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) + if err != nil { + b.Fatalf("failed to create tmpfs root mount: %v", err) + } + mntns, err := fs.NewMountNamespace(ctx, rootInode) + if err != nil { + b.Fatalf("failed to create mount namespace: %v", err) + } + defer mntns.DecRef() + + var filePathBuilder strings.Builder + filePathBuilder.WriteByte('/') + + // Create nested directories with given depth. + root := mntns.Root() + defer root.DecRef() + d := root + d.IncRef() + defer d.DecRef() + for i := depth; i > 0; i-- { + name := fmt.Sprintf("%d", i) + if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { + b.Fatalf("failed to create directory %q: %v", name, err) + } + next, err := d.Walk(ctx, root, name) + if err != nil { + b.Fatalf("failed to walk to directory %q: %v", name, err) + } + d.DecRef() + d = next + filePathBuilder.WriteString(name) + filePathBuilder.WriteByte('/') + } + + // Create the file that will be stat'd. + file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) + if err != nil { + b.Fatalf("failed to create file %q: %v", filename, err) + } + file.DecRef() + filePathBuilder.WriteString(filename) + filePath := filePathBuilder.String() + + dirPath := false + runtime.GC() + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + uattr, err := d.Inode.UnstableAttr(ctx) + if err != nil { + return err + } + // Sanity check. + if uattr.Perms.User.Execute { + b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) + } + return nil + }) + if err != nil { + b.Fatalf("stat(%q) failed: %v", filePath, err) + } + } + // Don't include deferred cleanup in benchmark time. + b.StopTimer() + }) + } +} + +func BenchmarkVFS2MemfsStat(b *testing.B) { + for _, depth := range depths { + b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { + ctx := contexttest.Context(b) + creds := auth.CredentialsFromContext(ctx) + + // Create VFS. + vfsObj := vfs.New() + vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + }) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) + if err != nil { + b.Fatalf("failed to create tmpfs root mount: %v", err) + } + defer mntns.DecRef(vfsObj) + + var filePathBuilder strings.Builder + filePathBuilder.WriteByte('/') + + // Create nested directories with given depth. + root := mntns.Root() + defer root.DecRef() + vd := root + vd.IncRef() + for i := depth; i > 0; i-- { + name := fmt.Sprintf("%d", i) + pop := vfs.PathOperation{ + Root: root, + Start: vd, + Path: fspath.Parse(name), + } + if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ + Mode: 0755, + }); err != nil { + b.Fatalf("failed to create directory %q: %v", name, err) + } + nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) + if err != nil { + b.Fatalf("failed to walk to directory %q: %v", name, err) + } + vd.DecRef() + vd = nextVD + filePathBuilder.WriteString(name) + filePathBuilder.WriteByte('/') + } + + // Create the file that will be stat'd. + fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: vd, + Path: fspath.Parse(filename), + FollowFinalSymlink: true, + }, &vfs.OpenOptions{ + Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, + Mode: 0644, + }) + vd.DecRef() + vd = vfs.VirtualDentry{} + if err != nil { + b.Fatalf("failed to create file %q: %v", filename, err) + } + defer fd.DecRef() + filePathBuilder.WriteString(filename) + filePath := filePathBuilder.String() + + runtime.GC() + b.ResetTimer() + for i := 0; i < b.N; i++ { + stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(filePath), + FollowFinalSymlink: true, + }, &vfs.StatOptions{}) + if err != nil { + b.Fatalf("stat(%q) failed: %v", filePath, err) + } + // Sanity check. + if stat.Mode&^linux.S_IFMT != 0644 { + b.Fatalf("got wrong permissions (%0o)", stat.Mode) + } + } + // Don't include deferred cleanup in benchmark time. + b.StopTimer() + }) + } +} + +func BenchmarkVFS1TmpfsMountStat(b *testing.B) { + for _, depth := range depths { + b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { + ctx := contexttest.Context(b) + + // Create VFS. + tmpfsFS, ok := fs.FindFilesystem("tmpfs") + if !ok { + b.Fatalf("failed to find tmpfs filesystem type") + } + rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) + if err != nil { + b.Fatalf("failed to create tmpfs root mount: %v", err) + } + mntns, err := fs.NewMountNamespace(ctx, rootInode) + if err != nil { + b.Fatalf("failed to create mount namespace: %v", err) + } + defer mntns.DecRef() + + var filePathBuilder strings.Builder + filePathBuilder.WriteByte('/') + + // Create and mount the submount. + root := mntns.Root() + defer root.DecRef() + if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil { + b.Fatalf("failed to create mount point: %v", err) + } + mountPoint, err := root.Walk(ctx, root, mountPointName) + if err != nil { + b.Fatalf("failed to walk to mount point: %v", err) + } + defer mountPoint.DecRef() + submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) + if err != nil { + b.Fatalf("failed to create tmpfs submount: %v", err) + } + if err := mntns.Mount(ctx, mountPoint, submountInode); err != nil { + b.Fatalf("failed to mount tmpfs submount: %v", err) + } + filePathBuilder.WriteString(mountPointName) + filePathBuilder.WriteByte('/') + + // Create nested directories with given depth. + d, err := root.Walk(ctx, root, mountPointName) + if err != nil { + b.Fatalf("failed to walk to mount root: %v", err) + } + defer d.DecRef() + for i := depth; i > 0; i-- { + name := fmt.Sprintf("%d", i) + if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { + b.Fatalf("failed to create directory %q: %v", name, err) + } + next, err := d.Walk(ctx, root, name) + if err != nil { + b.Fatalf("failed to walk to directory %q: %v", name, err) + } + d.DecRef() + d = next + filePathBuilder.WriteString(name) + filePathBuilder.WriteByte('/') + } + + // Create the file that will be stat'd. + file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) + if err != nil { + b.Fatalf("failed to create file %q: %v", filename, err) + } + file.DecRef() + filePathBuilder.WriteString(filename) + filePath := filePathBuilder.String() + + dirPath := false + runtime.GC() + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + uattr, err := d.Inode.UnstableAttr(ctx) + if err != nil { + return err + } + // Sanity check. + if uattr.Perms.User.Execute { + b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) + } + return nil + }) + if err != nil { + b.Fatalf("stat(%q) failed: %v", filePath, err) + } + } + // Don't include deferred cleanup in benchmark time. + b.StopTimer() + }) + } +} + +func BenchmarkVFS2MemfsMountStat(b *testing.B) { + for _, depth := range depths { + b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { + ctx := contexttest.Context(b) + creds := auth.CredentialsFromContext(ctx) + + // Create VFS. + vfsObj := vfs.New() + vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + }) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) + if err != nil { + b.Fatalf("failed to create tmpfs root mount: %v", err) + } + defer mntns.DecRef(vfsObj) + + var filePathBuilder strings.Builder + filePathBuilder.WriteByte('/') + + // Create the mount point. + root := mntns.Root() + defer root.DecRef() + pop := vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(mountPointName), + } + if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ + Mode: 0755, + }); err != nil { + b.Fatalf("failed to create mount point: %v", err) + } + // Save the mount point for later use. + mountPoint, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) + if err != nil { + b.Fatalf("failed to walk to mount point: %v", err) + } + defer mountPoint.DecRef() + // Create and mount the submount. + if err := vfsObj.MountAt(ctx, creds, "", &pop, "tmpfs", &vfs.MountOptions{}); err != nil { + b.Fatalf("failed to mount tmpfs submount: %v", err) + } + filePathBuilder.WriteString(mountPointName) + filePathBuilder.WriteByte('/') + + // Create nested directories with given depth. + vd, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) + if err != nil { + b.Fatalf("failed to walk to mount root: %v", err) + } + for i := depth; i > 0; i-- { + name := fmt.Sprintf("%d", i) + pop := vfs.PathOperation{ + Root: root, + Start: vd, + Path: fspath.Parse(name), + } + if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ + Mode: 0755, + }); err != nil { + b.Fatalf("failed to create directory %q: %v", name, err) + } + nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) + if err != nil { + b.Fatalf("failed to walk to directory %q: %v", name, err) + } + vd.DecRef() + vd = nextVD + filePathBuilder.WriteString(name) + filePathBuilder.WriteByte('/') + } + + // Verify that we didn't create any directories under the mount + // point (i.e. they were all created on the submount). + firstDirName := fmt.Sprintf("%d", depth) + if child := mountPoint.Dentry().Child(firstDirName); child != nil { + b.Fatalf("created directory %q under root mount, not submount", firstDirName) + } + + // Create the file that will be stat'd. + fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: vd, + Path: fspath.Parse(filename), + FollowFinalSymlink: true, + }, &vfs.OpenOptions{ + Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, + Mode: 0644, + }) + vd.DecRef() + if err != nil { + b.Fatalf("failed to create file %q: %v", filename, err) + } + fd.DecRef() + filePathBuilder.WriteString(filename) + filePath := filePathBuilder.String() + + runtime.GC() + b.ResetTimer() + for i := 0; i < b.N; i++ { + stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(filePath), + FollowFinalSymlink: true, + }, &vfs.StatOptions{}) + if err != nil { + b.Fatalf("stat(%q) failed: %v", filePath, err) + } + // Sanity check. + if stat.Mode&^linux.S_IFMT != 0644 { + b.Fatalf("got wrong permissions (%0o)", stat.Mode) + } + } + // Don't include deferred cleanup in benchmark time. + b.StopTimer() + }) + } +} + +func init() { + // Turn off reference leak checking for a fair comparison between vfs1 and + // vfs2. + refs.SetLeakMode(refs.NoLeakChecking) +} diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go new file mode 100644 index 000000000..887ca2619 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -0,0 +1,187 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +type directory struct { + inode inode + + // childList is a list containing (1) child Dentries and (2) fake Dentries + // (with inode == nil) that represent the iteration position of + // directoryFDs. childList is used to support directoryFD.IterDirents() + // efficiently. childList is protected by filesystem.mu. + childList dentryList +} + +func (fs *filesystem) newDirectory(creds *auth.Credentials, mode linux.FileMode) *inode { + dir := &directory{} + dir.inode.init(dir, fs, creds, mode) + dir.inode.nlink = 2 // from "." and parent directory or ".." for root + return &dir.inode +} + +func (i *inode) isDir() bool { + _, ok := i.impl.(*directory) + return ok +} + +type directoryFD struct { + fileDescription + vfs.DirectoryFileDescriptionDefaultImpl + + // Protected by filesystem.mu. + iter *dentry + off int64 +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *directoryFD) Release() { + if fd.iter != nil { + fs := fd.filesystem() + dir := fd.inode().impl.(*directory) + fs.mu.Lock() + dir.childList.Remove(fd.iter) + fs.mu.Unlock() + fd.iter = nil + } +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { + fs := fd.filesystem() + vfsd := fd.vfsfd.VirtualDentry().Dentry() + + fs.mu.Lock() + defer fs.mu.Unlock() + + if fd.off == 0 { + if !cb.Handle(vfs.Dirent{ + Name: ".", + Type: linux.DT_DIR, + Ino: vfsd.Impl().(*dentry).inode.ino, + NextOff: 1, + }) { + return nil + } + fd.off++ + } + if fd.off == 1 { + parentInode := vfsd.ParentOrSelf().Impl().(*dentry).inode + if !cb.Handle(vfs.Dirent{ + Name: "..", + Type: parentInode.direntType(), + Ino: parentInode.ino, + NextOff: 2, + }) { + return nil + } + fd.off++ + } + + dir := vfsd.Impl().(*dentry).inode.impl.(*directory) + var child *dentry + if fd.iter == nil { + // Start iteration at the beginning of dir. + child = dir.childList.Front() + fd.iter = &dentry{} + } else { + // Continue iteration from where we left off. + child = fd.iter.Next() + dir.childList.Remove(fd.iter) + } + for child != nil { + // Skip other directoryFD iterators. + if child.inode != nil { + if !cb.Handle(vfs.Dirent{ + Name: child.vfsd.Name(), + Type: child.inode.direntType(), + Ino: child.inode.ino, + NextOff: fd.off + 1, + }) { + dir.childList.InsertBefore(child, fd.iter) + return nil + } + fd.off++ + } + child = child.Next() + } + dir.childList.PushBack(fd.iter) + return nil +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + fs := fd.filesystem() + fs.mu.Lock() + defer fs.mu.Unlock() + + switch whence { + case linux.SEEK_SET: + // Use offset as given. + case linux.SEEK_CUR: + offset += fd.off + default: + return 0, syserror.EINVAL + } + if offset < 0 { + return 0, syserror.EINVAL + } + + // If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't + // seek even if doing so might reposition the iterator due to concurrent + // mutation of the directory. Compare fs/libfs.c:dcache_dir_lseek(). + if fd.off == offset { + return offset, nil + } + + fd.off = offset + // Compensate for "." and "..". + remChildren := int64(0) + if offset >= 2 { + remChildren = offset - 2 + } + + dir := fd.inode().impl.(*directory) + + // Ensure that fd.iter exists and is not linked into dir.childList. + if fd.iter == nil { + fd.iter = &dentry{} + } else { + dir.childList.Remove(fd.iter) + } + // Insert fd.iter before the remChildren'th child, or at the end of the + // list if remChildren >= number of children. + child := dir.childList.Front() + for child != nil { + // Skip other directoryFD iterators. + if child.inode != nil { + if remChildren == 0 { + dir.childList.InsertBefore(child, fd.iter) + return offset, nil + } + remChildren-- + } + child = child.Next() + } + dir.childList.PushBack(fd.iter) + return offset, nil +} diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go new file mode 100644 index 000000000..26979729e --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -0,0 +1,698 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Sync implements vfs.FilesystemImpl.Sync. +func (fs *filesystem) Sync(ctx context.Context) error { + // All filesystem state is in-memory. + return nil +} + +// stepLocked resolves rp.Component() to an existing file, starting from the +// given directory. +// +// stepLocked is loosely analogous to fs/namei.c:walk_component(). +// +// Preconditions: filesystem.mu must be locked. !rp.Done(). +func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { + if !d.inode.isDir() { + return nil, syserror.ENOTDIR + } + if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err + } +afterSymlink: + nextVFSD, err := rp.ResolveComponent(&d.vfsd) + if err != nil { + return nil, err + } + if nextVFSD == nil { + // Since the Dentry tree is the sole source of truth for tmpfs, if it's + // not in the Dentry tree, it doesn't exist. + return nil, syserror.ENOENT + } + next := nextVFSD.Impl().(*dentry) + if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { + // TODO: symlink traversals update access time + if err := rp.HandleSymlink(symlink.target); err != nil { + return nil, err + } + goto afterSymlink // don't check the current directory again + } + rp.Advance() + return next, nil +} + +// walkParentDirLocked resolves all but the last path component of rp to an +// existing directory, starting from the given directory (which is usually +// rp.Start().Impl().(*dentry)). It does not check that the returned directory +// is searchable by the provider of rp. +// +// walkParentDirLocked is loosely analogous to Linux's +// fs/namei.c:path_parentat(). +// +// Preconditions: filesystem.mu must be locked. !rp.Done(). +func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { + for !rp.Final() { + next, err := stepLocked(rp, d) + if err != nil { + return nil, err + } + d = next + } + if !d.inode.isDir() { + return nil, syserror.ENOTDIR + } + return d, nil +} + +// resolveLocked resolves rp to an existing file. +// +// resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). +// +// Preconditions: filesystem.mu must be locked. +func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) { + d := rp.Start().Impl().(*dentry) + for !rp.Done() { + next, err := stepLocked(rp, d) + if err != nil { + return nil, err + } + d = next + } + if rp.MustBeDir() && !d.inode.isDir() { + return nil, syserror.ENOTDIR + } + return d, nil +} + +// doCreateAt checks that creating a file at rp is permitted, then invokes +// create to do so. +// +// doCreateAt is loosely analogous to a conjunction of Linux's +// fs/namei.c:filename_create() and done_path_create(). +// +// Preconditions: !rp.Done(). For the final path component in rp, +// !rp.ShouldFollowSymlink(). +func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error { + fs.mu.Lock() + defer fs.mu.Unlock() + parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + if err != nil { + return err + } + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { + return err + } + name := rp.Component() + if name == "." || name == ".." { + return syserror.EEXIST + } + // Call parent.vfsd.Child() instead of stepLocked() or rp.ResolveChild(), + // because if the child exists we want to return EEXIST immediately instead + // of attempting symlink/mount traversal. + if parent.vfsd.Child(name) != nil { + return syserror.EEXIST + } + if !dir && rp.MustBeDir() { + return syserror.ENOENT + } + // In memfs, the only way to cause a dentry to be disowned is by removing + // it from the filesystem, so this check is equivalent to checking if + // parent has been removed. + if parent.vfsd.IsDisowned() { + return syserror.ENOENT + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + return create(parent, name) +} + +// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. +func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { + fs.mu.RLock() + defer fs.mu.RUnlock() + d, err := resolveLocked(rp) + if err != nil { + return nil, err + } + if opts.CheckSearchable { + if !d.inode.isDir() { + return nil, syserror.ENOTDIR + } + if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true /* isDir */); err != nil { + return nil, err + } + } + d.IncRef() + return &d.vfsd, nil +} + +// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. +func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { + fs.mu.RLock() + defer fs.mu.RUnlock() + d, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + if err != nil { + return nil, err + } + d.IncRef() + return &d.vfsd, nil +} + +// LinkAt implements vfs.FilesystemImpl.LinkAt. +func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { + return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + if rp.Mount() != vd.Mount() { + return syserror.EXDEV + } + d := vd.Dentry().Impl().(*dentry) + if d.inode.isDir() { + return syserror.EPERM + } + if d.inode.nlink == 0 { + return syserror.ENOENT + } + if d.inode.nlink == maxLinks { + return syserror.EMLINK + } + d.inode.incLinksLocked() + child := fs.newDentry(d.inode) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + }) +} + +// MkdirAt implements vfs.FilesystemImpl.MkdirAt. +func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { + return fs.doCreateAt(rp, true /* dir */, func(parent *dentry, name string) error { + if parent.inode.nlink == maxLinks { + return syserror.EMLINK + } + parent.inode.incLinksLocked() // from child's ".." + child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + }) +} + +// MknodAt implements vfs.FilesystemImpl.MknodAt. +func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { + return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + switch opts.Mode.FileType() { + case 0, linux.S_IFREG: + child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + case linux.S_IFIFO: + child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK: + // Not yet supported. + return syserror.EPERM + default: + return syserror.EINVAL + } + }) +} + +// OpenAt implements vfs.FilesystemImpl.OpenAt. +func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + if opts.Flags&linux.O_TMPFILE != 0 { + // Not yet supported. + return nil, syserror.EOPNOTSUPP + } + + // Handle O_CREAT and !O_CREAT separately, since in the latter case we + // don't need fs.mu for writing. + if opts.Flags&linux.O_CREAT == 0 { + fs.mu.RLock() + defer fs.mu.RUnlock() + d, err := resolveLocked(rp) + if err != nil { + return nil, err + } + return d.open(ctx, rp, opts.Flags, false /* afterCreate */) + } + + mustCreate := opts.Flags&linux.O_EXCL != 0 + start := rp.Start().Impl().(*dentry) + fs.mu.Lock() + defer fs.mu.Unlock() + if rp.Done() { + // Reject attempts to open directories with O_CREAT. + if rp.MustBeDir() { + return nil, syserror.EISDIR + } + if mustCreate { + return nil, syserror.EEXIST + } + return start.open(ctx, rp, opts.Flags, false /* afterCreate */) + } +afterTrailingSymlink: + parent, err := walkParentDirLocked(rp, start) + if err != nil { + return nil, err + } + // Check for search permission in the parent directory. + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err + } + // Reject attempts to open directories with O_CREAT. + if rp.MustBeDir() { + return nil, syserror.EISDIR + } + name := rp.Component() + if name == "." || name == ".." { + return nil, syserror.EISDIR + } + // Determine whether or not we need to create a file. + child, err := stepLocked(rp, parent) + if err == syserror.ENOENT { + // Already checked for searchability above; now check for writability. + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { + return nil, err + } + if err := rp.Mount().CheckBeginWrite(); err != nil { + return nil, err + } + defer rp.Mount().EndWrite() + // Create and open the child. + child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return child.open(ctx, rp, opts.Flags, true) + } + if err != nil { + return nil, err + } + // Do we need to resolve a trailing symlink? + if !rp.Done() { + start = parent + goto afterTrailingSymlink + } + // Open existing file. + if mustCreate { + return nil, syserror.EEXIST + } + return child.open(ctx, rp, opts.Flags, false) +} + +func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { + ats := vfs.AccessTypesForOpenFlags(flags) + if !afterCreate { + if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil { + return nil, err + } + } + mnt := rp.Mount() + switch impl := d.inode.impl.(type) { + case *regularFile: + var fd regularFileFD + fd.readable = vfs.MayReadFileWithOpenFlags(flags) + fd.writable = vfs.MayWriteFileWithOpenFlags(flags) + if fd.writable { + if err := mnt.CheckBeginWrite(); err != nil { + return nil, err + } + // mnt.EndWrite() is called by regularFileFD.Release(). + } + fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) + if flags&linux.O_TRUNC != 0 { + impl.mu.Lock() + impl.data.Truncate(0, impl.memFile) + atomic.StoreUint64(&impl.size, 0) + impl.mu.Unlock() + } + return &fd.vfsfd, nil + case *directory: + // Can't open directories writably. + if ats&vfs.MayWrite != 0 { + return nil, syserror.EISDIR + } + var fd directoryFD + fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) + return &fd.vfsfd, nil + case *symlink: + // Can't open symlinks without O_PATH (which is unimplemented). + return nil, syserror.ELOOP + case *namedPipe: + return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags) + default: + panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl)) + } +} + +// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. +func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { + fs.mu.RLock() + defer fs.mu.RUnlock() + d, err := resolveLocked(rp) + if err != nil { + return "", err + } + symlink, ok := d.inode.impl.(*symlink) + if !ok { + return "", syserror.EINVAL + } + return symlink.target, nil +} + +// RenameAt implements vfs.FilesystemImpl.RenameAt. +func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { + if opts.Flags != 0 { + // TODO(b/145974740): Support renameat2 flags. + return syserror.EINVAL + } + + // Resolve newParent first to verify that it's on this Mount. + fs.mu.Lock() + defer fs.mu.Unlock() + newParent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + if err != nil { + return err + } + newName := rp.Component() + if newName == "." || newName == ".." { + return syserror.EBUSY + } + mnt := rp.Mount() + if mnt != oldParentVD.Mount() { + return syserror.EXDEV + } + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + + oldParent := oldParentVD.Dentry().Impl().(*dentry) + if err := oldParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { + return err + } + // Call vfs.Dentry.Child() instead of stepLocked() or rp.ResolveChild(), + // because if the existing child is a symlink or mount point then we want + // to rename over it rather than follow it. + renamedVFSD := oldParent.vfsd.Child(oldName) + if renamedVFSD == nil { + return syserror.ENOENT + } + renamed := renamedVFSD.Impl().(*dentry) + if renamed.inode.isDir() { + if renamed == newParent || renamedVFSD.IsAncestorOf(&newParent.vfsd) { + return syserror.EINVAL + } + if oldParent != newParent { + // Writability is needed to change renamed's "..". + if err := renamed.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true /* isDir */); err != nil { + return err + } + } + } else { + if opts.MustBeDir || rp.MustBeDir() { + return syserror.ENOTDIR + } + } + + if err := newParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { + return err + } + replacedVFSD := newParent.vfsd.Child(newName) + var replaced *dentry + if replacedVFSD != nil { + replaced = replacedVFSD.Impl().(*dentry) + if replaced.inode.isDir() { + if !renamed.inode.isDir() { + return syserror.EISDIR + } + if replaced.vfsd.HasChildren() { + return syserror.ENOTEMPTY + } + } else { + if rp.MustBeDir() { + return syserror.ENOTDIR + } + if renamed.inode.isDir() { + return syserror.ENOTDIR + } + } + } else { + if renamed.inode.isDir() && newParent.inode.nlink == maxLinks { + return syserror.EMLINK + } + } + if newParent.vfsd.IsDisowned() { + return syserror.ENOENT + } + + // Linux places this check before some of those above; we do it here for + // simplicity, under the assumption that applications are not intentionally + // doing noop renames expecting them to succeed where non-noop renames + // would fail. + if renamedVFSD == replacedVFSD { + return nil + } + vfsObj := rp.VirtualFilesystem() + oldParentDir := oldParent.inode.impl.(*directory) + newParentDir := newParent.inode.impl.(*directory) + if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil { + return err + } + if replaced != nil { + newParentDir.childList.Remove(replaced) + if replaced.inode.isDir() { + newParent.inode.decLinksLocked() // from replaced's ".." + } + replaced.inode.decLinksLocked() + } + oldParentDir.childList.Remove(renamed) + newParentDir.childList.PushBack(renamed) + if renamed.inode.isDir() { + oldParent.inode.decLinksLocked() + newParent.inode.incLinksLocked() + } + // TODO: update timestamps and parent directory sizes + vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD) + return nil +} + +// RmdirAt implements vfs.FilesystemImpl.RmdirAt. +func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { + fs.mu.Lock() + defer fs.mu.Unlock() + parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + if err != nil { + return err + } + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { + return err + } + name := rp.Component() + if name == "." { + return syserror.EINVAL + } + if name == ".." { + return syserror.ENOTEMPTY + } + childVFSD := parent.vfsd.Child(name) + if childVFSD == nil { + return syserror.ENOENT + } + child := childVFSD.Impl().(*dentry) + if !child.inode.isDir() { + return syserror.ENOTDIR + } + if childVFSD.HasChildren() { + return syserror.ENOTEMPTY + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + vfsObj := rp.VirtualFilesystem() + if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { + return err + } + parent.inode.impl.(*directory).childList.Remove(child) + parent.inode.decLinksLocked() // from child's ".." + child.inode.decLinksLocked() + vfsObj.CommitDeleteDentry(childVFSD) + return nil +} + +// SetStatAt implements vfs.FilesystemImpl.SetStatAt. +func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { + fs.mu.RLock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) + if err != nil { + return err + } + if opts.Stat.Mask == 0 { + return nil + } + // TODO: implement inode.setStat + return syserror.EPERM +} + +// StatAt implements vfs.FilesystemImpl.StatAt. +func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { + fs.mu.RLock() + defer fs.mu.RUnlock() + d, err := resolveLocked(rp) + if err != nil { + return linux.Statx{}, err + } + var stat linux.Statx + d.inode.statTo(&stat) + return stat, nil +} + +// StatFSAt implements vfs.FilesystemImpl.StatFSAt. +func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { + fs.mu.RLock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) + if err != nil { + return linux.Statfs{}, err + } + // TODO: actually implement statfs + return linux.Statfs{}, syserror.ENOSYS +} + +// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. +func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { + return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + child := fs.newDentry(fs.newSymlink(rp.Credentials(), target)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + }) +} + +// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. +func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { + fs.mu.Lock() + defer fs.mu.Unlock() + parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + if err != nil { + return err + } + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { + return err + } + name := rp.Component() + if name == "." || name == ".." { + return syserror.EISDIR + } + childVFSD := parent.vfsd.Child(name) + if childVFSD == nil { + return syserror.ENOENT + } + child := childVFSD.Impl().(*dentry) + if child.inode.isDir() { + return syserror.EISDIR + } + if !rp.MustBeDir() { + return syserror.ENOTDIR + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + vfsObj := rp.VirtualFilesystem() + if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { + return err + } + parent.inode.impl.(*directory).childList.Remove(child) + child.inode.decLinksLocked() + vfsObj.CommitDeleteDentry(childVFSD) + return nil +} + +// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. +func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { + fs.mu.RLock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) + if err != nil { + return nil, err + } + // TODO(b/127675828): support extended attributes + return nil, syserror.ENOTSUP +} + +// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. +func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { + fs.mu.RLock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) + if err != nil { + return "", err + } + // TODO(b/127675828): support extended attributes + return "", syserror.ENOTSUP +} + +// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. +func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { + fs.mu.RLock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) + if err != nil { + return err + } + // TODO(b/127675828): support extended attributes + return syserror.ENOTSUP +} + +// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. +func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { + fs.mu.RLock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) + if err != nil { + return err + } + // TODO(b/127675828): support extended attributes + return syserror.ENOTSUP +} + +// PrependPath implements vfs.FilesystemImpl.PrependPath. +func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { + fs.mu.RLock() + defer fs.mu.RUnlock() + return vfs.GenericPrependPath(vfsroot, vd, b) +} diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go new file mode 100644 index 000000000..40bde54de --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/named_pipe.go @@ -0,0 +1,60 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +type namedPipe struct { + inode inode + + pipe *pipe.VFSPipe +} + +// Preconditions: +// * fs.mu must be locked. +// * rp.Mount().CheckBeginWrite() has been called successfully. +func (fs *filesystem) newNamedPipe(creds *auth.Credentials, mode linux.FileMode) *inode { + file := &namedPipe{pipe: pipe.NewVFSPipe(pipe.DefaultPipeSize, usermem.PageSize)} + file.inode.init(file, fs, creds, mode) + file.inode.nlink = 1 // Only the parent has a link. + return &file.inode +} + +// namedPipeFD implements vfs.FileDescriptionImpl. Methods are implemented +// entirely via struct embedding. +type namedPipeFD struct { + fileDescription + + *pipe.VFSPipeFD +} + +func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { + var err error + var fd namedPipeFD + fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, rp, vfsd, &fd.vfsfd, flags) + if err != nil { + return nil, err + } + mnt := rp.Mount() + fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) + return &fd.vfsfd, nil +} diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go new file mode 100644 index 000000000..70b42a6ec --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go @@ -0,0 +1,235 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "bytes" + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +const fileName = "mypipe" + +func TestSeparateFDs(t *testing.T) { + ctx, creds, vfsObj, root := setup(t) + defer root.DecRef() + + // Open the read side. This is done in a concurrently because opening + // One end the pipe blocks until the other end is opened. + pop := vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(fileName), + FollowFinalSymlink: true, + } + rfdchan := make(chan *vfs.FileDescription) + go func() { + openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY} + rfd, _ := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) + rfdchan <- rfd + }() + + // Open the write side. + openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY} + wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) + if err != nil { + t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) + } + defer wfd.DecRef() + + rfd, ok := <-rfdchan + if !ok { + t.Fatalf("failed to open pipe for reading %q", fileName) + } + defer rfd.DecRef() + + const msg = "vamos azul" + checkEmpty(ctx, t, rfd) + checkWrite(ctx, t, wfd, msg) + checkRead(ctx, t, rfd, msg) +} + +func TestNonblockingRead(t *testing.T) { + ctx, creds, vfsObj, root := setup(t) + defer root.DecRef() + + // Open the read side as nonblocking. + pop := vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(fileName), + FollowFinalSymlink: true, + } + openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK} + rfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) + if err != nil { + t.Fatalf("failed to open pipe for reading %q: %v", fileName, err) + } + defer rfd.DecRef() + + // Open the write side. + openOpts = vfs.OpenOptions{Flags: linux.O_WRONLY} + wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) + if err != nil { + t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) + } + defer wfd.DecRef() + + const msg = "geh blau" + checkEmpty(ctx, t, rfd) + checkWrite(ctx, t, wfd, msg) + checkRead(ctx, t, rfd, msg) +} + +func TestNonblockingWriteError(t *testing.T) { + ctx, creds, vfsObj, root := setup(t) + defer root.DecRef() + + // Open the write side as nonblocking, which should return ENXIO. + pop := vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(fileName), + FollowFinalSymlink: true, + } + openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK} + _, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) + if err != syserror.ENXIO { + t.Fatalf("expected ENXIO, but got error: %v", err) + } +} + +func TestSingleFD(t *testing.T) { + ctx, creds, vfsObj, root := setup(t) + defer root.DecRef() + + // Open the pipe as readable and writable. + pop := vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(fileName), + FollowFinalSymlink: true, + } + openOpts := vfs.OpenOptions{Flags: linux.O_RDWR} + fd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) + if err != nil { + t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) + } + defer fd.DecRef() + + const msg = "forza blu" + checkEmpty(ctx, t, fd) + checkWrite(ctx, t, fd, msg) + checkRead(ctx, t, fd, msg) +} + +// setup creates a VFS with a pipe in the root directory at path fileName. The +// returned VirtualDentry must be DecRef()'d be the caller. It calls t.Fatal +// upon failure. +func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesystem, vfs.VirtualDentry) { + ctx := contexttest.Context(t) + creds := auth.CredentialsFromContext(ctx) + + // Create VFS. + vfsObj := vfs.New() + vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + }) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) + if err != nil { + t.Fatalf("failed to create tmpfs root mount: %v", err) + } + + // Create the pipe. + root := mntns.Root() + pop := vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(fileName), + } + mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644} + if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil { + t.Fatalf("failed to create file %q: %v", fileName, err) + } + + // Sanity check: the file pipe exists and has the correct mode. + stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(fileName), + FollowFinalSymlink: true, + }, &vfs.StatOptions{}) + if err != nil { + t.Fatalf("stat(%q) failed: %v", fileName, err) + } + if stat.Mode&^linux.S_IFMT != 0644 { + t.Errorf("got wrong permissions (%0o)", stat.Mode) + } + if stat.Mode&linux.S_IFMT != linux.ModeNamedPipe { + t.Errorf("got wrong file type (%0o)", stat.Mode) + } + + return ctx, creds, vfsObj, root +} + +// checkEmpty calls t.Fatal if the pipe in fd is not empty. +func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) { + readData := make([]byte, 1) + dst := usermem.BytesIOSequence(readData) + bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{}) + if err != syserror.ErrWouldBlock { + t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err) + } + if bytesRead != 0 { + t.Fatalf("expected to read 0 bytes, but got %d", bytesRead) + } +} + +// checkWrite calls t.Fatal if it fails to write all of msg to fd. +func checkWrite(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) { + writeData := []byte(msg) + src := usermem.BytesIOSequence(writeData) + bytesWritten, err := fd.Write(ctx, src, vfs.WriteOptions{}) + if err != nil { + t.Fatalf("error writing to pipe %q: %v", fileName, err) + } + if bytesWritten != int64(len(writeData)) { + t.Fatalf("expected to write %d bytes, but wrote %d", len(writeData), bytesWritten) + } +} + +// checkRead calls t.Fatal if it fails to read msg from fd. +func checkRead(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) { + readData := make([]byte, len(msg)) + dst := usermem.BytesIOSequence(readData) + bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{}) + if err != nil { + t.Fatalf("error reading from pipe %q: %v", fileName, err) + } + if bytesRead != int64(len(msg)) { + t.Fatalf("expected to read %d bytes, but got %d", len(msg), bytesRead) + } + if !bytes.Equal(readData, []byte(msg)) { + t.Fatalf("expected to read %q from pipe, but got %q", msg, string(readData)) + } +} diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go new file mode 100644 index 000000000..f51e247a7 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -0,0 +1,357 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "io" + "math" + "sync" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +type regularFile struct { + inode inode + + // memFile is a platform.File used to allocate pages to this regularFile. + memFile *pgalloc.MemoryFile + + // mu protects the fields below. + mu sync.RWMutex + + // data maps offsets into the file to offsets into memFile that store + // the file's data. + data fsutil.FileRangeSet + + // size is the size of data, but accessed using atomic memory + // operations to avoid locking in inode.stat(). + size uint64 + + // seals represents file seals on this inode. + seals uint32 +} + +func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode { + file := ®ularFile{ + memFile: fs.memFile, + } + file.inode.init(file, fs, creds, mode) + file.inode.nlink = 1 // from parent directory + return &file.inode +} + +type regularFileFD struct { + fileDescription + + // These are immutable. + readable bool + writable bool + + // off is the file offset. off is accessed using atomic memory operations. + // offMu serializes operations that may mutate off. + off int64 + offMu sync.Mutex +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *regularFileFD) Release() { + if fd.writable { + fd.vfsfd.VirtualDentry().Mount().EndWrite() + } +} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + if !fd.readable { + return 0, syserror.EINVAL + } + if offset < 0 { + return 0, syserror.EINVAL + } + if dst.NumBytes() == 0 { + return 0, nil + } + f := fd.inode().impl.(*regularFile) + rw := getRegularFileReadWriter(f, offset) + n, err := dst.CopyOutFrom(ctx, rw) + putRegularFileReadWriter(rw) + return int64(n), err +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + fd.offMu.Lock() + n, err := fd.PRead(ctx, dst, fd.off, opts) + fd.off += n + fd.offMu.Unlock() + return n, err +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + if !fd.writable { + return 0, syserror.EINVAL + } + if offset < 0 { + return 0, syserror.EINVAL + } + srclen := src.NumBytes() + if srclen == 0 { + return 0, nil + } + f := fd.inode().impl.(*regularFile) + end := offset + srclen + if end < offset { + // Overflow. + return 0, syserror.EFBIG + } + rw := getRegularFileReadWriter(f, offset) + n, err := src.CopyInTo(ctx, rw) + putRegularFileReadWriter(rw) + return n, err +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + fd.offMu.Lock() + n, err := fd.PWrite(ctx, src, fd.off, opts) + fd.off += n + fd.offMu.Unlock() + return n, err +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + fd.offMu.Lock() + defer fd.offMu.Unlock() + switch whence { + case linux.SEEK_SET: + // use offset as specified + case linux.SEEK_CUR: + offset += fd.off + case linux.SEEK_END: + offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size)) + default: + return 0, syserror.EINVAL + } + if offset < 0 { + return 0, syserror.EINVAL + } + fd.off = offset + return offset, nil +} + +// Sync implements vfs.FileDescriptionImpl.Sync. +func (fd *regularFileFD) Sync(ctx context.Context) error { + return nil +} + +// regularFileReadWriter implements safemem.Reader and Safemem.Writer. +type regularFileReadWriter struct { + file *regularFile + + // Offset into the file to read/write at. Note that this may be + // different from the FD offset if PRead/PWrite is used. + off uint64 +} + +var regularFileReadWriterPool = sync.Pool{ + New: func() interface{} { + return ®ularFileReadWriter{} + }, +} + +func getRegularFileReadWriter(file *regularFile, offset int64) *regularFileReadWriter { + rw := regularFileReadWriterPool.Get().(*regularFileReadWriter) + rw.file = file + rw.off = uint64(offset) + return rw +} + +func putRegularFileReadWriter(rw *regularFileReadWriter) { + rw.file = nil + regularFileReadWriterPool.Put(rw) +} + +// ReadToBlocks implements safemem.Reader.ReadToBlocks. +func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { + rw.file.mu.RLock() + + // Compute the range to read (limited by file size and overflow-checked). + if rw.off >= rw.file.size { + rw.file.mu.RUnlock() + return 0, io.EOF + } + end := rw.file.size + if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end { + end = rend + } + + var done uint64 + seg, gap := rw.file.data.Find(uint64(rw.off)) + for rw.off < end { + mr := memmap.MappableRange{uint64(rw.off), uint64(end)} + switch { + case seg.Ok(): + // Get internal mappings. + ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) + if err != nil { + rw.file.mu.RUnlock() + return done, err + } + + // Copy from internal mappings. + n, err := safemem.CopySeq(dsts, ims) + done += n + rw.off += uint64(n) + dsts = dsts.DropFirst64(n) + if err != nil { + rw.file.mu.RUnlock() + return done, err + } + + // Continue. + seg, gap = seg.NextNonEmpty() + + case gap.Ok(): + // Tmpfs holes are zero-filled. + gapmr := gap.Range().Intersect(mr) + dst := dsts.TakeFirst64(gapmr.Length()) + n, err := safemem.ZeroSeq(dst) + done += n + rw.off += uint64(n) + dsts = dsts.DropFirst64(n) + if err != nil { + rw.file.mu.RUnlock() + return done, err + } + + // Continue. + seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{} + } + } + rw.file.mu.RUnlock() + return done, nil +} + +// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. +func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { + rw.file.mu.Lock() + + // Compute the range to write (overflow-checked). + end := rw.off + srcs.NumBytes() + if end <= rw.off { + end = math.MaxInt64 + } + + // Check if seals prevent either file growth or all writes. + switch { + case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed + rw.file.mu.Unlock() + return 0, syserror.EPERM + case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed + // When growth is sealed, Linux effectively allows writes which would + // normally grow the file to partially succeed up to the current EOF, + // rounded down to the page boundary before the EOF. + // + // This happens because writes (and thus the growth check) for tmpfs + // files proceed page-by-page on Linux, and the final write to the page + // containing EOF fails, resulting in a partial write up to the start of + // that page. + // + // To emulate this behaviour, artifically truncate the write to the + // start of the page containing the current EOF. + // + // See Linux, mm/filemap.c:generic_perform_write() and + // mm/shmem.c:shmem_write_begin(). + if pgstart := uint64(usermem.Addr(rw.file.size).RoundDown()); end > pgstart { + end = pgstart + } + if end <= rw.off { + // Truncation would result in no data being written. + rw.file.mu.Unlock() + return 0, syserror.EPERM + } + } + + // Page-aligned mr for when we need to allocate memory. RoundUp can't + // overflow since end is an int64. + pgstartaddr := usermem.Addr(rw.off).RoundDown() + pgendaddr, _ := usermem.Addr(end).RoundUp() + pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)} + + var ( + done uint64 + retErr error + ) + seg, gap := rw.file.data.Find(uint64(rw.off)) + for rw.off < end { + mr := memmap.MappableRange{uint64(rw.off), uint64(end)} + switch { + case seg.Ok(): + // Get internal mappings. + ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write) + if err != nil { + retErr = err + goto exitLoop + } + + // Copy to internal mappings. + n, err := safemem.CopySeq(ims, srcs) + done += n + rw.off += uint64(n) + srcs = srcs.DropFirst64(n) + if err != nil { + retErr = err + goto exitLoop + } + + // Continue. + seg, gap = seg.NextNonEmpty() + + case gap.Ok(): + // Allocate memory for the write. + gapMR := gap.Range().Intersect(pgMR) + fr, err := rw.file.memFile.Allocate(gapMR.Length(), usage.Tmpfs) + if err != nil { + retErr = err + goto exitLoop + } + + // Write to that memory as usual. + seg, gap = rw.file.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{} + } + } +exitLoop: + // If the write ends beyond the file's previous size, it causes the + // file to grow. + if rw.off > rw.file.size { + atomic.StoreUint64(&rw.file.size, rw.off) + } + + rw.file.mu.Unlock() + return done, retErr +} diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go new file mode 100644 index 000000000..3731c5b6f --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go @@ -0,0 +1,224 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "bytes" + "fmt" + "io" + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If +// the returned err is not nil, then cleanup should be called when the FD is no +// longer needed. +func newFileFD(ctx context.Context, filename string) (*vfs.FileDescription, func(), error) { + creds := auth.CredentialsFromContext(ctx) + + vfsObj := vfs.New() + vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + }) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) + if err != nil { + return nil, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err) + } + root := mntns.Root() + + // Create the file that will be write/read. + fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(filename), + FollowFinalSymlink: true, + }, &vfs.OpenOptions{ + Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, + Mode: 0644, + }) + if err != nil { + root.DecRef() + mntns.DecRef(vfsObj) + return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err) + } + + return fd, func() { + root.DecRef() + mntns.DecRef(vfsObj) + }, nil +} + +// Test that we can write some data to a file and read it back.` +func TestSimpleWriteRead(t *testing.T) { + ctx := contexttest.Context(t) + fd, cleanup, err := newFileFD(ctx, "simpleReadWrite") + if err != nil { + t.Fatal(err) + } + defer cleanup() + + // Write. + data := []byte("foobarbaz") + n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) + if err != nil { + t.Fatalf("fd.Write failed: %v", err) + } + if n != int64(len(data)) { + t.Errorf("fd.Write got short write length %d, want %d", n, len(data)) + } + if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want { + t.Errorf("fd.Write left offset at %d, want %d", got, want) + } + + // Seek back to beginning. + if _, err := fd.Seek(ctx, 0, linux.SEEK_SET); err != nil { + t.Fatalf("fd.Seek failed: %v", err) + } + if got, want := fd.Impl().(*regularFileFD).off, int64(0); got != want { + t.Errorf("fd.Seek(0) left offset at %d, want %d", got, want) + } + + // Read. + buf := make([]byte, len(data)) + n, err = fd.Read(ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) + if err != nil && err != io.EOF { + t.Fatalf("fd.Read failed: %v", err) + } + if n != int64(len(data)) { + t.Errorf("fd.Read got short read length %d, want %d", n, len(data)) + } + if got, want := string(buf), string(data); got != want { + t.Errorf("Read got %q want %s", got, want) + } + if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want { + t.Errorf("fd.Write left offset at %d, want %d", got, want) + } +} + +func TestPWrite(t *testing.T) { + ctx := contexttest.Context(t) + fd, cleanup, err := newFileFD(ctx, "PRead") + if err != nil { + t.Fatal(err) + } + defer cleanup() + + // Fill file with 1k 'a's. + data := bytes.Repeat([]byte{'a'}, 1000) + n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) + if err != nil { + t.Fatalf("fd.Write failed: %v", err) + } + if n != int64(len(data)) { + t.Errorf("fd.Write got short write length %d, want %d", n, len(data)) + } + + // Write "gVisor is awesome" at various offsets. + buf := []byte("gVisor is awesome") + offsets := []int{0, 1, 2, 10, 20, 50, 100, len(data) - 100, len(data) - 1, len(data), len(data) + 1} + for _, offset := range offsets { + name := fmt.Sprintf("PWrite offset=%d", offset) + t.Run(name, func(t *testing.T) { + n, err := fd.PWrite(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.WriteOptions{}) + if err != nil { + t.Errorf("fd.PWrite got err %v want nil", err) + } + if n != int64(len(buf)) { + t.Errorf("fd.PWrite got %d bytes want %d", n, len(buf)) + } + + // Update data to reflect expected file contents. + if len(data) < offset+len(buf) { + data = append(data, make([]byte, (offset+len(buf))-len(data))...) + } + copy(data[offset:], buf) + + // Read the whole file and compare with data. + readBuf := make([]byte, len(data)) + n, err = fd.PRead(ctx, usermem.BytesIOSequence(readBuf), 0, vfs.ReadOptions{}) + if err != nil { + t.Fatalf("fd.PRead failed: %v", err) + } + if n != int64(len(data)) { + t.Errorf("fd.PRead got short read length %d, want %d", n, len(data)) + } + if got, want := string(readBuf), string(data); got != want { + t.Errorf("PRead got %q want %s", got, want) + } + + }) + } +} + +func TestPRead(t *testing.T) { + ctx := contexttest.Context(t) + fd, cleanup, err := newFileFD(ctx, "PRead") + if err != nil { + t.Fatal(err) + } + defer cleanup() + + // Write 100 sequences of 'gVisor is awesome'. + data := bytes.Repeat([]byte("gVisor is awsome"), 100) + n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) + if err != nil { + t.Fatalf("fd.Write failed: %v", err) + } + if n != int64(len(data)) { + t.Errorf("fd.Write got short write length %d, want %d", n, len(data)) + } + + // Read various sizes from various offsets. + sizes := []int{0, 1, 2, 10, 20, 50, 100, 1000} + offsets := []int{0, 1, 2, 10, 20, 50, 100, 1000, len(data) - 100, len(data) - 1, len(data), len(data) + 1} + + for _, size := range sizes { + for _, offset := range offsets { + name := fmt.Sprintf("PRead offset=%d size=%d", offset, size) + t.Run(name, func(t *testing.T) { + var ( + wantRead []byte + wantErr error + ) + if offset < len(data) { + wantRead = data[offset:] + } else if size > 0 { + wantErr = io.EOF + } + if offset+size < len(data) { + wantRead = wantRead[:size] + } + buf := make([]byte, size) + n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.ReadOptions{}) + if err != wantErr { + t.Errorf("fd.PRead got err %v want %v", err, wantErr) + } + if n != int64(len(wantRead)) { + t.Errorf("fd.PRead got %d bytes want %d", n, len(wantRead)) + } + if got := string(buf[:n]); got != string(wantRead) { + t.Errorf("fd.PRead got %q want %q", got, string(wantRead)) + } + }) + } + } +} diff --git a/pkg/sentry/fsimpl/tmpfs/symlink.go b/pkg/sentry/fsimpl/tmpfs/symlink.go new file mode 100644 index 000000000..5246aca84 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/symlink.go @@ -0,0 +1,36 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +type symlink struct { + inode inode + target string // immutable +} + +func (fs *filesystem) newSymlink(creds *auth.Credentials, target string) *inode { + link := &symlink{ + target: target, + } + link.inode.init(link, fs, creds, 0777) + link.inode.nlink = 1 // from parent directory + return &link.inode +} + +// O_PATH is unimplemented, so there's no way to get a FileDescription +// representing a symlink yet. diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go new file mode 100644 index 000000000..7be6faa5b --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -0,0 +1,299 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package tmpfs provides a filesystem implementation that behaves like tmpfs: +// the Dentry tree is the sole source of truth for the state of the filesystem. +// +// Lock order: +// +// filesystem.mu +// regularFileFD.offMu +// regularFile.mu +// inode.mu +package tmpfs + +import ( + "fmt" + "math" + "sync" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// FilesystemType implements vfs.FilesystemType. +type FilesystemType struct{} + +// filesystem implements vfs.FilesystemImpl. +type filesystem struct { + vfsfs vfs.Filesystem + + // memFile is used to allocate pages to for regular files. + memFile *pgalloc.MemoryFile + + // mu serializes changes to the Dentry tree. + mu sync.RWMutex + + nextInoMinusOne uint64 // accessed using atomic memory operations +} + +// GetFilesystem implements vfs.FilesystemType.GetFilesystem. +func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + memFileProvider := pgalloc.MemoryFileProviderFromContext(ctx) + if memFileProvider == nil { + panic("MemoryFileProviderFromContext returned nil") + } + fs := filesystem{ + memFile: memFileProvider.MemoryFile(), + } + fs.vfsfs.Init(vfsObj, &fs) + root := fs.newDentry(fs.newDirectory(creds, 01777)) + return &fs.vfsfs, &root.vfsd, nil +} + +// Release implements vfs.FilesystemImpl.Release. +func (fs *filesystem) Release() { +} + +// dentry implements vfs.DentryImpl. +type dentry struct { + vfsd vfs.Dentry + + // inode is the inode represented by this dentry. Multiple Dentries may + // share a single non-directory inode (with hard links). inode is + // immutable. + inode *inode + + // tmpfs doesn't count references on dentries; because the dentry tree is + // the sole source of truth, it is by definition always consistent with the + // state of the filesystem. However, it does count references on inodes, + // because inode resources are released when all references are dropped. + // (tmpfs doesn't really have resources to release, but we implement + // reference counting because tmpfs regular files will.) + + // dentryEntry (ugh) links dentries into their parent directory.childList. + dentryEntry +} + +func (fs *filesystem) newDentry(inode *inode) *dentry { + d := &dentry{ + inode: inode, + } + d.vfsd.Init(d) + return d +} + +// IncRef implements vfs.DentryImpl.IncRef. +func (d *dentry) IncRef() { + d.inode.incRef() +} + +// TryIncRef implements vfs.DentryImpl.TryIncRef. +func (d *dentry) TryIncRef() bool { + return d.inode.tryIncRef() +} + +// DecRef implements vfs.DentryImpl.DecRef. +func (d *dentry) DecRef() { + d.inode.decRef() +} + +// inode represents a filesystem object. +type inode struct { + // refs is a reference count. refs is accessed using atomic memory + // operations. + // + // A reference is held on all inodes that are reachable in the filesystem + // tree. For non-directories (which may have multiple hard links), this + // means that a reference is dropped when nlink reaches 0. For directories, + // nlink never reaches 0 due to the "." entry; instead, + // filesystem.RmdirAt() drops the reference. + refs int64 + + // Inode metadata; protected by mu and accessed using atomic memory + // operations unless otherwise specified. + mu sync.RWMutex + mode uint32 // excluding file type bits, which are based on impl + nlink uint32 // protected by filesystem.mu instead of inode.mu + uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic + gid uint32 // auth.KGID, but ... + ino uint64 // immutable + + impl interface{} // immutable +} + +const maxLinks = math.MaxUint32 + +func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) { + i.refs = 1 + i.mode = uint32(mode) + i.uid = uint32(creds.EffectiveKUID) + i.gid = uint32(creds.EffectiveKGID) + i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1) + // i.nlink initialized by caller + i.impl = impl +} + +// incLinksLocked increments i's link count. +// +// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. +// i.nlink < maxLinks. +func (i *inode) incLinksLocked() { + if i.nlink == 0 { + panic("tmpfs.inode.incLinksLocked() called with no existing links") + } + if i.nlink == maxLinks { + panic("memfs.inode.incLinksLocked() called with maximum link count") + } + atomic.AddUint32(&i.nlink, 1) +} + +// decLinksLocked decrements i's link count. +// +// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. +func (i *inode) decLinksLocked() { + if i.nlink == 0 { + panic("tmpfs.inode.decLinksLocked() called with no existing links") + } + atomic.AddUint32(&i.nlink, ^uint32(0)) +} + +func (i *inode) incRef() { + if atomic.AddInt64(&i.refs, 1) <= 1 { + panic("tmpfs.inode.incRef() called without holding a reference") + } +} + +func (i *inode) tryIncRef() bool { + for { + refs := atomic.LoadInt64(&i.refs) + if refs == 0 { + return false + } + if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) { + return true + } + } +} + +func (i *inode) decRef() { + if refs := atomic.AddInt64(&i.refs, -1); refs == 0 { + // This is unnecessary; it's mostly to simulate what tmpfs would do. + if regFile, ok := i.impl.(*regularFile); ok { + regFile.mu.Lock() + regFile.data.DropAll(regFile.memFile) + atomic.StoreUint64(®File.size, 0) + regFile.mu.Unlock() + } + } else if refs < 0 { + panic("tmpfs.inode.decRef() called without holding a reference") + } +} + +func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error { + return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))) +} + +// Go won't inline this function, and returning linux.Statx (which is quite +// big) means spending a lot of time in runtime.duffcopy(), so instead it's an +// output parameter. +func (i *inode) statTo(stat *linux.Statx) { + stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO + stat.Blksize = 1 // usermem.PageSize in tmpfs + stat.Nlink = atomic.LoadUint32(&i.nlink) + stat.UID = atomic.LoadUint32(&i.uid) + stat.GID = atomic.LoadUint32(&i.gid) + stat.Mode = uint16(atomic.LoadUint32(&i.mode)) + stat.Ino = i.ino + // TODO: device number + switch impl := i.impl.(type) { + case *regularFile: + stat.Mode |= linux.S_IFREG + stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS + stat.Size = uint64(atomic.LoadUint64(&impl.size)) + // In tmpfs, this will be FileRangeSet.Span() / 512 (but also cached in + // a uint64 accessed using atomic memory operations to avoid taking + // locks). + stat.Blocks = allocatedBlocksForSize(stat.Size) + case *directory: + stat.Mode |= linux.S_IFDIR + case *symlink: + stat.Mode |= linux.S_IFLNK + stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS + stat.Size = uint64(len(impl.target)) + stat.Blocks = allocatedBlocksForSize(stat.Size) + case *namedPipe: + stat.Mode |= linux.S_IFIFO + default: + panic(fmt.Sprintf("unknown inode type: %T", i.impl)) + } +} + +// allocatedBlocksForSize returns the number of 512B blocks needed to +// accommodate the given size in bytes, as appropriate for struct +// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block +// size is independent of the "preferred block size for I/O", struct +// stat::st_blksize and struct statx::stx_blksize.) +func allocatedBlocksForSize(size uint64) uint64 { + return (size + 511) / 512 +} + +func (i *inode) direntType() uint8 { + switch i.impl.(type) { + case *regularFile: + return linux.DT_REG + case *directory: + return linux.DT_DIR + case *symlink: + return linux.DT_LNK + default: + panic(fmt.Sprintf("unknown inode type: %T", i.impl)) + } +} + +// fileDescription is embedded by tmpfs implementations of +// vfs.FileDescriptionImpl. +type fileDescription struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl +} + +func (fd *fileDescription) filesystem() *filesystem { + return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) +} + +func (fd *fileDescription) inode() *inode { + return fd.vfsfd.Dentry().Impl().(*dentry).inode +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + var stat linux.Statx + fd.inode().statTo(&stat) + return stat, nil +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + if opts.Stat.Mask == 0 { + return nil + } + // TODO: implement inode.setStat + return syserror.EPERM +} -- cgit v1.2.3 From a53ac7307abfeb7172e67f48d0a7aaa4b5c3f31e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 7 Jan 2020 23:52:59 -0800 Subject: fs/splice: don't report a partialResult error if there is no data loss PiperOrigin-RevId: 288642552 --- pkg/sentry/fs/file.go | 7 +++++++ pkg/sentry/fs/splice.go | 5 +++++ test/syscalls/linux/inotify.cc | 28 ++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index c0a6e884b..a2f966cb6 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -555,6 +555,10 @@ type lockedWriter struct { // // This applies only to Write, not WriteAt. Offset int64 + + // Err contains the first error encountered while copying. This is + // useful to determine whether Writer or Reader failed during io.Copy. + Err error } // Write implements io.Writer.Write. @@ -590,5 +594,8 @@ func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) { break } } + if w.Err == nil { + w.Err = err + } return written, err } diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index 311798811..389c330a0 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -167,6 +167,11 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, if !srcPipe && !opts.SrcOffset { atomic.StoreInt64(&src.offset, src.offset+n) } + + // Don't report any errors if we have some progress without data loss. + if w.Err == nil { + err = nil + } } // Drop locks. diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 7384c27dc..59ec9940a 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -1591,6 +1591,34 @@ TEST(Inotify, EpollNoDeadlock) { } } +TEST(Inotify, SpliceEvent) { + int pipes[2]; + ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); + + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file1 = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + root.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor file1_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_RDONLY)); + const int watcher = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), file1.path(), IN_ALL_EVENTS)); + + char buf; + EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds()); + + EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr, + sizeof(struct inotify_event) + 1, SPLICE_F_NONBLOCK), + SyscallSucceedsWithValue(sizeof(struct inotify_event))); + + const FileDescriptor read_fd(pipes[0]); + const std::vector events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(read_fd.get())); + ASSERT_THAT(events, Are({Event(IN_ACCESS, watcher)})); +} + } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3 From 27500d529f7fb87eef8812278fd1bbca67bcba72 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Thu, 9 Jan 2020 22:00:42 -0800 Subject: New sync package. * Rename syncutil to sync. * Add aliases to sync types. * Replace existing usage of standard library sync package. This will make it easier to swap out synchronization primitives. For example, this will allow us to use primitives from github.com/sasha-s/go-deadlock to check for lock ordering violations. Updates #1472 PiperOrigin-RevId: 289033387 --- pkg/amutex/BUILD | 1 + pkg/amutex/amutex_test.go | 3 +- pkg/atomicbitops/BUILD | 1 + pkg/atomicbitops/atomic_bitops_test.go | 3 +- pkg/compressio/BUILD | 5 +- pkg/compressio/compressio.go | 2 +- pkg/control/server/BUILD | 1 + pkg/control/server/server.go | 2 +- pkg/eventchannel/BUILD | 2 + pkg/eventchannel/event.go | 2 +- pkg/eventchannel/event_test.go | 2 +- pkg/fdchannel/BUILD | 1 + pkg/fdchannel/fdchannel_test.go | 3 +- pkg/fdnotifier/BUILD | 1 + pkg/fdnotifier/fdnotifier.go | 2 +- pkg/flipcall/BUILD | 3 +- pkg/flipcall/flipcall_example_test.go | 3 +- pkg/flipcall/flipcall_test.go | 3 +- pkg/flipcall/flipcall_unsafe.go | 10 +- pkg/gate/BUILD | 1 + pkg/gate/gate_test.go | 2 +- pkg/linewriter/BUILD | 1 + pkg/linewriter/linewriter.go | 3 +- pkg/log/BUILD | 5 +- pkg/log/log.go | 2 +- pkg/metric/BUILD | 1 + pkg/metric/metric.go | 2 +- pkg/p9/BUILD | 1 + pkg/p9/client.go | 2 +- pkg/p9/p9test/BUILD | 2 + pkg/p9/p9test/client_test.go | 2 +- pkg/p9/p9test/p9test.go | 2 +- pkg/p9/path_tree.go | 3 +- pkg/p9/pool.go | 2 +- pkg/p9/server.go | 2 +- pkg/p9/transport.go | 2 +- pkg/procid/BUILD | 2 + pkg/procid/procid_test.go | 3 +- pkg/rand/BUILD | 5 +- pkg/rand/rand_linux.go | 2 +- pkg/refs/BUILD | 2 + pkg/refs/refcounter.go | 2 +- pkg/refs/refcounter_test.go | 3 +- pkg/sentry/arch/BUILD | 1 + pkg/sentry/arch/arch_x86.go | 2 +- pkg/sentry/control/BUILD | 1 + pkg/sentry/control/pprof.go | 2 +- pkg/sentry/device/BUILD | 5 +- pkg/sentry/device/device.go | 2 +- pkg/sentry/fs/BUILD | 3 +- pkg/sentry/fs/copy_up.go | 2 +- pkg/sentry/fs/copy_up_test.go | 2 +- pkg/sentry/fs/dirent.go | 2 +- pkg/sentry/fs/dirent_cache.go | 3 +- pkg/sentry/fs/dirent_cache_limiter.go | 3 +- pkg/sentry/fs/fdpipe/BUILD | 1 + pkg/sentry/fs/fdpipe/pipe.go | 2 +- pkg/sentry/fs/fdpipe/pipe_state.go | 2 +- pkg/sentry/fs/file.go | 2 +- pkg/sentry/fs/file_overlay.go | 2 +- pkg/sentry/fs/filesystems.go | 2 +- pkg/sentry/fs/fs.go | 3 +- pkg/sentry/fs/fsutil/BUILD | 1 + pkg/sentry/fs/fsutil/host_file_mapper.go | 2 +- pkg/sentry/fs/fsutil/host_mappable.go | 2 +- pkg/sentry/fs/fsutil/inode.go | 3 +- pkg/sentry/fs/fsutil/inode_cached.go | 2 +- pkg/sentry/fs/gofer/BUILD | 1 + pkg/sentry/fs/gofer/inode.go | 2 +- pkg/sentry/fs/gofer/session.go | 2 +- pkg/sentry/fs/host/BUILD | 1 + pkg/sentry/fs/host/inode.go | 2 +- pkg/sentry/fs/host/socket.go | 2 +- pkg/sentry/fs/host/tty.go | 3 +- pkg/sentry/fs/inode.go | 3 +- pkg/sentry/fs/inode_inotify.go | 3 +- pkg/sentry/fs/inotify.go | 2 +- pkg/sentry/fs/inotify_watch.go | 2 +- pkg/sentry/fs/lock/BUILD | 1 + pkg/sentry/fs/lock/lock.go | 2 +- pkg/sentry/fs/mounts.go | 2 +- pkg/sentry/fs/overlay.go | 5 +- pkg/sentry/fs/proc/BUILD | 1 + pkg/sentry/fs/proc/seqfile/BUILD | 1 + pkg/sentry/fs/proc/seqfile/seqfile.go | 2 +- pkg/sentry/fs/proc/sys_net.go | 2 +- pkg/sentry/fs/ramfs/BUILD | 1 + pkg/sentry/fs/ramfs/dir.go | 2 +- pkg/sentry/fs/restore.go | 2 +- pkg/sentry/fs/tmpfs/BUILD | 1 + pkg/sentry/fs/tmpfs/inode_file.go | 2 +- pkg/sentry/fs/tty/BUILD | 1 + pkg/sentry/fs/tty/dir.go | 2 +- pkg/sentry/fs/tty/line_discipline.go | 2 +- pkg/sentry/fs/tty/queue.go | 3 +- pkg/sentry/fsimpl/ext/BUILD | 1 + pkg/sentry/fsimpl/ext/directory.go | 3 +- pkg/sentry/fsimpl/ext/filesystem.go | 2 +- pkg/sentry/fsimpl/ext/regular_file.go | 2 +- pkg/sentry/fsimpl/kernfs/BUILD | 2 + pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 2 +- pkg/sentry/fsimpl/kernfs/kernfs.go | 2 +- pkg/sentry/fsimpl/kernfs/kernfs_test.go | 2 +- pkg/sentry/fsimpl/tmpfs/BUILD | 1 + pkg/sentry/fsimpl/tmpfs/regular_file.go | 2 +- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 2 +- pkg/sentry/kernel/BUILD | 5 +- pkg/sentry/kernel/abstract_socket_namespace.go | 2 +- pkg/sentry/kernel/auth/BUILD | 3 +- pkg/sentry/kernel/auth/user_namespace.go | 2 +- pkg/sentry/kernel/epoll/BUILD | 1 + pkg/sentry/kernel/epoll/epoll.go | 2 +- pkg/sentry/kernel/eventfd/BUILD | 1 + pkg/sentry/kernel/eventfd/eventfd.go | 2 +- pkg/sentry/kernel/fasync/BUILD | 1 + pkg/sentry/kernel/fasync/fasync.go | 3 +- pkg/sentry/kernel/fd_table.go | 2 +- pkg/sentry/kernel/fd_table_test.go | 2 +- pkg/sentry/kernel/fs_context.go | 2 +- pkg/sentry/kernel/futex/BUILD | 8 +- pkg/sentry/kernel/futex/futex.go | 3 +- pkg/sentry/kernel/futex/futex_test.go | 2 +- pkg/sentry/kernel/kernel.go | 2 +- pkg/sentry/kernel/memevent/BUILD | 1 + pkg/sentry/kernel/memevent/memory_events.go | 2 +- pkg/sentry/kernel/pipe/BUILD | 1 + pkg/sentry/kernel/pipe/buffer.go | 2 +- pkg/sentry/kernel/pipe/node.go | 3 +- pkg/sentry/kernel/pipe/pipe.go | 2 +- pkg/sentry/kernel/pipe/pipe_util.go | 2 +- pkg/sentry/kernel/pipe/vfs.go | 3 +- pkg/sentry/kernel/semaphore/BUILD | 1 + pkg/sentry/kernel/semaphore/semaphore.go | 2 +- pkg/sentry/kernel/shm/BUILD | 1 + pkg/sentry/kernel/shm/shm.go | 2 +- pkg/sentry/kernel/signal_handlers.go | 3 +- pkg/sentry/kernel/signalfd/BUILD | 1 + pkg/sentry/kernel/signalfd/signalfd.go | 3 +- pkg/sentry/kernel/syscalls.go | 2 +- pkg/sentry/kernel/syslog.go | 3 +- pkg/sentry/kernel/task.go | 5 +- pkg/sentry/kernel/thread_group.go | 2 +- pkg/sentry/kernel/threads.go | 2 +- pkg/sentry/kernel/time/BUILD | 1 + pkg/sentry/kernel/time/time.go | 2 +- pkg/sentry/kernel/timekeeper.go | 2 +- pkg/sentry/kernel/tty.go | 2 +- pkg/sentry/kernel/uts_namespace.go | 3 +- pkg/sentry/limits/BUILD | 1 + pkg/sentry/limits/limits.go | 3 +- pkg/sentry/mm/BUILD | 2 +- pkg/sentry/mm/aio_context.go | 3 +- pkg/sentry/mm/mm.go | 8 +- pkg/sentry/pgalloc/BUILD | 1 + pkg/sentry/pgalloc/pgalloc.go | 2 +- pkg/sentry/platform/interrupt/BUILD | 1 + pkg/sentry/platform/interrupt/interrupt.go | 3 +- pkg/sentry/platform/kvm/BUILD | 1 + pkg/sentry/platform/kvm/address_space.go | 2 +- pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go | 2 - pkg/sentry/platform/kvm/kvm.go | 2 +- pkg/sentry/platform/kvm/machine.go | 2 +- pkg/sentry/platform/ptrace/BUILD | 1 + pkg/sentry/platform/ptrace/ptrace.go | 2 +- pkg/sentry/platform/ptrace/subprocess.go | 2 +- .../platform/ptrace/subprocess_linux_unsafe.go | 2 +- pkg/sentry/platform/ring0/defs.go | 2 +- pkg/sentry/platform/ring0/defs_amd64.go | 1 + pkg/sentry/platform/ring0/defs_arm64.go | 1 + pkg/sentry/platform/ring0/pagetables/BUILD | 5 +- pkg/sentry/platform/ring0/pagetables/pcids_x86.go | 2 +- pkg/sentry/socket/netlink/BUILD | 1 + pkg/sentry/socket/netlink/port/BUILD | 1 + pkg/sentry/socket/netlink/port/port.go | 3 +- pkg/sentry/socket/netlink/socket.go | 2 +- pkg/sentry/socket/netstack/BUILD | 1 + pkg/sentry/socket/netstack/netstack.go | 2 +- pkg/sentry/socket/rpcinet/conn/BUILD | 1 + pkg/sentry/socket/rpcinet/conn/conn.go | 2 +- pkg/sentry/socket/rpcinet/notifier/BUILD | 1 + pkg/sentry/socket/rpcinet/notifier/notifier.go | 2 +- pkg/sentry/socket/unix/transport/BUILD | 1 + pkg/sentry/socket/unix/transport/connectioned.go | 3 +- pkg/sentry/socket/unix/transport/queue.go | 3 +- pkg/sentry/socket/unix/transport/unix.go | 2 +- pkg/sentry/syscalls/linux/BUILD | 1 + pkg/sentry/syscalls/linux/error.go | 2 +- pkg/sentry/time/BUILD | 4 +- pkg/sentry/time/calibrated_clock.go | 2 +- pkg/sentry/usage/BUILD | 1 + pkg/sentry/usage/memory.go | 2 +- pkg/sentry/vfs/BUILD | 3 +- pkg/sentry/vfs/dentry.go | 2 +- pkg/sentry/vfs/file_description_impl_util.go | 2 +- pkg/sentry/vfs/mount_test.go | 3 +- pkg/sentry/vfs/mount_unsafe.go | 4 +- pkg/sentry/vfs/pathname.go | 3 +- pkg/sentry/vfs/resolving_path.go | 2 +- pkg/sentry/vfs/vfs.go | 2 +- pkg/sentry/watchdog/BUILD | 1 + pkg/sentry/watchdog/watchdog.go | 2 +- pkg/sync/BUILD | 53 +++++++ pkg/sync/LICENSE | 27 ++++ pkg/sync/README.md | 5 + pkg/sync/aliases.go | 37 +++++ pkg/sync/atomicptr_unsafe.go | 47 +++++++ pkg/sync/atomicptrtest/BUILD | 29 ++++ pkg/sync/atomicptrtest/atomicptr_test.go | 31 +++++ pkg/sync/downgradable_rwmutex_test.go | 150 ++++++++++++++++++++ pkg/sync/downgradable_rwmutex_unsafe.go | 146 ++++++++++++++++++++ pkg/sync/memmove_unsafe.go | 28 ++++ pkg/sync/norace_unsafe.go | 35 +++++ pkg/sync/race_unsafe.go | 41 ++++++ pkg/sync/seqatomic_unsafe.go | 72 ++++++++++ pkg/sync/seqatomictest/BUILD | 33 +++++ pkg/sync/seqatomictest/seqatomic_test.go | 132 ++++++++++++++++++ pkg/sync/seqcount.go | 149 ++++++++++++++++++++ pkg/sync/seqcount_test.go | 153 +++++++++++++++++++++ pkg/sync/syncutil.go | 7 + pkg/syncutil/BUILD | 52 ------- pkg/syncutil/LICENSE | 27 ---- pkg/syncutil/README.md | 5 - pkg/syncutil/atomicptr_unsafe.go | 47 ------- pkg/syncutil/atomicptrtest/BUILD | 29 ---- pkg/syncutil/atomicptrtest/atomicptr_test.go | 31 ----- pkg/syncutil/downgradable_rwmutex_test.go | 150 -------------------- pkg/syncutil/downgradable_rwmutex_unsafe.go | 146 -------------------- pkg/syncutil/memmove_unsafe.go | 28 ---- pkg/syncutil/norace_unsafe.go | 35 ----- pkg/syncutil/race_unsafe.go | 41 ------ pkg/syncutil/seqatomic_unsafe.go | 72 ---------- pkg/syncutil/seqatomictest/BUILD | 35 ----- pkg/syncutil/seqatomictest/seqatomic_test.go | 132 ------------------ pkg/syncutil/seqcount.go | 149 -------------------- pkg/syncutil/seqcount_test.go | 153 --------------------- pkg/syncutil/syncutil.go | 7 - pkg/tcpip/BUILD | 1 + pkg/tcpip/adapters/gonet/BUILD | 1 + pkg/tcpip/adapters/gonet/gonet.go | 2 +- pkg/tcpip/link/fdbased/BUILD | 1 + pkg/tcpip/link/fdbased/endpoint.go | 2 +- pkg/tcpip/link/sharedmem/BUILD | 2 + pkg/tcpip/link/sharedmem/pipe/BUILD | 1 + pkg/tcpip/link/sharedmem/pipe/pipe_test.go | 3 +- pkg/tcpip/link/sharedmem/sharedmem.go | 2 +- pkg/tcpip/link/sharedmem/sharedmem_test.go | 2 +- pkg/tcpip/network/fragmentation/BUILD | 1 + pkg/tcpip/network/fragmentation/fragmentation.go | 2 +- pkg/tcpip/network/fragmentation/reassembler.go | 2 +- pkg/tcpip/ports/BUILD | 1 + pkg/tcpip/ports/ports.go | 2 +- pkg/tcpip/stack/BUILD | 2 + pkg/tcpip/stack/linkaddrcache.go | 2 +- pkg/tcpip/stack/linkaddrcache_test.go | 2 +- pkg/tcpip/stack/nic.go | 2 +- pkg/tcpip/stack/stack.go | 2 +- pkg/tcpip/stack/transport_demuxer.go | 2 +- pkg/tcpip/tcpip.go | 2 +- pkg/tcpip/transport/icmp/BUILD | 1 + pkg/tcpip/transport/icmp/endpoint.go | 3 +- pkg/tcpip/transport/packet/BUILD | 1 + pkg/tcpip/transport/packet/endpoint.go | 3 +- pkg/tcpip/transport/raw/BUILD | 1 + pkg/tcpip/transport/raw/endpoint.go | 3 +- pkg/tcpip/transport/tcp/BUILD | 1 + pkg/tcpip/transport/tcp/accept.go | 2 +- pkg/tcpip/transport/tcp/connect.go | 2 +- pkg/tcpip/transport/tcp/endpoint.go | 2 +- pkg/tcpip/transport/tcp/endpoint_state.go | 2 +- pkg/tcpip/transport/tcp/forwarder.go | 3 +- pkg/tcpip/transport/tcp/protocol.go | 2 +- pkg/tcpip/transport/tcp/segment_queue.go | 2 +- pkg/tcpip/transport/tcp/snd.go | 2 +- pkg/tcpip/transport/udp/BUILD | 1 + pkg/tcpip/transport/udp/endpoint.go | 3 +- pkg/tmutex/BUILD | 1 + pkg/tmutex/tmutex_test.go | 3 +- pkg/unet/BUILD | 1 + pkg/unet/unet_test.go | 3 +- pkg/urpc/BUILD | 1 + pkg/urpc/urpc.go | 2 +- pkg/waiter/BUILD | 1 + pkg/waiter/waiter.go | 2 +- runsc/boot/BUILD | 2 + runsc/boot/compat.go | 2 +- runsc/boot/limits.go | 2 +- runsc/boot/loader.go | 2 +- runsc/boot/loader_test.go | 2 +- runsc/cmd/BUILD | 1 + runsc/cmd/create.go | 1 + runsc/cmd/gofer.go | 2 +- runsc/cmd/start.go | 1 + runsc/container/BUILD | 2 + runsc/container/console_test.go | 2 +- runsc/container/container_test.go | 2 +- runsc/container/multi_container_test.go | 2 +- runsc/container/state_file.go | 2 +- runsc/fsgofer/BUILD | 1 + runsc/fsgofer/fsgofer.go | 2 +- runsc/sandbox/BUILD | 1 + runsc/sandbox/sandbox.go | 2 +- runsc/testutil/BUILD | 1 + runsc/testutil/testutil.go | 2 +- 303 files changed, 1507 insertions(+), 1368 deletions(-) create mode 100644 pkg/sync/BUILD create mode 100644 pkg/sync/LICENSE create mode 100644 pkg/sync/README.md create mode 100644 pkg/sync/aliases.go create mode 100644 pkg/sync/atomicptr_unsafe.go create mode 100644 pkg/sync/atomicptrtest/BUILD create mode 100644 pkg/sync/atomicptrtest/atomicptr_test.go create mode 100644 pkg/sync/downgradable_rwmutex_test.go create mode 100644 pkg/sync/downgradable_rwmutex_unsafe.go create mode 100644 pkg/sync/memmove_unsafe.go create mode 100644 pkg/sync/norace_unsafe.go create mode 100644 pkg/sync/race_unsafe.go create mode 100644 pkg/sync/seqatomic_unsafe.go create mode 100644 pkg/sync/seqatomictest/BUILD create mode 100644 pkg/sync/seqatomictest/seqatomic_test.go create mode 100644 pkg/sync/seqcount.go create mode 100644 pkg/sync/seqcount_test.go create mode 100644 pkg/sync/syncutil.go delete mode 100644 pkg/syncutil/BUILD delete mode 100644 pkg/syncutil/LICENSE delete mode 100644 pkg/syncutil/README.md delete mode 100644 pkg/syncutil/atomicptr_unsafe.go delete mode 100644 pkg/syncutil/atomicptrtest/BUILD delete mode 100644 pkg/syncutil/atomicptrtest/atomicptr_test.go delete mode 100644 pkg/syncutil/downgradable_rwmutex_test.go delete mode 100644 pkg/syncutil/downgradable_rwmutex_unsafe.go delete mode 100644 pkg/syncutil/memmove_unsafe.go delete mode 100644 pkg/syncutil/norace_unsafe.go delete mode 100644 pkg/syncutil/race_unsafe.go delete mode 100644 pkg/syncutil/seqatomic_unsafe.go delete mode 100644 pkg/syncutil/seqatomictest/BUILD delete mode 100644 pkg/syncutil/seqatomictest/seqatomic_test.go delete mode 100644 pkg/syncutil/seqcount.go delete mode 100644 pkg/syncutil/seqcount_test.go delete mode 100644 pkg/syncutil/syncutil.go (limited to 'pkg/sentry/fs') diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD index 6bc486b62..d99e37b40 100644 --- a/pkg/amutex/BUILD +++ b/pkg/amutex/BUILD @@ -15,4 +15,5 @@ go_test( size = "small", srcs = ["amutex_test.go"], embed = [":amutex"], + deps = ["//pkg/sync"], ) diff --git a/pkg/amutex/amutex_test.go b/pkg/amutex/amutex_test.go index 1d7f45641..8a3952f2a 100644 --- a/pkg/amutex/amutex_test.go +++ b/pkg/amutex/amutex_test.go @@ -15,9 +15,10 @@ package amutex import ( - "sync" "testing" "time" + + "gvisor.dev/gvisor/pkg/sync" ) type sleeper struct { diff --git a/pkg/atomicbitops/BUILD b/pkg/atomicbitops/BUILD index 36beaade9..6403c60c2 100644 --- a/pkg/atomicbitops/BUILD +++ b/pkg/atomicbitops/BUILD @@ -20,4 +20,5 @@ go_test( size = "small", srcs = ["atomic_bitops_test.go"], embed = [":atomicbitops"], + deps = ["//pkg/sync"], ) diff --git a/pkg/atomicbitops/atomic_bitops_test.go b/pkg/atomicbitops/atomic_bitops_test.go index 965e9be79..9466d3e23 100644 --- a/pkg/atomicbitops/atomic_bitops_test.go +++ b/pkg/atomicbitops/atomic_bitops_test.go @@ -16,8 +16,9 @@ package atomicbitops import ( "runtime" - "sync" "testing" + + "gvisor.dev/gvisor/pkg/sync" ) const iterations = 100 diff --git a/pkg/compressio/BUILD b/pkg/compressio/BUILD index a0b21d4bd..2bb581b18 100644 --- a/pkg/compressio/BUILD +++ b/pkg/compressio/BUILD @@ -8,7 +8,10 @@ go_library( srcs = ["compressio.go"], importpath = "gvisor.dev/gvisor/pkg/compressio", visibility = ["//:sandbox"], - deps = ["//pkg/binary"], + deps = [ + "//pkg/binary", + "//pkg/sync", + ], ) go_test( diff --git a/pkg/compressio/compressio.go b/pkg/compressio/compressio.go index 3b0bb086e..5f52cbe74 100644 --- a/pkg/compressio/compressio.go +++ b/pkg/compressio/compressio.go @@ -52,9 +52,9 @@ import ( "hash" "io" "runtime" - "sync" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sync" ) var bufPool = sync.Pool{ diff --git a/pkg/control/server/BUILD b/pkg/control/server/BUILD index 21adf3adf..adbd1e3f8 100644 --- a/pkg/control/server/BUILD +++ b/pkg/control/server/BUILD @@ -9,6 +9,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/log", + "//pkg/sync", "//pkg/unet", "//pkg/urpc", ], diff --git a/pkg/control/server/server.go b/pkg/control/server/server.go index a56152d10..41abe1f2d 100644 --- a/pkg/control/server/server.go +++ b/pkg/control/server/server.go @@ -22,9 +22,9 @@ package server import ( "os" - "sync" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/urpc" ) diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD index 0b4b7cc44..9d68682c7 100644 --- a/pkg/eventchannel/BUILD +++ b/pkg/eventchannel/BUILD @@ -15,6 +15,7 @@ go_library( deps = [ ":eventchannel_go_proto", "//pkg/log", + "//pkg/sync", "//pkg/unet", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_golang_protobuf//ptypes:go_default_library_gen", @@ -40,6 +41,7 @@ go_test( srcs = ["event_test.go"], embed = [":eventchannel"], deps = [ + "//pkg/sync", "@com_github_golang_protobuf//proto:go_default_library", ], ) diff --git a/pkg/eventchannel/event.go b/pkg/eventchannel/event.go index d37ad0428..9a29c58bd 100644 --- a/pkg/eventchannel/event.go +++ b/pkg/eventchannel/event.go @@ -22,13 +22,13 @@ package eventchannel import ( "encoding/binary" "fmt" - "sync" "syscall" "github.com/golang/protobuf/proto" "github.com/golang/protobuf/ptypes" pb "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" ) diff --git a/pkg/eventchannel/event_test.go b/pkg/eventchannel/event_test.go index 3649097d6..7f41b4a27 100644 --- a/pkg/eventchannel/event_test.go +++ b/pkg/eventchannel/event_test.go @@ -16,11 +16,11 @@ package eventchannel import ( "fmt" - "sync" "testing" "time" "github.com/golang/protobuf/proto" + "gvisor.dev/gvisor/pkg/sync" ) // testEmitter is an emitter that can be used in tests. It records all events diff --git a/pkg/fdchannel/BUILD b/pkg/fdchannel/BUILD index 56495cbd9..b0478c672 100644 --- a/pkg/fdchannel/BUILD +++ b/pkg/fdchannel/BUILD @@ -15,4 +15,5 @@ go_test( size = "small", srcs = ["fdchannel_test.go"], embed = [":fdchannel"], + deps = ["//pkg/sync"], ) diff --git a/pkg/fdchannel/fdchannel_test.go b/pkg/fdchannel/fdchannel_test.go index 5d01dc636..7a8a63a59 100644 --- a/pkg/fdchannel/fdchannel_test.go +++ b/pkg/fdchannel/fdchannel_test.go @@ -17,10 +17,11 @@ package fdchannel import ( "io/ioutil" "os" - "sync" "syscall" "testing" "time" + + "gvisor.dev/gvisor/pkg/sync" ) func TestSendRecvFD(t *testing.T) { diff --git a/pkg/fdnotifier/BUILD b/pkg/fdnotifier/BUILD index aca2d8a82..91a202a30 100644 --- a/pkg/fdnotifier/BUILD +++ b/pkg/fdnotifier/BUILD @@ -11,6 +11,7 @@ go_library( importpath = "gvisor.dev/gvisor/pkg/fdnotifier", visibility = ["//:sandbox"], deps = [ + "//pkg/sync", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/fdnotifier/fdnotifier.go b/pkg/fdnotifier/fdnotifier.go index f4aae1953..a6b63c982 100644 --- a/pkg/fdnotifier/fdnotifier.go +++ b/pkg/fdnotifier/fdnotifier.go @@ -22,10 +22,10 @@ package fdnotifier import ( "fmt" - "sync" "syscall" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/flipcall/BUILD b/pkg/flipcall/BUILD index e590a71ba..85bd83af1 100644 --- a/pkg/flipcall/BUILD +++ b/pkg/flipcall/BUILD @@ -19,7 +19,7 @@ go_library( "//pkg/abi/linux", "//pkg/log", "//pkg/memutil", - "//pkg/syncutil", + "//pkg/sync", ], ) @@ -31,4 +31,5 @@ go_test( "flipcall_test.go", ], embed = [":flipcall"], + deps = ["//pkg/sync"], ) diff --git a/pkg/flipcall/flipcall_example_test.go b/pkg/flipcall/flipcall_example_test.go index 8d88b845d..2e28a149a 100644 --- a/pkg/flipcall/flipcall_example_test.go +++ b/pkg/flipcall/flipcall_example_test.go @@ -17,7 +17,8 @@ package flipcall import ( "bytes" "fmt" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) func Example() { diff --git a/pkg/flipcall/flipcall_test.go b/pkg/flipcall/flipcall_test.go index 168a487ec..33fd55a44 100644 --- a/pkg/flipcall/flipcall_test.go +++ b/pkg/flipcall/flipcall_test.go @@ -16,9 +16,10 @@ package flipcall import ( "runtime" - "sync" "testing" "time" + + "gvisor.dev/gvisor/pkg/sync" ) var testPacketWindowSize = pageSize diff --git a/pkg/flipcall/flipcall_unsafe.go b/pkg/flipcall/flipcall_unsafe.go index 27b8939fc..ac974b232 100644 --- a/pkg/flipcall/flipcall_unsafe.go +++ b/pkg/flipcall/flipcall_unsafe.go @@ -18,7 +18,7 @@ import ( "reflect" "unsafe" - "gvisor.dev/gvisor/pkg/syncutil" + "gvisor.dev/gvisor/pkg/sync" ) // Packets consist of a 16-byte header followed by an arbitrarily-sized @@ -75,13 +75,13 @@ func (ep *Endpoint) Data() []byte { var ioSync int64 func raceBecomeActive() { - if syncutil.RaceEnabled { - syncutil.RaceAcquire((unsafe.Pointer)(&ioSync)) + if sync.RaceEnabled { + sync.RaceAcquire((unsafe.Pointer)(&ioSync)) } } func raceBecomeInactive() { - if syncutil.RaceEnabled { - syncutil.RaceReleaseMerge((unsafe.Pointer)(&ioSync)) + if sync.RaceEnabled { + sync.RaceReleaseMerge((unsafe.Pointer)(&ioSync)) } } diff --git a/pkg/gate/BUILD b/pkg/gate/BUILD index 4b9321711..f22bd070d 100644 --- a/pkg/gate/BUILD +++ b/pkg/gate/BUILD @@ -19,5 +19,6 @@ go_test( ], deps = [ ":gate", + "//pkg/sync", ], ) diff --git a/pkg/gate/gate_test.go b/pkg/gate/gate_test.go index 5dbd8d712..850693df8 100644 --- a/pkg/gate/gate_test.go +++ b/pkg/gate/gate_test.go @@ -15,11 +15,11 @@ package gate_test import ( - "sync" "testing" "time" "gvisor.dev/gvisor/pkg/gate" + "gvisor.dev/gvisor/pkg/sync" ) func TestBasicEnter(t *testing.T) { diff --git a/pkg/linewriter/BUILD b/pkg/linewriter/BUILD index a5d980d14..bcde6d308 100644 --- a/pkg/linewriter/BUILD +++ b/pkg/linewriter/BUILD @@ -8,6 +8,7 @@ go_library( srcs = ["linewriter.go"], importpath = "gvisor.dev/gvisor/pkg/linewriter", visibility = ["//visibility:public"], + deps = ["//pkg/sync"], ) go_test( diff --git a/pkg/linewriter/linewriter.go b/pkg/linewriter/linewriter.go index cd6e4e2ce..a1b1285d4 100644 --- a/pkg/linewriter/linewriter.go +++ b/pkg/linewriter/linewriter.go @@ -17,7 +17,8 @@ package linewriter import ( "bytes" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) // Writer is an io.Writer which buffers input, flushing diff --git a/pkg/log/BUILD b/pkg/log/BUILD index fc5f5779b..0df0f2849 100644 --- a/pkg/log/BUILD +++ b/pkg/log/BUILD @@ -16,7 +16,10 @@ go_library( visibility = [ "//visibility:public", ], - deps = ["//pkg/linewriter"], + deps = [ + "//pkg/linewriter", + "//pkg/sync", + ], ) go_test( diff --git a/pkg/log/log.go b/pkg/log/log.go index 9387586e6..91a81b288 100644 --- a/pkg/log/log.go +++ b/pkg/log/log.go @@ -25,12 +25,12 @@ import ( stdlog "log" "os" "runtime" - "sync" "sync/atomic" "syscall" "time" "gvisor.dev/gvisor/pkg/linewriter" + "gvisor.dev/gvisor/pkg/sync" ) // Level is the log level. diff --git a/pkg/metric/BUILD b/pkg/metric/BUILD index dd6ca6d39..9145f3233 100644 --- a/pkg/metric/BUILD +++ b/pkg/metric/BUILD @@ -14,6 +14,7 @@ go_library( ":metric_go_proto", "//pkg/eventchannel", "//pkg/log", + "//pkg/sync", ], ) diff --git a/pkg/metric/metric.go b/pkg/metric/metric.go index eadde06e4..93d4f2b8c 100644 --- a/pkg/metric/metric.go +++ b/pkg/metric/metric.go @@ -18,12 +18,12 @@ package metric import ( "errors" "fmt" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/log" pb "gvisor.dev/gvisor/pkg/metric/metric_go_proto" + "gvisor.dev/gvisor/pkg/sync" ) var ( diff --git a/pkg/p9/BUILD b/pkg/p9/BUILD index f32244c69..a3e05c96d 100644 --- a/pkg/p9/BUILD +++ b/pkg/p9/BUILD @@ -29,6 +29,7 @@ go_library( "//pkg/fdchannel", "//pkg/flipcall", "//pkg/log", + "//pkg/sync", "//pkg/unet", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/p9/client.go b/pkg/p9/client.go index 221516c6c..4045e41fa 100644 --- a/pkg/p9/client.go +++ b/pkg/p9/client.go @@ -17,12 +17,12 @@ package p9 import ( "errors" "fmt" - "sync" "syscall" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/flipcall" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" ) diff --git a/pkg/p9/p9test/BUILD b/pkg/p9/p9test/BUILD index 28707c0ca..f4edd68b2 100644 --- a/pkg/p9/p9test/BUILD +++ b/pkg/p9/p9test/BUILD @@ -70,6 +70,7 @@ go_library( "//pkg/fd", "//pkg/log", "//pkg/p9", + "//pkg/sync", "//pkg/unet", "@com_github_golang_mock//gomock:go_default_library", ], @@ -83,6 +84,7 @@ go_test( deps = [ "//pkg/fd", "//pkg/p9", + "//pkg/sync", "@com_github_golang_mock//gomock:go_default_library", ], ) diff --git a/pkg/p9/p9test/client_test.go b/pkg/p9/p9test/client_test.go index 6e758148d..6e7bb3db2 100644 --- a/pkg/p9/p9test/client_test.go +++ b/pkg/p9/p9test/client_test.go @@ -22,7 +22,6 @@ import ( "os" "reflect" "strings" - "sync" "syscall" "testing" "time" @@ -30,6 +29,7 @@ import ( "github.com/golang/mock/gomock" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sync" ) func TestPanic(t *testing.T) { diff --git a/pkg/p9/p9test/p9test.go b/pkg/p9/p9test/p9test.go index 4d3271b37..dd8b01b6d 100644 --- a/pkg/p9/p9test/p9test.go +++ b/pkg/p9/p9test/p9test.go @@ -17,13 +17,13 @@ package p9test import ( "fmt" - "sync" "sync/atomic" "syscall" "testing" "github.com/golang/mock/gomock" "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" ) diff --git a/pkg/p9/path_tree.go b/pkg/p9/path_tree.go index 865459411..72ef53313 100644 --- a/pkg/p9/path_tree.go +++ b/pkg/p9/path_tree.go @@ -16,7 +16,8 @@ package p9 import ( "fmt" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) // pathNode is a single node in a path traversal. diff --git a/pkg/p9/pool.go b/pkg/p9/pool.go index 52de889e1..2b14a5ce3 100644 --- a/pkg/p9/pool.go +++ b/pkg/p9/pool.go @@ -15,7 +15,7 @@ package p9 import ( - "sync" + "gvisor.dev/gvisor/pkg/sync" ) // pool is a simple allocator. diff --git a/pkg/p9/server.go b/pkg/p9/server.go index 40b8fa023..fdfa83648 100644 --- a/pkg/p9/server.go +++ b/pkg/p9/server.go @@ -17,7 +17,6 @@ package p9 import ( "io" "runtime/debug" - "sync" "sync/atomic" "syscall" @@ -25,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/fdchannel" "gvisor.dev/gvisor/pkg/flipcall" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" ) diff --git a/pkg/p9/transport.go b/pkg/p9/transport.go index 6e8b4bbcd..9c11e28ce 100644 --- a/pkg/p9/transport.go +++ b/pkg/p9/transport.go @@ -19,11 +19,11 @@ import ( "fmt" "io" "io/ioutil" - "sync" "syscall" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" ) diff --git a/pkg/procid/BUILD b/pkg/procid/BUILD index 078f084b2..b506813f0 100644 --- a/pkg/procid/BUILD +++ b/pkg/procid/BUILD @@ -21,6 +21,7 @@ go_test( "procid_test.go", ], embed = [":procid"], + deps = ["//pkg/sync"], ) go_test( @@ -31,4 +32,5 @@ go_test( "procid_test.go", ], embed = [":procid"], + deps = ["//pkg/sync"], ) diff --git a/pkg/procid/procid_test.go b/pkg/procid/procid_test.go index 88dd0b3ae..9ec08c3d6 100644 --- a/pkg/procid/procid_test.go +++ b/pkg/procid/procid_test.go @@ -17,9 +17,10 @@ package procid import ( "os" "runtime" - "sync" "syscall" "testing" + + "gvisor.dev/gvisor/pkg/sync" ) // runOnMain is used to send functions to run on the main (initial) thread. diff --git a/pkg/rand/BUILD b/pkg/rand/BUILD index f4f2001f3..9d5b4859b 100644 --- a/pkg/rand/BUILD +++ b/pkg/rand/BUILD @@ -10,5 +10,8 @@ go_library( ], importpath = "gvisor.dev/gvisor/pkg/rand", visibility = ["//:sandbox"], - deps = ["@org_golang_x_sys//unix:go_default_library"], + deps = [ + "//pkg/sync", + "@org_golang_x_sys//unix:go_default_library", + ], ) diff --git a/pkg/rand/rand_linux.go b/pkg/rand/rand_linux.go index 2b92db3e6..0bdad5fad 100644 --- a/pkg/rand/rand_linux.go +++ b/pkg/rand/rand_linux.go @@ -19,9 +19,9 @@ package rand import ( "crypto/rand" "io" - "sync" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/sync" ) // reader implements an io.Reader that returns pseudorandom bytes. diff --git a/pkg/refs/BUILD b/pkg/refs/BUILD index 7ad59dfd7..974d9af9b 100644 --- a/pkg/refs/BUILD +++ b/pkg/refs/BUILD @@ -27,6 +27,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/log", + "//pkg/sync", ], ) @@ -35,4 +36,5 @@ go_test( size = "small", srcs = ["refcounter_test.go"], embed = [":refs"], + deps = ["//pkg/sync"], ) diff --git a/pkg/refs/refcounter.go b/pkg/refs/refcounter.go index ad69e0757..c45ba8200 100644 --- a/pkg/refs/refcounter.go +++ b/pkg/refs/refcounter.go @@ -21,10 +21,10 @@ import ( "fmt" "reflect" "runtime" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" ) // RefCounter is the interface to be implemented by objects that are reference diff --git a/pkg/refs/refcounter_test.go b/pkg/refs/refcounter_test.go index ffd3d3f07..1ab4a4440 100644 --- a/pkg/refs/refcounter_test.go +++ b/pkg/refs/refcounter_test.go @@ -16,8 +16,9 @@ package refs import ( "reflect" - "sync" "testing" + + "gvisor.dev/gvisor/pkg/sync" ) type testCounter struct { diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index 18c73cc24..ae3e364cd 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/sentry/context", "//pkg/sentry/limits", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go index 9294ac773..9f41e566f 100644 --- a/pkg/sentry/arch/arch_x86.go +++ b/pkg/sentry/arch/arch_x86.go @@ -19,7 +19,6 @@ package arch import ( "fmt" "io" - "sync" "syscall" "gvisor.dev/gvisor/pkg/binary" @@ -27,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/log" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index 5522cecd0..2561a6109 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -30,6 +30,7 @@ go_library( "//pkg/sentry/strace", "//pkg/sentry/usage", "//pkg/sentry/watchdog", + "//pkg/sync", "//pkg/tcpip/link/sniffer", "//pkg/urpc", ], diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go index e1f2fea60..151808911 100644 --- a/pkg/sentry/control/pprof.go +++ b/pkg/sentry/control/pprof.go @@ -19,10 +19,10 @@ import ( "runtime" "runtime/pprof" "runtime/trace" - "sync" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/urpc" ) diff --git a/pkg/sentry/device/BUILD b/pkg/sentry/device/BUILD index 1098ed777..97fa1512c 100644 --- a/pkg/sentry/device/BUILD +++ b/pkg/sentry/device/BUILD @@ -8,7 +8,10 @@ go_library( srcs = ["device.go"], importpath = "gvisor.dev/gvisor/pkg/sentry/device", visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/abi/linux"], + deps = [ + "//pkg/abi/linux", + "//pkg/sync", + ], ) go_test( diff --git a/pkg/sentry/device/device.go b/pkg/sentry/device/device.go index 47945d1a7..69e71e322 100644 --- a/pkg/sentry/device/device.go +++ b/pkg/sentry/device/device.go @@ -19,10 +19,10 @@ package device import ( "bytes" "fmt" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sync" ) // Registry tracks all simple devices and related state on the system for diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index c035ffff7..7d5d72d5a 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -68,7 +68,7 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/usermem", "//pkg/state", - "//pkg/syncutil", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], @@ -115,6 +115,7 @@ go_test( "//pkg/sentry/fs/tmpfs", "//pkg/sentry/kernel/contexttest", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index 9ac62c84d..734177e90 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -17,12 +17,12 @@ package fs import ( "fmt" "io" - "sync" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/copy_up_test.go b/pkg/sentry/fs/copy_up_test.go index 1d80bf15a..738580c5f 100644 --- a/pkg/sentry/fs/copy_up_test.go +++ b/pkg/sentry/fs/copy_up_test.go @@ -19,13 +19,13 @@ import ( "crypto/rand" "fmt" "io" - "sync" "testing" "gvisor.dev/gvisor/pkg/sentry/fs" _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) const ( diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index 3cb73bd78..31fc4d87b 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -18,7 +18,6 @@ import ( "fmt" "path" "sort" - "sync" "sync/atomic" "syscall" @@ -28,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/dirent_cache.go b/pkg/sentry/fs/dirent_cache.go index 60a15a275..25514ace4 100644 --- a/pkg/sentry/fs/dirent_cache.go +++ b/pkg/sentry/fs/dirent_cache.go @@ -16,7 +16,8 @@ package fs import ( "fmt" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) // DirentCache is an LRU cache of Dirents. The Dirent's refCount is diff --git a/pkg/sentry/fs/dirent_cache_limiter.go b/pkg/sentry/fs/dirent_cache_limiter.go index ebb80bd50..525ee25f9 100644 --- a/pkg/sentry/fs/dirent_cache_limiter.go +++ b/pkg/sentry/fs/dirent_cache_limiter.go @@ -16,7 +16,8 @@ package fs import ( "fmt" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) // DirentCacheLimiter acts as a global limit for all dirent caches in the diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD index 277ee4c31..cc43de69d 100644 --- a/pkg/sentry/fs/fdpipe/BUILD +++ b/pkg/sentry/fs/fdpipe/BUILD @@ -23,6 +23,7 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/safemem", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go index 669ffcb75..5b6cfeb0a 100644 --- a/pkg/sentry/fs/fdpipe/pipe.go +++ b/pkg/sentry/fs/fdpipe/pipe.go @@ -17,7 +17,6 @@ package fdpipe import ( "os" - "sync" "syscall" "gvisor.dev/gvisor/pkg/fd" @@ -29,6 +28,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/fdpipe/pipe_state.go b/pkg/sentry/fs/fdpipe/pipe_state.go index 29175fb3d..cee87f726 100644 --- a/pkg/sentry/fs/fdpipe/pipe_state.go +++ b/pkg/sentry/fs/fdpipe/pipe_state.go @@ -17,10 +17,10 @@ package fdpipe import ( "fmt" "io/ioutil" - "sync" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sync" ) // beforeSave is invoked by stateify. diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index a2f966cb6..7c4586296 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -16,7 +16,6 @@ package fs import ( "math" - "sync" "sync/atomic" "time" @@ -29,6 +28,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go index 225e40186..8a633b1ba 100644 --- a/pkg/sentry/fs/file_overlay.go +++ b/pkg/sentry/fs/file_overlay.go @@ -16,13 +16,13 @@ package fs import ( "io" - "sync" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/filesystems.go b/pkg/sentry/fs/filesystems.go index b157fd228..c5b51620a 100644 --- a/pkg/sentry/fs/filesystems.go +++ b/pkg/sentry/fs/filesystems.go @@ -18,9 +18,9 @@ import ( "fmt" "sort" "strings" - "sync" "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sync" ) // FilesystemFlags matches include/linux/fs.h:file_system_type.fs_flags. diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go index 8b2a5e6b2..26abf49e2 100644 --- a/pkg/sentry/fs/fs.go +++ b/pkg/sentry/fs/fs.go @@ -54,10 +54,9 @@ package fs import ( - "sync" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sync" ) var ( diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 9ca695a95..945b6270d 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -93,6 +93,7 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/usermem", "//pkg/state", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go index b06a71cc2..837fc70b5 100644 --- a/pkg/sentry/fs/fsutil/host_file_mapper.go +++ b/pkg/sentry/fs/fsutil/host_file_mapper.go @@ -16,7 +16,6 @@ package fsutil import ( "fmt" - "sync" "syscall" "gvisor.dev/gvisor/pkg/log" @@ -24,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) // HostFileMapper caches mappings of an arbitrary host file descriptor. It is diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go index 30475f340..a625f0e26 100644 --- a/pkg/sentry/fs/fsutil/host_mappable.go +++ b/pkg/sentry/fs/fsutil/host_mappable.go @@ -16,7 +16,6 @@ package fsutil import ( "math" - "sync" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -24,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) // HostMappable implements memmap.Mappable and platform.File over a diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index 4e100a402..adf5ec69c 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -15,13 +15,12 @@ package fsutil import ( - "sync" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 798920d18..20a014402 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -17,7 +17,6 @@ package fsutil import ( "fmt" "io" - "sync" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/context" @@ -30,6 +29,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) // Lock order (compare the lock order model in mm/mm.go): diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index 4a005c605..fd870e8e1 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -44,6 +44,7 @@ go_library( "//pkg/sentry/safemem", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/unet", diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 91263ebdc..245fe2ef1 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -16,7 +16,6 @@ package gofer import ( "errors" - "sync" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" @@ -31,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/host" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 4e358a46a..edc796ce0 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -16,7 +16,6 @@ package gofer import ( "fmt" - "sync" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/refs" @@ -25,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" ) diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 23daeb528..2b581aa69 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -50,6 +50,7 @@ go_library( "//pkg/sentry/unimpl", "//pkg/sentry/uniqueid", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index a6e4a09e3..873a1c52d 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -15,7 +15,6 @@ package host import ( - "sync" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" @@ -28,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index 107336a3e..c076d5bdd 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -16,7 +16,6 @@ package host import ( "fmt" - "sync" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" @@ -30,6 +29,7 @@ import ( unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 90331e3b2..753ef8cd6 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -15,8 +15,6 @@ package host import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/context" @@ -24,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 91e2fde2f..468043df0 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -15,8 +15,6 @@ package fs import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" @@ -26,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/inode_inotify.go b/pkg/sentry/fs/inode_inotify.go index 0f2a66a79..efd3c962b 100644 --- a/pkg/sentry/fs/inode_inotify.go +++ b/pkg/sentry/fs/inode_inotify.go @@ -16,7 +16,8 @@ package fs import ( "fmt" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) // Watches is the collection of inotify watches on an inode. diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index ba3e0233d..cc7dd1c92 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -16,7 +16,6 @@ package fs import ( "io" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -25,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/inotify_watch.go b/pkg/sentry/fs/inotify_watch.go index 0aa0a5e9b..900cba3ca 100644 --- a/pkg/sentry/fs/inotify_watch.go +++ b/pkg/sentry/fs/inotify_watch.go @@ -15,10 +15,10 @@ package fs import ( - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sync" ) // Watch represent a particular inotify watch created by inotify_add_watch. diff --git a/pkg/sentry/fs/lock/BUILD b/pkg/sentry/fs/lock/BUILD index 8d62642e7..2c332a82a 100644 --- a/pkg/sentry/fs/lock/BUILD +++ b/pkg/sentry/fs/lock/BUILD @@ -44,6 +44,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", + "//pkg/sync", "//pkg/waiter", ], ) diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go index 636484424..41b040818 100644 --- a/pkg/sentry/fs/lock/lock.go +++ b/pkg/sentry/fs/lock/lock.go @@ -52,9 +52,9 @@ package lock import ( "fmt" "math" - "sync" "syscall" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index ac0398bd9..db3dfd096 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -19,7 +19,6 @@ import ( "math" "path" "strings" - "sync" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" @@ -27,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index 25573e986..4cad55327 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -17,13 +17,12 @@ package fs import ( "fmt" "strings" - "sync" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syncutil" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) @@ -199,7 +198,7 @@ type overlayEntry struct { upper *Inode // dirCacheMu protects dirCache. - dirCacheMu syncutil.DowngradableRWMutex `state:"nosave"` + dirCacheMu sync.DowngradableRWMutex `state:"nosave"` // dirCache is cache of DentAttrs from upper and lower Inodes. dirCache *SortedDentryMap diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index 75cbb0622..94d46ab1b 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -51,6 +51,7 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", "//pkg/tcpip/header", "//pkg/waiter", diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD index fe7067be1..38b246dff 100644 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ b/pkg/sentry/fs/proc/seqfile/BUILD @@ -16,6 +16,7 @@ go_library( "//pkg/sentry/fs/proc/device", "//pkg/sentry/kernel/time", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go index 5fe823000..f9af191d5 100644 --- a/pkg/sentry/fs/proc/seqfile/seqfile.go +++ b/pkg/sentry/fs/proc/seqfile/seqfile.go @@ -17,7 +17,6 @@ package seqfile import ( "io" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" @@ -26,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index bd93f83fa..a37e1fa06 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -17,7 +17,6 @@ package proc import ( "fmt" "io" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" @@ -27,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD index 012cb3e44..3fb7b0633 100644 --- a/pkg/sentry/fs/ramfs/BUILD +++ b/pkg/sentry/fs/ramfs/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index 78e082b8e..dcbb8eb2e 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -17,7 +17,6 @@ package ramfs import ( "fmt" - "sync" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" @@ -25,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/restore.go b/pkg/sentry/fs/restore.go index f10168125..64c6a6ae9 100644 --- a/pkg/sentry/fs/restore.go +++ b/pkg/sentry/fs/restore.go @@ -15,7 +15,7 @@ package fs import ( - "sync" + "gvisor.dev/gvisor/pkg/sync" ) // RestoreEnvironment is the restore environment for file systems. It consists diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD index 59ce400c2..3400b940c 100644 --- a/pkg/sentry/fs/tmpfs/BUILD +++ b/pkg/sentry/fs/tmpfs/BUILD @@ -31,6 +31,7 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index f86dfaa36..f1c87fe41 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -17,7 +17,6 @@ package tmpfs import ( "fmt" "io" - "sync" "time" "gvisor.dev/gvisor/pkg/abi/linux" @@ -31,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index 95ad98cb0..f6f60d0cf 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -30,6 +30,7 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/unimpl", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index 2f639c823..88aa66b24 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -19,7 +19,6 @@ import ( "fmt" "math" "strconv" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" @@ -28,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go index 7cc0eb409..894964260 100644 --- a/pkg/sentry/fs/tty/line_discipline.go +++ b/pkg/sentry/fs/tty/line_discipline.go @@ -16,13 +16,13 @@ package tty import ( "bytes" - "sync" "unicode/utf8" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index 231e4e6eb..8b5d4699a 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -15,13 +15,12 @@ package tty import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index bc90330bc..903874141 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -50,6 +50,7 @@ go_library( "//pkg/sentry/syscalls/linux", "//pkg/sentry/usermem", "//pkg/sentry/vfs", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go index 91802dc1e..8944171c8 100644 --- a/pkg/sentry/fsimpl/ext/directory.go +++ b/pkg/sentry/fsimpl/ext/directory.go @@ -15,8 +15,6 @@ package ext import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/log" @@ -25,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index 616fc002a..9afb1a84c 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -17,13 +17,13 @@ package ext import ( "errors" "io" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go index aec33e00a..d11153c90 100644 --- a/pkg/sentry/fsimpl/ext/regular_file.go +++ b/pkg/sentry/fsimpl/ext/regular_file.go @@ -16,7 +16,6 @@ package ext import ( "io" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" @@ -24,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index 39c03ee9d..809178250 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -39,6 +39,7 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/usermem", "//pkg/sentry/vfs", + "//pkg/sync", "//pkg/syserror", ], ) @@ -56,6 +57,7 @@ go_test( "//pkg/sentry/kernel/auth", "//pkg/sentry/usermem", "//pkg/sentry/vfs", + "//pkg/sync", "//pkg/syserror", "@com_github_google_go-cmp//cmp:go_default_library", ], diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 752e0f659..1d469a0db 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -16,7 +16,6 @@ package kernfs import ( "fmt" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -24,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index d69b299ae..bb12f39a2 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -53,7 +53,6 @@ package kernfs import ( "fmt" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -61,6 +60,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" ) // FilesystemType implements vfs.FilesystemType. diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index 4b6b95f5f..5c9d580e1 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -19,7 +19,6 @@ import ( "fmt" "io" "runtime" - "sync" "testing" "github.com/google/go-cmp/cmp" @@ -31,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index a5b285987..82f5c2f41 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -47,6 +47,7 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/usermem", "//pkg/sentry/vfs", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index f51e247a7..f200e767d 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -17,7 +17,6 @@ package tmpfs import ( "io" "math" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -30,6 +29,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 7be6faa5b..701826f90 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -26,7 +26,6 @@ package tmpfs import ( "fmt" "math" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -34,6 +33,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 2706927ff..ac85ba0c8 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -35,7 +35,7 @@ go_template_instance( out = "seqatomic_taskgoroutineschedinfo_unsafe.go", package = "kernel", suffix = "TaskGoroutineSchedInfo", - template = "//pkg/syncutil:generic_seqatomic", + template = "//pkg/sync:generic_seqatomic", types = { "Value": "TaskGoroutineSchedInfo", }, @@ -209,7 +209,7 @@ go_library( "//pkg/sentry/usermem", "//pkg/state", "//pkg/state/statefile", - "//pkg/syncutil", + "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", @@ -241,6 +241,7 @@ go_test( "//pkg/sentry/time", "//pkg/sentry/usage", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/kernel/abstract_socket_namespace.go b/pkg/sentry/kernel/abstract_socket_namespace.go index 244655b5c..920fe4329 100644 --- a/pkg/sentry/kernel/abstract_socket_namespace.go +++ b/pkg/sentry/kernel/abstract_socket_namespace.go @@ -15,11 +15,11 @@ package kernel import ( - "sync" "syscall" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sync" ) // +stateify savable diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index 04c244447..1aa72fa47 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -8,7 +8,7 @@ go_template_instance( out = "atomicptr_credentials_unsafe.go", package = "auth", suffix = "Credentials", - template = "//pkg/syncutil:generic_atomicptr", + template = "//pkg/sync:generic_atomicptr", types = { "Value": "Credentials", }, @@ -64,6 +64,7 @@ go_library( "//pkg/bits", "//pkg/log", "//pkg/sentry/context", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go index af28ccc65..9dd52c860 100644 --- a/pkg/sentry/kernel/auth/user_namespace.go +++ b/pkg/sentry/kernel/auth/user_namespace.go @@ -16,8 +16,8 @@ package auth import ( "math" - "sync" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD index 3361e8b7d..c47f6b6fc 100644 --- a/pkg/sentry/kernel/epoll/BUILD +++ b/pkg/sentry/kernel/epoll/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/waiter", ], ) diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go index 9c0a4e1b4..430311cc0 100644 --- a/pkg/sentry/kernel/epoll/epoll.go +++ b/pkg/sentry/kernel/epoll/epoll.go @@ -18,7 +18,6 @@ package epoll import ( "fmt" - "sync" "syscall" "gvisor.dev/gvisor/pkg/refs" @@ -27,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD index e65b961e8..c831fbab2 100644 --- a/pkg/sentry/kernel/eventfd/BUILD +++ b/pkg/sentry/kernel/eventfd/BUILD @@ -16,6 +16,7 @@ go_library( "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go index 12f0d429b..687690679 100644 --- a/pkg/sentry/kernel/eventfd/eventfd.go +++ b/pkg/sentry/kernel/eventfd/eventfd.go @@ -18,7 +18,6 @@ package eventfd import ( "math" - "sync" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" @@ -28,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD index 49d81b712..6b36bc63e 100644 --- a/pkg/sentry/kernel/fasync/BUILD +++ b/pkg/sentry/kernel/fasync/BUILD @@ -12,6 +12,7 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sync", "//pkg/waiter", ], ) diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go index 6b0bb0324..d32c3e90a 100644 --- a/pkg/sentry/kernel/fasync/fasync.go +++ b/pkg/sentry/kernel/fasync/fasync.go @@ -16,12 +16,11 @@ package fasync import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 11f613a11..cd1501f85 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -18,7 +18,6 @@ import ( "bytes" "fmt" "math" - "sync" "sync/atomic" "syscall" @@ -28,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sync" ) // FDFlags define flags for an individual descriptor. diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go index 2bcb6216a..eccb7d1e7 100644 --- a/pkg/sentry/kernel/fd_table_test.go +++ b/pkg/sentry/kernel/fd_table_test.go @@ -16,7 +16,6 @@ package kernel import ( "runtime" - "sync" "testing" "gvisor.dev/gvisor/pkg/sentry/context" @@ -24,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/filetest" "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sync" ) const ( diff --git a/pkg/sentry/kernel/fs_context.go b/pkg/sentry/kernel/fs_context.go index ded27d668..2448c1d99 100644 --- a/pkg/sentry/kernel/fs_context.go +++ b/pkg/sentry/kernel/fs_context.go @@ -16,10 +16,10 @@ package kernel import ( "fmt" - "sync" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sync" ) // FSContext contains filesystem context. diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index 75ec31761..50db443ce 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -9,7 +9,7 @@ go_template_instance( out = "atomicptr_bucket_unsafe.go", package = "futex", suffix = "Bucket", - template = "//pkg/syncutil:generic_atomicptr", + template = "//pkg/sync:generic_atomicptr", types = { "Value": "bucket", }, @@ -42,6 +42,7 @@ go_library( "//pkg/sentry/context", "//pkg/sentry/memmap", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", ], ) @@ -51,5 +52,8 @@ go_test( size = "small", srcs = ["futex_test.go"], embed = [":futex"], - deps = ["//pkg/sentry/usermem"], + deps = [ + "//pkg/sentry/usermem", + "//pkg/sync", + ], ) diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index 278cc8143..d1931c8f4 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -18,11 +18,10 @@ package futex import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go index 65e5d1428..c23126ca5 100644 --- a/pkg/sentry/kernel/futex/futex_test.go +++ b/pkg/sentry/kernel/futex/futex_test.go @@ -17,13 +17,13 @@ package futex import ( "math" "runtime" - "sync" "sync/atomic" "syscall" "testing" "unsafe" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) // testData implements the Target interface, and allows us to diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 8653d2f63..c85e97fef 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -36,7 +36,6 @@ import ( "fmt" "io" "path/filepath" - "sync" "sync/atomic" "time" @@ -67,6 +66,7 @@ import ( uspb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" ) diff --git a/pkg/sentry/kernel/memevent/BUILD b/pkg/sentry/kernel/memevent/BUILD index d7a7d1169..7f36252a9 100644 --- a/pkg/sentry/kernel/memevent/BUILD +++ b/pkg/sentry/kernel/memevent/BUILD @@ -16,6 +16,7 @@ go_library( "//pkg/metric", "//pkg/sentry/kernel", "//pkg/sentry/usage", + "//pkg/sync", ], ) diff --git a/pkg/sentry/kernel/memevent/memory_events.go b/pkg/sentry/kernel/memevent/memory_events.go index b0d98e7f0..200565bb8 100644 --- a/pkg/sentry/kernel/memevent/memory_events.go +++ b/pkg/sentry/kernel/memevent/memory_events.go @@ -17,7 +17,6 @@ package memevent import ( - "sync" "time" "gvisor.dev/gvisor/pkg/eventchannel" @@ -26,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" pb "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto" "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sync" ) var totalTicks = metric.MustCreateNewUint64Metric("/memory_events/ticks", false /*sync*/, "Total number of memory event periods that have elapsed since startup.") diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 9d34f6d4d..5eeaeff66 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -43,6 +43,7 @@ go_library( "//pkg/sentry/safemem", "//pkg/sentry/usermem", "//pkg/sentry/vfs", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/pipe/buffer.go b/pkg/sentry/kernel/pipe/buffer.go index 95bee2d37..1c0f34269 100644 --- a/pkg/sentry/kernel/pipe/buffer.go +++ b/pkg/sentry/kernel/pipe/buffer.go @@ -16,9 +16,9 @@ package pipe import ( "io" - "sync" "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sync" ) // buffer encapsulates a queueable byte buffer. diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go index 4a19ab7ce..716f589af 100644 --- a/pkg/sentry/kernel/pipe/node.go +++ b/pkg/sentry/kernel/pipe/node.go @@ -15,12 +15,11 @@ package pipe import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 1a1b38f83..e4fd7d420 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -17,12 +17,12 @@ package pipe import ( "fmt" - "sync" "sync/atomic" "syscall" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go index ef9641e6a..8394eb78b 100644 --- a/pkg/sentry/kernel/pipe/pipe_util.go +++ b/pkg/sentry/kernel/pipe/pipe_util.go @@ -17,7 +17,6 @@ package pipe import ( "io" "math" - "sync" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" @@ -25,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 6416e0dd8..bf7461cbb 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -15,13 +15,12 @@ package pipe import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index f4c00cd86..13a961594 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -31,6 +31,7 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index de9617e9d..18299814e 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -17,7 +17,6 @@ package semaphore import ( "fmt" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" @@ -25,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index cd48945e6..7321b22ed 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -24,6 +24,7 @@ go_library( "//pkg/sentry/platform", "//pkg/sentry/usage", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index 19034a21e..8ddef7eb8 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -35,7 +35,6 @@ package shm import ( "fmt" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" @@ -49,6 +48,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/kernel/signal_handlers.go b/pkg/sentry/kernel/signal_handlers.go index a16f3d57f..768fda220 100644 --- a/pkg/sentry/kernel/signal_handlers.go +++ b/pkg/sentry/kernel/signal_handlers.go @@ -15,10 +15,9 @@ package kernel import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sync" ) // SignalHandlers holds information about signal actions. diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD index 9f7e19b4d..89e4d84b1 100644 --- a/pkg/sentry/kernel/signalfd/BUILD +++ b/pkg/sentry/kernel/signalfd/BUILD @@ -16,6 +16,7 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go index 4b08d7d72..28be4a939 100644 --- a/pkg/sentry/kernel/signalfd/signalfd.go +++ b/pkg/sentry/kernel/signalfd/signalfd.go @@ -16,8 +16,6 @@ package signalfd import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/sentry/context" @@ -26,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go index 2fdee0282..d2d01add4 100644 --- a/pkg/sentry/kernel/syscalls.go +++ b/pkg/sentry/kernel/syscalls.go @@ -16,13 +16,13 @@ package kernel import ( "fmt" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/bits" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) // maxSyscallNum is the highest supported syscall number. diff --git a/pkg/sentry/kernel/syslog.go b/pkg/sentry/kernel/syslog.go index 8227ecf1d..4607cde2f 100644 --- a/pkg/sentry/kernel/syslog.go +++ b/pkg/sentry/kernel/syslog.go @@ -17,7 +17,8 @@ package kernel import ( "fmt" "math/rand" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) // syslog represents a sentry-global kernel log. diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index d25a7903b..978d66da8 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -17,7 +17,6 @@ package kernel import ( gocontext "context" "runtime/trace" - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -37,7 +36,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syncutil" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) @@ -85,7 +84,7 @@ type Task struct { // // gosched is protected by goschedSeq. gosched is owned by the task // goroutine. - goschedSeq syncutil.SeqCount `state:"nosave"` + goschedSeq sync.SeqCount `state:"nosave"` gosched TaskGoroutineSchedInfo // yieldCount is the number of times the task goroutine has called diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index c0197a563..768e958d2 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -15,7 +15,6 @@ package kernel import ( - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -25,6 +24,7 @@ import ( ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index 8267929a6..bf2dabb6e 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -16,9 +16,9 @@ package kernel import ( "fmt" - "sync" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD index 31847e1df..4e4de0512 100644 --- a/pkg/sentry/kernel/time/BUILD +++ b/pkg/sentry/kernel/time/BUILD @@ -13,6 +13,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/sentry/context", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go index 107394183..706de83ef 100644 --- a/pkg/sentry/kernel/time/time.go +++ b/pkg/sentry/kernel/time/time.go @@ -19,10 +19,10 @@ package time import ( "fmt" "math" - "sync" "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/timekeeper.go b/pkg/sentry/kernel/timekeeper.go index 76417342a..dc99301de 100644 --- a/pkg/sentry/kernel/timekeeper.go +++ b/pkg/sentry/kernel/timekeeper.go @@ -16,7 +16,6 @@ package kernel import ( "fmt" - "sync" "time" "gvisor.dev/gvisor/pkg/log" @@ -24,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" sentrytime "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sync" ) // Timekeeper manages all of the kernel clocks. diff --git a/pkg/sentry/kernel/tty.go b/pkg/sentry/kernel/tty.go index 048de26dc..464d2306a 100644 --- a/pkg/sentry/kernel/tty.go +++ b/pkg/sentry/kernel/tty.go @@ -14,7 +14,7 @@ package kernel -import "sync" +import "gvisor.dev/gvisor/pkg/sync" // TTY defines the relationship between a thread group and its controlling // terminal. diff --git a/pkg/sentry/kernel/uts_namespace.go b/pkg/sentry/kernel/uts_namespace.go index 0a563e715..8ccf04bd1 100644 --- a/pkg/sentry/kernel/uts_namespace.go +++ b/pkg/sentry/kernel/uts_namespace.go @@ -15,9 +15,8 @@ package kernel import ( - "sync" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sync" ) // UTSNamespace represents a UTS namespace, a holder of two system identifiers: diff --git a/pkg/sentry/limits/BUILD b/pkg/sentry/limits/BUILD index 156e67bf8..9fa841e8b 100644 --- a/pkg/sentry/limits/BUILD +++ b/pkg/sentry/limits/BUILD @@ -15,6 +15,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/sentry/context", + "//pkg/sync", ], ) diff --git a/pkg/sentry/limits/limits.go b/pkg/sentry/limits/limits.go index b6c22656b..31b9e9ff6 100644 --- a/pkg/sentry/limits/limits.go +++ b/pkg/sentry/limits/limits.go @@ -16,8 +16,9 @@ package limits import ( - "sync" "syscall" + + "gvisor.dev/gvisor/pkg/sync" ) // LimitType defines a type of resource limit. diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index 839931f67..83e248431 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -118,7 +118,7 @@ go_library( "//pkg/sentry/safemem", "//pkg/sentry/usage", "//pkg/sentry/usermem", - "//pkg/syncutil", + "//pkg/sync", "//pkg/syserror", "//pkg/tcpip/buffer", ], diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 1b746d030..4b48866ad 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -15,8 +15,6 @@ package mm import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/context" @@ -25,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index 58a5c186d..fa86ebced 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -35,8 +35,6 @@ package mm import ( - "sync" - "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -44,7 +42,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syncutil" + "gvisor.dev/gvisor/pkg/sync" ) // MemoryManager implements a virtual address space. @@ -82,7 +80,7 @@ type MemoryManager struct { users int32 // mappingMu is analogous to Linux's struct mm_struct::mmap_sem. - mappingMu syncutil.DowngradableRWMutex `state:"nosave"` + mappingMu sync.DowngradableRWMutex `state:"nosave"` // vmas stores virtual memory areas. Since vmas are stored by value, // clients should usually use vmaIterator.ValuePtr() instead of @@ -125,7 +123,7 @@ type MemoryManager struct { // activeMu is loosely analogous to Linux's struct // mm_struct::page_table_lock. - activeMu syncutil.DowngradableRWMutex `state:"nosave"` + activeMu sync.DowngradableRWMutex `state:"nosave"` // pmas stores platform mapping areas used to implement vmas. Since pmas // are stored by value, clients should usually use pmaIterator.ValuePtr() diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index f404107af..a9a2642c5 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -73,6 +73,7 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/usermem", "//pkg/state", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index f7f7298c4..c99e023d9 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -25,7 +25,6 @@ import ( "fmt" "math" "os" - "sync" "sync/atomic" "syscall" "time" @@ -37,6 +36,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/platform/interrupt/BUILD b/pkg/sentry/platform/interrupt/BUILD index b6d008dbe..85e882df9 100644 --- a/pkg/sentry/platform/interrupt/BUILD +++ b/pkg/sentry/platform/interrupt/BUILD @@ -10,6 +10,7 @@ go_library( ], importpath = "gvisor.dev/gvisor/pkg/sentry/platform/interrupt", visibility = ["//pkg/sentry:internal"], + deps = ["//pkg/sync"], ) go_test( diff --git a/pkg/sentry/platform/interrupt/interrupt.go b/pkg/sentry/platform/interrupt/interrupt.go index a4651f500..57be41647 100644 --- a/pkg/sentry/platform/interrupt/interrupt.go +++ b/pkg/sentry/platform/interrupt/interrupt.go @@ -17,7 +17,8 @@ package interrupt import ( "fmt" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) // Receiver receives interrupt notifications from a Forwarder. diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index f3afd98da..6a358d1d4 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -55,6 +55,7 @@ go_library( "//pkg/sentry/platform/safecopy", "//pkg/sentry/time", "//pkg/sentry/usermem", + "//pkg/sync", ], ) diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index ea8b9632e..a25f3c449 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -15,13 +15,13 @@ package kvm import ( - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) // dirtySet tracks vCPUs for invalidation. diff --git a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go index e5fac0d6a..2f02c03cf 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go @@ -17,8 +17,6 @@ package kvm import ( - "unsafe" - "gvisor.dev/gvisor/pkg/sentry/arch" ) diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index f2c2c059e..a7850faed 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -18,13 +18,13 @@ package kvm import ( "fmt" "os" - "sync" "syscall" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/ring0" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) // KVM represents a lightweight VM context. diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 7d02ebf19..e6d912168 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -17,7 +17,6 @@ package kvm import ( "fmt" "runtime" - "sync" "sync/atomic" "syscall" @@ -27,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform/ring0" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) // machine contains state associated with the VM as a whole. diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD index 0df8cfa0f..cd13390c3 100644 --- a/pkg/sentry/platform/ptrace/BUILD +++ b/pkg/sentry/platform/ptrace/BUILD @@ -33,6 +33,7 @@ go_library( "//pkg/sentry/platform/interrupt", "//pkg/sentry/platform/safecopy", "//pkg/sentry/usermem", + "//pkg/sync", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go index 7b120a15d..bb0e03880 100644 --- a/pkg/sentry/platform/ptrace/ptrace.go +++ b/pkg/sentry/platform/ptrace/ptrace.go @@ -46,13 +46,13 @@ package ptrace import ( "os" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/interrupt" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) var ( diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go index 20244fd95..15dc46a5b 100644 --- a/pkg/sentry/platform/ptrace/subprocess.go +++ b/pkg/sentry/platform/ptrace/subprocess.go @@ -18,7 +18,6 @@ import ( "fmt" "os" "runtime" - "sync" "syscall" "golang.org/x/sys/unix" @@ -27,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" ) // Linux kernel errnos which "should never be seen by user programs", but will diff --git a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go index 2e6fbe488..245b20722 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux_unsafe.go @@ -18,7 +18,6 @@ package ptrace import ( - "sync" "sync/atomic" "syscall" "unsafe" @@ -26,6 +25,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/hostcpu" + "gvisor.dev/gvisor/pkg/sync" ) // maskPool contains reusable CPU masks for setting affinity. Unfortunately, diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go index 3f094c2a7..86fd5ed58 100644 --- a/pkg/sentry/platform/ring0/defs.go +++ b/pkg/sentry/platform/ring0/defs.go @@ -17,7 +17,7 @@ package ring0 import ( "syscall" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" ) // Kernel is a global kernel object. diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go index 10dbd381f..9dae0dccb 100644 --- a/pkg/sentry/platform/ring0/defs_amd64.go +++ b/pkg/sentry/platform/ring0/defs_amd64.go @@ -18,6 +18,7 @@ package ring0 import ( "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) var ( diff --git a/pkg/sentry/platform/ring0/defs_arm64.go b/pkg/sentry/platform/ring0/defs_arm64.go index dc0eeec01..a850ce6cf 100644 --- a/pkg/sentry/platform/ring0/defs_arm64.go +++ b/pkg/sentry/platform/ring0/defs_arm64.go @@ -18,6 +18,7 @@ package ring0 import ( "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" + "gvisor.dev/gvisor/pkg/sentry/usermem" ) var ( diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index e2e15ba5c..387a7f6c3 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -96,7 +96,10 @@ go_library( "//pkg/sentry/platform/kvm:__subpackages__", "//pkg/sentry/platform/ring0:__subpackages__", ], - deps = ["//pkg/sentry/usermem"], + deps = [ + "//pkg/sentry/usermem", + "//pkg/sync", + ], ) go_test( diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go index 0f029f25d..e199bae18 100644 --- a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go +++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go @@ -17,7 +17,7 @@ package pagetables import ( - "sync" + "gvisor.dev/gvisor/pkg/sync" ) // limitPCID is the number of valid PCIDs. diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 136821963..103933144 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -27,6 +27,7 @@ go_library( "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", diff --git a/pkg/sentry/socket/netlink/port/BUILD b/pkg/sentry/socket/netlink/port/BUILD index 463544c1a..2d9f4ba9b 100644 --- a/pkg/sentry/socket/netlink/port/BUILD +++ b/pkg/sentry/socket/netlink/port/BUILD @@ -8,6 +8,7 @@ go_library( srcs = ["port.go"], importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port", visibility = ["//pkg/sentry:internal"], + deps = ["//pkg/sync"], ) go_test( diff --git a/pkg/sentry/socket/netlink/port/port.go b/pkg/sentry/socket/netlink/port/port.go index e9d3275b1..2cd3afc22 100644 --- a/pkg/sentry/socket/netlink/port/port.go +++ b/pkg/sentry/socket/netlink/port/port.go @@ -24,7 +24,8 @@ import ( "fmt" "math" "math/rand" - "sync" + + "gvisor.dev/gvisor/pkg/sync" ) // maxPorts is a sanity limit on the maximum number of ports to allocate per diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index d2e3644a6..cea56f4ed 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -17,7 +17,6 @@ package netlink import ( "math" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" @@ -34,6 +33,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index e414d8055..f78784569 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -34,6 +34,7 @@ go_library( "//pkg/sentry/socket/netfilter", "//pkg/sentry/unimpl", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 764f11a6b..0affb8071 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -29,7 +29,6 @@ import ( "io" "math" "reflect" - "sync" "syscall" "time" @@ -49,6 +48,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" diff --git a/pkg/sentry/socket/rpcinet/conn/BUILD b/pkg/sentry/socket/rpcinet/conn/BUILD index 23eadcb1b..b2677c659 100644 --- a/pkg/sentry/socket/rpcinet/conn/BUILD +++ b/pkg/sentry/socket/rpcinet/conn/BUILD @@ -10,6 +10,7 @@ go_library( deps = [ "//pkg/binary", "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", + "//pkg/sync", "//pkg/syserr", "//pkg/unet", "@com_github_golang_protobuf//proto:go_default_library", diff --git a/pkg/sentry/socket/rpcinet/conn/conn.go b/pkg/sentry/socket/rpcinet/conn/conn.go index 356adad99..02f39c767 100644 --- a/pkg/sentry/socket/rpcinet/conn/conn.go +++ b/pkg/sentry/socket/rpcinet/conn/conn.go @@ -17,12 +17,12 @@ package conn import ( "fmt" - "sync" "sync/atomic" "syscall" "github.com/golang/protobuf/proto" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/unet" diff --git a/pkg/sentry/socket/rpcinet/notifier/BUILD b/pkg/sentry/socket/rpcinet/notifier/BUILD index a3585e10d..a5954f22b 100644 --- a/pkg/sentry/socket/rpcinet/notifier/BUILD +++ b/pkg/sentry/socket/rpcinet/notifier/BUILD @@ -10,6 +10,7 @@ go_library( deps = [ "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", "//pkg/sentry/socket/rpcinet/conn", + "//pkg/sync", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier.go b/pkg/sentry/socket/rpcinet/notifier/notifier.go index 7efe4301f..82b75d6dd 100644 --- a/pkg/sentry/socket/rpcinet/notifier/notifier.go +++ b/pkg/sentry/socket/rpcinet/notifier/notifier.go @@ -17,12 +17,12 @@ package notifier import ( "fmt" - "sync" "syscall" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD index 788ad70d2..d7ba95dff 100644 --- a/pkg/sentry/socket/unix/transport/BUILD +++ b/pkg/sentry/socket/unix/transport/BUILD @@ -32,6 +32,7 @@ go_library( "//pkg/ilist", "//pkg/refs", "//pkg/sentry/context", + "//pkg/sync", "//pkg/syserr", "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go index dea11e253..9e6fbc111 100644 --- a/pkg/sentry/socket/unix/transport/connectioned.go +++ b/pkg/sentry/socket/unix/transport/connectioned.go @@ -15,10 +15,9 @@ package transport import ( - "sync" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/waiter" diff --git a/pkg/sentry/socket/unix/transport/queue.go b/pkg/sentry/socket/unix/transport/queue.go index e27b1c714..5dcd3d95e 100644 --- a/pkg/sentry/socket/unix/transport/queue.go +++ b/pkg/sentry/socket/unix/transport/queue.go @@ -15,9 +15,8 @@ package transport import ( - "sync" - "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go index 37c7ac3c1..fcc0da332 100644 --- a/pkg/sentry/socket/unix/transport/unix.go +++ b/pkg/sentry/socket/unix/transport/unix.go @@ -16,11 +16,11 @@ package transport import ( - "sync" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index a76975cee..aa05e208a 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -91,6 +91,7 @@ go_library( "//pkg/sentry/syscalls", "//pkg/sentry/usage", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/waiter", diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go index 1d9018c96..60469549d 100644 --- a/pkg/sentry/syscalls/linux/error.go +++ b/pkg/sentry/syscalls/linux/error.go @@ -16,13 +16,13 @@ package linux import ( "io" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD index 18e212dff..3cde3a0be 100644 --- a/pkg/sentry/time/BUILD +++ b/pkg/sentry/time/BUILD @@ -9,7 +9,7 @@ go_template_instance( out = "seqatomic_parameters_unsafe.go", package = "time", suffix = "Parameters", - template = "//pkg/syncutil:generic_seqatomic", + template = "//pkg/sync:generic_seqatomic", types = { "Value": "Parameters", }, @@ -36,7 +36,7 @@ go_library( deps = [ "//pkg/log", "//pkg/metric", - "//pkg/syncutil", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/time/calibrated_clock.go b/pkg/sentry/time/calibrated_clock.go index 318503277..f9a93115d 100644 --- a/pkg/sentry/time/calibrated_clock.go +++ b/pkg/sentry/time/calibrated_clock.go @@ -17,11 +17,11 @@ package time import ( - "sync" "time" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/usage/BUILD b/pkg/sentry/usage/BUILD index c32fe3241..5518ac3d0 100644 --- a/pkg/sentry/usage/BUILD +++ b/pkg/sentry/usage/BUILD @@ -18,5 +18,6 @@ go_library( deps = [ "//pkg/bits", "//pkg/memutil", + "//pkg/sync", ], ) diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go index d6ef644d8..538c645eb 100644 --- a/pkg/sentry/usage/memory.go +++ b/pkg/sentry/usage/memory.go @@ -17,12 +17,12 @@ package usage import ( "fmt" "os" - "sync" "sync/atomic" "syscall" "gvisor.dev/gvisor/pkg/bits" "gvisor.dev/gvisor/pkg/memutil" + "gvisor.dev/gvisor/pkg/sync" ) // MemoryKind represents a type of memory used by the application. diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 4c6aa04a1..35c7be259 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -34,7 +34,7 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/usermem", - "//pkg/syncutil", + "//pkg/sync", "//pkg/syserror", "//pkg/waiter", ], @@ -54,6 +54,7 @@ go_test( "//pkg/sentry/context/contexttest", "//pkg/sentry/kernel/auth", "//pkg/sentry/usermem", + "//pkg/sync", "//pkg/syserror", ], ) diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go index 1bc9c4a38..486a76475 100644 --- a/pkg/sentry/vfs/dentry.go +++ b/pkg/sentry/vfs/dentry.go @@ -16,9 +16,9 @@ package vfs import ( "fmt" - "sync" "sync/atomic" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index 66eb57bc2..c00b3c84b 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -17,13 +17,13 @@ package vfs import ( "bytes" "io" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/vfs/mount_test.go b/pkg/sentry/vfs/mount_test.go index adff0b94b..3b933468d 100644 --- a/pkg/sentry/vfs/mount_test.go +++ b/pkg/sentry/vfs/mount_test.go @@ -17,8 +17,9 @@ package vfs import ( "fmt" "runtime" - "sync" "testing" + + "gvisor.dev/gvisor/pkg/sync" ) func TestMountTableLookupEmpty(t *testing.T) { diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go index ab13fa461..bd90d36c4 100644 --- a/pkg/sentry/vfs/mount_unsafe.go +++ b/pkg/sentry/vfs/mount_unsafe.go @@ -26,7 +26,7 @@ import ( "sync/atomic" "unsafe" - "gvisor.dev/gvisor/pkg/syncutil" + "gvisor.dev/gvisor/pkg/sync" ) // mountKey represents the location at which a Mount is mounted. It is @@ -75,7 +75,7 @@ type mountTable struct { // intrinsics and inline assembly, limiting the performance of this // approach.) - seq syncutil.SeqCount + seq sync.SeqCount seed uint32 // for hashing keys // size holds both length (number of elements) and capacity (number of diff --git a/pkg/sentry/vfs/pathname.go b/pkg/sentry/vfs/pathname.go index 8e155654f..cf80df90e 100644 --- a/pkg/sentry/vfs/pathname.go +++ b/pkg/sentry/vfs/pathname.go @@ -15,10 +15,9 @@ package vfs import ( - "sync" - "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go index f0641d314..8a0b382f6 100644 --- a/pkg/sentry/vfs/resolving_path.go +++ b/pkg/sentry/vfs/resolving_path.go @@ -16,11 +16,11 @@ package vfs import ( "fmt" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index ea2db7031..1f21b0b31 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -29,12 +29,12 @@ package vfs import ( "fmt" - "sync" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/watchdog/BUILD b/pkg/sentry/watchdog/BUILD index 4d8435265..28f21f13d 100644 --- a/pkg/sentry/watchdog/BUILD +++ b/pkg/sentry/watchdog/BUILD @@ -13,5 +13,6 @@ go_library( "//pkg/metric", "//pkg/sentry/kernel", "//pkg/sentry/kernel/time", + "//pkg/sync", ], ) diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go index 5e4611333..bfb2fac26 100644 --- a/pkg/sentry/watchdog/watchdog.go +++ b/pkg/sentry/watchdog/watchdog.go @@ -32,7 +32,6 @@ package watchdog import ( "bytes" "fmt" - "sync" "time" "gvisor.dev/gvisor/pkg/abi/linux" @@ -40,6 +39,7 @@ import ( "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sync" ) // Opts configures the watchdog. diff --git a/pkg/sync/BUILD b/pkg/sync/BUILD new file mode 100644 index 000000000..e8cd16b8f --- /dev/null +++ b/pkg/sync/BUILD @@ -0,0 +1,53 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template") + +package( + default_visibility = ["//:sandbox"], + licenses = ["notice"], +) + +exports_files(["LICENSE"]) + +go_template( + name = "generic_atomicptr", + srcs = ["atomicptr_unsafe.go"], + types = [ + "Value", + ], +) + +go_template( + name = "generic_seqatomic", + srcs = ["seqatomic_unsafe.go"], + types = [ + "Value", + ], + deps = [ + ":sync", + ], +) + +go_library( + name = "sync", + srcs = [ + "aliases.go", + "downgradable_rwmutex_unsafe.go", + "memmove_unsafe.go", + "norace_unsafe.go", + "race_unsafe.go", + "seqcount.go", + "syncutil.go", + ], + importpath = "gvisor.dev/gvisor/pkg/sync", +) + +go_test( + name = "sync_test", + size = "small", + srcs = [ + "downgradable_rwmutex_test.go", + "seqcount_test.go", + ], + embed = [":sync"], +) diff --git a/pkg/sync/LICENSE b/pkg/sync/LICENSE new file mode 100644 index 000000000..6a66aea5e --- /dev/null +++ b/pkg/sync/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pkg/sync/README.md b/pkg/sync/README.md new file mode 100644 index 000000000..2183c4e20 --- /dev/null +++ b/pkg/sync/README.md @@ -0,0 +1,5 @@ +# Syncutil + +This package provides additional synchronization primitives not provided by the +Go stdlib 'sync' package. It is partially derived from the upstream 'sync' +package from go1.10. diff --git a/pkg/sync/aliases.go b/pkg/sync/aliases.go new file mode 100644 index 000000000..20c7ca041 --- /dev/null +++ b/pkg/sync/aliases.go @@ -0,0 +1,37 @@ +// Copyright 2020 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sync + +import ( + "sync" +) + +// Aliases of standard library types. +type ( + // Mutex is an alias of sync.Mutex. + Mutex = sync.Mutex + + // RWMutex is an alias of sync.RWMutex. + RWMutex = sync.RWMutex + + // Cond is an alias of sync.Cond. + Cond = sync.Cond + + // Locker is an alias of sync.Locker. + Locker = sync.Locker + + // Once is an alias of sync.Once. + Once = sync.Once + + // Pool is an alias of sync.Pool. + Pool = sync.Pool + + // WaitGroup is an alias of sync.WaitGroup. + WaitGroup = sync.WaitGroup + + // Map is an alias of sync.Map. + Map = sync.Map +) diff --git a/pkg/sync/atomicptr_unsafe.go b/pkg/sync/atomicptr_unsafe.go new file mode 100644 index 000000000..525c4beed --- /dev/null +++ b/pkg/sync/atomicptr_unsafe.go @@ -0,0 +1,47 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package template doesn't exist. This file must be instantiated using the +// go_template_instance rule in tools/go_generics/defs.bzl. +package template + +import ( + "sync/atomic" + "unsafe" +) + +// Value is a required type parameter. +type Value struct{} + +// An AtomicPtr is a pointer to a value of type Value that can be atomically +// loaded and stored. The zero value of an AtomicPtr represents nil. +// +// Note that copying AtomicPtr by value performs a non-atomic read of the +// stored pointer, which is unsafe if Store() can be called concurrently; in +// this case, do `dst.Store(src.Load())` instead. +// +// +stateify savable +type AtomicPtr struct { + ptr unsafe.Pointer `state:".(*Value)"` +} + +func (p *AtomicPtr) savePtr() *Value { + return p.Load() +} + +func (p *AtomicPtr) loadPtr(v *Value) { + p.Store(v) +} + +// Load returns the value set by the most recent Store. It returns nil if there +// has been no previous call to Store. +func (p *AtomicPtr) Load() *Value { + return (*Value)(atomic.LoadPointer(&p.ptr)) +} + +// Store sets the value returned by Load to x. +func (p *AtomicPtr) Store(x *Value) { + atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) +} diff --git a/pkg/sync/atomicptrtest/BUILD b/pkg/sync/atomicptrtest/BUILD new file mode 100644 index 000000000..418eda29c --- /dev/null +++ b/pkg/sync/atomicptrtest/BUILD @@ -0,0 +1,29 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "atomicptr_int", + out = "atomicptr_int_unsafe.go", + package = "atomicptr", + suffix = "Int", + template = "//pkg/sync:generic_atomicptr", + types = { + "Value": "int", + }, +) + +go_library( + name = "atomicptr", + srcs = ["atomicptr_int_unsafe.go"], + importpath = "gvisor.dev/gvisor/pkg/sync/atomicptr", +) + +go_test( + name = "atomicptr_test", + size = "small", + srcs = ["atomicptr_test.go"], + embed = [":atomicptr"], +) diff --git a/pkg/sync/atomicptrtest/atomicptr_test.go b/pkg/sync/atomicptrtest/atomicptr_test.go new file mode 100644 index 000000000..8fdc5112e --- /dev/null +++ b/pkg/sync/atomicptrtest/atomicptr_test.go @@ -0,0 +1,31 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package atomicptr + +import ( + "testing" +) + +func newInt(val int) *int { + return &val +} + +func TestAtomicPtr(t *testing.T) { + var p AtomicPtrInt + if got := p.Load(); got != nil { + t.Errorf("initial value is %p (%v), wanted nil", got, got) + } + want := newInt(42) + p.Store(want) + if got := p.Load(); got != want { + t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) + } + want = newInt(100) + p.Store(want) + if got := p.Load(); got != want { + t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) + } +} diff --git a/pkg/sync/downgradable_rwmutex_test.go b/pkg/sync/downgradable_rwmutex_test.go new file mode 100644 index 000000000..f04496bc5 --- /dev/null +++ b/pkg/sync/downgradable_rwmutex_test.go @@ -0,0 +1,150 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// GOMAXPROCS=10 go test + +// Copy/pasted from the standard library's sync/rwmutex_test.go, except for the +// addition of downgradingWriter and the renaming of num_iterations to +// numIterations to shut up Golint. + +package sync + +import ( + "fmt" + "runtime" + "sync/atomic" + "testing" +) + +func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { + m.RLock() + clocked <- true + <-cunlock + m.RUnlock() + cdone <- true +} + +func doTestParallelReaders(numReaders, gomaxprocs int) { + runtime.GOMAXPROCS(gomaxprocs) + var m DowngradableRWMutex + clocked := make(chan bool) + cunlock := make(chan bool) + cdone := make(chan bool) + for i := 0; i < numReaders; i++ { + go parallelReader(&m, clocked, cunlock, cdone) + } + // Wait for all parallel RLock()s to succeed. + for i := 0; i < numReaders; i++ { + <-clocked + } + for i := 0; i < numReaders; i++ { + cunlock <- true + } + // Wait for the goroutines to finish. + for i := 0; i < numReaders; i++ { + <-cdone + } +} + +func TestParallelReaders(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) + doTestParallelReaders(1, 4) + doTestParallelReaders(3, 4) + doTestParallelReaders(4, 2) +} + +func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { + for i := 0; i < numIterations; i++ { + rwm.RLock() + n := atomic.AddInt32(activity, 1) + if n < 1 || n >= 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + atomic.AddInt32(activity, -1) + rwm.RUnlock() + } + cdone <- true +} + +func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { + for i := 0; i < numIterations; i++ { + rwm.Lock() + n := atomic.AddInt32(activity, 10000) + if n != 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + atomic.AddInt32(activity, -10000) + rwm.Unlock() + } + cdone <- true +} + +func downgradingWriter(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { + for i := 0; i < numIterations; i++ { + rwm.Lock() + n := atomic.AddInt32(activity, 10000) + if n != 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + atomic.AddInt32(activity, -10000) + rwm.DowngradeLock() + n = atomic.AddInt32(activity, 1) + if n < 1 || n >= 10000 { + panic(fmt.Sprintf("wlock(%d)\n", n)) + } + for i := 0; i < 100; i++ { + } + n = atomic.AddInt32(activity, -1) + rwm.RUnlock() + } + cdone <- true +} + +func HammerDowngradableRWMutex(gomaxprocs, numReaders, numIterations int) { + runtime.GOMAXPROCS(gomaxprocs) + // Number of active readers + 10000 * number of active writers. + var activity int32 + var rwm DowngradableRWMutex + cdone := make(chan bool) + go writer(&rwm, numIterations, &activity, cdone) + go downgradingWriter(&rwm, numIterations, &activity, cdone) + var i int + for i = 0; i < numReaders/2; i++ { + go reader(&rwm, numIterations, &activity, cdone) + } + go writer(&rwm, numIterations, &activity, cdone) + go downgradingWriter(&rwm, numIterations, &activity, cdone) + for ; i < numReaders; i++ { + go reader(&rwm, numIterations, &activity, cdone) + } + // Wait for the 4 writers and all readers to finish. + for i := 0; i < 4+numReaders; i++ { + <-cdone + } +} + +func TestDowngradableRWMutex(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) + n := 1000 + if testing.Short() { + n = 5 + } + HammerDowngradableRWMutex(1, 1, n) + HammerDowngradableRWMutex(1, 3, n) + HammerDowngradableRWMutex(1, 10, n) + HammerDowngradableRWMutex(4, 1, n) + HammerDowngradableRWMutex(4, 3, n) + HammerDowngradableRWMutex(4, 10, n) + HammerDowngradableRWMutex(10, 1, n) + HammerDowngradableRWMutex(10, 3, n) + HammerDowngradableRWMutex(10, 10, n) + HammerDowngradableRWMutex(10, 5, n) +} diff --git a/pkg/sync/downgradable_rwmutex_unsafe.go b/pkg/sync/downgradable_rwmutex_unsafe.go new file mode 100644 index 000000000..9bb55cd3a --- /dev/null +++ b/pkg/sync/downgradable_rwmutex_unsafe.go @@ -0,0 +1,146 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright 2019 The gVisor Authors. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.13 +// +build !go1.15 + +// Check go:linkname function signatures when updating Go version. + +// This is mostly copied from the standard library's sync/rwmutex.go. +// +// Happens-before relationships indicated to the race detector: +// - Unlock -> Lock (via writerSem) +// - Unlock -> RLock (via readerSem) +// - RUnlock -> Lock (via writerSem) +// - DowngradeLock -> RLock (via readerSem) + +package sync + +import ( + "sync" + "sync/atomic" + "unsafe" +) + +//go:linkname runtimeSemacquire sync.runtime_Semacquire +func runtimeSemacquire(s *uint32) + +//go:linkname runtimeSemrelease sync.runtime_Semrelease +func runtimeSemrelease(s *uint32, handoff bool, skipframes int) + +// DowngradableRWMutex is identical to sync.RWMutex, but adds the DowngradeLock +// method. +type DowngradableRWMutex struct { + w sync.Mutex // held if there are pending writers + writerSem uint32 // semaphore for writers to wait for completing readers + readerSem uint32 // semaphore for readers to wait for completing writers + readerCount int32 // number of pending readers + readerWait int32 // number of departing readers +} + +const rwmutexMaxReaders = 1 << 30 + +// RLock locks rw for reading. +func (rw *DowngradableRWMutex) RLock() { + if RaceEnabled { + RaceDisable() + } + if atomic.AddInt32(&rw.readerCount, 1) < 0 { + // A writer is pending, wait for it. + runtimeSemacquire(&rw.readerSem) + } + if RaceEnabled { + RaceEnable() + RaceAcquire(unsafe.Pointer(&rw.readerSem)) + } +} + +// RUnlock undoes a single RLock call. +func (rw *DowngradableRWMutex) RUnlock() { + if RaceEnabled { + RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) + RaceDisable() + } + if r := atomic.AddInt32(&rw.readerCount, -1); r < 0 { + if r+1 == 0 || r+1 == -rwmutexMaxReaders { + panic("RUnlock of unlocked DowngradableRWMutex") + } + // A writer is pending. + if atomic.AddInt32(&rw.readerWait, -1) == 0 { + // The last reader unblocks the writer. + runtimeSemrelease(&rw.writerSem, false, 0) + } + } + if RaceEnabled { + RaceEnable() + } +} + +// Lock locks rw for writing. +func (rw *DowngradableRWMutex) Lock() { + if RaceEnabled { + RaceDisable() + } + // First, resolve competition with other writers. + rw.w.Lock() + // Announce to readers there is a pending writer. + r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders + // Wait for active readers. + if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 { + runtimeSemacquire(&rw.writerSem) + } + if RaceEnabled { + RaceEnable() + RaceAcquire(unsafe.Pointer(&rw.writerSem)) + } +} + +// Unlock unlocks rw for writing. +func (rw *DowngradableRWMutex) Unlock() { + if RaceEnabled { + RaceRelease(unsafe.Pointer(&rw.writerSem)) + RaceRelease(unsafe.Pointer(&rw.readerSem)) + RaceDisable() + } + // Announce to readers there is no active writer. + r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders) + if r >= rwmutexMaxReaders { + panic("Unlock of unlocked DowngradableRWMutex") + } + // Unblock blocked readers, if any. + for i := 0; i < int(r); i++ { + runtimeSemrelease(&rw.readerSem, false, 0) + } + // Allow other writers to proceed. + rw.w.Unlock() + if RaceEnabled { + RaceEnable() + } +} + +// DowngradeLock atomically unlocks rw for writing and locks it for reading. +func (rw *DowngradableRWMutex) DowngradeLock() { + if RaceEnabled { + RaceRelease(unsafe.Pointer(&rw.readerSem)) + RaceDisable() + } + // Announce to readers there is no active writer and one additional reader. + r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders+1) + if r >= rwmutexMaxReaders+1 { + panic("DowngradeLock of unlocked DowngradableRWMutex") + } + // Unblock blocked readers, if any. Note that this loop starts as 1 since r + // includes this goroutine. + for i := 1; i < int(r); i++ { + runtimeSemrelease(&rw.readerSem, false, 0) + } + // Allow other writers to proceed to rw.w.Lock(). Note that they will still + // block on rw.writerSem since at least this reader exists, such that + // DowngradeLock() is atomic with the previous write lock. + rw.w.Unlock() + if RaceEnabled { + RaceEnable() + } +} diff --git a/pkg/sync/memmove_unsafe.go b/pkg/sync/memmove_unsafe.go new file mode 100644 index 000000000..ad4a3a37e --- /dev/null +++ b/pkg/sync/memmove_unsafe.go @@ -0,0 +1,28 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.12 +// +build !go1.15 + +// Check go:linkname function signatures when updating Go version. + +package sync + +import ( + "unsafe" +) + +//go:linkname memmove runtime.memmove +//go:noescape +func memmove(to, from unsafe.Pointer, n uintptr) + +// Memmove is exported for SeqAtomicLoad/SeqAtomicTryLoad, which can't +// define it because go_generics can't update the go:linkname annotation. +// Furthermore, go:linkname silently doesn't work if the local name is exported +// (this is of course undocumented), which is why this indirection is +// necessary. +func Memmove(to, from unsafe.Pointer, n uintptr) { + memmove(to, from, n) +} diff --git a/pkg/sync/norace_unsafe.go b/pkg/sync/norace_unsafe.go new file mode 100644 index 000000000..006055dd6 --- /dev/null +++ b/pkg/sync/norace_unsafe.go @@ -0,0 +1,35 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !race + +package sync + +import ( + "unsafe" +) + +// RaceEnabled is true if the Go data race detector is enabled. +const RaceEnabled = false + +// RaceDisable has the same semantics as runtime.RaceDisable. +func RaceDisable() { +} + +// RaceEnable has the same semantics as runtime.RaceEnable. +func RaceEnable() { +} + +// RaceAcquire has the same semantics as runtime.RaceAcquire. +func RaceAcquire(addr unsafe.Pointer) { +} + +// RaceRelease has the same semantics as runtime.RaceRelease. +func RaceRelease(addr unsafe.Pointer) { +} + +// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. +func RaceReleaseMerge(addr unsafe.Pointer) { +} diff --git a/pkg/sync/race_unsafe.go b/pkg/sync/race_unsafe.go new file mode 100644 index 000000000..31d8fa9a6 --- /dev/null +++ b/pkg/sync/race_unsafe.go @@ -0,0 +1,41 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build race + +package sync + +import ( + "runtime" + "unsafe" +) + +// RaceEnabled is true if the Go data race detector is enabled. +const RaceEnabled = true + +// RaceDisable has the same semantics as runtime.RaceDisable. +func RaceDisable() { + runtime.RaceDisable() +} + +// RaceEnable has the same semantics as runtime.RaceEnable. +func RaceEnable() { + runtime.RaceEnable() +} + +// RaceAcquire has the same semantics as runtime.RaceAcquire. +func RaceAcquire(addr unsafe.Pointer) { + runtime.RaceAcquire(addr) +} + +// RaceRelease has the same semantics as runtime.RaceRelease. +func RaceRelease(addr unsafe.Pointer) { + runtime.RaceRelease(addr) +} + +// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. +func RaceReleaseMerge(addr unsafe.Pointer) { + runtime.RaceReleaseMerge(addr) +} diff --git a/pkg/sync/seqatomic_unsafe.go b/pkg/sync/seqatomic_unsafe.go new file mode 100644 index 000000000..eda6fb131 --- /dev/null +++ b/pkg/sync/seqatomic_unsafe.go @@ -0,0 +1,72 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package template doesn't exist. This file must be instantiated using the +// go_template_instance rule in tools/go_generics/defs.bzl. +package template + +import ( + "fmt" + "reflect" + "strings" + "unsafe" + + "gvisor.dev/gvisor/pkg/sync" +) + +// Value is a required type parameter. +// +// Value must not contain any pointers, including interface objects, function +// objects, slices, maps, channels, unsafe.Pointer, and arrays or structs +// containing any of the above. An init() function will panic if this property +// does not hold. +type Value struct{} + +// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race +// with any writer critical sections in sc. +func SeqAtomicLoad(sc *sync.SeqCount, ptr *Value) Value { + // This function doesn't use SeqAtomicTryLoad because doing so is + // measurably, significantly (~20%) slower; Go is awful at inlining. + var val Value + for { + epoch := sc.BeginRead() + if sync.RaceEnabled { + // runtime.RaceDisable() doesn't actually stop the race detector, + // so it can't help us here. Instead, call runtime.memmove + // directly, which is not instrumented by the race detector. + sync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + // This is ~40% faster for short reads than going through memmove. + val = *ptr + } + if sc.ReadOk(epoch) { + break + } + } + return val +} + +// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section +// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read +// would race with a writer critical section, SeqAtomicTryLoad returns +// (unspecified, false). +func SeqAtomicTryLoad(sc *sync.SeqCount, epoch sync.SeqCountEpoch, ptr *Value) (Value, bool) { + var val Value + if sync.RaceEnabled { + sync.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) + } else { + val = *ptr + } + return val, sc.ReadOk(epoch) +} + +func init() { + var val Value + typ := reflect.TypeOf(val) + name := typ.Name() + if ptrs := sync.PointersInType(typ, name); len(ptrs) != 0 { + panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n"))) + } +} diff --git a/pkg/sync/seqatomictest/BUILD b/pkg/sync/seqatomictest/BUILD new file mode 100644 index 000000000..eba21518d --- /dev/null +++ b/pkg/sync/seqatomictest/BUILD @@ -0,0 +1,33 @@ +load("//tools/go_stateify:defs.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "seqatomic_int", + out = "seqatomic_int_unsafe.go", + package = "seqatomic", + suffix = "Int", + template = "//pkg/sync:generic_seqatomic", + types = { + "Value": "int", + }, +) + +go_library( + name = "seqatomic", + srcs = ["seqatomic_int_unsafe.go"], + importpath = "gvisor.dev/gvisor/pkg/sync/seqatomic", + deps = [ + "//pkg/sync", + ], +) + +go_test( + name = "seqatomic_test", + size = "small", + srcs = ["seqatomic_test.go"], + embed = [":seqatomic"], + deps = ["//pkg/sync"], +) diff --git a/pkg/sync/seqatomictest/seqatomic_test.go b/pkg/sync/seqatomictest/seqatomic_test.go new file mode 100644 index 000000000..2c4568b07 --- /dev/null +++ b/pkg/sync/seqatomictest/seqatomic_test.go @@ -0,0 +1,132 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package seqatomic + +import ( + "sync/atomic" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/sync" +) + +func TestSeqAtomicLoadUncontended(t *testing.T) { + var seq sync.SeqCount + const want = 1 + data := want + if got := SeqAtomicLoadInt(&seq, &data); got != want { + t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } +} + +func TestSeqAtomicLoadAfterWrite(t *testing.T) { + var seq sync.SeqCount + var data int + const want = 1 + seq.BeginWrite() + data = want + seq.EndWrite() + if got := SeqAtomicLoadInt(&seq, &data); got != want { + t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } +} + +func TestSeqAtomicLoadDuringWrite(t *testing.T) { + var seq sync.SeqCount + var data int + const want = 1 + seq.BeginWrite() + go func() { + time.Sleep(time.Second) + data = want + seq.EndWrite() + }() + if got := SeqAtomicLoadInt(&seq, &data); got != want { + t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } +} + +func TestSeqAtomicTryLoadUncontended(t *testing.T) { + var seq sync.SeqCount + const want = 1 + data := want + epoch := seq.BeginRead() + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { + t.Errorf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) + } +} + +func TestSeqAtomicTryLoadDuringWrite(t *testing.T) { + var seq sync.SeqCount + var data int + epoch := seq.BeginRead() + seq.BeginWrite() + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { + t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) + } + seq.EndWrite() +} + +func TestSeqAtomicTryLoadAfterWrite(t *testing.T) { + var seq sync.SeqCount + var data int + epoch := seq.BeginRead() + seq.BeginWrite() + seq.EndWrite() + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { + t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) + } +} + +func BenchmarkSeqAtomicLoadIntUncontended(b *testing.B) { + var seq sync.SeqCount + const want = 42 + data := want + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if got := SeqAtomicLoadInt(&seq, &data); got != want { + b.Fatalf("SeqAtomicLoadInt: got %v, wanted %v", got, want) + } + } + }) +} + +func BenchmarkSeqAtomicTryLoadIntUncontended(b *testing.B) { + var seq sync.SeqCount + const want = 42 + data := want + b.RunParallel(func(pb *testing.PB) { + epoch := seq.BeginRead() + for pb.Next() { + if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { + b.Fatalf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) + } + } + }) +} + +// For comparison: +func BenchmarkAtomicValueLoadIntUncontended(b *testing.B) { + var a atomic.Value + const want = 42 + a.Store(int(want)) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + if got := a.Load().(int); got != want { + b.Fatalf("atomic.Value.Load: got %v, wanted %v", got, want) + } + } + }) +} diff --git a/pkg/sync/seqcount.go b/pkg/sync/seqcount.go new file mode 100644 index 000000000..a1e895352 --- /dev/null +++ b/pkg/sync/seqcount.go @@ -0,0 +1,149 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sync + +import ( + "fmt" + "reflect" + "runtime" + "sync/atomic" +) + +// SeqCount is a synchronization primitive for optimistic reader/writer +// synchronization in cases where readers can work with stale data and +// therefore do not need to block writers. +// +// Compared to sync/atomic.Value: +// +// - Mutation of SeqCount-protected data does not require memory allocation, +// whereas atomic.Value generally does. This is a significant advantage when +// writes are common. +// +// - Atomic reads of SeqCount-protected data require copying. This is a +// disadvantage when atomic reads are common. +// +// - SeqCount may be more flexible: correct use of SeqCount.ReadOk allows other +// operations to be made atomic with reads of SeqCount-protected data. +// +// - SeqCount may be less flexible: as of this writing, SeqCount-protected data +// cannot include pointers. +// +// - SeqCount is more cumbersome to use; atomic reads of SeqCount-protected +// data require instantiating function templates using go_generics (see +// seqatomic.go). +type SeqCount struct { + // epoch is incremented by BeginWrite and EndWrite, such that epoch is odd + // if a writer critical section is active, and a read from data protected + // by this SeqCount is atomic iff epoch is the same even value before and + // after the read. + epoch uint32 +} + +// SeqCountEpoch tracks writer critical sections in a SeqCount. +type SeqCountEpoch struct { + val uint32 +} + +// We assume that: +// +// - All functions in sync/atomic that perform a memory read are at least a +// read fence: memory reads before calls to such functions cannot be reordered +// after the call, and memory reads after calls to such functions cannot be +// reordered before the call, even if those reads do not use sync/atomic. +// +// - All functions in sync/atomic that perform a memory write are at least a +// write fence: memory writes before calls to such functions cannot be +// reordered after the call, and memory writes after calls to such functions +// cannot be reordered before the call, even if those writes do not use +// sync/atomic. +// +// As of this writing, the Go memory model completely fails to describe +// sync/atomic, but these properties are implied by +// https://groups.google.com/forum/#!topic/golang-nuts/7EnEhM3U7B8. + +// BeginRead indicates the beginning of a reader critical section. Reader +// critical sections DO NOT BLOCK writer critical sections, so operations in a +// reader critical section MAY RACE with writer critical sections. Races are +// detected by ReadOk at the end of the reader critical section. Thus, the +// low-level structure of readers is generally: +// +// for { +// epoch := seq.BeginRead() +// // do something idempotent with seq-protected data +// if seq.ReadOk(epoch) { +// break +// } +// } +// +// However, since reader critical sections may race with writer critical +// sections, the Go race detector will (accurately) flag data races in readers +// using this pattern. Most users of SeqCount will need to use the +// SeqAtomicLoad function template in seqatomic.go. +func (s *SeqCount) BeginRead() SeqCountEpoch { + epoch := atomic.LoadUint32(&s.epoch) + for epoch&1 != 0 { + runtime.Gosched() + epoch = atomic.LoadUint32(&s.epoch) + } + return SeqCountEpoch{epoch} +} + +// ReadOk returns true if the reader critical section initiated by a previous +// call to BeginRead() that returned epoch did not race with any writer critical +// sections. +// +// ReadOk may be called any number of times during a reader critical section. +// Reader critical sections do not need to be explicitly terminated; the last +// call to ReadOk is implicitly the end of the reader critical section. +func (s *SeqCount) ReadOk(epoch SeqCountEpoch) bool { + return atomic.LoadUint32(&s.epoch) == epoch.val +} + +// BeginWrite indicates the beginning of a writer critical section. +// +// SeqCount does not support concurrent writer critical sections; clients with +// concurrent writers must synchronize them using e.g. sync.Mutex. +func (s *SeqCount) BeginWrite() { + if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 == 0 { + panic("SeqCount.BeginWrite during writer critical section") + } +} + +// EndWrite ends the effect of a preceding BeginWrite. +func (s *SeqCount) EndWrite() { + if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 != 0 { + panic("SeqCount.EndWrite outside writer critical section") + } +} + +// PointersInType returns a list of pointers reachable from values named +// valName of the given type. +// +// PointersInType is not exhaustive, but it is guaranteed that if typ contains +// at least one pointer, then PointersInTypeOf returns a non-empty list. +func PointersInType(typ reflect.Type, valName string) []string { + switch kind := typ.Kind(); kind { + case reflect.Bool, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + return nil + + case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice, reflect.String, reflect.UnsafePointer: + return []string{valName} + + case reflect.Array: + return PointersInType(typ.Elem(), valName+"[]") + + case reflect.Struct: + var ptrs []string + for i, n := 0, typ.NumField(); i < n; i++ { + field := typ.Field(i) + ptrs = append(ptrs, PointersInType(field.Type, fmt.Sprintf("%s.%s", valName, field.Name))...) + } + return ptrs + + default: + return []string{fmt.Sprintf("%s (of type %s with unknown kind %s)", valName, typ, kind)} + } +} diff --git a/pkg/sync/seqcount_test.go b/pkg/sync/seqcount_test.go new file mode 100644 index 000000000..6eb7b4b59 --- /dev/null +++ b/pkg/sync/seqcount_test.go @@ -0,0 +1,153 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sync + +import ( + "reflect" + "testing" + "time" +) + +func TestSeqCountWriteUncontended(t *testing.T) { + var seq SeqCount + seq.BeginWrite() + seq.EndWrite() +} + +func TestSeqCountReadUncontended(t *testing.T) { + var seq SeqCount + epoch := seq.BeginRead() + if !seq.ReadOk(epoch) { + t.Errorf("ReadOk: got false, wanted true") + } +} + +func TestSeqCountBeginReadAfterWrite(t *testing.T) { + var seq SeqCount + var data int32 + const want = 1 + seq.BeginWrite() + data = want + seq.EndWrite() + epoch := seq.BeginRead() + if data != want { + t.Errorf("Reader: got %v, wanted %v", data, want) + } + if !seq.ReadOk(epoch) { + t.Errorf("ReadOk: got false, wanted true") + } +} + +func TestSeqCountBeginReadDuringWrite(t *testing.T) { + var seq SeqCount + var data int + const want = 1 + seq.BeginWrite() + go func() { + time.Sleep(time.Second) + data = want + seq.EndWrite() + }() + epoch := seq.BeginRead() + if data != want { + t.Errorf("Reader: got %v, wanted %v", data, want) + } + if !seq.ReadOk(epoch) { + t.Errorf("ReadOk: got false, wanted true") + } +} + +func TestSeqCountReadOkAfterWrite(t *testing.T) { + var seq SeqCount + epoch := seq.BeginRead() + seq.BeginWrite() + seq.EndWrite() + if seq.ReadOk(epoch) { + t.Errorf("ReadOk: got true, wanted false") + } +} + +func TestSeqCountReadOkDuringWrite(t *testing.T) { + var seq SeqCount + epoch := seq.BeginRead() + seq.BeginWrite() + if seq.ReadOk(epoch) { + t.Errorf("ReadOk: got true, wanted false") + } + seq.EndWrite() +} + +func BenchmarkSeqCountWriteUncontended(b *testing.B) { + var seq SeqCount + for i := 0; i < b.N; i++ { + seq.BeginWrite() + seq.EndWrite() + } +} + +func BenchmarkSeqCountReadUncontended(b *testing.B) { + var seq SeqCount + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + epoch := seq.BeginRead() + if !seq.ReadOk(epoch) { + b.Fatalf("ReadOk: got false, wanted true") + } + } + }) +} + +func TestPointersInType(t *testing.T) { + for _, test := range []struct { + name string // used for both test and value name + val interface{} + ptrs []string + }{ + { + name: "EmptyStruct", + val: struct{}{}, + }, + { + name: "Int", + val: int(0), + }, + { + name: "MixedStruct", + val: struct { + b bool + I int + ExportedPtr *struct{} + unexportedPtr *struct{} + arr [2]int + ptrArr [2]*int + nestedStruct struct { + nestedNonptr int + nestedPtr *int + } + structArr [1]struct { + nonptr int + ptr *int + } + }{}, + ptrs: []string{ + "MixedStruct.ExportedPtr", + "MixedStruct.unexportedPtr", + "MixedStruct.ptrArr[]", + "MixedStruct.nestedStruct.nestedPtr", + "MixedStruct.structArr[].ptr", + }, + }, + } { + t.Run(test.name, func(t *testing.T) { + typ := reflect.TypeOf(test.val) + ptrs := PointersInType(typ, test.name) + t.Logf("Found pointers: %v", ptrs) + if (len(ptrs) != 0 || len(test.ptrs) != 0) && !reflect.DeepEqual(ptrs, test.ptrs) { + t.Errorf("Got %v, wanted %v", ptrs, test.ptrs) + } + }) + } +} diff --git a/pkg/sync/syncutil.go b/pkg/sync/syncutil.go new file mode 100644 index 000000000..b16cf5333 --- /dev/null +++ b/pkg/sync/syncutil.go @@ -0,0 +1,7 @@ +// Copyright 2019 The gVisor Authors. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package sync provides synchronization primitives. +package sync diff --git a/pkg/syncutil/BUILD b/pkg/syncutil/BUILD deleted file mode 100644 index cb1f41628..000000000 --- a/pkg/syncutil/BUILD +++ /dev/null @@ -1,52 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_generics:defs.bzl", "go_template") - -package( - default_visibility = ["//:sandbox"], - licenses = ["notice"], -) - -exports_files(["LICENSE"]) - -go_template( - name = "generic_atomicptr", - srcs = ["atomicptr_unsafe.go"], - types = [ - "Value", - ], -) - -go_template( - name = "generic_seqatomic", - srcs = ["seqatomic_unsafe.go"], - types = [ - "Value", - ], - deps = [ - ":sync", - ], -) - -go_library( - name = "syncutil", - srcs = [ - "downgradable_rwmutex_unsafe.go", - "memmove_unsafe.go", - "norace_unsafe.go", - "race_unsafe.go", - "seqcount.go", - "syncutil.go", - ], - importpath = "gvisor.dev/gvisor/pkg/syncutil", -) - -go_test( - name = "syncutil_test", - size = "small", - srcs = [ - "downgradable_rwmutex_test.go", - "seqcount_test.go", - ], - embed = [":syncutil"], -) diff --git a/pkg/syncutil/LICENSE b/pkg/syncutil/LICENSE deleted file mode 100644 index 6a66aea5e..000000000 --- a/pkg/syncutil/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pkg/syncutil/README.md b/pkg/syncutil/README.md deleted file mode 100644 index 2183c4e20..000000000 --- a/pkg/syncutil/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Syncutil - -This package provides additional synchronization primitives not provided by the -Go stdlib 'sync' package. It is partially derived from the upstream 'sync' -package from go1.10. diff --git a/pkg/syncutil/atomicptr_unsafe.go b/pkg/syncutil/atomicptr_unsafe.go deleted file mode 100644 index 525c4beed..000000000 --- a/pkg/syncutil/atomicptr_unsafe.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package template doesn't exist. This file must be instantiated using the -// go_template_instance rule in tools/go_generics/defs.bzl. -package template - -import ( - "sync/atomic" - "unsafe" -) - -// Value is a required type parameter. -type Value struct{} - -// An AtomicPtr is a pointer to a value of type Value that can be atomically -// loaded and stored. The zero value of an AtomicPtr represents nil. -// -// Note that copying AtomicPtr by value performs a non-atomic read of the -// stored pointer, which is unsafe if Store() can be called concurrently; in -// this case, do `dst.Store(src.Load())` instead. -// -// +stateify savable -type AtomicPtr struct { - ptr unsafe.Pointer `state:".(*Value)"` -} - -func (p *AtomicPtr) savePtr() *Value { - return p.Load() -} - -func (p *AtomicPtr) loadPtr(v *Value) { - p.Store(v) -} - -// Load returns the value set by the most recent Store. It returns nil if there -// has been no previous call to Store. -func (p *AtomicPtr) Load() *Value { - return (*Value)(atomic.LoadPointer(&p.ptr)) -} - -// Store sets the value returned by Load to x. -func (p *AtomicPtr) Store(x *Value) { - atomic.StorePointer(&p.ptr, (unsafe.Pointer)(x)) -} diff --git a/pkg/syncutil/atomicptrtest/BUILD b/pkg/syncutil/atomicptrtest/BUILD deleted file mode 100644 index 63f411a90..000000000 --- a/pkg/syncutil/atomicptrtest/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "atomicptr_int", - out = "atomicptr_int_unsafe.go", - package = "atomicptr", - suffix = "Int", - template = "//pkg/syncutil:generic_atomicptr", - types = { - "Value": "int", - }, -) - -go_library( - name = "atomicptr", - srcs = ["atomicptr_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/syncutil/atomicptr", -) - -go_test( - name = "atomicptr_test", - size = "small", - srcs = ["atomicptr_test.go"], - embed = [":atomicptr"], -) diff --git a/pkg/syncutil/atomicptrtest/atomicptr_test.go b/pkg/syncutil/atomicptrtest/atomicptr_test.go deleted file mode 100644 index 8fdc5112e..000000000 --- a/pkg/syncutil/atomicptrtest/atomicptr_test.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package atomicptr - -import ( - "testing" -) - -func newInt(val int) *int { - return &val -} - -func TestAtomicPtr(t *testing.T) { - var p AtomicPtrInt - if got := p.Load(); got != nil { - t.Errorf("initial value is %p (%v), wanted nil", got, got) - } - want := newInt(42) - p.Store(want) - if got := p.Load(); got != want { - t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) - } - want = newInt(100) - p.Store(want) - if got := p.Load(); got != want { - t.Errorf("wrong value: got %p (%v), wanted %p (%v)", got, got, want, want) - } -} diff --git a/pkg/syncutil/downgradable_rwmutex_test.go b/pkg/syncutil/downgradable_rwmutex_test.go deleted file mode 100644 index ffaf7ecc7..000000000 --- a/pkg/syncutil/downgradable_rwmutex_test.go +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// GOMAXPROCS=10 go test - -// Copy/pasted from the standard library's sync/rwmutex_test.go, except for the -// addition of downgradingWriter and the renaming of num_iterations to -// numIterations to shut up Golint. - -package syncutil - -import ( - "fmt" - "runtime" - "sync/atomic" - "testing" -) - -func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { - m.RLock() - clocked <- true - <-cunlock - m.RUnlock() - cdone <- true -} - -func doTestParallelReaders(numReaders, gomaxprocs int) { - runtime.GOMAXPROCS(gomaxprocs) - var m DowngradableRWMutex - clocked := make(chan bool) - cunlock := make(chan bool) - cdone := make(chan bool) - for i := 0; i < numReaders; i++ { - go parallelReader(&m, clocked, cunlock, cdone) - } - // Wait for all parallel RLock()s to succeed. - for i := 0; i < numReaders; i++ { - <-clocked - } - for i := 0; i < numReaders; i++ { - cunlock <- true - } - // Wait for the goroutines to finish. - for i := 0; i < numReaders; i++ { - <-cdone - } -} - -func TestParallelReaders(t *testing.T) { - defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) - doTestParallelReaders(1, 4) - doTestParallelReaders(3, 4) - doTestParallelReaders(4, 2) -} - -func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { - for i := 0; i < numIterations; i++ { - rwm.RLock() - n := atomic.AddInt32(activity, 1) - if n < 1 || n >= 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - atomic.AddInt32(activity, -1) - rwm.RUnlock() - } - cdone <- true -} - -func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { - for i := 0; i < numIterations; i++ { - rwm.Lock() - n := atomic.AddInt32(activity, 10000) - if n != 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - atomic.AddInt32(activity, -10000) - rwm.Unlock() - } - cdone <- true -} - -func downgradingWriter(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { - for i := 0; i < numIterations; i++ { - rwm.Lock() - n := atomic.AddInt32(activity, 10000) - if n != 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - atomic.AddInt32(activity, -10000) - rwm.DowngradeLock() - n = atomic.AddInt32(activity, 1) - if n < 1 || n >= 10000 { - panic(fmt.Sprintf("wlock(%d)\n", n)) - } - for i := 0; i < 100; i++ { - } - n = atomic.AddInt32(activity, -1) - rwm.RUnlock() - } - cdone <- true -} - -func HammerDowngradableRWMutex(gomaxprocs, numReaders, numIterations int) { - runtime.GOMAXPROCS(gomaxprocs) - // Number of active readers + 10000 * number of active writers. - var activity int32 - var rwm DowngradableRWMutex - cdone := make(chan bool) - go writer(&rwm, numIterations, &activity, cdone) - go downgradingWriter(&rwm, numIterations, &activity, cdone) - var i int - for i = 0; i < numReaders/2; i++ { - go reader(&rwm, numIterations, &activity, cdone) - } - go writer(&rwm, numIterations, &activity, cdone) - go downgradingWriter(&rwm, numIterations, &activity, cdone) - for ; i < numReaders; i++ { - go reader(&rwm, numIterations, &activity, cdone) - } - // Wait for the 4 writers and all readers to finish. - for i := 0; i < 4+numReaders; i++ { - <-cdone - } -} - -func TestDowngradableRWMutex(t *testing.T) { - defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1)) - n := 1000 - if testing.Short() { - n = 5 - } - HammerDowngradableRWMutex(1, 1, n) - HammerDowngradableRWMutex(1, 3, n) - HammerDowngradableRWMutex(1, 10, n) - HammerDowngradableRWMutex(4, 1, n) - HammerDowngradableRWMutex(4, 3, n) - HammerDowngradableRWMutex(4, 10, n) - HammerDowngradableRWMutex(10, 1, n) - HammerDowngradableRWMutex(10, 3, n) - HammerDowngradableRWMutex(10, 10, n) - HammerDowngradableRWMutex(10, 5, n) -} diff --git a/pkg/syncutil/downgradable_rwmutex_unsafe.go b/pkg/syncutil/downgradable_rwmutex_unsafe.go deleted file mode 100644 index 51e11555d..000000000 --- a/pkg/syncutil/downgradable_rwmutex_unsafe.go +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright 2019 The gVisor Authors. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.13 -// +build !go1.15 - -// Check go:linkname function signatures when updating Go version. - -// This is mostly copied from the standard library's sync/rwmutex.go. -// -// Happens-before relationships indicated to the race detector: -// - Unlock -> Lock (via writerSem) -// - Unlock -> RLock (via readerSem) -// - RUnlock -> Lock (via writerSem) -// - DowngradeLock -> RLock (via readerSem) - -package syncutil - -import ( - "sync" - "sync/atomic" - "unsafe" -) - -//go:linkname runtimeSemacquire sync.runtime_Semacquire -func runtimeSemacquire(s *uint32) - -//go:linkname runtimeSemrelease sync.runtime_Semrelease -func runtimeSemrelease(s *uint32, handoff bool, skipframes int) - -// DowngradableRWMutex is identical to sync.RWMutex, but adds the DowngradeLock -// method. -type DowngradableRWMutex struct { - w sync.Mutex // held if there are pending writers - writerSem uint32 // semaphore for writers to wait for completing readers - readerSem uint32 // semaphore for readers to wait for completing writers - readerCount int32 // number of pending readers - readerWait int32 // number of departing readers -} - -const rwmutexMaxReaders = 1 << 30 - -// RLock locks rw for reading. -func (rw *DowngradableRWMutex) RLock() { - if RaceEnabled { - RaceDisable() - } - if atomic.AddInt32(&rw.readerCount, 1) < 0 { - // A writer is pending, wait for it. - runtimeSemacquire(&rw.readerSem) - } - if RaceEnabled { - RaceEnable() - RaceAcquire(unsafe.Pointer(&rw.readerSem)) - } -} - -// RUnlock undoes a single RLock call. -func (rw *DowngradableRWMutex) RUnlock() { - if RaceEnabled { - RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) - RaceDisable() - } - if r := atomic.AddInt32(&rw.readerCount, -1); r < 0 { - if r+1 == 0 || r+1 == -rwmutexMaxReaders { - panic("RUnlock of unlocked DowngradableRWMutex") - } - // A writer is pending. - if atomic.AddInt32(&rw.readerWait, -1) == 0 { - // The last reader unblocks the writer. - runtimeSemrelease(&rw.writerSem, false, 0) - } - } - if RaceEnabled { - RaceEnable() - } -} - -// Lock locks rw for writing. -func (rw *DowngradableRWMutex) Lock() { - if RaceEnabled { - RaceDisable() - } - // First, resolve competition with other writers. - rw.w.Lock() - // Announce to readers there is a pending writer. - r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders - // Wait for active readers. - if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 { - runtimeSemacquire(&rw.writerSem) - } - if RaceEnabled { - RaceEnable() - RaceAcquire(unsafe.Pointer(&rw.writerSem)) - } -} - -// Unlock unlocks rw for writing. -func (rw *DowngradableRWMutex) Unlock() { - if RaceEnabled { - RaceRelease(unsafe.Pointer(&rw.writerSem)) - RaceRelease(unsafe.Pointer(&rw.readerSem)) - RaceDisable() - } - // Announce to readers there is no active writer. - r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders) - if r >= rwmutexMaxReaders { - panic("Unlock of unlocked DowngradableRWMutex") - } - // Unblock blocked readers, if any. - for i := 0; i < int(r); i++ { - runtimeSemrelease(&rw.readerSem, false, 0) - } - // Allow other writers to proceed. - rw.w.Unlock() - if RaceEnabled { - RaceEnable() - } -} - -// DowngradeLock atomically unlocks rw for writing and locks it for reading. -func (rw *DowngradableRWMutex) DowngradeLock() { - if RaceEnabled { - RaceRelease(unsafe.Pointer(&rw.readerSem)) - RaceDisable() - } - // Announce to readers there is no active writer and one additional reader. - r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders+1) - if r >= rwmutexMaxReaders+1 { - panic("DowngradeLock of unlocked DowngradableRWMutex") - } - // Unblock blocked readers, if any. Note that this loop starts as 1 since r - // includes this goroutine. - for i := 1; i < int(r); i++ { - runtimeSemrelease(&rw.readerSem, false, 0) - } - // Allow other writers to proceed to rw.w.Lock(). Note that they will still - // block on rw.writerSem since at least this reader exists, such that - // DowngradeLock() is atomic with the previous write lock. - rw.w.Unlock() - if RaceEnabled { - RaceEnable() - } -} diff --git a/pkg/syncutil/memmove_unsafe.go b/pkg/syncutil/memmove_unsafe.go deleted file mode 100644 index 348675baa..000000000 --- a/pkg/syncutil/memmove_unsafe.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build go1.12 -// +build !go1.15 - -// Check go:linkname function signatures when updating Go version. - -package syncutil - -import ( - "unsafe" -) - -//go:linkname memmove runtime.memmove -//go:noescape -func memmove(to, from unsafe.Pointer, n uintptr) - -// Memmove is exported for SeqAtomicLoad/SeqAtomicTryLoad, which can't -// define it because go_generics can't update the go:linkname annotation. -// Furthermore, go:linkname silently doesn't work if the local name is exported -// (this is of course undocumented), which is why this indirection is -// necessary. -func Memmove(to, from unsafe.Pointer, n uintptr) { - memmove(to, from, n) -} diff --git a/pkg/syncutil/norace_unsafe.go b/pkg/syncutil/norace_unsafe.go deleted file mode 100644 index 0a0a9deda..000000000 --- a/pkg/syncutil/norace_unsafe.go +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !race - -package syncutil - -import ( - "unsafe" -) - -// RaceEnabled is true if the Go data race detector is enabled. -const RaceEnabled = false - -// RaceDisable has the same semantics as runtime.RaceDisable. -func RaceDisable() { -} - -// RaceEnable has the same semantics as runtime.RaceEnable. -func RaceEnable() { -} - -// RaceAcquire has the same semantics as runtime.RaceAcquire. -func RaceAcquire(addr unsafe.Pointer) { -} - -// RaceRelease has the same semantics as runtime.RaceRelease. -func RaceRelease(addr unsafe.Pointer) { -} - -// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. -func RaceReleaseMerge(addr unsafe.Pointer) { -} diff --git a/pkg/syncutil/race_unsafe.go b/pkg/syncutil/race_unsafe.go deleted file mode 100644 index 206067ec1..000000000 --- a/pkg/syncutil/race_unsafe.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build race - -package syncutil - -import ( - "runtime" - "unsafe" -) - -// RaceEnabled is true if the Go data race detector is enabled. -const RaceEnabled = true - -// RaceDisable has the same semantics as runtime.RaceDisable. -func RaceDisable() { - runtime.RaceDisable() -} - -// RaceEnable has the same semantics as runtime.RaceEnable. -func RaceEnable() { - runtime.RaceEnable() -} - -// RaceAcquire has the same semantics as runtime.RaceAcquire. -func RaceAcquire(addr unsafe.Pointer) { - runtime.RaceAcquire(addr) -} - -// RaceRelease has the same semantics as runtime.RaceRelease. -func RaceRelease(addr unsafe.Pointer) { - runtime.RaceRelease(addr) -} - -// RaceReleaseMerge has the same semantics as runtime.RaceReleaseMerge. -func RaceReleaseMerge(addr unsafe.Pointer) { - runtime.RaceReleaseMerge(addr) -} diff --git a/pkg/syncutil/seqatomic_unsafe.go b/pkg/syncutil/seqatomic_unsafe.go deleted file mode 100644 index cb6d2eb22..000000000 --- a/pkg/syncutil/seqatomic_unsafe.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package template doesn't exist. This file must be instantiated using the -// go_template_instance rule in tools/go_generics/defs.bzl. -package template - -import ( - "fmt" - "reflect" - "strings" - "unsafe" - - "gvisor.dev/gvisor/pkg/syncutil" -) - -// Value is a required type parameter. -// -// Value must not contain any pointers, including interface objects, function -// objects, slices, maps, channels, unsafe.Pointer, and arrays or structs -// containing any of the above. An init() function will panic if this property -// does not hold. -type Value struct{} - -// SeqAtomicLoad returns a copy of *ptr, ensuring that the read does not race -// with any writer critical sections in sc. -func SeqAtomicLoad(sc *syncutil.SeqCount, ptr *Value) Value { - // This function doesn't use SeqAtomicTryLoad because doing so is - // measurably, significantly (~20%) slower; Go is awful at inlining. - var val Value - for { - epoch := sc.BeginRead() - if syncutil.RaceEnabled { - // runtime.RaceDisable() doesn't actually stop the race detector, - // so it can't help us here. Instead, call runtime.memmove - // directly, which is not instrumented by the race detector. - syncutil.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) - } else { - // This is ~40% faster for short reads than going through memmove. - val = *ptr - } - if sc.ReadOk(epoch) { - break - } - } - return val -} - -// SeqAtomicTryLoad returns a copy of *ptr while in a reader critical section -// in sc initiated by a call to sc.BeginRead() that returned epoch. If the read -// would race with a writer critical section, SeqAtomicTryLoad returns -// (unspecified, false). -func SeqAtomicTryLoad(sc *syncutil.SeqCount, epoch syncutil.SeqCountEpoch, ptr *Value) (Value, bool) { - var val Value - if syncutil.RaceEnabled { - syncutil.Memmove(unsafe.Pointer(&val), unsafe.Pointer(ptr), unsafe.Sizeof(val)) - } else { - val = *ptr - } - return val, sc.ReadOk(epoch) -} - -func init() { - var val Value - typ := reflect.TypeOf(val) - name := typ.Name() - if ptrs := syncutil.PointersInType(typ, name); len(ptrs) != 0 { - panic(fmt.Sprintf("SeqAtomicLoad<%s> is invalid since values %s of type %s contain pointers:\n%s", typ, name, typ, strings.Join(ptrs, "\n"))) - } -} diff --git a/pkg/syncutil/seqatomictest/BUILD b/pkg/syncutil/seqatomictest/BUILD deleted file mode 100644 index ba18f3238..000000000 --- a/pkg/syncutil/seqatomictest/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "seqatomic_int", - out = "seqatomic_int_unsafe.go", - package = "seqatomic", - suffix = "Int", - template = "//pkg/syncutil:generic_seqatomic", - types = { - "Value": "int", - }, -) - -go_library( - name = "seqatomic", - srcs = ["seqatomic_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/syncutil/seqatomic", - deps = [ - "//pkg/syncutil", - ], -) - -go_test( - name = "seqatomic_test", - size = "small", - srcs = ["seqatomic_test.go"], - embed = [":seqatomic"], - deps = [ - "//pkg/syncutil", - ], -) diff --git a/pkg/syncutil/seqatomictest/seqatomic_test.go b/pkg/syncutil/seqatomictest/seqatomic_test.go deleted file mode 100644 index b0db44999..000000000 --- a/pkg/syncutil/seqatomictest/seqatomic_test.go +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package seqatomic - -import ( - "sync/atomic" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/syncutil" -) - -func TestSeqAtomicLoadUncontended(t *testing.T) { - var seq syncutil.SeqCount - const want = 1 - data := want - if got := SeqAtomicLoadInt(&seq, &data); got != want { - t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } -} - -func TestSeqAtomicLoadAfterWrite(t *testing.T) { - var seq syncutil.SeqCount - var data int - const want = 1 - seq.BeginWrite() - data = want - seq.EndWrite() - if got := SeqAtomicLoadInt(&seq, &data); got != want { - t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } -} - -func TestSeqAtomicLoadDuringWrite(t *testing.T) { - var seq syncutil.SeqCount - var data int - const want = 1 - seq.BeginWrite() - go func() { - time.Sleep(time.Second) - data = want - seq.EndWrite() - }() - if got := SeqAtomicLoadInt(&seq, &data); got != want { - t.Errorf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } -} - -func TestSeqAtomicTryLoadUncontended(t *testing.T) { - var seq syncutil.SeqCount - const want = 1 - data := want - epoch := seq.BeginRead() - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { - t.Errorf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) - } -} - -func TestSeqAtomicTryLoadDuringWrite(t *testing.T) { - var seq syncutil.SeqCount - var data int - epoch := seq.BeginRead() - seq.BeginWrite() - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { - t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) - } - seq.EndWrite() -} - -func TestSeqAtomicTryLoadAfterWrite(t *testing.T) { - var seq syncutil.SeqCount - var data int - epoch := seq.BeginRead() - seq.BeginWrite() - seq.EndWrite() - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); ok { - t.Errorf("SeqAtomicTryLoadInt: got (%v, true), wanted (_, false)", got) - } -} - -func BenchmarkSeqAtomicLoadIntUncontended(b *testing.B) { - var seq syncutil.SeqCount - const want = 42 - data := want - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - if got := SeqAtomicLoadInt(&seq, &data); got != want { - b.Fatalf("SeqAtomicLoadInt: got %v, wanted %v", got, want) - } - } - }) -} - -func BenchmarkSeqAtomicTryLoadIntUncontended(b *testing.B) { - var seq syncutil.SeqCount - const want = 42 - data := want - b.RunParallel(func(pb *testing.PB) { - epoch := seq.BeginRead() - for pb.Next() { - if got, ok := SeqAtomicTryLoadInt(&seq, epoch, &data); !ok || got != want { - b.Fatalf("SeqAtomicTryLoadInt: got (%v, %v), wanted (%v, true)", got, ok, want) - } - } - }) -} - -// For comparison: -func BenchmarkAtomicValueLoadIntUncontended(b *testing.B) { - var a atomic.Value - const want = 42 - a.Store(int(want)) - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - if got := a.Load().(int); got != want { - b.Fatalf("atomic.Value.Load: got %v, wanted %v", got, want) - } - } - }) -} diff --git a/pkg/syncutil/seqcount.go b/pkg/syncutil/seqcount.go deleted file mode 100644 index 11d8dbfaa..000000000 --- a/pkg/syncutil/seqcount.go +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package syncutil - -import ( - "fmt" - "reflect" - "runtime" - "sync/atomic" -) - -// SeqCount is a synchronization primitive for optimistic reader/writer -// synchronization in cases where readers can work with stale data and -// therefore do not need to block writers. -// -// Compared to sync/atomic.Value: -// -// - Mutation of SeqCount-protected data does not require memory allocation, -// whereas atomic.Value generally does. This is a significant advantage when -// writes are common. -// -// - Atomic reads of SeqCount-protected data require copying. This is a -// disadvantage when atomic reads are common. -// -// - SeqCount may be more flexible: correct use of SeqCount.ReadOk allows other -// operations to be made atomic with reads of SeqCount-protected data. -// -// - SeqCount may be less flexible: as of this writing, SeqCount-protected data -// cannot include pointers. -// -// - SeqCount is more cumbersome to use; atomic reads of SeqCount-protected -// data require instantiating function templates using go_generics (see -// seqatomic.go). -type SeqCount struct { - // epoch is incremented by BeginWrite and EndWrite, such that epoch is odd - // if a writer critical section is active, and a read from data protected - // by this SeqCount is atomic iff epoch is the same even value before and - // after the read. - epoch uint32 -} - -// SeqCountEpoch tracks writer critical sections in a SeqCount. -type SeqCountEpoch struct { - val uint32 -} - -// We assume that: -// -// - All functions in sync/atomic that perform a memory read are at least a -// read fence: memory reads before calls to such functions cannot be reordered -// after the call, and memory reads after calls to such functions cannot be -// reordered before the call, even if those reads do not use sync/atomic. -// -// - All functions in sync/atomic that perform a memory write are at least a -// write fence: memory writes before calls to such functions cannot be -// reordered after the call, and memory writes after calls to such functions -// cannot be reordered before the call, even if those writes do not use -// sync/atomic. -// -// As of this writing, the Go memory model completely fails to describe -// sync/atomic, but these properties are implied by -// https://groups.google.com/forum/#!topic/golang-nuts/7EnEhM3U7B8. - -// BeginRead indicates the beginning of a reader critical section. Reader -// critical sections DO NOT BLOCK writer critical sections, so operations in a -// reader critical section MAY RACE with writer critical sections. Races are -// detected by ReadOk at the end of the reader critical section. Thus, the -// low-level structure of readers is generally: -// -// for { -// epoch := seq.BeginRead() -// // do something idempotent with seq-protected data -// if seq.ReadOk(epoch) { -// break -// } -// } -// -// However, since reader critical sections may race with writer critical -// sections, the Go race detector will (accurately) flag data races in readers -// using this pattern. Most users of SeqCount will need to use the -// SeqAtomicLoad function template in seqatomic.go. -func (s *SeqCount) BeginRead() SeqCountEpoch { - epoch := atomic.LoadUint32(&s.epoch) - for epoch&1 != 0 { - runtime.Gosched() - epoch = atomic.LoadUint32(&s.epoch) - } - return SeqCountEpoch{epoch} -} - -// ReadOk returns true if the reader critical section initiated by a previous -// call to BeginRead() that returned epoch did not race with any writer critical -// sections. -// -// ReadOk may be called any number of times during a reader critical section. -// Reader critical sections do not need to be explicitly terminated; the last -// call to ReadOk is implicitly the end of the reader critical section. -func (s *SeqCount) ReadOk(epoch SeqCountEpoch) bool { - return atomic.LoadUint32(&s.epoch) == epoch.val -} - -// BeginWrite indicates the beginning of a writer critical section. -// -// SeqCount does not support concurrent writer critical sections; clients with -// concurrent writers must synchronize them using e.g. sync.Mutex. -func (s *SeqCount) BeginWrite() { - if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 == 0 { - panic("SeqCount.BeginWrite during writer critical section") - } -} - -// EndWrite ends the effect of a preceding BeginWrite. -func (s *SeqCount) EndWrite() { - if epoch := atomic.AddUint32(&s.epoch, 1); epoch&1 != 0 { - panic("SeqCount.EndWrite outside writer critical section") - } -} - -// PointersInType returns a list of pointers reachable from values named -// valName of the given type. -// -// PointersInType is not exhaustive, but it is guaranteed that if typ contains -// at least one pointer, then PointersInTypeOf returns a non-empty list. -func PointersInType(typ reflect.Type, valName string) []string { - switch kind := typ.Kind(); kind { - case reflect.Bool, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: - return nil - - case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice, reflect.String, reflect.UnsafePointer: - return []string{valName} - - case reflect.Array: - return PointersInType(typ.Elem(), valName+"[]") - - case reflect.Struct: - var ptrs []string - for i, n := 0, typ.NumField(); i < n; i++ { - field := typ.Field(i) - ptrs = append(ptrs, PointersInType(field.Type, fmt.Sprintf("%s.%s", valName, field.Name))...) - } - return ptrs - - default: - return []string{fmt.Sprintf("%s (of type %s with unknown kind %s)", valName, typ, kind)} - } -} diff --git a/pkg/syncutil/seqcount_test.go b/pkg/syncutil/seqcount_test.go deleted file mode 100644 index 14d6aedea..000000000 --- a/pkg/syncutil/seqcount_test.go +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package syncutil - -import ( - "reflect" - "testing" - "time" -) - -func TestSeqCountWriteUncontended(t *testing.T) { - var seq SeqCount - seq.BeginWrite() - seq.EndWrite() -} - -func TestSeqCountReadUncontended(t *testing.T) { - var seq SeqCount - epoch := seq.BeginRead() - if !seq.ReadOk(epoch) { - t.Errorf("ReadOk: got false, wanted true") - } -} - -func TestSeqCountBeginReadAfterWrite(t *testing.T) { - var seq SeqCount - var data int32 - const want = 1 - seq.BeginWrite() - data = want - seq.EndWrite() - epoch := seq.BeginRead() - if data != want { - t.Errorf("Reader: got %v, wanted %v", data, want) - } - if !seq.ReadOk(epoch) { - t.Errorf("ReadOk: got false, wanted true") - } -} - -func TestSeqCountBeginReadDuringWrite(t *testing.T) { - var seq SeqCount - var data int - const want = 1 - seq.BeginWrite() - go func() { - time.Sleep(time.Second) - data = want - seq.EndWrite() - }() - epoch := seq.BeginRead() - if data != want { - t.Errorf("Reader: got %v, wanted %v", data, want) - } - if !seq.ReadOk(epoch) { - t.Errorf("ReadOk: got false, wanted true") - } -} - -func TestSeqCountReadOkAfterWrite(t *testing.T) { - var seq SeqCount - epoch := seq.BeginRead() - seq.BeginWrite() - seq.EndWrite() - if seq.ReadOk(epoch) { - t.Errorf("ReadOk: got true, wanted false") - } -} - -func TestSeqCountReadOkDuringWrite(t *testing.T) { - var seq SeqCount - epoch := seq.BeginRead() - seq.BeginWrite() - if seq.ReadOk(epoch) { - t.Errorf("ReadOk: got true, wanted false") - } - seq.EndWrite() -} - -func BenchmarkSeqCountWriteUncontended(b *testing.B) { - var seq SeqCount - for i := 0; i < b.N; i++ { - seq.BeginWrite() - seq.EndWrite() - } -} - -func BenchmarkSeqCountReadUncontended(b *testing.B) { - var seq SeqCount - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - epoch := seq.BeginRead() - if !seq.ReadOk(epoch) { - b.Fatalf("ReadOk: got false, wanted true") - } - } - }) -} - -func TestPointersInType(t *testing.T) { - for _, test := range []struct { - name string // used for both test and value name - val interface{} - ptrs []string - }{ - { - name: "EmptyStruct", - val: struct{}{}, - }, - { - name: "Int", - val: int(0), - }, - { - name: "MixedStruct", - val: struct { - b bool - I int - ExportedPtr *struct{} - unexportedPtr *struct{} - arr [2]int - ptrArr [2]*int - nestedStruct struct { - nestedNonptr int - nestedPtr *int - } - structArr [1]struct { - nonptr int - ptr *int - } - }{}, - ptrs: []string{ - "MixedStruct.ExportedPtr", - "MixedStruct.unexportedPtr", - "MixedStruct.ptrArr[]", - "MixedStruct.nestedStruct.nestedPtr", - "MixedStruct.structArr[].ptr", - }, - }, - } { - t.Run(test.name, func(t *testing.T) { - typ := reflect.TypeOf(test.val) - ptrs := PointersInType(typ, test.name) - t.Logf("Found pointers: %v", ptrs) - if (len(ptrs) != 0 || len(test.ptrs) != 0) && !reflect.DeepEqual(ptrs, test.ptrs) { - t.Errorf("Got %v, wanted %v", ptrs, test.ptrs) - } - }) - } -} diff --git a/pkg/syncutil/syncutil.go b/pkg/syncutil/syncutil.go deleted file mode 100644 index 66e750d06..000000000 --- a/pkg/syncutil/syncutil.go +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package syncutil provides synchronization primitives. -package syncutil diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD index e07ebd153..db06d02c6 100644 --- a/pkg/tcpip/BUILD +++ b/pkg/tcpip/BUILD @@ -15,6 +15,7 @@ go_library( importpath = "gvisor.dev/gvisor/pkg/tcpip", visibility = ["//visibility:public"], deps = [ + "//pkg/sync", "//pkg/tcpip/buffer", "//pkg/tcpip/iptables", "//pkg/waiter", diff --git a/pkg/tcpip/adapters/gonet/BUILD b/pkg/tcpip/adapters/gonet/BUILD index 78df5a0b1..3df7d18d3 100644 --- a/pkg/tcpip/adapters/gonet/BUILD +++ b/pkg/tcpip/adapters/gonet/BUILD @@ -9,6 +9,7 @@ go_library( importpath = "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet", visibility = ["//visibility:public"], deps = [ + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/stack", diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go index cd6ce930a..a2f44b496 100644 --- a/pkg/tcpip/adapters/gonet/gonet.go +++ b/pkg/tcpip/adapters/gonet/gonet.go @@ -20,9 +20,9 @@ import ( "errors" "io" "net" - "sync" "time" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/stack" diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD index 897c94821..66cc53ed4 100644 --- a/pkg/tcpip/link/fdbased/BUILD +++ b/pkg/tcpip/link/fdbased/BUILD @@ -16,6 +16,7 @@ go_library( importpath = "gvisor.dev/gvisor/pkg/tcpip/link/fdbased", visibility = ["//visibility:public"], deps = [ + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index fa8a703d9..b7f60178e 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -41,10 +41,10 @@ package fdbased import ( "fmt" - "sync" "syscall" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/link/sharedmem/BUILD b/pkg/tcpip/link/sharedmem/BUILD index a4f9cdd69..09165dd4c 100644 --- a/pkg/tcpip/link/sharedmem/BUILD +++ b/pkg/tcpip/link/sharedmem/BUILD @@ -15,6 +15,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/log", + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", @@ -31,6 +32,7 @@ go_test( ], embed = [":sharedmem"], deps = [ + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", diff --git a/pkg/tcpip/link/sharedmem/pipe/BUILD b/pkg/tcpip/link/sharedmem/pipe/BUILD index 6b5bc542c..a0d4ad0be 100644 --- a/pkg/tcpip/link/sharedmem/pipe/BUILD +++ b/pkg/tcpip/link/sharedmem/pipe/BUILD @@ -21,4 +21,5 @@ go_test( "pipe_test.go", ], embed = [":pipe"], + deps = ["//pkg/sync"], ) diff --git a/pkg/tcpip/link/sharedmem/pipe/pipe_test.go b/pkg/tcpip/link/sharedmem/pipe/pipe_test.go index 59ef69a8b..dc239a0d0 100644 --- a/pkg/tcpip/link/sharedmem/pipe/pipe_test.go +++ b/pkg/tcpip/link/sharedmem/pipe/pipe_test.go @@ -18,8 +18,9 @@ import ( "math/rand" "reflect" "runtime" - "sync" "testing" + + "gvisor.dev/gvisor/pkg/sync" ) func TestSimpleReadWrite(t *testing.T) { diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go index 080f9d667..655e537c4 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem.go +++ b/pkg/tcpip/link/sharedmem/sharedmem.go @@ -23,11 +23,11 @@ package sharedmem import ( - "sync" "sync/atomic" "syscall" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go index 89603c48f..5c729a439 100644 --- a/pkg/tcpip/link/sharedmem/sharedmem_test.go +++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go @@ -22,11 +22,11 @@ import ( "math/rand" "os" "strings" - "sync" "syscall" "testing" "time" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD index acf1e022c..ed16076fd 100644 --- a/pkg/tcpip/network/fragmentation/BUILD +++ b/pkg/tcpip/network/fragmentation/BUILD @@ -28,6 +28,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/log", + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", ], diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go index 6da5238ec..92f2aa13a 100644 --- a/pkg/tcpip/network/fragmentation/fragmentation.go +++ b/pkg/tcpip/network/fragmentation/fragmentation.go @@ -19,9 +19,9 @@ package fragmentation import ( "fmt" "log" - "sync" "time" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip/buffer" ) diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go index 9e002e396..0a83d81f2 100644 --- a/pkg/tcpip/network/fragmentation/reassembler.go +++ b/pkg/tcpip/network/fragmentation/reassembler.go @@ -18,9 +18,9 @@ import ( "container/heap" "fmt" "math" - "sync" "time" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip/buffer" ) diff --git a/pkg/tcpip/ports/BUILD b/pkg/tcpip/ports/BUILD index e156b01f6..a6ef3bdcc 100644 --- a/pkg/tcpip/ports/BUILD +++ b/pkg/tcpip/ports/BUILD @@ -9,6 +9,7 @@ go_library( importpath = "gvisor.dev/gvisor/pkg/tcpip/ports", visibility = ["//visibility:public"], deps = [ + "//pkg/sync", "//pkg/tcpip", ], ) diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go index 6c5e19e8f..b937cb84b 100644 --- a/pkg/tcpip/ports/ports.go +++ b/pkg/tcpip/ports/ports.go @@ -18,9 +18,9 @@ package ports import ( "math" "math/rand" - "sync" "sync/atomic" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" ) diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index 826fca4de..6a8654105 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -36,6 +36,7 @@ go_library( "//pkg/ilist", "//pkg/rand", "//pkg/sleep", + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/hash/jenkins", @@ -80,6 +81,7 @@ go_test( embed = [":stack"], deps = [ "//pkg/sleep", + "//pkg/sync", "//pkg/tcpip", ], ) diff --git a/pkg/tcpip/stack/linkaddrcache.go b/pkg/tcpip/stack/linkaddrcache.go index 267df60d1..403557fd7 100644 --- a/pkg/tcpip/stack/linkaddrcache.go +++ b/pkg/tcpip/stack/linkaddrcache.go @@ -16,10 +16,10 @@ package stack import ( "fmt" - "sync" "time" "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" ) diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go index 9946b8fe8..1baa498d0 100644 --- a/pkg/tcpip/stack/linkaddrcache_test.go +++ b/pkg/tcpip/stack/linkaddrcache_test.go @@ -16,12 +16,12 @@ package stack import ( "fmt" - "sync" "sync/atomic" "testing" "time" "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" ) diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 3810c6602..fe557ccbd 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -16,9 +16,9 @@ package stack import ( "strings" - "sync" "sync/atomic" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 41bf9fd9b..a47ceba54 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -21,13 +21,13 @@ package stack import ( "encoding/binary" - "sync" "sync/atomic" "time" "golang.org/x/time/rate" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go index 67c21be42..f384a91de 100644 --- a/pkg/tcpip/stack/transport_demuxer.go +++ b/pkg/tcpip/stack/transport_demuxer.go @@ -18,8 +18,8 @@ import ( "fmt" "math/rand" "sort" - "sync" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 72b5ce179..4a090ac86 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -35,10 +35,10 @@ import ( "reflect" "strconv" "strings" - "sync" "sync/atomic" "time" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/iptables" "gvisor.dev/gvisor/pkg/waiter" diff --git a/pkg/tcpip/transport/icmp/BUILD b/pkg/tcpip/transport/icmp/BUILD index d8c5b5058..3aa23d529 100644 --- a/pkg/tcpip/transport/icmp/BUILD +++ b/pkg/tcpip/transport/icmp/BUILD @@ -28,6 +28,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/sleep", + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index c7ce74cdd..330786f4c 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -15,8 +15,7 @@ package icmp import ( - "sync" - + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/transport/packet/BUILD b/pkg/tcpip/transport/packet/BUILD index 44b58ff6b..4858d150c 100644 --- a/pkg/tcpip/transport/packet/BUILD +++ b/pkg/tcpip/transport/packet/BUILD @@ -28,6 +28,7 @@ go_library( deps = [ "//pkg/log", "//pkg/sleep", + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go index 07ffa8aba..fc5bc69fa 100644 --- a/pkg/tcpip/transport/packet/endpoint.go +++ b/pkg/tcpip/transport/packet/endpoint.go @@ -25,8 +25,7 @@ package packet import ( - "sync" - + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/transport/raw/BUILD b/pkg/tcpip/transport/raw/BUILD index 00991ac8e..2f2131ff7 100644 --- a/pkg/tcpip/transport/raw/BUILD +++ b/pkg/tcpip/transport/raw/BUILD @@ -29,6 +29,7 @@ go_library( deps = [ "//pkg/log", "//pkg/sleep", + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go index 85f7eb76b..ee9c4c58b 100644 --- a/pkg/tcpip/transport/raw/endpoint.go +++ b/pkg/tcpip/transport/raw/endpoint.go @@ -26,8 +26,7 @@ package raw import ( - "sync" - + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 3b353d56c..353bd06f4 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -48,6 +48,7 @@ go_library( "//pkg/log", "//pkg/rand", "//pkg/sleep", + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/hash/jenkins", diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 5422ae80c..1ea996936 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -19,11 +19,11 @@ import ( "encoding/binary" "hash" "io" - "sync" "time" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index cdd69f360..613ec1775 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -16,11 +16,11 @@ package tcp import ( "encoding/binary" - "sync" "time" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 830bc1e3e..cca511fb9 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -19,12 +19,12 @@ import ( "fmt" "math" "strings" - "sync" "sync/atomic" "time" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go index 7aa4c3f0e..4b8d867bc 100644 --- a/pkg/tcpip/transport/tcp/endpoint_state.go +++ b/pkg/tcpip/transport/tcp/endpoint_state.go @@ -16,9 +16,9 @@ package tcp import ( "fmt" - "sync" "time" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/stack" diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index 4983bca81..7eb613be5 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -15,8 +15,7 @@ package tcp import ( - "sync" - + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/seqnum" diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index bc718064c..9a8f64aa6 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -22,9 +22,9 @@ package tcp import ( "strings" - "sync" "time" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/transport/tcp/segment_queue.go b/pkg/tcpip/transport/tcp/segment_queue.go index e0759225e..bd20a7ee9 100644 --- a/pkg/tcpip/transport/tcp/segment_queue.go +++ b/pkg/tcpip/transport/tcp/segment_queue.go @@ -15,7 +15,7 @@ package tcp import ( - "sync" + "gvisor.dev/gvisor/pkg/sync" ) // segmentQueue is a bounded, thread-safe queue of TCP segments. diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 8a947dc66..79f2d274b 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -16,11 +16,11 @@ package tcp import ( "math" - "sync" "sync/atomic" "time" "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tcpip/transport/udp/BUILD b/pkg/tcpip/transport/udp/BUILD index 97e4d5825..57ff123e3 100644 --- a/pkg/tcpip/transport/udp/BUILD +++ b/pkg/tcpip/transport/udp/BUILD @@ -30,6 +30,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/sleep", + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/header", diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 864dc8733..a4ff29a7d 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -15,8 +15,7 @@ package udp import ( - "sync" - + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" diff --git a/pkg/tmutex/BUILD b/pkg/tmutex/BUILD index 6afdb29b7..07778e4f7 100644 --- a/pkg/tmutex/BUILD +++ b/pkg/tmutex/BUILD @@ -15,4 +15,5 @@ go_test( size = "medium", srcs = ["tmutex_test.go"], embed = [":tmutex"], + deps = ["//pkg/sync"], ) diff --git a/pkg/tmutex/tmutex_test.go b/pkg/tmutex/tmutex_test.go index ce34c7962..05540696a 100644 --- a/pkg/tmutex/tmutex_test.go +++ b/pkg/tmutex/tmutex_test.go @@ -17,10 +17,11 @@ package tmutex import ( "fmt" "runtime" - "sync" "sync/atomic" "testing" "time" + + "gvisor.dev/gvisor/pkg/sync" ) func TestBasicLock(t *testing.T) { diff --git a/pkg/unet/BUILD b/pkg/unet/BUILD index 8f6f180e5..d1885ae66 100644 --- a/pkg/unet/BUILD +++ b/pkg/unet/BUILD @@ -24,4 +24,5 @@ go_test( "unet_test.go", ], embed = [":unet"], + deps = ["//pkg/sync"], ) diff --git a/pkg/unet/unet_test.go b/pkg/unet/unet_test.go index a3cc6f5d3..5c4b9e8e9 100644 --- a/pkg/unet/unet_test.go +++ b/pkg/unet/unet_test.go @@ -19,10 +19,11 @@ import ( "os" "path/filepath" "reflect" - "sync" "syscall" "testing" "time" + + "gvisor.dev/gvisor/pkg/sync" ) func randomFilename() (string, error) { diff --git a/pkg/urpc/BUILD b/pkg/urpc/BUILD index b6bbb0ea2..b8fdc3125 100644 --- a/pkg/urpc/BUILD +++ b/pkg/urpc/BUILD @@ -11,6 +11,7 @@ go_library( deps = [ "//pkg/fd", "//pkg/log", + "//pkg/sync", "//pkg/unet", ], ) diff --git a/pkg/urpc/urpc.go b/pkg/urpc/urpc.go index df59ffab1..13b2ea314 100644 --- a/pkg/urpc/urpc.go +++ b/pkg/urpc/urpc.go @@ -27,10 +27,10 @@ import ( "os" "reflect" "runtime" - "sync" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" ) diff --git a/pkg/waiter/BUILD b/pkg/waiter/BUILD index 0427bc41f..1c6890e52 100644 --- a/pkg/waiter/BUILD +++ b/pkg/waiter/BUILD @@ -24,6 +24,7 @@ go_library( ], importpath = "gvisor.dev/gvisor/pkg/waiter", visibility = ["//visibility:public"], + deps = ["//pkg/sync"], ) go_test( diff --git a/pkg/waiter/waiter.go b/pkg/waiter/waiter.go index 8a65ed164..f708e95fa 100644 --- a/pkg/waiter/waiter.go +++ b/pkg/waiter/waiter.go @@ -58,7 +58,7 @@ package waiter import ( - "sync" + "gvisor.dev/gvisor/pkg/sync" ) // EventMask represents io events as used in the poll() syscall. diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 6226b63f8..3e20f8f2f 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -74,6 +74,7 @@ go_library( "//pkg/sentry/usage", "//pkg/sentry/usermem", "//pkg/sentry/watchdog", + "//pkg/sync", "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/link/fdbased", @@ -114,6 +115,7 @@ go_test( "//pkg/sentry/context/contexttest", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", + "//pkg/sync", "//pkg/unet", "//runsc/fsgofer", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index 352e710d2..9c23b9553 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -17,7 +17,6 @@ package boot import ( "fmt" "os" - "sync" "syscall" "github.com/golang/protobuf/proto" @@ -27,6 +26,7 @@ import ( ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" "gvisor.dev/gvisor/pkg/sentry/strace" spb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" + "gvisor.dev/gvisor/pkg/sync" ) func initCompatLogs(fd int) error { diff --git a/runsc/boot/limits.go b/runsc/boot/limits.go index d1c0bb9b5..ce62236e5 100644 --- a/runsc/boot/limits.go +++ b/runsc/boot/limits.go @@ -16,12 +16,12 @@ package boot import ( "fmt" - "sync" "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sync" ) // Mapping from linux resource names to limits.LimitType. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index bc1d0c1bb..fad72f4ab 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -20,7 +20,6 @@ import ( mrand "math/rand" "os" "runtime" - "sync" "sync/atomic" "syscall" gtime "time" @@ -46,6 +45,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/arp" diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 147ff7703..bec0dc292 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -19,7 +19,6 @@ import ( "math/rand" "os" "reflect" - "sync" "syscall" "testing" "time" @@ -30,6 +29,7 @@ import ( "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/runsc/fsgofer" ) diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 250845ad7..b94bc4fa0 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -44,6 +44,7 @@ go_library( "//pkg/sentry/control", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sync", "//pkg/unet", "//pkg/urpc", "//runsc/boot", diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go index a4e3071b3..1815c93b9 100644 --- a/runsc/cmd/create.go +++ b/runsc/cmd/create.go @@ -16,6 +16,7 @@ package cmd import ( "context" + "flag" "github.com/google/subcommands" "gvisor.dev/gvisor/runsc/boot" diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 4831210c0..7df7995f0 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -21,7 +21,6 @@ import ( "os" "path/filepath" "strings" - "sync" "syscall" "flag" @@ -30,6 +29,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/fsgofer" diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go index de2115dff..5e9bc53ab 100644 --- a/runsc/cmd/start.go +++ b/runsc/cmd/start.go @@ -16,6 +16,7 @@ package cmd import ( "context" + "flag" "github.com/google/subcommands" "gvisor.dev/gvisor/runsc/boot" diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 2bd12120d..6dea179e4 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -18,6 +18,7 @@ go_library( deps = [ "//pkg/log", "//pkg/sentry/control", + "//pkg/sync", "//runsc/boot", "//runsc/cgroup", "//runsc/sandbox", @@ -53,6 +54,7 @@ go_test( "//pkg/sentry/control", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sync", "//pkg/unet", "//pkg/urpc", "//runsc/boot", diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 5ed131a7f..060b63bf3 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -20,7 +20,6 @@ import ( "io" "os" "path/filepath" - "sync" "syscall" "testing" "time" @@ -29,6 +28,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/testutil" diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index c10f85992..b54d8f712 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -26,7 +26,6 @@ import ( "reflect" "strconv" "strings" - "sync" "syscall" "testing" "time" @@ -39,6 +38,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" "gvisor.dev/gvisor/runsc/specutils" diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 4ad09ceab..2da93ec5b 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -22,7 +22,6 @@ import ( "path" "path/filepath" "strings" - "sync" "syscall" "testing" "time" @@ -30,6 +29,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/specutils" "gvisor.dev/gvisor/runsc/testutil" diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go index d95151ea5..17a251530 100644 --- a/runsc/container/state_file.go +++ b/runsc/container/state_file.go @@ -20,10 +20,10 @@ import ( "io/ioutil" "os" "path/filepath" - "sync" "github.com/gofrs/flock" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" ) const stateFileExtension = ".state" diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index afcb41801..a9582d92b 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -19,6 +19,7 @@ go_library( "//pkg/fd", "//pkg/log", "//pkg/p9", + "//pkg/sync", "//pkg/syserr", "//runsc/specutils", "@org_golang_x_sys//unix:go_default_library", diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index b59e1a70e..93606d051 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -29,7 +29,6 @@ import ( "path/filepath" "runtime" "strconv" - "sync" "syscall" "golang.org/x/sys/unix" @@ -37,6 +36,7 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/runsc/specutils" ) diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index 8001949d5..ddbc37456 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -19,6 +19,7 @@ go_library( "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/platform", + "//pkg/sync", "//pkg/tcpip/header", "//pkg/tcpip/stack", "//pkg/urpc", diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index ce1452b87..ec72bdbfd 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -22,7 +22,6 @@ import ( "os" "os/exec" "strconv" - "sync" "syscall" "time" @@ -34,6 +33,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD index c96ca2eb6..3c3027cb5 100644 --- a/runsc/testutil/BUILD +++ b/runsc/testutil/BUILD @@ -10,6 +10,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/log", + "//pkg/sync", "//runsc/boot", "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go index 9632776d2..fb22eae39 100644 --- a/runsc/testutil/testutil.go +++ b/runsc/testutil/testutil.go @@ -34,7 +34,6 @@ import ( "path/filepath" "strconv" "strings" - "sync" "sync/atomic" "syscall" "time" @@ -42,6 +41,7 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/specutils" ) -- cgit v1.2.3 From 7b7c31820b83abcfe43f7170eff1f7953f3f27e2 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 16 Jan 2020 12:28:44 -0800 Subject: Add remaining /proc/* and /proc/sys/* files Except for one under /proc/sys/net/ipv4/tcp_sack. /proc/pid/* is still incomplete. Updates #1195 PiperOrigin-RevId: 290120438 --- pkg/cpuid/cpuid.go | 44 ++-- pkg/sentry/fs/proc/cpuinfo.go | 8 +- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 40 ++++ pkg/sentry/fsimpl/proc/BUILD | 11 +- pkg/sentry/fsimpl/proc/filesystem.go | 11 + pkg/sentry/fsimpl/proc/loadavg.go | 42 ---- pkg/sentry/fsimpl/proc/meminfo.go | 79 ------- pkg/sentry/fsimpl/proc/net.go | 338 ---------------------------- pkg/sentry/fsimpl/proc/net_test.go | 78 ------- pkg/sentry/fsimpl/proc/stat.go | 129 ----------- pkg/sentry/fsimpl/proc/sys.go | 51 ----- pkg/sentry/fsimpl/proc/task.go | 12 +- pkg/sentry/fsimpl/proc/tasks.go | 41 +++- pkg/sentry/fsimpl/proc/tasks_files.go | 245 ++++++++++++++++++++ pkg/sentry/fsimpl/proc/tasks_net.go | 337 +++++++++++++++++++++++++++ pkg/sentry/fsimpl/proc/tasks_sys.go | 143 ++++++++++++ pkg/sentry/fsimpl/proc/tasks_sys_test.go | 78 +++++++ pkg/sentry/fsimpl/proc/tasks_test.go | 3 + pkg/sentry/fsimpl/proc/version.go | 70 ------ 19 files changed, 922 insertions(+), 838 deletions(-) delete mode 100644 pkg/sentry/fsimpl/proc/loadavg.go delete mode 100644 pkg/sentry/fsimpl/proc/meminfo.go delete mode 100644 pkg/sentry/fsimpl/proc/net.go delete mode 100644 pkg/sentry/fsimpl/proc/net_test.go delete mode 100644 pkg/sentry/fsimpl/proc/stat.go delete mode 100644 pkg/sentry/fsimpl/proc/sys.go create mode 100644 pkg/sentry/fsimpl/proc/tasks_net.go create mode 100644 pkg/sentry/fsimpl/proc/tasks_sys.go create mode 100644 pkg/sentry/fsimpl/proc/tasks_sys_test.go delete mode 100644 pkg/sentry/fsimpl/proc/version.go (limited to 'pkg/sentry/fs') diff --git a/pkg/cpuid/cpuid.go b/pkg/cpuid/cpuid.go index d37047368..cf50ee53f 100644 --- a/pkg/cpuid/cpuid.go +++ b/pkg/cpuid/cpuid.go @@ -657,30 +657,28 @@ func (fs *FeatureSet) FlagsString(cpuinfoOnly bool) string { return strings.Join(s, " ") } -// CPUInfo is to generate a section of one cpu in /proc/cpuinfo. This is a -// minimal /proc/cpuinfo, it is missing some fields like "microcode" that are +// WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is +// a minimal /proc/cpuinfo, it is missing some fields like "microcode" that are // not always printed in Linux. The bogomips field is simply made up. -func (fs FeatureSet) CPUInfo(cpu uint) string { - var b bytes.Buffer - fmt.Fprintf(&b, "processor\t: %d\n", cpu) - fmt.Fprintf(&b, "vendor_id\t: %s\n", fs.VendorID) - fmt.Fprintf(&b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family) - fmt.Fprintf(&b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model) - fmt.Fprintf(&b, "model name\t: %s\n", "unknown") // Unknown for now. - fmt.Fprintf(&b, "stepping\t: %s\n", "unknown") // Unknown for now. - fmt.Fprintf(&b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz) - fmt.Fprintln(&b, "fpu\t\t: yes") - fmt.Fprintln(&b, "fpu_exception\t: yes") - fmt.Fprintf(&b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID. - fmt.Fprintln(&b, "wp\t\t: yes") - fmt.Fprintf(&b, "flags\t\t: %s\n", fs.FlagsString(true)) - fmt.Fprintf(&b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway. - fmt.Fprintf(&b, "clflush size\t: %d\n", fs.CacheLine) - fmt.Fprintf(&b, "cache_alignment\t: %d\n", fs.CacheLine) - fmt.Fprintf(&b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48) - fmt.Fprintln(&b, "power management:") // This is always here, but can be blank. - fmt.Fprintln(&b, "") // The /proc/cpuinfo file ends with an extra newline. - return b.String() +func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) { + fmt.Fprintf(b, "processor\t: %d\n", cpu) + fmt.Fprintf(b, "vendor_id\t: %s\n", fs.VendorID) + fmt.Fprintf(b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family) + fmt.Fprintf(b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model) + fmt.Fprintf(b, "model name\t: %s\n", "unknown") // Unknown for now. + fmt.Fprintf(b, "stepping\t: %s\n", "unknown") // Unknown for now. + fmt.Fprintf(b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz) + fmt.Fprintln(b, "fpu\t\t: yes") + fmt.Fprintln(b, "fpu_exception\t: yes") + fmt.Fprintf(b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID. + fmt.Fprintln(b, "wp\t\t: yes") + fmt.Fprintf(b, "flags\t\t: %s\n", fs.FlagsString(true)) + fmt.Fprintf(b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway. + fmt.Fprintf(b, "clflush size\t: %d\n", fs.CacheLine) + fmt.Fprintf(b, "cache_alignment\t: %d\n", fs.CacheLine) + fmt.Fprintf(b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48) + fmt.Fprintln(b, "power management:") // This is always here, but can be blank. + fmt.Fprintln(b, "") // The /proc/cpuinfo file ends with an extra newline. } const ( diff --git a/pkg/sentry/fs/proc/cpuinfo.go b/pkg/sentry/fs/proc/cpuinfo.go index 3edf36780..6330337eb 100644 --- a/pkg/sentry/fs/proc/cpuinfo.go +++ b/pkg/sentry/fs/proc/cpuinfo.go @@ -15,6 +15,8 @@ package proc import ( + "bytes" + "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -27,9 +29,9 @@ func newCPUInfo(ctx context.Context, msrc *fs.MountSource) *fs.Inode { // Kernel is always initialized with a FeatureSet. panic("cpuinfo read with nil FeatureSet") } - contents := make([]byte, 0, 1024) + var buf bytes.Buffer for i, max := uint(0), k.ApplicationCores(); i < max; i++ { - contents = append(contents, []byte(features.CPUInfo(i))...) + features.WriteCPUInfoTo(i, &buf) } - return newStaticProcInode(ctx, msrc, contents) + return newStaticProcInode(ctx, msrc, buf.Bytes()) } diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 1d469a0db..6aff3d39a 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -510,3 +510,43 @@ type InodeSymlink struct { func (InodeSymlink) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { return nil, syserror.ELOOP } + +// StaticDirectory is a standard implementation of a directory with static +// contents. +// +// +stateify savable +type StaticDirectory struct { + InodeNotSymlink + InodeDirectoryNoNewChildren + InodeAttrs + InodeNoDynamicLookup + OrderedChildren +} + +var _ Inode = (*StaticDirectory)(nil) + +// NewStaticDir creates a new static directory and returns its dentry. +func NewStaticDir(creds *auth.Credentials, ino uint64, perm linux.FileMode, children map[string]*Dentry) *Dentry { + if perm&^linux.PermissionsMask != 0 { + panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask)) + } + + inode := &StaticDirectory{} + inode.InodeAttrs.Init(creds, ino, linux.ModeDirectory|perm) + + dentry := &Dentry{} + dentry.Init(inode) + + inode.OrderedChildren.Init(OrderedChildrenOptions{}) + links := inode.OrderedChildren.Populate(dentry, children) + inode.IncLinks(links) + + return dentry +} + +// Open implements kernfs.Inode. +func (s *StaticDirectory) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { + fd := &GenericDirectoryFD{} + fd.Init(rp.Mount(), vfsd, &s.OrderedChildren, flags) + return fd.VFSFileDescription(), nil +} diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 1f44b3217..6cd18cec8 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -7,17 +7,13 @@ go_library( name = "proc", srcs = [ "filesystem.go", - "loadavg.go", - "meminfo.go", "mounts.go", - "net.go", - "stat.go", - "sys.go", "task.go", "task_files.go", "tasks.go", "tasks_files.go", - "version.go", + "tasks_net.go", + "tasks_sys.go", ], importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc", deps = [ @@ -30,6 +26,7 @@ go_library( "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/time", "//pkg/sentry/limits", "//pkg/sentry/mm", "//pkg/sentry/socket", @@ -47,7 +44,7 @@ go_test( size = "small", srcs = [ "boot_test.go", - "net_test.go", + "tasks_sys_test.go", "tasks_test.go", ], embed = [":proc"], diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index d09182c77..e9cb7895f 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -67,3 +67,14 @@ func newDentry(creds *auth.Credentials, ino uint64, perm linux.FileMode, inode d d.Init(inode) return d } + +type staticFile struct { + kernfs.DynamicBytesFile + vfs.StaticData +} + +var _ dynamicInode = (*staticFile)(nil) + +func newStaticFile(data string) *staticFile { + return &staticFile{StaticData: vfs.StaticData{Data: data}} +} diff --git a/pkg/sentry/fsimpl/proc/loadavg.go b/pkg/sentry/fsimpl/proc/loadavg.go deleted file mode 100644 index 5351d86e8..000000000 --- a/pkg/sentry/fsimpl/proc/loadavg.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" -) - -// loadavgData backs /proc/loadavg. -// -// +stateify savable -type loadavgData struct { - kernfs.DynamicBytesFile -} - -var _ dynamicInode = (*loadavgData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error { - // TODO(b/62345059): Include real data in fields. - // Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods. - // Column 4-5: currently running processes and the total number of processes. - // Column 6: the last process ID used. - fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0) - return nil -} diff --git a/pkg/sentry/fsimpl/proc/meminfo.go b/pkg/sentry/fsimpl/proc/meminfo.go deleted file mode 100644 index cbdd4f3fc..000000000 --- a/pkg/sentry/fsimpl/proc/meminfo.go +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo. -// -// +stateify savable -type meminfoData struct { - kernfs.DynamicBytesFile - - // k is the owning Kernel. - k *kernel.Kernel -} - -var _ dynamicInode = (*meminfoData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { - mf := d.k.MemoryFile() - mf.UpdateUsage() - snapshot, totalUsage := usage.MemoryAccounting.Copy() - totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage) - anon := snapshot.Anonymous + snapshot.Tmpfs - file := snapshot.PageCache + snapshot.Mapped - // We don't actually have active/inactive LRUs, so just make up numbers. - activeFile := (file / 2) &^ (usermem.PageSize - 1) - inactiveFile := file - activeFile - - fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024) - memFree := (totalSize - totalUsage) / 1024 - // We use MemFree as MemAvailable because we don't swap. - // TODO(rahat): When reclaim is implemented the value of MemAvailable - // should change. - fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree) - fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree) - fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices - fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) - // Emulate a system with no swap, which disables inactivation of anon pages. - fmt.Fprintf(buf, "SwapCache: 0 kB\n") - fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024) - fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024) - fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024) - fmt.Fprintf(buf, "Inactive(anon): 0 kB\n") - fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024) - fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024) - fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263) - fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263) - fmt.Fprintf(buf, "SwapTotal: 0 kB\n") - fmt.Fprintf(buf, "SwapFree: 0 kB\n") - fmt.Fprintf(buf, "Dirty: 0 kB\n") - fmt.Fprintf(buf, "Writeback: 0 kB\n") - fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024) - fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know - fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024) - return nil -} diff --git a/pkg/sentry/fsimpl/proc/net.go b/pkg/sentry/fsimpl/proc/net.go deleted file mode 100644 index fd46eebf8..000000000 --- a/pkg/sentry/fsimpl/proc/net.go +++ /dev/null @@ -1,338 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/socket/unix" - "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6. -// -// +stateify savable -type ifinet6 struct { - s inet.Stack -} - -var _ vfs.DynamicBytesSource = (*ifinet6)(nil) - -func (n *ifinet6) contents() []string { - var lines []string - nics := n.s.Interfaces() - for id, naddrs := range n.s.InterfaceAddrs() { - nic, ok := nics[id] - if !ok { - // NIC was added after NICNames was called. We'll just - // ignore it. - continue - } - - for _, a := range naddrs { - // IPv6 only. - if a.Family != linux.AF_INET6 { - continue - } - - // Fields: - // IPv6 address displayed in 32 hexadecimal chars without colons - // Netlink device number (interface index) in hexadecimal (use nic id) - // Prefix length in hexadecimal - // Scope value (use 0) - // Interface flags - // Device name - lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name)) - } - } - return lines -} - -// Generate implements vfs.DynamicBytesSource.Generate. -func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error { - for _, l := range n.contents() { - buf.WriteString(l) - } - return nil -} - -// netDev implements vfs.DynamicBytesSource for /proc/net/dev. -// -// +stateify savable -type netDev struct { - s inet.Stack -} - -var _ vfs.DynamicBytesSource = (*netDev)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (n *netDev) Generate(ctx context.Context, buf *bytes.Buffer) error { - interfaces := n.s.Interfaces() - buf.WriteString("Inter-| Receive | Transmit\n") - buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n") - - for _, i := range interfaces { - // Implements the same format as - // net/core/net-procfs.c:dev_seq_printf_stats. - var stats inet.StatDev - if err := n.s.Statistics(&stats, i.Name); err != nil { - log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err) - continue - } - fmt.Fprintf( - buf, - "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n", - i.Name, - // Received - stats[0], // bytes - stats[1], // packets - stats[2], // errors - stats[3], // dropped - stats[4], // fifo - stats[5], // frame - stats[6], // compressed - stats[7], // multicast - // Transmitted - stats[8], // bytes - stats[9], // packets - stats[10], // errors - stats[11], // dropped - stats[12], // fifo - stats[13], // frame - stats[14], // compressed - stats[15], // multicast - ) - } - - return nil -} - -// netUnix implements vfs.DynamicBytesSource for /proc/net/unix. -// -// +stateify savable -type netUnix struct { - k *kernel.Kernel -} - -var _ vfs.DynamicBytesSource = (*netUnix)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (n *netUnix) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n") - for _, se := range n.k.ListSockets() { - s := se.Sock.Get() - if s == nil { - log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock) - continue - } - sfile := s.(*fs.File) - if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { - s.DecRef() - // Not a unix socket. - continue - } - sops := sfile.FileOperations.(*unix.SocketOperations) - - addr, err := sops.Endpoint().GetLocalAddress() - if err != nil { - log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err) - addr.Addr = "" - } - - sockFlags := 0 - if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok { - if ce.Listening() { - // For unix domain sockets, linux reports a single flag - // value if the socket is listening, of __SO_ACCEPTCON. - sockFlags = linux.SO_ACCEPTCON - } - } - - // In the socket entry below, the value for the 'Num' field requires - // some consideration. Linux prints the address to the struct - // unix_sock representing a socket in the kernel, but may redact the - // value for unprivileged users depending on the kptr_restrict - // sysctl. - // - // One use for this field is to allow a privileged user to - // introspect into the kernel memory to determine information about - // a socket not available through procfs, such as the socket's peer. - // - // In gvisor, returning a pointer to our internal structures would - // be pointless, as it wouldn't match the memory layout for struct - // unix_sock, making introspection difficult. We could populate a - // struct unix_sock with the appropriate data, but even that - // requires consideration for which kernel version to emulate, as - // the definition of this struct changes over time. - // - // For now, we always redact this pointer. - fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d", - (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. - sfile.ReadRefs()-1, // RefCount, don't count our own ref. - 0, // Protocol, always 0 for UDS. - sockFlags, // Flags. - sops.Endpoint().Type(), // Type. - sops.State(), // State. - sfile.InodeID(), // Inode. - ) - - // Path - if len(addr.Addr) != 0 { - if addr.Addr[0] == 0 { - // Abstract path. - fmt.Fprintf(buf, " @%s", string(addr.Addr[1:])) - } else { - fmt.Fprintf(buf, " %s", string(addr.Addr)) - } - } - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - return nil -} - -// netTCP implements vfs.DynamicBytesSource for /proc/net/tcp. -// -// +stateify savable -type netTCP struct { - k *kernel.Kernel -} - -var _ vfs.DynamicBytesSource = (*netTCP)(nil) - -func (n *netTCP) Generate(ctx context.Context, buf *bytes.Buffer) error { - t := kernel.TaskFromContext(ctx) - buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n") - for _, se := range n.k.ListSockets() { - s := se.Sock.Get() - if s == nil { - log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock) - continue - } - sfile := s.(*fs.File) - sops, ok := sfile.FileOperations.(socket.Socket) - if !ok { - panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) - } - if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) { - s.DecRef() - // Not tcp4 sockets. - continue - } - - // Linux's documentation for the fields below can be found at - // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. - // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). - // Note that the header doesn't contain labels for all the fields. - - // Field: sl; entry number. - fmt.Fprintf(buf, "%4d: ", se.ID) - - portBuf := make([]byte, 2) - - // Field: local_adddress. - var localAddr linux.SockAddrInet - if local, _, err := sops.GetSockName(t); err == nil { - localAddr = *local.(*linux.SockAddrInet) - } - binary.LittleEndian.PutUint16(portBuf, localAddr.Port) - fmt.Fprintf(buf, "%08X:%04X ", - binary.LittleEndian.Uint32(localAddr.Addr[:]), - portBuf) - - // Field: rem_address. - var remoteAddr linux.SockAddrInet - if remote, _, err := sops.GetPeerName(t); err == nil { - remoteAddr = *remote.(*linux.SockAddrInet) - } - binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port) - fmt.Fprintf(buf, "%08X:%04X ", - binary.LittleEndian.Uint32(remoteAddr.Addr[:]), - portBuf) - - // Field: state; socket state. - fmt.Fprintf(buf, "%02X ", sops.State()) - - // Field: tx_queue, rx_queue; number of packets in the transmit and - // receive queue. Unimplemented. - fmt.Fprintf(buf, "%08X:%08X ", 0, 0) - - // Field: tr, tm->when; timer active state and number of jiffies - // until timer expires. Unimplemented. - fmt.Fprintf(buf, "%02X:%08X ", 0, 0) - - // Field: retrnsmt; number of unrecovered RTO timeouts. - // Unimplemented. - fmt.Fprintf(buf, "%08X ", 0) - - // Field: uid. - uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) - if err != nil { - log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) - fmt.Fprintf(buf, "%5d ", 0) - } else { - fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow())) - } - - // Field: timeout; number of unanswered 0-window probes. - // Unimplemented. - fmt.Fprintf(buf, "%8d ", 0) - - // Field: inode. - fmt.Fprintf(buf, "%8d ", sfile.InodeID()) - - // Field: refcount. Don't count the ref we obtain while deferencing - // the weakref to this socket. - fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) - - // Field: Socket struct address. Redacted due to the same reason as - // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. - fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) - - // Field: retransmit timeout. Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: predicted tick of soft clock (delayed ACK control data). - // Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: sending congestion window, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. - // Unimplemented, report as large threshold. - fmt.Fprintf(buf, "%d", -1) - - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - - return nil -} diff --git a/pkg/sentry/fsimpl/proc/net_test.go b/pkg/sentry/fsimpl/proc/net_test.go deleted file mode 100644 index 20a77a8ca..000000000 --- a/pkg/sentry/fsimpl/proc/net_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/inet" -) - -func newIPv6TestStack() *inet.TestStack { - s := inet.NewTestStack() - s.SupportsIPv6Flag = true - return s -} - -func TestIfinet6NoAddresses(t *testing.T) { - n := &ifinet6{s: newIPv6TestStack()} - var buf bytes.Buffer - n.Generate(contexttest.Context(t), &buf) - if buf.Len() > 0 { - t.Errorf("n.Generate() generated = %v, want = %v", buf.Bytes(), []byte{}) - } -} - -func TestIfinet6(t *testing.T) { - s := newIPv6TestStack() - s.InterfacesMap[1] = inet.Interface{Name: "eth0"} - s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - }, - } - s.InterfacesMap[2] = inet.Interface{Name: "eth1"} - s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"), - }, - } - want := map[string]struct{}{ - "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {}, - "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {}, - } - - n := &ifinet6{s: s} - contents := n.contents() - if len(contents) != len(want) { - t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want)) - } - got := map[string]struct{}{} - for _, l := range contents { - got[l] = struct{}{} - } - - if !reflect.DeepEqual(got, want) { - t.Errorf("Got n.contents() = %v, want = %v", got, want) - } -} diff --git a/pkg/sentry/fsimpl/proc/stat.go b/pkg/sentry/fsimpl/proc/stat.go deleted file mode 100644 index 50894a534..000000000 --- a/pkg/sentry/fsimpl/proc/stat.go +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" -) - -// cpuStats contains the breakdown of CPU time for /proc/stat. -type cpuStats struct { - // user is time spent in userspace tasks with non-positive niceness. - user uint64 - - // nice is time spent in userspace tasks with positive niceness. - nice uint64 - - // system is time spent in non-interrupt kernel context. - system uint64 - - // idle is time spent idle. - idle uint64 - - // ioWait is time spent waiting for IO. - ioWait uint64 - - // irq is time spent in interrupt context. - irq uint64 - - // softirq is time spent in software interrupt context. - softirq uint64 - - // steal is involuntary wait time. - steal uint64 - - // guest is time spent in guests with non-positive niceness. - guest uint64 - - // guestNice is time spent in guests with positive niceness. - guestNice uint64 -} - -// String implements fmt.Stringer. -func (c cpuStats) String() string { - return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice) -} - -// statData implements vfs.DynamicBytesSource for /proc/stat. -// -// +stateify savable -type statData struct { - kernfs.DynamicBytesFile - - // k is the owning Kernel. - k *kernel.Kernel -} - -var _ dynamicInode = (*statData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (s *statData) Generate(ctx context.Context, buf *bytes.Buffer) error { - // TODO(b/37226836): We currently export only zero CPU stats. We could - // at least provide some aggregate stats. - var cpu cpuStats - fmt.Fprintf(buf, "cpu %s\n", cpu) - - for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ { - fmt.Fprintf(buf, "cpu%d %s\n", c, cpu) - } - - // The total number of interrupts is dependent on the CPUs and PCI - // devices on the system. See arch_probe_nr_irqs. - // - // Since we don't report real interrupt stats, just choose an arbitrary - // value from a representative VM. - const numInterrupts = 256 - - // The Kernel doesn't handle real interrupts, so report all zeroes. - // TODO(b/37226836): We could count page faults as #PF. - fmt.Fprintf(buf, "intr 0") // total - for i := 0; i < numInterrupts; i++ { - fmt.Fprintf(buf, " 0") - } - fmt.Fprintf(buf, "\n") - - // Total number of context switches. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "ctxt 0\n") - - // CLOCK_REALTIME timestamp from boot, in seconds. - fmt.Fprintf(buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds()) - - // Total number of clones. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "processes 0\n") - - // Number of runnable tasks. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "procs_running 0\n") - - // Number of tasks waiting on IO. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "procs_blocked 0\n") - - // Number of each softirq handled. - fmt.Fprintf(buf, "softirq 0") // total - for i := 0; i < linux.NumSoftIRQ; i++ { - fmt.Fprintf(buf, " 0") - } - fmt.Fprintf(buf, "\n") - return nil -} diff --git a/pkg/sentry/fsimpl/proc/sys.go b/pkg/sentry/fsimpl/proc/sys.go deleted file mode 100644 index b88256e12..000000000 --- a/pkg/sentry/fsimpl/proc/sys.go +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// mmapMinAddrData implements vfs.DynamicBytesSource for -// /proc/sys/vm/mmap_min_addr. -// -// +stateify savable -type mmapMinAddrData struct { - k *kernel.Kernel -} - -var _ vfs.DynamicBytesSource = (*mmapMinAddrData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress()) - return nil -} - -// +stateify savable -type overcommitMemory struct{} - -var _ vfs.DynamicBytesSource = (*overcommitMemory)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *overcommitMemory) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "0\n") - return nil -} diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 11a64c777..5a384817f 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -50,15 +50,15 @@ func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNames //"fd": newFdDir(t, msrc), //"fdinfo": newFdInfoDir(t, msrc), //"gid_map": newGIDMap(t, msrc), - "io": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, newIO(task, isThreadGroup)), - "maps": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &mapsData{task: task}), + "io": newTaskOwnedFile(task, inoGen.NextIno(), 0400, newIO(task, isThreadGroup)), + "maps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mapsData{task: task}), //"mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), //"mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), //"ns": newNamespaceDir(t, msrc), - "smaps": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &smapsData{task: task}), - "stat": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &taskStatData{t: task, pidns: pidns, tgstats: isThreadGroup}), - "statm": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &statmData{t: task}), - "status": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &statusData{t: task, pidns: pidns}), + "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}), + "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{t: task, pidns: pidns, tgstats: isThreadGroup}), + "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{t: task}), + "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{t: task, pidns: pidns}), //"uid_map": newUIDMap(t, msrc), } if isThreadGroup { diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index d8f92d52f..72315d25c 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -15,6 +15,7 @@ package proc import ( + "bytes" "sort" "strconv" @@ -28,9 +29,8 @@ import ( ) const ( - defaultPermission = 0444 - selfName = "self" - threadSelfName = "thread-self" + selfName = "self" + threadSelfName = "thread-self" ) // InoGenerator generates unique inode numbers for a given filesystem. @@ -61,15 +61,15 @@ var _ kernfs.Inode = (*tasksInode)(nil) func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace) (*tasksInode, *kernfs.Dentry) { root := auth.NewRootCredentials(pidns.UserNamespace()) contents := map[string]*kernfs.Dentry{ - //"cpuinfo": newCPUInfo(ctx, msrc), - //"filesystems": seqfile.NewSeqFileInode(ctx, &filesystemsData{}, msrc), - "loadavg": newDentry(root, inoGen.NextIno(), defaultPermission, &loadavgData{}), - "meminfo": newDentry(root, inoGen.NextIno(), defaultPermission, &meminfoData{k: k}), - "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), defaultPermission, "self/mounts"), - "stat": newDentry(root, inoGen.NextIno(), defaultPermission, &statData{k: k}), - //"uptime": newUptime(ctx, msrc), - //"version": newVersionData(root, inoGen.NextIno(), k), - "version": newDentry(root, inoGen.NextIno(), defaultPermission, &versionData{k: k}), + "cpuinfo": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(cpuInfoData(k))), + //"filesystems": newDentry(root, inoGen.NextIno(), 0444, &filesystemsData{}), + "loadavg": newDentry(root, inoGen.NextIno(), 0444, &loadavgData{}), + "sys": newSysDir(root, inoGen), + "meminfo": newDentry(root, inoGen.NextIno(), 0444, &meminfoData{}), + "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), 0777, "self/mounts"), + "stat": newDentry(root, inoGen.NextIno(), 0444, &statData{}), + "uptime": newDentry(root, inoGen.NextIno(), 0444, &uptimeData{}), + "version": newDentry(root, inoGen.NextIno(), 0444, &versionData{}), } inode := &tasksInode{ @@ -216,3 +216,20 @@ func (i *tasksInode) Stat(vsfs *vfs.Filesystem) linux.Statx { return stat } + +func cpuInfoData(k *kernel.Kernel) string { + features := k.FeatureSet() + if features == nil { + // Kernel is always initialized with a FeatureSet. + panic("cpuinfo read with nil FeatureSet") + } + var buf bytes.Buffer + for i, max := uint(0), k.ApplicationCores(); i < max; i++ { + features.WriteCPUInfoTo(i, &buf) + } + return buf.String() +} + +func shmData(v uint64) dynamicInode { + return newStaticFile(strconv.FormatUint(v, 10)) +} diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index 91f30a798..ad3760e39 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -15,6 +15,7 @@ package proc import ( + "bytes" "fmt" "strconv" @@ -23,6 +24,9 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" ) @@ -90,3 +94,244 @@ func (s *threadSelfSymlink) Readlink(ctx context.Context) (string, error) { } return fmt.Sprintf("%d/task/%d", tgid, tid), nil } + +// cpuStats contains the breakdown of CPU time for /proc/stat. +type cpuStats struct { + // user is time spent in userspace tasks with non-positive niceness. + user uint64 + + // nice is time spent in userspace tasks with positive niceness. + nice uint64 + + // system is time spent in non-interrupt kernel context. + system uint64 + + // idle is time spent idle. + idle uint64 + + // ioWait is time spent waiting for IO. + ioWait uint64 + + // irq is time spent in interrupt context. + irq uint64 + + // softirq is time spent in software interrupt context. + softirq uint64 + + // steal is involuntary wait time. + steal uint64 + + // guest is time spent in guests with non-positive niceness. + guest uint64 + + // guestNice is time spent in guests with positive niceness. + guestNice uint64 +} + +// String implements fmt.Stringer. +func (c cpuStats) String() string { + return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice) +} + +// statData implements vfs.DynamicBytesSource for /proc/stat. +// +// +stateify savable +type statData struct { + kernfs.DynamicBytesFile + + // k is the owning Kernel. + k *kernel.Kernel +} + +var _ dynamicInode = (*statData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (s *statData) Generate(ctx context.Context, buf *bytes.Buffer) error { + // TODO(b/37226836): We currently export only zero CPU stats. We could + // at least provide some aggregate stats. + var cpu cpuStats + fmt.Fprintf(buf, "cpu %s\n", cpu) + + for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ { + fmt.Fprintf(buf, "cpu%d %s\n", c, cpu) + } + + // The total number of interrupts is dependent on the CPUs and PCI + // devices on the system. See arch_probe_nr_irqs. + // + // Since we don't report real interrupt stats, just choose an arbitrary + // value from a representative VM. + const numInterrupts = 256 + + // The Kernel doesn't handle real interrupts, so report all zeroes. + // TODO(b/37226836): We could count page faults as #PF. + fmt.Fprintf(buf, "intr 0") // total + for i := 0; i < numInterrupts; i++ { + fmt.Fprintf(buf, " 0") + } + fmt.Fprintf(buf, "\n") + + // Total number of context switches. + // TODO(b/37226836): Count this. + fmt.Fprintf(buf, "ctxt 0\n") + + // CLOCK_REALTIME timestamp from boot, in seconds. + fmt.Fprintf(buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds()) + + // Total number of clones. + // TODO(b/37226836): Count this. + fmt.Fprintf(buf, "processes 0\n") + + // Number of runnable tasks. + // TODO(b/37226836): Count this. + fmt.Fprintf(buf, "procs_running 0\n") + + // Number of tasks waiting on IO. + // TODO(b/37226836): Count this. + fmt.Fprintf(buf, "procs_blocked 0\n") + + // Number of each softirq handled. + fmt.Fprintf(buf, "softirq 0") // total + for i := 0; i < linux.NumSoftIRQ; i++ { + fmt.Fprintf(buf, " 0") + } + fmt.Fprintf(buf, "\n") + return nil +} + +// loadavgData backs /proc/loadavg. +// +// +stateify savable +type loadavgData struct { + kernfs.DynamicBytesFile +} + +var _ dynamicInode = (*loadavgData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error { + // TODO(b/62345059): Include real data in fields. + // Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods. + // Column 4-5: currently running processes and the total number of processes. + // Column 6: the last process ID used. + fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0) + return nil +} + +// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo. +// +// +stateify savable +type meminfoData struct { + kernfs.DynamicBytesFile + + // k is the owning Kernel. + k *kernel.Kernel +} + +var _ dynamicInode = (*meminfoData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { + mf := d.k.MemoryFile() + mf.UpdateUsage() + snapshot, totalUsage := usage.MemoryAccounting.Copy() + totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage) + anon := snapshot.Anonymous + snapshot.Tmpfs + file := snapshot.PageCache + snapshot.Mapped + // We don't actually have active/inactive LRUs, so just make up numbers. + activeFile := (file / 2) &^ (usermem.PageSize - 1) + inactiveFile := file - activeFile + + fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024) + memFree := (totalSize - totalUsage) / 1024 + // We use MemFree as MemAvailable because we don't swap. + // TODO(rahat): When reclaim is implemented the value of MemAvailable + // should change. + fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree) + fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree) + fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices + fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) + // Emulate a system with no swap, which disables inactivation of anon pages. + fmt.Fprintf(buf, "SwapCache: 0 kB\n") + fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024) + fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024) + fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024) + fmt.Fprintf(buf, "Inactive(anon): 0 kB\n") + fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024) + fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024) + fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263) + fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263) + fmt.Fprintf(buf, "SwapTotal: 0 kB\n") + fmt.Fprintf(buf, "SwapFree: 0 kB\n") + fmt.Fprintf(buf, "Dirty: 0 kB\n") + fmt.Fprintf(buf, "Writeback: 0 kB\n") + fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024) + fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know + fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024) + return nil +} + +// uptimeData implements vfs.DynamicBytesSource for /proc/uptime. +// +// +stateify savable +type uptimeData struct { + kernfs.DynamicBytesFile +} + +var _ dynamicInode = (*uptimeData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (*uptimeData) Generate(ctx context.Context, buf *bytes.Buffer) error { + k := kernel.KernelFromContext(ctx) + now := time.NowFromContext(ctx) + + // Pretend that we've spent zero time sleeping (second number). + fmt.Fprintf(buf, "%.2f 0.00\n", now.Sub(k.Timekeeper().BootTime()).Seconds()) + return nil +} + +// versionData implements vfs.DynamicBytesSource for /proc/version. +// +// +stateify savable +type versionData struct { + kernfs.DynamicBytesFile + + // k is the owning Kernel. + k *kernel.Kernel +} + +var _ dynamicInode = (*versionData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (v *versionData) Generate(ctx context.Context, buf *bytes.Buffer) error { + init := v.k.GlobalInit() + if init == nil { + // Attempted to read before the init Task is created. This can + // only occur during startup, which should never need to read + // this file. + panic("Attempted to read version before initial Task is available") + } + + // /proc/version takes the form: + // + // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST) + // (COMPILER_VERSION) VERSION" + // + // where: + // - SYSNAME, RELEASE, and VERSION are the same as returned by + // sys_utsname + // - COMPILE_USER is the user that build the kernel + // - COMPILE_HOST is the hostname of the machine on which the kernel + // was built + // - COMPILER_VERSION is the version reported by the building compiler + // + // Since we don't really want to expose build information to + // applications, those fields are omitted. + // + // FIXME(mpratt): Using Version from the init task SyscallTable + // disregards the different version a task may have (e.g., in a uts + // namespace). + ver := init.Leader().SyscallTable().Version + fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version) + return nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks_net.go b/pkg/sentry/fsimpl/proc/tasks_net.go new file mode 100644 index 000000000..06dc43c26 --- /dev/null +++ b/pkg/sentry/fsimpl/proc/tasks_net.go @@ -0,0 +1,337 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6. +// +// +stateify savable +type ifinet6 struct { + s inet.Stack +} + +var _ vfs.DynamicBytesSource = (*ifinet6)(nil) + +func (n *ifinet6) contents() []string { + var lines []string + nics := n.s.Interfaces() + for id, naddrs := range n.s.InterfaceAddrs() { + nic, ok := nics[id] + if !ok { + // NIC was added after NICNames was called. We'll just ignore it. + continue + } + + for _, a := range naddrs { + // IPv6 only. + if a.Family != linux.AF_INET6 { + continue + } + + // Fields: + // IPv6 address displayed in 32 hexadecimal chars without colons + // Netlink device number (interface index) in hexadecimal (use nic id) + // Prefix length in hexadecimal + // Scope value (use 0) + // Interface flags + // Device name + lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name)) + } + } + return lines +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error { + for _, l := range n.contents() { + buf.WriteString(l) + } + return nil +} + +// netDev implements vfs.DynamicBytesSource for /proc/net/dev. +// +// +stateify savable +type netDev struct { + s inet.Stack +} + +var _ vfs.DynamicBytesSource = (*netDev)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *netDev) Generate(ctx context.Context, buf *bytes.Buffer) error { + interfaces := n.s.Interfaces() + buf.WriteString("Inter-| Receive | Transmit\n") + buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n") + + for _, i := range interfaces { + // Implements the same format as + // net/core/net-procfs.c:dev_seq_printf_stats. + var stats inet.StatDev + if err := n.s.Statistics(&stats, i.Name); err != nil { + log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err) + continue + } + fmt.Fprintf( + buf, + "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n", + i.Name, + // Received + stats[0], // bytes + stats[1], // packets + stats[2], // errors + stats[3], // dropped + stats[4], // fifo + stats[5], // frame + stats[6], // compressed + stats[7], // multicast + // Transmitted + stats[8], // bytes + stats[9], // packets + stats[10], // errors + stats[11], // dropped + stats[12], // fifo + stats[13], // frame + stats[14], // compressed + stats[15], // multicast + ) + } + + return nil +} + +// netUnix implements vfs.DynamicBytesSource for /proc/net/unix. +// +// +stateify savable +type netUnix struct { + k *kernel.Kernel +} + +var _ vfs.DynamicBytesSource = (*netUnix)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *netUnix) Generate(ctx context.Context, buf *bytes.Buffer) error { + buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n") + for _, se := range n.k.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock) + continue + } + sfile := s.(*fs.File) + if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { + s.DecRef() + // Not a unix socket. + continue + } + sops := sfile.FileOperations.(*unix.SocketOperations) + + addr, err := sops.Endpoint().GetLocalAddress() + if err != nil { + log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err) + addr.Addr = "" + } + + sockFlags := 0 + if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok { + if ce.Listening() { + // For unix domain sockets, linux reports a single flag + // value if the socket is listening, of __SO_ACCEPTCON. + sockFlags = linux.SO_ACCEPTCON + } + } + + // In the socket entry below, the value for the 'Num' field requires + // some consideration. Linux prints the address to the struct + // unix_sock representing a socket in the kernel, but may redact the + // value for unprivileged users depending on the kptr_restrict + // sysctl. + // + // One use for this field is to allow a privileged user to + // introspect into the kernel memory to determine information about + // a socket not available through procfs, such as the socket's peer. + // + // In gvisor, returning a pointer to our internal structures would + // be pointless, as it wouldn't match the memory layout for struct + // unix_sock, making introspection difficult. We could populate a + // struct unix_sock with the appropriate data, but even that + // requires consideration for which kernel version to emulate, as + // the definition of this struct changes over time. + // + // For now, we always redact this pointer. + fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d", + (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. + sfile.ReadRefs()-1, // RefCount, don't count our own ref. + 0, // Protocol, always 0 for UDS. + sockFlags, // Flags. + sops.Endpoint().Type(), // Type. + sops.State(), // State. + sfile.InodeID(), // Inode. + ) + + // Path + if len(addr.Addr) != 0 { + if addr.Addr[0] == 0 { + // Abstract path. + fmt.Fprintf(buf, " @%s", string(addr.Addr[1:])) + } else { + fmt.Fprintf(buf, " %s", string(addr.Addr)) + } + } + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + return nil +} + +// netTCP implements vfs.DynamicBytesSource for /proc/net/tcp. +// +// +stateify savable +type netTCP struct { + k *kernel.Kernel +} + +var _ vfs.DynamicBytesSource = (*netTCP)(nil) + +func (n *netTCP) Generate(ctx context.Context, buf *bytes.Buffer) error { + t := kernel.TaskFromContext(ctx) + buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n") + for _, se := range n.k.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(socket.Socket) + if !ok { + panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) + } + if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) { + s.DecRef() + // Not tcp4 sockets. + continue + } + + // Linux's documentation for the fields below can be found at + // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. + // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). + // Note that the header doesn't contain labels for all the fields. + + // Field: sl; entry number. + fmt.Fprintf(buf, "%4d: ", se.ID) + + portBuf := make([]byte, 2) + + // Field: local_adddress. + var localAddr linux.SockAddrInet + if local, _, err := sops.GetSockName(t); err == nil { + localAddr = *local.(*linux.SockAddrInet) + } + binary.LittleEndian.PutUint16(portBuf, localAddr.Port) + fmt.Fprintf(buf, "%08X:%04X ", + binary.LittleEndian.Uint32(localAddr.Addr[:]), + portBuf) + + // Field: rem_address. + var remoteAddr linux.SockAddrInet + if remote, _, err := sops.GetPeerName(t); err == nil { + remoteAddr = *remote.(*linux.SockAddrInet) + } + binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port) + fmt.Fprintf(buf, "%08X:%04X ", + binary.LittleEndian.Uint32(remoteAddr.Addr[:]), + portBuf) + + // Field: state; socket state. + fmt.Fprintf(buf, "%02X ", sops.State()) + + // Field: tx_queue, rx_queue; number of packets in the transmit and + // receive queue. Unimplemented. + fmt.Fprintf(buf, "%08X:%08X ", 0, 0) + + // Field: tr, tm->when; timer active state and number of jiffies + // until timer expires. Unimplemented. + fmt.Fprintf(buf, "%02X:%08X ", 0, 0) + + // Field: retrnsmt; number of unrecovered RTO timeouts. + // Unimplemented. + fmt.Fprintf(buf, "%08X ", 0) + + // Field: uid. + uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) + if err != nil { + log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) + fmt.Fprintf(buf, "%5d ", 0) + } else { + fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow())) + } + + // Field: timeout; number of unanswered 0-window probes. + // Unimplemented. + fmt.Fprintf(buf, "%8d ", 0) + + // Field: inode. + fmt.Fprintf(buf, "%8d ", sfile.InodeID()) + + // Field: refcount. Don't count the ref we obtain while deferencing + // the weakref to this socket. + fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) + + // Field: Socket struct address. Redacted due to the same reason as + // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. + fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) + + // Field: retransmit timeout. Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: predicted tick of soft clock (delayed ACK control data). + // Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: sending congestion window, Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. + // Unimplemented, report as large threshold. + fmt.Fprintf(buf, "%d", -1) + + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + + return nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go new file mode 100644 index 000000000..aabf2bf0c --- /dev/null +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -0,0 +1,143 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +// newSysDir returns the dentry corresponding to /proc/sys directory. +func newSysDir(root *auth.Credentials, inoGen InoGenerator) *kernfs.Dentry { + return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "kernel": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "hostname": newDentry(root, inoGen.NextIno(), 0444, &hostnameData{}), + "shmall": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMALL)), + "shmmax": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMAX)), + "shmmni": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMNI)), + }), + "vm": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "mmap_min_addr": newDentry(root, inoGen.NextIno(), 0444, &mmapMinAddrData{}), + "overcommit_memory": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0\n")), + }), + "net": newSysNetDir(root, inoGen), + }) +} + +// newSysNetDir returns the dentry corresponding to /proc/sys/net directory. +func newSysNetDir(root *auth.Credentials, inoGen InoGenerator) *kernfs.Dentry { + return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "net": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "ipv4": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + // Add tcp_sack. + // TODO(gvisor.dev/issue/1195): tcp_sack allows write(2) + // "tcp_sack": newTCPSackInode(ctx, msrc, s), + + // The following files are simple stubs until they are implemented in + // netstack, most of these files are configuration related. We use the + // value closest to the actual netstack behavior or any empty file, all + // of these files will have mode 0444 (read-only for all users). + "ip_local_port_range": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("16000 65535")), + "ip_local_reserved_ports": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")), + "ipfrag_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("30")), + "ip_nonlocal_bind": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "ip_no_pmtu_disc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + + // tcp_allowed_congestion_control tell the user what they are able to + // do as an unprivledged process so we leave it empty. + "tcp_allowed_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")), + "tcp_available_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")), + "tcp_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")), + + // Many of the following stub files are features netstack doesn't + // support. The unsupported features return "0" to indicate they are + // disabled. + "tcp_base_mss": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1280")), + "tcp_dsack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_early_retrans": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_fack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_fastopen": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_fastopen_key": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")), + "tcp_invalid_ratelimit": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_keepalive_intvl": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_keepalive_probes": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_keepalive_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("7200")), + "tcp_mtu_probing": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_no_metrics_save": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + "tcp_probe_interval": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_probe_threshold": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_retries1": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")), + "tcp_retries2": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("15")), + "tcp_rfc1337": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + "tcp_slow_start_after_idle": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + "tcp_synack_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")), + "tcp_syn_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")), + "tcp_timestamps": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + }), + "core": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "default_qdisc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("pfifo_fast")), + "message_burst": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("10")), + "message_cost": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")), + "optmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "rmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), + "rmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), + "somaxconn": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("128")), + "wmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), + "wmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), + }), + }), + }) +} + +// mmapMinAddrData implements vfs.DynamicBytesSource for +// /proc/sys/vm/mmap_min_addr. +// +// +stateify savable +type mmapMinAddrData struct { + kernfs.DynamicBytesFile + + k *kernel.Kernel +} + +var _ dynamicInode = (*mmapMinAddrData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error { + fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress()) + return nil +} + +// hostnameData implements vfs.DynamicBytesSource for /proc/sys/kernel/hostname. +// +// +stateify savable +type hostnameData struct { + kernfs.DynamicBytesFile +} + +var _ dynamicInode = (*hostnameData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (*hostnameData) Generate(ctx context.Context, buf *bytes.Buffer) error { + utsns := kernel.UTSNamespaceFromContext(ctx) + buf.WriteString(utsns.HostName()) + buf.WriteString("\n") + return nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks_sys_test.go b/pkg/sentry/fsimpl/proc/tasks_sys_test.go new file mode 100644 index 000000000..20a77a8ca --- /dev/null +++ b/pkg/sentry/fsimpl/proc/tasks_sys_test.go @@ -0,0 +1,78 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "reflect" + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/inet" +) + +func newIPv6TestStack() *inet.TestStack { + s := inet.NewTestStack() + s.SupportsIPv6Flag = true + return s +} + +func TestIfinet6NoAddresses(t *testing.T) { + n := &ifinet6{s: newIPv6TestStack()} + var buf bytes.Buffer + n.Generate(contexttest.Context(t), &buf) + if buf.Len() > 0 { + t.Errorf("n.Generate() generated = %v, want = %v", buf.Bytes(), []byte{}) + } +} + +func TestIfinet6(t *testing.T) { + s := newIPv6TestStack() + s.InterfacesMap[1] = inet.Interface{Name: "eth0"} + s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{ + { + Family: linux.AF_INET6, + PrefixLen: 128, + Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), + }, + } + s.InterfacesMap[2] = inet.Interface{Name: "eth1"} + s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{ + { + Family: linux.AF_INET6, + PrefixLen: 128, + Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"), + }, + } + want := map[string]struct{}{ + "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {}, + "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {}, + } + + n := &ifinet6{s: s} + contents := n.contents() + if len(contents) != len(want) { + t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want)) + } + got := map[string]struct{}{} + for _, l := range contents { + got[l] = struct{}{} + } + + if !reflect.DeepEqual(got, want) { + t.Errorf("Got n.contents() = %v, want = %v", got, want) + } +} diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go index ca8c87ec2..76eafe593 100644 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_test.go @@ -69,12 +69,15 @@ func checkDots(dirs []vfs.Dirent) ([]vfs.Dirent, error) { func checkTasksStaticFiles(gots []vfs.Dirent) ([]vfs.Dirent, error) { wants := map[string]vfs.Dirent{ + "cpuinfo": {Type: linux.DT_REG}, "loadavg": {Type: linux.DT_REG}, "meminfo": {Type: linux.DT_REG}, "mounts": {Type: linux.DT_LNK}, "self": selfLink, "stat": {Type: linux.DT_REG}, + "sys": {Type: linux.DT_DIR}, "thread-self": threadSelfLink, + "uptime": {Type: linux.DT_REG}, "version": {Type: linux.DT_REG}, } return checkFiles(gots, wants) diff --git a/pkg/sentry/fsimpl/proc/version.go b/pkg/sentry/fsimpl/proc/version.go deleted file mode 100644 index 367f2396b..000000000 --- a/pkg/sentry/fsimpl/proc/version.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" -) - -// versionData implements vfs.DynamicBytesSource for /proc/version. -// -// +stateify savable -type versionData struct { - kernfs.DynamicBytesFile - - // k is the owning Kernel. - k *kernel.Kernel -} - -var _ dynamicInode = (*versionData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (v *versionData) Generate(ctx context.Context, buf *bytes.Buffer) error { - init := v.k.GlobalInit() - if init == nil { - // Attempted to read before the init Task is created. This can - // only occur during startup, which should never need to read - // this file. - panic("Attempted to read version before initial Task is available") - } - - // /proc/version takes the form: - // - // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST) - // (COMPILER_VERSION) VERSION" - // - // where: - // - SYSNAME, RELEASE, and VERSION are the same as returned by - // sys_utsname - // - COMPILE_USER is the user that build the kernel - // - COMPILE_HOST is the hostname of the machine on which the kernel - // was built - // - COMPILER_VERSION is the version reported by the building compiler - // - // Since we don't really want to expose build information to - // applications, those fields are omitted. - // - // FIXME(mpratt): Using Version from the init task SyscallTable - // disregards the different version a task may have (e.g., in a uts - // namespace). - ver := init.Leader().SyscallTable().Version - fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version) - return nil -} -- cgit v1.2.3 From 07f258497932e53f4651b80a086117ffda843fe3 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 16 Jan 2020 12:33:07 -0800 Subject: Plumb getting/setting xattrs through InodeOperations and 9p gofer interfaces. There was a very bare get/setxattr in the InodeOperations interface. Add context.Context to both, size to getxattr, and flags to setxattr. Note that extended attributes are passed around as strings in this implementation, so size is automatically encoded into the value. Size is added in getxattr so that implementations can return ERANGE if a value is larger than can fit in the user-allocated buffer. This prevents us from unnecessarily passing around an arbitrarily large xattr when the user buffer is actually too small. Don't use the existing xattrwalk and xattrcreate messages and define our own, mainly for the sake of simplicity. Extended attributes will be implemented in future commits. PiperOrigin-RevId: 290121300 --- pkg/p9/client_file.go | 23 +++++ pkg/p9/file.go | 16 ++++ pkg/p9/handlers.go | 29 +++++++ pkg/p9/messages.go | 129 +++++++++++++++++++++++++++++ pkg/p9/messages_test.go | 15 ++++ pkg/p9/p9.go | 4 + pkg/sentry/fs/copy_up.go | 9 +- pkg/sentry/fs/file_overlay.go | 2 +- pkg/sentry/fs/fsutil/inode.go | 41 +++++---- pkg/sentry/fs/gofer/context_file.go | 14 ++++ pkg/sentry/fs/gofer/inode.go | 18 +++- pkg/sentry/fs/inode.go | 24 +++--- pkg/sentry/fs/inode_operations.go | 25 ++++-- pkg/sentry/fs/inode_overlay.go | 30 +++---- pkg/sentry/fs/inode_overlay_test.go | 4 +- pkg/sentry/fs/tmpfs/tmpfs.go | 18 ++-- pkg/sentry/syscalls/linux/linux64_amd64.go | 4 +- pkg/sentry/syscalls/linux/linux64_arm64.go | 4 +- pkg/sentry/syscalls/linux/sys_xattr.go | 48 ++++++----- runsc/fsgofer/fsgofer.go | 10 +++ 20 files changed, 374 insertions(+), 93 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go index de9357389..04b584383 100644 --- a/pkg/p9/client_file.go +++ b/pkg/p9/client_file.go @@ -165,6 +165,29 @@ func (c *clientFile) SetAttr(valid SetAttrMask, attr SetAttr) error { return c.client.sendRecv(&Tsetattr{FID: c.fid, Valid: valid, SetAttr: attr}, &Rsetattr{}) } +// GetXattr implements File.GetXattr. +func (c *clientFile) GetXattr(name string, size uint64) (string, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return "", syscall.EBADF + } + + rgetxattr := Rgetxattr{} + if err := c.client.sendRecv(&Tgetxattr{FID: c.fid, Name: name, Size: size}, &rgetxattr); err != nil { + return "", err + } + + return rgetxattr.Value, nil +} + +// SetXattr implements File.SetXattr. +func (c *clientFile) SetXattr(name, value string, flags uint32) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + + return c.client.sendRecv(&Tsetxattr{FID: c.fid, Name: name, Value: value, Flags: flags}, &Rsetxattr{}) +} + // Allocate implements File.Allocate. func (c *clientFile) Allocate(mode AllocateMode, offset, length uint64) error { if atomic.LoadUint32(&c.closed) != 0 { diff --git a/pkg/p9/file.go b/pkg/p9/file.go index 96d1f2a8e..4607cfcdf 100644 --- a/pkg/p9/file.go +++ b/pkg/p9/file.go @@ -89,6 +89,22 @@ type File interface { // On the server, SetAttr has a write concurrency guarantee. SetAttr(valid SetAttrMask, attr SetAttr) error + // GetXattr returns extended attributes of this node. + // + // Size indicates the size of the buffer that has been allocated to hold the + // attribute value. If the value is larger than size, implementations may + // return ERANGE to indicate that the buffer is too small, but they are also + // free to ignore the hint entirely (i.e. the value returned may be larger + // than size). All size checking is done independently at the syscall layer. + // + // TODO(b/127675828): Determine concurrency guarantees once implemented. + GetXattr(name string, size uint64) (string, error) + + // SetXattr sets extended attributes on this node. + // + // TODO(b/127675828): Determine concurrency guarantees once implemented. + SetXattr(name, value string, flags uint32) error + // Allocate allows the caller to directly manipulate the allocated disk space // for the file. See fallocate(2) for more details. Allocate(mode AllocateMode, offset, length uint64) error diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go index b9582c07f..7d6653a07 100644 --- a/pkg/p9/handlers.go +++ b/pkg/p9/handlers.go @@ -912,6 +912,35 @@ func (t *Txattrcreate) handle(cs *connState) message { return newErr(syscall.ENOSYS) } +// handle implements handler.handle. +func (t *Tgetxattr) handle(cs *connState) message { + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + val, err := ref.file.GetXattr(t.Name, t.Size) + if err != nil { + return newErr(err) + } + return &Rgetxattr{Value: val} +} + +// handle implements handler.handle. +func (t *Tsetxattr) handle(cs *connState) message { + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + if err := ref.file.SetXattr(t.Name, t.Value, t.Flags); err != nil { + return newErr(err) + } + return &Rsetxattr{} +} + // handle implements handler.handle. func (t *Treaddir) handle(cs *connState) message { ref, ok := cs.LookupFID(t.Directory) diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go index ffdd7e8c6..ceb723d86 100644 --- a/pkg/p9/messages.go +++ b/pkg/p9/messages.go @@ -1611,6 +1611,131 @@ func (r *Rxattrcreate) String() string { return fmt.Sprintf("Rxattrcreate{}") } +// Tgetxattr is a getxattr request. +type Tgetxattr struct { + // FID refers to the file for which to get xattrs. + FID FID + + // Name is the xattr to get. + Name string + + // Size is the buffer size for the xattr to get. + Size uint64 +} + +// Decode implements encoder.Decode. +func (t *Tgetxattr) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Name = b.ReadString() + t.Size = b.Read64() +} + +// Encode implements encoder.Encode. +func (t *Tgetxattr) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteString(t.Name) + b.Write64(t.Size) +} + +// Type implements message.Type. +func (*Tgetxattr) Type() MsgType { + return MsgTgetxattr +} + +// String implements fmt.Stringer. +func (t *Tgetxattr) String() string { + return fmt.Sprintf("Tgetxattr{FID: %d, Name: %s, Size: %d}", t.FID, t.Name, t.Size) +} + +// Rgetxattr is a getxattr response. +type Rgetxattr struct { + // Value is the extended attribute value. + Value string +} + +// Decode implements encoder.Decode. +func (r *Rgetxattr) Decode(b *buffer) { + r.Value = b.ReadString() +} + +// Encode implements encoder.Encode. +func (r *Rgetxattr) Encode(b *buffer) { + b.WriteString(r.Value) +} + +// Type implements message.Type. +func (*Rgetxattr) Type() MsgType { + return MsgRgetxattr +} + +// String implements fmt.Stringer. +func (r *Rgetxattr) String() string { + return fmt.Sprintf("Rgetxattr{Value: %s}", r.Value) +} + +// Tsetxattr sets extended attributes. +type Tsetxattr struct { + // FID refers to the file on which to set xattrs. + FID FID + + // Name is the attribute name. + Name string + + // Value is the attribute value. + Value string + + // Linux setxattr(2) flags. + Flags uint32 +} + +// Decode implements encoder.Decode. +func (t *Tsetxattr) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Name = b.ReadString() + t.Value = b.ReadString() + t.Flags = b.Read32() +} + +// Encode implements encoder.Encode. +func (t *Tsetxattr) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteString(t.Name) + b.WriteString(t.Value) + b.Write32(t.Flags) +} + +// Type implements message.Type. +func (*Tsetxattr) Type() MsgType { + return MsgTsetxattr +} + +// String implements fmt.Stringer. +func (t *Tsetxattr) String() string { + return fmt.Sprintf("Tsetxattr{FID: %d, Name: %s, Value: %s, Flags: %d}", t.FID, t.Name, t.Value, t.Flags) +} + +// Rsetxattr is a setxattr response. +type Rsetxattr struct { +} + +// Decode implements encoder.Decode. +func (r *Rsetxattr) Decode(b *buffer) { +} + +// Encode implements encoder.Encode. +func (r *Rsetxattr) Encode(b *buffer) { +} + +// Type implements message.Type. +func (*Rsetxattr) Type() MsgType { + return MsgRsetxattr +} + +// String implements fmt.Stringer. +func (r *Rsetxattr) String() string { + return fmt.Sprintf("Rsetxattr{}") +} + // Treaddir is a readdir request. type Treaddir struct { // Directory is the directory FID to read. @@ -2363,6 +2488,10 @@ func init() { msgRegistry.register(MsgRxattrwalk, func() message { return &Rxattrwalk{} }) msgRegistry.register(MsgTxattrcreate, func() message { return &Txattrcreate{} }) msgRegistry.register(MsgRxattrcreate, func() message { return &Rxattrcreate{} }) + msgRegistry.register(MsgTgetxattr, func() message { return &Tgetxattr{} }) + msgRegistry.register(MsgRgetxattr, func() message { return &Rgetxattr{} }) + msgRegistry.register(MsgTsetxattr, func() message { return &Tsetxattr{} }) + msgRegistry.register(MsgRsetxattr, func() message { return &Rsetxattr{} }) msgRegistry.register(MsgTreaddir, func() message { return &Treaddir{} }) msgRegistry.register(MsgRreaddir, func() message { return &Rreaddir{} }) msgRegistry.register(MsgTfsync, func() message { return &Tfsync{} }) diff --git a/pkg/p9/messages_test.go b/pkg/p9/messages_test.go index 6ba6a1654..825c939da 100644 --- a/pkg/p9/messages_test.go +++ b/pkg/p9/messages_test.go @@ -194,6 +194,21 @@ func TestEncodeDecode(t *testing.T) { Flags: 3, }, &Rxattrcreate{}, + &Tgetxattr{ + FID: 1, + Name: "abc", + Size: 2, + }, + &Rgetxattr{ + Value: "xyz", + }, + &Tsetxattr{ + FID: 1, + Name: "abc", + Value: "xyz", + Flags: 2, + }, + &Rsetxattr{}, &Treaddir{ Directory: 1, Offset: 2, diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go index d3090535a..5ab00d625 100644 --- a/pkg/p9/p9.go +++ b/pkg/p9/p9.go @@ -339,6 +339,10 @@ const ( MsgRxattrwalk = 31 MsgTxattrcreate = 32 MsgRxattrcreate = 33 + MsgTgetxattr = 34 + MsgRgetxattr = 35 + MsgTsetxattr = 36 + MsgRsetxattr = 37 MsgTreaddir = 40 MsgRreaddir = 41 MsgTfsync = 50 diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index 734177e90..e03e3e417 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -18,6 +18,7 @@ import ( "fmt" "io" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -395,12 +396,12 @@ func copyContentsLocked(ctx context.Context, upper *Inode, lower *Inode, size in // Size and permissions are set on upper when the file content is copied // and when the file is created respectively. func copyAttributesLocked(ctx context.Context, upper *Inode, lower *Inode) error { - // Extract attributes fro the lower filesystem. + // Extract attributes from the lower filesystem. lowerAttr, err := lower.UnstableAttr(ctx) if err != nil { return err } - lowerXattr, err := lower.Listxattr() + lowerXattr, err := lower.ListXattr(ctx) if err != nil && err != syserror.EOPNOTSUPP { return err } @@ -421,11 +422,11 @@ func copyAttributesLocked(ctx context.Context, upper *Inode, lower *Inode) error if isXattrOverlay(name) { continue } - value, err := lower.Getxattr(name) + value, err := lower.GetXattr(ctx, name, linux.XATTR_SIZE_MAX) if err != nil { return err } - if err := upper.InodeOperations.Setxattr(upper, name, value); err != nil { + if err := upper.InodeOperations.SetXattr(ctx, upper, name, value, 0 /* flags */); err != nil { return err } } diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go index 8a633b1ba..8991207b4 100644 --- a/pkg/sentry/fs/file_overlay.go +++ b/pkg/sentry/fs/file_overlay.go @@ -475,7 +475,7 @@ func readdirEntries(ctx context.Context, o *overlayEntry) (*SortedDentryMap, err // Skip this name if it is a negative entry in the // upper or there exists a whiteout for it. if o.upper != nil { - if overlayHasWhiteout(o.upper, name) { + if overlayHasWhiteout(ctx, o.upper, name) { continue } } diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index adf5ec69c..df7b74855 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -15,6 +15,7 @@ package fsutil import ( + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -202,7 +203,7 @@ func (i *InodeSimpleAttributes) NotifyModificationAndStatusChange(ctx context.Co } // InodeSimpleExtendedAttributes implements -// fs.InodeOperations.{Get,Set,List}xattr. +// fs.InodeOperations.{Get,Set,List}Xattr. // // +stateify savable type InodeSimpleExtendedAttributes struct { @@ -211,8 +212,8 @@ type InodeSimpleExtendedAttributes struct { xattrs map[string]string } -// Getxattr implements fs.InodeOperations.Getxattr. -func (i *InodeSimpleExtendedAttributes) Getxattr(_ *fs.Inode, name string) (string, error) { +// GetXattr implements fs.InodeOperations.GetXattr. +func (i *InodeSimpleExtendedAttributes) GetXattr(_ context.Context, _ *fs.Inode, name string, _ uint64) (string, error) { i.mu.RLock() value, ok := i.xattrs[name] i.mu.RUnlock() @@ -222,19 +223,31 @@ func (i *InodeSimpleExtendedAttributes) Getxattr(_ *fs.Inode, name string) (stri return value, nil } -// Setxattr implements fs.InodeOperations.Setxattr. -func (i *InodeSimpleExtendedAttributes) Setxattr(_ *fs.Inode, name, value string) error { +// SetXattr implements fs.InodeOperations.SetXattr. +func (i *InodeSimpleExtendedAttributes) SetXattr(_ context.Context, _ *fs.Inode, name, value string, flags uint32) error { i.mu.Lock() + defer i.mu.Unlock() if i.xattrs == nil { + if flags&linux.XATTR_REPLACE != 0 { + return syserror.ENODATA + } i.xattrs = make(map[string]string) } + + _, ok := i.xattrs[name] + if ok && flags&linux.XATTR_CREATE != 0 { + return syserror.EEXIST + } + if !ok && flags&linux.XATTR_REPLACE != 0 { + return syserror.ENODATA + } + i.xattrs[name] = value - i.mu.Unlock() return nil } -// Listxattr implements fs.InodeOperations.Listxattr. -func (i *InodeSimpleExtendedAttributes) Listxattr(_ *fs.Inode) (map[string]struct{}, error) { +// ListXattr implements fs.InodeOperations.ListXattr. +func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { i.mu.RLock() names := make(map[string]struct{}, len(i.xattrs)) for name := range i.xattrs { @@ -436,18 +449,18 @@ func (InodeNotSymlink) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) { // extended attributes. type InodeNoExtendedAttributes struct{} -// Getxattr implements fs.InodeOperations.Getxattr. -func (InodeNoExtendedAttributes) Getxattr(*fs.Inode, string) (string, error) { +// GetXattr implements fs.InodeOperations.GetXattr. +func (InodeNoExtendedAttributes) GetXattr(context.Context, *fs.Inode, string, uint64) (string, error) { return "", syserror.EOPNOTSUPP } -// Setxattr implements fs.InodeOperations.Setxattr. -func (InodeNoExtendedAttributes) Setxattr(*fs.Inode, string, string) error { +// SetXattr implements fs.InodeOperations.SetXattr. +func (InodeNoExtendedAttributes) SetXattr(context.Context, *fs.Inode, string, string, uint32) error { return syserror.EOPNOTSUPP } -// Listxattr implements fs.InodeOperations.Listxattr. -func (InodeNoExtendedAttributes) Listxattr(*fs.Inode) (map[string]struct{}, error) { +// ListXattr implements fs.InodeOperations.ListXattr. +func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { return nil, syserror.EOPNOTSUPP } diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go index 44b72582a..2125dafef 100644 --- a/pkg/sentry/fs/gofer/context_file.go +++ b/pkg/sentry/fs/gofer/context_file.go @@ -59,6 +59,20 @@ func (c *contextFile) setAttr(ctx context.Context, valid p9.SetAttrMask, attr p9 return err } +func (c *contextFile) getXattr(ctx context.Context, name string, size uint64) (string, error) { + ctx.UninterruptibleSleepStart(false) + val, err := c.file.GetXattr(name, size) + ctx.UninterruptibleSleepFinish(false) + return val, err +} + +func (c *contextFile) setXattr(ctx context.Context, name, value string, flags uint32) error { + ctx.UninterruptibleSleepStart(false) + err := c.file.SetXattr(name, value, flags) + ctx.UninterruptibleSleepFinish(false) + return err +} + func (c *contextFile) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error { ctx.UninterruptibleSleepStart(false) err := c.file.Allocate(mode, offset, length) diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 245fe2ef1..98d1a8a48 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -38,8 +38,7 @@ import ( // // +stateify savable type inodeOperations struct { - fsutil.InodeNotVirtual `state:"nosave"` - fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNotVirtual `state:"nosave"` // fileState implements fs.CachedFileObject. It exists // to break a circular load dependency between inodeOperations @@ -604,6 +603,21 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length return i.fileState.file.setAttr(ctx, p9.SetAttrMask{Size: true}, p9.SetAttr{Size: uint64(length)}) } +// GetXattr implements fs.InodeOperations.GetXattr. +func (i *inodeOperations) GetXattr(ctx context.Context, inode *fs.Inode, name string, size uint64) (string, error) { + return i.fileState.file.getXattr(ctx, name, size) +} + +// SetXattr implements fs.InodeOperations.SetXattr. +func (i *inodeOperations) SetXattr(ctx context.Context, inode *fs.Inode, name string, value string, flags uint32) error { + return i.fileState.file.setXattr(ctx, name, value, flags) +} + +// ListXattr implements fs.InodeOperations.ListXattr. +func (i *inodeOperations) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { + return nil, syscall.EOPNOTSUPP +} + // Allocate implements fs.InodeOperations.Allocate. func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset, length int64) error { // This can only be called for files anyway. diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 468043df0..ee9d301ef 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -261,28 +261,28 @@ func (i *Inode) UnstableAttr(ctx context.Context) (UnstableAttr, error) { return i.InodeOperations.UnstableAttr(ctx, i) } -// Getxattr calls i.InodeOperations.Getxattr with i as the Inode. -func (i *Inode) Getxattr(name string) (string, error) { +// GetXattr calls i.InodeOperations.GetXattr with i as the Inode. +func (i *Inode) GetXattr(ctx context.Context, name string, size uint64) (string, error) { if i.overlay != nil { - return overlayGetxattr(i.overlay, name) + return overlayGetXattr(ctx, i.overlay, name, size) } - return i.InodeOperations.Getxattr(i, name) + return i.InodeOperations.GetXattr(ctx, i, name, size) } -// Setxattr calls i.InodeOperations.Setxattr with i as the Inode. -func (i *Inode) Setxattr(name, value string) error { +// SetXattr calls i.InodeOperations.SetXattr with i as the Inode. +func (i *Inode) SetXattr(ctx context.Context, name, value string, flags uint32) error { if i.overlay != nil { - return overlaySetxattr(i.overlay, name, value) + return overlaySetxattr(ctx, i.overlay, name, value, flags) } - return i.InodeOperations.Setxattr(i, name, value) + return i.InodeOperations.SetXattr(ctx, i, name, value, flags) } -// Listxattr calls i.InodeOperations.Listxattr with i as the Inode. -func (i *Inode) Listxattr() (map[string]struct{}, error) { +// ListXattr calls i.InodeOperations.ListXattr with i as the Inode. +func (i *Inode) ListXattr(ctx context.Context) (map[string]struct{}, error) { if i.overlay != nil { - return overlayListxattr(i.overlay) + return overlayListXattr(ctx, i.overlay) } - return i.InodeOperations.Listxattr(i) + return i.InodeOperations.ListXattr(ctx, i) } // CheckPermission will check if the caller may access this file in the diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index 5cde9d215..13261cb81 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -170,20 +170,27 @@ type InodeOperations interface { // file system events. UnstableAttr(ctx context.Context, inode *Inode) (UnstableAttr, error) - // Getxattr retrieves the value of extended attribute name. Inodes that - // do not support extended attributes return EOPNOTSUPP. Inodes that - // support extended attributes but don't have a value at name return + // GetXattr retrieves the value of extended attribute specified by name. + // Inodes that do not support extended attributes return EOPNOTSUPP. Inodes + // that support extended attributes but don't have a value at name return // ENODATA. - Getxattr(inode *Inode, name string) (string, error) + // + // If this is called through the getxattr(2) syscall, size indicates the + // size of the buffer that the application has allocated to hold the + // attribute value. If the value is larger than size, implementations may + // return ERANGE to indicate that the buffer is too small, but they are also + // free to ignore the hint entirely (i.e. the value returned may be larger + // than size). All size checking is done independently at the syscall layer. + GetXattr(ctx context.Context, inode *Inode, name string, size uint64) (string, error) - // Setxattr sets the value of extended attribute name. Inodes that - // do not support extended attributes return EOPNOTSUPP. - Setxattr(inode *Inode, name, value string) error + // SetXattr sets the value of extended attribute specified by name. Inodes + // that do not support extended attributes return EOPNOTSUPP. + SetXattr(ctx context.Context, inode *Inode, name, value string, flags uint32) error - // Listxattr returns the set of all extended attributes names that + // ListXattr returns the set of all extended attributes names that // have values. Inodes that do not support extended attributes return // EOPNOTSUPP. - Listxattr(inode *Inode) (map[string]struct{}, error) + ListXattr(ctx context.Context, inode *Inode) (map[string]struct{}, error) // Check determines whether an Inode can be accessed with the // requested permission mask using the context (which gives access diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 13d11e001..b90da20d0 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -25,13 +25,13 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) -func overlayHasWhiteout(parent *Inode, name string) bool { - s, err := parent.Getxattr(XattrOverlayWhiteout(name)) +func overlayHasWhiteout(ctx context.Context, parent *Inode, name string) bool { + s, err := parent.GetXattr(ctx, XattrOverlayWhiteout(name), 1) return err == nil && s == "y" } -func overlayCreateWhiteout(parent *Inode, name string) error { - return parent.InodeOperations.Setxattr(parent, XattrOverlayWhiteout(name), "y") +func overlayCreateWhiteout(ctx context.Context, parent *Inode, name string) error { + return parent.InodeOperations.SetXattr(ctx, parent, XattrOverlayWhiteout(name), "y", 0 /* flags */) } func overlayWriteOut(ctx context.Context, o *overlayEntry) error { @@ -89,7 +89,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name } // Are we done? - if overlayHasWhiteout(parent.upper, name) { + if overlayHasWhiteout(ctx, parent.upper, name) { if upperInode == nil { parent.copyMu.RUnlock() if negativeUpperChild { @@ -345,7 +345,7 @@ func overlayRemove(ctx context.Context, o *overlayEntry, parent *Dirent, child * } } if child.Inode.overlay.lowerExists { - if err := overlayCreateWhiteout(o.upper, child.name); err != nil { + if err := overlayCreateWhiteout(ctx, o.upper, child.name); err != nil { return err } } @@ -426,7 +426,7 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena return err } if renamed.Inode.overlay.lowerExists { - if err := overlayCreateWhiteout(oldParent.Inode.overlay.upper, oldName); err != nil { + if err := overlayCreateWhiteout(ctx, oldParent.Inode.overlay.upper, oldName); err != nil { return err } } @@ -528,7 +528,7 @@ func overlayUnstableAttr(ctx context.Context, o *overlayEntry) (UnstableAttr, er return attr, err } -func overlayGetxattr(o *overlayEntry, name string) (string, error) { +func overlayGetXattr(ctx context.Context, o *overlayEntry, name string, size uint64) (string, error) { // Hot path. This is how the overlay checks for whiteout files. // Avoid defers. var ( @@ -544,31 +544,31 @@ func overlayGetxattr(o *overlayEntry, name string) (string, error) { o.copyMu.RLock() if o.upper != nil { - s, err = o.upper.Getxattr(name) + s, err = o.upper.GetXattr(ctx, name, size) } else { - s, err = o.lower.Getxattr(name) + s, err = o.lower.GetXattr(ctx, name, size) } o.copyMu.RUnlock() return s, err } // TODO(b/146028302): Support setxattr for overlayfs. -func overlaySetxattr(o *overlayEntry, name, value string) error { +func overlaySetxattr(ctx context.Context, o *overlayEntry, name, value string, flags uint32) error { return syserror.EOPNOTSUPP } -func overlayListxattr(o *overlayEntry) (map[string]struct{}, error) { +func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{}, error) { o.copyMu.RLock() defer o.copyMu.RUnlock() var names map[string]struct{} var err error if o.upper != nil { - names, err = o.upper.Listxattr() + names, err = o.upper.ListXattr(ctx) } else { - names, err = o.lower.Listxattr() + names, err = o.lower.ListXattr(ctx) } for name := range names { - // Same as overlayGetxattr, we shouldn't forward along + // Same as overlayGetXattr, we shouldn't forward along // overlay attributes. if strings.HasPrefix(XattrOverlayPrefix, name) { delete(names, name) diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go index 8935aad65..493d98c36 100644 --- a/pkg/sentry/fs/inode_overlay_test.go +++ b/pkg/sentry/fs/inode_overlay_test.go @@ -382,8 +382,8 @@ type dir struct { ReaddirCalled bool } -// Getxattr implements InodeOperations.Getxattr. -func (d *dir) Getxattr(inode *fs.Inode, name string) (string, error) { +// GetXattr implements InodeOperations.GetXattr. +func (d *dir) GetXattr(_ context.Context, _ *fs.Inode, name string, _ uint64) (string, error) { for _, n := range d.negative { if name == fs.XattrOverlayWhiteout(n) { return "y", nil diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index 69089c8a8..0f718e236 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -148,19 +148,19 @@ func (d *Dir) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perms return d.ramfsDir.CreateFifo(ctx, dir, name, perms) } -// Getxattr implements fs.InodeOperations.Getxattr. -func (d *Dir) Getxattr(i *fs.Inode, name string) (string, error) { - return d.ramfsDir.Getxattr(i, name) +// GetXattr implements fs.InodeOperations.GetXattr. +func (d *Dir) GetXattr(ctx context.Context, i *fs.Inode, name string, size uint64) (string, error) { + return d.ramfsDir.GetXattr(ctx, i, name, size) } -// Setxattr implements fs.InodeOperations.Setxattr. -func (d *Dir) Setxattr(i *fs.Inode, name, value string) error { - return d.ramfsDir.Setxattr(i, name, value) +// SetXattr implements fs.InodeOperations.SetXattr. +func (d *Dir) SetXattr(ctx context.Context, i *fs.Inode, name, value string, flags uint32) error { + return d.ramfsDir.SetXattr(ctx, i, name, value, flags) } -// Listxattr implements fs.InodeOperations.Listxattr. -func (d *Dir) Listxattr(i *fs.Inode) (map[string]struct{}, error) { - return d.ramfsDir.Listxattr(i) +// ListXattr implements fs.InodeOperations.ListXattr. +func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode) (map[string]struct{}, error) { + return d.ramfsDir.ListXattr(ctx, i) } // Lookup implements fs.InodeOperations.Lookup. diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 479c5f6ff..6b2920900 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -228,10 +228,10 @@ var AMD64 = &kernel.SyscallTable{ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), - 188: syscalls.PartiallySupported("setxattr", Setxattr, "Only supported for tmpfs.", nil), + 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), 189: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 190: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 191: syscalls.PartiallySupported("getxattr", Getxattr, "Only supported for tmpfs.", nil), + 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index d3f61f5e8..8c1b20911 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -41,10 +41,10 @@ var ARM64 = &kernel.SyscallTable{ 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 5: syscalls.PartiallySupported("setxattr", Setxattr, "Only supported for tmpfs.", nil), + 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), 6: syscalls.Error("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 7: syscalls.Error("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), - 8: syscalls.PartiallySupported("getxattr", Getxattr, "Only supported for tmpfs.", nil), + 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", nil), diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index 97d9a65ea..816352218 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -25,12 +25,12 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) -// Getxattr implements linux syscall getxattr(2). -func Getxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { +// GetXattr implements linux syscall getxattr(2). +func GetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { pathAddr := args[0].Pointer() nameAddr := args[1].Pointer() valueAddr := args[2].Pointer() - size := args[3].SizeT() + size := uint64(args[3].SizeT()) path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) if err != nil { @@ -39,22 +39,28 @@ func Getxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc valueLen := 0 err = fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { - value, err := getxattr(t, d, dirPath, nameAddr) + // If getxattr(2) is called with size 0, the size of the value will be + // returned successfully even if it is nonzero. In that case, we need to + // retrieve the entire attribute value so we can return the correct size. + requestedSize := size + if size == 0 || size > linux.XATTR_SIZE_MAX { + requestedSize = linux.XATTR_SIZE_MAX + } + + value, err := getXattr(t, d, dirPath, nameAddr, uint64(requestedSize)) if err != nil { return err } valueLen = len(value) - if size == 0 { - return nil - } - if size > linux.XATTR_SIZE_MAX { - size = linux.XATTR_SIZE_MAX - } - if valueLen > int(size) { + if uint64(valueLen) > requestedSize { return syserror.ERANGE } + // Skip copying out the attribute value if size is 0. + if size == 0 { + return nil + } _, err = t.CopyOutBytes(valueAddr, []byte(value)) return err }) @@ -64,8 +70,8 @@ func Getxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return uintptr(valueLen), nil, nil } -// getxattr implements getxattr from the given *fs.Dirent. -func getxattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr usermem.Addr) (string, error) { +// getXattr implements getxattr(2) from the given *fs.Dirent. +func getXattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr usermem.Addr, size uint64) (string, error) { if dirPath && !fs.IsDir(d.Inode.StableAttr) { return "", syserror.ENOTDIR } @@ -83,15 +89,15 @@ func getxattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr usermem.Addr) return "", syserror.EOPNOTSUPP } - return d.Inode.Getxattr(name) + return d.Inode.GetXattr(t, name, size) } -// Setxattr implements linux syscall setxattr(2). -func Setxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { +// SetXattr implements linux syscall setxattr(2). +func SetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { pathAddr := args[0].Pointer() nameAddr := args[1].Pointer() valueAddr := args[2].Pointer() - size := args[3].SizeT() + size := uint64(args[3].SizeT()) flags := args[4].Uint() path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) @@ -104,12 +110,12 @@ func Setxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { - return setxattr(t, d, dirPath, nameAddr, valueAddr, size, flags) + return setXattr(t, d, dirPath, nameAddr, valueAddr, uint64(size), flags) }) } -// setxattr implements setxattr from the given *fs.Dirent. -func setxattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr, valueAddr usermem.Addr, size uint, flags uint32) error { +// setXattr implements setxattr(2) from the given *fs.Dirent. +func setXattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr, valueAddr usermem.Addr, size uint64, flags uint32) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -136,7 +142,7 @@ func setxattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr, valueAddr us return syserror.EOPNOTSUPP } - return d.Inode.Setxattr(name, value) + return d.Inode.SetXattr(t, name, value, flags) } func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 93606d051..4d84ad999 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -767,6 +767,16 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { return err } +// TODO(b/127675828): support getxattr. +func (l *localFile) GetXattr(name string, size uint64) (string, error) { + return "", syscall.EOPNOTSUPP +} + +// TODO(b/127675828): support setxattr. +func (l *localFile) SetXattr(name, value string, flags uint32) error { + return syscall.EOPNOTSUPP +} + // Allocate implements p9.File. func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error { if !l.isOpen() { -- cgit v1.2.3 From ab48112e41427579ecf585f6280be1e2d58acf06 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 16 Jan 2020 13:58:50 -0800 Subject: Add IfChange/ThenChange reminders in fs/proc There is a lot of code duplication for VFSv2 and this serves as remind to keep the copies in sync. Updates #1195 PiperOrigin-RevId: 290139234 --- pkg/sentry/fs/proc/cgroup.go | 4 ++++ pkg/sentry/fs/proc/cpuinfo.go | 4 ++++ pkg/sentry/fs/proc/exec_args.go | 4 ++++ pkg/sentry/fs/proc/fds.go | 4 ++++ pkg/sentry/fs/proc/filesystems.go | 4 ++++ pkg/sentry/fs/proc/fs.go | 4 ++++ pkg/sentry/fs/proc/inode.go | 4 ++++ pkg/sentry/fs/proc/loadavg.go | 4 ++++ pkg/sentry/fs/proc/meminfo.go | 4 ++++ pkg/sentry/fs/proc/mounts.go | 4 ++++ pkg/sentry/fs/proc/net.go | 4 ++++ pkg/sentry/fs/proc/proc.go | 4 ++++ pkg/sentry/fs/proc/stat.go | 4 ++++ pkg/sentry/fs/proc/sys.go | 4 ++++ pkg/sentry/fs/proc/sys_net.go | 4 ++++ pkg/sentry/fs/proc/task.go | 4 ++++ pkg/sentry/fs/proc/uid_gid_map.go | 4 ++++ pkg/sentry/fs/proc/uptime.go | 4 ++++ pkg/sentry/fs/proc/version.go | 4 ++++ 19 files changed, 76 insertions(+) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/cgroup.go b/pkg/sentry/fs/proc/cgroup.go index 05e31c55d..c4abe319d 100644 --- a/pkg/sentry/fs/proc/cgroup.go +++ b/pkg/sentry/fs/proc/cgroup.go @@ -21,6 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" ) +// LINT.IfChange + func newCGroupInode(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string]string) *fs.Inode { // From man 7 cgroups: "For each cgroup hierarchy of which the process // is a member, there is one entry containing three colon-separated @@ -39,3 +41,5 @@ func newCGroupInode(ctx context.Context, msrc *fs.MountSource, cgroupControllers return newStaticProcInode(ctx, msrc, []byte(data)) } + +// LINT.ThenChange(../../fsimpl/proc/tasks_files.go) diff --git a/pkg/sentry/fs/proc/cpuinfo.go b/pkg/sentry/fs/proc/cpuinfo.go index 6330337eb..df0c4e3a7 100644 --- a/pkg/sentry/fs/proc/cpuinfo.go +++ b/pkg/sentry/fs/proc/cpuinfo.go @@ -22,6 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ) +// LINT.IfChange + func newCPUInfo(ctx context.Context, msrc *fs.MountSource) *fs.Inode { k := kernel.KernelFromContext(ctx) features := k.FeatureSet() @@ -35,3 +37,5 @@ func newCPUInfo(ctx context.Context, msrc *fs.MountSource) *fs.Inode { } return newStaticProcInode(ctx, msrc, buf.Bytes()) } + +// LINT.ThenChange(../../fsimpl/proc/tasks_files.go) diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go index 1d3a2d426..9aaeb780b 100644 --- a/pkg/sentry/fs/proc/exec_args.go +++ b/pkg/sentry/fs/proc/exec_args.go @@ -29,6 +29,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + // execArgType enumerates the types of exec arguments that are exposed through // proc. type execArgType int @@ -201,3 +203,5 @@ func (f *execArgFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen } return int64(n), err } + +// LINT.ThenChange(../../fsimpl/proc/task.go) diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go index bee421d76..2fa3cfa7d 100644 --- a/pkg/sentry/fs/proc/fds.go +++ b/pkg/sentry/fs/proc/fds.go @@ -28,6 +28,8 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) +// LINT.IfChange + // walkDescriptors finds the descriptor (file-flag pair) for the fd identified // by p, and calls the toInodeOperations callback with that descriptor. This is a helper // method for implementing fs.InodeOperations.Lookup. @@ -277,3 +279,5 @@ func (fdid *fdInfoDir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs. } return fs.NewFile(ctx, dirent, flags, fops), nil } + +// LINT.ThenChange(../../fsimpl/proc/task_files.go) diff --git a/pkg/sentry/fs/proc/filesystems.go b/pkg/sentry/fs/proc/filesystems.go index e9250c51c..7b3b974ab 100644 --- a/pkg/sentry/fs/proc/filesystems.go +++ b/pkg/sentry/fs/proc/filesystems.go @@ -23,6 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" ) +// LINT.IfChange + // filesystemsData backs /proc/filesystems. // // +stateify savable @@ -59,3 +61,5 @@ func (*filesystemsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle // Return the SeqData and advance the generation counter. return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*filesystemsData)(nil)}}, 1 } + +// LINT.ThenChange(../../fsimpl/proc/filesystem.go) diff --git a/pkg/sentry/fs/proc/fs.go b/pkg/sentry/fs/proc/fs.go index f14833805..761d24462 100644 --- a/pkg/sentry/fs/proc/fs.go +++ b/pkg/sentry/fs/proc/fs.go @@ -21,6 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" ) +// LINT.IfChange + // filesystem is a procfs. // // +stateify savable @@ -79,3 +81,5 @@ func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSou // never want them cached. return New(ctx, fs.NewNonCachingMountSource(ctx, f, flags), cgroups) } + +// LINT.ThenChange(../../fsimpl/proc/filesystem.go) diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go index 0c04f81fa..723f6b661 100644 --- a/pkg/sentry/fs/proc/inode.go +++ b/pkg/sentry/fs/proc/inode.go @@ -26,6 +26,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usermem" ) +// LINT.IfChange + // taskOwnedInodeOps wraps an fs.InodeOperations and overrides the UnstableAttr // method to return either the task or root as the owner, depending on the // task's dumpability. @@ -131,3 +133,5 @@ func newProcInode(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSo } return fs.NewInode(ctx, iops, msrc, sattr) } + +// LINT.ThenChange(../../fsimpl/proc/tasks.go) diff --git a/pkg/sentry/fs/proc/loadavg.go b/pkg/sentry/fs/proc/loadavg.go index 8602b7426..d7d2afcb7 100644 --- a/pkg/sentry/fs/proc/loadavg.go +++ b/pkg/sentry/fs/proc/loadavg.go @@ -22,6 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" ) +// LINT.IfChange + // loadavgData backs /proc/loadavg. // // +stateify savable @@ -53,3 +55,5 @@ func (d *loadavgData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) }, }, 0 } + +// LINT.ThenChange(../../fsimpl/proc/tasks_files.go) diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go index 495f3e3ba..313c6a32b 100644 --- a/pkg/sentry/fs/proc/meminfo.go +++ b/pkg/sentry/fs/proc/meminfo.go @@ -25,6 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usermem" ) +// LINT.IfChange + // meminfoData backs /proc/meminfo. // // +stateify savable @@ -83,3 +85,5 @@ func (d *meminfoData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) fmt.Fprintf(&buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024) return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*meminfoData)(nil)}}, 0 } + +// LINT.ThenChange(../../fsimpl/proc/tasks_files.go) diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go index e33c4a460..5aedae799 100644 --- a/pkg/sentry/fs/proc/mounts.go +++ b/pkg/sentry/fs/proc/mounts.go @@ -25,6 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ) +// LINT.IfChange + // forEachMountSource runs f for the process root mount and each mount that is a // descendant of the root. func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) { @@ -195,3 +197,5 @@ func (mf *mountsFile) ReadSeqFileData(ctx context.Context, handle seqfile.SeqHan return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*mountsFile)(nil)}}, 0 } + +// LINT.ThenChange(../../fsimpl/proc/tasks_files.go) diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 402919924..3f17e98ea 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -38,6 +38,8 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/header" ) +// LINT.IfChange + // newNet creates a new proc net entry. func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSource) *fs.Inode { var contents map[string]*fs.Inode @@ -831,3 +833,5 @@ func (n *netUDP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se } return data, 0 } + +// LINT.ThenChange(../../fsimpl/proc/tasks_net.go) diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index 56e92721e..da9341e4e 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -31,6 +31,8 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) +// LINT.IfChange + // proc is a root proc node. // // +stateify savable @@ -249,3 +251,5 @@ func (rpf *rootProcFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dent } return offset, nil } + +// LINT.ThenChange(../../fsimpl/proc/tasks.go) diff --git a/pkg/sentry/fs/proc/stat.go b/pkg/sentry/fs/proc/stat.go index b641effbb..bc5b2bc7b 100644 --- a/pkg/sentry/fs/proc/stat.go +++ b/pkg/sentry/fs/proc/stat.go @@ -24,6 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ) +// LINT.IfChange + // statData backs /proc/stat. // // +stateify savable @@ -140,3 +142,5 @@ func (s *statData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([] }, }, 0 } + +// LINT.ThenChange(../../fsimpl/proc/task_files.go) diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go index cd37776c8..1062bd852 100644 --- a/pkg/sentry/fs/proc/sys.go +++ b/pkg/sentry/fs/proc/sys.go @@ -31,6 +31,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + // mmapMinAddrData backs /proc/sys/vm/mmap_min_addr. // // +stateify savable @@ -160,3 +162,5 @@ func (hf *hostnameFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequ } var _ fs.FileOperations = (*hostnameFile)(nil) + +// LINT.ThenChange(../../fsimpl/proc/tasks_sys.go) diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index a37e1fa06..b9e8ef35f 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -30,6 +30,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + type tcpMemDir int const ( @@ -364,3 +366,5 @@ func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } + +// LINT.ThenChange(../../fsimpl/proc/tasks_sys.go) diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 9bf4b4527..7358d6ef9 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -37,6 +37,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + // getTaskMM returns t's MemoryManager. If getTaskMM succeeds, the MemoryManager's // users count is incremented, and must be decremented by the caller when it is // no longer in use. @@ -800,3 +802,5 @@ func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequenc n, err := dst.CopyOut(ctx, buf[offset:]) return int64(n), err } + +// LINT.ThenChange(../../fsimpl/proc/task.go|../../fsimpl/proc/task_files.go) diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go index eea37d15c..3eacc9265 100644 --- a/pkg/sentry/fs/proc/uid_gid_map.go +++ b/pkg/sentry/fs/proc/uid_gid_map.go @@ -30,6 +30,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + // idMapInodeOperations implements fs.InodeOperations for // /proc/[pid]/{uid,gid}_map. // @@ -177,3 +179,5 @@ func (imfo *idMapFileOperations) Write(ctx context.Context, file *fs.File, src u // count, even if fewer bytes were used. return int64(srclen), nil } + +// LINT.ThenChange(../../fsimpl/proc/task_files.go) diff --git a/pkg/sentry/fs/proc/uptime.go b/pkg/sentry/fs/proc/uptime.go index 4e903917a..adfe58adb 100644 --- a/pkg/sentry/fs/proc/uptime.go +++ b/pkg/sentry/fs/proc/uptime.go @@ -28,6 +28,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + // uptime is a file containing the system uptime. // // +stateify savable @@ -85,3 +87,5 @@ func (f *uptimeFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequenc n, err := dst.CopyOut(ctx, s[offset:]) return int64(n), err } + +// LINT.ThenChange(../../fsimpl/proc/tasks_files.go) diff --git a/pkg/sentry/fs/proc/version.go b/pkg/sentry/fs/proc/version.go index a6d2c3cd3..27fd5b1cb 100644 --- a/pkg/sentry/fs/proc/version.go +++ b/pkg/sentry/fs/proc/version.go @@ -22,6 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ) +// LINT.IfChange + // versionData backs /proc/version. // // +stateify savable @@ -76,3 +78,5 @@ func (v *versionData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) }, }, 0 } + +// LINT.ThenChange(../../fsimpl/proc/task_files.go) -- cgit v1.2.3 From 7a45ae7e67438697296fc12345202e3c76304096 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 16 Jan 2020 18:13:27 -0800 Subject: Implement setxattr for overlays. PiperOrigin-RevId: 290186303 --- pkg/sentry/fs/inode.go | 4 ++-- pkg/sentry/fs/inode_overlay.go | 13 ++++++++++--- pkg/sentry/syscalls/linux/sys_xattr.go | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index ee9d301ef..e4cf5a570 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -270,9 +270,9 @@ func (i *Inode) GetXattr(ctx context.Context, name string, size uint64) (string, } // SetXattr calls i.InodeOperations.SetXattr with i as the Inode. -func (i *Inode) SetXattr(ctx context.Context, name, value string, flags uint32) error { +func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, flags uint32) error { if i.overlay != nil { - return overlaySetxattr(ctx, i.overlay, name, value, flags) + return overlaySetxattr(ctx, i.overlay, d, name, value, flags) } return i.InodeOperations.SetXattr(ctx, i, name, value, flags) } diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index b90da20d0..c477de837 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -552,9 +552,16 @@ func overlayGetXattr(ctx context.Context, o *overlayEntry, name string, size uin return s, err } -// TODO(b/146028302): Support setxattr for overlayfs. -func overlaySetxattr(ctx context.Context, o *overlayEntry, name, value string, flags uint32) error { - return syserror.EOPNOTSUPP +func overlaySetxattr(ctx context.Context, o *overlayEntry, d *Dirent, name, value string, flags uint32) error { + // Don't allow changes to overlay xattrs through a setxattr syscall. + if strings.HasPrefix(XattrOverlayPrefix, name) { + return syserror.EPERM + } + + if err := copyUp(ctx, d); err != nil { + return err + } + return o.upper.SetXattr(ctx, d, name, value, flags) } func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{}, error) { diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index 816352218..23d20da6f 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -142,7 +142,7 @@ func setXattr(t *kernel.Task, d *fs.Dirent, dirPath bool, nameAddr, valueAddr us return syserror.EOPNOTSUPP } - return d.Inode.SetXattr(t, name, value, flags) + return d.Inode.SetXattr(t, d, name, value, flags) } func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { -- cgit v1.2.3 From 19b4653147c8ec1cd2cf6e2a8f9bfc7865a5f850 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Thu, 16 Jan 2020 20:19:40 -0800 Subject: Remove unused rpcinet. PiperOrigin-RevId: 290198756 --- pkg/sentry/fs/proc/BUILD | 2 - pkg/sentry/fs/proc/proc.go | 9 +- pkg/sentry/fs/proc/rpcinet_proc.go | 217 ------ pkg/sentry/fs/proc/sys.go | 9 +- pkg/sentry/socket/rpcinet/BUILD | 69 -- pkg/sentry/socket/rpcinet/conn/BUILD | 18 - pkg/sentry/socket/rpcinet/conn/conn.go | 187 ----- pkg/sentry/socket/rpcinet/device.go | 19 - pkg/sentry/socket/rpcinet/notifier/BUILD | 17 - pkg/sentry/socket/rpcinet/notifier/notifier.go | 231 ------- pkg/sentry/socket/rpcinet/rpcinet.go | 16 - pkg/sentry/socket/rpcinet/socket.go | 909 ------------------------- pkg/sentry/socket/rpcinet/stack.go | 177 ----- pkg/sentry/socket/rpcinet/stack_unsafe.go | 193 ------ pkg/sentry/socket/rpcinet/syscall_rpc.proto | 352 ---------- 15 files changed, 2 insertions(+), 2423 deletions(-) delete mode 100644 pkg/sentry/fs/proc/rpcinet_proc.go delete mode 100644 pkg/sentry/socket/rpcinet/BUILD delete mode 100644 pkg/sentry/socket/rpcinet/conn/BUILD delete mode 100644 pkg/sentry/socket/rpcinet/conn/conn.go delete mode 100644 pkg/sentry/socket/rpcinet/device.go delete mode 100644 pkg/sentry/socket/rpcinet/notifier/BUILD delete mode 100644 pkg/sentry/socket/rpcinet/notifier/notifier.go delete mode 100644 pkg/sentry/socket/rpcinet/rpcinet.go delete mode 100644 pkg/sentry/socket/rpcinet/socket.go delete mode 100644 pkg/sentry/socket/rpcinet/stack.go delete mode 100644 pkg/sentry/socket/rpcinet/stack_unsafe.go delete mode 100644 pkg/sentry/socket/rpcinet/syscall_rpc.proto (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index 94d46ab1b..cb37c6c6b 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -18,7 +18,6 @@ go_library( "mounts.go", "net.go", "proc.go", - "rpcinet_proc.go", "stat.go", "sys.go", "sys_net.go", @@ -46,7 +45,6 @@ go_library( "//pkg/sentry/limits", "//pkg/sentry/mm", "//pkg/sentry/socket", - "//pkg/sentry/socket/rpcinet", "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index da9341e4e..29867dc3a 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -27,7 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet" "gvisor.dev/gvisor/pkg/syserror" ) @@ -87,15 +86,9 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string } // Add more contents that need proc to be initialized. + p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc)) p.AddChild(ctx, "sys", p.newSysDir(ctx, msrc)) - // If we're using rpcinet we will let it manage /proc/net. - if _, ok := p.k.NetworkStack().(*rpcinet.Stack); ok { - p.AddChild(ctx, "net", newRPCInetProcNet(ctx, msrc)) - } else { - p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc)) - } - return newProcInode(ctx, p, msrc, fs.SpecialDirectory, nil), nil } diff --git a/pkg/sentry/fs/proc/rpcinet_proc.go b/pkg/sentry/fs/proc/rpcinet_proc.go deleted file mode 100644 index 01ac97530..000000000 --- a/pkg/sentry/fs/proc/rpcinet_proc.go +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "io" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/waiter" -) - -// rpcInetInode implements fs.InodeOperations. -type rpcInetInode struct { - fsutil.SimpleFileInode - - // filepath is the full path of this rpcInetInode. - filepath string - - k *kernel.Kernel -} - -func newRPCInetInode(ctx context.Context, msrc *fs.MountSource, filepath string, mode linux.FileMode) *fs.Inode { - f := &rpcInetInode{ - SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(mode), linux.PROC_SUPER_MAGIC), - filepath: filepath, - k: kernel.KernelFromContext(ctx), - } - return newProcInode(ctx, f, msrc, fs.SpecialFile, nil) -} - -// GetFile implements fs.InodeOperations.GetFile. -func (i *rpcInetInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { - flags.Pread = true - flags.Pwrite = true - fops := &rpcInetFile{ - inode: i, - } - return fs.NewFile(ctx, dirent, flags, fops), nil -} - -// rpcInetFile implements fs.FileOperations as RPCs. -type rpcInetFile struct { - fsutil.FileGenericSeek `state:"nosave"` - fsutil.FileNoIoctl `state:"nosave"` - fsutil.FileNoMMap `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileNoopFsync `state:"nosave"` - fsutil.FileNoopRelease `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - waiter.AlwaysReady `state:"nosave"` - - inode *rpcInetInode -} - -// Read implements fs.FileOperations.Read. -// -// This method can panic if an rpcInetInode was created without an rpcinet -// stack. -func (f *rpcInetFile) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { - if offset < 0 { - return 0, syserror.EINVAL - } - s, ok := f.inode.k.NetworkStack().(*rpcinet.Stack) - if !ok { - panic("Network stack is not a rpcinet.") - } - - contents, se := s.RPCReadFile(f.inode.filepath) - if se != nil || offset >= int64(len(contents)) { - return 0, io.EOF - } - - n, err := dst.CopyOut(ctx, contents[offset:]) - return int64(n), err -} - -// Write implements fs.FileOperations.Write. -// -// This method can panic if an rpcInetInode was created without an rpcInet -// stack. -func (f *rpcInetFile) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { - s, ok := f.inode.k.NetworkStack().(*rpcinet.Stack) - if !ok { - panic("Network stack is not a rpcinet.") - } - - if src.NumBytes() == 0 { - return 0, nil - } - - b := make([]byte, src.NumBytes(), src.NumBytes()) - n, err := src.CopyIn(ctx, b) - if err != nil { - return int64(n), err - } - - written, se := s.RPCWriteFile(f.inode.filepath, b) - return int64(written), se.ToError() -} - -// newRPCInetProcNet will build an inode for /proc/net. -func newRPCInetProcNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - contents := map[string]*fs.Inode{ - "arp": newRPCInetInode(ctx, msrc, "/proc/net/arp", 0444), - "dev": newRPCInetInode(ctx, msrc, "/proc/net/dev", 0444), - "if_inet6": newRPCInetInode(ctx, msrc, "/proc/net/if_inet6", 0444), - "ipv6_route": newRPCInetInode(ctx, msrc, "/proc/net/ipv6_route", 0444), - "netlink": newRPCInetInode(ctx, msrc, "/proc/net/netlink", 0444), - "netstat": newRPCInetInode(ctx, msrc, "/proc/net/netstat", 0444), - "packet": newRPCInetInode(ctx, msrc, "/proc/net/packet", 0444), - "protocols": newRPCInetInode(ctx, msrc, "/proc/net/protocols", 0444), - "psched": newRPCInetInode(ctx, msrc, "/proc/net/psched", 0444), - "ptype": newRPCInetInode(ctx, msrc, "/proc/net/ptype", 0444), - "route": newRPCInetInode(ctx, msrc, "/proc/net/route", 0444), - "tcp": newRPCInetInode(ctx, msrc, "/proc/net/tcp", 0444), - "tcp6": newRPCInetInode(ctx, msrc, "/proc/net/tcp6", 0444), - "udp": newRPCInetInode(ctx, msrc, "/proc/net/udp", 0444), - "udp6": newRPCInetInode(ctx, msrc, "/proc/net/udp6", 0444), - } - - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) -} - -// newRPCInetProcSysNet will build an inode for /proc/sys/net. -func newRPCInetProcSysNet(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - contents := map[string]*fs.Inode{ - "ipv4": newRPCInetSysNetIPv4Dir(ctx, msrc), - "core": newRPCInetSysNetCore(ctx, msrc), - } - - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) -} - -// newRPCInetSysNetCore builds the /proc/sys/net/core directory. -func newRPCInetSysNetCore(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - contents := map[string]*fs.Inode{ - "default_qdisc": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/default_qdisc", 0444), - "message_burst": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/message_burst", 0444), - "message_cost": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/message_cost", 0444), - "optmem_max": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/optmem_max", 0444), - "rmem_default": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/rmem_default", 0444), - "rmem_max": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/rmem_max", 0444), - "somaxconn": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/somaxconn", 0444), - "wmem_default": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/wmem_default", 0444), - "wmem_max": newRPCInetInode(ctx, msrc, "/proc/sys/net/core/wmem_max", 0444), - } - - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) -} - -// newRPCInetSysNetIPv4Dir builds the /proc/sys/net/ipv4 directory. -func newRPCInetSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - contents := map[string]*fs.Inode{ - "ip_local_port_range": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_local_port_range", 0444), - "ip_local_reserved_ports": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_local_reserved_ports", 0444), - "ipfrag_time": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ipfrag_time", 0444), - "ip_nonlocal_bind": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_nonlocal_bind", 0444), - "ip_no_pmtu_disc": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/ip_no_pmtu_disc", 0444), - "tcp_allowed_congestion_control": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_allowed_congestion_control", 0444), - "tcp_available_congestion_control": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_available_congestion_control", 0444), - "tcp_base_mss": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_base_mss", 0444), - "tcp_congestion_control": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_congestion_control", 0644), - "tcp_dsack": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_dsack", 0644), - "tcp_early_retrans": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_early_retrans", 0644), - "tcp_fack": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fack", 0644), - "tcp_fastopen": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fastopen", 0644), - "tcp_fastopen_key": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fastopen_key", 0444), - "tcp_fin_timeout": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_fin_timeout", 0644), - "tcp_invalid_ratelimit": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_invalid_ratelimit", 0444), - "tcp_keepalive_intvl": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_intvl", 0644), - "tcp_keepalive_probes": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_probes", 0644), - "tcp_keepalive_time": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_keepalive_time", 0644), - "tcp_mem": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_mem", 0444), - "tcp_mtu_probing": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_mtu_probing", 0644), - "tcp_no_metrics_save": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_no_metrics_save", 0444), - "tcp_probe_interval": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_probe_interval", 0444), - "tcp_probe_threshold": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_probe_threshold", 0444), - "tcp_retries1": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_retries1", 0644), - "tcp_retries2": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_retries2", 0644), - "tcp_rfc1337": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_rfc1337", 0444), - "tcp_rmem": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_rmem", 0444), - "tcp_sack": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_sack", 0644), - "tcp_slow_start_after_idle": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_slow_start_after_idle", 0644), - "tcp_synack_retries": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_synack_retries", 0644), - "tcp_syn_retries": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_syn_retries", 0644), - "tcp_timestamps": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_timestamps", 0644), - "tcp_wmem": newRPCInetInode(ctx, msrc, "/proc/sys/net/ipv4/tcp_wmem", 0444), - } - - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) -} diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go index 1062bd852..2bdcf5f70 100644 --- a/pkg/sentry/fs/proc/sys.go +++ b/pkg/sentry/fs/proc/sys.go @@ -26,7 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet" "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -106,16 +105,10 @@ func (p *proc) newVMDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { func (p *proc) newSysDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { children := map[string]*fs.Inode{ "kernel": p.newKernelDir(ctx, msrc), + "net": p.newSysNetDir(ctx, msrc), "vm": p.newVMDir(ctx, msrc), } - // If we're using rpcinet we will let it manage /proc/sys/net. - if _, ok := p.k.NetworkStack().(*rpcinet.Stack); ok { - children["net"] = newRPCInetProcSysNet(ctx, msrc) - } else { - children["net"] = p.newSysNetDir(ctx, msrc) - } - d := ramfs.NewDir(ctx, children, fs.RootOwner, fs.FilePermsFromMode(0555)) return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } diff --git a/pkg/sentry/socket/rpcinet/BUILD b/pkg/sentry/socket/rpcinet/BUILD deleted file mode 100644 index 4668b87d1..000000000 --- a/pkg/sentry/socket/rpcinet/BUILD +++ /dev/null @@ -1,69 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") - -package(licenses = ["notice"]) - -go_library( - name = "rpcinet", - srcs = [ - "device.go", - "rpcinet.go", - "socket.go", - "stack.go", - "stack_unsafe.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet", - visibility = ["//pkg/sentry:internal"], - deps = [ - ":syscall_rpc_go_proto", - "//pkg/abi/linux", - "//pkg/binary", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/device", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/time", - "//pkg/sentry/socket", - "//pkg/sentry/socket/hostinet", - "//pkg/sentry/socket/rpcinet/conn", - "//pkg/sentry/socket/rpcinet/notifier", - "//pkg/sentry/unimpl", - "//pkg/sentry/usermem", - "//pkg/syserr", - "//pkg/syserror", - "//pkg/tcpip", - "//pkg/tcpip/buffer", - "//pkg/tcpip/stack", - "//pkg/unet", - "//pkg/waiter", - ], -) - -proto_library( - name = "syscall_rpc_proto", - srcs = ["syscall_rpc.proto"], - visibility = [ - "//visibility:public", - ], -) - -cc_proto_library( - name = "syscall_rpc_cc_proto", - visibility = [ - "//visibility:public", - ], - deps = [":syscall_rpc_proto"], -) - -go_proto_library( - name = "syscall_rpc_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto", - proto = ":syscall_rpc_proto", - visibility = [ - "//visibility:public", - ], -) diff --git a/pkg/sentry/socket/rpcinet/conn/BUILD b/pkg/sentry/socket/rpcinet/conn/BUILD deleted file mode 100644 index b2677c659..000000000 --- a/pkg/sentry/socket/rpcinet/conn/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "conn", - srcs = ["conn.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/binary", - "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", - "//pkg/sync", - "//pkg/syserr", - "//pkg/unet", - "@com_github_golang_protobuf//proto:go_default_library", - ], -) diff --git a/pkg/sentry/socket/rpcinet/conn/conn.go b/pkg/sentry/socket/rpcinet/conn/conn.go deleted file mode 100644 index 02f39c767..000000000 --- a/pkg/sentry/socket/rpcinet/conn/conn.go +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package conn is an RPC connection to a syscall RPC server. -package conn - -import ( - "fmt" - "sync/atomic" - "syscall" - - "github.com/golang/protobuf/proto" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/unet" - - pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" -) - -type request struct { - response []byte - ready chan struct{} - ignoreResult bool -} - -// RPCConnection represents a single RPC connection to a syscall gofer. -type RPCConnection struct { - // reqID is the ID of the last request and must be accessed atomically. - reqID uint64 - - sendMu sync.Mutex - socket *unet.Socket - - reqMu sync.Mutex - requests map[uint64]request -} - -// NewRPCConnection initializes a RPC connection to a socket gofer. -func NewRPCConnection(s *unet.Socket) *RPCConnection { - conn := &RPCConnection{socket: s, requests: map[uint64]request{}} - go func() { // S/R-FIXME(b/77962828) - var nums [16]byte - for { - for n := 0; n < len(nums); { - nn, err := conn.socket.Read(nums[n:]) - if err != nil { - panic(fmt.Sprint("error reading length from socket rpc gofer: ", err)) - } - n += nn - } - - b := make([]byte, binary.LittleEndian.Uint64(nums[:8])) - id := binary.LittleEndian.Uint64(nums[8:]) - - for n := 0; n < len(b); { - nn, err := conn.socket.Read(b[n:]) - if err != nil { - panic(fmt.Sprint("error reading request from socket rpc gofer: ", err)) - } - n += nn - } - - conn.reqMu.Lock() - r := conn.requests[id] - if r.ignoreResult { - delete(conn.requests, id) - } else { - r.response = b - conn.requests[id] = r - } - conn.reqMu.Unlock() - close(r.ready) - } - }() - return conn -} - -// NewRequest makes a request to the RPC gofer and returns the request ID and a -// channel which will be closed once the request completes. -func (c *RPCConnection) NewRequest(req pb.SyscallRequest, ignoreResult bool) (uint64, chan struct{}) { - b, err := proto.Marshal(&req) - if err != nil { - panic(fmt.Sprint("invalid proto: ", err)) - } - - id := atomic.AddUint64(&c.reqID, 1) - ch := make(chan struct{}) - - c.reqMu.Lock() - c.requests[id] = request{ready: ch, ignoreResult: ignoreResult} - c.reqMu.Unlock() - - c.sendMu.Lock() - defer c.sendMu.Unlock() - - var nums [16]byte - binary.LittleEndian.PutUint64(nums[:8], uint64(len(b))) - binary.LittleEndian.PutUint64(nums[8:], id) - for n := 0; n < len(nums); { - nn, err := c.socket.Write(nums[n:]) - if err != nil { - panic(fmt.Sprint("error writing length and ID to socket gofer: ", err)) - } - n += nn - } - - for n := 0; n < len(b); { - nn, err := c.socket.Write(b[n:]) - if err != nil { - panic(fmt.Sprint("error writing request to socket gofer: ", err)) - } - n += nn - } - - return id, ch -} - -// RPCReadFile will execute the ReadFile helper RPC method which avoids the -// common pattern of open(2), read(2), close(2) by doing all three operations -// as a single RPC. It will read the entire file or return EFBIG if the file -// was too large. -func (c *RPCConnection) RPCReadFile(path string) ([]byte, *syserr.Error) { - req := &pb.SyscallRequest_ReadFile{&pb.ReadFileRequest{ - Path: path, - }} - - id, ch := c.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-ch - - res := c.Request(id).Result.(*pb.SyscallResponse_ReadFile).ReadFile.Result - if e, ok := res.(*pb.ReadFileResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.ReadFileResponse_Data).Data, nil -} - -// RPCWriteFile will execute the WriteFile helper RPC method which avoids the -// common pattern of open(2), write(2), write(2), close(2) by doing all -// operations as a single RPC. -func (c *RPCConnection) RPCWriteFile(path string, data []byte) (int64, *syserr.Error) { - req := &pb.SyscallRequest_WriteFile{&pb.WriteFileRequest{ - Path: path, - Content: data, - }} - - id, ch := c.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-ch - - res := c.Request(id).Result.(*pb.SyscallResponse_WriteFile).WriteFile - if e := res.ErrorNumber; e != 0 { - return int64(res.Written), syserr.FromHost(syscall.Errno(e)) - } - - return int64(res.Written), nil -} - -// Request retrieves the request corresponding to the given request ID. -// -// The channel returned by NewRequest must have been closed before Request can -// be called. This will happen automatically, do not manually close the -// channel. -func (c *RPCConnection) Request(id uint64) pb.SyscallResponse { - c.reqMu.Lock() - r := c.requests[id] - delete(c.requests, id) - c.reqMu.Unlock() - - var resp pb.SyscallResponse - if err := proto.Unmarshal(r.response, &resp); err != nil { - panic(fmt.Sprint("invalid proto: ", err)) - } - - return resp -} diff --git a/pkg/sentry/socket/rpcinet/device.go b/pkg/sentry/socket/rpcinet/device.go deleted file mode 100644 index 8cfd5f6e5..000000000 --- a/pkg/sentry/socket/rpcinet/device.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rpcinet - -import "gvisor.dev/gvisor/pkg/sentry/device" - -var socketDevice = device.NewAnonDevice() diff --git a/pkg/sentry/socket/rpcinet/notifier/BUILD b/pkg/sentry/socket/rpcinet/notifier/BUILD deleted file mode 100644 index a5954f22b..000000000 --- a/pkg/sentry/socket/rpcinet/notifier/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "notifier", - srcs = ["notifier.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier", - visibility = ["//:sandbox"], - deps = [ - "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", - "//pkg/sentry/socket/rpcinet/conn", - "//pkg/sync", - "//pkg/waiter", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier.go b/pkg/sentry/socket/rpcinet/notifier/notifier.go deleted file mode 100644 index 82b75d6dd..000000000 --- a/pkg/sentry/socket/rpcinet/notifier/notifier.go +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package notifier implements an FD notifier implementation over RPC. -package notifier - -import ( - "fmt" - "syscall" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" - pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/waiter" -) - -type fdInfo struct { - queue *waiter.Queue - waiting bool -} - -// Notifier holds all the state necessary to issue notifications when IO events -// occur in the observed FDs. -type Notifier struct { - // rpcConn is the connection that is used for sending RPCs. - rpcConn *conn.RPCConnection - - // epFD is the epoll file descriptor used to register for io - // notifications. - epFD uint32 - - // mu protects fdMap. - mu sync.Mutex - - // fdMap maps file descriptors to their notification queues and waiting - // status. - fdMap map[uint32]*fdInfo -} - -// NewRPCNotifier creates a new notifier object. -func NewRPCNotifier(cn *conn.RPCConnection) (*Notifier, error) { - id, c := cn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCreate1{&pb.EpollCreate1Request{}}}, false /* ignoreResult */) - <-c - - res := cn.Request(id).Result.(*pb.SyscallResponse_EpollCreate1).EpollCreate1.Result - if e, ok := res.(*pb.EpollCreate1Response_ErrorNumber); ok { - return nil, syscall.Errno(e.ErrorNumber) - } - - w := &Notifier{ - rpcConn: cn, - epFD: res.(*pb.EpollCreate1Response_Fd).Fd, - fdMap: make(map[uint32]*fdInfo), - } - - go w.waitAndNotify() // S/R-FIXME(b/77962828) - - return w, nil -} - -// waitFD waits on mask for fd. The fdMap mutex must be hold. -func (n *Notifier) waitFD(fd uint32, fi *fdInfo, mask waiter.EventMask) error { - if !fi.waiting && mask == 0 { - return nil - } - - e := pb.EpollEvent{ - Events: mask.ToLinux() | unix.EPOLLET, - Fd: fd, - } - - switch { - case !fi.waiting && mask != 0: - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCtl{&pb.EpollCtlRequest{Epfd: n.epFD, Op: syscall.EPOLL_CTL_ADD, Fd: fd, Event: &e}}}, false /* ignoreResult */) - <-c - - e := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_EpollCtl).EpollCtl.ErrorNumber - if e != 0 { - return syscall.Errno(e) - } - - fi.waiting = true - case fi.waiting && mask == 0: - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCtl{&pb.EpollCtlRequest{Epfd: n.epFD, Op: syscall.EPOLL_CTL_DEL, Fd: fd}}}, false /* ignoreResult */) - <-c - n.rpcConn.Request(id) - - fi.waiting = false - case fi.waiting && mask != 0: - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollCtl{&pb.EpollCtlRequest{Epfd: n.epFD, Op: syscall.EPOLL_CTL_MOD, Fd: fd, Event: &e}}}, false /* ignoreResult */) - <-c - - e := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_EpollCtl).EpollCtl.ErrorNumber - if e != 0 { - return syscall.Errno(e) - } - } - - return nil -} - -// addFD adds an FD to the list of FDs observed by n. -func (n *Notifier) addFD(fd uint32, queue *waiter.Queue) { - n.mu.Lock() - defer n.mu.Unlock() - - // Panic if we're already notifying on this FD. - if _, ok := n.fdMap[fd]; ok { - panic(fmt.Sprintf("File descriptor %d added twice", fd)) - } - - // We have nothing to wait for at the moment. Just add it to the map. - n.fdMap[fd] = &fdInfo{queue: queue} -} - -// updateFD updates the set of events the FD needs to be notified on. -func (n *Notifier) updateFD(fd uint32) error { - n.mu.Lock() - defer n.mu.Unlock() - - if fi, ok := n.fdMap[fd]; ok { - return n.waitFD(fd, fi, fi.queue.Events()) - } - - return nil -} - -// RemoveFD removes an FD from the list of FDs observed by n. -func (n *Notifier) removeFD(fd uint32) { - n.mu.Lock() - defer n.mu.Unlock() - - // Remove from map, then from epoll object. - n.waitFD(fd, n.fdMap[fd], 0) - delete(n.fdMap, fd) -} - -// hasFD returns true if the FD is in the list of observed FDs. -func (n *Notifier) hasFD(fd uint32) bool { - n.mu.Lock() - defer n.mu.Unlock() - - _, ok := n.fdMap[fd] - return ok -} - -// waitAndNotify loops waiting for io event notifications from the epoll -// object. Once notifications arrive, they are dispatched to the -// registered queue. -func (n *Notifier) waitAndNotify() error { - for { - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_EpollWait{&pb.EpollWaitRequest{Fd: n.epFD, NumEvents: 100, Msec: -1}}}, false /* ignoreResult */) - <-c - - res := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_EpollWait).EpollWait.Result - if e, ok := res.(*pb.EpollWaitResponse_ErrorNumber); ok { - err := syscall.Errno(e.ErrorNumber) - // NOTE(magi): I don't think epoll_wait can return EAGAIN but I'm being - // conseratively careful here since exiting the notification thread - // would be really bad. - if err == syscall.EINTR || err == syscall.EAGAIN { - continue - } - return err - } - - n.mu.Lock() - for _, e := range res.(*pb.EpollWaitResponse_Events).Events.Events { - if fi, ok := n.fdMap[e.Fd]; ok { - fi.queue.Notify(waiter.EventMaskFromLinux(e.Events)) - } - } - n.mu.Unlock() - } -} - -// AddFD adds an FD to the list of observed FDs. -func (n *Notifier) AddFD(fd uint32, queue *waiter.Queue) error { - n.addFD(fd, queue) - return nil -} - -// UpdateFD updates the set of events the FD needs to be notified on. -func (n *Notifier) UpdateFD(fd uint32) error { - return n.updateFD(fd) -} - -// RemoveFD removes an FD from the list of observed FDs. -func (n *Notifier) RemoveFD(fd uint32) { - n.removeFD(fd) -} - -// HasFD returns true if the FD is in the list of observed FDs. -// -// This should only be used by tests to assert that FDs are correctly -// registered. -func (n *Notifier) HasFD(fd uint32) bool { - return n.hasFD(fd) -} - -// NonBlockingPoll polls the given fd in non-blocking fashion. It is used just -// to query the FD's current state; this method will block on the RPC response -// although the syscall is non-blocking. -func (n *Notifier) NonBlockingPoll(fd uint32, mask waiter.EventMask) waiter.EventMask { - for { - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Poll{&pb.PollRequest{Fd: fd, Events: mask.ToLinux()}}}, false /* ignoreResult */) - <-c - - res := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_Poll).Poll.Result - if e, ok := res.(*pb.PollResponse_ErrorNumber); ok { - if syscall.Errno(e.ErrorNumber) == syscall.EINTR { - continue - } - return mask - } - - return waiter.EventMaskFromLinux(res.(*pb.PollResponse_Events).Events) - } -} diff --git a/pkg/sentry/socket/rpcinet/rpcinet.go b/pkg/sentry/socket/rpcinet/rpcinet.go deleted file mode 100644 index 5d4fd4dac..000000000 --- a/pkg/sentry/socket/rpcinet/rpcinet.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package rpcinet implements sockets using an RPC for each syscall. -package rpcinet diff --git a/pkg/sentry/socket/rpcinet/socket.go b/pkg/sentry/socket/rpcinet/socket.go deleted file mode 100644 index ddb76d9d4..000000000 --- a/pkg/sentry/socket/rpcinet/socket.go +++ /dev/null @@ -1,909 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rpcinet - -import ( - "sync/atomic" - "syscall" - "time" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/kernel" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier" - pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" - "gvisor.dev/gvisor/pkg/sentry/unimpl" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/buffer" - "gvisor.dev/gvisor/pkg/waiter" -) - -// socketOperations implements fs.FileOperations and socket.Socket for a socket -// implemented using a host socket. -type socketOperations struct { - fsutil.FilePipeSeek `state:"nosave"` - fsutil.FileNotDirReaddir `state:"nosave"` - fsutil.FileNoFsync `state:"nosave"` - fsutil.FileNoMMap `state:"nosave"` - fsutil.FileNoSplice `state:"nosave"` - fsutil.FileNoopFlush `state:"nosave"` - fsutil.FileUseInodeUnstableAttr `state:"nosave"` - socket.SendReceiveTimeout - - family int // Read-only. - stype linux.SockType // Read-only. - protocol int // Read-only. - - fd uint32 // must be O_NONBLOCK - wq *waiter.Queue - rpcConn *conn.RPCConnection - notifier *notifier.Notifier - - // shState is the state of the connection with respect to shutdown. Because - // we're mixing non-blocking semantics on the other side we have to adapt for - // some strange differences between blocking and non-blocking sockets. - shState int32 -} - -// Verify that we actually implement socket.Socket. -var _ = socket.Socket(&socketOperations{}) - -// New creates a new RPC socket. -func newSocketFile(ctx context.Context, stack *Stack, family int, skType linux.SockType, protocol int) (*fs.File, *syserr.Error) { - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Socket{&pb.SocketRequest{Family: int64(family), Type: int64(skType | syscall.SOCK_NONBLOCK), Protocol: int64(protocol)}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Socket).Socket.Result - if e, ok := res.(*pb.SocketResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - fd := res.(*pb.SocketResponse_Fd).Fd - - var wq waiter.Queue - stack.notifier.AddFD(fd, &wq) - - dirent := socket.NewDirent(ctx, socketDevice) - defer dirent.DecRef() - return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &socketOperations{ - family: family, - stype: skType, - protocol: protocol, - wq: &wq, - fd: fd, - rpcConn: stack.rpcConn, - notifier: stack.notifier, - }), nil -} - -func isBlockingErrno(err error) bool { - return err == syscall.EAGAIN || err == syscall.EWOULDBLOCK -} - -func translateIOSyscallError(err error) error { - if isBlockingErrno(err) { - return syserror.ErrWouldBlock - } - return err -} - -// setShutdownFlags will set the shutdown flag so we can handle blocking reads -// after a read shutdown. -func (s *socketOperations) setShutdownFlags(how int) { - var f tcpip.ShutdownFlags - switch how { - case linux.SHUT_RD: - f = tcpip.ShutdownRead - case linux.SHUT_WR: - f = tcpip.ShutdownWrite - case linux.SHUT_RDWR: - f = tcpip.ShutdownWrite | tcpip.ShutdownRead - } - - // Atomically update the flags. - for { - old := atomic.LoadInt32(&s.shState) - if atomic.CompareAndSwapInt32(&s.shState, old, old|int32(f)) { - break - } - } -} - -func (s *socketOperations) resetShutdownFlags() { - atomic.StoreInt32(&s.shState, 0) -} - -func (s *socketOperations) isShutRdSet() bool { - return atomic.LoadInt32(&s.shState)&int32(tcpip.ShutdownRead) != 0 -} - -func (s *socketOperations) isShutWrSet() bool { - return atomic.LoadInt32(&s.shState)&int32(tcpip.ShutdownWrite) != 0 -} - -// Release implements fs.FileOperations.Release. -func (s *socketOperations) Release() { - s.notifier.RemoveFD(s.fd) - - // We always need to close the FD. - _, _ = s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Close{&pb.CloseRequest{Fd: s.fd}}}, true /* ignoreResult */) -} - -// Readiness implements waiter.Waitable.Readiness. -func (s *socketOperations) Readiness(mask waiter.EventMask) waiter.EventMask { - return s.notifier.NonBlockingPoll(s.fd, mask) -} - -// EventRegister implements waiter.Waitable.EventRegister. -func (s *socketOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { - s.wq.EventRegister(e, mask) - s.notifier.UpdateFD(s.fd) -} - -// EventUnregister implements waiter.Waitable.EventUnregister. -func (s *socketOperations) EventUnregister(e *waiter.Entry) { - s.wq.EventUnregister(e) - s.notifier.UpdateFD(s.fd) -} - -func rpcRead(t *kernel.Task, req *pb.SyscallRequest_Read) (*pb.ReadResponse_Data, *syserr.Error) { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Read).Read.Result - if e, ok := res.(*pb.ReadResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.ReadResponse_Data), nil -} - -// Read implements fs.FileOperations.Read. -func (s *socketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) { - req := &pb.SyscallRequest_Read{&pb.ReadRequest{ - Fd: s.fd, - Length: uint32(dst.NumBytes()), - }} - - res, se := rpcRead(ctx.(*kernel.Task), req) - if se == nil { - n, e := dst.CopyOut(ctx, res.Data) - return int64(n), e - } - - return 0, se.ToError() -} - -func rpcWrite(t *kernel.Task, req *pb.SyscallRequest_Write) (uint32, *syserr.Error) { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Write).Write.Result - if e, ok := res.(*pb.WriteResponse_ErrorNumber); ok { - return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.WriteResponse_Length).Length, nil -} - -// Write implements fs.FileOperations.Write. -func (s *socketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) { - t := ctx.(*kernel.Task) - v := buffer.NewView(int(src.NumBytes())) - - // Copy all the data into the buffer. - if _, err := src.CopyIn(t, v); err != nil { - return 0, err - } - - n, err := rpcWrite(t, &pb.SyscallRequest_Write{&pb.WriteRequest{Fd: s.fd, Data: v}}) - if n > 0 && n < uint32(src.NumBytes()) { - // The FileOperations.Write interface expects us to return ErrWouldBlock in - // the event of a partial write. - return int64(n), syserror.ErrWouldBlock - } - return int64(n), err.ToError() -} - -func rpcConnect(t *kernel.Task, fd uint32, sockaddr []byte) *syserr.Error { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Connect{&pb.ConnectRequest{Fd: uint32(fd), Address: sockaddr}}}, false /* ignoreResult */) - <-c - - if e := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Connect).Connect.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -// Connect implements socket.Socket.Connect. -func (s *socketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error { - if !blocking { - e := rpcConnect(t, s.fd, sockaddr) - if e == nil { - // Reset the shutdown state on new connects. - s.resetShutdownFlags() - } - return e - } - - // Register for notification when the endpoint becomes writable, then - // initiate the connection. - e, ch := waiter.NewChannelEntry(nil) - s.EventRegister(&e, waiter.EventOut|waiter.EventIn|waiter.EventHUp) - defer s.EventUnregister(&e) - for { - if err := rpcConnect(t, s.fd, sockaddr); err == nil || err != syserr.ErrInProgress && err != syserr.ErrAlreadyInProgress { - if err == nil { - // Reset the shutdown state on new connects. - s.resetShutdownFlags() - } - return err - } - - // It's pending, so we have to wait for a notification, and fetch the - // result once the wait completes. - if err := t.Block(ch); err != nil { - return syserr.FromError(err) - } - } -} - -func rpcAccept(t *kernel.Task, fd uint32, peer bool) (*pb.AcceptResponse_ResultPayload, *syserr.Error) { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Accept{&pb.AcceptRequest{Fd: fd, Peer: peer, Flags: syscall.SOCK_NONBLOCK}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Accept).Accept.Result - if e, ok := res.(*pb.AcceptResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - return res.(*pb.AcceptResponse_Payload).Payload, nil -} - -// Accept implements socket.Socket.Accept. -func (s *socketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) { - payload, se := rpcAccept(t, s.fd, peerRequested) - - // Check if we need to block. - if blocking && se == syserr.ErrTryAgain { - // Register for notifications. - e, ch := waiter.NewChannelEntry(nil) - // FIXME(b/119878986): This waiter.EventHUp is a partial - // measure, need to figure out how to translate linux events to - // internal events. - s.EventRegister(&e, waiter.EventIn|waiter.EventHUp) - defer s.EventUnregister(&e) - - // Try to accept the connection again; if it fails, then wait until we - // get a notification. - for { - if payload, se = rpcAccept(t, s.fd, peerRequested); se != syserr.ErrTryAgain { - break - } - - if err := t.Block(ch); err != nil { - return 0, nil, 0, syserr.FromError(err) - } - } - } - - // Handle any error from accept. - if se != nil { - return 0, nil, 0, se - } - - var wq waiter.Queue - s.notifier.AddFD(payload.Fd, &wq) - - dirent := socket.NewDirent(t, socketDevice) - defer dirent.DecRef() - fileFlags := fs.FileFlags{ - Read: true, - Write: true, - NonSeekable: true, - NonBlocking: flags&linux.SOCK_NONBLOCK != 0, - } - file := fs.NewFile(t, dirent, fileFlags, &socketOperations{ - family: s.family, - stype: s.stype, - protocol: s.protocol, - wq: &wq, - fd: payload.Fd, - rpcConn: s.rpcConn, - notifier: s.notifier, - }) - defer file.DecRef() - - fd, err := t.NewFDFrom(0, file, kernel.FDFlags{ - CloseOnExec: flags&linux.SOCK_CLOEXEC != 0, - }) - if err != nil { - return 0, nil, 0, syserr.FromError(err) - } - t.Kernel().RecordSocket(file) - - if peerRequested { - return fd, socket.UnmarshalSockAddr(s.family, payload.Address.Address), payload.Address.Length, nil - } - - return fd, nil, 0, nil -} - -// Bind implements socket.Socket.Bind. -func (s *socketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Bind{&pb.BindRequest{Fd: s.fd, Address: sockaddr}}}, false /* ignoreResult */) - <-c - - if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Bind).Bind.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -// Listen implements socket.Socket.Listen. -func (s *socketOperations) Listen(t *kernel.Task, backlog int) *syserr.Error { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Listen{&pb.ListenRequest{Fd: s.fd, Backlog: int64(backlog)}}}, false /* ignoreResult */) - <-c - - if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Listen).Listen.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -// Shutdown implements socket.Socket.Shutdown. -func (s *socketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error { - // We save the shutdown state because of strange differences on linux - // related to recvs on blocking vs. non-blocking sockets after a SHUT_RD. - // We need to emulate that behavior on the blocking side. - // TODO(b/120096741): There is a possible race that can exist with loopback, - // where data could possibly be lost. - s.setShutdownFlags(how) - - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Shutdown{&pb.ShutdownRequest{Fd: s.fd, How: int64(how)}}}, false /* ignoreResult */) - <-c - - if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Shutdown).Shutdown.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - - return nil -} - -// GetSockOpt implements socket.Socket.GetSockOpt. -func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) { - // SO_RCVTIMEO and SO_SNDTIMEO are special because blocking is performed - // within the sentry. - if level == linux.SOL_SOCKET && name == linux.SO_RCVTIMEO { - if outLen < linux.SizeOfTimeval { - return nil, syserr.ErrInvalidArgument - } - - return linux.NsecToTimeval(s.RecvTimeout()), nil - } - if level == linux.SOL_SOCKET && name == linux.SO_SNDTIMEO { - if outLen < linux.SizeOfTimeval { - return nil, syserr.ErrInvalidArgument - } - - return linux.NsecToTimeval(s.SendTimeout()), nil - } - - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_GetSockOpt{&pb.GetSockOptRequest{Fd: s.fd, Level: int64(level), Name: int64(name), Length: uint32(outLen)}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_GetSockOpt).GetSockOpt.Result - if e, ok := res.(*pb.GetSockOptResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.GetSockOptResponse_Opt).Opt, nil -} - -// SetSockOpt implements socket.Socket.SetSockOpt. -func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error { - // Because blocking actually happens within the sentry we need to inspect - // this socket option to determine if it's a SO_RCVTIMEO or SO_SNDTIMEO, - // and if so, we will save it and use it as the deadline for recv(2) - // or send(2) related syscalls. - if level == linux.SOL_SOCKET && name == linux.SO_RCVTIMEO { - if len(opt) < linux.SizeOfTimeval { - return syserr.ErrInvalidArgument - } - - var v linux.Timeval - binary.Unmarshal(opt[:linux.SizeOfTimeval], usermem.ByteOrder, &v) - if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { - return syserr.ErrDomain - } - s.SetRecvTimeout(v.ToNsecCapped()) - return nil - } - if level == linux.SOL_SOCKET && name == linux.SO_SNDTIMEO { - if len(opt) < linux.SizeOfTimeval { - return syserr.ErrInvalidArgument - } - - var v linux.Timeval - binary.Unmarshal(opt[:linux.SizeOfTimeval], usermem.ByteOrder, &v) - if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) { - return syserr.ErrDomain - } - s.SetSendTimeout(v.ToNsecCapped()) - return nil - } - - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_SetSockOpt{&pb.SetSockOptRequest{Fd: s.fd, Level: int64(level), Name: int64(name), Opt: opt}}}, false /* ignoreResult */) - <-c - - if e := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_SetSockOpt).SetSockOpt.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -// GetPeerName implements socket.Socket.GetPeerName. -func (s *socketOperations) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_GetPeerName{&pb.GetPeerNameRequest{Fd: s.fd}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_GetPeerName).GetPeerName.Result - if e, ok := res.(*pb.GetPeerNameResponse_ErrorNumber); ok { - return nil, 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - addr := res.(*pb.GetPeerNameResponse_Address).Address - return socket.UnmarshalSockAddr(s.family, addr.Address), addr.Length, nil -} - -// GetSockName implements socket.Socket.GetSockName. -func (s *socketOperations) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) { - stack := t.NetworkContext().(*Stack) - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_GetSockName{&pb.GetSockNameRequest{Fd: s.fd}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_GetSockName).GetSockName.Result - if e, ok := res.(*pb.GetSockNameResponse_ErrorNumber); ok { - return nil, 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - addr := res.(*pb.GetSockNameResponse_Address).Address - return socket.UnmarshalSockAddr(s.family, addr.Address), addr.Length, nil -} - -func rpcIoctl(t *kernel.Task, fd, cmd uint32, arg []byte) ([]byte, error) { - stack := t.NetworkContext().(*Stack) - - id, c := stack.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Ioctl{&pb.IOCtlRequest{Fd: fd, Cmd: cmd, Arg: arg}}}, false /* ignoreResult */) - <-c - - res := stack.rpcConn.Request(id).Result.(*pb.SyscallResponse_Ioctl).Ioctl.Result - if e, ok := res.(*pb.IOCtlResponse_ErrorNumber); ok { - return nil, syscall.Errno(e.ErrorNumber) - } - - return res.(*pb.IOCtlResponse_Value).Value, nil -} - -// ifconfIoctlFromStack populates a struct ifconf for the SIOCGIFCONF ioctl. -func ifconfIoctlFromStack(ctx context.Context, io usermem.IO, ifc *linux.IFConf) error { - // If Ptr is NULL, return the necessary buffer size via Len. - // Otherwise, write up to Len bytes starting at Ptr containing ifreq - // structs. - t := ctx.(*kernel.Task) - s := t.NetworkContext().(*Stack) - if s == nil { - return syserr.ErrNoDevice.ToError() - } - - if ifc.Ptr == 0 { - ifc.Len = int32(len(s.Interfaces())) * int32(linux.SizeOfIFReq) - return nil - } - - max := ifc.Len - ifc.Len = 0 - for key, ifaceAddrs := range s.InterfaceAddrs() { - iface := s.Interfaces()[key] - for _, ifaceAddr := range ifaceAddrs { - // Don't write past the end of the buffer. - if ifc.Len+int32(linux.SizeOfIFReq) > max { - break - } - if ifaceAddr.Family != linux.AF_INET { - continue - } - - // Populate ifr.ifr_addr. - ifr := linux.IFReq{} - ifr.SetName(iface.Name) - usermem.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family)) - usermem.ByteOrder.PutUint16(ifr.Data[2:4], 0) - copy(ifr.Data[4:8], ifaceAddr.Addr[:4]) - - // Copy the ifr to userspace. - dst := uintptr(ifc.Ptr) + uintptr(ifc.Len) - ifc.Len += int32(linux.SizeOfIFReq) - if _, err := usermem.CopyObjectOut(ctx, io, usermem.Addr(dst), ifr, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return err - } - } - } - return nil -} - -// Ioctl implements fs.FileOperations.Ioctl. -func (s *socketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { - t := ctx.(*kernel.Task) - - cmd := uint32(args[1].Int()) - arg := args[2].Pointer() - - var buf []byte - switch cmd { - // The following ioctls take 4 byte argument parameters. - case syscall.TIOCINQ, - syscall.TIOCOUTQ: - buf = make([]byte, 4) - // The following ioctls have args which are sizeof(struct ifreq). - case syscall.SIOCGIFADDR, - syscall.SIOCGIFBRDADDR, - syscall.SIOCGIFDSTADDR, - syscall.SIOCGIFFLAGS, - syscall.SIOCGIFHWADDR, - syscall.SIOCGIFINDEX, - syscall.SIOCGIFMAP, - syscall.SIOCGIFMETRIC, - syscall.SIOCGIFMTU, - syscall.SIOCGIFNAME, - syscall.SIOCGIFNETMASK, - syscall.SIOCGIFTXQLEN: - buf = make([]byte, linux.SizeOfIFReq) - case syscall.SIOCGIFCONF: - // SIOCGIFCONF has slightly different behavior than the others, in that it - // will need to populate the array of ifreqs. - var ifc linux.IFConf - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &ifc, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { - return 0, err - } - - if err := ifconfIoctlFromStack(ctx, io, &ifc); err != nil { - return 0, err - } - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), ifc, usermem.IOOpts{ - AddressSpaceActive: true, - }) - - return 0, err - - case linux.SIOCGIFMEM, linux.SIOCGIFPFLAGS, linux.SIOCGMIIPHY, linux.SIOCGMIIREG: - unimpl.EmitUnimplementedEvent(ctx) - - default: - return 0, syserror.ENOTTY - } - - _, err := io.CopyIn(ctx, arg, buf, usermem.IOOpts{ - AddressSpaceActive: true, - }) - - if err != nil { - return 0, err - } - - v, err := rpcIoctl(t, s.fd, cmd, buf) - if err != nil { - return 0, err - } - - if len(v) != len(buf) { - return 0, syserror.EINVAL - } - - _, err = io.CopyOut(ctx, arg, v, usermem.IOOpts{ - AddressSpaceActive: true, - }) - return 0, err -} - -func rpcRecvMsg(t *kernel.Task, req *pb.SyscallRequest_Recvmsg) (*pb.RecvmsgResponse_ResultPayload, *syserr.Error) { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Recvmsg).Recvmsg.Result - if e, ok := res.(*pb.RecvmsgResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.RecvmsgResponse_Payload).Payload, nil -} - -// Because we only support SO_TIMESTAMP we will search control messages for -// that value and set it if so, all other control messages will be ignored. -func (s *socketOperations) extractControlMessages(payload *pb.RecvmsgResponse_ResultPayload) socket.ControlMessages { - c := socket.ControlMessages{} - if len(payload.GetCmsgData()) > 0 { - // Parse the control messages looking for SO_TIMESTAMP. - msgs, e := syscall.ParseSocketControlMessage(payload.GetCmsgData()) - if e != nil { - return socket.ControlMessages{} - } - for _, m := range msgs { - if m.Header.Level != linux.SOL_SOCKET || m.Header.Type != linux.SO_TIMESTAMP { - continue - } - - // Let's parse the time stamp and set it. - if len(m.Data) < linux.SizeOfTimeval { - // Give up on locating the SO_TIMESTAMP option. - return socket.ControlMessages{} - } - - var v linux.Timeval - binary.Unmarshal(m.Data[:linux.SizeOfTimeval], usermem.ByteOrder, &v) - c.IP.HasTimestamp = true - c.IP.Timestamp = v.ToNsecCapped() - break - } - } - return c -} - -// RecvMsg implements socket.Socket.RecvMsg. -func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) { - req := &pb.SyscallRequest_Recvmsg{&pb.RecvmsgRequest{ - Fd: s.fd, - Length: uint32(dst.NumBytes()), - Sender: senderRequested, - Trunc: flags&linux.MSG_TRUNC != 0, - Peek: flags&linux.MSG_PEEK != 0, - CmsgLength: uint32(controlDataLen), - }} - - res, err := rpcRecvMsg(t, req) - if err == nil { - var e error - var n int - if len(res.Data) > 0 { - n, e = dst.CopyOut(t, res.Data) - if e == nil && n != len(res.Data) { - panic("CopyOut failed to copy full buffer") - } - } - c := s.extractControlMessages(res) - return int(res.Length), 0, socket.UnmarshalSockAddr(s.family, res.Address.GetAddress()), res.Address.GetLength(), c, syserr.FromError(e) - } - if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain || flags&linux.MSG_DONTWAIT != 0 { - return 0, 0, nil, 0, socket.ControlMessages{}, err - } - - // We'll have to block. Register for notifications and keep trying to - // send all the data. - e, ch := waiter.NewChannelEntry(nil) - s.EventRegister(&e, waiter.EventIn) - defer s.EventUnregister(&e) - - for { - res, err := rpcRecvMsg(t, req) - if err == nil { - var e error - var n int - if len(res.Data) > 0 { - n, e = dst.CopyOut(t, res.Data) - if e == nil && n != len(res.Data) { - panic("CopyOut failed to copy full buffer") - } - } - c := s.extractControlMessages(res) - return int(res.Length), 0, socket.UnmarshalSockAddr(s.family, res.Address.GetAddress()), res.Address.GetLength(), c, syserr.FromError(e) - } - if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain { - return 0, 0, nil, 0, socket.ControlMessages{}, err - } - - if s.isShutRdSet() { - // Blocking would have caused us to block indefinitely so we return 0, - // this is the same behavior as Linux. - return 0, 0, nil, 0, socket.ControlMessages{}, nil - } - - if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { - return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain - } - return 0, 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err) - } - } -} - -func rpcSendMsg(t *kernel.Task, req *pb.SyscallRequest_Sendmsg) (uint32, *syserr.Error) { - s := t.NetworkContext().(*Stack) - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Sendmsg).Sendmsg.Result - if e, ok := res.(*pb.SendmsgResponse_ErrorNumber); ok { - return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.SendmsgResponse_Length).Length, nil -} - -// SendMsg implements socket.Socket.SendMsg. -func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) { - // Whitelist flags. - if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 { - return 0, syserr.ErrInvalidArgument - } - - // Reject Unix control messages. - if !controlMessages.Unix.Empty() { - return 0, syserr.ErrInvalidArgument - } - - v := buffer.NewView(int(src.NumBytes())) - - // Copy all the data into the buffer. - if _, err := src.CopyIn(t, v); err != nil { - return 0, syserr.FromError(err) - } - - // TODO(bgeffon): this needs to change to map directly to a SendMsg syscall - // in the RPC. - totalWritten := 0 - n, err := rpcSendMsg(t, &pb.SyscallRequest_Sendmsg{&pb.SendmsgRequest{ - Fd: uint32(s.fd), - Data: v, - Address: to, - More: flags&linux.MSG_MORE != 0, - EndOfRecord: flags&linux.MSG_EOR != 0, - }}) - - if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain || flags&linux.MSG_DONTWAIT != 0 { - return int(n), err - } - - if n > 0 { - totalWritten += int(n) - v.TrimFront(int(n)) - } - - // We'll have to block. Register for notification and keep trying to - // send all the data. - e, ch := waiter.NewChannelEntry(nil) - s.EventRegister(&e, waiter.EventOut) - defer s.EventUnregister(&e) - - for { - n, err := rpcSendMsg(t, &pb.SyscallRequest_Sendmsg{&pb.SendmsgRequest{ - Fd: uint32(s.fd), - Data: v, - Address: to, - More: flags&linux.MSG_MORE != 0, - EndOfRecord: flags&linux.MSG_EOR != 0, - }}) - - if n > 0 { - totalWritten += int(n) - v.TrimFront(int(n)) - - if err == nil && totalWritten < int(src.NumBytes()) { - continue - } - } - - if err != syserr.ErrWouldBlock && err != syserr.ErrTryAgain { - // We eat the error in this situation. - return int(totalWritten), nil - } - - if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { - if err == syserror.ETIMEDOUT { - return int(totalWritten), syserr.ErrTryAgain - } - return int(totalWritten), syserr.FromError(err) - } - } -} - -// State implements socket.Socket.State. -func (s *socketOperations) State() uint32 { - // TODO(b/127845868): Define a new rpc to query the socket state. - return 0 -} - -// Type implements socket.Socket.Type. -func (s *socketOperations) Type() (family int, skType linux.SockType, protocol int) { - return s.family, s.stype, s.protocol -} - -type socketProvider struct { - family int -} - -// Socket implements socket.Provider.Socket. -func (p *socketProvider) Socket(t *kernel.Task, stypeflags linux.SockType, protocol int) (*fs.File, *syserr.Error) { - // Check that we are using the RPC network stack. - stack := t.NetworkContext() - if stack == nil { - return nil, nil - } - - s, ok := stack.(*Stack) - if !ok { - return nil, nil - } - - // Only accept TCP and UDP. - // - // Try to restrict the flags we will accept to minimize backwards - // incompatibility with netstack. - stype := stypeflags & linux.SOCK_TYPE_MASK - switch stype { - case syscall.SOCK_STREAM: - switch protocol { - case 0, syscall.IPPROTO_TCP: - // ok - default: - return nil, nil - } - case syscall.SOCK_DGRAM: - switch protocol { - case 0, syscall.IPPROTO_UDP: - // ok - default: - return nil, nil - } - default: - return nil, nil - } - - return newSocketFile(t, s, p.family, stype, protocol) -} - -// Pair implements socket.Provider.Pair. -func (p *socketProvider) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error) { - // Not supported by AF_INET/AF_INET6. - return nil, nil, nil -} - -func init() { - for _, family := range []int{syscall.AF_INET, syscall.AF_INET6} { - socket.RegisterProvider(family, &socketProvider{family}) - } -} diff --git a/pkg/sentry/socket/rpcinet/stack.go b/pkg/sentry/socket/rpcinet/stack.go deleted file mode 100644 index f7878a760..000000000 --- a/pkg/sentry/socket/rpcinet/stack.go +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rpcinet - -import ( - "fmt" - "syscall" - - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/socket/hostinet" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn" - "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier" - "gvisor.dev/gvisor/pkg/syserr" - "gvisor.dev/gvisor/pkg/tcpip/stack" - "gvisor.dev/gvisor/pkg/unet" -) - -// Stack implements inet.Stack for RPC backed sockets. -type Stack struct { - interfaces map[int32]inet.Interface - interfaceAddrs map[int32][]inet.InterfaceAddr - routes []inet.Route - rpcConn *conn.RPCConnection - notifier *notifier.Notifier -} - -// NewStack returns a Stack containing the current state of the host network -// stack. -func NewStack(fd int32) (*Stack, error) { - sock, err := unet.NewSocket(int(fd)) - if err != nil { - return nil, err - } - - stack := &Stack{ - interfaces: make(map[int32]inet.Interface), - interfaceAddrs: make(map[int32][]inet.InterfaceAddr), - rpcConn: conn.NewRPCConnection(sock), - } - - var e error - stack.notifier, e = notifier.NewRPCNotifier(stack.rpcConn) - if e != nil { - return nil, e - } - - links, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETLINK) - if err != nil { - return nil, fmt.Errorf("RTM_GETLINK failed: %v", err) - } - - addrs, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETADDR) - if err != nil { - return nil, fmt.Errorf("RTM_GETADDR failed: %v", err) - } - - e = hostinet.ExtractHostInterfaces(links, addrs, stack.interfaces, stack.interfaceAddrs) - if e != nil { - return nil, e - } - - routes, err := stack.DoNetlinkRouteRequest(syscall.RTM_GETROUTE) - if err != nil { - return nil, fmt.Errorf("RTM_GETROUTE failed: %v", err) - } - - stack.routes, e = hostinet.ExtractHostRoutes(routes) - if e != nil { - return nil, e - } - - return stack, nil -} - -// RPCReadFile will execute the ReadFile helper RPC method which avoids the -// common pattern of open(2), read(2), close(2) by doing all three operations -// as a single RPC. It will read the entire file or return EFBIG if the file -// was too large. -func (s *Stack) RPCReadFile(path string) ([]byte, *syserr.Error) { - return s.rpcConn.RPCReadFile(path) -} - -// RPCWriteFile will execute the WriteFile helper RPC method which avoids the -// common pattern of open(2), write(2), write(2), close(2) by doing all -// operations as a single RPC. -func (s *Stack) RPCWriteFile(path string, data []byte) (int64, *syserr.Error) { - return s.rpcConn.RPCWriteFile(path, data) -} - -// Interfaces implements inet.Stack.Interfaces. -func (s *Stack) Interfaces() map[int32]inet.Interface { - interfaces := make(map[int32]inet.Interface) - for k, v := range s.interfaces { - interfaces[k] = v - } - return interfaces -} - -// InterfaceAddrs implements inet.Stack.InterfaceAddrs. -func (s *Stack) InterfaceAddrs() map[int32][]inet.InterfaceAddr { - addrs := make(map[int32][]inet.InterfaceAddr) - for k, v := range s.interfaceAddrs { - addrs[k] = append([]inet.InterfaceAddr(nil), v...) - } - return addrs -} - -// SupportsIPv6 implements inet.Stack.SupportsIPv6. -func (s *Stack) SupportsIPv6() bool { - panic("rpcinet handles procfs directly this method should not be called") -} - -// TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize. -func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) { - panic("rpcinet handles procfs directly this method should not be called") -} - -// SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize. -func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error { - panic("rpcinet handles procfs directly this method should not be called") - -} - -// TCPSendBufferSize implements inet.Stack.TCPSendBufferSize. -func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) { - panic("rpcinet handles procfs directly this method should not be called") - -} - -// SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize. -func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error { - panic("rpcinet handles procfs directly this method should not be called") -} - -// TCPSACKEnabled implements inet.Stack.TCPSACKEnabled. -func (s *Stack) TCPSACKEnabled() (bool, error) { - panic("rpcinet handles procfs directly this method should not be called") -} - -// SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled. -func (s *Stack) SetTCPSACKEnabled(enabled bool) error { - panic("rpcinet handles procfs directly this method should not be called") -} - -// Statistics implements inet.Stack.Statistics. -func (s *Stack) Statistics(stat interface{}, arg string) error { - return syserr.ErrEndpointOperation.ToError() -} - -// RouteTable implements inet.Stack.RouteTable. -func (s *Stack) RouteTable() []inet.Route { - return append([]inet.Route(nil), s.routes...) -} - -// Resume implements inet.Stack.Resume. -func (s *Stack) Resume() {} - -// RegisteredEndpoints implements inet.Stack.RegisteredEndpoints. -func (s *Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil } - -// CleanupEndpoints implements inet.Stack.CleanupEndpoints. -func (s *Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil } - -// RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints. -func (s *Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {} diff --git a/pkg/sentry/socket/rpcinet/stack_unsafe.go b/pkg/sentry/socket/rpcinet/stack_unsafe.go deleted file mode 100644 index a94bdad83..000000000 --- a/pkg/sentry/socket/rpcinet/stack_unsafe.go +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package rpcinet - -import ( - "syscall" - "unsafe" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" - pb "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/syscall_rpc_go_proto" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/syserr" -) - -// NewNetlinkRouteRequest builds a netlink message for getting the RIB, -// the routing information base. -func newNetlinkRouteRequest(proto, seq, family int) []byte { - rr := &syscall.NetlinkRouteRequest{} - rr.Header.Len = uint32(syscall.NLMSG_HDRLEN + syscall.SizeofRtGenmsg) - rr.Header.Type = uint16(proto) - rr.Header.Flags = syscall.NLM_F_DUMP | syscall.NLM_F_REQUEST - rr.Header.Seq = uint32(seq) - rr.Data.Family = uint8(family) - return netlinkRRtoWireFormat(rr) -} - -func netlinkRRtoWireFormat(rr *syscall.NetlinkRouteRequest) []byte { - b := make([]byte, rr.Header.Len) - *(*uint32)(unsafe.Pointer(&b[0:4][0])) = rr.Header.Len - *(*uint16)(unsafe.Pointer(&b[4:6][0])) = rr.Header.Type - *(*uint16)(unsafe.Pointer(&b[6:8][0])) = rr.Header.Flags - *(*uint32)(unsafe.Pointer(&b[8:12][0])) = rr.Header.Seq - *(*uint32)(unsafe.Pointer(&b[12:16][0])) = rr.Header.Pid - b[16] = byte(rr.Data.Family) - return b -} - -func (s *Stack) getNetlinkFd() (uint32, *syserr.Error) { - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Socket{&pb.SocketRequest{Family: int64(syscall.AF_NETLINK), Type: int64(syscall.SOCK_RAW | syscall.SOCK_NONBLOCK), Protocol: int64(syscall.NETLINK_ROUTE)}}}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Socket).Socket.Result - if e, ok := res.(*pb.SocketResponse_ErrorNumber); ok { - return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - return res.(*pb.SocketResponse_Fd).Fd, nil -} - -func (s *Stack) bindNetlinkFd(fd uint32, sockaddr []byte) *syserr.Error { - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Bind{&pb.BindRequest{Fd: fd, Address: sockaddr}}}, false /* ignoreResult */) - <-c - - if e := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Bind).Bind.ErrorNumber; e != 0 { - return syserr.FromHost(syscall.Errno(e)) - } - return nil -} - -func (s *Stack) closeNetlinkFd(fd uint32) { - _, _ = s.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Close{&pb.CloseRequest{Fd: fd}}}, true /* ignoreResult */) -} - -func (s *Stack) rpcSendMsg(req *pb.SyscallRequest_Sendmsg) (uint32, *syserr.Error) { - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Sendmsg).Sendmsg.Result - if e, ok := res.(*pb.SendmsgResponse_ErrorNumber); ok { - return 0, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.SendmsgResponse_Length).Length, nil -} - -func (s *Stack) sendMsg(fd uint32, buf []byte, to []byte, flags int) (int, *syserr.Error) { - // Whitelist flags. - if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 { - return 0, syserr.ErrInvalidArgument - } - - req := &pb.SyscallRequest_Sendmsg{&pb.SendmsgRequest{ - Fd: fd, - Data: buf, - Address: to, - More: flags&linux.MSG_MORE != 0, - EndOfRecord: flags&linux.MSG_EOR != 0, - }} - - n, err := s.rpcSendMsg(req) - return int(n), err -} - -func (s *Stack) rpcRecvMsg(req *pb.SyscallRequest_Recvmsg) (*pb.RecvmsgResponse_ResultPayload, *syserr.Error) { - id, c := s.rpcConn.NewRequest(pb.SyscallRequest{Args: req}, false /* ignoreResult */) - <-c - - res := s.rpcConn.Request(id).Result.(*pb.SyscallResponse_Recvmsg).Recvmsg.Result - if e, ok := res.(*pb.RecvmsgResponse_ErrorNumber); ok { - return nil, syserr.FromHost(syscall.Errno(e.ErrorNumber)) - } - - return res.(*pb.RecvmsgResponse_Payload).Payload, nil -} - -func (s *Stack) recvMsg(fd, l, flags uint32) ([]byte, *syserr.Error) { - req := &pb.SyscallRequest_Recvmsg{&pb.RecvmsgRequest{ - Fd: fd, - Length: l, - Sender: false, - Trunc: flags&linux.MSG_TRUNC != 0, - Peek: flags&linux.MSG_PEEK != 0, - }} - - res, err := s.rpcRecvMsg(req) - if err != nil { - return nil, err - } - return res.Data, nil -} - -func (s *Stack) netlinkRequest(proto, family int) ([]byte, error) { - fd, err := s.getNetlinkFd() - if err != nil { - return nil, err.ToError() - } - defer s.closeNetlinkFd(fd) - - lsa := syscall.SockaddrNetlink{Family: syscall.AF_NETLINK} - b := binary.Marshal(nil, usermem.ByteOrder, &lsa) - if err := s.bindNetlinkFd(fd, b); err != nil { - return nil, err.ToError() - } - - wb := newNetlinkRouteRequest(proto, 1, family) - _, err = s.sendMsg(fd, wb, b, 0) - if err != nil { - return nil, err.ToError() - } - - var tab []byte -done: - for { - rb, err := s.recvMsg(fd, uint32(syscall.Getpagesize()), 0) - nr := len(rb) - if err != nil { - return nil, err.ToError() - } - - if nr < syscall.NLMSG_HDRLEN { - return nil, syserr.ErrInvalidArgument.ToError() - } - - tab = append(tab, rb...) - msgs, e := syscall.ParseNetlinkMessage(rb) - if e != nil { - return nil, e - } - - for _, m := range msgs { - if m.Header.Type == syscall.NLMSG_DONE { - break done - } - if m.Header.Type == syscall.NLMSG_ERROR { - return nil, syserr.ErrInvalidArgument.ToError() - } - } - } - - return tab, nil -} - -// DoNetlinkRouteRequest returns routing information base, also known as RIB, -// which consists of network facility information, states and parameters. -func (s *Stack) DoNetlinkRouteRequest(req int) ([]syscall.NetlinkMessage, error) { - data, err := s.netlinkRequest(req, syscall.AF_UNSPEC) - if err != nil { - return nil, err - } - return syscall.ParseNetlinkMessage(data) -} diff --git a/pkg/sentry/socket/rpcinet/syscall_rpc.proto b/pkg/sentry/socket/rpcinet/syscall_rpc.proto deleted file mode 100644 index b677e9eb3..000000000 --- a/pkg/sentry/socket/rpcinet/syscall_rpc.proto +++ /dev/null @@ -1,352 +0,0 @@ -syntax = "proto3"; - -// package syscall_rpc is a set of networking related system calls that can be -// forwarded to a socket gofer. -// -package syscall_rpc; - -message SendmsgRequest { - uint32 fd = 1; - bytes data = 2 [ctype = CORD]; - bytes address = 3; - bool more = 4; - bool end_of_record = 5; -} - -message SendmsgResponse { - oneof result { - uint32 error_number = 1; - uint32 length = 2; - } -} - -message IOCtlRequest { - uint32 fd = 1; - uint32 cmd = 2; - bytes arg = 3; -} - -message IOCtlResponse { - oneof result { - uint32 error_number = 1; - bytes value = 2; - } -} - -message RecvmsgRequest { - uint32 fd = 1; - uint32 length = 2; - bool sender = 3; - bool peek = 4; - bool trunc = 5; - uint32 cmsg_length = 6; -} - -message OpenRequest { - bytes path = 1; - uint32 flags = 2; - uint32 mode = 3; -} - -message OpenResponse { - oneof result { - uint32 error_number = 1; - uint32 fd = 2; - } -} - -message ReadRequest { - uint32 fd = 1; - uint32 length = 2; -} - -message ReadResponse { - oneof result { - uint32 error_number = 1; - bytes data = 2 [ctype = CORD]; - } -} - -message ReadFileRequest { - string path = 1; -} - -message ReadFileResponse { - oneof result { - uint32 error_number = 1; - bytes data = 2 [ctype = CORD]; - } -} - -message WriteRequest { - uint32 fd = 1; - bytes data = 2 [ctype = CORD]; -} - -message WriteResponse { - oneof result { - uint32 error_number = 1; - uint32 length = 2; - } -} - -message WriteFileRequest { - string path = 1; - bytes content = 2; -} - -message WriteFileResponse { - uint32 error_number = 1; - uint32 written = 2; -} - -message AddressResponse { - bytes address = 1; - uint32 length = 2; -} - -message RecvmsgResponse { - message ResultPayload { - bytes data = 1 [ctype = CORD]; - AddressResponse address = 2; - uint32 length = 3; - bytes cmsg_data = 4; - } - oneof result { - uint32 error_number = 1; - ResultPayload payload = 2; - } -} - -message BindRequest { - uint32 fd = 1; - bytes address = 2; -} - -message BindResponse { - uint32 error_number = 1; -} - -message AcceptRequest { - uint32 fd = 1; - bool peer = 2; - int64 flags = 3; -} - -message AcceptResponse { - message ResultPayload { - uint32 fd = 1; - AddressResponse address = 2; - } - oneof result { - uint32 error_number = 1; - ResultPayload payload = 2; - } -} - -message ConnectRequest { - uint32 fd = 1; - bytes address = 2; -} - -message ConnectResponse { - uint32 error_number = 1; -} - -message ListenRequest { - uint32 fd = 1; - int64 backlog = 2; -} - -message ListenResponse { - uint32 error_number = 1; -} - -message ShutdownRequest { - uint32 fd = 1; - int64 how = 2; -} - -message ShutdownResponse { - uint32 error_number = 1; -} - -message CloseRequest { - uint32 fd = 1; -} - -message CloseResponse { - uint32 error_number = 1; -} - -message GetSockOptRequest { - uint32 fd = 1; - int64 level = 2; - int64 name = 3; - uint32 length = 4; -} - -message GetSockOptResponse { - oneof result { - uint32 error_number = 1; - bytes opt = 2; - } -} - -message SetSockOptRequest { - uint32 fd = 1; - int64 level = 2; - int64 name = 3; - bytes opt = 4; -} - -message SetSockOptResponse { - uint32 error_number = 1; -} - -message GetSockNameRequest { - uint32 fd = 1; -} - -message GetSockNameResponse { - oneof result { - uint32 error_number = 1; - AddressResponse address = 2; - } -} - -message GetPeerNameRequest { - uint32 fd = 1; -} - -message GetPeerNameResponse { - oneof result { - uint32 error_number = 1; - AddressResponse address = 2; - } -} - -message SocketRequest { - int64 family = 1; - int64 type = 2; - int64 protocol = 3; -} - -message SocketResponse { - oneof result { - uint32 error_number = 1; - uint32 fd = 2; - } -} - -message EpollWaitRequest { - uint32 fd = 1; - uint32 num_events = 2; - sint64 msec = 3; -} - -message EpollEvent { - uint32 fd = 1; - uint32 events = 2; -} - -message EpollEvents { - repeated EpollEvent events = 1; -} - -message EpollWaitResponse { - oneof result { - uint32 error_number = 1; - EpollEvents events = 2; - } -} - -message EpollCtlRequest { - uint32 epfd = 1; - int64 op = 2; - uint32 fd = 3; - EpollEvent event = 4; -} - -message EpollCtlResponse { - uint32 error_number = 1; -} - -message EpollCreate1Request { - int64 flag = 1; -} - -message EpollCreate1Response { - oneof result { - uint32 error_number = 1; - uint32 fd = 2; - } -} - -message PollRequest { - uint32 fd = 1; - uint32 events = 2; -} - -message PollResponse { - oneof result { - uint32 error_number = 1; - uint32 events = 2; - } -} - -message SyscallRequest { - oneof args { - SocketRequest socket = 1; - SendmsgRequest sendmsg = 2; - RecvmsgRequest recvmsg = 3; - BindRequest bind = 4; - AcceptRequest accept = 5; - ConnectRequest connect = 6; - ListenRequest listen = 7; - ShutdownRequest shutdown = 8; - CloseRequest close = 9; - GetSockOptRequest get_sock_opt = 10; - SetSockOptRequest set_sock_opt = 11; - GetSockNameRequest get_sock_name = 12; - GetPeerNameRequest get_peer_name = 13; - EpollWaitRequest epoll_wait = 14; - EpollCtlRequest epoll_ctl = 15; - EpollCreate1Request epoll_create1 = 16; - PollRequest poll = 17; - ReadRequest read = 18; - WriteRequest write = 19; - OpenRequest open = 20; - IOCtlRequest ioctl = 21; - WriteFileRequest write_file = 22; - ReadFileRequest read_file = 23; - } -} - -message SyscallResponse { - oneof result { - SocketResponse socket = 1; - SendmsgResponse sendmsg = 2; - RecvmsgResponse recvmsg = 3; - BindResponse bind = 4; - AcceptResponse accept = 5; - ConnectResponse connect = 6; - ListenResponse listen = 7; - ShutdownResponse shutdown = 8; - CloseResponse close = 9; - GetSockOptResponse get_sock_opt = 10; - SetSockOptResponse set_sock_opt = 11; - GetSockNameResponse get_sock_name = 12; - GetPeerNameResponse get_peer_name = 13; - EpollWaitResponse epoll_wait = 14; - EpollCtlResponse epoll_ctl = 15; - EpollCreate1Response epoll_create1 = 16; - PollResponse poll = 17; - ReadResponse read = 18; - WriteResponse write = 19; - OpenResponse open = 20; - IOCtlResponse ioctl = 21; - WriteFileResponse write_file = 22; - ReadFileResponse read_file = 23; - } -} -- cgit v1.2.3 From 345df7cab48ac79bccf2620900cd972b3026296d Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 17 Jan 2020 08:10:14 -0800 Subject: Add explanation for implementation of BSD full file locks. PiperOrigin-RevId: 290272560 --- pkg/sentry/fs/lock/lock.go | 3 +++ 1 file changed, 3 insertions(+) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go index 41b040818..926538d90 100644 --- a/pkg/sentry/fs/lock/lock.go +++ b/pkg/sentry/fs/lock/lock.go @@ -78,6 +78,9 @@ const ( ) // LockEOF is the maximal possible end of a regional file lock. +// +// A BSD-style full file lock can be represented as a regional file lock from +// offset 0 to LockEOF. const LockEOF = math.MaxUint64 // Lock is a regional file lock. It consists of either a single writer -- cgit v1.2.3 From 80d0f9304484897e4307c9701ddbfaacb925715d Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Fri, 17 Jan 2020 11:20:29 -0800 Subject: Fix data race in tty.queue.readableSize. We were setting queue.readable without holding the lock. PiperOrigin-RevId: 290306922 --- pkg/sentry/fs/tty/line_discipline.go | 4 +++- pkg/sentry/fs/tty/queue.go | 11 ++--------- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go index 894964260..9fe02657e 100644 --- a/pkg/sentry/fs/tty/line_discipline.go +++ b/pkg/sentry/fs/tty/line_discipline.go @@ -140,8 +140,10 @@ func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arc // buffer to its read buffer. Anything already in the read buffer is // now readable. if oldCanonEnabled && !l.termios.LEnabled(linux.ICANON) { - l.inQueue.pushWaitBuf(l) + l.inQueue.mu.Lock() + l.inQueue.pushWaitBufLocked(l) l.inQueue.readable = true + l.inQueue.mu.Unlock() l.slaveWaiter.Notify(waiter.EventIn) } diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index 8b5d4699a..21ccc6f32 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -197,16 +197,9 @@ func (q *queue) writeBytes(b []byte, l *lineDiscipline) { q.pushWaitBufLocked(l) } -// pushWaitBuf fills the queue's read buffer with data from the wait buffer. +// pushWaitBufLocked fills the queue's read buffer with data from the wait +// buffer. // -// Preconditions: -// * l.termiosMu must be held for reading. -func (q *queue) pushWaitBuf(l *lineDiscipline) int { - q.mu.Lock() - defer q.mu.Unlock() - return q.pushWaitBufLocked(l) -} - // Preconditions: // * l.termiosMu must be held for reading. // * q.mu must be locked. -- cgit v1.2.3 From f1a5178c589dbd9a1fe4f1b9fb943fbe64791b58 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Fri, 17 Jan 2020 14:20:00 -0800 Subject: Fix data race in MountNamespace.resolve. We must hold fs.renameMu to access Dirent.parent. PiperOrigin-RevId: 290340804 --- pkg/sentry/fs/mounts.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index db3dfd096..a9627a9d1 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -609,8 +609,11 @@ func (mns *MountNamespace) resolve(ctx context.Context, root, node *Dirent, rema } // Find the node; we resolve relative to the current symlink's parent. + renameMu.RLock() + parent := node.parent + renameMu.RUnlock() *remainingTraversals-- - d, err := mns.FindInode(ctx, root, node.parent, targetPath, remainingTraversals) + d, err := mns.FindInode(ctx, root, parent, targetPath, remainingTraversals) if err != nil { return nil, err } -- cgit v1.2.3 From 10401599e104d90644a220c1cce3e4c2f224f0b3 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Sat, 18 Jan 2020 09:32:39 -0800 Subject: Include the cgroup name in the superblock options in /proc/self/mountinfo. Java 11 parses /proc/self/mountinfo for cgroup information. Java 11.0.4 uses the mount path to determine what cgroups existed, but Java 11.0.5 reads the cgroup names from the superblock options. This CL adds the cgroup name to the superblock options if the filesystem type is "cgroup". Since gVisor doesn't actually support cgroups yet, we just infer the cgroup name from the path. PiperOrigin-RevId: 290434323 --- pkg/sentry/fs/proc/mounts.go | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go index 5aedae799..d4efc86e0 100644 --- a/pkg/sentry/fs/proc/mounts.go +++ b/pkg/sentry/fs/proc/mounts.go @@ -18,6 +18,7 @@ import ( "bytes" "fmt" "sort" + "strings" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -146,14 +147,35 @@ func (mif *mountInfoFile) ReadSeqFileData(ctx context.Context, handle seqfile.Se // (10) Mount source: filesystem-specific information or "none". fmt.Fprintf(&buf, "none ") - // (11) Superblock options. Only "ro/rw" is supported for now, - // and is the same as the filesystem option. - fmt.Fprintf(&buf, "%s\n", opts) + // (11) Superblock options, and final newline. + fmt.Fprintf(&buf, "%s\n", superBlockOpts(mountPath, mroot.Inode.MountSource)) }) return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*mountInfoFile)(nil)}}, 0 } +func superBlockOpts(mountPath string, msrc *fs.MountSource) string { + // gVisor doesn't (yet) have a concept of super block options, so we + // use the ro/rw bit from the mount flag. + opts := "rw" + if msrc.Flags.ReadOnly { + opts = "ro" + } + + // NOTE(b/147673608): If the mount is a cgroup, we also need to include + // the cgroup name in the options. For now we just read that from the + // path. + // TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, we + // should get this value from the cgroup itself, and not rely on the + // path. + if msrc.FilesystemType == "cgroup" { + splitPath := strings.Split(mountPath, "/") + cgroupType := splitPath[len(splitPath)-1] + opts += "," + cgroupType + } + return opts +} + // mountsFile is used to implement /proc/[pid]/mounts. // // +stateify savable -- cgit v1.2.3 From d46c397a1cd38f1e2aa5c864c1bb8594fb87bb63 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 21 Jan 2020 13:26:50 -0800 Subject: Add line break to /proc/net files Some files were missing the last line break. PiperOrigin-RevId: 290808898 --- pkg/sentry/fs/proc/net.go | 14 +++++++------- pkg/sentry/fsimpl/proc/tasks_net.go | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 3f17e98ea..bad445f3f 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -52,17 +52,17 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo // implemented in netstack, if the file contains a // header the stub is just the header otherwise it is // an empty file. - "arp": newStaticProcInode(ctx, msrc, []byte("IP address HW type Flags HW address Mask Device")), + "arp": newStaticProcInode(ctx, msrc, []byte("IP address HW type Flags HW address Mask Device\n")), - "netlink": newStaticProcInode(ctx, msrc, []byte("sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode")), - "netstat": newStaticProcInode(ctx, msrc, []byte("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess")), - "packet": newStaticProcInode(ctx, msrc, []byte("sk RefCnt Type Proto Iface R Rmem User Inode")), - "protocols": newStaticProcInode(ctx, msrc, []byte("protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em")), + "netlink": newStaticProcInode(ctx, msrc, []byte("sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n")), + "netstat": newStaticProcInode(ctx, msrc, []byte("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")), + "packet": newStaticProcInode(ctx, msrc, []byte("sk RefCnt Type Proto Iface R Rmem User Inode\n")), + "protocols": newStaticProcInode(ctx, msrc, []byte("protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n")), // Linux sets psched values to: nsec per usec, psched // tick in ns, 1000000, high res timer ticks per sec // (ClockGetres returns 1ns resolution). "psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))), - "ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function")), + "ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function\n")), "route": seqfile.NewSeqFileInode(ctx, &netRoute{s: s}, msrc), "tcp": seqfile.NewSeqFileInode(ctx, &netTCP{k: k}, msrc), "udp": seqfile.NewSeqFileInode(ctx, &netUDP{k: k}, msrc), @@ -73,7 +73,7 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo contents["if_inet6"] = seqfile.NewSeqFileInode(ctx, &ifinet6{s: s}, msrc) contents["ipv6_route"] = newStaticProcInode(ctx, msrc, []byte("")) contents["tcp6"] = seqfile.NewSeqFileInode(ctx, &netTCP6{k: k}, msrc) - contents["udp6"] = newStaticProcInode(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode")) + contents["udp6"] = newStaticProcInode(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n")) } } d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) diff --git a/pkg/sentry/fsimpl/proc/tasks_net.go b/pkg/sentry/fsimpl/proc/tasks_net.go index 3dbf3ba41..4aaf23e97 100644 --- a/pkg/sentry/fsimpl/proc/tasks_net.go +++ b/pkg/sentry/fsimpl/proc/tasks_net.go @@ -41,12 +41,12 @@ func newNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *k var contents map[string]*kernfs.Dentry if stack := k.NetworkStack(); stack != nil { const ( - arp = "IP address HW type Flags HW address Mask Device" - netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode" - packet = "sk RefCnt Type Proto Iface R Rmem User Inode" - protocols = "protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em" - ptype = "Type Device Function" - upd6 = " sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode" + arp = "IP address HW type Flags HW address Mask Device\n" + netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n" + packet = "sk RefCnt Type Proto Iface R Rmem User Inode\n" + protocols = "protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n" + ptype = "Type Device Function\n" + upd6 = " sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n" ) psched := fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)) @@ -779,6 +779,6 @@ func (d *netStatData) Generate(ctx context.Context, buf *bytes.Buffer) error { "TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd " + "TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq " + "TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge " + - "TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess") + "TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n") return nil } -- cgit v1.2.3 From 6a59e7f510a7b12f8b3bd768dfe569033ef07d30 Mon Sep 17 00:00:00 2001 From: Ian Gudger Date: Tue, 21 Jan 2020 19:23:26 -0800 Subject: Rename DowngradableRWMutex to RWmutex. Also renames TMutex to Mutex. These custom mutexes aren't any worse than the standard library versions (same code), so having both seems redundant. PiperOrigin-RevId: 290873587 --- pkg/sentry/fs/overlay.go | 2 +- pkg/sentry/mm/mm.go | 4 +-- pkg/sync/aliases.go | 6 ---- pkg/sync/downgradable_rwmutex_test.go | 50 ++++++++++++++++----------------- pkg/sync/downgradable_rwmutex_unsafe.go | 26 ++++++++--------- pkg/sync/tmutex_test.go | 8 +++--- pkg/sync/tmutex_unsafe.go | 8 +++--- 7 files changed, 49 insertions(+), 55 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index 4cad55327..f7702f8f4 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -198,7 +198,7 @@ type overlayEntry struct { upper *Inode // dirCacheMu protects dirCache. - dirCacheMu sync.DowngradableRWMutex `state:"nosave"` + dirCacheMu sync.RWMutex `state:"nosave"` // dirCache is cache of DentAttrs from upper and lower Inodes. dirCache *SortedDentryMap diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index fa86ebced..78cc9e6e4 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -80,7 +80,7 @@ type MemoryManager struct { users int32 // mappingMu is analogous to Linux's struct mm_struct::mmap_sem. - mappingMu sync.DowngradableRWMutex `state:"nosave"` + mappingMu sync.RWMutex `state:"nosave"` // vmas stores virtual memory areas. Since vmas are stored by value, // clients should usually use vmaIterator.ValuePtr() instead of @@ -123,7 +123,7 @@ type MemoryManager struct { // activeMu is loosely analogous to Linux's struct // mm_struct::page_table_lock. - activeMu sync.DowngradableRWMutex `state:"nosave"` + activeMu sync.RWMutex `state:"nosave"` // pmas stores platform mapping areas used to implement vmas. Since pmas // are stored by value, clients should usually use pmaIterator.ValuePtr() diff --git a/pkg/sync/aliases.go b/pkg/sync/aliases.go index 20c7ca041..d2d7132fa 100644 --- a/pkg/sync/aliases.go +++ b/pkg/sync/aliases.go @@ -11,12 +11,6 @@ import ( // Aliases of standard library types. type ( - // Mutex is an alias of sync.Mutex. - Mutex = sync.Mutex - - // RWMutex is an alias of sync.RWMutex. - RWMutex = sync.RWMutex - // Cond is an alias of sync.Cond. Cond = sync.Cond diff --git a/pkg/sync/downgradable_rwmutex_test.go b/pkg/sync/downgradable_rwmutex_test.go index b5cb28ec0..ce667e825 100644 --- a/pkg/sync/downgradable_rwmutex_test.go +++ b/pkg/sync/downgradable_rwmutex_test.go @@ -18,7 +18,7 @@ import ( "testing" ) -func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { +func parallelReader(m *RWMutex, clocked, cunlock, cdone chan bool) { m.RLock() clocked <- true <-cunlock @@ -28,7 +28,7 @@ func parallelReader(m *DowngradableRWMutex, clocked, cunlock, cdone chan bool) { func doTestParallelReaders(numReaders, gomaxprocs int) { runtime.GOMAXPROCS(gomaxprocs) - var m DowngradableRWMutex + var m RWMutex clocked := make(chan bool) cunlock := make(chan bool) cdone := make(chan bool) @@ -55,7 +55,7 @@ func TestParallelReaders(t *testing.T) { doTestParallelReaders(4, 2) } -func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { +func reader(rwm *RWMutex, numIterations int, activity *int32, cdone chan bool) { for i := 0; i < numIterations; i++ { rwm.RLock() n := atomic.AddInt32(activity, 1) @@ -70,7 +70,7 @@ func reader(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone cdone <- true } -func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { +func writer(rwm *RWMutex, numIterations int, activity *int32, cdone chan bool) { for i := 0; i < numIterations; i++ { rwm.Lock() n := atomic.AddInt32(activity, 10000) @@ -85,7 +85,7 @@ func writer(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone cdone <- true } -func downgradingWriter(rwm *DowngradableRWMutex, numIterations int, activity *int32, cdone chan bool) { +func downgradingWriter(rwm *RWMutex, numIterations int, activity *int32, cdone chan bool) { for i := 0; i < numIterations; i++ { rwm.Lock() n := atomic.AddInt32(activity, 10000) @@ -112,7 +112,7 @@ func HammerDowngradableRWMutex(gomaxprocs, numReaders, numIterations int) { runtime.GOMAXPROCS(gomaxprocs) // Number of active readers + 10000 * number of active writers. var activity int32 - var rwm DowngradableRWMutex + var rwm RWMutex cdone := make(chan bool) go writer(&rwm, numIterations, &activity, cdone) go downgradingWriter(&rwm, numIterations, &activity, cdone) @@ -150,56 +150,56 @@ func TestDowngradableRWMutex(t *testing.T) { } func TestRWDoubleTryLock(t *testing.T) { - var m DowngradableRWMutex - if !m.TryLock() { + var rwm RWMutex + if !rwm.TryLock() { t.Fatal("failed to aquire lock") } - if m.TryLock() { + if rwm.TryLock() { t.Fatal("unexpectedly succeeded in aquiring locked mutex") } } func TestRWTryLockAfterLock(t *testing.T) { - var m DowngradableRWMutex - m.Lock() - if m.TryLock() { + var rwm RWMutex + rwm.Lock() + if rwm.TryLock() { t.Fatal("unexpectedly succeeded in aquiring locked mutex") } } func TestRWTryLockUnlock(t *testing.T) { - var m DowngradableRWMutex - if !m.TryLock() { + var rwm RWMutex + if !rwm.TryLock() { t.Fatal("failed to aquire lock") } - m.Unlock() - if !m.TryLock() { + rwm.Unlock() + if !rwm.TryLock() { t.Fatal("failed to aquire lock after unlock") } } func TestTryRLockAfterLock(t *testing.T) { - var m DowngradableRWMutex - m.Lock() - if m.TryRLock() { + var rwm RWMutex + rwm.Lock() + if rwm.TryRLock() { t.Fatal("unexpectedly succeeded in aquiring locked mutex") } } func TestTryLockAfterRLock(t *testing.T) { - var m DowngradableRWMutex - m.RLock() - if m.TryLock() { + var rwm RWMutex + rwm.RLock() + if rwm.TryLock() { t.Fatal("unexpectedly succeeded in aquiring locked mutex") } } func TestDoubleTryRLock(t *testing.T) { - var m DowngradableRWMutex - if !m.TryRLock() { + var rwm RWMutex + if !rwm.TryRLock() { t.Fatal("failed to aquire lock") } - if !m.TryRLock() { + if !rwm.TryRLock() { t.Fatal("failed to read aquire read locked lock") } } diff --git a/pkg/sync/downgradable_rwmutex_unsafe.go b/pkg/sync/downgradable_rwmutex_unsafe.go index 0d321f5e3..ea6cdc447 100644 --- a/pkg/sync/downgradable_rwmutex_unsafe.go +++ b/pkg/sync/downgradable_rwmutex_unsafe.go @@ -29,10 +29,10 @@ func runtimeSemacquire(s *uint32) //go:linkname runtimeSemrelease sync.runtime_Semrelease func runtimeSemrelease(s *uint32, handoff bool, skipframes int) -// DowngradableRWMutex is identical to sync.RWMutex, but adds the DowngradeLock, +// RWMutex is identical to sync.RWMutex, but adds the DowngradeLock, // TryLock and TryRLock methods. -type DowngradableRWMutex struct { - w TMutex // held if there are pending writers +type RWMutex struct { + w Mutex // held if there are pending writers writerSem uint32 // semaphore for writers to wait for completing readers readerSem uint32 // semaphore for readers to wait for completing writers readerCount int32 // number of pending readers @@ -43,7 +43,7 @@ const rwmutexMaxReaders = 1 << 30 // TryRLock locks rw for reading. It returns true if it succeeds and false // otherwise. It does not block. -func (rw *DowngradableRWMutex) TryRLock() bool { +func (rw *RWMutex) TryRLock() bool { if RaceEnabled { RaceDisable() } @@ -67,7 +67,7 @@ func (rw *DowngradableRWMutex) TryRLock() bool { } // RLock locks rw for reading. -func (rw *DowngradableRWMutex) RLock() { +func (rw *RWMutex) RLock() { if RaceEnabled { RaceDisable() } @@ -82,14 +82,14 @@ func (rw *DowngradableRWMutex) RLock() { } // RUnlock undoes a single RLock call. -func (rw *DowngradableRWMutex) RUnlock() { +func (rw *RWMutex) RUnlock() { if RaceEnabled { RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) RaceDisable() } if r := atomic.AddInt32(&rw.readerCount, -1); r < 0 { if r+1 == 0 || r+1 == -rwmutexMaxReaders { - panic("RUnlock of unlocked DowngradableRWMutex") + panic("RUnlock of unlocked RWMutex") } // A writer is pending. if atomic.AddInt32(&rw.readerWait, -1) == 0 { @@ -104,7 +104,7 @@ func (rw *DowngradableRWMutex) RUnlock() { // TryLock locks rw for writing. It returns true if it succeeds and false // otherwise. It does not block. -func (rw *DowngradableRWMutex) TryLock() bool { +func (rw *RWMutex) TryLock() bool { if RaceEnabled { RaceDisable() } @@ -131,7 +131,7 @@ func (rw *DowngradableRWMutex) TryLock() bool { } // Lock locks rw for writing. -func (rw *DowngradableRWMutex) Lock() { +func (rw *RWMutex) Lock() { if RaceEnabled { RaceDisable() } @@ -150,7 +150,7 @@ func (rw *DowngradableRWMutex) Lock() { } // Unlock unlocks rw for writing. -func (rw *DowngradableRWMutex) Unlock() { +func (rw *RWMutex) Unlock() { if RaceEnabled { RaceRelease(unsafe.Pointer(&rw.writerSem)) RaceRelease(unsafe.Pointer(&rw.readerSem)) @@ -159,7 +159,7 @@ func (rw *DowngradableRWMutex) Unlock() { // Announce to readers there is no active writer. r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders) if r >= rwmutexMaxReaders { - panic("Unlock of unlocked DowngradableRWMutex") + panic("Unlock of unlocked RWMutex") } // Unblock blocked readers, if any. for i := 0; i < int(r); i++ { @@ -173,7 +173,7 @@ func (rw *DowngradableRWMutex) Unlock() { } // DowngradeLock atomically unlocks rw for writing and locks it for reading. -func (rw *DowngradableRWMutex) DowngradeLock() { +func (rw *RWMutex) DowngradeLock() { if RaceEnabled { RaceRelease(unsafe.Pointer(&rw.readerSem)) RaceDisable() @@ -181,7 +181,7 @@ func (rw *DowngradableRWMutex) DowngradeLock() { // Announce to readers there is no active writer and one additional reader. r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders+1) if r >= rwmutexMaxReaders+1 { - panic("DowngradeLock of unlocked DowngradableRWMutex") + panic("DowngradeLock of unlocked RWMutex") } // Unblock blocked readers, if any. Note that this loop starts as 1 since r // includes this goroutine. diff --git a/pkg/sync/tmutex_test.go b/pkg/sync/tmutex_test.go index c640bae23..0838248b4 100644 --- a/pkg/sync/tmutex_test.go +++ b/pkg/sync/tmutex_test.go @@ -30,7 +30,7 @@ func TestStructSize(t *testing.T) { // // The correctness of this package relies on these remaining in sync. func TestFieldValues(t *testing.T) { - var m TMutex + var m Mutex m.Lock() if got := *m.state(); got != mutexLocked { t.Errorf("got locked sync.Mutex.state = %d, want = %d", got, mutexLocked) @@ -42,7 +42,7 @@ func TestFieldValues(t *testing.T) { } func TestDoubleTryLock(t *testing.T) { - var m TMutex + var m Mutex if !m.TryLock() { t.Fatal("failed to aquire lock") } @@ -52,7 +52,7 @@ func TestDoubleTryLock(t *testing.T) { } func TestTryLockAfterLock(t *testing.T) { - var m TMutex + var m Mutex m.Lock() if m.TryLock() { t.Fatal("unexpectedly succeeded in aquiring locked mutex") @@ -60,7 +60,7 @@ func TestTryLockAfterLock(t *testing.T) { } func TestTryLockUnlock(t *testing.T) { - var m TMutex + var m Mutex if !m.TryLock() { t.Fatal("failed to aquire lock") } diff --git a/pkg/sync/tmutex_unsafe.go b/pkg/sync/tmutex_unsafe.go index 3c32f8371..3dd15578b 100644 --- a/pkg/sync/tmutex_unsafe.go +++ b/pkg/sync/tmutex_unsafe.go @@ -17,8 +17,8 @@ import ( "unsafe" ) -// TMutex is a try lock. -type TMutex struct { +// Mutex is a try lock. +type Mutex struct { sync.Mutex } @@ -27,7 +27,7 @@ type syncMutex struct { sema uint32 } -func (m *TMutex) state() *int32 { +func (m *Mutex) state() *int32 { return &(*syncMutex)(unsafe.Pointer(&m.Mutex)).state } @@ -38,7 +38,7 @@ const ( // TryLock tries to aquire the mutex. It returns true if it succeeds and false // otherwise. TryLock does not block. -func (m *TMutex) TryLock() bool { +func (m *Mutex) TryLock() bool { if atomic.CompareAndSwapInt32(m.state(), mutexUnlocked, mutexLocked) { if RaceEnabled { RaceAcquire(unsafe.Pointer(&m.Mutex)) -- cgit v1.2.3 From d29e59af9fbd420e34378bcbf7ae543134070217 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 27 Jan 2020 10:04:07 -0800 Subject: Standardize on tools directory. PiperOrigin-RevId: 291745021 --- .bazelrc | 8 +- BUILD | 49 ++++++- benchmarks/defs.bzl | 18 --- benchmarks/harness/BUILD | 74 +++++----- benchmarks/harness/machine_producers/BUILD | 4 +- benchmarks/runner/BUILD | 24 ++-- benchmarks/tcp/BUILD | 3 +- benchmarks/workloads/ab/BUILD | 19 ++- benchmarks/workloads/absl/BUILD | 19 ++- benchmarks/workloads/curl/BUILD | 2 +- benchmarks/workloads/ffmpeg/BUILD | 2 +- benchmarks/workloads/fio/BUILD | 19 ++- benchmarks/workloads/httpd/BUILD | 2 +- benchmarks/workloads/iperf/BUILD | 19 ++- benchmarks/workloads/netcat/BUILD | 2 +- benchmarks/workloads/nginx/BUILD | 2 +- benchmarks/workloads/node/BUILD | 2 +- benchmarks/workloads/node_template/BUILD | 2 +- benchmarks/workloads/redis/BUILD | 2 +- benchmarks/workloads/redisbenchmark/BUILD | 19 ++- benchmarks/workloads/ruby/BUILD | 2 +- benchmarks/workloads/ruby_template/BUILD | 2 +- benchmarks/workloads/sleep/BUILD | 2 +- benchmarks/workloads/sysbench/BUILD | 19 ++- benchmarks/workloads/syscall/BUILD | 19 ++- benchmarks/workloads/tensorflow/BUILD | 2 +- benchmarks/workloads/true/BUILD | 2 +- pkg/abi/BUILD | 3 +- pkg/abi/linux/BUILD | 6 +- pkg/amutex/BUILD | 6 +- pkg/atomicbitops/BUILD | 6 +- pkg/binary/BUILD | 6 +- pkg/bits/BUILD | 6 +- pkg/bpf/BUILD | 6 +- pkg/compressio/BUILD | 6 +- pkg/control/client/BUILD | 3 +- pkg/control/server/BUILD | 3 +- pkg/cpuid/BUILD | 8 +- pkg/eventchannel/BUILD | 16 +-- pkg/fd/BUILD | 6 +- pkg/fdchannel/BUILD | 8 +- pkg/fdnotifier/BUILD | 3 +- pkg/flipcall/BUILD | 8 +- pkg/fspath/BUILD | 13 +- pkg/gate/BUILD | 4 +- pkg/goid/BUILD | 6 +- pkg/ilist/BUILD | 6 +- pkg/linewriter/BUILD | 6 +- pkg/log/BUILD | 6 +- pkg/memutil/BUILD | 3 +- pkg/metric/BUILD | 23 +-- pkg/p9/BUILD | 6 +- pkg/p9/p9test/BUILD | 6 +- pkg/procid/BUILD | 8 +- pkg/rand/BUILD | 3 +- pkg/refs/BUILD | 6 +- pkg/seccomp/BUILD | 6 +- pkg/secio/BUILD | 6 +- pkg/segment/test/BUILD | 6 +- pkg/sentry/BUILD | 2 + pkg/sentry/arch/BUILD | 20 +-- pkg/sentry/context/BUILD | 3 +- pkg/sentry/context/contexttest/BUILD | 3 +- pkg/sentry/control/BUILD | 8 +- pkg/sentry/device/BUILD | 6 +- pkg/sentry/fs/BUILD | 6 +- pkg/sentry/fs/anon/BUILD | 3 +- pkg/sentry/fs/dev/BUILD | 3 +- pkg/sentry/fs/fdpipe/BUILD | 6 +- pkg/sentry/fs/filetest/BUILD | 3 +- pkg/sentry/fs/fsutil/BUILD | 6 +- pkg/sentry/fs/gofer/BUILD | 6 +- pkg/sentry/fs/host/BUILD | 6 +- pkg/sentry/fs/lock/BUILD | 6 +- pkg/sentry/fs/proc/BUILD | 6 +- pkg/sentry/fs/proc/device/BUILD | 3 +- pkg/sentry/fs/proc/seqfile/BUILD | 6 +- pkg/sentry/fs/ramfs/BUILD | 6 +- pkg/sentry/fs/sys/BUILD | 3 +- pkg/sentry/fs/timerfd/BUILD | 3 +- pkg/sentry/fs/tmpfs/BUILD | 6 +- pkg/sentry/fs/tty/BUILD | 6 +- pkg/sentry/fsimpl/ext/BUILD | 6 +- pkg/sentry/fsimpl/ext/benchmark/BUILD | 2 +- pkg/sentry/fsimpl/ext/disklayout/BUILD | 6 +- pkg/sentry/fsimpl/kernfs/BUILD | 6 +- pkg/sentry/fsimpl/proc/BUILD | 8 +- pkg/sentry/fsimpl/sys/BUILD | 6 +- pkg/sentry/fsimpl/testutil/BUILD | 5 +- pkg/sentry/fsimpl/tmpfs/BUILD | 8 +- pkg/sentry/hostcpu/BUILD | 6 +- pkg/sentry/hostmm/BUILD | 3 +- pkg/sentry/inet/BUILD | 3 +- pkg/sentry/kernel/BUILD | 24 +--- pkg/sentry/kernel/auth/BUILD | 3 +- pkg/sentry/kernel/contexttest/BUILD | 3 +- pkg/sentry/kernel/epoll/BUILD | 6 +- pkg/sentry/kernel/eventfd/BUILD | 6 +- pkg/sentry/kernel/fasync/BUILD | 3 +- pkg/sentry/kernel/futex/BUILD | 6 +- pkg/sentry/kernel/memevent/BUILD | 20 +-- pkg/sentry/kernel/pipe/BUILD | 6 +- pkg/sentry/kernel/sched/BUILD | 6 +- pkg/sentry/kernel/semaphore/BUILD | 6 +- pkg/sentry/kernel/shm/BUILD | 3 +- pkg/sentry/kernel/signalfd/BUILD | 5 +- pkg/sentry/kernel/time/BUILD | 3 +- pkg/sentry/limits/BUILD | 6 +- pkg/sentry/loader/BUILD | 4 +- pkg/sentry/memmap/BUILD | 6 +- pkg/sentry/mm/BUILD | 6 +- pkg/sentry/pgalloc/BUILD | 6 +- pkg/sentry/platform/BUILD | 3 +- pkg/sentry/platform/interrupt/BUILD | 6 +- pkg/sentry/platform/kvm/BUILD | 6 +- pkg/sentry/platform/kvm/testutil/BUILD | 3 +- pkg/sentry/platform/ptrace/BUILD | 3 +- pkg/sentry/platform/ring0/BUILD | 3 +- pkg/sentry/platform/ring0/gen_offsets/BUILD | 2 +- pkg/sentry/platform/ring0/pagetables/BUILD | 16 +-- pkg/sentry/platform/safecopy/BUILD | 6 +- pkg/sentry/safemem/BUILD | 6 +- pkg/sentry/sighandling/BUILD | 3 +- pkg/sentry/socket/BUILD | 3 +- pkg/sentry/socket/control/BUILD | 3 +- pkg/sentry/socket/hostinet/BUILD | 3 +- pkg/sentry/socket/netfilter/BUILD | 3 +- pkg/sentry/socket/netlink/BUILD | 3 +- pkg/sentry/socket/netlink/port/BUILD | 6 +- pkg/sentry/socket/netlink/route/BUILD | 3 +- pkg/sentry/socket/netlink/uevent/BUILD | 3 +- pkg/sentry/socket/netstack/BUILD | 3 +- pkg/sentry/socket/unix/BUILD | 3 +- pkg/sentry/socket/unix/transport/BUILD | 3 +- pkg/sentry/state/BUILD | 3 +- pkg/sentry/strace/BUILD | 20 +-- pkg/sentry/syscalls/BUILD | 3 +- pkg/sentry/syscalls/linux/BUILD | 3 +- pkg/sentry/time/BUILD | 6 +- pkg/sentry/unimpl/BUILD | 21 +-- pkg/sentry/uniqueid/BUILD | 3 +- pkg/sentry/usage/BUILD | 5 +- pkg/sentry/usermem/BUILD | 7 +- pkg/sentry/vfs/BUILD | 8 +- pkg/sentry/watchdog/BUILD | 3 +- pkg/sleep/BUILD | 6 +- pkg/state/BUILD | 17 +-- pkg/state/statefile/BUILD | 6 +- pkg/sync/BUILD | 6 +- pkg/sync/atomicptrtest/BUILD | 6 +- pkg/sync/seqatomictest/BUILD | 6 +- pkg/syserr/BUILD | 3 +- pkg/syserror/BUILD | 4 +- pkg/tcpip/BUILD | 6 +- pkg/tcpip/adapters/gonet/BUILD | 6 +- pkg/tcpip/buffer/BUILD | 6 +- pkg/tcpip/checker/BUILD | 3 +- pkg/tcpip/hash/jenkins/BUILD | 6 +- pkg/tcpip/header/BUILD | 6 +- pkg/tcpip/iptables/BUILD | 3 +- pkg/tcpip/link/channel/BUILD | 3 +- pkg/tcpip/link/fdbased/BUILD | 6 +- pkg/tcpip/link/loopback/BUILD | 3 +- pkg/tcpip/link/muxed/BUILD | 6 +- pkg/tcpip/link/rawfile/BUILD | 3 +- pkg/tcpip/link/sharedmem/BUILD | 6 +- pkg/tcpip/link/sharedmem/pipe/BUILD | 6 +- pkg/tcpip/link/sharedmem/queue/BUILD | 6 +- pkg/tcpip/link/sniffer/BUILD | 3 +- pkg/tcpip/link/tun/BUILD | 3 +- pkg/tcpip/link/waitable/BUILD | 6 +- pkg/tcpip/network/BUILD | 2 +- pkg/tcpip/network/arp/BUILD | 4 +- pkg/tcpip/network/fragmentation/BUILD | 6 +- pkg/tcpip/network/hash/BUILD | 3 +- pkg/tcpip/network/ipv4/BUILD | 4 +- pkg/tcpip/network/ipv6/BUILD | 6 +- pkg/tcpip/ports/BUILD | 6 +- pkg/tcpip/sample/tun_tcp_connect/BUILD | 2 +- pkg/tcpip/sample/tun_tcp_echo/BUILD | 2 +- pkg/tcpip/seqnum/BUILD | 3 +- pkg/tcpip/stack/BUILD | 6 +- pkg/tcpip/transport/icmp/BUILD | 3 +- pkg/tcpip/transport/packet/BUILD | 3 +- pkg/tcpip/transport/raw/BUILD | 3 +- pkg/tcpip/transport/tcp/BUILD | 4 +- pkg/tcpip/transport/tcp/testing/context/BUILD | 3 +- pkg/tcpip/transport/tcpconntrack/BUILD | 4 +- pkg/tcpip/transport/udp/BUILD | 4 +- pkg/tmutex/BUILD | 6 +- pkg/unet/BUILD | 6 +- pkg/urpc/BUILD | 6 +- pkg/waiter/BUILD | 6 +- runsc/BUILD | 27 ++-- runsc/boot/BUILD | 5 +- runsc/boot/filter/BUILD | 3 +- runsc/boot/platforms/BUILD | 3 +- runsc/cgroup/BUILD | 5 +- runsc/cmd/BUILD | 5 +- runsc/console/BUILD | 3 +- runsc/container/BUILD | 5 +- runsc/container/test_app/BUILD | 4 +- runsc/criutil/BUILD | 3 +- runsc/dockerutil/BUILD | 3 +- runsc/fsgofer/BUILD | 9 +- runsc/fsgofer/filter/BUILD | 3 +- runsc/sandbox/BUILD | 3 +- runsc/specutils/BUILD | 5 +- runsc/testutil/BUILD | 3 +- runsc/version_test.sh | 2 +- scripts/common.sh | 6 +- scripts/common_bazel.sh | 99 ------------- scripts/common_build.sh | 99 +++++++++++++ test/BUILD | 45 +----- test/e2e/BUILD | 5 +- test/image/BUILD | 5 +- test/iptables/BUILD | 5 +- test/iptables/runner/BUILD | 12 +- test/root/BUILD | 5 +- test/root/testdata/BUILD | 3 +- test/runtimes/BUILD | 4 +- test/runtimes/build_defs.bzl | 5 +- test/runtimes/images/proctor/BUILD | 4 +- test/syscalls/BUILD | 2 +- test/syscalls/build_defs.bzl | 6 +- test/syscalls/gtest/BUILD | 7 +- test/syscalls/linux/BUILD | 23 ++- test/syscalls/linux/arch_prctl.cc | 2 + test/syscalls/linux/rseq/BUILD | 5 +- .../linux/udp_socket_errqueue_test_case.cc | 4 + test/uds/BUILD | 3 +- test/util/BUILD | 27 ++-- test/util/save_util_linux.cc | 4 + test/util/save_util_other.cc | 4 + test/util/test_util_runfiles.cc | 4 + tools/BUILD | 3 + tools/build/BUILD | 10 ++ tools/build/defs.bzl | 91 ++++++++++++ tools/checkunsafe/BUILD | 3 +- tools/defs.bzl | 154 +++++++++++++++++++++ tools/go_generics/BUILD | 2 +- tools/go_generics/globals/BUILD | 4 +- tools/go_generics/go_merge/BUILD | 2 +- tools/go_generics/rules_tests/BUILD | 2 +- tools/go_marshal/BUILD | 4 +- tools/go_marshal/README.md | 52 +------ tools/go_marshal/analysis/BUILD | 5 +- tools/go_marshal/defs.bzl | 112 ++------------- tools/go_marshal/gomarshal/BUILD | 6 +- tools/go_marshal/gomarshal/generator.go | 20 ++- tools/go_marshal/gomarshal/generator_tests.go | 6 +- tools/go_marshal/main.go | 11 +- tools/go_marshal/marshal/BUILD | 5 +- tools/go_marshal/test/BUILD | 7 +- tools/go_marshal/test/external/BUILD | 6 +- tools/go_stateify/BUILD | 2 +- tools/go_stateify/defs.bzl | 79 +---------- tools/images/BUILD | 2 +- tools/images/defs.bzl | 6 +- tools/issue_reviver/BUILD | 2 +- tools/issue_reviver/github/BUILD | 3 +- tools/issue_reviver/reviver/BUILD | 5 +- tools/workspace_status.sh | 2 +- vdso/BUILD | 33 ++--- 264 files changed, 1012 insertions(+), 1380 deletions(-) delete mode 100644 benchmarks/defs.bzl delete mode 100755 scripts/common_bazel.sh create mode 100755 scripts/common_build.sh create mode 100644 tools/BUILD create mode 100644 tools/build/BUILD create mode 100644 tools/build/defs.bzl create mode 100644 tools/defs.bzl (limited to 'pkg/sentry/fs') diff --git a/.bazelrc b/.bazelrc index 9c35c5e7b..ef214bcfa 100644 --- a/.bazelrc +++ b/.bazelrc @@ -30,10 +30,10 @@ build:remote --auth_scope="https://www.googleapis.com/auth/cloud-source-tools" # Add a custom platform and toolchain that builds in a privileged docker # container, which is required by our syscall tests. -build:remote --host_platform=//test:rbe_ubuntu1604 -build:remote --extra_toolchains=//test:cc-toolchain-clang-x86_64-default -build:remote --extra_execution_platforms=//test:rbe_ubuntu1604 -build:remote --platforms=//test:rbe_ubuntu1604 +build:remote --host_platform=//:rbe_ubuntu1604 +build:remote --extra_toolchains=//:cc-toolchain-clang-x86_64-default +build:remote --extra_execution_platforms=//:rbe_ubuntu1604 +build:remote --platforms=//:rbe_ubuntu1604 build:remote --crosstool_top=@rbe_default//cc:toolchain build:remote --jobs=50 build:remote --remote_timeout=3600 diff --git a/BUILD b/BUILD index 76286174f..5fd929378 100644 --- a/BUILD +++ b/BUILD @@ -1,8 +1,8 @@ -package(licenses = ["notice"]) # Apache 2.0 - load("@io_bazel_rules_go//go:def.bzl", "go_path", "nogo") load("@bazel_gazelle//:def.bzl", "gazelle") +package(licenses = ["notice"]) + # The sandbox filegroup is used for sandbox-internal dependencies. package_group( name = "sandbox", @@ -49,9 +49,52 @@ gazelle(name = "gazelle") # live in the tools subdirectory (unless they are standard). nogo( name = "nogo", - config = "tools/nogo.js", + config = "//tools:nogo.js", visibility = ["//visibility:public"], deps = [ "//tools/checkunsafe", ], ) + +# We need to define a bazel platform and toolchain to specify dockerPrivileged +# and dockerRunAsRoot options, they are required to run tests on the RBE +# cluster in Kokoro. +alias( + name = "rbe_ubuntu1604", + actual = ":rbe_ubuntu1604_r346485", +) + +platform( + name = "rbe_ubuntu1604_r346485", + constraint_values = [ + "@bazel_tools//platforms:x86_64", + "@bazel_tools//platforms:linux", + "@bazel_tools//tools/cpp:clang", + "@bazel_toolchains//constraints:xenial", + "@bazel_toolchains//constraints/sanitizers:support_msan", + ], + remote_execution_properties = """ + properties: { + name: "container-image" + value:"docker://gcr.io/cloud-marketplace/google/rbe-ubuntu16-04@sha256:93f7e127196b9b653d39830c50f8b05d49ef6fd8739a9b5b8ab16e1df5399e50" + } + properties: { + name: "dockerAddCapabilities" + value: "SYS_ADMIN" + } + properties: { + name: "dockerPrivileged" + value: "true" + } + """, +) + +toolchain( + name = "cc-toolchain-clang-x86_64-default", + exec_compatible_with = [ + ], + target_compatible_with = [ + ], + toolchain = "@bazel_toolchains//configs/ubuntu16_04_clang/10.0.0/bazel_2.0.0/cc:cc-compiler-k8", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", +) diff --git a/benchmarks/defs.bzl b/benchmarks/defs.bzl deleted file mode 100644 index 79e6cdbc8..000000000 --- a/benchmarks/defs.bzl +++ /dev/null @@ -1,18 +0,0 @@ -"""Provides python helper functions.""" - -load("@pydeps//:requirements.bzl", _requirement = "requirement") - -def filter_deps(deps = None): - if deps == None: - deps = [] - return [dep for dep in deps if dep] - -def py_library(deps = None, **kwargs): - return native.py_library(deps = filter_deps(deps), **kwargs) - -def py_test(deps = None, **kwargs): - return native.py_test(deps = filter_deps(deps), **kwargs) - -def requirement(name, direct = True): - """ requirement returns the required dependency. """ - return _requirement(name) diff --git a/benchmarks/harness/BUILD b/benchmarks/harness/BUILD index 081a74243..52d4e42f8 100644 --- a/benchmarks/harness/BUILD +++ b/benchmarks/harness/BUILD @@ -1,4 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "requirement") +load("//tools:defs.bzl", "py_library", "py_requirement") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -25,16 +25,16 @@ py_library( srcs = ["container.py"], deps = [ "//benchmarks/workloads", - requirement("asn1crypto", False), - requirement("chardet", False), - requirement("certifi", False), - requirement("docker", True), - requirement("docker-pycreds", False), - requirement("idna", False), - requirement("ptyprocess", False), - requirement("requests", False), - requirement("urllib3", False), - requirement("websocket-client", False), + py_requirement("asn1crypto", False), + py_requirement("chardet", False), + py_requirement("certifi", False), + py_requirement("docker", True), + py_requirement("docker-pycreds", False), + py_requirement("idna", False), + py_requirement("ptyprocess", False), + py_requirement("requests", False), + py_requirement("urllib3", False), + py_requirement("websocket-client", False), ], ) @@ -47,17 +47,17 @@ py_library( "//benchmarks/harness:ssh_connection", "//benchmarks/harness:tunnel_dispatcher", "//benchmarks/harness/machine_mocks", - requirement("asn1crypto", False), - requirement("chardet", False), - requirement("certifi", False), - requirement("docker", True), - requirement("docker-pycreds", False), - requirement("idna", False), - requirement("ptyprocess", False), - requirement("requests", False), - requirement("six", False), - requirement("urllib3", False), - requirement("websocket-client", False), + py_requirement("asn1crypto", False), + py_requirement("chardet", False), + py_requirement("certifi", False), + py_requirement("docker", True), + py_requirement("docker-pycreds", False), + py_requirement("idna", False), + py_requirement("ptyprocess", False), + py_requirement("requests", False), + py_requirement("six", False), + py_requirement("urllib3", False), + py_requirement("websocket-client", False), ], ) @@ -66,10 +66,10 @@ py_library( srcs = ["ssh_connection.py"], deps = [ "//benchmarks/harness", - requirement("bcrypt", False), - requirement("cffi", True), - requirement("paramiko", True), - requirement("cryptography", False), + py_requirement("bcrypt", False), + py_requirement("cffi", True), + py_requirement("paramiko", True), + py_requirement("cryptography", False), ], ) @@ -77,16 +77,16 @@ py_library( name = "tunnel_dispatcher", srcs = ["tunnel_dispatcher.py"], deps = [ - requirement("asn1crypto", False), - requirement("chardet", False), - requirement("certifi", False), - requirement("docker", True), - requirement("docker-pycreds", False), - requirement("idna", False), - requirement("pexpect", True), - requirement("ptyprocess", False), - requirement("requests", False), - requirement("urllib3", False), - requirement("websocket-client", False), + py_requirement("asn1crypto", False), + py_requirement("chardet", False), + py_requirement("certifi", False), + py_requirement("docker", True), + py_requirement("docker-pycreds", False), + py_requirement("idna", False), + py_requirement("pexpect", True), + py_requirement("ptyprocess", False), + py_requirement("requests", False), + py_requirement("urllib3", False), + py_requirement("websocket-client", False), ], ) diff --git a/benchmarks/harness/machine_producers/BUILD b/benchmarks/harness/machine_producers/BUILD index c4e943882..48ea0ef39 100644 --- a/benchmarks/harness/machine_producers/BUILD +++ b/benchmarks/harness/machine_producers/BUILD @@ -1,4 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "requirement") +load("//tools:defs.bzl", "py_library", "py_requirement") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -31,7 +31,7 @@ py_library( deps = [ "//benchmarks/harness:machine", "//benchmarks/harness/machine_producers:machine_producer", - requirement("PyYAML", False), + py_requirement("PyYAML", False), ], ) diff --git a/benchmarks/runner/BUILD b/benchmarks/runner/BUILD index e1b2ea550..fae0ca800 100644 --- a/benchmarks/runner/BUILD +++ b/benchmarks/runner/BUILD @@ -1,4 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") +load("//tools:defs.bzl", "py_library", "py_requirement", "py_test") package(licenses = ["notice"]) @@ -28,7 +28,7 @@ py_library( "//benchmarks/suites:startup", "//benchmarks/suites:sysbench", "//benchmarks/suites:syscall", - requirement("click", True), + py_requirement("click", True), ], ) @@ -36,7 +36,7 @@ py_library( name = "commands", srcs = ["commands.py"], deps = [ - requirement("click", True), + py_requirement("click", True), ], ) @@ -50,14 +50,14 @@ py_test( ], deps = [ ":runner", - requirement("click", True), - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("click", True), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/tcp/BUILD b/benchmarks/tcp/BUILD index 735d7127f..d5e401acc 100644 --- a/benchmarks/tcp/BUILD +++ b/benchmarks/tcp/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") -load("@rules_cc//cc:defs.bzl", "cc_binary") +load("//tools:defs.bzl", "cc_binary", "go_binary") package(licenses = ["notice"]) diff --git a/benchmarks/workloads/ab/BUILD b/benchmarks/workloads/ab/BUILD index 4fc0ab735..4dd91ceb3 100644 --- a/benchmarks/workloads/ab/BUILD +++ b/benchmarks/workloads/ab/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":ab", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/absl/BUILD b/benchmarks/workloads/absl/BUILD index 61e010096..55dae3baa 100644 --- a/benchmarks/workloads/absl/BUILD +++ b/benchmarks/workloads/absl/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":absl", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/curl/BUILD b/benchmarks/workloads/curl/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/curl/BUILD +++ b/benchmarks/workloads/curl/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/ffmpeg/BUILD b/benchmarks/workloads/ffmpeg/BUILD index be472dfb2..7c41ba631 100644 --- a/benchmarks/workloads/ffmpeg/BUILD +++ b/benchmarks/workloads/ffmpeg/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/fio/BUILD b/benchmarks/workloads/fio/BUILD index de257adad..7b78e8e75 100644 --- a/benchmarks/workloads/fio/BUILD +++ b/benchmarks/workloads/fio/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":fio", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/httpd/BUILD b/benchmarks/workloads/httpd/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/httpd/BUILD +++ b/benchmarks/workloads/httpd/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/iperf/BUILD b/benchmarks/workloads/iperf/BUILD index 8832a996c..570f40148 100644 --- a/benchmarks/workloads/iperf/BUILD +++ b/benchmarks/workloads/iperf/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":iperf", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/netcat/BUILD b/benchmarks/workloads/netcat/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/netcat/BUILD +++ b/benchmarks/workloads/netcat/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/nginx/BUILD b/benchmarks/workloads/nginx/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/nginx/BUILD +++ b/benchmarks/workloads/nginx/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/node/BUILD b/benchmarks/workloads/node/BUILD index 71cd9f519..bfcf78cf9 100644 --- a/benchmarks/workloads/node/BUILD +++ b/benchmarks/workloads/node/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/node_template/BUILD b/benchmarks/workloads/node_template/BUILD index ca996f068..e142f082a 100644 --- a/benchmarks/workloads/node_template/BUILD +++ b/benchmarks/workloads/node_template/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/redis/BUILD b/benchmarks/workloads/redis/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/redis/BUILD +++ b/benchmarks/workloads/redis/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/redisbenchmark/BUILD b/benchmarks/workloads/redisbenchmark/BUILD index f5994a815..f472a4443 100644 --- a/benchmarks/workloads/redisbenchmark/BUILD +++ b/benchmarks/workloads/redisbenchmark/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":redisbenchmark", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/ruby/BUILD b/benchmarks/workloads/ruby/BUILD index e37d77804..a3be4fe92 100644 --- a/benchmarks/workloads/ruby/BUILD +++ b/benchmarks/workloads/ruby/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/ruby_template/BUILD b/benchmarks/workloads/ruby_template/BUILD index 27f7c0c46..59443b14a 100644 --- a/benchmarks/workloads/ruby_template/BUILD +++ b/benchmarks/workloads/ruby_template/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/sleep/BUILD b/benchmarks/workloads/sleep/BUILD index eb0fb6165..a70873065 100644 --- a/benchmarks/workloads/sleep/BUILD +++ b/benchmarks/workloads/sleep/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/sysbench/BUILD b/benchmarks/workloads/sysbench/BUILD index fd2f8f03d..3834af7ed 100644 --- a/benchmarks/workloads/sysbench/BUILD +++ b/benchmarks/workloads/sysbench/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":sysbench", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/syscall/BUILD b/benchmarks/workloads/syscall/BUILD index 5100cbb21..dba4bb1e7 100644 --- a/benchmarks/workloads/syscall/BUILD +++ b/benchmarks/workloads/syscall/BUILD @@ -1,5 +1,4 @@ -load("//benchmarks:defs.bzl", "py_library", "py_test", "requirement") -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar", "py_library", "py_requirement", "py_test") package( default_visibility = ["//benchmarks:__subpackages__"], @@ -17,14 +16,14 @@ py_test( python_version = "PY3", deps = [ ":syscall", - requirement("attrs", False), - requirement("atomicwrites", False), - requirement("more-itertools", False), - requirement("pathlib2", False), - requirement("pluggy", False), - requirement("py", False), - requirement("pytest", True), - requirement("six", False), + py_requirement("attrs", False), + py_requirement("atomicwrites", False), + py_requirement("more-itertools", False), + py_requirement("pathlib2", False), + py_requirement("pluggy", False), + py_requirement("py", False), + py_requirement("pytest", True), + py_requirement("six", False), ], ) diff --git a/benchmarks/workloads/tensorflow/BUILD b/benchmarks/workloads/tensorflow/BUILD index 026c3b316..a7b7742f4 100644 --- a/benchmarks/workloads/tensorflow/BUILD +++ b/benchmarks/workloads/tensorflow/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/benchmarks/workloads/true/BUILD b/benchmarks/workloads/true/BUILD index 221c4b9a7..eba23d325 100644 --- a/benchmarks/workloads/true/BUILD +++ b/benchmarks/workloads/true/BUILD @@ -1,4 +1,4 @@ -load("@rules_pkg//:pkg.bzl", "pkg_tar") +load("//tools:defs.bzl", "pkg_tar") package( default_visibility = ["//benchmarks:__subpackages__"], diff --git a/pkg/abi/BUILD b/pkg/abi/BUILD index f5c08ea06..839f822eb 100644 --- a/pkg/abi/BUILD +++ b/pkg/abi/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,6 +9,5 @@ go_library( "abi_linux.go", "flag.go", ], - importpath = "gvisor.dev/gvisor/pkg/abi", visibility = ["//:sandbox"], ) diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index 716ff22d2..1f3c0c687 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") # Package linux contains the constants and types needed to interface with a # Linux kernel. It should be used instead of syscall or golang.org/x/sys/unix @@ -60,7 +59,6 @@ go_library( "wait.go", "xattr.go", ], - importpath = "gvisor.dev/gvisor/pkg/abi/linux", visibility = ["//visibility:public"], deps = [ "//pkg/abi", @@ -73,7 +71,7 @@ go_test( name = "linux_test", size = "small", srcs = ["netfilter_test.go"], - embed = [":linux"], + library = ":linux", deps = [ "//pkg/binary", ], diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD index d99e37b40..9612f072e 100644 --- a/pkg/amutex/BUILD +++ b/pkg/amutex/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "amutex", srcs = ["amutex.go"], - importpath = "gvisor.dev/gvisor/pkg/amutex", visibility = ["//:sandbox"], ) @@ -14,6 +12,6 @@ go_test( name = "amutex_test", size = "small", srcs = ["amutex_test.go"], - embed = [":amutex"], + library = ":amutex", deps = ["//pkg/sync"], ) diff --git a/pkg/atomicbitops/BUILD b/pkg/atomicbitops/BUILD index 6403c60c2..3948074ba 100644 --- a/pkg/atomicbitops/BUILD +++ b/pkg/atomicbitops/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "atomic_bitops_arm64.s", "atomic_bitops_common.go", ], - importpath = "gvisor.dev/gvisor/pkg/atomicbitops", visibility = ["//:sandbox"], ) @@ -19,6 +17,6 @@ go_test( name = "atomicbitops_test", size = "small", srcs = ["atomic_bitops_test.go"], - embed = [":atomicbitops"], + library = ":atomicbitops", deps = ["//pkg/sync"], ) diff --git a/pkg/binary/BUILD b/pkg/binary/BUILD index 543fb54bf..7ca2fda90 100644 --- a/pkg/binary/BUILD +++ b/pkg/binary/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "binary", srcs = ["binary.go"], - importpath = "gvisor.dev/gvisor/pkg/binary", visibility = ["//:sandbox"], ) @@ -14,5 +12,5 @@ go_test( name = "binary_test", size = "small", srcs = ["binary_test.go"], - embed = [":binary"], + library = ":binary", ) diff --git a/pkg/bits/BUILD b/pkg/bits/BUILD index 93b88a29a..63f4670d7 100644 --- a/pkg/bits/BUILD +++ b/pkg/bits/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) @@ -15,7 +14,6 @@ go_library( "uint64_arch_arm64_asm.s", "uint64_arch_generic.go", ], - importpath = "gvisor.dev/gvisor/pkg/bits", visibility = ["//:sandbox"], ) @@ -53,5 +51,5 @@ go_test( name = "bits_test", size = "small", srcs = ["uint64_test.go"], - embed = [":bits"], + library = ":bits", ) diff --git a/pkg/bpf/BUILD b/pkg/bpf/BUILD index fba5643e8..2a6977f85 100644 --- a/pkg/bpf/BUILD +++ b/pkg/bpf/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +11,6 @@ go_library( "interpreter.go", "program_builder.go", ], - importpath = "gvisor.dev/gvisor/pkg/bpf", visibility = ["//visibility:public"], deps = ["//pkg/abi/linux"], ) @@ -25,7 +23,7 @@ go_test( "interpreter_test.go", "program_builder_test.go", ], - embed = [":bpf"], + library = ":bpf", deps = [ "//pkg/abi/linux", "//pkg/binary", diff --git a/pkg/compressio/BUILD b/pkg/compressio/BUILD index 2bb581b18..1f75319a7 100644 --- a/pkg/compressio/BUILD +++ b/pkg/compressio/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "compressio", srcs = ["compressio.go"], - importpath = "gvisor.dev/gvisor/pkg/compressio", visibility = ["//:sandbox"], deps = [ "//pkg/binary", @@ -18,5 +16,5 @@ go_test( name = "compressio_test", size = "medium", srcs = ["compressio_test.go"], - embed = [":compressio"], + library = ":compressio", ) diff --git a/pkg/control/client/BUILD b/pkg/control/client/BUILD index 066d7b1a1..1b9e10ee7 100644 --- a/pkg/control/client/BUILD +++ b/pkg/control/client/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -7,7 +7,6 @@ go_library( srcs = [ "client.go", ], - importpath = "gvisor.dev/gvisor/pkg/control/client", visibility = ["//:sandbox"], deps = [ "//pkg/unet", diff --git a/pkg/control/server/BUILD b/pkg/control/server/BUILD index adbd1e3f8..002d2ef44 100644 --- a/pkg/control/server/BUILD +++ b/pkg/control/server/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "server", srcs = ["server.go"], - importpath = "gvisor.dev/gvisor/pkg/control/server", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/cpuid/BUILD b/pkg/cpuid/BUILD index ed111fd2a..43a432190 100644 --- a/pkg/cpuid/BUILD +++ b/pkg/cpuid/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "cpu_amd64.s", "cpuid.go", ], - importpath = "gvisor.dev/gvisor/pkg/cpuid", visibility = ["//:sandbox"], deps = ["//pkg/log"], ) @@ -18,7 +16,7 @@ go_test( name = "cpuid_test", size = "small", srcs = ["cpuid_test.go"], - embed = [":cpuid"], + library = ":cpuid", ) go_test( @@ -27,6 +25,6 @@ go_test( srcs = [ "cpuid_parse_test.go", ], - embed = [":cpuid"], + library = ":cpuid", tags = ["manual"], ) diff --git a/pkg/eventchannel/BUILD b/pkg/eventchannel/BUILD index 9d68682c7..bee28b68d 100644 --- a/pkg/eventchannel/BUILD +++ b/pkg/eventchannel/BUILD @@ -1,6 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test", "proto_library") package(licenses = ["notice"]) @@ -10,7 +8,6 @@ go_library( "event.go", "rate.go", ], - importpath = "gvisor.dev/gvisor/pkg/eventchannel", visibility = ["//:sandbox"], deps = [ ":eventchannel_go_proto", @@ -24,22 +21,15 @@ go_library( ) proto_library( - name = "eventchannel_proto", + name = "eventchannel", srcs = ["event.proto"], visibility = ["//:sandbox"], ) -go_proto_library( - name = "eventchannel_go_proto", - importpath = "gvisor.dev/gvisor/pkg/eventchannel/eventchannel_go_proto", - proto = ":eventchannel_proto", - visibility = ["//:sandbox"], -) - go_test( name = "eventchannel_test", srcs = ["event_test.go"], - embed = [":eventchannel"], + library = ":eventchannel", deps = [ "//pkg/sync", "@com_github_golang_protobuf//proto:go_default_library", diff --git a/pkg/fd/BUILD b/pkg/fd/BUILD index afa8f7659..872361546 100644 --- a/pkg/fd/BUILD +++ b/pkg/fd/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "fd", srcs = ["fd.go"], - importpath = "gvisor.dev/gvisor/pkg/fd", visibility = ["//visibility:public"], ) @@ -14,5 +12,5 @@ go_test( name = "fd_test", size = "small", srcs = ["fd_test.go"], - embed = [":fd"], + library = ":fd", ) diff --git a/pkg/fdchannel/BUILD b/pkg/fdchannel/BUILD index b0478c672..d9104ef02 100644 --- a/pkg/fdchannel/BUILD +++ b/pkg/fdchannel/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "fdchannel", srcs = ["fdchannel_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/fdchannel", visibility = ["//visibility:public"], ) @@ -14,6 +12,6 @@ go_test( name = "fdchannel_test", size = "small", srcs = ["fdchannel_test.go"], - embed = [":fdchannel"], + library = ":fdchannel", deps = ["//pkg/sync"], ) diff --git a/pkg/fdnotifier/BUILD b/pkg/fdnotifier/BUILD index 91a202a30..235dcc490 100644 --- a/pkg/fdnotifier/BUILD +++ b/pkg/fdnotifier/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "fdnotifier.go", "poll_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/fdnotifier", visibility = ["//:sandbox"], deps = [ "//pkg/sync", diff --git a/pkg/flipcall/BUILD b/pkg/flipcall/BUILD index 85bd83af1..9c5ad500b 100644 --- a/pkg/flipcall/BUILD +++ b/pkg/flipcall/BUILD @@ -1,7 +1,6 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "flipcall", @@ -13,7 +12,6 @@ go_library( "io.go", "packet_window_allocator.go", ], - importpath = "gvisor.dev/gvisor/pkg/flipcall", visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", @@ -30,6 +28,6 @@ go_test( "flipcall_example_test.go", "flipcall_test.go", ], - embed = [":flipcall"], + library = ":flipcall", deps = ["//pkg/sync"], ) diff --git a/pkg/fspath/BUILD b/pkg/fspath/BUILD index ca540363c..ee84471b2 100644 --- a/pkg/fspath/BUILD +++ b/pkg/fspath/BUILD @@ -1,10 +1,8 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package( - default_visibility = ["//visibility:public"], - licenses = ["notice"], -) +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) go_library( name = "fspath", @@ -13,7 +11,6 @@ go_library( "builder_unsafe.go", "fspath.go", ], - importpath = "gvisor.dev/gvisor/pkg/fspath", ) go_test( @@ -23,5 +20,5 @@ go_test( "builder_test.go", "fspath_test.go", ], - embed = [":fspath"], + library = ":fspath", ) diff --git a/pkg/gate/BUILD b/pkg/gate/BUILD index f22bd070d..dd3141143 100644 --- a/pkg/gate/BUILD +++ b/pkg/gate/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -8,7 +7,6 @@ go_library( srcs = [ "gate.go", ], - importpath = "gvisor.dev/gvisor/pkg/gate", visibility = ["//visibility:public"], ) diff --git a/pkg/goid/BUILD b/pkg/goid/BUILD index 5d31e5366..ea8d2422c 100644 --- a/pkg/goid/BUILD +++ b/pkg/goid/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "goid_race.go", "goid_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/goid", visibility = ["//visibility:public"], ) @@ -22,5 +20,5 @@ go_test( "empty_test.go", "goid_test.go", ], - embed = [":goid"], + library = ":goid", ) diff --git a/pkg/ilist/BUILD b/pkg/ilist/BUILD index 34d2673ef..3f6eb07df 100644 --- a/pkg/ilist/BUILD +++ b/pkg/ilist/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( srcs = [ "interface_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/ilist", visibility = ["//visibility:public"], ) @@ -41,7 +39,7 @@ go_test( "list_test.go", "test_list.go", ], - embed = [":ilist"], + library = ":ilist", ) go_template( diff --git a/pkg/linewriter/BUILD b/pkg/linewriter/BUILD index bcde6d308..41bf104d0 100644 --- a/pkg/linewriter/BUILD +++ b/pkg/linewriter/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "linewriter", srcs = ["linewriter.go"], - importpath = "gvisor.dev/gvisor/pkg/linewriter", visibility = ["//visibility:public"], deps = ["//pkg/sync"], ) @@ -14,5 +12,5 @@ go_library( go_test( name = "linewriter_test", srcs = ["linewriter_test.go"], - embed = [":linewriter"], + library = ":linewriter", ) diff --git a/pkg/log/BUILD b/pkg/log/BUILD index 0df0f2849..935d06963 100644 --- a/pkg/log/BUILD +++ b/pkg/log/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +11,6 @@ go_library( "json_k8s.go", "log.go", ], - importpath = "gvisor.dev/gvisor/pkg/log", visibility = [ "//visibility:public", ], @@ -29,5 +27,5 @@ go_test( "json_test.go", "log_test.go", ], - embed = [":log"], + library = ":log", ) diff --git a/pkg/memutil/BUILD b/pkg/memutil/BUILD index 7b50e2b28..9d07d98b4 100644 --- a/pkg/memutil/BUILD +++ b/pkg/memutil/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "memutil", srcs = ["memutil_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/memutil", visibility = ["//visibility:public"], deps = ["@org_golang_x_sys//unix:go_default_library"], ) diff --git a/pkg/metric/BUILD b/pkg/metric/BUILD index 9145f3233..58305009d 100644 --- a/pkg/metric/BUILD +++ b/pkg/metric/BUILD @@ -1,14 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "go_test", "proto_library") package(licenses = ["notice"]) go_library( name = "metric", srcs = ["metric.go"], - importpath = "gvisor.dev/gvisor/pkg/metric", visibility = ["//:sandbox"], deps = [ ":metric_go_proto", @@ -19,28 +15,15 @@ go_library( ) proto_library( - name = "metric_proto", + name = "metric", srcs = ["metric.proto"], visibility = ["//:sandbox"], ) -cc_proto_library( - name = "metric_cc_proto", - visibility = ["//:sandbox"], - deps = [":metric_proto"], -) - -go_proto_library( - name = "metric_go_proto", - importpath = "gvisor.dev/gvisor/pkg/metric/metric_go_proto", - proto = ":metric_proto", - visibility = ["//:sandbox"], -) - go_test( name = "metric_test", srcs = ["metric_test.go"], - embed = [":metric"], + library = ":metric", deps = [ ":metric_go_proto", "//pkg/eventchannel", diff --git a/pkg/p9/BUILD b/pkg/p9/BUILD index a3e05c96d..4ccc1de86 100644 --- a/pkg/p9/BUILD +++ b/pkg/p9/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package( default_visibility = ["//visibility:public"], @@ -23,7 +22,6 @@ go_library( "transport_flipcall.go", "version.go", ], - importpath = "gvisor.dev/gvisor/pkg/p9", deps = [ "//pkg/fd", "//pkg/fdchannel", @@ -47,7 +45,7 @@ go_test( "transport_test.go", "version_test.go", ], - embed = [":p9"], + library = ":p9", deps = [ "//pkg/fd", "//pkg/unet", diff --git a/pkg/p9/p9test/BUILD b/pkg/p9/p9test/BUILD index f4edd68b2..7ca67cb19 100644 --- a/pkg/p9/p9test/BUILD +++ b/pkg/p9/p9test/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_test") +load("//tools:defs.bzl", "go_binary", "go_library", "go_test") package(licenses = ["notice"]) @@ -64,7 +63,6 @@ go_library( "mocks.go", "p9test.go", ], - importpath = "gvisor.dev/gvisor/pkg/p9/p9test", visibility = ["//:sandbox"], deps = [ "//pkg/fd", @@ -80,7 +78,7 @@ go_test( name = "client_test", size = "medium", srcs = ["client_test.go"], - embed = [":p9test"], + library = ":p9test", deps = [ "//pkg/fd", "//pkg/p9", diff --git a/pkg/procid/BUILD b/pkg/procid/BUILD index b506813f0..aa3e3ac0b 100644 --- a/pkg/procid/BUILD +++ b/pkg/procid/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +9,6 @@ go_library( "procid_amd64.s", "procid_arm64.s", ], - importpath = "gvisor.dev/gvisor/pkg/procid", visibility = ["//visibility:public"], ) @@ -20,7 +18,7 @@ go_test( srcs = [ "procid_test.go", ], - embed = [":procid"], + library = ":procid", deps = ["//pkg/sync"], ) @@ -31,6 +29,6 @@ go_test( "procid_net_test.go", "procid_test.go", ], - embed = [":procid"], + library = ":procid", deps = ["//pkg/sync"], ) diff --git a/pkg/rand/BUILD b/pkg/rand/BUILD index 9d5b4859b..80b8ceb02 100644 --- a/pkg/rand/BUILD +++ b/pkg/rand/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "rand.go", "rand_linux.go", ], - importpath = "gvisor.dev/gvisor/pkg/rand", visibility = ["//:sandbox"], deps = [ "//pkg/sync", diff --git a/pkg/refs/BUILD b/pkg/refs/BUILD index 974d9af9b..74affc887 100644 --- a/pkg/refs/BUILD +++ b/pkg/refs/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -23,7 +22,6 @@ go_library( "refcounter_state.go", "weak_ref_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/refs", visibility = ["//:sandbox"], deps = [ "//pkg/log", @@ -35,6 +33,6 @@ go_test( name = "refs_test", size = "small", srcs = ["refcounter_test.go"], - embed = [":refs"], + library = ":refs", deps = ["//pkg/sync"], ) diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD index af94e944d..742c8b79b 100644 --- a/pkg/seccomp/BUILD +++ b/pkg/seccomp/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_embed_data", "go_test") +load("//tools:defs.bzl", "go_binary", "go_embed_data", "go_library", "go_test") package(licenses = ["notice"]) @@ -27,7 +26,6 @@ go_library( "seccomp_rules.go", "seccomp_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/seccomp", visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", @@ -43,7 +41,7 @@ go_test( "seccomp_test.go", ":victim_data", ], - embed = [":seccomp"], + library = ":seccomp", deps = [ "//pkg/abi/linux", "//pkg/binary", diff --git a/pkg/secio/BUILD b/pkg/secio/BUILD index 22abdc69f..60f63c7a6 100644 --- a/pkg/secio/BUILD +++ b/pkg/secio/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "full_reader.go", "secio.go", ], - importpath = "gvisor.dev/gvisor/pkg/secio", visibility = ["//pkg/sentry:internal"], ) @@ -17,5 +15,5 @@ go_test( name = "secio_test", size = "small", srcs = ["secio_test.go"], - embed = [":secio"], + library = ":secio", ) diff --git a/pkg/segment/test/BUILD b/pkg/segment/test/BUILD index a27c35e21..f2d8462d8 100644 --- a/pkg/segment/test/BUILD +++ b/pkg/segment/test/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package( @@ -38,7 +37,6 @@ go_library( "int_set.go", "set_functions.go", ], - importpath = "gvisor.dev/gvisor/pkg/segment/segment", deps = [ "//pkg/state", ], @@ -48,5 +46,5 @@ go_test( name = "segment_test", size = "small", srcs = ["segment_test.go"], - embed = [":segment"], + library = ":segment", ) diff --git a/pkg/sentry/BUILD b/pkg/sentry/BUILD index 2d6379c86..e8b794179 100644 --- a/pkg/sentry/BUILD +++ b/pkg/sentry/BUILD @@ -6,6 +6,8 @@ package(licenses = ["notice"]) package_group( name = "internal", packages = [ + "//cloud/gvisor/gopkg/sentry/...", + "//cloud/gvisor/sentry/...", "//pkg/sentry/...", "//runsc/...", # Code generated by go_marshal relies on go_marshal libraries. diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index 65f22af2b..51ca09b24 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -1,6 +1,4 @@ -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "proto_library") package(licenses = ["notice"]) @@ -27,7 +25,6 @@ go_library( "syscalls_amd64.go", "syscalls_arm64.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/arch", visibility = ["//:sandbox"], deps = [ ":registers_go_proto", @@ -44,20 +41,7 @@ go_library( ) proto_library( - name = "registers_proto", + name = "registers", srcs = ["registers.proto"], visibility = ["//visibility:public"], ) - -cc_proto_library( - name = "registers_cc_proto", - visibility = ["//visibility:public"], - deps = [":registers_proto"], -) - -go_proto_library( - name = "registers_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto", - proto = ":registers_proto", - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/context/BUILD b/pkg/sentry/context/BUILD index 8dc1a77b1..e13a9ce20 100644 --- a/pkg/sentry/context/BUILD +++ b/pkg/sentry/context/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "context", srcs = ["context.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/context", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/amutex", diff --git a/pkg/sentry/context/contexttest/BUILD b/pkg/sentry/context/contexttest/BUILD index 581e7aa96..f91a6d4ed 100644 --- a/pkg/sentry/context/contexttest/BUILD +++ b/pkg/sentry/context/contexttest/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "contexttest", testonly = 1, srcs = ["contexttest.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/context/contexttest", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/memutil", diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index 2561a6109..e69496477 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,9 +11,8 @@ go_library( "proc.go", "state.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/control", visibility = [ - "//pkg/sentry:internal", + "//:sandbox", ], deps = [ "//pkg/abi/linux", @@ -40,7 +38,7 @@ go_test( name = "control_test", size = "small", srcs = ["proc_test.go"], - embed = [":control"], + library = ":control", deps = [ "//pkg/log", "//pkg/sentry/kernel/time", diff --git a/pkg/sentry/device/BUILD b/pkg/sentry/device/BUILD index 97fa1512c..e403cbd8b 100644 --- a/pkg/sentry/device/BUILD +++ b/pkg/sentry/device/BUILD @@ -1,12 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "device", srcs = ["device.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/device", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -18,5 +16,5 @@ go_test( name = "device_test", size = "small", srcs = ["device_test.go"], - embed = [":device"], + library = ":device", ) diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 7d5d72d5a..605d61dbe 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -44,7 +43,6 @@ go_library( "splice.go", "sync.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -129,7 +127,7 @@ go_test( "mount_test.go", "path_test.go", ], - embed = [":fs"], + library = ":fs", deps = [ "//pkg/sentry/context", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD index ae1c9cf76..c14e5405e 100644 --- a/pkg/sentry/fs/anon/BUILD +++ b/pkg/sentry/fs/anon/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "anon.go", "device.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/anon", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index a0d9e8496..0c7247bd7 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -13,7 +13,6 @@ go_library( "random.go", "tty.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/dev", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD index cc43de69d..25ef96299 100644 --- a/pkg/sentry/fs/fdpipe/BUILD +++ b/pkg/sentry/fs/fdpipe/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +9,6 @@ go_library( "pipe_opener.go", "pipe_state.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fdpipe", imports = ["gvisor.dev/gvisor/pkg/sentry/fs"], visibility = ["//pkg/sentry:internal"], deps = [ @@ -36,7 +34,7 @@ go_test( "pipe_opener_test.go", "pipe_test.go", ], - embed = [":fdpipe"], + library = ":fdpipe", deps = [ "//pkg/fd", "//pkg/fdnotifier", diff --git a/pkg/sentry/fs/filetest/BUILD b/pkg/sentry/fs/filetest/BUILD index 358dc2be3..9a7608cae 100644 --- a/pkg/sentry/fs/filetest/BUILD +++ b/pkg/sentry/fs/filetest/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "filetest", testonly = 1, srcs = ["filetest.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/filetest", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 945b6270d..9142f5bdf 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -75,7 +74,6 @@ go_library( "inode.go", "inode_cached.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/fsutil", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -106,7 +104,7 @@ go_test( "dirty_set_test.go", "inode_cached_test.go", ], - embed = [":fsutil"], + library = ":fsutil", deps = [ "//pkg/sentry/context", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index fd870e8e1..cf48e7c03 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -22,7 +21,6 @@ go_library( "socket.go", "util.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/gofer", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -56,7 +54,7 @@ go_test( name = "gofer_test", size = "small", srcs = ["gofer_test.go"], - embed = [":gofer"], + library = ":gofer", deps = [ "//pkg/p9", "//pkg/p9/p9test", diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 2b581aa69..f586f47c1 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -25,7 +24,6 @@ go_library( "util_arm64_unsafe.go", "util_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/host", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -69,7 +67,7 @@ go_test( "socket_test.go", "wait_test.go", ], - embed = [":host"], + library = ":host", deps = [ "//pkg/fd", "//pkg/fdnotifier", diff --git a/pkg/sentry/fs/lock/BUILD b/pkg/sentry/fs/lock/BUILD index 2c332a82a..ae3331737 100644 --- a/pkg/sentry/fs/lock/BUILD +++ b/pkg/sentry/fs/lock/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -40,7 +39,6 @@ go_library( "lock_set.go", "lock_set_functions.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/lock", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", @@ -56,5 +54,5 @@ go_test( "lock_range_test.go", "lock_test.go", ], - embed = [":lock"], + library = ":lock", ) diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index cb37c6c6b..b06bead41 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -27,7 +26,6 @@ go_library( "uptime.go", "version.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -63,7 +61,7 @@ go_test( "net_test.go", "sys_net_test.go", ], - embed = [":proc"], + library = ":proc", deps = [ "//pkg/abi/linux", "//pkg/sentry/context", diff --git a/pkg/sentry/fs/proc/device/BUILD b/pkg/sentry/fs/proc/device/BUILD index 0394451d4..52c9aa93d 100644 --- a/pkg/sentry/fs/proc/device/BUILD +++ b/pkg/sentry/fs/proc/device/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "device", srcs = ["device.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/device", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/sentry/device"], ) diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD index 38b246dff..310d8dd52 100644 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ b/pkg/sentry/fs/proc/seqfile/BUILD @@ -1,12 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "seqfile", srcs = ["seqfile.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -26,7 +24,7 @@ go_test( name = "seqfile_test", size = "small", srcs = ["seqfile_test.go"], - embed = [":seqfile"], + library = ":seqfile", deps = [ "//pkg/sentry/context", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD index 3fb7b0633..39c4b84f8 100644 --- a/pkg/sentry/fs/ramfs/BUILD +++ b/pkg/sentry/fs/ramfs/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "symlink.go", "tree.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ramfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -31,7 +29,7 @@ go_test( name = "ramfs_test", size = "small", srcs = ["tree_test.go"], - embed = [":ramfs"], + library = ":ramfs", deps = [ "//pkg/sentry/context/contexttest", "//pkg/sentry/fs", diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD index 25f0f124e..cc6b3bfbf 100644 --- a/pkg/sentry/fs/sys/BUILD +++ b/pkg/sentry/fs/sys/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -10,7 +10,6 @@ go_library( "fs.go", "sys.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/sys", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD index a215c1b95..092668e8d 100644 --- a/pkg/sentry/fs/timerfd/BUILD +++ b/pkg/sentry/fs/timerfd/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "timerfd", srcs = ["timerfd.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/timerfd", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD index 3400b940c..04776555f 100644 --- a/pkg/sentry/fs/tmpfs/BUILD +++ b/pkg/sentry/fs/tmpfs/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +11,6 @@ go_library( "inode_file.go", "tmpfs.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -41,7 +39,7 @@ go_test( name = "tmpfs_test", size = "small", srcs = ["file_test.go"], - embed = [":tmpfs"], + library = ":tmpfs", deps = [ "//pkg/sentry/context", "//pkg/sentry/fs", diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index f6f60d0cf..29f804c6c 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -14,7 +13,6 @@ go_library( "slave.go", "terminal.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fs/tty", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -40,7 +38,7 @@ go_test( name = "tty_test", size = "small", srcs = ["tty_test.go"], - embed = [":tty"], + library = ":tty", deps = [ "//pkg/abi/linux", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index 903874141..a718920d5 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -32,7 +31,6 @@ go_library( "symlink.go", "utils.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -71,7 +69,7 @@ go_test( "//pkg/sentry/fsimpl/ext:assets/tiny.ext3", "//pkg/sentry/fsimpl/ext:assets/tiny.ext4", ], - embed = [":ext"], + library = ":ext", deps = [ "//pkg/abi/linux", "//pkg/binary", diff --git a/pkg/sentry/fsimpl/ext/benchmark/BUILD b/pkg/sentry/fsimpl/ext/benchmark/BUILD index 4fc8296ef..12f3990c1 100644 --- a/pkg/sentry/fsimpl/ext/benchmark/BUILD +++ b/pkg/sentry/fsimpl/ext/benchmark/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_test") package(licenses = ["notice"]) diff --git a/pkg/sentry/fsimpl/ext/disklayout/BUILD b/pkg/sentry/fsimpl/ext/disklayout/BUILD index fcfaf5c3e..9bd9c76c0 100644 --- a/pkg/sentry/fsimpl/ext/disklayout/BUILD +++ b/pkg/sentry/fsimpl/ext/disklayout/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -23,7 +22,6 @@ go_library( "superblock_old.go", "test_utils.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -44,6 +42,6 @@ go_test( "inode_test.go", "superblock_test.go", ], - embed = [":disklayout"], + library = ":disklayout", deps = ["//pkg/sentry/kernel/time"], ) diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index 66d409785..7bf83ccba 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -1,8 +1,7 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -package(licenses = ["notice"]) +licenses(["notice"]) go_template_instance( name = "slot_list", @@ -27,7 +26,6 @@ go_library( "slot_list.go", "symlink.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index c5b79fb38..3768f55b2 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -1,7 +1,6 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "proc", @@ -15,7 +14,6 @@ go_library( "tasks_net.go", "tasks_sys.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc", deps = [ "//pkg/abi/linux", "//pkg/log", @@ -47,7 +45,7 @@ go_test( "tasks_sys_test.go", "tasks_test.go", ], - embed = [":proc"], + library = ":proc", deps = [ "//pkg/abi/linux", "//pkg/fspath", diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index ee3c842bd..beda141f1 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -1,14 +1,12 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "sys", srcs = [ "sys.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys", deps = [ "//pkg/abi/linux", "//pkg/sentry/context", diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD index 4e70d84a7..12053a5b6 100644 --- a/pkg/sentry/fsimpl/testutil/BUILD +++ b/pkg/sentry/fsimpl/testutil/BUILD @@ -1,6 +1,6 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "testutil", @@ -9,7 +9,6 @@ go_library( "kernel.go", "testutil.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index 691476b4f..857e98bc5 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -1,8 +1,7 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -package(licenses = ["notice"]) +licenses(["notice"]) go_template_instance( name = "dentry_list", @@ -28,7 +27,6 @@ go_library( "symlink.go", "tmpfs.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs", deps = [ "//pkg/abi/linux", "//pkg/amutex", @@ -81,7 +79,7 @@ go_test( "regular_file_test.go", "stat_test.go", ], - embed = [":tmpfs"], + library = ":tmpfs", deps = [ "//pkg/abi/linux", "//pkg/fspath", diff --git a/pkg/sentry/hostcpu/BUILD b/pkg/sentry/hostcpu/BUILD index 359468ccc..e6933aa70 100644 --- a/pkg/sentry/hostcpu/BUILD +++ b/pkg/sentry/hostcpu/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +9,6 @@ go_library( "getcpu_arm64.s", "hostcpu.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/hostcpu", visibility = ["//:sandbox"], ) @@ -18,5 +16,5 @@ go_test( name = "hostcpu_test", size = "small", srcs = ["hostcpu_test.go"], - embed = [":hostcpu"], + library = ":hostcpu", ) diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD index 67831d5a1..a145a5ca3 100644 --- a/pkg/sentry/hostmm/BUILD +++ b/pkg/sentry/hostmm/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "cgroup.go", "hostmm.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/hostmm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/fd", diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD index 8d60ad4ad..aa621b724 100644 --- a/pkg/sentry/inet/BUILD +++ b/pkg/sentry/inet/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package( default_visibility = ["//:sandbox"], @@ -12,7 +12,6 @@ go_library( "inet.go", "test_stack.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/inet", deps = [ "//pkg/sentry/context", "//pkg/tcpip/stack", diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index ac85ba0c8..cebaccd92 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -1,8 +1,5 @@ -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "go_test", "proto_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -78,26 +75,12 @@ go_template_instance( ) proto_library( - name = "uncaught_signal_proto", + name = "uncaught_signal", srcs = ["uncaught_signal.proto"], visibility = ["//visibility:public"], deps = ["//pkg/sentry/arch:registers_proto"], ) -cc_proto_library( - name = "uncaught_signal_cc_proto", - visibility = ["//visibility:public"], - deps = [":uncaught_signal_proto"], -) - -go_proto_library( - name = "uncaught_signal_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto", - proto = ":uncaught_signal_proto", - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_go_proto"], -) - go_library( name = "kernel", srcs = [ @@ -156,7 +139,6 @@ go_library( "vdso.go", "version.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel", imports = [ "gvisor.dev/gvisor/pkg/bpf", "gvisor.dev/gvisor/pkg/sentry/device", @@ -227,7 +209,7 @@ go_test( "task_test.go", "timekeeper_test.go", ], - embed = [":kernel"], + library = ":kernel", deps = [ "//pkg/abi", "//pkg/sentry/arch", diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index 1aa72fa47..64537c9be 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -57,7 +57,6 @@ go_library( "id_map_set.go", "user_namespace.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/auth", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/contexttest/BUILD b/pkg/sentry/kernel/contexttest/BUILD index 3a88a585c..daff608d7 100644 --- a/pkg/sentry/kernel/contexttest/BUILD +++ b/pkg/sentry/kernel/contexttest/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "contexttest", testonly = 1, srcs = ["contexttest.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD index c47f6b6fc..19e16ab3a 100644 --- a/pkg/sentry/kernel/epoll/BUILD +++ b/pkg/sentry/kernel/epoll/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -23,7 +22,6 @@ go_library( "epoll_list.go", "epoll_state.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/epoll", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/refs", @@ -43,7 +41,7 @@ go_test( srcs = [ "epoll_test.go", ], - embed = [":epoll"], + library = ":epoll", deps = [ "//pkg/sentry/context/contexttest", "//pkg/sentry/fs/filetest", diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD index c831fbab2..ee2d74864 100644 --- a/pkg/sentry/kernel/eventfd/BUILD +++ b/pkg/sentry/kernel/eventfd/BUILD @@ -1,12 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "eventfd", srcs = ["eventfd.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -26,7 +24,7 @@ go_test( name = "eventfd_test", size = "small", srcs = ["eventfd_test.go"], - embed = [":eventfd"], + library = ":eventfd", deps = [ "//pkg/sentry/context/contexttest", "//pkg/sentry/usermem", diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD index 6b36bc63e..b9126e946 100644 --- a/pkg/sentry/kernel/fasync/BUILD +++ b/pkg/sentry/kernel/fasync/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "fasync", srcs = ["fasync.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/fasync", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index 50db443ce..f413d8ae2 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -34,7 +33,6 @@ go_library( "futex.go", "waiter_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/futex", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -51,7 +49,7 @@ go_test( name = "futex_test", size = "small", srcs = ["futex_test.go"], - embed = [":futex"], + library = ":futex", deps = [ "//pkg/sentry/usermem", "//pkg/sync", diff --git a/pkg/sentry/kernel/memevent/BUILD b/pkg/sentry/kernel/memevent/BUILD index 7f36252a9..4486848d2 100644 --- a/pkg/sentry/kernel/memevent/BUILD +++ b/pkg/sentry/kernel/memevent/BUILD @@ -1,13 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "proto_library") package(licenses = ["notice"]) go_library( name = "memevent", srcs = ["memory_events.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent", visibility = ["//:sandbox"], deps = [ ":memory_events_go_proto", @@ -21,20 +18,7 @@ go_library( ) proto_library( - name = "memory_events_proto", + name = "memory_events", srcs = ["memory_events.proto"], visibility = ["//visibility:public"], ) - -cc_proto_library( - name = "memory_events_cc_proto", - visibility = ["//visibility:public"], - deps = [":memory_events_proto"], -) - -go_proto_library( - name = "memory_events_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/memevent/memory_events_go_proto", - proto = ":memory_events_proto", - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 5eeaeff66..2c7b6206f 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -30,7 +29,6 @@ go_library( "vfs.go", "writer.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/pipe", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -57,7 +55,7 @@ go_test( "node_test.go", "pipe_test.go", ], - embed = [":pipe"], + library = ":pipe", deps = [ "//pkg/sentry/context", "//pkg/sentry/context/contexttest", diff --git a/pkg/sentry/kernel/sched/BUILD b/pkg/sentry/kernel/sched/BUILD index 98ea7a0d8..1b82e087b 100644 --- a/pkg/sentry/kernel/sched/BUILD +++ b/pkg/sentry/kernel/sched/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "cpuset.go", "sched.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/sched", visibility = ["//pkg/sentry:internal"], ) @@ -17,5 +15,5 @@ go_test( name = "sched_test", size = "small", srcs = ["cpuset_test.go"], - embed = [":sched"], + library = ":sched", ) diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index 13a961594..76e19b551 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -22,7 +21,6 @@ go_library( "semaphore.go", "waiter_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/semaphore", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -40,7 +38,7 @@ go_test( name = "semaphore_test", size = "small", srcs = ["semaphore_test.go"], - embed = [":semaphore"], + library = ":semaphore", deps = [ "//pkg/abi/linux", "//pkg/sentry/context", diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index 7321b22ed..5547c5abf 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "device.go", "shm.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/shm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD index 89e4d84b1..5d44773d4 100644 --- a/pkg/sentry/kernel/signalfd/BUILD +++ b/pkg/sentry/kernel/signalfd/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "signalfd", srcs = ["signalfd.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/signalfd", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD index 4e4de0512..d49594d9f 100644 --- a/pkg/sentry/kernel/time/BUILD +++ b/pkg/sentry/kernel/time/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "context.go", "time.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/kernel/time", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/limits/BUILD b/pkg/sentry/limits/BUILD index 9fa841e8b..67869757f 100644 --- a/pkg/sentry/limits/BUILD +++ b/pkg/sentry/limits/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +9,6 @@ go_library( "limits.go", "linux.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/limits", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", @@ -25,5 +23,5 @@ go_test( srcs = [ "limits_test.go", ], - embed = [":limits"], + library = ":limits", ) diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD index 2890393bd..d4ad2bd6c 100644 --- a/pkg/sentry/loader/BUILD +++ b/pkg/sentry/loader/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_embed_data") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_embed_data", "go_library") package(licenses = ["notice"]) @@ -20,7 +19,6 @@ go_library( "vdso_state.go", ":vdso_bin", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/loader", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi", diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD index 112794e9c..f9a65f086 100644 --- a/pkg/sentry/memmap/BUILD +++ b/pkg/sentry/memmap/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -37,7 +36,6 @@ go_library( "mapping_set_impl.go", "memmap.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/memmap", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", @@ -52,6 +50,6 @@ go_test( name = "memmap_test", size = "small", srcs = ["mapping_set_test.go"], - embed = [":memmap"], + library = ":memmap", deps = ["//pkg/sentry/usermem"], ) diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index 83e248431..bd6399fa2 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -96,7 +95,6 @@ go_library( "vma.go", "vma_set.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/mm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -128,7 +126,7 @@ go_test( name = "mm_test", size = "small", srcs = ["mm_test.go"], - embed = [":mm"], + library = ":mm", deps = [ "//pkg/sentry/arch", "//pkg/sentry/context", diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index a9a2642c5..02385a3ce 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -60,7 +59,6 @@ go_library( "save_restore.go", "usage_set.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/pgalloc", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/log", @@ -82,6 +80,6 @@ go_test( name = "pgalloc_test", size = "small", srcs = ["pgalloc_test.go"], - embed = [":pgalloc"], + library = ":pgalloc", deps = ["//pkg/sentry/usermem"], ) diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD index 157bffa81..006450b2d 100644 --- a/pkg/sentry/platform/BUILD +++ b/pkg/sentry/platform/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -22,7 +22,6 @@ go_library( "mmap_min_addr.go", "platform.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/platform/interrupt/BUILD b/pkg/sentry/platform/interrupt/BUILD index 85e882df9..83b385f14 100644 --- a/pkg/sentry/platform/interrupt/BUILD +++ b/pkg/sentry/platform/interrupt/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -8,7 +7,6 @@ go_library( srcs = [ "interrupt.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/interrupt", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/sync"], ) @@ -17,5 +15,5 @@ go_test( name = "interrupt_test", size = "small", srcs = ["interrupt_test.go"], - embed = [":interrupt"], + library = ":interrupt", ) diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 6a358d1d4..a4532a766 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -38,7 +37,6 @@ go_library( "physical_map_arm64.go", "virtual_map.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -65,7 +63,7 @@ go_test( "kvm_test.go", "virtual_map_test.go", ], - embed = [":kvm"], + library = ":kvm", tags = [ "manual", "nogotsan", diff --git a/pkg/sentry/platform/kvm/testutil/BUILD b/pkg/sentry/platform/kvm/testutil/BUILD index b0e45f159..f7605df8a 100644 --- a/pkg/sentry/platform/kvm/testutil/BUILD +++ b/pkg/sentry/platform/kvm/testutil/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -12,6 +12,5 @@ go_library( "testutil_arm64.go", "testutil_arm64.s", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil", visibility = ["//pkg/sentry/platform/kvm:__pkg__"], ) diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD index cd13390c3..3bcc5e040 100644 --- a/pkg/sentry/platform/ptrace/BUILD +++ b/pkg/sentry/platform/ptrace/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -20,7 +20,6 @@ go_library( "subprocess_linux_unsafe.go", "subprocess_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ptrace", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD index 87f4552b5..6dee8fcc5 100644 --- a/pkg/sentry/platform/ring0/BUILD +++ b/pkg/sentry/platform/ring0/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) @@ -74,7 +74,6 @@ go_library( "lib_arm64.s", "ring0.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/cpuid", diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/sentry/platform/ring0/gen_offsets/BUILD index 42076fb04..147311ed3 100644 --- a/pkg/sentry/platform/ring0/gen_offsets/BUILD +++ b/pkg/sentry/platform/ring0/gen_offsets/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index 387a7f6c3..8b5cdd6c1 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -1,17 +1,14 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test", "select_arch") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) -config_setting( - name = "aarch64", - constraint_values = ["@bazel_tools//platforms:aarch64"], -) - go_template( name = "generic_walker", - srcs = ["walker_amd64.go"], + srcs = select_arch( + amd64 = ["walker_amd64.go"], + arm64 = ["walker_amd64.go"], + ), opt_types = [ "Visitor", ], @@ -91,7 +88,6 @@ go_library( "walker_map.go", "walker_unmap.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables", visibility = [ "//pkg/sentry/platform/kvm:__subpackages__", "//pkg/sentry/platform/ring0:__subpackages__", @@ -111,6 +107,6 @@ go_test( "pagetables_test.go", "walker_check.go", ], - embed = [":pagetables"], + library = ":pagetables", deps = ["//pkg/sentry/usermem"], ) diff --git a/pkg/sentry/platform/safecopy/BUILD b/pkg/sentry/platform/safecopy/BUILD index 6769cd0a5..b8747585b 100644 --- a/pkg/sentry/platform/safecopy/BUILD +++ b/pkg/sentry/platform/safecopy/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -17,7 +16,6 @@ go_library( "sighandler_amd64.s", "sighandler_arm64.s", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/platform/safecopy", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/syserror"], ) @@ -27,5 +25,5 @@ go_test( srcs = [ "safecopy_test.go", ], - embed = [":safecopy"], + library = ":safecopy", ) diff --git a/pkg/sentry/safemem/BUILD b/pkg/sentry/safemem/BUILD index 884020f7b..3ab76da97 100644 --- a/pkg/sentry/safemem/BUILD +++ b/pkg/sentry/safemem/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "safemem.go", "seq_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/safemem", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/platform/safecopy", @@ -25,5 +23,5 @@ go_test( "io_test.go", "seq_test.go", ], - embed = [":safemem"], + library = ":safemem", ) diff --git a/pkg/sentry/sighandling/BUILD b/pkg/sentry/sighandling/BUILD index f561670c7..6c38a3f44 100644 --- a/pkg/sentry/sighandling/BUILD +++ b/pkg/sentry/sighandling/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "sighandling.go", "sighandling_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/sighandling", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/abi/linux"], ) diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD index 26176b10d..8e2b97afb 100644 --- a/pkg/sentry/socket/BUILD +++ b/pkg/sentry/socket/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "socket", srcs = ["socket.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD index 357517ed4..3850f6345 100644 --- a/pkg/sentry/socket/control/BUILD +++ b/pkg/sentry/socket/control/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "control", srcs = ["control.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/control", imports = [ "gvisor.dev/gvisor/pkg/sentry/fs", ], diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index 4c44c7c0f..42bf7be6a 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -12,7 +12,6 @@ go_library( "socket_unsafe.go", "stack.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/hostinet", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index b70047d81..ed34a8308 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -7,7 +7,6 @@ go_library( srcs = [ "netfilter.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netfilter", # This target depends on netstack and should only be used by epsocket, # which is allowed to depend on netstack. visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 103933144..baaac13c6 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "provider.go", "socket.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/netlink/port/BUILD b/pkg/sentry/socket/netlink/port/BUILD index 2d9f4ba9b..3a22923d8 100644 --- a/pkg/sentry/socket/netlink/port/BUILD +++ b/pkg/sentry/socket/netlink/port/BUILD @@ -1,12 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "port", srcs = ["port.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port", visibility = ["//pkg/sentry:internal"], deps = ["//pkg/sync"], ) @@ -14,5 +12,5 @@ go_library( go_test( name = "port_test", srcs = ["port_test.go"], - embed = [":port"], + library = ":port", ) diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD index 1d4912753..2137c7aeb 100644 --- a/pkg/sentry/socket/netlink/route/BUILD +++ b/pkg/sentry/socket/netlink/route/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "route", srcs = ["protocol.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/netlink/uevent/BUILD b/pkg/sentry/socket/netlink/uevent/BUILD index 0777f3baf..73fbdf1eb 100644 --- a/pkg/sentry/socket/netlink/uevent/BUILD +++ b/pkg/sentry/socket/netlink/uevent/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "uevent", srcs = ["protocol.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netlink/uevent", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index f78784569..e3d1f90cb 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -11,7 +11,6 @@ go_library( "save_restore.go", "stack.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/netstack", visibility = [ "//pkg/sentry:internal", ], diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index 5b6a154f6..bade18686 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "io.go", "unix.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD index d7ba95dff..4bdfc9208 100644 --- a/pkg/sentry/socket/unix/transport/BUILD +++ b/pkg/sentry/socket/unix/transport/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -25,7 +25,6 @@ go_library( "transport_message_list.go", "unix.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/state/BUILD b/pkg/sentry/state/BUILD index 88765f4d6..0ea4aab8b 100644 --- a/pkg/sentry/state/BUILD +++ b/pkg/sentry/state/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "state_metadata.go", "state_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/state", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD index aa1ac720c..ff6fafa63 100644 --- a/pkg/sentry/strace/BUILD +++ b/pkg/sentry/strace/BUILD @@ -1,6 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "proto_library") package(licenses = ["notice"]) @@ -21,7 +19,6 @@ go_library( "strace.go", "syscalls.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/strace", visibility = ["//:sandbox"], deps = [ ":strace_go_proto", @@ -42,20 +39,7 @@ go_library( ) proto_library( - name = "strace_proto", + name = "strace", srcs = ["strace.proto"], visibility = ["//visibility:public"], ) - -cc_proto_library( - name = "strace_cc_proto", - visibility = ["//visibility:public"], - deps = [":strace_proto"], -) - -go_proto_library( - name = "strace_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto", - proto = ":strace_proto", - visibility = ["//visibility:public"], -) diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD index 79d972202..b8d1bd415 100644 --- a/pkg/sentry/syscalls/BUILD +++ b/pkg/sentry/syscalls/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "epoll.go", "syscalls.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 917f74e07..7d74e0f70 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -57,7 +57,6 @@ go_library( "sys_xattr.go", "timespec.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/syscalls/linux", visibility = ["//:sandbox"], deps = [ "//pkg/abi", diff --git a/pkg/sentry/time/BUILD b/pkg/sentry/time/BUILD index 3cde3a0be..04f81a35b 100644 --- a/pkg/sentry/time/BUILD +++ b/pkg/sentry/time/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -31,7 +30,6 @@ go_library( "tsc_amd64.s", "tsc_arm64.s", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/time", visibility = ["//:sandbox"], deps = [ "//pkg/log", @@ -48,5 +46,5 @@ go_test( "parameters_test.go", "sampler_test.go", ], - embed = [":time"], + library = ":time", ) diff --git a/pkg/sentry/unimpl/BUILD b/pkg/sentry/unimpl/BUILD index fc7614fff..370fa6ec5 100644 --- a/pkg/sentry/unimpl/BUILD +++ b/pkg/sentry/unimpl/BUILD @@ -1,34 +1,17 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@rules_cc//cc:defs.bzl", "cc_proto_library") +load("//tools:defs.bzl", "go_library", "proto_library") package(licenses = ["notice"]) proto_library( - name = "unimplemented_syscall_proto", + name = "unimplemented_syscall", srcs = ["unimplemented_syscall.proto"], visibility = ["//visibility:public"], deps = ["//pkg/sentry/arch:registers_proto"], ) -cc_proto_library( - name = "unimplemented_syscall_cc_proto", - visibility = ["//visibility:public"], - deps = [":unimplemented_syscall_proto"], -) - -go_proto_library( - name = "unimplemented_syscall_go_proto", - importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto", - proto = ":unimplemented_syscall_proto", - visibility = ["//visibility:public"], - deps = ["//pkg/sentry/arch:registers_go_proto"], -) - go_library( name = "unimpl", srcs = ["events.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/unimpl", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/pkg/sentry/uniqueid/BUILD b/pkg/sentry/uniqueid/BUILD index 86a87edd4..e9c18f170 100644 --- a/pkg/sentry/uniqueid/BUILD +++ b/pkg/sentry/uniqueid/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "uniqueid", srcs = ["context.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/uniqueid", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/sentry/context", diff --git a/pkg/sentry/usage/BUILD b/pkg/sentry/usage/BUILD index 5518ac3d0..099315613 100644 --- a/pkg/sentry/usage/BUILD +++ b/pkg/sentry/usage/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -11,9 +11,8 @@ go_library( "memory_unsafe.go", "usage.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/usage", visibility = [ - "//pkg/sentry:internal", + "//:sandbox", ], deps = [ "//pkg/bits", diff --git a/pkg/sentry/usermem/BUILD b/pkg/sentry/usermem/BUILD index 684f59a6b..c8322e29e 100644 --- a/pkg/sentry/usermem/BUILD +++ b/pkg/sentry/usermem/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -29,7 +28,6 @@ go_library( "usermem_unsafe.go", "usermem_x86.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/usermem", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/atomicbitops", @@ -38,7 +36,6 @@ go_library( "//pkg/sentry/context", "//pkg/sentry/safemem", "//pkg/syserror", - "//pkg/tcpip/buffer", ], ) @@ -49,7 +46,7 @@ go_test( "addr_range_seq_test.go", "usermem_test.go", ], - embed = [":usermem"], + library = ":usermem", deps = [ "//pkg/sentry/context", "//pkg/sentry/safemem", diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 35c7be259..51acdc4e9 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -1,7 +1,6 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "vfs", @@ -24,7 +23,6 @@ go_library( "testutil.go", "vfs.go", ], - importpath = "gvisor.dev/gvisor/pkg/sentry/vfs", visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", @@ -47,7 +45,7 @@ go_test( "file_description_impl_util_test.go", "mount_test.go", ], - embed = [":vfs"], + library = ":vfs", deps = [ "//pkg/abi/linux", "//pkg/sentry/context", diff --git a/pkg/sentry/watchdog/BUILD b/pkg/sentry/watchdog/BUILD index 28f21f13d..1c5a1c9b6 100644 --- a/pkg/sentry/watchdog/BUILD +++ b/pkg/sentry/watchdog/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "watchdog", srcs = ["watchdog.go"], - importpath = "gvisor.dev/gvisor/pkg/sentry/watchdog", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", diff --git a/pkg/sleep/BUILD b/pkg/sleep/BUILD index a23c86fb1..e131455f7 100644 --- a/pkg/sleep/BUILD +++ b/pkg/sleep/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +11,6 @@ go_library( "commit_noasm.go", "sleep_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sleep", visibility = ["//:sandbox"], ) @@ -22,5 +20,5 @@ go_test( srcs = [ "sleep_test.go", ], - embed = [":sleep"], + library = ":sleep", ) diff --git a/pkg/state/BUILD b/pkg/state/BUILD index be93750bf..921af9d63 100644 --- a/pkg/state/BUILD +++ b/pkg/state/BUILD @@ -1,6 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test", "proto_library") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -49,7 +47,7 @@ go_library( "state.go", "stats.go", ], - importpath = "gvisor.dev/gvisor/pkg/state", + stateify = False, visibility = ["//:sandbox"], deps = [ ":object_go_proto", @@ -58,21 +56,14 @@ go_library( ) proto_library( - name = "object_proto", + name = "object", srcs = ["object.proto"], visibility = ["//:sandbox"], ) -go_proto_library( - name = "object_go_proto", - importpath = "gvisor.dev/gvisor/pkg/state/object_go_proto", - proto = ":object_proto", - visibility = ["//:sandbox"], -) - go_test( name = "state_test", timeout = "long", srcs = ["state_test.go"], - embed = [":state"], + library = ":state", ) diff --git a/pkg/state/statefile/BUILD b/pkg/state/statefile/BUILD index 8a865d229..e7581c09b 100644 --- a/pkg/state/statefile/BUILD +++ b/pkg/state/statefile/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "statefile", srcs = ["statefile.go"], - importpath = "gvisor.dev/gvisor/pkg/state/statefile", visibility = ["//:sandbox"], deps = [ "//pkg/binary", @@ -18,6 +16,6 @@ go_test( name = "statefile_test", size = "small", srcs = ["statefile_test.go"], - embed = [":statefile"], + library = ":statefile", deps = ["//pkg/compressio"], ) diff --git a/pkg/sync/BUILD b/pkg/sync/BUILD index 97c4b3b1e..5340cf0d6 100644 --- a/pkg/sync/BUILD +++ b/pkg/sync/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template") package( @@ -40,7 +39,6 @@ go_library( "syncutil.go", "tmutex_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/sync", ) go_test( @@ -51,5 +49,5 @@ go_test( "seqcount_test.go", "tmutex_test.go", ], - embed = [":sync"], + library = ":sync", ) diff --git a/pkg/sync/atomicptrtest/BUILD b/pkg/sync/atomicptrtest/BUILD index 418eda29c..e97553254 100644 --- a/pkg/sync/atomicptrtest/BUILD +++ b/pkg/sync/atomicptrtest/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -18,12 +17,11 @@ go_template_instance( go_library( name = "atomicptr", srcs = ["atomicptr_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/sync/atomicptr", ) go_test( name = "atomicptr_test", size = "small", srcs = ["atomicptr_test.go"], - embed = [":atomicptr"], + library = ":atomicptr", ) diff --git a/pkg/sync/seqatomictest/BUILD b/pkg/sync/seqatomictest/BUILD index eba21518d..5c38c783e 100644 --- a/pkg/sync/seqatomictest/BUILD +++ b/pkg/sync/seqatomictest/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) @@ -18,7 +17,6 @@ go_template_instance( go_library( name = "seqatomic", srcs = ["seqatomic_int_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/sync/seqatomic", deps = [ "//pkg/sync", ], @@ -28,6 +26,6 @@ go_test( name = "seqatomic_test", size = "small", srcs = ["seqatomic_test.go"], - embed = [":seqatomic"], + library = ":seqatomic", deps = ["//pkg/sync"], ) diff --git a/pkg/syserr/BUILD b/pkg/syserr/BUILD index 5665ad4ee..7d760344a 100644 --- a/pkg/syserr/BUILD +++ b/pkg/syserr/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "netstack.go", "syserr.go", ], - importpath = "gvisor.dev/gvisor/pkg/syserr", visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", diff --git a/pkg/syserror/BUILD b/pkg/syserror/BUILD index bd3f9fd28..b13c15d9b 100644 --- a/pkg/syserror/BUILD +++ b/pkg/syserror/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "syserror", srcs = ["syserror.go"], - importpath = "gvisor.dev/gvisor/pkg/syserror", visibility = ["//visibility:public"], ) diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD index 23e4b09e7..26f7ba86b 100644 --- a/pkg/tcpip/BUILD +++ b/pkg/tcpip/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +11,6 @@ go_library( "time_unsafe.go", "timer.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip", visibility = ["//visibility:public"], deps = [ "//pkg/sync", @@ -25,7 +23,7 @@ go_test( name = "tcpip_test", size = "small", srcs = ["tcpip_test.go"], - embed = [":tcpip"], + library = ":tcpip", ) go_test( diff --git a/pkg/tcpip/adapters/gonet/BUILD b/pkg/tcpip/adapters/gonet/BUILD index 3df7d18d3..a984f1712 100644 --- a/pkg/tcpip/adapters/gonet/BUILD +++ b/pkg/tcpip/adapters/gonet/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "gonet", srcs = ["gonet.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet", visibility = ["//visibility:public"], deps = [ "//pkg/sync", @@ -23,7 +21,7 @@ go_test( name = "gonet_test", size = "small", srcs = ["gonet_test.go"], - embed = [":gonet"], + library = ":gonet", deps = [ "//pkg/tcpip", "//pkg/tcpip/header", diff --git a/pkg/tcpip/buffer/BUILD b/pkg/tcpip/buffer/BUILD index d6c31bfa2..563bc78ea 100644 --- a/pkg/tcpip/buffer/BUILD +++ b/pkg/tcpip/buffer/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "prependable.go", "view.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/buffer", visibility = ["//visibility:public"], ) @@ -17,5 +15,5 @@ go_test( name = "buffer_test", size = "small", srcs = ["view_test.go"], - embed = [":buffer"], + library = ":buffer", ) diff --git a/pkg/tcpip/checker/BUILD b/pkg/tcpip/checker/BUILD index b6fa6fc37..ed434807f 100644 --- a/pkg/tcpip/checker/BUILD +++ b/pkg/tcpip/checker/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "checker", testonly = 1, srcs = ["checker.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/checker", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/hash/jenkins/BUILD b/pkg/tcpip/hash/jenkins/BUILD index e648efa71..ff2719291 100644 --- a/pkg/tcpip/hash/jenkins/BUILD +++ b/pkg/tcpip/hash/jenkins/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "jenkins", srcs = ["jenkins.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins", visibility = ["//visibility:public"], ) @@ -16,5 +14,5 @@ go_test( srcs = [ "jenkins_test.go", ], - embed = [":jenkins"], + library = ":jenkins", ) diff --git a/pkg/tcpip/header/BUILD b/pkg/tcpip/header/BUILD index cd747d100..9da0d71f8 100644 --- a/pkg/tcpip/header/BUILD +++ b/pkg/tcpip/header/BUILD @@ -1,5 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -24,7 +23,6 @@ go_library( "tcp.go", "udp.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/header", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", @@ -59,7 +57,7 @@ go_test( "eth_test.go", "ndp_test.go", ], - embed = [":header"], + library = ":header", deps = [ "//pkg/tcpip", "@com_github_google_go-cmp//cmp:go_default_library", diff --git a/pkg/tcpip/iptables/BUILD b/pkg/tcpip/iptables/BUILD index 297eaccaf..d1b73cfdf 100644 --- a/pkg/tcpip/iptables/BUILD +++ b/pkg/tcpip/iptables/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "targets.go", "types.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/iptables", visibility = ["//visibility:public"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD index 7dbc05754..3974c464e 100644 --- a/pkg/tcpip/link/channel/BUILD +++ b/pkg/tcpip/link/channel/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "channel", srcs = ["channel.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/channel", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD index 66cc53ed4..abe725548 100644 --- a/pkg/tcpip/link/fdbased/BUILD +++ b/pkg/tcpip/link/fdbased/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -13,7 +12,6 @@ go_library( "mmap_unsafe.go", "packet_dispatchers.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/fdbased", visibility = ["//visibility:public"], deps = [ "//pkg/sync", @@ -30,7 +28,7 @@ go_test( name = "fdbased_test", size = "small", srcs = ["endpoint_test.go"], - embed = [":fdbased"], + library = ":fdbased", deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/link/loopback/BUILD b/pkg/tcpip/link/loopback/BUILD index f35fcdff4..6bf3805b7 100644 --- a/pkg/tcpip/link/loopback/BUILD +++ b/pkg/tcpip/link/loopback/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "loopback", srcs = ["loopback.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/loopback", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/muxed/BUILD b/pkg/tcpip/link/muxed/BUILD index 1ac7948b6..82b441b79 100644 --- a/pkg/tcpip/link/muxed/BUILD +++ b/pkg/tcpip/link/muxed/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "muxed", srcs = ["injectable.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/muxed", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", @@ -19,7 +17,7 @@ go_test( name = "muxed_test", size = "small", srcs = ["injectable_test.go"], - embed = [":muxed"], + library = ":muxed", deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD index d8211e93d..14b527bc2 100644 --- a/pkg/tcpip/link/rawfile/BUILD +++ b/pkg/tcpip/link/rawfile/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -12,7 +12,6 @@ go_library( "errors.go", "rawfile_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/rawfile", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/link/sharedmem/BUILD b/pkg/tcpip/link/sharedmem/BUILD index 09165dd4c..13243ebbb 100644 --- a/pkg/tcpip/link/sharedmem/BUILD +++ b/pkg/tcpip/link/sharedmem/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "sharedmem_unsafe.go", "tx.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem", visibility = ["//visibility:public"], deps = [ "//pkg/log", @@ -30,7 +28,7 @@ go_test( srcs = [ "sharedmem_test.go", ], - embed = [":sharedmem"], + library = ":sharedmem", deps = [ "//pkg/sync", "//pkg/tcpip", diff --git a/pkg/tcpip/link/sharedmem/pipe/BUILD b/pkg/tcpip/link/sharedmem/pipe/BUILD index a0d4ad0be..87020ec08 100644 --- a/pkg/tcpip/link/sharedmem/pipe/BUILD +++ b/pkg/tcpip/link/sharedmem/pipe/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -11,7 +10,6 @@ go_library( "rx.go", "tx.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe", visibility = ["//visibility:public"], ) @@ -20,6 +18,6 @@ go_test( srcs = [ "pipe_test.go", ], - embed = [":pipe"], + library = ":pipe", deps = ["//pkg/sync"], ) diff --git a/pkg/tcpip/link/sharedmem/queue/BUILD b/pkg/tcpip/link/sharedmem/queue/BUILD index 8c9234d54..3ba06af73 100644 --- a/pkg/tcpip/link/sharedmem/queue/BUILD +++ b/pkg/tcpip/link/sharedmem/queue/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "rx.go", "tx.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue", visibility = ["//visibility:public"], deps = [ "//pkg/log", @@ -22,7 +20,7 @@ go_test( srcs = [ "queue_test.go", ], - embed = [":queue"], + library = ":queue", deps = [ "//pkg/tcpip/link/sharedmem/pipe", ], diff --git a/pkg/tcpip/link/sniffer/BUILD b/pkg/tcpip/link/sniffer/BUILD index d6ae0368a..230a8d53a 100644 --- a/pkg/tcpip/link/sniffer/BUILD +++ b/pkg/tcpip/link/sniffer/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,7 +8,6 @@ go_library( "pcap.go", "sniffer.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sniffer", visibility = ["//visibility:public"], deps = [ "//pkg/log", diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index a71a493fc..e5096ea38 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -1,10 +1,9 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "tun", srcs = ["tun_unsafe.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/tun", visibility = ["//visibility:public"], ) diff --git a/pkg/tcpip/link/waitable/BUILD b/pkg/tcpip/link/waitable/BUILD index 134837943..0956d2c65 100644 --- a/pkg/tcpip/link/waitable/BUILD +++ b/pkg/tcpip/link/waitable/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -8,7 +7,6 @@ go_library( srcs = [ "waitable.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/link/waitable", visibility = ["//visibility:public"], deps = [ "//pkg/gate", @@ -23,7 +21,7 @@ go_test( srcs = [ "waitable_test.go", ], - embed = [":waitable"], + library = ":waitable", deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/network/BUILD b/pkg/tcpip/network/BUILD index 9d16ff8c9..6a4839fb8 100644 --- a/pkg/tcpip/network/BUILD +++ b/pkg/tcpip/network/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_test") package(licenses = ["notice"]) diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD index e7617229b..eddf7b725 100644 --- a/pkg/tcpip/network/arp/BUILD +++ b/pkg/tcpip/network/arp/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "arp", srcs = ["arp.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/arp", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD index ed16076fd..d1c728ccf 100644 --- a/pkg/tcpip/network/fragmentation/BUILD +++ b/pkg/tcpip/network/fragmentation/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -24,7 +23,6 @@ go_library( "reassembler.go", "reassembler_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/fragmentation", visibility = ["//visibility:public"], deps = [ "//pkg/log", @@ -42,6 +40,6 @@ go_test( "fragmentation_test.go", "reassembler_test.go", ], - embed = [":fragmentation"], + library = ":fragmentation", deps = ["//pkg/tcpip/buffer"], ) diff --git a/pkg/tcpip/network/hash/BUILD b/pkg/tcpip/network/hash/BUILD index e6db5c0b0..872165866 100644 --- a/pkg/tcpip/network/hash/BUILD +++ b/pkg/tcpip/network/hash/BUILD @@ -1,11 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "hash", srcs = ["hash.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/hash", visibility = ["//visibility:public"], deps = [ "//pkg/rand", diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD index 4e2aae9a3..0fef2b1f1 100644 --- a/pkg/tcpip/network/ipv4/BUILD +++ b/pkg/tcpip/network/ipv4/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "icmp.go", "ipv4.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv4", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", diff --git a/pkg/tcpip/network/ipv6/BUILD b/pkg/tcpip/network/ipv6/BUILD index e4e273460..fb11874c6 100644 --- a/pkg/tcpip/network/ipv6/BUILD +++ b/pkg/tcpip/network/ipv6/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "icmp.go", "ipv6.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/network/ipv6", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip", @@ -27,7 +25,7 @@ go_test( "ipv6_test.go", "ndp_test.go", ], - embed = [":ipv6"], + library = ":ipv6", deps = [ "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/ports/BUILD b/pkg/tcpip/ports/BUILD index a6ef3bdcc..2bad05a2e 100644 --- a/pkg/tcpip/ports/BUILD +++ b/pkg/tcpip/ports/BUILD @@ -1,12 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "ports", srcs = ["ports.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/ports", visibility = ["//visibility:public"], deps = [ "//pkg/sync", @@ -17,7 +15,7 @@ go_library( go_test( name = "ports_test", srcs = ["ports_test.go"], - embed = [":ports"], + library = ":ports", deps = [ "//pkg/tcpip", ], diff --git a/pkg/tcpip/sample/tun_tcp_connect/BUILD b/pkg/tcpip/sample/tun_tcp_connect/BUILD index d7496fde6..cf0a5fefe 100644 --- a/pkg/tcpip/sample/tun_tcp_connect/BUILD +++ b/pkg/tcpip/sample/tun_tcp_connect/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/pkg/tcpip/sample/tun_tcp_echo/BUILD b/pkg/tcpip/sample/tun_tcp_echo/BUILD index 875561566..43264b76d 100644 --- a/pkg/tcpip/sample/tun_tcp_echo/BUILD +++ b/pkg/tcpip/sample/tun_tcp_echo/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/pkg/tcpip/seqnum/BUILD b/pkg/tcpip/seqnum/BUILD index b31ddba2f..45f503845 100644 --- a/pkg/tcpip/seqnum/BUILD +++ b/pkg/tcpip/seqnum/BUILD @@ -1,10 +1,9 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "seqnum", srcs = ["seqnum.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/seqnum", visibility = ["//visibility:public"], ) diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index 783351a69..f5b750046 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -30,7 +29,6 @@ go_library( "stack_global_state.go", "transport_demuxer.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/stack", visibility = ["//visibility:public"], deps = [ "//pkg/ilist", @@ -81,7 +79,7 @@ go_test( name = "stack_test", size = "small", srcs = ["linkaddrcache_test.go"], - embed = [":stack"], + library = ":stack", deps = [ "//pkg/sleep", "//pkg/sync", diff --git a/pkg/tcpip/transport/icmp/BUILD b/pkg/tcpip/transport/icmp/BUILD index 3aa23d529..ac18ec5b1 100644 --- a/pkg/tcpip/transport/icmp/BUILD +++ b/pkg/tcpip/transport/icmp/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -23,7 +23,6 @@ go_library( "icmp_packet_list.go", "protocol.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/icmp", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tcpip/transport/packet/BUILD b/pkg/tcpip/transport/packet/BUILD index 4858d150c..d22de6b26 100644 --- a/pkg/tcpip/transport/packet/BUILD +++ b/pkg/tcpip/transport/packet/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -22,7 +22,6 @@ go_library( "endpoint_state.go", "packet_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/packet", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tcpip/transport/raw/BUILD b/pkg/tcpip/transport/raw/BUILD index 2f2131ff7..c9baf4600 100644 --- a/pkg/tcpip/transport/raw/BUILD +++ b/pkg/tcpip/transport/raw/BUILD @@ -1,5 +1,5 @@ +load("//tools:defs.bzl", "go_library") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -23,7 +23,6 @@ go_library( "protocol.go", "raw_packet_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/raw", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD index 0e3ab05ad..4acd9fb9a 100644 --- a/pkg/tcpip/transport/tcp/BUILD +++ b/pkg/tcpip/transport/tcp/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -55,7 +54,6 @@ go_library( "tcp_segment_list.go", "timer.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tcpip/transport/tcp/testing/context/BUILD b/pkg/tcpip/transport/tcp/testing/context/BUILD index b33ec2087..ce6a2c31d 100644 --- a/pkg/tcpip/transport/tcp/testing/context/BUILD +++ b/pkg/tcpip/transport/tcp/testing/context/BUILD @@ -1,4 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "context", testonly = 1, srcs = ["context.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context", visibility = [ "//visibility:public", ], diff --git a/pkg/tcpip/transport/tcpconntrack/BUILD b/pkg/tcpip/transport/tcpconntrack/BUILD index 43fcc27f0..3ad6994a7 100644 --- a/pkg/tcpip/transport/tcpconntrack/BUILD +++ b/pkg/tcpip/transport/tcpconntrack/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "tcpconntrack", srcs = ["tcp_conntrack.go"], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/tcpconntrack", visibility = ["//visibility:public"], deps = [ "//pkg/tcpip/header", diff --git a/pkg/tcpip/transport/udp/BUILD b/pkg/tcpip/transport/udp/BUILD index 57ff123e3..adc908e24 100644 --- a/pkg/tcpip/transport/udp/BUILD +++ b/pkg/tcpip/transport/udp/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -25,7 +24,6 @@ go_library( "protocol.go", "udp_packet_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/udp", imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"], visibility = ["//visibility:public"], deps = [ diff --git a/pkg/tmutex/BUILD b/pkg/tmutex/BUILD index 07778e4f7..2dcba84ae 100644 --- a/pkg/tmutex/BUILD +++ b/pkg/tmutex/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "tmutex", srcs = ["tmutex.go"], - importpath = "gvisor.dev/gvisor/pkg/tmutex", visibility = ["//:sandbox"], ) @@ -14,6 +12,6 @@ go_test( name = "tmutex_test", size = "medium", srcs = ["tmutex_test.go"], - embed = [":tmutex"], + library = ":tmutex", deps = ["//pkg/sync"], ) diff --git a/pkg/unet/BUILD b/pkg/unet/BUILD index d1885ae66..a86501fa2 100644 --- a/pkg/unet/BUILD +++ b/pkg/unet/BUILD @@ -1,5 +1,4 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -9,7 +8,6 @@ go_library( "unet.go", "unet_unsafe.go", ], - importpath = "gvisor.dev/gvisor/pkg/unet", visibility = ["//visibility:public"], deps = [ "//pkg/gate", @@ -23,6 +21,6 @@ go_test( srcs = [ "unet_test.go", ], - embed = [":unet"], + library = ":unet", deps = ["//pkg/sync"], ) diff --git a/pkg/urpc/BUILD b/pkg/urpc/BUILD index b8fdc3125..850c34ed0 100644 --- a/pkg/urpc/BUILD +++ b/pkg/urpc/BUILD @@ -1,12 +1,10 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "urpc", srcs = ["urpc.go"], - importpath = "gvisor.dev/gvisor/pkg/urpc", visibility = ["//:sandbox"], deps = [ "//pkg/fd", @@ -20,6 +18,6 @@ go_test( name = "urpc_test", size = "small", srcs = ["urpc_test.go"], - embed = [":urpc"], + library = ":urpc", deps = ["//pkg/unet"], ) diff --git a/pkg/waiter/BUILD b/pkg/waiter/BUILD index 1c6890e52..852480a09 100644 --- a/pkg/waiter/BUILD +++ b/pkg/waiter/BUILD @@ -1,6 +1,5 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") -load("//tools/go_stateify:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -22,7 +21,6 @@ go_library( "waiter.go", "waiter_list.go", ], - importpath = "gvisor.dev/gvisor/pkg/waiter", visibility = ["//visibility:public"], deps = ["//pkg/sync"], ) @@ -33,5 +31,5 @@ go_test( srcs = [ "waiter_test.go", ], - embed = [":waiter"], + library = ":waiter", ) diff --git a/runsc/BUILD b/runsc/BUILD index e5587421d..b35b41d81 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -1,7 +1,6 @@ -package(licenses = ["notice"]) # Apache 2.0 +load("//tools:defs.bzl", "go_binary", "pkg_deb", "pkg_tar") -load("@io_bazel_rules_go//go:def.bzl", "go_binary") -load("@rules_pkg//:pkg.bzl", "pkg_deb", "pkg_tar") +package(licenses = ["notice"]) go_binary( name = "runsc", @@ -9,7 +8,7 @@ go_binary( "main.go", "version.go", ], - pure = "on", + pure = True, visibility = [ "//visibility:public", ], @@ -26,10 +25,12 @@ go_binary( ) # The runsc-race target is a race-compatible BUILD target. This must be built -# via "bazel build --features=race //runsc:runsc-race", since the race feature -# must apply to all dependencies due a bug in gazelle file selection. The pure -# attribute must be off because the race detector requires linking with non-Go -# components, although we still require a static binary. +# via: bazel build --features=race //runsc:runsc-race +# +# This is neccessary because the race feature must apply to all dependencies +# due a bug in gazelle file selection. The pure attribute must be off because +# the race detector requires linking with non-Go components, although we still +# require a static binary. # # Note that in the future this might be convertible to a compatible target by # using the pure and static attributes within a select function, but select is @@ -42,7 +43,7 @@ go_binary( "main.go", "version.go", ], - static = "on", + static = True, visibility = [ "//visibility:public", ], @@ -82,7 +83,12 @@ genrule( # because they are assumes to be hermetic). srcs = [":runsc"], outs = ["version.txt"], - cmd = "$(location :runsc) -version | grep 'runsc version' | sed 's/^[^0-9]*//' > $@", + # Note that the little dance here is necessary because files in the $(SRCS) + # attribute are not executable by default, and we can't touch in place. + cmd = "cp $(location :runsc) $(@D)/runsc && \ + chmod a+x $(@D)/runsc && \ + $(@D)/runsc -version | grep version | sed 's/^[^0-9]*//' > $@ && \ + rm -f $(@D)/runsc", stamp = 1, ) @@ -109,5 +115,6 @@ sh_test( name = "version_test", size = "small", srcs = ["version_test.sh"], + args = ["$(location :runsc)"], data = [":runsc"], ) diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 3e20f8f2f..f3ebc0231 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -23,7 +23,6 @@ go_library( "strace.go", "user.go", ], - importpath = "gvisor.dev/gvisor/runsc/boot", visibility = [ "//runsc:__subpackages__", "//test:__subpackages__", @@ -107,7 +106,7 @@ go_test( "loader_test.go", "user_test.go", ], - embed = [":boot"], + library = ":boot", deps = [ "//pkg/control/server", "//pkg/log", diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index 3a9dcfc04..ce30f6c53 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -13,7 +13,6 @@ go_library( "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.dev/gvisor/runsc/boot/filter", visibility = [ "//runsc/boot:__subpackages__", ], diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD index 03391cdca..77774f43c 100644 --- a/runsc/boot/platforms/BUILD +++ b/runsc/boot/platforms/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "platforms", srcs = ["platforms.go"], - importpath = "gvisor.dev/gvisor/runsc/boot/platforms", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD index d6165f9e5..d4c7bdfbb 100644 --- a/runsc/cgroup/BUILD +++ b/runsc/cgroup/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "cgroup", srcs = ["cgroup.go"], - importpath = "gvisor.dev/gvisor/runsc/cgroup", visibility = ["//:sandbox"], deps = [ "//pkg/log", @@ -19,6 +18,6 @@ go_test( name = "cgroup_test", size = "small", srcs = ["cgroup_test.go"], - embed = [":cgroup"], + library = ":cgroup", tags = ["local"], ) diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index b94bc4fa0..09aa46434 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -34,7 +34,6 @@ go_library( "syscalls.go", "wait.go", ], - importpath = "gvisor.dev/gvisor/runsc/cmd", visibility = [ "//runsc:__subpackages__", ], @@ -73,7 +72,7 @@ go_test( data = [ "//runsc", ], - embed = [":cmd"], + library = ":cmd", deps = [ "//pkg/abi/linux", "//pkg/log", diff --git a/runsc/console/BUILD b/runsc/console/BUILD index e623c1a0f..06924bccd 100644 --- a/runsc/console/BUILD +++ b/runsc/console/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -7,7 +7,6 @@ go_library( srcs = [ "console.go", ], - importpath = "gvisor.dev/gvisor/runsc/console", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 6dea179e4..e21431e4c 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +10,6 @@ go_library( "state_file.go", "status.go", ], - importpath = "gvisor.dev/gvisor/runsc/container", visibility = [ "//runsc:__subpackages__", "//test:__subpackages__", @@ -42,7 +41,7 @@ go_test( "//runsc", "//runsc/container/test_app", ], - embed = [":container"], + library = ":container", shard_count = 5, tags = [ "requires-kvm", diff --git a/runsc/container/test_app/BUILD b/runsc/container/test_app/BUILD index bfd338bb6..e200bafd9 100644 --- a/runsc/container/test_app/BUILD +++ b/runsc/container/test_app/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) @@ -9,7 +9,7 @@ go_binary( "fds.go", "test_app.go", ], - pure = "on", + pure = True, visibility = ["//runsc/container:__pkg__"], deps = [ "//pkg/unet", diff --git a/runsc/criutil/BUILD b/runsc/criutil/BUILD index 558133a0e..8a571a000 100644 --- a/runsc/criutil/BUILD +++ b/runsc/criutil/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "criutil", testonly = 1, srcs = ["criutil.go"], - importpath = "gvisor.dev/gvisor/runsc/criutil", visibility = ["//:sandbox"], deps = ["//runsc/testutil"], ) diff --git a/runsc/dockerutil/BUILD b/runsc/dockerutil/BUILD index 0e0423504..8621af901 100644 --- a/runsc/dockerutil/BUILD +++ b/runsc/dockerutil/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "dockerutil", testonly = 1, srcs = ["dockerutil.go"], - importpath = "gvisor.dev/gvisor/runsc/dockerutil", visibility = ["//:sandbox"], deps = [ "//runsc/testutil", diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index a9582d92b..64a406ae2 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,10 +10,7 @@ go_library( "fsgofer_arm64_unsafe.go", "fsgofer_unsafe.go", ], - importpath = "gvisor.dev/gvisor/runsc/fsgofer", - visibility = [ - "//runsc:__subpackages__", - ], + visibility = ["//runsc:__subpackages__"], deps = [ "//pkg/abi/linux", "//pkg/fd", @@ -30,7 +27,7 @@ go_test( name = "fsgofer_test", size = "small", srcs = ["fsgofer_test.go"], - embed = [":fsgofer"], + library = ":fsgofer", deps = [ "//pkg/log", "//pkg/p9", diff --git a/runsc/fsgofer/filter/BUILD b/runsc/fsgofer/filter/BUILD index bac73f89d..82b48ef32 100644 --- a/runsc/fsgofer/filter/BUILD +++ b/runsc/fsgofer/filter/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -13,7 +13,6 @@ go_library( "extra_filters_race.go", "filter.go", ], - importpath = "gvisor.dev/gvisor/runsc/fsgofer/filter", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index ddbc37456..c95d50294 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -9,7 +9,6 @@ go_library( "network_unsafe.go", "sandbox.go", ], - importpath = "gvisor.dev/gvisor/runsc/sandbox", visibility = [ "//runsc:__subpackages__", ], diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD index 205638803..4ccd77f63 100644 --- a/runsc/specutils/BUILD +++ b/runsc/specutils/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +10,6 @@ go_library( "namespace.go", "specutils.go", ], - importpath = "gvisor.dev/gvisor/runsc/specutils", visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", @@ -28,6 +27,6 @@ go_test( name = "specutils_test", size = "small", srcs = ["specutils_test.go"], - embed = [":specutils"], + library = ":specutils", deps = ["@com_github_opencontainers_runtime-spec//specs-go:go_default_library"], ) diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD index 3c3027cb5..f845120b0 100644 --- a/runsc/testutil/BUILD +++ b/runsc/testutil/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -6,7 +6,6 @@ go_library( name = "testutil", testonly = 1, srcs = ["testutil.go"], - importpath = "gvisor.dev/gvisor/runsc/testutil", visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/runsc/version_test.sh b/runsc/version_test.sh index cc0ca3f05..747350654 100755 --- a/runsc/version_test.sh +++ b/runsc/version_test.sh @@ -16,7 +16,7 @@ set -euf -x -o pipefail -readonly runsc="${TEST_SRCDIR}/__main__/runsc/linux_amd64_pure_stripped/runsc" +readonly runsc="$1" readonly version=$($runsc --version) # Version should should not match VERSION, which is the default and which will diff --git a/scripts/common.sh b/scripts/common.sh index fdb1aa142..cd91b9f8e 100755 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -16,11 +16,7 @@ set -xeou pipefail -if [[ -f $(dirname $0)/common_google.sh ]]; then - source $(dirname $0)/common_google.sh -else - source $(dirname $0)/common_bazel.sh -fi +source $(dirname $0)/common_build.sh # Ensure it attempts to collect logs in all cases. trap collect_logs EXIT diff --git a/scripts/common_bazel.sh b/scripts/common_bazel.sh deleted file mode 100755 index a473a88a4..000000000 --- a/scripts/common_bazel.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash - -# Copyright 2019 The gVisor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Install the latest version of Bazel and log the version. -(which use_bazel.sh && use_bazel.sh latest) || which bazel -bazel version - -# Switch into the workspace; only necessary if run with kokoro. -if [[ -v KOKORO_GIT_COMMIT ]] && [[ -d git/repo ]]; then - cd git/repo -elif [[ -v KOKORO_GIT_COMMIT ]] && [[ -d github/repo ]]; then - cd github/repo -fi - -# Set the standard bazel flags. -declare -r BAZEL_FLAGS=( - "--show_timestamps" - "--test_output=errors" - "--keep_going" - "--verbose_failures=true" -) -if [[ -v KOKORO_BAZEL_AUTH_CREDENTIAL ]]; then - declare -r BAZEL_RBE_AUTH_FLAGS=( - "--auth_credentials=${KOKORO_BAZEL_AUTH_CREDENTIAL}" - ) - declare -r BAZEL_RBE_FLAGS=("--config=remote") -fi - -# Wrap bazel. -function build() { - bazel build "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" 2>&1 | - tee /dev/fd/2 | grep -E '^ bazel-bin/' | awk '{ print $1; }' -} - -function test() { - bazel test "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" -} - -function run() { - local binary=$1 - shift - bazel run "${binary}" -- "$@" -} - -function run_as_root() { - local binary=$1 - shift - bazel run --run_under="sudo" "${binary}" -- "$@" -} - -function collect_logs() { - # Zip out everything into a convenient form. - if [[ -v KOKORO_ARTIFACTS_DIR ]] && [[ -e bazel-testlogs ]]; then - # Merge results files of all shards for each test suite. - for d in `find -L "bazel-testlogs" -name 'shard_*_of_*' | xargs dirname | sort | uniq`; do - junitparser merge `find $d -name test.xml` $d/test.xml - cat $d/shard_*_of_*/test.log > $d/test.log - ls -l $d/shard_*_of_*/test.outputs/outputs.zip && zip -r -1 $d/outputs.zip $d/shard_*_of_*/test.outputs/outputs.zip - done - find -L "bazel-testlogs" -name 'shard_*_of_*' | xargs rm -rf - # Move test logs to Kokoro directory. tar is used to conveniently perform - # renames while moving files. - find -L "bazel-testlogs" -name "test.xml" -o -name "test.log" -o -name "outputs.zip" | - tar --create --files-from - --transform 's/test\./sponge_log./' | - tar --extract --directory ${KOKORO_ARTIFACTS_DIR} - - # Collect sentry logs, if any. - if [[ -v RUNSC_LOGS_DIR ]] && [[ -d "${RUNSC_LOGS_DIR}" ]]; then - # Check if the directory is empty or not (only the first line it needed). - local -r logs=$(ls "${RUNSC_LOGS_DIR}" | head -n1) - if [[ "${logs}" ]]; then - local -r archive=runsc_logs_"${RUNTIME}".tar.gz - if [[ -v KOKORO_BUILD_ARTIFACTS_SUBDIR ]]; then - echo "runsc logs will be uploaded to:" - echo " gsutil cp gs://gvisor/logs/${KOKORO_BUILD_ARTIFACTS_SUBDIR}/${archive} /tmp" - echo " https://storage.cloud.google.com/gvisor/logs/${KOKORO_BUILD_ARTIFACTS_SUBDIR}/${archive}" - fi - tar --create --gzip --file="${KOKORO_ARTIFACTS_DIR}/${archive}" -C "${RUNSC_LOGS_DIR}" . - fi - fi - fi -} - -function find_branch_name() { - git branch --show-current || git rev-parse HEAD || bazel info workspace | xargs basename -} diff --git a/scripts/common_build.sh b/scripts/common_build.sh new file mode 100755 index 000000000..a473a88a4 --- /dev/null +++ b/scripts/common_build.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +# Copyright 2019 The gVisor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Install the latest version of Bazel and log the version. +(which use_bazel.sh && use_bazel.sh latest) || which bazel +bazel version + +# Switch into the workspace; only necessary if run with kokoro. +if [[ -v KOKORO_GIT_COMMIT ]] && [[ -d git/repo ]]; then + cd git/repo +elif [[ -v KOKORO_GIT_COMMIT ]] && [[ -d github/repo ]]; then + cd github/repo +fi + +# Set the standard bazel flags. +declare -r BAZEL_FLAGS=( + "--show_timestamps" + "--test_output=errors" + "--keep_going" + "--verbose_failures=true" +) +if [[ -v KOKORO_BAZEL_AUTH_CREDENTIAL ]]; then + declare -r BAZEL_RBE_AUTH_FLAGS=( + "--auth_credentials=${KOKORO_BAZEL_AUTH_CREDENTIAL}" + ) + declare -r BAZEL_RBE_FLAGS=("--config=remote") +fi + +# Wrap bazel. +function build() { + bazel build "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" 2>&1 | + tee /dev/fd/2 | grep -E '^ bazel-bin/' | awk '{ print $1; }' +} + +function test() { + bazel test "${BAZEL_RBE_FLAGS[@]}" "${BAZEL_RBE_AUTH_FLAGS[@]}" "${BAZEL_FLAGS[@]}" "$@" +} + +function run() { + local binary=$1 + shift + bazel run "${binary}" -- "$@" +} + +function run_as_root() { + local binary=$1 + shift + bazel run --run_under="sudo" "${binary}" -- "$@" +} + +function collect_logs() { + # Zip out everything into a convenient form. + if [[ -v KOKORO_ARTIFACTS_DIR ]] && [[ -e bazel-testlogs ]]; then + # Merge results files of all shards for each test suite. + for d in `find -L "bazel-testlogs" -name 'shard_*_of_*' | xargs dirname | sort | uniq`; do + junitparser merge `find $d -name test.xml` $d/test.xml + cat $d/shard_*_of_*/test.log > $d/test.log + ls -l $d/shard_*_of_*/test.outputs/outputs.zip && zip -r -1 $d/outputs.zip $d/shard_*_of_*/test.outputs/outputs.zip + done + find -L "bazel-testlogs" -name 'shard_*_of_*' | xargs rm -rf + # Move test logs to Kokoro directory. tar is used to conveniently perform + # renames while moving files. + find -L "bazel-testlogs" -name "test.xml" -o -name "test.log" -o -name "outputs.zip" | + tar --create --files-from - --transform 's/test\./sponge_log./' | + tar --extract --directory ${KOKORO_ARTIFACTS_DIR} + + # Collect sentry logs, if any. + if [[ -v RUNSC_LOGS_DIR ]] && [[ -d "${RUNSC_LOGS_DIR}" ]]; then + # Check if the directory is empty or not (only the first line it needed). + local -r logs=$(ls "${RUNSC_LOGS_DIR}" | head -n1) + if [[ "${logs}" ]]; then + local -r archive=runsc_logs_"${RUNTIME}".tar.gz + if [[ -v KOKORO_BUILD_ARTIFACTS_SUBDIR ]]; then + echo "runsc logs will be uploaded to:" + echo " gsutil cp gs://gvisor/logs/${KOKORO_BUILD_ARTIFACTS_SUBDIR}/${archive} /tmp" + echo " https://storage.cloud.google.com/gvisor/logs/${KOKORO_BUILD_ARTIFACTS_SUBDIR}/${archive}" + fi + tar --create --gzip --file="${KOKORO_ARTIFACTS_DIR}/${archive}" -C "${RUNSC_LOGS_DIR}" . + fi + fi + fi +} + +function find_branch_name() { + git branch --show-current || git rev-parse HEAD || bazel info workspace | xargs basename +} diff --git a/test/BUILD b/test/BUILD index bf834d994..34b950644 100644 --- a/test/BUILD +++ b/test/BUILD @@ -1,44 +1 @@ -package(licenses = ["notice"]) # Apache 2.0 - -# We need to define a bazel platform and toolchain to specify dockerPrivileged -# and dockerRunAsRoot options, they are required to run tests on the RBE -# cluster in Kokoro. -alias( - name = "rbe_ubuntu1604", - actual = ":rbe_ubuntu1604_r346485", -) - -platform( - name = "rbe_ubuntu1604_r346485", - constraint_values = [ - "@bazel_tools//platforms:x86_64", - "@bazel_tools//platforms:linux", - "@bazel_tools//tools/cpp:clang", - "@bazel_toolchains//constraints:xenial", - "@bazel_toolchains//constraints/sanitizers:support_msan", - ], - remote_execution_properties = """ - properties: { - name: "container-image" - value:"docker://gcr.io/cloud-marketplace/google/rbe-ubuntu16-04@sha256:93f7e127196b9b653d39830c50f8b05d49ef6fd8739a9b5b8ab16e1df5399e50" - } - properties: { - name: "dockerAddCapabilities" - value: "SYS_ADMIN" - } - properties: { - name: "dockerPrivileged" - value: "true" - } - """, -) - -toolchain( - name = "cc-toolchain-clang-x86_64-default", - exec_compatible_with = [ - ], - target_compatible_with = [ - ], - toolchain = "@bazel_toolchains//configs/ubuntu16_04_clang/10.0.0/bazel_2.0.0/cc:cc-compiler-k8", - toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", -) +package(licenses = ["notice"]) diff --git a/test/e2e/BUILD b/test/e2e/BUILD index 4fe03a220..76e04f878 100644 --- a/test/e2e/BUILD +++ b/test/e2e/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -10,7 +10,7 @@ go_test( "integration_test.go", "regression_test.go", ], - embed = [":integration"], + library = ":integration", tags = [ # Requires docker and runsc to be configured before the test runs. "manual", @@ -29,5 +29,4 @@ go_test( go_library( name = "integration", srcs = ["integration.go"], - importpath = "gvisor.dev/gvisor/test/integration", ) diff --git a/test/image/BUILD b/test/image/BUILD index 09b0a0ad5..7392ac54e 100644 --- a/test/image/BUILD +++ b/test/image/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -14,7 +14,7 @@ go_test( "ruby.rb", "ruby.sh", ], - embed = [":image"], + library = ":image", tags = [ # Requires docker and runsc to be configured before the test runs. "manual", @@ -30,5 +30,4 @@ go_test( go_library( name = "image", srcs = ["image.go"], - importpath = "gvisor.dev/gvisor/test/image", ) diff --git a/test/iptables/BUILD b/test/iptables/BUILD index 22f470092..6bb3b82b5 100644 --- a/test/iptables/BUILD +++ b/test/iptables/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) @@ -12,7 +12,6 @@ go_library( "iptables_util.go", "nat.go", ], - importpath = "gvisor.dev/gvisor/test/iptables", visibility = ["//test/iptables:__subpackages__"], deps = [ "//runsc/testutil", @@ -24,7 +23,7 @@ go_test( srcs = [ "iptables_test.go", ], - embed = [":iptables"], + library = ":iptables", tags = [ "local", "manual", diff --git a/test/iptables/runner/BUILD b/test/iptables/runner/BUILD index a5b6f082c..b9199387a 100644 --- a/test/iptables/runner/BUILD +++ b/test/iptables/runner/BUILD @@ -1,15 +1,21 @@ -load("@io_bazel_rules_docker//go:image.bzl", "go_image") -load("@io_bazel_rules_docker//container:container.bzl", "container_image") +load("//tools:defs.bzl", "container_image", "go_binary", "go_image") package(licenses = ["notice"]) +go_binary( + name = "runner", + testonly = 1, + srcs = ["main.go"], + deps = ["//test/iptables"], +) + container_image( name = "iptables-base", base = "@iptables-test//image", ) go_image( - name = "runner", + name = "runner-image", testonly = 1, srcs = ["main.go"], base = ":iptables-base", diff --git a/test/root/BUILD b/test/root/BUILD index d5dd9bca2..23ce2a70f 100644 --- a/test/root/BUILD +++ b/test/root/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "root", srcs = ["root.go"], - importpath = "gvisor.dev/gvisor/test/root", ) go_test( @@ -21,7 +20,7 @@ go_test( data = [ "//runsc", ], - embed = [":root"], + library = ":root", tags = [ # Requires docker and runsc to be configured before the test runs. # Also test only runs as root. diff --git a/test/root/testdata/BUILD b/test/root/testdata/BUILD index 125633680..bca5f9cab 100644 --- a/test/root/testdata/BUILD +++ b/test/root/testdata/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -12,7 +12,6 @@ go_library( "sandbox.go", "simple.go", ], - importpath = "gvisor.dev/gvisor/test/root/testdata", visibility = [ "//visibility:public", ], diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD index 367295206..2c472bf8d 100644 --- a/test/runtimes/BUILD +++ b/test/runtimes/BUILD @@ -1,6 +1,6 @@ # These packages are used to run language runtime tests inside gVisor sandboxes. -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_test") +load("//tools:defs.bzl", "go_binary", "go_test") load("//test/runtimes:build_defs.bzl", "runtime_test") package(licenses = ["notice"]) @@ -49,5 +49,5 @@ go_test( name = "blacklist_test", size = "small", srcs = ["blacklist_test.go"], - embed = [":runner"], + library = ":runner", ) diff --git a/test/runtimes/build_defs.bzl b/test/runtimes/build_defs.bzl index 6f84ca852..92e275a76 100644 --- a/test/runtimes/build_defs.bzl +++ b/test/runtimes/build_defs.bzl @@ -1,6 +1,6 @@ """Defines a rule for runtime test targets.""" -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_test", "loopback") def runtime_test( name, @@ -34,6 +34,7 @@ def runtime_test( ] data = [ ":runner", + loopback, ] if blacklist_file: args += ["--blacklist_file", "test/runtimes/" + blacklist_file] @@ -61,7 +62,7 @@ def blacklist_test(name, blacklist_file): """Test that a blacklist parses correctly.""" go_test( name = name + "_blacklist_test", - embed = [":runner"], + library = ":runner", srcs = ["blacklist_test.go"], args = ["--blacklist_file", "test/runtimes/" + blacklist_file], data = [blacklist_file], diff --git a/test/runtimes/images/proctor/BUILD b/test/runtimes/images/proctor/BUILD index 09dc6c42f..85e004c45 100644 --- a/test/runtimes/images/proctor/BUILD +++ b/test/runtimes/images/proctor/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_test") +load("//tools:defs.bzl", "go_binary", "go_test") package(licenses = ["notice"]) @@ -19,7 +19,7 @@ go_test( name = "proctor_test", size = "small", srcs = ["proctor_test.go"], - embed = [":proctor"], + library = ":proctor", deps = [ "//runsc/testutil", ], diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 90d52e73b..40e974314 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") load("//test/syscalls:build_defs.bzl", "syscall_test") package(licenses = ["notice"]) diff --git a/test/syscalls/build_defs.bzl b/test/syscalls/build_defs.bzl index aaf77c65b..1df761dd0 100644 --- a/test/syscalls/build_defs.bzl +++ b/test/syscalls/build_defs.bzl @@ -1,5 +1,7 @@ """Defines a rule for syscall test targets.""" +load("//tools:defs.bzl", "loopback") + # syscall_test is a macro that will create targets to run the given test target # on the host (native) and runsc. def syscall_test( @@ -135,6 +137,7 @@ def _syscall_test( name = name, data = [ ":syscall_test_runner", + loopback, test, ], args = args, @@ -148,6 +151,3 @@ def sh_test(**kwargs): native.sh_test( **kwargs ) - -def select_for_linux(for_linux, for_others = []): - return for_linux diff --git a/test/syscalls/gtest/BUILD b/test/syscalls/gtest/BUILD index 9293f25cb..de4b2727c 100644 --- a/test/syscalls/gtest/BUILD +++ b/test/syscalls/gtest/BUILD @@ -1,12 +1,9 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "gtest", srcs = ["gtest.go"], - importpath = "gvisor.dev/gvisor/test/syscalls/gtest", - visibility = [ - "//test:__subpackages__", - ], + visibility = ["//:sandbox"], ) diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 4c7ec3f06..c2ef50c1d 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1,5 +1,4 @@ -load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") -load("//test/syscalls:build_defs.bzl", "select_for_linux") +load("//tools:defs.bzl", "cc_binary", "cc_library", "default_net_util", "select_system") package( default_visibility = ["//:sandbox"], @@ -126,13 +125,11 @@ cc_library( testonly = 1, srcs = [ "socket_test_util.cc", - ] + select_for_linux( - [ - "socket_test_util_impl.cc", - ], - ), + "socket_test_util_impl.cc", + ], hdrs = ["socket_test_util.h"], - deps = [ + defines = select_system(), + deps = default_net_util() + [ "@com_google_googletest//:gtest", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", @@ -143,8 +140,7 @@ cc_library( "//test/util:temp_path", "//test/util:test_util", "//test/util:thread_util", - ] + select_for_linux([ - ]), + ], ) cc_library( @@ -1443,6 +1439,7 @@ cc_binary( srcs = ["arch_prctl.cc"], linkstatic = 1, deps = [ + "//test/util:file_descriptor", "//test/util:test_main", "//test/util:test_util", "@com_google_googletest//:gtest", @@ -3383,11 +3380,11 @@ cc_library( name = "udp_socket_test_cases", testonly = 1, srcs = [ - "udp_socket_test_cases.cc", - ] + select_for_linux([ "udp_socket_errqueue_test_case.cc", - ]), + "udp_socket_test_cases.cc", + ], hdrs = ["udp_socket_test_cases.h"], + defines = select_system(), deps = [ ":socket_test_util", ":unix_domain_socket_test_util", diff --git a/test/syscalls/linux/arch_prctl.cc b/test/syscalls/linux/arch_prctl.cc index 81bf5a775..3a901faf5 100644 --- a/test/syscalls/linux/arch_prctl.cc +++ b/test/syscalls/linux/arch_prctl.cc @@ -14,8 +14,10 @@ #include #include +#include #include "gtest/gtest.h" +#include "test/util/file_descriptor.h" #include "test/util/test_util.h" // glibc does not provide a prototype for arch_prctl() so declare it here. diff --git a/test/syscalls/linux/rseq/BUILD b/test/syscalls/linux/rseq/BUILD index 5cfe4e56f..ed488dbc2 100644 --- a/test/syscalls/linux/rseq/BUILD +++ b/test/syscalls/linux/rseq/BUILD @@ -1,8 +1,7 @@ # This package contains a standalone rseq test binary. This binary must not # depend on libc, which might use rseq itself. -load("@bazel_tools//tools/cpp:cc_flags_supplier.bzl", "cc_flags_supplier") -load("@rules_cc//cc:defs.bzl", "cc_library") +load("//tools:defs.bzl", "cc_flags_supplier", "cc_library", "cc_toolchain") package(licenses = ["notice"]) @@ -37,8 +36,8 @@ genrule( "$(location start.S)", ]), toolchains = [ + cc_toolchain, ":no_pie_cc_flags", - "@bazel_tools//tools/cpp:current_cc_toolchain", ], visibility = ["//:sandbox"], ) diff --git a/test/syscalls/linux/udp_socket_errqueue_test_case.cc b/test/syscalls/linux/udp_socket_errqueue_test_case.cc index 147978f46..9a24e1df0 100644 --- a/test/syscalls/linux/udp_socket_errqueue_test_case.cc +++ b/test/syscalls/linux/udp_socket_errqueue_test_case.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifndef __fuchsia__ + #include "test/syscalls/linux/udp_socket_test_cases.h" #include @@ -52,3 +54,5 @@ TEST_P(UdpSocketTest, ErrorQueue) { } // namespace testing } // namespace gvisor + +#endif // __fuchsia__ diff --git a/test/uds/BUILD b/test/uds/BUILD index a3843e699..51e2c7ce8 100644 --- a/test/uds/BUILD +++ b/test/uds/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package( default_visibility = ["//:sandbox"], @@ -9,7 +9,6 @@ go_library( name = "uds", testonly = 1, srcs = ["uds.go"], - importpath = "gvisor.dev/gvisor/test/uds", deps = [ "//pkg/log", "//pkg/unet", diff --git a/test/util/BUILD b/test/util/BUILD index cbc728159..3c732be62 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -1,5 +1,4 @@ -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") -load("//test/syscalls:build_defs.bzl", "select_for_linux") +load("//tools:defs.bzl", "cc_library", "cc_test", "select_system") package( default_visibility = ["//:sandbox"], @@ -142,12 +141,13 @@ cc_library( cc_library( name = "save_util", testonly = 1, - srcs = ["save_util.cc"] + - select_for_linux( - ["save_util_linux.cc"], - ["save_util_other.cc"], - ), + srcs = [ + "save_util.cc", + "save_util_linux.cc", + "save_util_other.cc", + ], hdrs = ["save_util.h"], + defines = select_system(), ) cc_library( @@ -234,13 +234,16 @@ cc_library( testonly = 1, srcs = [ "test_util.cc", - ] + select_for_linux( - [ - "test_util_impl.cc", - "test_util_runfiles.cc", + "test_util_impl.cc", + "test_util_runfiles.cc", + ], + hdrs = ["test_util.h"], + defines = select_system( + fuchsia = [ + "__opensource__", + "__fuchsia__", ], ), - hdrs = ["test_util.h"], deps = [ ":fs_util", ":logging", diff --git a/test/util/save_util_linux.cc b/test/util/save_util_linux.cc index cd56118c0..d0aea8e6a 100644 --- a/test/util/save_util_linux.cc +++ b/test/util/save_util_linux.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef __linux__ + #include #include #include @@ -43,3 +45,5 @@ void MaybeSave() { } // namespace testing } // namespace gvisor + +#endif diff --git a/test/util/save_util_other.cc b/test/util/save_util_other.cc index 1aca663b7..931af2c29 100644 --- a/test/util/save_util_other.cc +++ b/test/util/save_util_other.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifndef __linux__ + namespace gvisor { namespace testing { @@ -21,3 +23,5 @@ void MaybeSave() { } // namespace testing } // namespace gvisor + +#endif diff --git a/test/util/test_util_runfiles.cc b/test/util/test_util_runfiles.cc index 7210094eb..694d21692 100644 --- a/test/util/test_util_runfiles.cc +++ b/test/util/test_util_runfiles.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifndef __fuchsia__ + #include #include @@ -44,3 +46,5 @@ std::string RunfilePath(std::string path) { } // namespace testing } // namespace gvisor + +#endif // __fuchsia__ diff --git a/tools/BUILD b/tools/BUILD new file mode 100644 index 000000000..e73a9c885 --- /dev/null +++ b/tools/BUILD @@ -0,0 +1,3 @@ +package(licenses = ["notice"]) + +exports_files(["nogo.js"]) diff --git a/tools/build/BUILD b/tools/build/BUILD new file mode 100644 index 000000000..0c0ce3f4d --- /dev/null +++ b/tools/build/BUILD @@ -0,0 +1,10 @@ +package(licenses = ["notice"]) + +# In bazel, no special support is required for loopback networking. This is +# just a dummy data target that does not change the test environment. +genrule( + name = "loopback", + outs = ["loopback.txt"], + cmd = "touch $@", + visibility = ["//visibility:public"], +) diff --git a/tools/build/defs.bzl b/tools/build/defs.bzl new file mode 100644 index 000000000..d0556abd1 --- /dev/null +++ b/tools/build/defs.bzl @@ -0,0 +1,91 @@ +"""Bazel implementations of standard rules.""" + +load("@bazel_tools//tools/cpp:cc_flags_supplier.bzl", _cc_flags_supplier = "cc_flags_supplier") +load("@io_bazel_rules_go//go:def.bzl", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_library = "go_library", _go_test = "go_test", _go_tool_library = "go_tool_library") +load("@io_bazel_rules_go//proto:def.bzl", _go_proto_library = "go_proto_library") +load("@rules_cc//cc:defs.bzl", _cc_binary = "cc_binary", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test") +load("@rules_pkg//:pkg.bzl", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar") +load("@io_bazel_rules_docker//go:image.bzl", _go_image = "go_image") +load("@io_bazel_rules_docker//container:container.bzl", _container_image = "container_image") +load("@pydeps//:requirements.bzl", _py_requirement = "requirement") + +container_image = _container_image +cc_binary = _cc_binary +cc_library = _cc_library +cc_flags_supplier = _cc_flags_supplier +cc_proto_library = _cc_proto_library +cc_test = _cc_test +cc_toolchain = "@bazel_tools//tools/cpp:current_cc_toolchain" +go_image = _go_image +go_embed_data = _go_embed_data +loopback = "//tools/build:loopback" +proto_library = native.proto_library +pkg_deb = _pkg_deb +pkg_tar = _pkg_tar +py_library = native.py_library +py_binary = native.py_binary +py_test = native.py_test + +def go_binary(name, static = False, pure = False, **kwargs): + if static: + kwargs["static"] = "on" + if pure: + kwargs["pure"] = "on" + _go_binary( + name = name, + **kwargs + ) + +def go_library(name, **kwargs): + _go_library( + name = name, + importpath = "gvisor.dev/gvisor/" + native.package_name(), + **kwargs + ) + +def go_tool_library(name, **kwargs): + _go_tool_library( + name = name, + importpath = "gvisor.dev/gvisor/" + native.package_name(), + **kwargs + ) + +def go_proto_library(name, proto, **kwargs): + deps = kwargs.pop("deps", []) + _go_proto_library( + name = name, + importpath = "gvisor.dev/gvisor/" + native.package_name() + "/" + name, + proto = proto, + deps = [dep.replace("_proto", "_go_proto") for dep in deps], + **kwargs + ) + +def go_test(name, **kwargs): + library = kwargs.pop("library", None) + if library: + kwargs["embed"] = [library] + _go_test( + name = name, + **kwargs + ) + +def py_requirement(name, direct = False): + return _py_requirement(name) + +def select_arch(amd64 = "amd64", arm64 = "arm64", default = None, **kwargs): + values = { + "@bazel_tools//src/conditions:linux_x86_64": amd64, + "@bazel_tools//src/conditions:linux_aarch64": arm64, + } + if default: + values["//conditions:default"] = default + return select(values, **kwargs) + +def select_system(linux = ["__linux__"], **kwargs): + return linux # Only Linux supported. + +def default_installer(): + return None + +def default_net_util(): + return [] # Nothing needed. diff --git a/tools/checkunsafe/BUILD b/tools/checkunsafe/BUILD index d85c56131..92ba8ab06 100644 --- a/tools/checkunsafe/BUILD +++ b/tools/checkunsafe/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_tool_library") +load("//tools:defs.bzl", "go_tool_library") package(licenses = ["notice"]) go_tool_library( name = "checkunsafe", srcs = ["check_unsafe.go"], - importpath = "checkunsafe", visibility = ["//visibility:public"], deps = [ "@org_golang_x_tools//go/analysis:go_tool_library", diff --git a/tools/defs.bzl b/tools/defs.bzl new file mode 100644 index 000000000..819f12b0d --- /dev/null +++ b/tools/defs.bzl @@ -0,0 +1,154 @@ +"""Wrappers for common build rules. + +These wrappers apply common BUILD configurations (e.g., proto_library +automagically creating cc_ and go_ proto targets) and act as a single point of +change for Google-internal and bazel-compatible rules. +""" + +load("//tools/go_stateify:defs.bzl", "go_stateify") +load("//tools/go_marshal:defs.bzl", "go_marshal", "marshal_deps", "marshal_test_deps") +load("//tools/build:defs.bzl", _cc_binary = "cc_binary", _cc_flags_supplier = "cc_flags_supplier", _cc_library = "cc_library", _cc_proto_library = "cc_proto_library", _cc_test = "cc_test", _cc_toolchain = "cc_toolchain", _container_image = "container_image", _default_installer = "default_installer", _default_net_util = "default_net_util", _go_binary = "go_binary", _go_embed_data = "go_embed_data", _go_image = "go_image", _go_library = "go_library", _go_proto_library = "go_proto_library", _go_test = "go_test", _go_tool_library = "go_tool_library", _loopback = "loopback", _pkg_deb = "pkg_deb", _pkg_tar = "pkg_tar", _proto_library = "proto_library", _py_binary = "py_binary", _py_library = "py_library", _py_requirement = "py_requirement", _py_test = "py_test", _select_arch = "select_arch", _select_system = "select_system") + +# Delegate directly. +cc_binary = _cc_binary +cc_library = _cc_library +cc_test = _cc_test +cc_toolchain = _cc_toolchain +cc_flags_supplier = _cc_flags_supplier +container_image = _container_image +go_embed_data = _go_embed_data +go_image = _go_image +go_test = _go_test +go_tool_library = _go_tool_library +pkg_deb = _pkg_deb +pkg_tar = _pkg_tar +py_library = _py_library +py_binary = _py_binary +py_test = _py_test +py_requirement = _py_requirement +select_arch = _select_arch +select_system = _select_system +loopback = _loopback +default_installer = _default_installer +default_net_util = _default_net_util + +def go_binary(name, **kwargs): + """Wraps the standard go_binary. + + Args: + name: the rule name. + **kwargs: standard go_binary arguments. + """ + _go_binary( + name = name, + **kwargs + ) + +def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = False, **kwargs): + """Wraps the standard go_library and does stateification and marshalling. + + The recommended way is to use this rule with mostly identical configuration as the native + go_library rule. + + These definitions provide additional flags (stateify, marshal) that can be used + with the generators to automatically supplement the library code. + + load("//tools:defs.bzl", "go_library") + + go_library( + name = "foo", + srcs = ["foo.go"], + ) + + Args: + name: the rule name. + srcs: the library sources. + deps: the library dependencies. + imports: imports required for stateify. + stateify: whether statify is enabled (default: true). + marshal: whether marshal is enabled (default: false). + **kwargs: standard go_library arguments. + """ + if stateify: + # Only do stateification for non-state packages without manual autogen. + go_stateify( + name = name + "_state_autogen", + srcs = [src for src in srcs if src.endswith(".go")], + imports = imports, + package = name, + arch = select_arch(), + out = name + "_state_autogen.go", + ) + all_srcs = srcs + [name + "_state_autogen.go"] + if "//pkg/state" not in deps: + all_deps = deps + ["//pkg/state"] + else: + all_deps = deps + else: + all_deps = deps + all_srcs = srcs + if marshal: + go_marshal( + name = name + "_abi_autogen", + srcs = [src for src in srcs if src.endswith(".go")], + debug = False, + imports = imports, + package = name, + ) + extra_deps = [ + dep + for dep in marshal_deps + if not dep in all_deps + ] + all_deps = all_deps + extra_deps + all_srcs = srcs + [name + "_abi_autogen_unsafe.go"] + + _go_library( + name = name, + srcs = all_srcs, + deps = all_deps, + **kwargs + ) + + if marshal: + # Ignore importpath for go_test. + kwargs.pop("importpath", None) + + _go_test( + name = name + "_abi_autogen_test", + srcs = [name + "_abi_autogen_test.go"], + library = ":" + name, + deps = marshal_test_deps, + **kwargs + ) + +def proto_library(name, srcs, **kwargs): + """Wraps the standard proto_library. + + Given a proto_library named "foo", this produces three different targets: + - foo_proto: proto_library rule. + - foo_go_proto: go_proto_library rule. + - foo_cc_proto: cc_proto_library rule. + + Args: + srcs: the proto sources. + **kwargs: standard proto_library arguments. + """ + deps = kwargs.pop("deps", []) + _proto_library( + name = name + "_proto", + srcs = srcs, + deps = deps, + **kwargs + ) + _go_proto_library( + name = name + "_go_proto", + proto = ":" + name + "_proto", + deps = deps, + **kwargs + ) + _cc_proto_library( + name = name + "_cc_proto", + deps = [":" + name + "_proto"], + **kwargs + ) diff --git a/tools/go_generics/BUILD b/tools/go_generics/BUILD index 39318b877..069df3856 100644 --- a/tools/go_generics/BUILD +++ b/tools/go_generics/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/tools/go_generics/globals/BUILD b/tools/go_generics/globals/BUILD index 74853c7d2..38caa3ce7 100644 --- a/tools/go_generics/globals/BUILD +++ b/tools/go_generics/globals/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) @@ -8,6 +8,6 @@ go_library( "globals_visitor.go", "scope.go", ], - importpath = "gvisor.dev/gvisor/tools/go_generics/globals", + stateify = False, visibility = ["//tools/go_generics:__pkg__"], ) diff --git a/tools/go_generics/go_merge/BUILD b/tools/go_generics/go_merge/BUILD index 02b09120e..b7d35e272 100644 --- a/tools/go_generics/go_merge/BUILD +++ b/tools/go_generics/go_merge/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/tools/go_generics/rules_tests/BUILD b/tools/go_generics/rules_tests/BUILD index 9d26a88b7..8a329dfc6 100644 --- a/tools/go_generics/rules_tests/BUILD +++ b/tools/go_generics/rules_tests/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") +load("//tools:defs.bzl", "go_test") load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance") package(licenses = ["notice"]) diff --git a/tools/go_marshal/BUILD b/tools/go_marshal/BUILD index c862b277c..80d9c0504 100644 --- a/tools/go_marshal/BUILD +++ b/tools/go_marshal/BUILD @@ -1,6 +1,6 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") -package(licenses = ["notice"]) +licenses(["notice"]) go_binary( name = "go_marshal", diff --git a/tools/go_marshal/README.md b/tools/go_marshal/README.md index 481575bd3..4886efddf 100644 --- a/tools/go_marshal/README.md +++ b/tools/go_marshal/README.md @@ -20,19 +20,7 @@ comment `// +marshal`. # Usage -See `defs.bzl`: two new rules are provided, `go_marshal` and `go_library`. - -The recommended way to generate a go library with marshalling is to use the -`go_library` with mostly identical configuration as the native go_library rule. - -``` -load("/gvisor/tools/go_marshal:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], -) -``` +See `defs.bzl`: a new rule is provided, `go_marshal`. Under the hood, the `go_marshal` rule is used to generate a file that will appear in a Go target; the output file should appear explicitly in a srcs list. @@ -54,11 +42,7 @@ go_library( "foo.go", "foo_abi.go", ], - deps = [ - "/gvisor/pkg/abi", - "/gvisor/pkg/sentry/safemem/safemem", - "/gvisor/pkg/sentry/usermem/usermem", - ], + ... ) ``` @@ -69,22 +53,6 @@ These tests use reflection to verify properties of the ABI struct, and should be considered part of the generated interfaces (but are too expensive to execute at runtime). Ensure these tests run at some point. -``` -$ cat BUILD -load("/gvisor/tools/go_marshal:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], -) -$ blaze build :foo -$ blaze query ... -:foo_abi_autogen -:foo_abi_autogen_test -$ blaze test :foo_abi_autogen_test - -``` - # Restrictions Not all valid go type definitions can be used with `go_marshal`. `go_marshal` is @@ -131,22 +99,6 @@ for embedded structs that are not aligned. Because of this, it's generally best to avoid using `marshal:"unaligned"` and insert explicit padding fields instead. -## Debugging go_marshal - -To enable debugging output from the go marshal tool, pass the `-debug` flag to -the tool. When using the build rules from above, add a `debug = True` field to -the build rule like this: - -``` -load("/gvisor/tools/go_marshal:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], - debug = True, -) -``` - ## Modifying the `go_marshal` Tool The following are some guidelines for modifying the `go_marshal` tool: diff --git a/tools/go_marshal/analysis/BUILD b/tools/go_marshal/analysis/BUILD index c859ced77..c2a4d45c4 100644 --- a/tools/go_marshal/analysis/BUILD +++ b/tools/go_marshal/analysis/BUILD @@ -1,12 +1,11 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "analysis", testonly = 1, srcs = ["analysis_unsafe.go"], - importpath = "gvisor.dev/gvisor/tools/go_marshal/analysis", visibility = [ "//:sandbox", ], diff --git a/tools/go_marshal/defs.bzl b/tools/go_marshal/defs.bzl index c32eb559f..2918ceffe 100644 --- a/tools/go_marshal/defs.bzl +++ b/tools/go_marshal/defs.bzl @@ -1,57 +1,14 @@ -"""Marshal is a tool for generating marshalling interfaces for Go types. - -The recommended way is to use the go_library rule defined below with mostly -identical configuration as the native go_library rule. - -load("//tools/go_marshal:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], -) - -Under the hood, the go_marshal rule is used to generate a file that will -appear in a Go target; the output file should appear explicitly in a srcs list. -For example (the above is still the preferred way): - -load("//tools/go_marshal:defs.bzl", "go_marshal") - -go_marshal( - name = "foo_abi", - srcs = ["foo.go"], - out = "foo_abi.go", - package = "foo", -) - -go_library( - name = "foo", - srcs = [ - "foo.go", - "foo_abi.go", - ], - deps = [ - "//tools/go_marshal:marshal", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/usermem", - ], -) -""" - -load("@io_bazel_rules_go//go:def.bzl", _go_library = "go_library", _go_test = "go_test") +"""Marshal is a tool for generating marshalling interfaces for Go types.""" def _go_marshal_impl(ctx): """Execute the go_marshal tool.""" output = ctx.outputs.lib output_test = ctx.outputs.test - (build_dir, _, _) = ctx.build_file_path.rpartition("/BUILD") - - decl = "/".join(["gvisor.dev/gvisor", build_dir]) # Run the marshal command. args = ["-output=%s" % output.path] args += ["-pkg=%s" % ctx.attr.package] args += ["-output_test=%s" % output_test.path] - args += ["-declarationPkg=%s" % decl] if ctx.attr.debug: args += ["-debug"] @@ -83,7 +40,6 @@ go_marshal = rule( implementation = _go_marshal_impl, attrs = { "srcs": attr.label_list(mandatory = True, allow_files = True), - "libname": attr.string(mandatory = True), "imports": attr.string_list(mandatory = False), "package": attr.string(mandatory = True), "debug": attr.bool(doc = "enable debugging output from the go_marshal tool"), @@ -95,58 +51,14 @@ go_marshal = rule( }, ) -def go_library(name, srcs, deps = [], imports = [], debug = False, **kwargs): - """wraps the standard go_library and does mashalling interface generation. - - Args: - name: Same as native go_library. - srcs: Same as native go_library. - deps: Same as native go_library. - imports: Extra import paths to pass to the go_marshal tool. - debug: Enables debugging output from the go_marshal tool. - **kwargs: Remaining args to pass to the native go_library rule unmodified. - """ - go_marshal( - name = name + "_abi_autogen", - libname = name, - srcs = [src for src in srcs if src.endswith(".go")], - debug = debug, - imports = imports, - package = name, - ) - - extra_deps = [ - "//tools/go_marshal/marshal", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/usermem", - ] - - all_srcs = srcs + [name + "_abi_autogen_unsafe.go"] - all_deps = deps + [] # + extra_deps - - for extra in extra_deps: - if extra not in deps: - all_deps.append(extra) - - _go_library( - name = name, - srcs = all_srcs, - deps = all_deps, - **kwargs - ) - - # Don't pass importpath arg to go_test. - kwargs.pop("importpath", "") - - _go_test( - name = name + "_abi_autogen_test", - srcs = [name + "_abi_autogen_test.go"], - # Generated test has a fixed set of dependencies since we generate these - # tests. They should only depend on the library generated above, and the - # Marshallable interface. - deps = [ - ":" + name, - "//tools/go_marshal/analysis", - ], - **kwargs - ) +# marshal_deps are the dependencies requied by generated code. +marshal_deps = [ + "//tools/go_marshal/marshal", + "//pkg/sentry/platform/safecopy", + "//pkg/sentry/usermem", +] + +# marshal_test_deps are required by test targets. +marshal_test_deps = [ + "//tools/go_marshal/analysis", +] diff --git a/tools/go_marshal/gomarshal/BUILD b/tools/go_marshal/gomarshal/BUILD index a0eae6492..c92b59dd6 100644 --- a/tools/go_marshal/gomarshal/BUILD +++ b/tools/go_marshal/gomarshal/BUILD @@ -1,6 +1,6 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "gomarshal", @@ -10,7 +10,7 @@ go_library( "generator_tests.go", "util.go", ], - importpath = "gvisor.dev/gvisor/tools/go_marshal/gomarshal", + stateify = False, visibility = [ "//:sandbox", ], diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go index 641ccd938..8392f3f6d 100644 --- a/tools/go_marshal/gomarshal/generator.go +++ b/tools/go_marshal/gomarshal/generator.go @@ -62,15 +62,12 @@ type Generator struct { outputTest *os.File // Package name for the generated file. pkg string - // Go import path for package we're processing. This package should directly - // declare the type we're generating code for. - declaration string // Set of extra packages to import in the generated file. imports *importTable } // NewGenerator creates a new code Generator. -func NewGenerator(srcs []string, out, outTest, pkg, declaration string, imports []string) (*Generator, error) { +func NewGenerator(srcs []string, out, outTest, pkg string, imports []string) (*Generator, error) { f, err := os.OpenFile(out, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) if err != nil { return nil, fmt.Errorf("Couldn't open output file %q: %v", out, err) @@ -80,12 +77,11 @@ func NewGenerator(srcs []string, out, outTest, pkg, declaration string, imports return nil, fmt.Errorf("Couldn't open test output file %q: %v", out, err) } g := Generator{ - inputs: srcs, - output: f, - outputTest: fTest, - pkg: pkg, - declaration: declaration, - imports: newImportTable(), + inputs: srcs, + output: f, + outputTest: fTest, + pkg: pkg, + imports: newImportTable(), } for _, i := range imports { // All imports on the extra imports list are unconditionally marked as @@ -264,7 +260,7 @@ func (g *Generator) generateOne(t *ast.TypeSpec, fset *token.FileSet) *interface // generateOneTestSuite generates a test suite for the automatically generated // implementations type t. func (g *Generator) generateOneTestSuite(t *ast.TypeSpec) *testGenerator { - i := newTestGenerator(t, g.declaration) + i := newTestGenerator(t) i.emitTests() return i } @@ -359,7 +355,7 @@ func (g *Generator) Run() error { // source file. func (g *Generator) writeTests(ts []*testGenerator) error { var b sourceBuffer - b.emit("package %s_test\n\n", g.pkg) + b.emit("package %s\n\n", g.pkg) if err := b.write(g.outputTest); err != nil { return err } diff --git a/tools/go_marshal/gomarshal/generator_tests.go b/tools/go_marshal/gomarshal/generator_tests.go index df25cb5b2..bcda17c3b 100644 --- a/tools/go_marshal/gomarshal/generator_tests.go +++ b/tools/go_marshal/gomarshal/generator_tests.go @@ -46,7 +46,7 @@ type testGenerator struct { decl *importStmt } -func newTestGenerator(t *ast.TypeSpec, declaration string) *testGenerator { +func newTestGenerator(t *ast.TypeSpec) *testGenerator { if _, ok := t.Type.(*ast.StructType); !ok { panic(fmt.Sprintf("Attempting to generate code for a not struct type %v", t)) } @@ -59,14 +59,12 @@ func newTestGenerator(t *ast.TypeSpec, declaration string) *testGenerator { for _, i := range standardImports { g.imports.add(i).markUsed() } - g.decl = g.imports.add(declaration) - g.decl.markUsed() return g } func (g *testGenerator) typeName() string { - return fmt.Sprintf("%s.%s", g.decl.name, g.t.Name.Name) + return g.t.Name.Name } func (g *testGenerator) forEachField(fn func(f *ast.Field)) { diff --git a/tools/go_marshal/main.go b/tools/go_marshal/main.go index 3d12eb93c..e1a97b311 100644 --- a/tools/go_marshal/main.go +++ b/tools/go_marshal/main.go @@ -31,11 +31,10 @@ import ( ) var ( - pkg = flag.String("pkg", "", "output package") - output = flag.String("output", "", "output file") - outputTest = flag.String("output_test", "", "output file for tests") - imports = flag.String("imports", "", "comma-separated list of extra packages to import in generated code") - declarationPkg = flag.String("declarationPkg", "", "import path of target declaring the types we're generating on") + pkg = flag.String("pkg", "", "output package") + output = flag.String("output", "", "output file") + outputTest = flag.String("output_test", "", "output file for tests") + imports = flag.String("imports", "", "comma-separated list of extra packages to import in generated code") ) func main() { @@ -62,7 +61,7 @@ func main() { // as an import. extraImports = strings.Split(*imports, ",") } - g, err := gomarshal.NewGenerator(flag.Args(), *output, *outputTest, *pkg, *declarationPkg, extraImports) + g, err := gomarshal.NewGenerator(flag.Args(), *output, *outputTest, *pkg, extraImports) if err != nil { panic(err) } diff --git a/tools/go_marshal/marshal/BUILD b/tools/go_marshal/marshal/BUILD index 47dda97a1..ad508c72f 100644 --- a/tools/go_marshal/marshal/BUILD +++ b/tools/go_marshal/marshal/BUILD @@ -1,13 +1,12 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "marshal", srcs = [ "marshal.go", ], - importpath = "gvisor.dev/gvisor/tools/go_marshal/marshal", visibility = [ "//:sandbox", ], diff --git a/tools/go_marshal/test/BUILD b/tools/go_marshal/test/BUILD index d412e1ccf..38ba49fed 100644 --- a/tools/go_marshal/test/BUILD +++ b/tools/go_marshal/test/BUILD @@ -1,7 +1,6 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_marshal:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library", "go_test") -package(licenses = ["notice"]) +licenses(["notice"]) package_group( name = "gomarshal_test", @@ -25,6 +24,6 @@ go_library( name = "test", testonly = 1, srcs = ["test.go"], - importpath = "gvisor.dev/gvisor/tools/go_marshal/test", + marshal = True, deps = ["//tools/go_marshal/test/external"], ) diff --git a/tools/go_marshal/test/external/BUILD b/tools/go_marshal/test/external/BUILD index 9bb89e1da..0cf6da603 100644 --- a/tools/go_marshal/test/external/BUILD +++ b/tools/go_marshal/test/external/BUILD @@ -1,11 +1,11 @@ -load("//tools/go_marshal:defs.bzl", "go_library") +load("//tools:defs.bzl", "go_library") -package(licenses = ["notice"]) +licenses(["notice"]) go_library( name = "external", testonly = 1, srcs = ["external.go"], - importpath = "gvisor.dev/gvisor/tools/go_marshal/test/external", + marshal = True, visibility = ["//tools/go_marshal/test:gomarshal_test"], ) diff --git a/tools/go_stateify/BUILD b/tools/go_stateify/BUILD index bb53f8ae9..a133d6f8b 100644 --- a/tools/go_stateify/BUILD +++ b/tools/go_stateify/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/tools/go_stateify/defs.bzl b/tools/go_stateify/defs.bzl index 33267c074..0f261d89f 100644 --- a/tools/go_stateify/defs.bzl +++ b/tools/go_stateify/defs.bzl @@ -1,41 +1,4 @@ -"""Stateify is a tool for generating state wrappers for Go types. - -The recommended way is to use the go_library rule defined below with mostly -identical configuration as the native go_library rule. - -load("//tools/go_stateify:defs.bzl", "go_library") - -go_library( - name = "foo", - srcs = ["foo.go"], -) - -Under the hood, the go_stateify rule is used to generate a file that will -appear in a Go target; the output file should appear explicitly in a srcs list. -For example (the above is still the preferred way): - -load("//tools/go_stateify:defs.bzl", "go_stateify") - -go_stateify( - name = "foo_state", - srcs = ["foo.go"], - out = "foo_state.go", - package = "foo", -) - -go_library( - name = "foo", - srcs = [ - "foo.go", - "foo_state.go", - ], - deps = [ - "//pkg/state", - ], -) -""" - -load("@io_bazel_rules_go//go:def.bzl", _go_library = "go_library") +"""Stateify is a tool for generating state wrappers for Go types.""" def _go_stateify_impl(ctx): """Implementation for the stateify tool.""" @@ -103,43 +66,3 @@ files and must be added to the srcs of the relevant go_library. "_statepkg": attr.string(default = "gvisor.dev/gvisor/pkg/state"), }, ) - -def go_library(name, srcs, deps = [], imports = [], **kwargs): - """Standard go_library wrapped which generates state source files. - - Args: - name: the name of the go_library rule. - srcs: sources of the go_library. Each will be processed for stateify - annotations. - deps: dependencies for the go_library. - imports: an optional list of extra non-aliased, Go-style absolute import - paths required for stateified types. - **kwargs: passed to go_library. - """ - if "encode_unsafe.go" not in srcs and (name + "_state_autogen.go") not in srcs: - # Only do stateification for non-state packages without manual autogen. - go_stateify( - name = name + "_state_autogen", - srcs = [src for src in srcs if src.endswith(".go")], - imports = imports, - package = name, - arch = select({ - "@bazel_tools//src/conditions:linux_aarch64": "arm64", - "//conditions:default": "amd64", - }), - out = name + "_state_autogen.go", - ) - all_srcs = srcs + [name + "_state_autogen.go"] - if "//pkg/state" not in deps: - all_deps = deps + ["//pkg/state"] - else: - all_deps = deps - else: - all_deps = deps - all_srcs = srcs - _go_library( - name = name, - srcs = all_srcs, - deps = all_deps, - **kwargs - ) diff --git a/tools/images/BUILD b/tools/images/BUILD index 2b77c2737..f1699b184 100644 --- a/tools/images/BUILD +++ b/tools/images/BUILD @@ -1,4 +1,4 @@ -load("@rules_cc//cc:defs.bzl", "cc_binary") +load("//tools:defs.bzl", "cc_binary") load("//tools/images:defs.bzl", "vm_image", "vm_test") package( diff --git a/tools/images/defs.bzl b/tools/images/defs.bzl index d8e422a5d..32235813a 100644 --- a/tools/images/defs.bzl +++ b/tools/images/defs.bzl @@ -28,6 +28,8 @@ The vm_test rule can be used to execute a command remotely. For example, ) """ +load("//tools:defs.bzl", "default_installer") + def _vm_image_impl(ctx): script_paths = [] for script in ctx.files.scripts: @@ -165,8 +167,8 @@ def vm_test( targets = kwargs.pop("targets", []) if installer: targets = [installer] + targets - targets = [ - ] + targets + if default_installer(): + targets = [default_installer()] + targets _vm_test( tags = [ "local", diff --git a/tools/issue_reviver/BUILD b/tools/issue_reviver/BUILD index ee7ea11fd..4ef1a3124 100644 --- a/tools/issue_reviver/BUILD +++ b/tools/issue_reviver/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary") +load("//tools:defs.bzl", "go_binary") package(licenses = ["notice"]) diff --git a/tools/issue_reviver/github/BUILD b/tools/issue_reviver/github/BUILD index 6da22ba1c..da4133472 100644 --- a/tools/issue_reviver/github/BUILD +++ b/tools/issue_reviver/github/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//tools:defs.bzl", "go_library") package(licenses = ["notice"]) go_library( name = "github", srcs = ["github.go"], - importpath = "gvisor.dev/gvisor/tools/issue_reviver/github", visibility = [ "//tools/issue_reviver:__subpackages__", ], diff --git a/tools/issue_reviver/reviver/BUILD b/tools/issue_reviver/reviver/BUILD index 2c3675977..d262932bd 100644 --- a/tools/issue_reviver/reviver/BUILD +++ b/tools/issue_reviver/reviver/BUILD @@ -1,11 +1,10 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("//tools:defs.bzl", "go_library", "go_test") package(licenses = ["notice"]) go_library( name = "reviver", srcs = ["reviver.go"], - importpath = "gvisor.dev/gvisor/tools/issue_reviver/reviver", visibility = [ "//tools/issue_reviver:__subpackages__", ], @@ -15,5 +14,5 @@ go_test( name = "reviver_test", size = "small", srcs = ["reviver_test.go"], - embed = [":reviver"], + library = ":reviver", ) diff --git a/tools/workspace_status.sh b/tools/workspace_status.sh index fb09ff331..a22c8c9f2 100755 --- a/tools/workspace_status.sh +++ b/tools/workspace_status.sh @@ -15,4 +15,4 @@ # limitations under the License. # The STABLE_ prefix will trigger a re-link if it changes. -echo STABLE_VERSION $(git describe --always --tags --abbrev=12 --dirty) +echo STABLE_VERSION $(git describe --always --tags --abbrev=12 --dirty || echo 0.0.0) diff --git a/vdso/BUILD b/vdso/BUILD index 2b6744c26..d37d4266d 100644 --- a/vdso/BUILD +++ b/vdso/BUILD @@ -3,20 +3,10 @@ # normal system VDSO (time, gettimeofday, clock_gettimeofday) but which uses # timekeeping parameters managed by the sandbox kernel. -load("@bazel_tools//tools/cpp:cc_flags_supplier.bzl", "cc_flags_supplier") +load("//tools:defs.bzl", "cc_flags_supplier", "cc_toolchain", "select_arch") package(licenses = ["notice"]) -config_setting( - name = "x86_64", - constraint_values = ["@bazel_tools//platforms:x86_64"], -) - -config_setting( - name = "aarch64", - constraint_values = ["@bazel_tools//platforms:aarch64"], -) - genrule( name = "vdso", srcs = [ @@ -39,14 +29,15 @@ genrule( "-O2 " + "-std=c++11 " + "-fPIC " + + "-fno-sanitize=all " + # Some toolchains enable stack protector by default. Disable it, the # VDSO has no hooks to handle failures. "-fno-stack-protector " + "-fuse-ld=gold " + - select({ - ":x86_64": "-m64 ", - "//conditions:default": "", - }) + + select_arch( + amd64 = "-m64 ", + arm64 = "", + ) + "-shared " + "-nostdlib " + "-Wl,-soname=linux-vdso.so.1 " + @@ -55,12 +46,10 @@ genrule( "-Wl,-Bsymbolic " + "-Wl,-z,max-page-size=4096 " + "-Wl,-z,common-page-size=4096 " + - select( - { - ":x86_64": "-Wl,-T$(location vdso_amd64.lds) ", - ":aarch64": "-Wl,-T$(location vdso_arm64.lds) ", - }, - no_match_error = "Unsupported architecture", + select_arch( + amd64 = "-Wl,-T$(location vdso_amd64.lds) ", + arm64 = "-Wl,-T$(location vdso_arm64.lds) ", + no_match_error = "unsupported architecture", ) + "-o $(location vdso.so) " + "$(location vdso.cc) " + @@ -73,7 +62,7 @@ genrule( ], features = ["-pie"], toolchains = [ - "@bazel_tools//tools/cpp:current_cc_toolchain", + cc_toolchain, ":no_pie_cc_flags", ], visibility = ["//:sandbox"], -- cgit v1.2.3 From 0e2f1b7abd219f39d67cc2cecd00c441a13eeb29 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 27 Jan 2020 15:17:58 -0800 Subject: Update package locations. Because the abi will depend on the core types for marshalling (usermem, context, safemem, safecopy), these need to be flattened from the sentry directory. These packages contain no sentry-specific details. PiperOrigin-RevId: 291811289 --- pkg/abi/abi.go | 4 + pkg/context/BUILD | 13 + pkg/context/context.go | 141 +++++ pkg/safecopy/BUILD | 29 + pkg/safecopy/LICENSE | 27 + pkg/safecopy/atomic_amd64.s | 136 +++++ pkg/safecopy/atomic_arm64.s | 126 +++++ pkg/safecopy/memclr_amd64.s | 147 +++++ pkg/safecopy/memclr_arm64.s | 74 +++ pkg/safecopy/memcpy_amd64.s | 250 +++++++++ pkg/safecopy/memcpy_arm64.s | 78 +++ pkg/safecopy/safecopy.go | 144 +++++ pkg/safecopy/safecopy_test.go | 617 +++++++++++++++++++++ pkg/safecopy/safecopy_unsafe.go | 335 +++++++++++ pkg/safecopy/sighandler_amd64.s | 133 +++++ pkg/safecopy/sighandler_arm64.s | 143 +++++ pkg/safemem/BUILD | 27 + pkg/safemem/block_unsafe.go | 279 ++++++++++ pkg/safemem/io.go | 392 +++++++++++++ pkg/safemem/io_test.go | 199 +++++++ pkg/safemem/safemem.go | 16 + pkg/safemem/seq_test.go | 196 +++++++ pkg/safemem/seq_unsafe.go | 299 ++++++++++ pkg/sentry/arch/BUILD | 4 +- pkg/sentry/arch/arch.go | 2 +- pkg/sentry/arch/arch_aarch64.go | 2 +- pkg/sentry/arch/arch_amd64.go | 2 +- pkg/sentry/arch/arch_arm64.go | 2 +- pkg/sentry/arch/arch_state_x86.go | 2 +- pkg/sentry/arch/arch_x86.go | 2 +- pkg/sentry/arch/auxv.go | 2 +- pkg/sentry/arch/signal.go | 2 +- pkg/sentry/arch/signal_amd64.go | 2 +- pkg/sentry/arch/signal_arm64.go | 2 +- pkg/sentry/arch/signal_stack.go | 2 +- pkg/sentry/arch/stack.go | 4 +- pkg/sentry/context/BUILD | 13 - pkg/sentry/context/context.go | 141 ----- pkg/sentry/context/contexttest/BUILD | 21 - pkg/sentry/context/contexttest/contexttest.go | 188 ------- pkg/sentry/contexttest/BUILD | 21 + pkg/sentry/contexttest/contexttest.go | 188 +++++++ pkg/sentry/fs/BUILD | 12 +- pkg/sentry/fs/anon/BUILD | 4 +- pkg/sentry/fs/anon/anon.go | 4 +- pkg/sentry/fs/attr.go | 2 +- pkg/sentry/fs/context.go | 2 +- pkg/sentry/fs/copy_up.go | 4 +- pkg/sentry/fs/copy_up_test.go | 2 +- pkg/sentry/fs/dev/BUILD | 6 +- pkg/sentry/fs/dev/dev.go | 4 +- pkg/sentry/fs/dev/fs.go | 2 +- pkg/sentry/fs/dev/full.go | 4 +- pkg/sentry/fs/dev/null.go | 2 +- pkg/sentry/fs/dev/random.go | 6 +- pkg/sentry/fs/dev/tty.go | 2 +- pkg/sentry/fs/dirent.go | 2 +- pkg/sentry/fs/dirent_refs_test.go | 4 +- pkg/sentry/fs/fdpipe/BUILD | 12 +- pkg/sentry/fs/fdpipe/pipe.go | 6 +- pkg/sentry/fs/fdpipe/pipe_opener.go | 2 +- pkg/sentry/fs/fdpipe/pipe_opener_test.go | 6 +- pkg/sentry/fs/fdpipe/pipe_state.go | 2 +- pkg/sentry/fs/fdpipe/pipe_test.go | 4 +- pkg/sentry/fs/file.go | 4 +- pkg/sentry/fs/file_operations.go | 4 +- pkg/sentry/fs/file_overlay.go | 4 +- pkg/sentry/fs/file_overlay_test.go | 2 +- pkg/sentry/fs/filesystems.go | 2 +- pkg/sentry/fs/filetest/BUILD | 6 +- pkg/sentry/fs/filetest/filetest.go | 6 +- pkg/sentry/fs/fs.go | 2 +- pkg/sentry/fs/fsutil/BUILD | 14 +- pkg/sentry/fs/fsutil/dirty_set.go | 6 +- pkg/sentry/fs/fsutil/dirty_set_test.go | 2 +- pkg/sentry/fs/fsutil/file.go | 4 +- pkg/sentry/fs/fsutil/file_range_set.go | 6 +- pkg/sentry/fs/fsutil/host_file_mapper.go | 4 +- pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go | 2 +- pkg/sentry/fs/fsutil/host_mappable.go | 6 +- pkg/sentry/fs/fsutil/inode.go | 2 +- pkg/sentry/fs/fsutil/inode_cached.go | 6 +- pkg/sentry/fs/fsutil/inode_cached_test.go | 8 +- pkg/sentry/fs/gofer/BUILD | 10 +- pkg/sentry/fs/gofer/attr.go | 4 +- pkg/sentry/fs/gofer/cache_policy.go | 2 +- pkg/sentry/fs/gofer/context_file.go | 2 +- pkg/sentry/fs/gofer/file.go | 4 +- pkg/sentry/fs/gofer/file_state.go | 2 +- pkg/sentry/fs/gofer/fs.go | 2 +- pkg/sentry/fs/gofer/gofer_test.go | 4 +- pkg/sentry/fs/gofer/handles.go | 4 +- pkg/sentry/fs/gofer/inode.go | 4 +- pkg/sentry/fs/gofer/inode_state.go | 2 +- pkg/sentry/fs/gofer/path.go | 2 +- pkg/sentry/fs/gofer/session.go | 2 +- pkg/sentry/fs/gofer/session_state.go | 2 +- pkg/sentry/fs/gofer/socket.go | 2 +- pkg/sentry/fs/gofer/util.go | 2 +- pkg/sentry/fs/host/BUILD | 12 +- pkg/sentry/fs/host/control.go | 2 +- pkg/sentry/fs/host/file.go | 6 +- pkg/sentry/fs/host/fs.go | 2 +- pkg/sentry/fs/host/fs_test.go | 4 +- pkg/sentry/fs/host/inode.go | 4 +- pkg/sentry/fs/host/inode_state.go | 2 +- pkg/sentry/fs/host/inode_test.go | 2 +- pkg/sentry/fs/host/socket.go | 2 +- pkg/sentry/fs/host/socket_test.go | 4 +- pkg/sentry/fs/host/tty.go | 4 +- pkg/sentry/fs/host/wait_test.go | 2 +- pkg/sentry/fs/inode.go | 2 +- pkg/sentry/fs/inode_operations.go | 2 +- pkg/sentry/fs/inode_overlay.go | 2 +- pkg/sentry/fs/inode_overlay_test.go | 2 +- pkg/sentry/fs/inotify.go | 4 +- pkg/sentry/fs/inotify_event.go | 4 +- pkg/sentry/fs/mock.go | 2 +- pkg/sentry/fs/mount.go | 2 +- pkg/sentry/fs/mount_overlay.go | 2 +- pkg/sentry/fs/mount_test.go | 2 +- pkg/sentry/fs/mounts.go | 2 +- pkg/sentry/fs/mounts_test.go | 2 +- pkg/sentry/fs/offset.go | 2 +- pkg/sentry/fs/overlay.go | 4 +- pkg/sentry/fs/proc/BUILD | 8 +- pkg/sentry/fs/proc/cgroup.go | 2 +- pkg/sentry/fs/proc/cpuinfo.go | 2 +- pkg/sentry/fs/proc/exec_args.go | 4 +- pkg/sentry/fs/proc/fds.go | 2 +- pkg/sentry/fs/proc/filesystems.go | 2 +- pkg/sentry/fs/proc/fs.go | 2 +- pkg/sentry/fs/proc/inode.go | 4 +- pkg/sentry/fs/proc/loadavg.go | 2 +- pkg/sentry/fs/proc/meminfo.go | 4 +- pkg/sentry/fs/proc/mounts.go | 2 +- pkg/sentry/fs/proc/net.go | 4 +- pkg/sentry/fs/proc/proc.go | 2 +- pkg/sentry/fs/proc/seqfile/BUILD | 10 +- pkg/sentry/fs/proc/seqfile/seqfile.go | 4 +- pkg/sentry/fs/proc/seqfile/seqfile_test.go | 6 +- pkg/sentry/fs/proc/stat.go | 2 +- pkg/sentry/fs/proc/sys.go | 4 +- pkg/sentry/fs/proc/sys_net.go | 4 +- pkg/sentry/fs/proc/sys_net_test.go | 4 +- pkg/sentry/fs/proc/task.go | 4 +- pkg/sentry/fs/proc/uid_gid_map.go | 4 +- pkg/sentry/fs/proc/uptime.go | 4 +- pkg/sentry/fs/proc/version.go | 2 +- pkg/sentry/fs/ramfs/BUILD | 6 +- pkg/sentry/fs/ramfs/dir.go | 2 +- pkg/sentry/fs/ramfs/socket.go | 2 +- pkg/sentry/fs/ramfs/symlink.go | 2 +- pkg/sentry/fs/ramfs/tree.go | 4 +- pkg/sentry/fs/ramfs/tree_test.go | 2 +- pkg/sentry/fs/splice.go | 2 +- pkg/sentry/fs/sys/BUILD | 4 +- pkg/sentry/fs/sys/devices.go | 2 +- pkg/sentry/fs/sys/fs.go | 2 +- pkg/sentry/fs/sys/sys.go | 4 +- pkg/sentry/fs/timerfd/BUILD | 4 +- pkg/sentry/fs/timerfd/timerfd.go | 4 +- pkg/sentry/fs/tmpfs/BUILD | 10 +- pkg/sentry/fs/tmpfs/file_regular.go | 4 +- pkg/sentry/fs/tmpfs/file_test.go | 4 +- pkg/sentry/fs/tmpfs/fs.go | 2 +- pkg/sentry/fs/tmpfs/inode_file.go | 6 +- pkg/sentry/fs/tmpfs/tmpfs.go | 4 +- pkg/sentry/fs/tty/BUILD | 10 +- pkg/sentry/fs/tty/dir.go | 4 +- pkg/sentry/fs/tty/fs.go | 2 +- pkg/sentry/fs/tty/line_discipline.go | 4 +- pkg/sentry/fs/tty/master.go | 4 +- pkg/sentry/fs/tty/queue.go | 6 +- pkg/sentry/fs/tty/slave.go | 4 +- pkg/sentry/fs/tty/terminal.go | 4 +- pkg/sentry/fs/tty/tty_test.go | 4 +- pkg/sentry/fsimpl/ext/BUILD | 12 +- pkg/sentry/fsimpl/ext/benchmark/BUILD | 4 +- pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go | 4 +- pkg/sentry/fsimpl/ext/directory.go | 2 +- pkg/sentry/fsimpl/ext/ext.go | 2 +- pkg/sentry/fsimpl/ext/ext_test.go | 6 +- pkg/sentry/fsimpl/ext/file_description.go | 2 +- pkg/sentry/fsimpl/ext/filesystem.go | 2 +- pkg/sentry/fsimpl/ext/regular_file.go | 6 +- pkg/sentry/fsimpl/ext/symlink.go | 4 +- pkg/sentry/fsimpl/kernfs/BUILD | 10 +- pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 4 +- pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 4 +- pkg/sentry/fsimpl/kernfs/filesystem.go | 2 +- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 2 +- pkg/sentry/fsimpl/kernfs/kernfs.go | 2 +- pkg/sentry/fsimpl/kernfs/kernfs_test.go | 6 +- pkg/sentry/fsimpl/kernfs/symlink.go | 2 +- pkg/sentry/fsimpl/proc/BUILD | 12 +- pkg/sentry/fsimpl/proc/filesystem.go | 2 +- pkg/sentry/fsimpl/proc/subtasks.go | 2 +- pkg/sentry/fsimpl/proc/task.go | 2 +- pkg/sentry/fsimpl/proc/task_files.go | 6 +- pkg/sentry/fsimpl/proc/tasks.go | 2 +- pkg/sentry/fsimpl/proc/tasks_files.go | 4 +- pkg/sentry/fsimpl/proc/tasks_net.go | 4 +- pkg/sentry/fsimpl/proc/tasks_sys.go | 2 +- pkg/sentry/fsimpl/proc/tasks_sys_test.go | 2 +- pkg/sentry/fsimpl/proc/tasks_test.go | 4 +- pkg/sentry/fsimpl/sys/BUILD | 2 +- pkg/sentry/fsimpl/sys/sys.go | 2 +- pkg/sentry/fsimpl/testutil/BUILD | 4 +- pkg/sentry/fsimpl/testutil/kernel.go | 2 +- pkg/sentry/fsimpl/testutil/testutil.go | 4 +- pkg/sentry/fsimpl/tmpfs/BUILD | 16 +- pkg/sentry/fsimpl/tmpfs/benchmark_test.go | 4 +- pkg/sentry/fsimpl/tmpfs/directory.go | 2 +- pkg/sentry/fsimpl/tmpfs/filesystem.go | 2 +- pkg/sentry/fsimpl/tmpfs/named_pipe.go | 4 +- pkg/sentry/fsimpl/tmpfs/pipe_test.go | 6 +- pkg/sentry/fsimpl/tmpfs/regular_file.go | 6 +- pkg/sentry/fsimpl/tmpfs/regular_file_test.go | 4 +- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 2 +- pkg/sentry/hostmm/BUILD | 2 +- pkg/sentry/hostmm/hostmm.go | 2 +- pkg/sentry/inet/BUILD | 2 +- pkg/sentry/inet/context.go | 2 +- pkg/sentry/kernel/BUILD | 12 +- pkg/sentry/kernel/auth/BUILD | 2 +- pkg/sentry/kernel/auth/context.go | 2 +- pkg/sentry/kernel/auth/id_map.go | 2 +- pkg/sentry/kernel/context.go | 2 +- pkg/sentry/kernel/contexttest/BUILD | 4 +- pkg/sentry/kernel/contexttest/contexttest.go | 4 +- pkg/sentry/kernel/epoll/BUILD | 6 +- pkg/sentry/kernel/epoll/epoll.go | 4 +- pkg/sentry/kernel/epoll/epoll_test.go | 2 +- pkg/sentry/kernel/eventfd/BUILD | 8 +- pkg/sentry/kernel/eventfd/eventfd.go | 4 +- pkg/sentry/kernel/eventfd/eventfd_test.go | 4 +- pkg/sentry/kernel/fd_table.go | 2 +- pkg/sentry/kernel/fd_table_test.go | 4 +- pkg/sentry/kernel/futex/BUILD | 6 +- pkg/sentry/kernel/futex/futex.go | 2 +- pkg/sentry/kernel/futex/futex_test.go | 2 +- pkg/sentry/kernel/kernel.go | 2 +- pkg/sentry/kernel/pipe/BUILD | 12 +- pkg/sentry/kernel/pipe/buffer.go | 2 +- pkg/sentry/kernel/pipe/buffer_test.go | 2 +- pkg/sentry/kernel/pipe/node.go | 2 +- pkg/sentry/kernel/pipe/node_test.go | 6 +- pkg/sentry/kernel/pipe/pipe.go | 2 +- pkg/sentry/kernel/pipe/pipe_test.go | 4 +- pkg/sentry/kernel/pipe/pipe_util.go | 4 +- pkg/sentry/kernel/pipe/reader_writer.go | 4 +- pkg/sentry/kernel/pipe/vfs.go | 4 +- pkg/sentry/kernel/ptrace.go | 2 +- pkg/sentry/kernel/ptrace_amd64.go | 2 +- pkg/sentry/kernel/ptrace_arm64.go | 2 +- pkg/sentry/kernel/rseq.go | 2 +- pkg/sentry/kernel/seccomp.go | 2 +- pkg/sentry/kernel/semaphore/BUILD | 6 +- pkg/sentry/kernel/semaphore/semaphore.go | 2 +- pkg/sentry/kernel/semaphore/semaphore_test.go | 4 +- pkg/sentry/kernel/shm/BUILD | 4 +- pkg/sentry/kernel/shm/shm.go | 4 +- pkg/sentry/kernel/signalfd/BUILD | 4 +- pkg/sentry/kernel/signalfd/signalfd.go | 4 +- pkg/sentry/kernel/syscalls.go | 2 +- pkg/sentry/kernel/task.go | 4 +- pkg/sentry/kernel/task_clone.go | 2 +- pkg/sentry/kernel/task_context.go | 4 +- pkg/sentry/kernel/task_futex.go | 2 +- pkg/sentry/kernel/task_log.go | 2 +- pkg/sentry/kernel/task_run.go | 2 +- pkg/sentry/kernel/task_signals.go | 2 +- pkg/sentry/kernel/task_start.go | 2 +- pkg/sentry/kernel/task_syscall.go | 2 +- pkg/sentry/kernel/task_usermem.go | 2 +- pkg/sentry/kernel/time/BUILD | 2 +- pkg/sentry/kernel/time/context.go | 2 +- pkg/sentry/kernel/timekeeper_test.go | 4 +- pkg/sentry/kernel/vdso.go | 4 +- pkg/sentry/limits/BUILD | 2 +- pkg/sentry/limits/context.go | 2 +- pkg/sentry/loader/BUILD | 6 +- pkg/sentry/loader/elf.go | 4 +- pkg/sentry/loader/interpreter.go | 4 +- pkg/sentry/loader/loader.go | 4 +- pkg/sentry/loader/vdso.go | 6 +- pkg/sentry/memmap/BUILD | 6 +- pkg/sentry/memmap/mapping_set.go | 2 +- pkg/sentry/memmap/mapping_set_test.go | 2 +- pkg/sentry/memmap/memmap.go | 4 +- pkg/sentry/mm/BUILD | 18 +- pkg/sentry/mm/address_space.go | 2 +- pkg/sentry/mm/aio_context.go | 4 +- pkg/sentry/mm/debug.go | 2 +- pkg/sentry/mm/io.go | 6 +- pkg/sentry/mm/lifecycle.go | 4 +- pkg/sentry/mm/metadata.go | 2 +- pkg/sentry/mm/mm.go | 4 +- pkg/sentry/mm/mm_test.go | 6 +- pkg/sentry/mm/pma.go | 8 +- pkg/sentry/mm/procfs.go | 4 +- pkg/sentry/mm/save_restore.go | 2 +- pkg/sentry/mm/shm.go | 4 +- pkg/sentry/mm/special_mappable.go | 4 +- pkg/sentry/mm/syscalls.go | 4 +- pkg/sentry/mm/vma.go | 4 +- pkg/sentry/pgalloc/BUILD | 8 +- pkg/sentry/pgalloc/context.go | 2 +- pkg/sentry/pgalloc/pgalloc.go | 6 +- pkg/sentry/pgalloc/pgalloc_test.go | 2 +- pkg/sentry/pgalloc/save_restore.go | 2 +- pkg/sentry/platform/BUILD | 8 +- pkg/sentry/platform/context.go | 2 +- pkg/sentry/platform/kvm/BUILD | 6 +- pkg/sentry/platform/kvm/address_space.go | 2 +- pkg/sentry/platform/kvm/bluepill.go | 2 +- pkg/sentry/platform/kvm/bluepill_fault.go | 2 +- pkg/sentry/platform/kvm/context.go | 2 +- pkg/sentry/platform/kvm/kvm.go | 2 +- pkg/sentry/platform/kvm/kvm_test.go | 2 +- pkg/sentry/platform/kvm/machine.go | 2 +- pkg/sentry/platform/kvm/machine_amd64.go | 2 +- pkg/sentry/platform/kvm/machine_arm64.go | 2 +- pkg/sentry/platform/kvm/machine_arm64_unsafe.go | 2 +- pkg/sentry/platform/kvm/physical_map.go | 2 +- pkg/sentry/platform/kvm/virtual_map.go | 2 +- pkg/sentry/platform/kvm/virtual_map_test.go | 2 +- pkg/sentry/platform/mmap_min_addr.go | 2 +- pkg/sentry/platform/platform.go | 4 +- pkg/sentry/platform/ptrace/BUILD | 4 +- pkg/sentry/platform/ptrace/ptrace.go | 2 +- pkg/sentry/platform/ptrace/ptrace_unsafe.go | 2 +- pkg/sentry/platform/ptrace/stub_unsafe.go | 4 +- pkg/sentry/platform/ptrace/subprocess.go | 2 +- pkg/sentry/platform/ring0/BUILD | 2 +- pkg/sentry/platform/ring0/defs_amd64.go | 2 +- pkg/sentry/platform/ring0/defs_arm64.go | 2 +- pkg/sentry/platform/ring0/gen_offsets/BUILD | 2 +- pkg/sentry/platform/ring0/pagetables/BUILD | 4 +- .../platform/ring0/pagetables/allocator_unsafe.go | 2 +- pkg/sentry/platform/ring0/pagetables/pagetables.go | 2 +- .../ring0/pagetables/pagetables_aarch64.go | 2 +- .../ring0/pagetables/pagetables_amd64_test.go | 2 +- .../ring0/pagetables/pagetables_arm64_test.go | 2 +- .../platform/ring0/pagetables/pagetables_test.go | 2 +- .../platform/ring0/pagetables/pagetables_x86.go | 2 +- pkg/sentry/platform/safecopy/BUILD | 29 - pkg/sentry/platform/safecopy/LICENSE | 27 - pkg/sentry/platform/safecopy/atomic_amd64.s | 136 ----- pkg/sentry/platform/safecopy/atomic_arm64.s | 126 ----- pkg/sentry/platform/safecopy/memclr_amd64.s | 147 ----- pkg/sentry/platform/safecopy/memclr_arm64.s | 74 --- pkg/sentry/platform/safecopy/memcpy_amd64.s | 250 --------- pkg/sentry/platform/safecopy/memcpy_arm64.s | 78 --- pkg/sentry/platform/safecopy/safecopy.go | 144 ----- pkg/sentry/platform/safecopy/safecopy_test.go | 617 --------------------- pkg/sentry/platform/safecopy/safecopy_unsafe.go | 335 ----------- pkg/sentry/platform/safecopy/sighandler_amd64.s | 133 ----- pkg/sentry/platform/safecopy/sighandler_arm64.s | 143 ----- pkg/sentry/safemem/BUILD | 27 - pkg/sentry/safemem/block_unsafe.go | 279 ---------- pkg/sentry/safemem/io.go | 392 ------------- pkg/sentry/safemem/io_test.go | 199 ------- pkg/sentry/safemem/safemem.go | 16 - pkg/sentry/safemem/seq_test.go | 196 ------- pkg/sentry/safemem/seq_unsafe.go | 299 ---------- pkg/sentry/socket/BUILD | 4 +- pkg/sentry/socket/control/BUILD | 4 +- pkg/sentry/socket/control/control.go | 4 +- pkg/sentry/socket/hostinet/BUILD | 6 +- pkg/sentry/socket/hostinet/socket.go | 6 +- pkg/sentry/socket/hostinet/socket_unsafe.go | 4 +- pkg/sentry/socket/hostinet/stack.go | 4 +- pkg/sentry/socket/netfilter/BUILD | 2 +- pkg/sentry/socket/netfilter/netfilter.go | 2 +- pkg/sentry/socket/netlink/BUILD | 4 +- pkg/sentry/socket/netlink/message.go | 2 +- pkg/sentry/socket/netlink/provider.go | 2 +- pkg/sentry/socket/netlink/route/BUILD | 2 +- pkg/sentry/socket/netlink/route/protocol.go | 2 +- pkg/sentry/socket/netlink/socket.go | 4 +- pkg/sentry/socket/netlink/uevent/BUILD | 2 +- pkg/sentry/socket/netlink/uevent/protocol.go | 2 +- pkg/sentry/socket/netstack/BUILD | 6 +- pkg/sentry/socket/netstack/netstack.go | 6 +- pkg/sentry/socket/netstack/provider.go | 2 +- pkg/sentry/socket/socket.go | 4 +- pkg/sentry/socket/unix/BUILD | 6 +- pkg/sentry/socket/unix/io.go | 4 +- pkg/sentry/socket/unix/transport/BUILD | 2 +- pkg/sentry/socket/unix/transport/connectioned.go | 2 +- pkg/sentry/socket/unix/transport/connectionless.go | 2 +- pkg/sentry/socket/unix/transport/unix.go | 2 +- pkg/sentry/socket/unix/unix.go | 4 +- pkg/sentry/strace/BUILD | 2 +- pkg/sentry/strace/poll.go | 2 +- pkg/sentry/strace/select.go | 2 +- pkg/sentry/strace/signal.go | 2 +- pkg/sentry/strace/socket.go | 2 +- pkg/sentry/strace/strace.go | 2 +- pkg/sentry/syscalls/linux/BUILD | 6 +- pkg/sentry/syscalls/linux/linux64_amd64.go | 2 +- pkg/sentry/syscalls/linux/linux64_arm64.go | 2 +- pkg/sentry/syscalls/linux/sigset.go | 2 +- pkg/sentry/syscalls/linux/sys_aio.go | 2 +- pkg/sentry/syscalls/linux/sys_epoll.go | 2 +- pkg/sentry/syscalls/linux/sys_file.go | 4 +- pkg/sentry/syscalls/linux/sys_futex.go | 2 +- pkg/sentry/syscalls/linux/sys_getdents.go | 2 +- pkg/sentry/syscalls/linux/sys_mempolicy.go | 2 +- pkg/sentry/syscalls/linux/sys_mmap.go | 2 +- pkg/sentry/syscalls/linux/sys_mount.go | 2 +- pkg/sentry/syscalls/linux/sys_pipe.go | 2 +- pkg/sentry/syscalls/linux/sys_poll.go | 2 +- pkg/sentry/syscalls/linux/sys_random.go | 4 +- pkg/sentry/syscalls/linux/sys_read.go | 2 +- pkg/sentry/syscalls/linux/sys_rlimit.go | 2 +- pkg/sentry/syscalls/linux/sys_seccomp.go | 2 +- pkg/sentry/syscalls/linux/sys_sem.go | 2 +- pkg/sentry/syscalls/linux/sys_signal.go | 2 +- pkg/sentry/syscalls/linux/sys_socket.go | 2 +- pkg/sentry/syscalls/linux/sys_stat.go | 2 +- pkg/sentry/syscalls/linux/sys_stat_amd64.go | 2 +- pkg/sentry/syscalls/linux/sys_stat_arm64.go | 2 +- pkg/sentry/syscalls/linux/sys_thread.go | 2 +- pkg/sentry/syscalls/linux/sys_time.go | 2 +- pkg/sentry/syscalls/linux/sys_timer.go | 2 +- pkg/sentry/syscalls/linux/sys_write.go | 2 +- pkg/sentry/syscalls/linux/sys_xattr.go | 2 +- pkg/sentry/syscalls/linux/timespec.go | 2 +- pkg/sentry/unimpl/BUILD | 2 +- pkg/sentry/unimpl/events.go | 2 +- pkg/sentry/uniqueid/BUILD | 2 +- pkg/sentry/uniqueid/context.go | 2 +- pkg/sentry/usermem/BUILD | 55 -- pkg/sentry/usermem/README.md | 31 -- pkg/sentry/usermem/access_type.go | 128 ----- pkg/sentry/usermem/addr.go | 108 ---- pkg/sentry/usermem/addr_range_seq_test.go | 197 ------- pkg/sentry/usermem/addr_range_seq_unsafe.go | 277 --------- pkg/sentry/usermem/bytes_io.go | 141 ----- pkg/sentry/usermem/bytes_io_unsafe.go | 47 -- pkg/sentry/usermem/usermem.go | 597 -------------------- pkg/sentry/usermem/usermem_arm64.go | 53 -- pkg/sentry/usermem/usermem_test.go | 424 -------------- pkg/sentry/usermem/usermem_unsafe.go | 27 - pkg/sentry/usermem/usermem_x86.go | 38 -- pkg/sentry/vfs/BUILD | 10 +- pkg/sentry/vfs/context.go | 2 +- pkg/sentry/vfs/device.go | 2 +- pkg/sentry/vfs/file_description.go | 4 +- pkg/sentry/vfs/file_description_impl_util.go | 4 +- pkg/sentry/vfs/file_description_impl_util_test.go | 6 +- pkg/sentry/vfs/filesystem.go | 2 +- pkg/sentry/vfs/filesystem_type.go | 2 +- pkg/sentry/vfs/mount.go | 2 +- pkg/sentry/vfs/pathname.go | 2 +- pkg/sentry/vfs/testutil.go | 2 +- pkg/sentry/vfs/vfs.go | 2 +- pkg/usermem/BUILD | 55 ++ pkg/usermem/README.md | 31 ++ pkg/usermem/access_type.go | 128 +++++ pkg/usermem/addr.go | 108 ++++ pkg/usermem/addr_range_seq_test.go | 197 +++++++ pkg/usermem/addr_range_seq_unsafe.go | 277 +++++++++ pkg/usermem/bytes_io.go | 141 +++++ pkg/usermem/bytes_io_unsafe.go | 47 ++ pkg/usermem/usermem.go | 597 ++++++++++++++++++++ pkg/usermem/usermem_arm64.go | 53 ++ pkg/usermem/usermem_test.go | 424 ++++++++++++++ pkg/usermem/usermem_unsafe.go | 27 + pkg/usermem/usermem_x86.go | 38 ++ runsc/boot/BUILD | 6 +- runsc/boot/fds.go | 2 +- runsc/boot/fs.go | 2 +- runsc/boot/loader_test.go | 2 +- runsc/boot/user.go | 4 +- runsc/boot/user_test.go | 2 +- tools/go_marshal/defs.bzl | 4 +- tools/go_marshal/gomarshal/generator.go | 4 +- tools/go_marshal/test/BUILD | 2 +- tools/go_marshal/test/benchmark_test.go | 2 +- 483 files changed, 6839 insertions(+), 6835 deletions(-) create mode 100644 pkg/context/BUILD create mode 100644 pkg/context/context.go create mode 100644 pkg/safecopy/BUILD create mode 100644 pkg/safecopy/LICENSE create mode 100644 pkg/safecopy/atomic_amd64.s create mode 100644 pkg/safecopy/atomic_arm64.s create mode 100644 pkg/safecopy/memclr_amd64.s create mode 100644 pkg/safecopy/memclr_arm64.s create mode 100644 pkg/safecopy/memcpy_amd64.s create mode 100644 pkg/safecopy/memcpy_arm64.s create mode 100644 pkg/safecopy/safecopy.go create mode 100644 pkg/safecopy/safecopy_test.go create mode 100644 pkg/safecopy/safecopy_unsafe.go create mode 100644 pkg/safecopy/sighandler_amd64.s create mode 100644 pkg/safecopy/sighandler_arm64.s create mode 100644 pkg/safemem/BUILD create mode 100644 pkg/safemem/block_unsafe.go create mode 100644 pkg/safemem/io.go create mode 100644 pkg/safemem/io_test.go create mode 100644 pkg/safemem/safemem.go create mode 100644 pkg/safemem/seq_test.go create mode 100644 pkg/safemem/seq_unsafe.go delete mode 100644 pkg/sentry/context/BUILD delete mode 100644 pkg/sentry/context/context.go delete mode 100644 pkg/sentry/context/contexttest/BUILD delete mode 100644 pkg/sentry/context/contexttest/contexttest.go create mode 100644 pkg/sentry/contexttest/BUILD create mode 100644 pkg/sentry/contexttest/contexttest.go delete mode 100644 pkg/sentry/platform/safecopy/BUILD delete mode 100644 pkg/sentry/platform/safecopy/LICENSE delete mode 100644 pkg/sentry/platform/safecopy/atomic_amd64.s delete mode 100644 pkg/sentry/platform/safecopy/atomic_arm64.s delete mode 100644 pkg/sentry/platform/safecopy/memclr_amd64.s delete mode 100644 pkg/sentry/platform/safecopy/memclr_arm64.s delete mode 100644 pkg/sentry/platform/safecopy/memcpy_amd64.s delete mode 100644 pkg/sentry/platform/safecopy/memcpy_arm64.s delete mode 100644 pkg/sentry/platform/safecopy/safecopy.go delete mode 100644 pkg/sentry/platform/safecopy/safecopy_test.go delete mode 100644 pkg/sentry/platform/safecopy/safecopy_unsafe.go delete mode 100644 pkg/sentry/platform/safecopy/sighandler_amd64.s delete mode 100644 pkg/sentry/platform/safecopy/sighandler_arm64.s delete mode 100644 pkg/sentry/safemem/BUILD delete mode 100644 pkg/sentry/safemem/block_unsafe.go delete mode 100644 pkg/sentry/safemem/io.go delete mode 100644 pkg/sentry/safemem/io_test.go delete mode 100644 pkg/sentry/safemem/safemem.go delete mode 100644 pkg/sentry/safemem/seq_test.go delete mode 100644 pkg/sentry/safemem/seq_unsafe.go delete mode 100644 pkg/sentry/usermem/BUILD delete mode 100644 pkg/sentry/usermem/README.md delete mode 100644 pkg/sentry/usermem/access_type.go delete mode 100644 pkg/sentry/usermem/addr.go delete mode 100644 pkg/sentry/usermem/addr_range_seq_test.go delete mode 100644 pkg/sentry/usermem/addr_range_seq_unsafe.go delete mode 100644 pkg/sentry/usermem/bytes_io.go delete mode 100644 pkg/sentry/usermem/bytes_io_unsafe.go delete mode 100644 pkg/sentry/usermem/usermem.go delete mode 100644 pkg/sentry/usermem/usermem_arm64.go delete mode 100644 pkg/sentry/usermem/usermem_test.go delete mode 100644 pkg/sentry/usermem/usermem_unsafe.go delete mode 100644 pkg/sentry/usermem/usermem_x86.go create mode 100644 pkg/usermem/BUILD create mode 100644 pkg/usermem/README.md create mode 100644 pkg/usermem/access_type.go create mode 100644 pkg/usermem/addr.go create mode 100644 pkg/usermem/addr_range_seq_test.go create mode 100644 pkg/usermem/addr_range_seq_unsafe.go create mode 100644 pkg/usermem/bytes_io.go create mode 100644 pkg/usermem/bytes_io_unsafe.go create mode 100644 pkg/usermem/usermem.go create mode 100644 pkg/usermem/usermem_arm64.go create mode 100644 pkg/usermem/usermem_test.go create mode 100644 pkg/usermem/usermem_unsafe.go create mode 100644 pkg/usermem/usermem_x86.go (limited to 'pkg/sentry/fs') diff --git a/pkg/abi/abi.go b/pkg/abi/abi.go index d56c481c9..e6be93c3a 100644 --- a/pkg/abi/abi.go +++ b/pkg/abi/abi.go @@ -39,3 +39,7 @@ func (o OS) String() string { return fmt.Sprintf("OS(%d)", o) } } + +// ABI is an interface that defines OS-specific interactions. +type ABI interface { +} diff --git a/pkg/context/BUILD b/pkg/context/BUILD new file mode 100644 index 000000000..239f31149 --- /dev/null +++ b/pkg/context/BUILD @@ -0,0 +1,13 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "context", + srcs = ["context.go"], + visibility = ["//:sandbox"], + deps = [ + "//pkg/amutex", + "//pkg/log", + ], +) diff --git a/pkg/context/context.go b/pkg/context/context.go new file mode 100644 index 000000000..23e009ef3 --- /dev/null +++ b/pkg/context/context.go @@ -0,0 +1,141 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package context defines an internal context type. +// +// The given Context conforms to the standard Go context, but mandates +// additional methods that are specific to the kernel internals. Note however, +// that the Context described by this package carries additional constraints +// regarding concurrent access and retaining beyond the scope of a call. +// +// See the Context type for complete details. +package context + +import ( + "context" + "time" + + "gvisor.dev/gvisor/pkg/amutex" + "gvisor.dev/gvisor/pkg/log" +) + +type contextID int + +// Globally accessible values from a context. These keys are defined in the +// context package to resolve dependency cycles by not requiring the caller to +// import packages usually required to get these information. +const ( + // CtxThreadGroupID is the current thread group ID when a context represents + // a task context. The value is represented as an int32. + CtxThreadGroupID contextID = iota +) + +// ThreadGroupIDFromContext returns the current thread group ID when ctx +// represents a task context. +func ThreadGroupIDFromContext(ctx Context) (tgid int32, ok bool) { + if tgid := ctx.Value(CtxThreadGroupID); tgid != nil { + return tgid.(int32), true + } + return 0, false +} + +// A Context represents a thread of execution (hereafter "goroutine" to reflect +// Go idiosyncrasy). It carries state associated with the goroutine across API +// boundaries. +// +// While Context exists for essentially the same reasons as Go's standard +// context.Context, the standard type represents the state of an operation +// rather than that of a goroutine. This is a critical distinction: +// +// - Unlike context.Context, which "may be passed to functions running in +// different goroutines", it is *not safe* to use the same Context in multiple +// concurrent goroutines. +// +// - It is *not safe* to retain a Context passed to a function beyond the scope +// of that function call. +// +// In both cases, values extracted from the Context should be used instead. +type Context interface { + log.Logger + amutex.Sleeper + context.Context + + // UninterruptibleSleepStart indicates the beginning of an uninterruptible + // sleep state (equivalent to Linux's TASK_UNINTERRUPTIBLE). If deactivate + // is true and the Context represents a Task, the Task's AddressSpace is + // deactivated. + UninterruptibleSleepStart(deactivate bool) + + // UninterruptibleSleepFinish indicates the end of an uninterruptible sleep + // state that was begun by a previous call to UninterruptibleSleepStart. If + // activate is true and the Context represents a Task, the Task's + // AddressSpace is activated. Normally activate is the same value as the + // deactivate parameter passed to UninterruptibleSleepStart. + UninterruptibleSleepFinish(activate bool) +} + +// NoopSleeper is a noop implementation of amutex.Sleeper and UninterruptibleSleep +// methods for anonymous embedding in other types that do not implement sleeps. +type NoopSleeper struct { + amutex.NoopSleeper +} + +// UninterruptibleSleepStart does nothing. +func (NoopSleeper) UninterruptibleSleepStart(bool) {} + +// UninterruptibleSleepFinish does nothing. +func (NoopSleeper) UninterruptibleSleepFinish(bool) {} + +// Deadline returns zero values, meaning no deadline. +func (NoopSleeper) Deadline() (time.Time, bool) { + return time.Time{}, false +} + +// Done returns nil. +func (NoopSleeper) Done() <-chan struct{} { + return nil +} + +// Err returns nil. +func (NoopSleeper) Err() error { + return nil +} + +// logContext implements basic logging. +type logContext struct { + log.Logger + NoopSleeper +} + +// Value implements Context.Value. +func (logContext) Value(key interface{}) interface{} { + return nil +} + +// bgContext is the context returned by context.Background. +var bgContext = &logContext{Logger: log.Log()} + +// Background returns an empty context using the default logger. +// +// Users should be wary of using a Background context. Please tag any use with +// FIXME(b/38173783) and a note to remove this use. +// +// Generally, one should use the Task as their context when available, or avoid +// having to use a context in places where a Task is unavailable. +// +// Using a Background context for tests is fine, as long as no values are +// needed from the context in the tested code paths. +func Background() Context { + return bgContext +} diff --git a/pkg/safecopy/BUILD b/pkg/safecopy/BUILD new file mode 100644 index 000000000..426ef30c9 --- /dev/null +++ b/pkg/safecopy/BUILD @@ -0,0 +1,29 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "safecopy", + srcs = [ + "atomic_amd64.s", + "atomic_arm64.s", + "memclr_amd64.s", + "memclr_arm64.s", + "memcpy_amd64.s", + "memcpy_arm64.s", + "safecopy.go", + "safecopy_unsafe.go", + "sighandler_amd64.s", + "sighandler_arm64.s", + ], + visibility = ["//:sandbox"], + deps = ["//pkg/syserror"], +) + +go_test( + name = "safecopy_test", + srcs = [ + "safecopy_test.go", + ], + library = ":safecopy", +) diff --git a/pkg/safecopy/LICENSE b/pkg/safecopy/LICENSE new file mode 100644 index 000000000..6a66aea5e --- /dev/null +++ b/pkg/safecopy/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pkg/safecopy/atomic_amd64.s b/pkg/safecopy/atomic_amd64.s new file mode 100644 index 000000000..a0cd78f33 --- /dev/null +++ b/pkg/safecopy/atomic_amd64.s @@ -0,0 +1,136 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +// handleSwapUint32Fault returns the value stored in DI. Control is transferred +// to it when swapUint32 below receives SIGSEGV or SIGBUS, with the signal +// number stored in DI. +// +// It must have the same frame configuration as swapUint32 so that it can undo +// any potential call frame set up by the assembler. +TEXT handleSwapUint32Fault(SB), NOSPLIT, $0-24 + MOVL DI, sig+20(FP) + RET + +// swapUint32 atomically stores new into *addr and returns (the previous *addr +// value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the +// value of old is unspecified, and sig is the number of the signal that was +// received. +// +// Preconditions: addr must be aligned to a 4-byte boundary. +// +//func swapUint32(ptr unsafe.Pointer, new uint32) (old uint32, sig int32) +TEXT ·swapUint32(SB), NOSPLIT, $0-24 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleSwapUint32Fault will store a different value in this address. + MOVL $0, sig+20(FP) + + MOVQ addr+0(FP), DI + MOVL new+8(FP), AX + XCHGL AX, 0(DI) + MOVL AX, old+16(FP) + RET + +// handleSwapUint64Fault returns the value stored in DI. Control is transferred +// to it when swapUint64 below receives SIGSEGV or SIGBUS, with the signal +// number stored in DI. +// +// It must have the same frame configuration as swapUint64 so that it can undo +// any potential call frame set up by the assembler. +TEXT handleSwapUint64Fault(SB), NOSPLIT, $0-28 + MOVL DI, sig+24(FP) + RET + +// swapUint64 atomically stores new into *addr and returns (the previous *addr +// value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the +// value of old is unspecified, and sig is the number of the signal that was +// received. +// +// Preconditions: addr must be aligned to a 8-byte boundary. +// +//func swapUint64(ptr unsafe.Pointer, new uint64) (old uint64, sig int32) +TEXT ·swapUint64(SB), NOSPLIT, $0-28 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleSwapUint64Fault will store a different value in this address. + MOVL $0, sig+24(FP) + + MOVQ addr+0(FP), DI + MOVQ new+8(FP), AX + XCHGQ AX, 0(DI) + MOVQ AX, old+16(FP) + RET + +// handleCompareAndSwapUint32Fault returns the value stored in DI. Control is +// transferred to it when swapUint64 below receives SIGSEGV or SIGBUS, with the +// signal number stored in DI. +// +// It must have the same frame configuration as compareAndSwapUint32 so that it +// can undo any potential call frame set up by the assembler. +TEXT handleCompareAndSwapUint32Fault(SB), NOSPLIT, $0-24 + MOVL DI, sig+20(FP) + RET + +// compareAndSwapUint32 is like sync/atomic.CompareAndSwapUint32, but returns +// (the value previously stored at addr, 0). If a SIGSEGV or SIGBUS signal is +// received during the operation, the value of prev is unspecified, and sig is +// the number of the signal that was received. +// +// Preconditions: addr must be aligned to a 4-byte boundary. +// +//func compareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (prev uint32, sig int32) +TEXT ·compareAndSwapUint32(SB), NOSPLIT, $0-24 + // Store 0 as the returned signal number. If we run to completion, this is + // the value the caller will see; if a signal is received, + // handleCompareAndSwapUint32Fault will store a different value in this + // address. + MOVL $0, sig+20(FP) + + MOVQ addr+0(FP), DI + MOVL old+8(FP), AX + MOVL new+12(FP), DX + LOCK + CMPXCHGL DX, 0(DI) + MOVL AX, prev+16(FP) + RET + +// handleLoadUint32Fault returns the value stored in DI. Control is transferred +// to it when LoadUint32 below receives SIGSEGV or SIGBUS, with the signal +// number stored in DI. +// +// It must have the same frame configuration as loadUint32 so that it can undo +// any potential call frame set up by the assembler. +TEXT handleLoadUint32Fault(SB), NOSPLIT, $0-16 + MOVL DI, sig+12(FP) + RET + +// loadUint32 atomically loads *addr and returns it. If a SIGSEGV or SIGBUS +// signal is received, the value returned is unspecified, and sig is the number +// of the signal that was received. +// +// Preconditions: addr must be aligned to a 4-byte boundary. +// +//func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32) +TEXT ·loadUint32(SB), NOSPLIT, $0-16 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleLoadUint32Fault will store a different value in this address. + MOVL $0, sig+12(FP) + + MOVQ addr+0(FP), AX + MOVL (AX), BX + MOVL BX, val+8(FP) + RET diff --git a/pkg/safecopy/atomic_arm64.s b/pkg/safecopy/atomic_arm64.s new file mode 100644 index 000000000..d58ed71f7 --- /dev/null +++ b/pkg/safecopy/atomic_arm64.s @@ -0,0 +1,126 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// handleSwapUint32Fault returns the value stored in R1. Control is transferred +// to it when swapUint32 below receives SIGSEGV or SIGBUS, with the signal +// number stored in R1. +// +// It must have the same frame configuration as swapUint32 so that it can undo +// any potential call frame set up by the assembler. +TEXT handleSwapUint32Fault(SB), NOSPLIT, $0-24 + MOVW R1, sig+20(FP) + RET + +// See the corresponding doc in safecopy_unsafe.go +// +// The code is derived from Go source runtime/internal/atomic.Xchg. +// +//func swapUint32(ptr unsafe.Pointer, new uint32) (old uint32, sig int32) +TEXT ·swapUint32(SB), NOSPLIT, $0-24 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleSwapUint32Fault will store a different value in this address. + MOVW $0, sig+20(FP) +again: + MOVD addr+0(FP), R0 + MOVW new+8(FP), R1 + LDAXRW (R0), R2 + STLXRW R1, (R0), R3 + CBNZ R3, again + MOVW R2, old+16(FP) + RET + +// handleSwapUint64Fault returns the value stored in R1. Control is transferred +// to it when swapUint64 below receives SIGSEGV or SIGBUS, with the signal +// number stored in R1. +// +// It must have the same frame configuration as swapUint64 so that it can undo +// any potential call frame set up by the assembler. +TEXT handleSwapUint64Fault(SB), NOSPLIT, $0-28 + MOVW R1, sig+24(FP) + RET + +// See the corresponding doc in safecopy_unsafe.go +// +// The code is derived from Go source runtime/internal/atomic.Xchg64. +// +//func swapUint64(ptr unsafe.Pointer, new uint64) (old uint64, sig int32) +TEXT ·swapUint64(SB), NOSPLIT, $0-28 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleSwapUint64Fault will store a different value in this address. + MOVW $0, sig+24(FP) +again: + MOVD addr+0(FP), R0 + MOVD new+8(FP), R1 + LDAXR (R0), R2 + STLXR R1, (R0), R3 + CBNZ R3, again + MOVD R2, old+16(FP) + RET + +// handleCompareAndSwapUint32Fault returns the value stored in R1. Control is +// transferred to it when compareAndSwapUint32 below receives SIGSEGV or SIGBUS, +// with the signal number stored in R1. +// +// It must have the same frame configuration as compareAndSwapUint32 so that it +// can undo any potential call frame set up by the assembler. +TEXT handleCompareAndSwapUint32Fault(SB), NOSPLIT, $0-24 + MOVW R1, sig+20(FP) + RET + +// See the corresponding doc in safecopy_unsafe.go +// +// The code is derived from Go source runtime/internal/atomic.Cas. +// +//func compareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (prev uint32, sig int32) +TEXT ·compareAndSwapUint32(SB), NOSPLIT, $0-24 + // Store 0 as the returned signal number. If we run to completion, this is + // the value the caller will see; if a signal is received, + // handleCompareAndSwapUint32Fault will store a different value in this + // address. + MOVW $0, sig+20(FP) + + MOVD addr+0(FP), R0 + MOVW old+8(FP), R1 + MOVW new+12(FP), R2 +again: + LDAXRW (R0), R3 + CMPW R1, R3 + BNE done + STLXRW R2, (R0), R4 + CBNZ R4, again +done: + MOVW R3, prev+16(FP) + RET + +// handleLoadUint32Fault returns the value stored in DI. Control is transferred +// to it when LoadUint32 below receives SIGSEGV or SIGBUS, with the signal +// number stored in DI. +// +// It must have the same frame configuration as loadUint32 so that it can undo +// any potential call frame set up by the assembler. +TEXT handleLoadUint32Fault(SB), NOSPLIT, $0-16 + MOVW R1, sig+12(FP) + RET + +// loadUint32 atomically loads *addr and returns it. If a SIGSEGV or SIGBUS +// signal is received, the value returned is unspecified, and sig is the number +// of the signal that was received. +// +// Preconditions: addr must be aligned to a 4-byte boundary. +// +//func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32) +TEXT ·loadUint32(SB), NOSPLIT, $0-16 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleLoadUint32Fault will store a different value in this address. + MOVW $0, sig+12(FP) + + MOVD addr+0(FP), R0 + LDARW (R0), R1 + MOVW R1, val+8(FP) + RET diff --git a/pkg/safecopy/memclr_amd64.s b/pkg/safecopy/memclr_amd64.s new file mode 100644 index 000000000..64cf32f05 --- /dev/null +++ b/pkg/safecopy/memclr_amd64.s @@ -0,0 +1,147 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// handleMemclrFault returns (the value stored in AX, the value stored in DI). +// Control is transferred to it when memclr below receives SIGSEGV or SIGBUS, +// with the faulting address stored in AX and the signal number stored in DI. +// +// It must have the same frame configuration as memclr so that it can undo any +// potential call frame set up by the assembler. +TEXT handleMemclrFault(SB), NOSPLIT, $0-28 + MOVQ AX, addr+16(FP) + MOVL DI, sig+24(FP) + RET + +// memclr sets the n bytes following ptr to zeroes. If a SIGSEGV or SIGBUS +// signal is received during the write, it returns the address that caused the +// fault and the number of the signal that was received. Otherwise, it returns +// an unspecified address and a signal number of 0. +// +// Data is written in order, such that if a fault happens at address p, it is +// safe to assume that all data before p-maxRegisterSize has already been +// successfully written. +// +// The code is derived from runtime.memclrNoHeapPointers. +// +// func memclr(ptr unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) +TEXT ·memclr(SB), NOSPLIT, $0-28 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleMemclrFault will store a different value in this address. + MOVL $0, sig+24(FP) + + MOVQ ptr+0(FP), DI + MOVQ n+8(FP), BX + XORQ AX, AX + + // MOVOU seems always faster than REP STOSQ. +tail: + TESTQ BX, BX + JEQ _0 + CMPQ BX, $2 + JBE _1or2 + CMPQ BX, $4 + JBE _3or4 + CMPQ BX, $8 + JB _5through7 + JE _8 + CMPQ BX, $16 + JBE _9through16 + PXOR X0, X0 + CMPQ BX, $32 + JBE _17through32 + CMPQ BX, $64 + JBE _33through64 + CMPQ BX, $128 + JBE _65through128 + CMPQ BX, $256 + JBE _129through256 + // TODO: use branch table and BSR to make this just a single dispatch + // TODO: for really big clears, use MOVNTDQ, even without AVX2. + +loop: + MOVOU X0, 0(DI) + MOVOU X0, 16(DI) + MOVOU X0, 32(DI) + MOVOU X0, 48(DI) + MOVOU X0, 64(DI) + MOVOU X0, 80(DI) + MOVOU X0, 96(DI) + MOVOU X0, 112(DI) + MOVOU X0, 128(DI) + MOVOU X0, 144(DI) + MOVOU X0, 160(DI) + MOVOU X0, 176(DI) + MOVOU X0, 192(DI) + MOVOU X0, 208(DI) + MOVOU X0, 224(DI) + MOVOU X0, 240(DI) + SUBQ $256, BX + ADDQ $256, DI + CMPQ BX, $256 + JAE loop + JMP tail + +_1or2: + MOVB AX, (DI) + MOVB AX, -1(DI)(BX*1) + RET +_0: + RET +_3or4: + MOVW AX, (DI) + MOVW AX, -2(DI)(BX*1) + RET +_5through7: + MOVL AX, (DI) + MOVL AX, -4(DI)(BX*1) + RET +_8: + // We need a separate case for 8 to make sure we clear pointers atomically. + MOVQ AX, (DI) + RET +_9through16: + MOVQ AX, (DI) + MOVQ AX, -8(DI)(BX*1) + RET +_17through32: + MOVOU X0, (DI) + MOVOU X0, -16(DI)(BX*1) + RET +_33through64: + MOVOU X0, (DI) + MOVOU X0, 16(DI) + MOVOU X0, -32(DI)(BX*1) + MOVOU X0, -16(DI)(BX*1) + RET +_65through128: + MOVOU X0, (DI) + MOVOU X0, 16(DI) + MOVOU X0, 32(DI) + MOVOU X0, 48(DI) + MOVOU X0, -64(DI)(BX*1) + MOVOU X0, -48(DI)(BX*1) + MOVOU X0, -32(DI)(BX*1) + MOVOU X0, -16(DI)(BX*1) + RET +_129through256: + MOVOU X0, (DI) + MOVOU X0, 16(DI) + MOVOU X0, 32(DI) + MOVOU X0, 48(DI) + MOVOU X0, 64(DI) + MOVOU X0, 80(DI) + MOVOU X0, 96(DI) + MOVOU X0, 112(DI) + MOVOU X0, -128(DI)(BX*1) + MOVOU X0, -112(DI)(BX*1) + MOVOU X0, -96(DI)(BX*1) + MOVOU X0, -80(DI)(BX*1) + MOVOU X0, -64(DI)(BX*1) + MOVOU X0, -48(DI)(BX*1) + MOVOU X0, -32(DI)(BX*1) + MOVOU X0, -16(DI)(BX*1) + RET diff --git a/pkg/safecopy/memclr_arm64.s b/pkg/safecopy/memclr_arm64.s new file mode 100644 index 000000000..7361b9067 --- /dev/null +++ b/pkg/safecopy/memclr_arm64.s @@ -0,0 +1,74 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// handleMemclrFault returns (the value stored in R0, the value stored in R1). +// Control is transferred to it when memclr below receives SIGSEGV or SIGBUS, +// with the faulting address stored in R0 and the signal number stored in R1. +// +// It must have the same frame configuration as memclr so that it can undo any +// potential call frame set up by the assembler. +TEXT handleMemclrFault(SB), NOSPLIT, $0-28 + MOVD R0, addr+16(FP) + MOVW R1, sig+24(FP) + RET + +// See the corresponding doc in safecopy_unsafe.go +// +// The code is derived from runtime.memclrNoHeapPointers. +// +// func memclr(ptr unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) +TEXT ·memclr(SB), NOSPLIT, $0-28 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleMemclrFault will store a different value in this address. + MOVW $0, sig+24(FP) + MOVD ptr+0(FP), R0 + MOVD n+8(FP), R1 + + // If size is less than 16 bytes, use tail_zero to zero what remains + CMP $16, R1 + BLT tail_zero + // Get buffer offset into 16 byte aligned address for better performance + ANDS $15, R0, ZR + BNE unaligned_to_16 +aligned_to_16: + LSR $4, R1, R2 +zero_by_16: + STP.P (ZR, ZR), 16(R0) // Store pair with post index. + SUBS $1, R2, R2 + BNE zero_by_16 + ANDS $15, R1, R1 + BEQ end + + // Zero buffer with size=R1 < 16 +tail_zero: + TBZ $3, R1, tail_zero_4 + MOVD.P ZR, 8(R0) +tail_zero_4: + TBZ $2, R1, tail_zero_2 + MOVW.P ZR, 4(R0) +tail_zero_2: + TBZ $1, R1, tail_zero_1 + MOVH.P ZR, 2(R0) +tail_zero_1: + TBZ $0, R1, end + MOVB ZR, (R0) +end: + RET + +unaligned_to_16: + MOVD R0, R2 +head_loop: + MOVBU.P ZR, 1(R0) + ANDS $15, R0, ZR + BNE head_loop + // Adjust length for what remains + SUB R2, R0, R3 + SUB R3, R1 + // If size is less than 16 bytes, use tail_zero to zero what remains + CMP $16, R1 + BLT tail_zero + B aligned_to_16 diff --git a/pkg/safecopy/memcpy_amd64.s b/pkg/safecopy/memcpy_amd64.s new file mode 100644 index 000000000..129691d68 --- /dev/null +++ b/pkg/safecopy/memcpy_amd64.s @@ -0,0 +1,250 @@ +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. +// Portions Copyright 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "textflag.h" + +// handleMemcpyFault returns (the value stored in AX, the value stored in DI). +// Control is transferred to it when memcpy below receives SIGSEGV or SIGBUS, +// with the faulting address stored in AX and the signal number stored in DI. +// +// It must have the same frame configuration as memcpy so that it can undo any +// potential call frame set up by the assembler. +TEXT handleMemcpyFault(SB), NOSPLIT, $0-36 + MOVQ AX, addr+24(FP) + MOVL DI, sig+32(FP) + RET + +// memcpy copies data from src to dst. If a SIGSEGV or SIGBUS signal is received +// during the copy, it returns the address that caused the fault and the number +// of the signal that was received. Otherwise, it returns an unspecified address +// and a signal number of 0. +// +// Data is copied in order, such that if a fault happens at address p, it is +// safe to assume that all data before p-maxRegisterSize has already been +// successfully copied. +// +// The code is derived from the forward copying part of runtime.memmove. +// +// func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) +TEXT ·memcpy(SB), NOSPLIT, $0-36 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleMemcpyFault will store a different value in this address. + MOVL $0, sig+32(FP) + + MOVQ to+0(FP), DI + MOVQ from+8(FP), SI + MOVQ n+16(FP), BX + + // REP instructions have a high startup cost, so we handle small sizes + // with some straightline code. The REP MOVSQ instruction is really fast + // for large sizes. The cutover is approximately 2K. +tail: + // move_129through256 or smaller work whether or not the source and the + // destination memory regions overlap because they load all data into + // registers before writing it back. move_256through2048 on the other + // hand can be used only when the memory regions don't overlap or the copy + // direction is forward. + TESTQ BX, BX + JEQ move_0 + CMPQ BX, $2 + JBE move_1or2 + CMPQ BX, $4 + JBE move_3or4 + CMPQ BX, $8 + JB move_5through7 + JE move_8 + CMPQ BX, $16 + JBE move_9through16 + CMPQ BX, $32 + JBE move_17through32 + CMPQ BX, $64 + JBE move_33through64 + CMPQ BX, $128 + JBE move_65through128 + CMPQ BX, $256 + JBE move_129through256 + // TODO: use branch table and BSR to make this just a single dispatch + +/* + * forward copy loop + */ + CMPQ BX, $2048 + JLS move_256through2048 + + // Check alignment + MOVL SI, AX + ORL DI, AX + TESTL $7, AX + JEQ fwdBy8 + + // Do 1 byte at a time + MOVQ BX, CX + REP; MOVSB + RET + +fwdBy8: + // Do 8 bytes at a time + MOVQ BX, CX + SHRQ $3, CX + ANDQ $7, BX + REP; MOVSQ + JMP tail + +move_1or2: + MOVB (SI), AX + MOVB AX, (DI) + MOVB -1(SI)(BX*1), CX + MOVB CX, -1(DI)(BX*1) + RET +move_0: + RET +move_3or4: + MOVW (SI), AX + MOVW AX, (DI) + MOVW -2(SI)(BX*1), CX + MOVW CX, -2(DI)(BX*1) + RET +move_5through7: + MOVL (SI), AX + MOVL AX, (DI) + MOVL -4(SI)(BX*1), CX + MOVL CX, -4(DI)(BX*1) + RET +move_8: + // We need a separate case for 8 to make sure we write pointers atomically. + MOVQ (SI), AX + MOVQ AX, (DI) + RET +move_9through16: + MOVQ (SI), AX + MOVQ AX, (DI) + MOVQ -8(SI)(BX*1), CX + MOVQ CX, -8(DI)(BX*1) + RET +move_17through32: + MOVOU (SI), X0 + MOVOU X0, (DI) + MOVOU -16(SI)(BX*1), X1 + MOVOU X1, -16(DI)(BX*1) + RET +move_33through64: + MOVOU (SI), X0 + MOVOU X0, (DI) + MOVOU 16(SI), X1 + MOVOU X1, 16(DI) + MOVOU -32(SI)(BX*1), X2 + MOVOU X2, -32(DI)(BX*1) + MOVOU -16(SI)(BX*1), X3 + MOVOU X3, -16(DI)(BX*1) + RET +move_65through128: + MOVOU (SI), X0 + MOVOU X0, (DI) + MOVOU 16(SI), X1 + MOVOU X1, 16(DI) + MOVOU 32(SI), X2 + MOVOU X2, 32(DI) + MOVOU 48(SI), X3 + MOVOU X3, 48(DI) + MOVOU -64(SI)(BX*1), X4 + MOVOU X4, -64(DI)(BX*1) + MOVOU -48(SI)(BX*1), X5 + MOVOU X5, -48(DI)(BX*1) + MOVOU -32(SI)(BX*1), X6 + MOVOU X6, -32(DI)(BX*1) + MOVOU -16(SI)(BX*1), X7 + MOVOU X7, -16(DI)(BX*1) + RET +move_129through256: + MOVOU (SI), X0 + MOVOU X0, (DI) + MOVOU 16(SI), X1 + MOVOU X1, 16(DI) + MOVOU 32(SI), X2 + MOVOU X2, 32(DI) + MOVOU 48(SI), X3 + MOVOU X3, 48(DI) + MOVOU 64(SI), X4 + MOVOU X4, 64(DI) + MOVOU 80(SI), X5 + MOVOU X5, 80(DI) + MOVOU 96(SI), X6 + MOVOU X6, 96(DI) + MOVOU 112(SI), X7 + MOVOU X7, 112(DI) + MOVOU -128(SI)(BX*1), X8 + MOVOU X8, -128(DI)(BX*1) + MOVOU -112(SI)(BX*1), X9 + MOVOU X9, -112(DI)(BX*1) + MOVOU -96(SI)(BX*1), X10 + MOVOU X10, -96(DI)(BX*1) + MOVOU -80(SI)(BX*1), X11 + MOVOU X11, -80(DI)(BX*1) + MOVOU -64(SI)(BX*1), X12 + MOVOU X12, -64(DI)(BX*1) + MOVOU -48(SI)(BX*1), X13 + MOVOU X13, -48(DI)(BX*1) + MOVOU -32(SI)(BX*1), X14 + MOVOU X14, -32(DI)(BX*1) + MOVOU -16(SI)(BX*1), X15 + MOVOU X15, -16(DI)(BX*1) + RET +move_256through2048: + SUBQ $256, BX + MOVOU (SI), X0 + MOVOU X0, (DI) + MOVOU 16(SI), X1 + MOVOU X1, 16(DI) + MOVOU 32(SI), X2 + MOVOU X2, 32(DI) + MOVOU 48(SI), X3 + MOVOU X3, 48(DI) + MOVOU 64(SI), X4 + MOVOU X4, 64(DI) + MOVOU 80(SI), X5 + MOVOU X5, 80(DI) + MOVOU 96(SI), X6 + MOVOU X6, 96(DI) + MOVOU 112(SI), X7 + MOVOU X7, 112(DI) + MOVOU 128(SI), X8 + MOVOU X8, 128(DI) + MOVOU 144(SI), X9 + MOVOU X9, 144(DI) + MOVOU 160(SI), X10 + MOVOU X10, 160(DI) + MOVOU 176(SI), X11 + MOVOU X11, 176(DI) + MOVOU 192(SI), X12 + MOVOU X12, 192(DI) + MOVOU 208(SI), X13 + MOVOU X13, 208(DI) + MOVOU 224(SI), X14 + MOVOU X14, 224(DI) + MOVOU 240(SI), X15 + MOVOU X15, 240(DI) + CMPQ BX, $256 + LEAQ 256(SI), SI + LEAQ 256(DI), DI + JGE move_256through2048 + JMP tail diff --git a/pkg/safecopy/memcpy_arm64.s b/pkg/safecopy/memcpy_arm64.s new file mode 100644 index 000000000..e7e541565 --- /dev/null +++ b/pkg/safecopy/memcpy_arm64.s @@ -0,0 +1,78 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// handleMemcpyFault returns (the value stored in R0, the value stored in R1). +// Control is transferred to it when memcpy below receives SIGSEGV or SIGBUS, +// with the faulting address stored in R0 and the signal number stored in R1. +// +// It must have the same frame configuration as memcpy so that it can undo any +// potential call frame set up by the assembler. +TEXT handleMemcpyFault(SB), NOSPLIT, $0-36 + MOVD R0, addr+24(FP) + MOVW R1, sig+32(FP) + RET + +// memcpy copies data from src to dst. If a SIGSEGV or SIGBUS signal is received +// during the copy, it returns the address that caused the fault and the number +// of the signal that was received. Otherwise, it returns an unspecified address +// and a signal number of 0. +// +// Data is copied in order, such that if a fault happens at address p, it is +// safe to assume that all data before p-maxRegisterSize has already been +// successfully copied. +// +// The code is derived from the Go source runtime.memmove. +// +// func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) +TEXT ·memcpy(SB), NOSPLIT, $-8-36 + // Store 0 as the returned signal number. If we run to completion, + // this is the value the caller will see; if a signal is received, + // handleMemcpyFault will store a different value in this address. + MOVW $0, sig+32(FP) + + MOVD to+0(FP), R3 + MOVD from+8(FP), R4 + MOVD n+16(FP), R5 + CMP $0, R5 + BNE check + RET + +check: + AND $~7, R5, R7 // R7 is N&~7. + SUB R7, R5, R6 // R6 is N&7. + + // Copying forward proceeds by copying R7/8 words then copying R6 bytes. + // R3 and R4 are advanced as we copy. + + // (There may be implementations of armv8 where copying by bytes until + // at least one of source or dest is word aligned is a worthwhile + // optimization, but the on the one tested so far (xgene) it did not + // make a significance difference.) + + CMP $0, R7 // Do we need to do any word-by-word copying? + BEQ noforwardlarge + ADD R3, R7, R9 // R9 points just past where we copy by word. + +forwardlargeloop: + MOVD.P 8(R4), R8 // R8 is just a scratch register. + MOVD.P R8, 8(R3) + CMP R3, R9 + BNE forwardlargeloop + +noforwardlarge: + CMP $0, R6 // Do we need to do any byte-by-byte copying? + BNE forwardtail + RET + +forwardtail: + ADD R3, R6, R9 // R9 points just past the destination memory. + +forwardtailloop: + MOVBU.P 1(R4), R8 + MOVBU.P R8, 1(R3) + CMP R3, R9 + BNE forwardtailloop + RET diff --git a/pkg/safecopy/safecopy.go b/pkg/safecopy/safecopy.go new file mode 100644 index 000000000..2fb7e5809 --- /dev/null +++ b/pkg/safecopy/safecopy.go @@ -0,0 +1,144 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package safecopy provides an efficient implementation of functions to access +// memory that may result in SIGSEGV or SIGBUS being sent to the accessor. +package safecopy + +import ( + "fmt" + "reflect" + "runtime" + "syscall" + + "gvisor.dev/gvisor/pkg/syserror" +) + +// SegvError is returned when a safecopy function receives SIGSEGV. +type SegvError struct { + // Addr is the address at which the SIGSEGV occurred. + Addr uintptr +} + +// Error implements error.Error. +func (e SegvError) Error() string { + return fmt.Sprintf("SIGSEGV at %#x", e.Addr) +} + +// BusError is returned when a safecopy function receives SIGBUS. +type BusError struct { + // Addr is the address at which the SIGBUS occurred. + Addr uintptr +} + +// Error implements error.Error. +func (e BusError) Error() string { + return fmt.Sprintf("SIGBUS at %#x", e.Addr) +} + +// AlignmentError is returned when a safecopy function is passed an address +// that does not meet alignment requirements. +type AlignmentError struct { + // Addr is the invalid address. + Addr uintptr + + // Alignment is the required alignment. + Alignment uintptr +} + +// Error implements error.Error. +func (e AlignmentError) Error() string { + return fmt.Sprintf("address %#x is not aligned to a %d-byte boundary", e.Addr, e.Alignment) +} + +var ( + // The begin and end addresses below are for the functions that are + // checked by the signal handler. + memcpyBegin uintptr + memcpyEnd uintptr + memclrBegin uintptr + memclrEnd uintptr + swapUint32Begin uintptr + swapUint32End uintptr + swapUint64Begin uintptr + swapUint64End uintptr + compareAndSwapUint32Begin uintptr + compareAndSwapUint32End uintptr + loadUint32Begin uintptr + loadUint32End uintptr + + // savedSigSegVHandler is a pointer to the SIGSEGV handler that was + // configured before we replaced it with our own. We still call into it + // when we get a SIGSEGV that is not interesting to us. + savedSigSegVHandler uintptr + + // same a above, but for SIGBUS signals. + savedSigBusHandler uintptr +) + +// signalHandler is our replacement signal handler for SIGSEGV and SIGBUS +// signals. +func signalHandler() + +// FindEndAddress returns the end address (one byte beyond the last) of the +// function that contains the specified address (begin). +func FindEndAddress(begin uintptr) uintptr { + f := runtime.FuncForPC(begin) + if f != nil { + for p := begin; ; p++ { + g := runtime.FuncForPC(p) + if f != g { + return p + } + } + } + return begin +} + +// initializeAddresses initializes the addresses used by the signal handler. +func initializeAddresses() { + // The following functions are written in assembly language, so they won't + // be inlined by the existing compiler/linker. Tests will fail if this + // assumption is violated. + memcpyBegin = reflect.ValueOf(memcpy).Pointer() + memcpyEnd = FindEndAddress(memcpyBegin) + memclrBegin = reflect.ValueOf(memclr).Pointer() + memclrEnd = FindEndAddress(memclrBegin) + swapUint32Begin = reflect.ValueOf(swapUint32).Pointer() + swapUint32End = FindEndAddress(swapUint32Begin) + swapUint64Begin = reflect.ValueOf(swapUint64).Pointer() + swapUint64End = FindEndAddress(swapUint64Begin) + compareAndSwapUint32Begin = reflect.ValueOf(compareAndSwapUint32).Pointer() + compareAndSwapUint32End = FindEndAddress(compareAndSwapUint32Begin) + loadUint32Begin = reflect.ValueOf(loadUint32).Pointer() + loadUint32End = FindEndAddress(loadUint32Begin) +} + +func init() { + initializeAddresses() + if err := ReplaceSignalHandler(syscall.SIGSEGV, reflect.ValueOf(signalHandler).Pointer(), &savedSigSegVHandler); err != nil { + panic(fmt.Sprintf("Unable to set handler for SIGSEGV: %v", err)) + } + if err := ReplaceSignalHandler(syscall.SIGBUS, reflect.ValueOf(signalHandler).Pointer(), &savedSigBusHandler); err != nil { + panic(fmt.Sprintf("Unable to set handler for SIGBUS: %v", err)) + } + syserror.AddErrorUnwrapper(func(e error) (syscall.Errno, bool) { + switch e.(type) { + case SegvError, BusError, AlignmentError: + return syscall.EFAULT, true + default: + return 0, false + } + }) +} diff --git a/pkg/safecopy/safecopy_test.go b/pkg/safecopy/safecopy_test.go new file mode 100644 index 000000000..5818f7f9b --- /dev/null +++ b/pkg/safecopy/safecopy_test.go @@ -0,0 +1,617 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package safecopy + +import ( + "bytes" + "fmt" + "io/ioutil" + "math/rand" + "os" + "runtime/debug" + "syscall" + "testing" + "unsafe" +) + +// Size of a page in bytes. Cloned from usermem.PageSize to avoid a circular +// dependency. +const pageSize = 4096 + +func initRandom(b []byte) { + for i := range b { + b[i] = byte(rand.Intn(256)) + } +} + +func randBuf(size int) []byte { + b := make([]byte, size) + initRandom(b) + return b +} + +func TestCopyInSuccess(t *testing.T) { + // Test that CopyIn does not return an error when all pages are accessible. + const bufLen = 8192 + a := randBuf(bufLen) + b := make([]byte, bufLen) + + n, err := CopyIn(b, unsafe.Pointer(&a[0])) + if n != bufLen { + t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) + } + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if !bytes.Equal(a, b) { + t.Errorf("Buffers are not equal when they should be: %v %v", a, b) + } +} + +func TestCopyOutSuccess(t *testing.T) { + // Test that CopyOut does not return an error when all pages are + // accessible. + const bufLen = 8192 + a := randBuf(bufLen) + b := make([]byte, bufLen) + + n, err := CopyOut(unsafe.Pointer(&b[0]), a) + if n != bufLen { + t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) + } + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if !bytes.Equal(a, b) { + t.Errorf("Buffers are not equal when they should be: %v %v", a, b) + } +} + +func TestCopySuccess(t *testing.T) { + // Test that Copy does not return an error when all pages are accessible. + const bufLen = 8192 + a := randBuf(bufLen) + b := make([]byte, bufLen) + + n, err := Copy(unsafe.Pointer(&b[0]), unsafe.Pointer(&a[0]), bufLen) + if n != bufLen { + t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) + } + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if !bytes.Equal(a, b) { + t.Errorf("Buffers are not equal when they should be: %v %v", a, b) + } +} + +func TestZeroOutSuccess(t *testing.T) { + // Test that ZeroOut does not return an error when all pages are + // accessible. + const bufLen = 8192 + a := make([]byte, bufLen) + b := randBuf(bufLen) + + n, err := ZeroOut(unsafe.Pointer(&b[0]), bufLen) + if n != bufLen { + t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) + } + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if !bytes.Equal(a, b) { + t.Errorf("Buffers are not equal when they should be: %v %v", a, b) + } +} + +func TestSwapUint32Success(t *testing.T) { + // Test that SwapUint32 does not return an error when the page is + // accessible. + before := uint32(rand.Int31()) + after := uint32(rand.Int31()) + val := before + + old, err := SwapUint32(unsafe.Pointer(&val), after) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if old != before { + t.Errorf("Unexpected old value: got %v, want %v", old, before) + } + if val != after { + t.Errorf("Unexpected new value: got %v, want %v", val, after) + } +} + +func TestSwapUint32AlignmentError(t *testing.T) { + // Test that SwapUint32 returns an AlignmentError when passed an unaligned + // address. + data := new(struct{ val uint64 }) + addr := uintptr(unsafe.Pointer(&data.val)) + 1 + want := AlignmentError{Addr: addr, Alignment: 4} + if _, err := SwapUint32(unsafe.Pointer(addr), 1); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } +} + +func TestSwapUint64Success(t *testing.T) { + // Test that SwapUint64 does not return an error when the page is + // accessible. + before := uint64(rand.Int63()) + after := uint64(rand.Int63()) + // "The first word in ... an allocated struct or slice can be relied upon + // to be 64-bit aligned." - sync/atomic docs + data := new(struct{ val uint64 }) + data.val = before + + old, err := SwapUint64(unsafe.Pointer(&data.val), after) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if old != before { + t.Errorf("Unexpected old value: got %v, want %v", old, before) + } + if data.val != after { + t.Errorf("Unexpected new value: got %v, want %v", data.val, after) + } +} + +func TestSwapUint64AlignmentError(t *testing.T) { + // Test that SwapUint64 returns an AlignmentError when passed an unaligned + // address. + data := new(struct{ val1, val2 uint64 }) + addr := uintptr(unsafe.Pointer(&data.val1)) + 1 + want := AlignmentError{Addr: addr, Alignment: 8} + if _, err := SwapUint64(unsafe.Pointer(addr), 1); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } +} + +func TestCompareAndSwapUint32Success(t *testing.T) { + // Test that CompareAndSwapUint32 does not return an error when the page is + // accessible. + before := uint32(rand.Int31()) + after := uint32(rand.Int31()) + val := before + + old, err := CompareAndSwapUint32(unsafe.Pointer(&val), before, after) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if old != before { + t.Errorf("Unexpected old value: got %v, want %v", old, before) + } + if val != after { + t.Errorf("Unexpected new value: got %v, want %v", val, after) + } +} + +func TestCompareAndSwapUint32AlignmentError(t *testing.T) { + // Test that CompareAndSwapUint32 returns an AlignmentError when passed an + // unaligned address. + data := new(struct{ val uint64 }) + addr := uintptr(unsafe.Pointer(&data.val)) + 1 + want := AlignmentError{Addr: addr, Alignment: 4} + if _, err := CompareAndSwapUint32(unsafe.Pointer(addr), 0, 1); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } +} + +// withSegvErrorTestMapping calls fn with a two-page mapping. The first page +// contains random data, and the second page generates SIGSEGV when accessed. +func withSegvErrorTestMapping(t *testing.T, fn func(m []byte)) { + mapping, err := syscall.Mmap(-1, 0, 2*pageSize, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_ANONYMOUS|syscall.MAP_PRIVATE) + if err != nil { + t.Fatalf("Mmap failed: %v", err) + } + defer syscall.Munmap(mapping) + if err := syscall.Mprotect(mapping[pageSize:], syscall.PROT_NONE); err != nil { + t.Fatalf("Mprotect failed: %v", err) + } + initRandom(mapping[:pageSize]) + + fn(mapping) +} + +// withBusErrorTestMapping calls fn with a two-page mapping. The first page +// contains random data, and the second page generates SIGBUS when accessed. +func withBusErrorTestMapping(t *testing.T, fn func(m []byte)) { + f, err := ioutil.TempFile("", "sigbus_test") + if err != nil { + t.Fatalf("TempFile failed: %v", err) + } + defer f.Close() + if err := f.Truncate(pageSize); err != nil { + t.Fatalf("Truncate failed: %v", err) + } + mapping, err := syscall.Mmap(int(f.Fd()), 0, 2*pageSize, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) + if err != nil { + t.Fatalf("Mmap failed: %v", err) + } + defer syscall.Munmap(mapping) + initRandom(mapping[:pageSize]) + + fn(mapping) +} + +func TestCopyInSegvError(t *testing.T) { + // Test that CopyIn returns a SegvError when reaching a page that signals + // SIGSEGV. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { + withSegvErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + dst := randBuf(pageSize) + n, err := CopyIn(dst, src) + if n != bytesBeforeFault { + t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) + } + if want := (SegvError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { + t.Errorf("Buffers are not equal when they should be: %v %v", got, want) + } + }) + }) + } +} + +func TestCopyInBusError(t *testing.T) { + // Test that CopyIn returns a BusError when reaching a page that signals + // SIGBUS. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { + withBusErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + dst := randBuf(pageSize) + n, err := CopyIn(dst, src) + if n != bytesBeforeFault { + t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) + } + if want := (BusError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { + t.Errorf("Buffers are not equal when they should be: %v %v", got, want) + } + }) + }) + } +} + +func TestCopyOutSegvError(t *testing.T) { + // Test that CopyOut returns a SegvError when reaching a page that signals + // SIGSEGV. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { + withSegvErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + src := randBuf(pageSize) + n, err := CopyOut(dst, src) + if n != bytesBeforeFault { + t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) + } + if want := (SegvError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { + t.Errorf("Buffers are not equal when they should be: %v %v", got, want) + } + }) + }) + } +} + +func TestCopyOutBusError(t *testing.T) { + // Test that CopyOut returns a BusError when reaching a page that signals + // SIGBUS. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { + withBusErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + src := randBuf(pageSize) + n, err := CopyOut(dst, src) + if n != bytesBeforeFault { + t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) + } + if want := (BusError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { + t.Errorf("Buffers are not equal when they should be: %v %v", got, want) + } + }) + }) + } +} + +func TestCopySourceSegvError(t *testing.T) { + // Test that Copy returns a SegvError when copying from a page that signals + // SIGSEGV. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { + withSegvErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + dst := randBuf(pageSize) + n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize) + if n != uintptr(bytesBeforeFault) { + t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) + } + if want := (SegvError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { + t.Errorf("Buffers are not equal when they should be: %v %v", got, want) + } + }) + }) + } +} + +func TestCopySourceBusError(t *testing.T) { + // Test that Copy returns a BusError when copying from a page that signals + // SIGBUS. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { + withBusErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + dst := randBuf(pageSize) + n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize) + if n != uintptr(bytesBeforeFault) { + t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) + } + if want := (BusError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { + t.Errorf("Buffers are not equal when they should be: %v %v", got, want) + } + }) + }) + } +} + +func TestCopyDestinationSegvError(t *testing.T) { + // Test that Copy returns a SegvError when copying to a page that signals + // SIGSEGV. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { + withSegvErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + src := randBuf(pageSize) + n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize) + if n != uintptr(bytesBeforeFault) { + t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) + } + if want := (SegvError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { + t.Errorf("Buffers are not equal when they should be: %v %v", got, want) + } + }) + }) + } +} + +func TestCopyDestinationBusError(t *testing.T) { + // Test that Copy returns a BusError when copying to a page that signals + // SIGBUS. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { + withBusErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + src := randBuf(pageSize) + n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize) + if n != uintptr(bytesBeforeFault) { + t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) + } + if want := (BusError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { + t.Errorf("Buffers are not equal when they should be: %v %v", got, want) + } + }) + }) + } +} + +func TestZeroOutSegvError(t *testing.T) { + // Test that ZeroOut returns a SegvError when reaching a page that signals + // SIGSEGV. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting write %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { + withSegvErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + n, err := ZeroOut(dst, pageSize) + if n != uintptr(bytesBeforeFault) { + t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault) + } + if want := (SegvError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := mapping[pageSize-bytesBeforeFault:pageSize], make([]byte, bytesBeforeFault); !bytes.Equal(got, want) { + t.Errorf("Non-zero bytes in written part of mapping: %v", got) + } + }) + }) + } +} + +func TestZeroOutBusError(t *testing.T) { + // Test that ZeroOut returns a BusError when reaching a page that signals + // SIGBUS. + for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { + t.Run(fmt.Sprintf("starting write %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { + withBusErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) + n, err := ZeroOut(dst, pageSize) + if n != uintptr(bytesBeforeFault) { + t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault) + } + if want := (BusError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + if got, want := mapping[pageSize-bytesBeforeFault:pageSize], make([]byte, bytesBeforeFault); !bytes.Equal(got, want) { + t.Errorf("Non-zero bytes in written part of mapping: %v", got) + } + }) + }) + } +} + +func TestSwapUint32SegvError(t *testing.T) { + // Test that SwapUint32 returns a SegvError when reaching a page that + // signals SIGSEGV. + withSegvErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + _, err := SwapUint32(unsafe.Pointer(secondPage), 1) + if want := (SegvError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + }) +} + +func TestSwapUint32BusError(t *testing.T) { + // Test that SwapUint32 returns a BusError when reaching a page that + // signals SIGBUS. + withBusErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + _, err := SwapUint32(unsafe.Pointer(secondPage), 1) + if want := (BusError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + }) +} + +func TestSwapUint64SegvError(t *testing.T) { + // Test that SwapUint64 returns a SegvError when reaching a page that + // signals SIGSEGV. + withSegvErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + _, err := SwapUint64(unsafe.Pointer(secondPage), 1) + if want := (SegvError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + }) +} + +func TestSwapUint64BusError(t *testing.T) { + // Test that SwapUint64 returns a BusError when reaching a page that + // signals SIGBUS. + withBusErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + _, err := SwapUint64(unsafe.Pointer(secondPage), 1) + if want := (BusError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + }) +} + +func TestCompareAndSwapUint32SegvError(t *testing.T) { + // Test that CompareAndSwapUint32 returns a SegvError when reaching a page + // that signals SIGSEGV. + withSegvErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + _, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1) + if want := (SegvError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + }) +} + +func TestCompareAndSwapUint32BusError(t *testing.T) { + // Test that CompareAndSwapUint32 returns a BusError when reaching a page + // that signals SIGBUS. + withBusErrorTestMapping(t, func(mapping []byte) { + secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize + _, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1) + if want := (BusError{secondPage}); err != want { + t.Errorf("Unexpected error: got %v, want %v", err, want) + } + }) +} + +func testCopy(dst, src []byte) (panicked bool) { + defer func() { + if r := recover(); r != nil { + panicked = true + } + }() + debug.SetPanicOnFault(true) + copy(dst, src) + return +} + +func TestSegVOnMemmove(t *testing.T) { + // Test that SIGSEGVs received by runtime.memmove when *not* doing + // CopyIn or CopyOut work gets propagated to the runtime. + const bufLen = pageSize + a, err := syscall.Mmap(-1, 0, bufLen, syscall.PROT_NONE, syscall.MAP_ANON|syscall.MAP_PRIVATE) + if err != nil { + t.Fatalf("Mmap failed: %v", err) + + } + defer syscall.Munmap(a) + b := randBuf(bufLen) + + if !testCopy(b, a) { + t.Fatalf("testCopy didn't panic when it should have") + } + + if !testCopy(a, b) { + t.Fatalf("testCopy didn't panic when it should have") + } +} + +func TestSigbusOnMemmove(t *testing.T) { + // Test that SIGBUS received by runtime.memmove when *not* doing + // CopyIn or CopyOut work gets propagated to the runtime. + const bufLen = pageSize + f, err := ioutil.TempFile("", "sigbus_test") + if err != nil { + t.Fatalf("TempFile failed: %v", err) + } + os.Remove(f.Name()) + defer f.Close() + + a, err := syscall.Mmap(int(f.Fd()), 0, bufLen, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) + if err != nil { + t.Fatalf("Mmap failed: %v", err) + + } + defer syscall.Munmap(a) + b := randBuf(bufLen) + + if !testCopy(b, a) { + t.Fatalf("testCopy didn't panic when it should have") + } + + if !testCopy(a, b) { + t.Fatalf("testCopy didn't panic when it should have") + } +} diff --git a/pkg/safecopy/safecopy_unsafe.go b/pkg/safecopy/safecopy_unsafe.go new file mode 100644 index 000000000..eef028e68 --- /dev/null +++ b/pkg/safecopy/safecopy_unsafe.go @@ -0,0 +1,335 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package safecopy + +import ( + "fmt" + "syscall" + "unsafe" +) + +// maxRegisterSize is the maximum register size used in memcpy and memclr. It +// is used to decide by how much to rewind the copy (for memcpy) or zeroing +// (for memclr) before proceeding. +const maxRegisterSize = 16 + +// memcpy copies data from src to dst. If a SIGSEGV or SIGBUS signal is received +// during the copy, it returns the address that caused the fault and the number +// of the signal that was received. Otherwise, it returns an unspecified address +// and a signal number of 0. +// +// Data is copied in order, such that if a fault happens at address p, it is +// safe to assume that all data before p-maxRegisterSize has already been +// successfully copied. +// +//go:noescape +func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) + +// memclr sets the n bytes following ptr to zeroes. If a SIGSEGV or SIGBUS +// signal is received during the write, it returns the address that caused the +// fault and the number of the signal that was received. Otherwise, it returns +// an unspecified address and a signal number of 0. +// +// Data is written in order, such that if a fault happens at address p, it is +// safe to assume that all data before p-maxRegisterSize has already been +// successfully written. +// +//go:noescape +func memclr(ptr unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) + +// swapUint32 atomically stores new into *ptr and returns (the previous *ptr +// value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the +// value of old is unspecified, and sig is the number of the signal that was +// received. +// +// Preconditions: ptr must be aligned to a 4-byte boundary. +// +//go:noescape +func swapUint32(ptr unsafe.Pointer, new uint32) (old uint32, sig int32) + +// swapUint64 atomically stores new into *ptr and returns (the previous *ptr +// value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the +// value of old is unspecified, and sig is the number of the signal that was +// received. +// +// Preconditions: ptr must be aligned to a 8-byte boundary. +// +//go:noescape +func swapUint64(ptr unsafe.Pointer, new uint64) (old uint64, sig int32) + +// compareAndSwapUint32 is like sync/atomic.CompareAndSwapUint32, but returns +// (the value previously stored at ptr, 0). If a SIGSEGV or SIGBUS signal is +// received during the operation, the value of prev is unspecified, and sig is +// the number of the signal that was received. +// +// Preconditions: ptr must be aligned to a 4-byte boundary. +// +//go:noescape +func compareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (prev uint32, sig int32) + +// LoadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It +// may fail with SIGSEGV or SIGBUS if it is received while reading from ptr. +// +// Preconditions: ptr must be aligned to a 4-byte boundary. +// +//go:noescape +func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32) + +// CopyIn copies len(dst) bytes from src to dst. It returns the number of bytes +// copied and an error if SIGSEGV or SIGBUS is received while reading from src. +func CopyIn(dst []byte, src unsafe.Pointer) (int, error) { + toCopy := uintptr(len(dst)) + if len(dst) == 0 { + return 0, nil + } + + fault, sig := memcpy(unsafe.Pointer(&dst[0]), src, toCopy) + if sig == 0 { + return len(dst), nil + } + + faultN, srcN := uintptr(fault), uintptr(src) + if faultN < srcN || faultN >= srcN+toCopy { + panic(fmt.Sprintf("CopyIn raised signal %d at %#x, which is outside source [%#x, %#x)", sig, faultN, srcN, srcN+toCopy)) + } + + // memcpy might have ended the copy up to maxRegisterSize bytes before + // fault, if an instruction caused a memory access that straddled two + // pages, and the second one faulted. Try to copy up to the fault. + var done int + if faultN-srcN > maxRegisterSize { + done = int(faultN - srcN - maxRegisterSize) + } + n, err := CopyIn(dst[done:int(faultN-srcN)], unsafe.Pointer(srcN+uintptr(done))) + done += n + if err != nil { + return done, err + } + return done, errorFromFaultSignal(fault, sig) +} + +// CopyOut copies len(src) bytes from src to dst. If returns the number of +// bytes done and an error if SIGSEGV or SIGBUS is received while writing to +// dst. +func CopyOut(dst unsafe.Pointer, src []byte) (int, error) { + toCopy := uintptr(len(src)) + if toCopy == 0 { + return 0, nil + } + + fault, sig := memcpy(dst, unsafe.Pointer(&src[0]), toCopy) + if sig == 0 { + return len(src), nil + } + + faultN, dstN := uintptr(fault), uintptr(dst) + if faultN < dstN || faultN >= dstN+toCopy { + panic(fmt.Sprintf("CopyOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, faultN, dstN, dstN+toCopy)) + } + + // memcpy might have ended the copy up to maxRegisterSize bytes before + // fault, if an instruction caused a memory access that straddled two + // pages, and the second one faulted. Try to copy up to the fault. + var done int + if faultN-dstN > maxRegisterSize { + done = int(faultN - dstN - maxRegisterSize) + } + n, err := CopyOut(unsafe.Pointer(dstN+uintptr(done)), src[done:int(faultN-dstN)]) + done += n + if err != nil { + return done, err + } + return done, errorFromFaultSignal(fault, sig) +} + +// Copy copies toCopy bytes from src to dst. It returns the number of bytes +// copied and an error if SIGSEGV or SIGBUS is received while reading from src +// or writing to dst. +// +// Data is copied in order; if [src, src+toCopy) and [dst, dst+toCopy) overlap, +// the resulting contents of dst are unspecified. +func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) { + if toCopy == 0 { + return 0, nil + } + + fault, sig := memcpy(dst, src, toCopy) + if sig == 0 { + return toCopy, nil + } + + // Did the fault occur while reading from src or writing to dst? + faultN, srcN, dstN := uintptr(fault), uintptr(src), uintptr(dst) + faultAfterSrc := ^uintptr(0) + if faultN >= srcN { + faultAfterSrc = faultN - srcN + } + faultAfterDst := ^uintptr(0) + if faultN >= dstN { + faultAfterDst = faultN - dstN + } + if faultAfterSrc >= toCopy && faultAfterDst >= toCopy { + panic(fmt.Sprintf("Copy raised signal %d at %#x, which is outside source [%#x, %#x) and destination [%#x, %#x)", sig, faultN, srcN, srcN+toCopy, dstN, dstN+toCopy)) + } + faultedAfter := faultAfterSrc + if faultedAfter > faultAfterDst { + faultedAfter = faultAfterDst + } + + // memcpy might have ended the copy up to maxRegisterSize bytes before + // fault, if an instruction caused a memory access that straddled two + // pages, and the second one faulted. Try to copy up to the fault. + var done uintptr + if faultedAfter > maxRegisterSize { + done = faultedAfter - maxRegisterSize + } + n, err := Copy(unsafe.Pointer(dstN+done), unsafe.Pointer(srcN+done), faultedAfter-done) + done += n + if err != nil { + return done, err + } + return done, errorFromFaultSignal(fault, sig) +} + +// ZeroOut writes toZero zero bytes to dst. It returns the number of bytes +// written and an error if SIGSEGV or SIGBUS is received while writing to dst. +func ZeroOut(dst unsafe.Pointer, toZero uintptr) (uintptr, error) { + if toZero == 0 { + return 0, nil + } + + fault, sig := memclr(dst, toZero) + if sig == 0 { + return toZero, nil + } + + faultN, dstN := uintptr(fault), uintptr(dst) + if faultN < dstN || faultN >= dstN+toZero { + panic(fmt.Sprintf("ZeroOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, faultN, dstN, dstN+toZero)) + } + + // memclr might have ended the write up to maxRegisterSize bytes before + // fault, if an instruction caused a memory access that straddled two + // pages, and the second one faulted. Try to write up to the fault. + var done uintptr + if faultN-dstN > maxRegisterSize { + done = faultN - dstN - maxRegisterSize + } + n, err := ZeroOut(unsafe.Pointer(dstN+done), faultN-dstN-done) + done += n + if err != nil { + return done, err + } + return done, errorFromFaultSignal(fault, sig) +} + +// SwapUint32 is equivalent to sync/atomic.SwapUint32, except that it returns +// an error if SIGSEGV or SIGBUS is received while accessing ptr, or if ptr is +// not aligned to a 4-byte boundary. +func SwapUint32(ptr unsafe.Pointer, new uint32) (uint32, error) { + if addr := uintptr(ptr); addr&3 != 0 { + return 0, AlignmentError{addr, 4} + } + old, sig := swapUint32(ptr, new) + return old, errorFromFaultSignal(ptr, sig) +} + +// SwapUint64 is equivalent to sync/atomic.SwapUint64, except that it returns +// an error if SIGSEGV or SIGBUS is received while accessing ptr, or if ptr is +// not aligned to an 8-byte boundary. +func SwapUint64(ptr unsafe.Pointer, new uint64) (uint64, error) { + if addr := uintptr(ptr); addr&7 != 0 { + return 0, AlignmentError{addr, 8} + } + old, sig := swapUint64(ptr, new) + return old, errorFromFaultSignal(ptr, sig) +} + +// CompareAndSwapUint32 is equivalent to atomicbitops.CompareAndSwapUint32, +// except that it returns an error if SIGSEGV or SIGBUS is received while +// accessing ptr, or if ptr is not aligned to a 4-byte boundary. +func CompareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (uint32, error) { + if addr := uintptr(ptr); addr&3 != 0 { + return 0, AlignmentError{addr, 4} + } + prev, sig := compareAndSwapUint32(ptr, old, new) + return prev, errorFromFaultSignal(ptr, sig) +} + +// LoadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It +// may fail with SIGSEGV or SIGBUS if it is received while reading from ptr. +// +// Preconditions: ptr must be aligned to a 4-byte boundary. +func LoadUint32(ptr unsafe.Pointer) (uint32, error) { + if addr := uintptr(ptr); addr&3 != 0 { + return 0, AlignmentError{addr, 4} + } + val, sig := loadUint32(ptr) + return val, errorFromFaultSignal(ptr, sig) +} + +func errorFromFaultSignal(addr unsafe.Pointer, sig int32) error { + switch sig { + case 0: + return nil + case int32(syscall.SIGSEGV): + return SegvError{uintptr(addr)} + case int32(syscall.SIGBUS): + return BusError{uintptr(addr)} + default: + panic(fmt.Sprintf("safecopy got unexpected signal %d at address %#x", sig, addr)) + } +} + +// ReplaceSignalHandler replaces the existing signal handler for the provided +// signal with the one that handles faults in safecopy-protected functions. +// +// It stores the value of the previously set handler in previous. +// +// This function will be called on initialization in order to install safecopy +// handlers for appropriate signals. These handlers will call the previous +// handler however, and if this is function is being used externally then the +// same courtesy is expected. +func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr) error { + var sa struct { + handler uintptr + flags uint64 + restorer uintptr + mask uint64 + } + const maskLen = 8 + + // Get the existing signal handler information, and save the current + // handler. Once we replace it, we will use this pointer to fall back to + // it when we receive other signals. + if _, _, e := syscall.RawSyscall6(syscall.SYS_RT_SIGACTION, uintptr(sig), 0, uintptr(unsafe.Pointer(&sa)), maskLen, 0, 0); e != 0 { + return e + } + + // Fail if there isn't a previous handler. + if sa.handler == 0 { + return fmt.Errorf("previous handler for signal %x isn't set", sig) + } + + *previous = sa.handler + + // Install our own handler. + sa.handler = handler + if _, _, e := syscall.RawSyscall6(syscall.SYS_RT_SIGACTION, uintptr(sig), uintptr(unsafe.Pointer(&sa)), 0, maskLen, 0, 0); e != 0 { + return e + } + + return nil +} diff --git a/pkg/safecopy/sighandler_amd64.s b/pkg/safecopy/sighandler_amd64.s new file mode 100644 index 000000000..475ae48e9 --- /dev/null +++ b/pkg/safecopy/sighandler_amd64.s @@ -0,0 +1,133 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +// The signals handled by sigHandler. +#define SIGBUS 7 +#define SIGSEGV 11 + +// Offsets to the registers in context->uc_mcontext.gregs[]. +#define REG_RDI 0x68 +#define REG_RAX 0x90 +#define REG_IP 0xa8 + +// Offset to the si_addr field of siginfo. +#define SI_CODE 0x08 +#define SI_ADDR 0x10 + +// signalHandler is the signal handler for SIGSEGV and SIGBUS signals. It must +// not be set up as a handler to any other signals. +// +// If the instruction causing the signal is within a safecopy-protected +// function, the signal is handled such that execution resumes in the +// appropriate fault handling stub with AX containing the faulting address and +// DI containing the signal number. Otherwise control is transferred to the +// previously configured signal handler (savedSigSegvHandler or +// savedSigBusHandler). +// +// This function cannot be written in go because it runs whenever a signal is +// received by the thread (preempting whatever was running), which includes when +// garbage collector has stopped or isn't expecting any interactions (like +// barriers). +// +// The arguments are the following: +// DI - The signal number. +// SI - Pointer to siginfo_t structure. +// DX - Pointer to ucontext structure. +TEXT ·signalHandler(SB),NOSPLIT,$0 + // Check if the signal is from the kernel. + MOVQ $0x0, CX + CMPL CX, SI_CODE(SI) + JGE original_handler + + // Check if RIP is within the area we care about. + MOVQ REG_IP(DX), CX + CMPQ CX, ·memcpyBegin(SB) + JB not_memcpy + CMPQ CX, ·memcpyEnd(SB) + JAE not_memcpy + + // Modify the context such that execution will resume in the fault + // handler. + LEAQ handleMemcpyFault(SB), CX + JMP handle_fault + +not_memcpy: + CMPQ CX, ·memclrBegin(SB) + JB not_memclr + CMPQ CX, ·memclrEnd(SB) + JAE not_memclr + + LEAQ handleMemclrFault(SB), CX + JMP handle_fault + +not_memclr: + CMPQ CX, ·swapUint32Begin(SB) + JB not_swapuint32 + CMPQ CX, ·swapUint32End(SB) + JAE not_swapuint32 + + LEAQ handleSwapUint32Fault(SB), CX + JMP handle_fault + +not_swapuint32: + CMPQ CX, ·swapUint64Begin(SB) + JB not_swapuint64 + CMPQ CX, ·swapUint64End(SB) + JAE not_swapuint64 + + LEAQ handleSwapUint64Fault(SB), CX + JMP handle_fault + +not_swapuint64: + CMPQ CX, ·compareAndSwapUint32Begin(SB) + JB not_casuint32 + CMPQ CX, ·compareAndSwapUint32End(SB) + JAE not_casuint32 + + LEAQ handleCompareAndSwapUint32Fault(SB), CX + JMP handle_fault + +not_casuint32: + CMPQ CX, ·loadUint32Begin(SB) + JB not_loaduint32 + CMPQ CX, ·loadUint32End(SB) + JAE not_loaduint32 + + LEAQ handleLoadUint32Fault(SB), CX + JMP handle_fault + +not_loaduint32: +original_handler: + // Jump to the previous signal handler, which is likely the golang one. + XORQ CX, CX + MOVQ ·savedSigBusHandler(SB), AX + CMPL DI, $SIGSEGV + CMOVQEQ ·savedSigSegVHandler(SB), AX + JMP AX + +handle_fault: + // Entered with the address of the fault handler in RCX; store it in + // RIP. + MOVQ CX, REG_IP(DX) + + // Store the faulting address in RAX. + MOVQ SI_ADDR(SI), CX + MOVQ CX, REG_RAX(DX) + + // Store the signal number in EDI. + MOVL DI, REG_RDI(DX) + + RET diff --git a/pkg/safecopy/sighandler_arm64.s b/pkg/safecopy/sighandler_arm64.s new file mode 100644 index 000000000..53e4ac2c1 --- /dev/null +++ b/pkg/safecopy/sighandler_arm64.s @@ -0,0 +1,143 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +// The signals handled by sigHandler. +#define SIGBUS 7 +#define SIGSEGV 11 + +// Offsets to the registers in context->uc_mcontext.gregs[]. +#define REG_R0 0xB8 +#define REG_R1 0xC0 +#define REG_PC 0x1B8 + +// Offset to the si_addr field of siginfo. +#define SI_CODE 0x08 +#define SI_ADDR 0x10 + +// signalHandler is the signal handler for SIGSEGV and SIGBUS signals. It must +// not be set up as a handler to any other signals. +// +// If the instruction causing the signal is within a safecopy-protected +// function, the signal is handled such that execution resumes in the +// appropriate fault handling stub with R0 containing the faulting address and +// R1 containing the signal number. Otherwise control is transferred to the +// previously configured signal handler (savedSigSegvHandler or +// savedSigBusHandler). +// +// This function cannot be written in go because it runs whenever a signal is +// received by the thread (preempting whatever was running), which includes when +// garbage collector has stopped or isn't expecting any interactions (like +// barriers). +// +// The arguments are the following: +// R0 - The signal number. +// R1 - Pointer to siginfo_t structure. +// R2 - Pointer to ucontext structure. +TEXT ·signalHandler(SB),NOSPLIT,$0 + // Check if the signal is from the kernel, si_code > 0 means a kernel signal. + MOVD SI_CODE(R1), R7 + CMPW $0x0, R7 + BLE original_handler + + // Check if PC is within the area we care about. + MOVD REG_PC(R2), R7 + MOVD ·memcpyBegin(SB), R8 + CMP R8, R7 + BLO not_memcpy + MOVD ·memcpyEnd(SB), R8 + CMP R8, R7 + BHS not_memcpy + + // Modify the context such that execution will resume in the fault handler. + MOVD $handleMemcpyFault(SB), R7 + B handle_fault + +not_memcpy: + MOVD ·memclrBegin(SB), R8 + CMP R8, R7 + BLO not_memclr + MOVD ·memclrEnd(SB), R8 + CMP R8, R7 + BHS not_memclr + + MOVD $handleMemclrFault(SB), R7 + B handle_fault + +not_memclr: + MOVD ·swapUint32Begin(SB), R8 + CMP R8, R7 + BLO not_swapuint32 + MOVD ·swapUint32End(SB), R8 + CMP R8, R7 + BHS not_swapuint32 + + MOVD $handleSwapUint32Fault(SB), R7 + B handle_fault + +not_swapuint32: + MOVD ·swapUint64Begin(SB), R8 + CMP R8, R7 + BLO not_swapuint64 + MOVD ·swapUint64End(SB), R8 + CMP R8, R7 + BHS not_swapuint64 + + MOVD $handleSwapUint64Fault(SB), R7 + B handle_fault + +not_swapuint64: + MOVD ·compareAndSwapUint32Begin(SB), R8 + CMP R8, R7 + BLO not_casuint32 + MOVD ·compareAndSwapUint32End(SB), R8 + CMP R8, R7 + BHS not_casuint32 + + MOVD $handleCompareAndSwapUint32Fault(SB), R7 + B handle_fault + +not_casuint32: + MOVD ·loadUint32Begin(SB), R8 + CMP R8, R7 + BLO not_loaduint32 + MOVD ·loadUint32End(SB), R8 + CMP R8, R7 + BHS not_loaduint32 + + MOVD $handleLoadUint32Fault(SB), R7 + B handle_fault + +not_loaduint32: +original_handler: + // Jump to the previous signal handler, which is likely the golang one. + MOVD ·savedSigBusHandler(SB), R7 + MOVD ·savedSigSegVHandler(SB), R8 + CMPW $SIGSEGV, R0 + CSEL EQ, R8, R7, R7 + B (R7) + +handle_fault: + // Entered with the address of the fault handler in R7; store it in PC. + MOVD R7, REG_PC(R2) + + // Store the faulting address in R0. + MOVD SI_ADDR(R1), R7 + MOVD R7, REG_R0(R2) + + // Store the signal number in R1. + MOVW R0, REG_R1(R2) + + RET diff --git a/pkg/safemem/BUILD b/pkg/safemem/BUILD new file mode 100644 index 000000000..ce30382ab --- /dev/null +++ b/pkg/safemem/BUILD @@ -0,0 +1,27 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "safemem", + srcs = [ + "block_unsafe.go", + "io.go", + "safemem.go", + "seq_unsafe.go", + ], + visibility = ["//:sandbox"], + deps = [ + "//pkg/safecopy", + ], +) + +go_test( + name = "safemem_test", + size = "small", + srcs = [ + "io_test.go", + "seq_test.go", + ], + library = ":safemem", +) diff --git a/pkg/safemem/block_unsafe.go b/pkg/safemem/block_unsafe.go new file mode 100644 index 000000000..e7fd30743 --- /dev/null +++ b/pkg/safemem/block_unsafe.go @@ -0,0 +1,279 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package safemem + +import ( + "fmt" + "reflect" + "unsafe" + + "gvisor.dev/gvisor/pkg/safecopy" +) + +// A Block is a range of contiguous bytes, similar to []byte but with the +// following differences: +// +// - The memory represented by a Block may require the use of safecopy to +// access. +// +// - Block does not carry a capacity and cannot be expanded. +// +// Blocks are immutable and may be copied by value. The zero value of Block +// represents an empty range, analogous to a nil []byte. +type Block struct { + // [start, start+length) is the represented memory. + // + // start is an unsafe.Pointer to ensure that Block prevents the represented + // memory from being garbage-collected. + start unsafe.Pointer + length int + + // needSafecopy is true if accessing the represented memory requires the + // use of safecopy. + needSafecopy bool +} + +// BlockFromSafeSlice returns a Block equivalent to slice, which is safe to +// access without safecopy. +func BlockFromSafeSlice(slice []byte) Block { + return blockFromSlice(slice, false) +} + +// BlockFromUnsafeSlice returns a Block equivalent to bs, which is not safe to +// access without safecopy. +func BlockFromUnsafeSlice(slice []byte) Block { + return blockFromSlice(slice, true) +} + +func blockFromSlice(slice []byte, needSafecopy bool) Block { + if len(slice) == 0 { + return Block{} + } + return Block{ + start: unsafe.Pointer(&slice[0]), + length: len(slice), + needSafecopy: needSafecopy, + } +} + +// BlockFromSafePointer returns a Block equivalent to [ptr, ptr+len), which is +// safe to access without safecopy. +// +// Preconditions: ptr+len does not overflow. +func BlockFromSafePointer(ptr unsafe.Pointer, len int) Block { + return blockFromPointer(ptr, len, false) +} + +// BlockFromUnsafePointer returns a Block equivalent to [ptr, ptr+len), which +// is not safe to access without safecopy. +// +// Preconditions: ptr+len does not overflow. +func BlockFromUnsafePointer(ptr unsafe.Pointer, len int) Block { + return blockFromPointer(ptr, len, true) +} + +func blockFromPointer(ptr unsafe.Pointer, len int, needSafecopy bool) Block { + if uptr := uintptr(ptr); uptr+uintptr(len) < uptr { + panic(fmt.Sprintf("ptr %#x + len %#x overflows", ptr, len)) + } + return Block{ + start: ptr, + length: len, + needSafecopy: needSafecopy, + } +} + +// DropFirst returns a Block equivalent to b, but with the first n bytes +// omitted. It is analogous to the [n:] operation on a slice, except that if n +// > b.Len(), DropFirst returns an empty Block instead of panicking. +// +// Preconditions: n >= 0. +func (b Block) DropFirst(n int) Block { + if n < 0 { + panic(fmt.Sprintf("invalid n: %d", n)) + } + return b.DropFirst64(uint64(n)) +} + +// DropFirst64 is equivalent to DropFirst but takes a uint64. +func (b Block) DropFirst64(n uint64) Block { + if n >= uint64(b.length) { + return Block{} + } + return Block{ + start: unsafe.Pointer(uintptr(b.start) + uintptr(n)), + length: b.length - int(n), + needSafecopy: b.needSafecopy, + } +} + +// TakeFirst returns a Block equivalent to the first n bytes of b. It is +// analogous to the [:n] operation on a slice, except that if n > b.Len(), +// TakeFirst returns a copy of b instead of panicking. +// +// Preconditions: n >= 0. +func (b Block) TakeFirst(n int) Block { + if n < 0 { + panic(fmt.Sprintf("invalid n: %d", n)) + } + return b.TakeFirst64(uint64(n)) +} + +// TakeFirst64 is equivalent to TakeFirst but takes a uint64. +func (b Block) TakeFirst64(n uint64) Block { + if n == 0 { + return Block{} + } + if n >= uint64(b.length) { + return b + } + return Block{ + start: b.start, + length: int(n), + needSafecopy: b.needSafecopy, + } +} + +// ToSlice returns a []byte equivalent to b. +func (b Block) ToSlice() []byte { + var bs []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&bs)) + hdr.Data = uintptr(b.start) + hdr.Len = b.length + hdr.Cap = b.length + return bs +} + +// Addr returns b's start address as a uintptr. It returns uintptr instead of +// unsafe.Pointer so that code using safemem cannot obtain unsafe.Pointers +// without importing the unsafe package explicitly. +// +// Note that a uintptr is not recognized as a pointer by the garbage collector, +// such that if there are no uses of b after a call to b.Addr() and the address +// is to Go-managed memory, the returned uintptr does not prevent garbage +// collection of the pointee. +func (b Block) Addr() uintptr { + return uintptr(b.start) +} + +// Len returns b's length in bytes. +func (b Block) Len() int { + return b.length +} + +// NeedSafecopy returns true if accessing b.ToSlice() requires the use of safecopy. +func (b Block) NeedSafecopy() bool { + return b.needSafecopy +} + +// String implements fmt.Stringer.String. +func (b Block) String() string { + if uintptr(b.start) == 0 && b.length == 0 { + return "" + } + var suffix string + if b.needSafecopy { + suffix = "*" + } + return fmt.Sprintf("[%#x-%#x)%s", uintptr(b.start), uintptr(b.start)+uintptr(b.length), suffix) +} + +// Copy copies src.Len() or dst.Len() bytes, whichever is less, from src +// to dst and returns the number of bytes copied. +// +// If src and dst overlap, the data stored in dst is unspecified. +func Copy(dst, src Block) (int, error) { + if !dst.needSafecopy && !src.needSafecopy { + return copy(dst.ToSlice(), src.ToSlice()), nil + } + + n := dst.length + if n > src.length { + n = src.length + } + if n == 0 { + return 0, nil + } + + switch { + case dst.needSafecopy && !src.needSafecopy: + return safecopy.CopyOut(dst.start, src.TakeFirst(n).ToSlice()) + case !dst.needSafecopy && src.needSafecopy: + return safecopy.CopyIn(dst.TakeFirst(n).ToSlice(), src.start) + case dst.needSafecopy && src.needSafecopy: + n64, err := safecopy.Copy(dst.start, src.start, uintptr(n)) + return int(n64), err + default: + panic("unreachable") + } +} + +// Zero sets all bytes in dst to 0 and returns the number of bytes zeroed. +func Zero(dst Block) (int, error) { + if !dst.needSafecopy { + bs := dst.ToSlice() + for i := range bs { + bs[i] = 0 + } + return len(bs), nil + } + + n64, err := safecopy.ZeroOut(dst.start, uintptr(dst.length)) + return int(n64), err +} + +// Safecopy atomics are no slower than non-safecopy atomics, so use the former +// even when !b.needSafecopy to get consistent alignment checking. + +// SwapUint32 invokes safecopy.SwapUint32 on the first 4 bytes of b. +// +// Preconditions: b.Len() >= 4. +func SwapUint32(b Block, new uint32) (uint32, error) { + if b.length < 4 { + panic(fmt.Sprintf("insufficient length: %d", b.length)) + } + return safecopy.SwapUint32(b.start, new) +} + +// SwapUint64 invokes safecopy.SwapUint64 on the first 8 bytes of b. +// +// Preconditions: b.Len() >= 8. +func SwapUint64(b Block, new uint64) (uint64, error) { + if b.length < 8 { + panic(fmt.Sprintf("insufficient length: %d", b.length)) + } + return safecopy.SwapUint64(b.start, new) +} + +// CompareAndSwapUint32 invokes safecopy.CompareAndSwapUint32 on the first 4 +// bytes of b. +// +// Preconditions: b.Len() >= 4. +func CompareAndSwapUint32(b Block, old, new uint32) (uint32, error) { + if b.length < 4 { + panic(fmt.Sprintf("insufficient length: %d", b.length)) + } + return safecopy.CompareAndSwapUint32(b.start, old, new) +} + +// LoadUint32 invokes safecopy.LoadUint32 on the first 4 bytes of b. +// +// Preconditions: b.Len() >= 4. +func LoadUint32(b Block) (uint32, error) { + if b.length < 4 { + panic(fmt.Sprintf("insufficient length: %d", b.length)) + } + return safecopy.LoadUint32(b.start) +} diff --git a/pkg/safemem/io.go b/pkg/safemem/io.go new file mode 100644 index 000000000..f039a5c34 --- /dev/null +++ b/pkg/safemem/io.go @@ -0,0 +1,392 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package safemem + +import ( + "errors" + "io" + "math" +) + +// ErrEndOfBlockSeq is returned by BlockSeqWriter when attempting to write +// beyond the end of the BlockSeq. +var ErrEndOfBlockSeq = errors.New("write beyond end of BlockSeq") + +// Reader represents a streaming byte source like io.Reader. +type Reader interface { + // ReadToBlocks reads up to dsts.NumBytes() bytes into dsts and returns the + // number of bytes read. It may return a partial read without an error + // (i.e. (n, nil) where 0 < n < dsts.NumBytes()). It should not return a + // full read with an error (i.e. (dsts.NumBytes(), err) where err != nil); + // note that this differs from io.Reader.Read (in particular, io.EOF should + // not be returned if ReadToBlocks successfully reads dsts.NumBytes() + // bytes.) + ReadToBlocks(dsts BlockSeq) (uint64, error) +} + +// Writer represents a streaming byte sink like io.Writer. +type Writer interface { + // WriteFromBlocks writes up to srcs.NumBytes() bytes from srcs and returns + // the number of bytes written. It may return a partial write without an + // error (i.e. (n, nil) where 0 < n < srcs.NumBytes()). It should not + // return a full write with an error (i.e. srcs.NumBytes(), err) where err + // != nil). + WriteFromBlocks(srcs BlockSeq) (uint64, error) +} + +// ReadFullToBlocks repeatedly invokes r.ReadToBlocks until dsts.NumBytes() +// bytes have been read or ReadToBlocks returns an error. +func ReadFullToBlocks(r Reader, dsts BlockSeq) (uint64, error) { + var done uint64 + for !dsts.IsEmpty() { + n, err := r.ReadToBlocks(dsts) + done += n + if err != nil { + return done, err + } + dsts = dsts.DropFirst64(n) + } + return done, nil +} + +// WriteFullFromBlocks repeatedly invokes w.WriteFromBlocks until +// srcs.NumBytes() bytes have been written or WriteFromBlocks returns an error. +func WriteFullFromBlocks(w Writer, srcs BlockSeq) (uint64, error) { + var done uint64 + for !srcs.IsEmpty() { + n, err := w.WriteFromBlocks(srcs) + done += n + if err != nil { + return done, err + } + srcs = srcs.DropFirst64(n) + } + return done, nil +} + +// BlockSeqReader implements Reader by reading from a BlockSeq. +type BlockSeqReader struct { + Blocks BlockSeq +} + +// ReadToBlocks implements Reader.ReadToBlocks. +func (r *BlockSeqReader) ReadToBlocks(dsts BlockSeq) (uint64, error) { + n, err := CopySeq(dsts, r.Blocks) + r.Blocks = r.Blocks.DropFirst64(n) + if err != nil { + return n, err + } + if n < dsts.NumBytes() { + return n, io.EOF + } + return n, nil +} + +// BlockSeqWriter implements Writer by writing to a BlockSeq. +type BlockSeqWriter struct { + Blocks BlockSeq +} + +// WriteFromBlocks implements Writer.WriteFromBlocks. +func (w *BlockSeqWriter) WriteFromBlocks(srcs BlockSeq) (uint64, error) { + n, err := CopySeq(w.Blocks, srcs) + w.Blocks = w.Blocks.DropFirst64(n) + if err != nil { + return n, err + } + if n < srcs.NumBytes() { + return n, ErrEndOfBlockSeq + } + return n, nil +} + +// ReaderFunc implements Reader for a function with the semantics of +// Reader.ReadToBlocks. +type ReaderFunc func(dsts BlockSeq) (uint64, error) + +// ReadToBlocks implements Reader.ReadToBlocks. +func (f ReaderFunc) ReadToBlocks(dsts BlockSeq) (uint64, error) { + return f(dsts) +} + +// WriterFunc implements Writer for a function with the semantics of +// Writer.WriteFromBlocks. +type WriterFunc func(srcs BlockSeq) (uint64, error) + +// WriteFromBlocks implements Writer.WriteFromBlocks. +func (f WriterFunc) WriteFromBlocks(srcs BlockSeq) (uint64, error) { + return f(srcs) +} + +// ToIOReader implements io.Reader for a (safemem.)Reader. +// +// ToIOReader will return a successful partial read iff Reader.ReadToBlocks does +// so. +type ToIOReader struct { + Reader Reader +} + +// Read implements io.Reader.Read. +func (r ToIOReader) Read(dst []byte) (int, error) { + n, err := r.Reader.ReadToBlocks(BlockSeqOf(BlockFromSafeSlice(dst))) + return int(n), err +} + +// ToIOWriter implements io.Writer for a (safemem.)Writer. +type ToIOWriter struct { + Writer Writer +} + +// Write implements io.Writer.Write. +func (w ToIOWriter) Write(src []byte) (int, error) { + // io.Writer does not permit partial writes. + n, err := WriteFullFromBlocks(w.Writer, BlockSeqOf(BlockFromSafeSlice(src))) + return int(n), err +} + +// FromIOReader implements Reader for an io.Reader by repeatedly invoking +// io.Reader.Read until it returns an error or partial read. This is not +// thread-safe. +// +// FromIOReader will return a successful partial read iff Reader.Read does so. +type FromIOReader struct { + Reader io.Reader +} + +// ReadToBlocks implements Reader.ReadToBlocks. +func (r FromIOReader) ReadToBlocks(dsts BlockSeq) (uint64, error) { + var buf []byte + var done uint64 + for !dsts.IsEmpty() { + dst := dsts.Head() + var n int + var err error + n, buf, err = r.readToBlock(dst, buf) + done += uint64(n) + if n != dst.Len() { + return done, err + } + dsts = dsts.Tail() + if err != nil { + if dsts.IsEmpty() && err == io.EOF { + return done, nil + } + return done, err + } + } + return done, nil +} + +func (r FromIOReader) readToBlock(dst Block, buf []byte) (int, []byte, error) { + // io.Reader isn't safecopy-aware, so we have to buffer Blocks that require + // safecopy. + if !dst.NeedSafecopy() { + n, err := r.Reader.Read(dst.ToSlice()) + return n, buf, err + } + if len(buf) < dst.Len() { + buf = make([]byte, dst.Len()) + } + rn, rerr := r.Reader.Read(buf[:dst.Len()]) + wbn, wberr := Copy(dst, BlockFromSafeSlice(buf[:rn])) + if wberr != nil { + return wbn, buf, wberr + } + return wbn, buf, rerr +} + +// FromIOReaderAt implements Reader for an io.ReaderAt. Does not repeatedly +// invoke io.ReaderAt.ReadAt because ReadAt is more strict than Read. A partial +// read indicates an error. This is not thread-safe. +type FromIOReaderAt struct { + ReaderAt io.ReaderAt + Offset int64 +} + +// ReadToBlocks implements Reader.ReadToBlocks. +func (r FromIOReaderAt) ReadToBlocks(dsts BlockSeq) (uint64, error) { + var buf []byte + var done uint64 + for !dsts.IsEmpty() { + dst := dsts.Head() + var n int + var err error + n, buf, err = r.readToBlock(dst, buf) + done += uint64(n) + if n != dst.Len() { + return done, err + } + dsts = dsts.Tail() + if err != nil { + if dsts.IsEmpty() && err == io.EOF { + return done, nil + } + return done, err + } + } + return done, nil +} + +func (r FromIOReaderAt) readToBlock(dst Block, buf []byte) (int, []byte, error) { + // io.Reader isn't safecopy-aware, so we have to buffer Blocks that require + // safecopy. + if !dst.NeedSafecopy() { + n, err := r.ReaderAt.ReadAt(dst.ToSlice(), r.Offset) + r.Offset += int64(n) + return n, buf, err + } + if len(buf) < dst.Len() { + buf = make([]byte, dst.Len()) + } + rn, rerr := r.ReaderAt.ReadAt(buf[:dst.Len()], r.Offset) + r.Offset += int64(rn) + wbn, wberr := Copy(dst, BlockFromSafeSlice(buf[:rn])) + if wberr != nil { + return wbn, buf, wberr + } + return wbn, buf, rerr +} + +// FromIOWriter implements Writer for an io.Writer by repeatedly invoking +// io.Writer.Write until it returns an error or partial write. +// +// FromIOWriter will tolerate implementations of io.Writer.Write that return +// partial writes with a nil error in contravention of io.Writer's +// requirements, since Writer is permitted to do so. FromIOWriter will return a +// successful partial write iff Writer.Write does so. +type FromIOWriter struct { + Writer io.Writer +} + +// WriteFromBlocks implements Writer.WriteFromBlocks. +func (w FromIOWriter) WriteFromBlocks(srcs BlockSeq) (uint64, error) { + var buf []byte + var done uint64 + for !srcs.IsEmpty() { + src := srcs.Head() + var n int + var err error + n, buf, err = w.writeFromBlock(src, buf) + done += uint64(n) + if n != src.Len() || err != nil { + return done, err + } + srcs = srcs.Tail() + } + return done, nil +} + +func (w FromIOWriter) writeFromBlock(src Block, buf []byte) (int, []byte, error) { + // io.Writer isn't safecopy-aware, so we have to buffer Blocks that require + // safecopy. + if !src.NeedSafecopy() { + n, err := w.Writer.Write(src.ToSlice()) + return n, buf, err + } + if len(buf) < src.Len() { + buf = make([]byte, src.Len()) + } + bufn, buferr := Copy(BlockFromSafeSlice(buf[:src.Len()]), src) + wn, werr := w.Writer.Write(buf[:bufn]) + if werr != nil { + return wn, buf, werr + } + return wn, buf, buferr +} + +// FromVecReaderFunc implements Reader for a function that reads data into a +// [][]byte and returns the number of bytes read as an int64. +type FromVecReaderFunc struct { + ReadVec func(dsts [][]byte) (int64, error) +} + +// ReadToBlocks implements Reader.ReadToBlocks. +// +// ReadToBlocks calls r.ReadVec at most once. +func (r FromVecReaderFunc) ReadToBlocks(dsts BlockSeq) (uint64, error) { + if dsts.IsEmpty() { + return 0, nil + } + // Ensure that we don't pass a [][]byte with a total length > MaxInt64. + dsts = dsts.TakeFirst64(uint64(math.MaxInt64)) + dstSlices := make([][]byte, 0, dsts.NumBlocks()) + // Buffer Blocks that require safecopy. + for tmp := dsts; !tmp.IsEmpty(); tmp = tmp.Tail() { + dst := tmp.Head() + if dst.NeedSafecopy() { + dstSlices = append(dstSlices, make([]byte, dst.Len())) + } else { + dstSlices = append(dstSlices, dst.ToSlice()) + } + } + rn, rerr := r.ReadVec(dstSlices) + dsts = dsts.TakeFirst64(uint64(rn)) + var done uint64 + var i int + for !dsts.IsEmpty() { + dst := dsts.Head() + if dst.NeedSafecopy() { + n, err := Copy(dst, BlockFromSafeSlice(dstSlices[i])) + done += uint64(n) + if err != nil { + return done, err + } + } else { + done += uint64(dst.Len()) + } + dsts = dsts.Tail() + i++ + } + return done, rerr +} + +// FromVecWriterFunc implements Writer for a function that writes data from a +// [][]byte and returns the number of bytes written. +type FromVecWriterFunc struct { + WriteVec func(srcs [][]byte) (int64, error) +} + +// WriteFromBlocks implements Writer.WriteFromBlocks. +// +// WriteFromBlocks calls w.WriteVec at most once. +func (w FromVecWriterFunc) WriteFromBlocks(srcs BlockSeq) (uint64, error) { + if srcs.IsEmpty() { + return 0, nil + } + // Ensure that we don't pass a [][]byte with a total length > MaxInt64. + srcs = srcs.TakeFirst64(uint64(math.MaxInt64)) + srcSlices := make([][]byte, 0, srcs.NumBlocks()) + // Buffer Blocks that require safecopy. + var buferr error + for tmp := srcs; !tmp.IsEmpty(); tmp = tmp.Tail() { + src := tmp.Head() + if src.NeedSafecopy() { + slice := make([]byte, src.Len()) + n, err := Copy(BlockFromSafeSlice(slice), src) + srcSlices = append(srcSlices, slice[:n]) + if err != nil { + buferr = err + break + } + } else { + srcSlices = append(srcSlices, src.ToSlice()) + } + } + n, err := w.WriteVec(srcSlices) + if err != nil { + return uint64(n), err + } + return uint64(n), buferr +} diff --git a/pkg/safemem/io_test.go b/pkg/safemem/io_test.go new file mode 100644 index 000000000..629741bee --- /dev/null +++ b/pkg/safemem/io_test.go @@ -0,0 +1,199 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package safemem + +import ( + "bytes" + "io" + "testing" +) + +func makeBlocks(slices ...[]byte) []Block { + blocks := make([]Block, 0, len(slices)) + for _, s := range slices { + blocks = append(blocks, BlockFromSafeSlice(s)) + } + return blocks +} + +func TestFromIOReaderFullRead(t *testing.T) { + r := FromIOReader{bytes.NewBufferString("foobar")} + dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) + n, err := r.ReadToBlocks(BlockSeqFromSlice(dsts)) + if wantN := uint64(6); n != wantN || err != nil { + t.Errorf("ReadToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + for i, want := range [][]byte{[]byte("foo"), []byte("bar")} { + if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { + t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) + } + } +} + +type eofHidingReader struct { + Reader io.Reader +} + +func (r eofHidingReader) Read(dst []byte) (int, error) { + n, err := r.Reader.Read(dst) + if err == io.EOF { + return n, nil + } + return n, err +} + +func TestFromIOReaderPartialRead(t *testing.T) { + r := FromIOReader{eofHidingReader{bytes.NewBufferString("foob")}} + dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) + n, err := r.ReadToBlocks(BlockSeqFromSlice(dsts)) + // FromIOReader should stop after the eofHidingReader returns (1, nil) + // for a 3-byte read. + if wantN := uint64(4); n != wantN || err != nil { + t.Errorf("ReadToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + for i, want := range [][]byte{[]byte("foo"), []byte("b\x00\x00")} { + if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { + t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) + } + } +} + +type singleByteReader struct { + Reader io.Reader +} + +func (r singleByteReader) Read(dst []byte) (int, error) { + if len(dst) == 0 { + return r.Reader.Read(dst) + } + return r.Reader.Read(dst[:1]) +} + +func TestSingleByteReader(t *testing.T) { + r := FromIOReader{singleByteReader{bytes.NewBufferString("foobar")}} + dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) + n, err := r.ReadToBlocks(BlockSeqFromSlice(dsts)) + // FromIOReader should stop after the singleByteReader returns (1, nil) + // for a 3-byte read. + if wantN := uint64(1); n != wantN || err != nil { + t.Errorf("ReadToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + for i, want := range [][]byte{[]byte("f\x00\x00"), []byte("\x00\x00\x00")} { + if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { + t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) + } + } +} + +func TestReadFullToBlocks(t *testing.T) { + r := FromIOReader{singleByteReader{bytes.NewBufferString("foobar")}} + dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) + n, err := ReadFullToBlocks(r, BlockSeqFromSlice(dsts)) + // ReadFullToBlocks should call into FromIOReader => singleByteReader + // repeatedly until dsts is exhausted. + if wantN := uint64(6); n != wantN || err != nil { + t.Errorf("ReadFullToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + for i, want := range [][]byte{[]byte("foo"), []byte("bar")} { + if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { + t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) + } + } +} + +func TestFromIOWriterFullWrite(t *testing.T) { + srcs := makeBlocks([]byte("foo"), []byte("bar")) + var dst bytes.Buffer + w := FromIOWriter{&dst} + n, err := w.WriteFromBlocks(BlockSeqFromSlice(srcs)) + if wantN := uint64(6); n != wantN || err != nil { + t.Errorf("WriteFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := dst.Bytes(), []byte("foobar"); !bytes.Equal(got, want) { + t.Errorf("dst: got %q, wanted %q", got, want) + } +} + +type limitedWriter struct { + Writer io.Writer + Done int + Limit int +} + +func (w *limitedWriter) Write(src []byte) (int, error) { + count := len(src) + if count > (w.Limit - w.Done) { + count = w.Limit - w.Done + } + n, err := w.Writer.Write(src[:count]) + w.Done += n + return n, err +} + +func TestFromIOWriterPartialWrite(t *testing.T) { + srcs := makeBlocks([]byte("foo"), []byte("bar")) + var dst bytes.Buffer + w := FromIOWriter{&limitedWriter{&dst, 0, 4}} + n, err := w.WriteFromBlocks(BlockSeqFromSlice(srcs)) + // FromIOWriter should stop after the limitedWriter returns (1, nil) for a + // 3-byte write. + if wantN := uint64(4); n != wantN || err != nil { + t.Errorf("WriteFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := dst.Bytes(), []byte("foob"); !bytes.Equal(got, want) { + t.Errorf("dst: got %q, wanted %q", got, want) + } +} + +type singleByteWriter struct { + Writer io.Writer +} + +func (w singleByteWriter) Write(src []byte) (int, error) { + if len(src) == 0 { + return w.Writer.Write(src) + } + return w.Writer.Write(src[:1]) +} + +func TestSingleByteWriter(t *testing.T) { + srcs := makeBlocks([]byte("foo"), []byte("bar")) + var dst bytes.Buffer + w := FromIOWriter{singleByteWriter{&dst}} + n, err := w.WriteFromBlocks(BlockSeqFromSlice(srcs)) + // FromIOWriter should stop after the singleByteWriter returns (1, nil) + // for a 3-byte write. + if wantN := uint64(1); n != wantN || err != nil { + t.Errorf("WriteFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := dst.Bytes(), []byte("f"); !bytes.Equal(got, want) { + t.Errorf("dst: got %q, wanted %q", got, want) + } +} + +func TestWriteFullToBlocks(t *testing.T) { + srcs := makeBlocks([]byte("foo"), []byte("bar")) + var dst bytes.Buffer + w := FromIOWriter{singleByteWriter{&dst}} + n, err := WriteFullFromBlocks(w, BlockSeqFromSlice(srcs)) + // WriteFullToBlocks should call into FromIOWriter => singleByteWriter + // repeatedly until srcs is exhausted. + if wantN := uint64(6); n != wantN || err != nil { + t.Errorf("WriteFullFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := dst.Bytes(), []byte("foobar"); !bytes.Equal(got, want) { + t.Errorf("dst: got %q, wanted %q", got, want) + } +} diff --git a/pkg/safemem/safemem.go b/pkg/safemem/safemem.go new file mode 100644 index 000000000..3e70d33a2 --- /dev/null +++ b/pkg/safemem/safemem.go @@ -0,0 +1,16 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package safemem provides the Block and BlockSeq types. +package safemem diff --git a/pkg/safemem/seq_test.go b/pkg/safemem/seq_test.go new file mode 100644 index 000000000..eba4bb535 --- /dev/null +++ b/pkg/safemem/seq_test.go @@ -0,0 +1,196 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package safemem + +import ( + "bytes" + "reflect" + "testing" +) + +type blockSeqTest struct { + desc string + + pieces []string + haveOffset bool + offset uint64 + haveLimit bool + limit uint64 + + want string +} + +func (t blockSeqTest) NonEmptyByteSlices() [][]byte { + // t is a value, so we can mutate it freely. + slices := make([][]byte, 0, len(t.pieces)) + for _, str := range t.pieces { + if t.haveOffset { + strOff := t.offset + if strOff > uint64(len(str)) { + strOff = uint64(len(str)) + } + str = str[strOff:] + t.offset -= strOff + } + if t.haveLimit { + strLim := t.limit + if strLim > uint64(len(str)) { + strLim = uint64(len(str)) + } + str = str[:strLim] + t.limit -= strLim + } + if len(str) != 0 { + slices = append(slices, []byte(str)) + } + } + return slices +} + +func (t blockSeqTest) BlockSeq() BlockSeq { + blocks := make([]Block, 0, len(t.pieces)) + for _, str := range t.pieces { + blocks = append(blocks, BlockFromSafeSlice([]byte(str))) + } + bs := BlockSeqFromSlice(blocks) + if t.haveOffset { + bs = bs.DropFirst64(t.offset) + } + if t.haveLimit { + bs = bs.TakeFirst64(t.limit) + } + return bs +} + +var blockSeqTests = []blockSeqTest{ + { + desc: "Empty sequence", + }, + { + desc: "Sequence of length 1", + pieces: []string{"foobar"}, + want: "foobar", + }, + { + desc: "Sequence of length 2", + pieces: []string{"foo", "bar"}, + want: "foobar", + }, + { + desc: "Empty Blocks", + pieces: []string{"", "foo", "", "", "bar", ""}, + want: "foobar", + }, + { + desc: "Sequence with non-zero offset", + pieces: []string{"foo", "bar"}, + haveOffset: true, + offset: 2, + want: "obar", + }, + { + desc: "Sequence with non-maximal limit", + pieces: []string{"foo", "bar"}, + haveLimit: true, + limit: 5, + want: "fooba", + }, + { + desc: "Sequence with offset and limit", + pieces: []string{"foo", "bar"}, + haveOffset: true, + offset: 2, + haveLimit: true, + limit: 3, + want: "oba", + }, +} + +func TestBlockSeqNumBytes(t *testing.T) { + for _, test := range blockSeqTests { + t.Run(test.desc, func(t *testing.T) { + if got, want := test.BlockSeq().NumBytes(), uint64(len(test.want)); got != want { + t.Errorf("NumBytes: got %d, wanted %d", got, want) + } + }) + } +} + +func TestBlockSeqIterBlocks(t *testing.T) { + // Tests BlockSeq iteration using Head/Tail. + for _, test := range blockSeqTests { + t.Run(test.desc, func(t *testing.T) { + srcs := test.BlockSeq() + // "Note that a non-nil empty slice and a nil slice ... are not + // deeply equal." - reflect + slices := make([][]byte, 0, 0) + for !srcs.IsEmpty() { + src := srcs.Head() + slices = append(slices, src.ToSlice()) + nextSrcs := srcs.Tail() + if got, want := nextSrcs.NumBytes(), srcs.NumBytes()-uint64(src.Len()); got != want { + t.Fatalf("%v.Tail(): got %v (%d bytes), wanted %d bytes", srcs, nextSrcs, got, want) + } + srcs = nextSrcs + } + if wantSlices := test.NonEmptyByteSlices(); !reflect.DeepEqual(slices, wantSlices) { + t.Errorf("Accumulated slices: got %v, wanted %v", slices, wantSlices) + } + }) + } +} + +func TestBlockSeqIterBytes(t *testing.T) { + // Tests BlockSeq iteration using Head/DropFirst. + for _, test := range blockSeqTests { + t.Run(test.desc, func(t *testing.T) { + srcs := test.BlockSeq() + var dst bytes.Buffer + for !srcs.IsEmpty() { + src := srcs.Head() + var b [1]byte + n, err := Copy(BlockFromSafeSlice(b[:]), src) + if n != 1 || err != nil { + t.Fatalf("Copy: got (%v, %v), wanted (1, nil)", n, err) + } + dst.WriteByte(b[0]) + nextSrcs := srcs.DropFirst(1) + if got, want := nextSrcs.NumBytes(), srcs.NumBytes()-1; got != want { + t.Fatalf("%v.DropFirst(1): got %v (%d bytes), wanted %d bytes", srcs, nextSrcs, got, want) + } + srcs = nextSrcs + } + if got := string(dst.Bytes()); got != test.want { + t.Errorf("Copied string: got %q, wanted %q", got, test.want) + } + }) + } +} + +func TestBlockSeqDropBeyondLimit(t *testing.T) { + blocks := []Block{BlockFromSafeSlice([]byte("123")), BlockFromSafeSlice([]byte("4"))} + bs := BlockSeqFromSlice(blocks) + if got, want := bs.NumBytes(), uint64(4); got != want { + t.Errorf("%v.NumBytes(): got %d, wanted %d", bs, got, want) + } + bs = bs.TakeFirst(1) + if got, want := bs.NumBytes(), uint64(1); got != want { + t.Errorf("%v.NumBytes(): got %d, wanted %d", bs, got, want) + } + bs = bs.DropFirst(2) + if got, want := bs.NumBytes(), uint64(0); got != want { + t.Errorf("%v.NumBytes(): got %d, wanted %d", bs, got, want) + } +} diff --git a/pkg/safemem/seq_unsafe.go b/pkg/safemem/seq_unsafe.go new file mode 100644 index 000000000..354a95dde --- /dev/null +++ b/pkg/safemem/seq_unsafe.go @@ -0,0 +1,299 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package safemem + +import ( + "bytes" + "fmt" + "reflect" + "unsafe" +) + +// A BlockSeq represents a sequence of Blocks, each of which has non-zero +// length. +// +// BlockSeqs are immutable and may be copied by value. The zero value of +// BlockSeq represents an empty sequence. +type BlockSeq struct { + // If length is 0, then the BlockSeq is empty. Invariants: data == 0; + // offset == 0; limit == 0. + // + // If length is -1, then the BlockSeq represents the single Block{data, + // limit, false}. Invariants: offset == 0; limit > 0; limit does not + // overflow the range of an int. + // + // If length is -2, then the BlockSeq represents the single Block{data, + // limit, true}. Invariants: offset == 0; limit > 0; limit does not + // overflow the range of an int. + // + // Otherwise, length >= 2, and the BlockSeq represents the `length` Blocks + // in the array of Blocks starting at address `data`, starting at `offset` + // bytes into the first Block and limited to the following `limit` bytes. + // Invariants: data != 0; offset < len(data[0]); limit > 0; offset+limit <= + // the combined length of all Blocks in the array; the first Block in the + // array has non-zero length. + // + // length is never 1; sequences consisting of a single Block are always + // stored inline (with length < 0). + data unsafe.Pointer + length int + offset int + limit uint64 +} + +// BlockSeqOf returns a BlockSeq representing the single Block b. +func BlockSeqOf(b Block) BlockSeq { + bs := BlockSeq{ + data: b.start, + length: -1, + limit: uint64(b.length), + } + if b.needSafecopy { + bs.length = -2 + } + return bs +} + +// BlockSeqFromSlice returns a BlockSeq representing all Blocks in slice. +// If slice contains Blocks with zero length, BlockSeq will skip them during +// iteration. +// +// Whether the returned BlockSeq shares memory with slice is unspecified; +// clients should avoid mutating slices passed to BlockSeqFromSlice. +// +// Preconditions: The combined length of all Blocks in slice <= math.MaxUint64. +func BlockSeqFromSlice(slice []Block) BlockSeq { + slice = skipEmpty(slice) + var limit uint64 + for _, b := range slice { + sum := limit + uint64(b.Len()) + if sum < limit { + panic("BlockSeq length overflows uint64") + } + limit = sum + } + return blockSeqFromSliceLimited(slice, limit) +} + +// Preconditions: The combined length of all Blocks in slice <= limit. If +// len(slice) != 0, the first Block in slice has non-zero length, and limit > +// 0. +func blockSeqFromSliceLimited(slice []Block, limit uint64) BlockSeq { + switch len(slice) { + case 0: + return BlockSeq{} + case 1: + return BlockSeqOf(slice[0].TakeFirst64(limit)) + default: + return BlockSeq{ + data: unsafe.Pointer(&slice[0]), + length: len(slice), + limit: limit, + } + } +} + +func skipEmpty(slice []Block) []Block { + for i, b := range slice { + if b.Len() != 0 { + return slice[i:] + } + } + return nil +} + +// IsEmpty returns true if bs contains no Blocks. +// +// Invariants: bs.IsEmpty() == (bs.NumBlocks() == 0) == (bs.NumBytes() == 0). +// (Of these, prefer to use bs.IsEmpty().) +func (bs BlockSeq) IsEmpty() bool { + return bs.length == 0 +} + +// NumBlocks returns the number of Blocks in bs. +func (bs BlockSeq) NumBlocks() int { + // In general, we have to count: if bs represents a windowed slice then the + // slice may contain Blocks with zero length, and bs.length may be larger + // than the actual number of Blocks due to bs.limit. + var n int + for !bs.IsEmpty() { + n++ + bs = bs.Tail() + } + return n +} + +// NumBytes returns the sum of Block.Len() for all Blocks in bs. +func (bs BlockSeq) NumBytes() uint64 { + return bs.limit +} + +// Head returns the first Block in bs. +// +// Preconditions: !bs.IsEmpty(). +func (bs BlockSeq) Head() Block { + if bs.length == 0 { + panic("empty BlockSeq") + } + if bs.length < 0 { + return bs.internalBlock() + } + return (*Block)(bs.data).DropFirst(bs.offset).TakeFirst64(bs.limit) +} + +// Preconditions: bs.length < 0. +func (bs BlockSeq) internalBlock() Block { + return Block{ + start: bs.data, + length: int(bs.limit), + needSafecopy: bs.length == -2, + } +} + +// Tail returns a BlockSeq consisting of all Blocks in bs after the first. +// +// Preconditions: !bs.IsEmpty(). +func (bs BlockSeq) Tail() BlockSeq { + if bs.length == 0 { + panic("empty BlockSeq") + } + if bs.length < 0 { + return BlockSeq{} + } + head := (*Block)(bs.data).DropFirst(bs.offset) + headLen := uint64(head.Len()) + if headLen >= bs.limit { + // The head Block exhausts the limit, so the tail is empty. + return BlockSeq{} + } + var extSlice []Block + extSliceHdr := (*reflect.SliceHeader)(unsafe.Pointer(&extSlice)) + extSliceHdr.Data = uintptr(bs.data) + extSliceHdr.Len = bs.length + extSliceHdr.Cap = bs.length + tailSlice := skipEmpty(extSlice[1:]) + tailLimit := bs.limit - headLen + return blockSeqFromSliceLimited(tailSlice, tailLimit) +} + +// DropFirst returns a BlockSeq equivalent to bs, but with the first n bytes +// omitted. If n > bs.NumBytes(), DropFirst returns an empty BlockSeq. +// +// Preconditions: n >= 0. +func (bs BlockSeq) DropFirst(n int) BlockSeq { + if n < 0 { + panic(fmt.Sprintf("invalid n: %d", n)) + } + return bs.DropFirst64(uint64(n)) +} + +// DropFirst64 is equivalent to DropFirst but takes an uint64. +func (bs BlockSeq) DropFirst64(n uint64) BlockSeq { + if n >= bs.limit { + return BlockSeq{} + } + for { + // Calling bs.Head() here is surprisingly expensive, so inline getting + // the head's length. + var headLen uint64 + if bs.length < 0 { + headLen = bs.limit + } else { + headLen = uint64((*Block)(bs.data).Len() - bs.offset) + } + if n < headLen { + // Dropping ends partway through the head Block. + if bs.length < 0 { + return BlockSeqOf(bs.internalBlock().DropFirst64(n)) + } + bs.offset += int(n) + bs.limit -= n + return bs + } + n -= headLen + bs = bs.Tail() + } +} + +// TakeFirst returns a BlockSeq equivalent to the first n bytes of bs. If n > +// bs.NumBytes(), TakeFirst returns a BlockSeq equivalent to bs. +// +// Preconditions: n >= 0. +func (bs BlockSeq) TakeFirst(n int) BlockSeq { + if n < 0 { + panic(fmt.Sprintf("invalid n: %d", n)) + } + return bs.TakeFirst64(uint64(n)) +} + +// TakeFirst64 is equivalent to TakeFirst but takes a uint64. +func (bs BlockSeq) TakeFirst64(n uint64) BlockSeq { + if n == 0 { + return BlockSeq{} + } + if bs.limit > n { + bs.limit = n + } + return bs +} + +// String implements fmt.Stringer.String. +func (bs BlockSeq) String() string { + var buf bytes.Buffer + buf.WriteByte('[') + var sep string + for !bs.IsEmpty() { + buf.WriteString(sep) + sep = " " + buf.WriteString(bs.Head().String()) + bs = bs.Tail() + } + buf.WriteByte(']') + return buf.String() +} + +// CopySeq copies srcs.NumBytes() or dsts.NumBytes() bytes, whichever is less, +// from srcs to dsts and returns the number of bytes copied. +// +// If srcs and dsts overlap, the data stored in dsts is unspecified. +func CopySeq(dsts, srcs BlockSeq) (uint64, error) { + var done uint64 + for !dsts.IsEmpty() && !srcs.IsEmpty() { + dst := dsts.Head() + src := srcs.Head() + n, err := Copy(dst, src) + done += uint64(n) + if err != nil { + return done, err + } + dsts = dsts.DropFirst(n) + srcs = srcs.DropFirst(n) + } + return done, nil +} + +// ZeroSeq sets all bytes in dsts to 0 and returns the number of bytes zeroed. +func ZeroSeq(dsts BlockSeq) (uint64, error) { + var done uint64 + for !dsts.IsEmpty() { + n, err := Zero(dsts.Head()) + done += uint64(n) + if err != nil { + return done, err + } + dsts = dsts.DropFirst(n) + } + return done, nil +} diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD index 51ca09b24..34c0a867d 100644 --- a/pkg/sentry/arch/BUILD +++ b/pkg/sentry/arch/BUILD @@ -30,13 +30,13 @@ go_library( ":registers_go_proto", "//pkg/abi/linux", "//pkg/binary", + "//pkg/context", "//pkg/cpuid", "//pkg/log", - "//pkg/sentry/context", "//pkg/sentry/limits", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go index 81ec98a77..1d11cc472 100644 --- a/pkg/sentry/arch/arch.go +++ b/pkg/sentry/arch/arch.go @@ -24,7 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // Arch describes an architecture. diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go index ea4dedbdf..3b6987665 100644 --- a/pkg/sentry/arch/arch_aarch64.go +++ b/pkg/sentry/arch/arch_aarch64.go @@ -25,8 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go index 2aa08b1a9..85d6acc0f 100644 --- a/pkg/sentry/arch/arch_amd64.go +++ b/pkg/sentry/arch/arch_amd64.go @@ -25,7 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // Host specifies the host architecture. diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go index 0d5b7d317..94f1a808f 100644 --- a/pkg/sentry/arch/arch_arm64.go +++ b/pkg/sentry/arch/arch_arm64.go @@ -21,7 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // Host specifies the host architecture. diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go index 84f11b0d1..d388ee9cf 100644 --- a/pkg/sentry/arch/arch_state_x86.go +++ b/pkg/sentry/arch/arch_state_x86.go @@ -21,7 +21,7 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/cpuid" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // ErrFloatingPoint indicates a failed restore due to unusable floating point diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go index 9f41e566f..a18093155 100644 --- a/pkg/sentry/arch/arch_x86.go +++ b/pkg/sentry/arch/arch_x86.go @@ -25,9 +25,9 @@ import ( "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // System-related constants for x86. diff --git a/pkg/sentry/arch/auxv.go b/pkg/sentry/arch/auxv.go index 4546b2ef9..2b4c8f3fc 100644 --- a/pkg/sentry/arch/auxv.go +++ b/pkg/sentry/arch/auxv.go @@ -15,7 +15,7 @@ package arch import ( - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // An AuxEntry represents an entry in an ELF auxiliary vector. diff --git a/pkg/sentry/arch/signal.go b/pkg/sentry/arch/signal.go index 402e46025..8b03d0187 100644 --- a/pkg/sentry/arch/signal.go +++ b/pkg/sentry/arch/signal.go @@ -16,7 +16,7 @@ package arch import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // SignalAct represents the action that should be taken when a signal is diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index 1e4f9c3c2..81b92bb43 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -23,7 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // SignalContext64 is equivalent to struct sigcontext, the type passed as the diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go index 7d0e98935..4f4cc46a8 100644 --- a/pkg/sentry/arch/signal_arm64.go +++ b/pkg/sentry/arch/signal_arm64.go @@ -19,7 +19,7 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // SignalContext64 is equivalent to struct sigcontext, the type passed as the diff --git a/pkg/sentry/arch/signal_stack.go b/pkg/sentry/arch/signal_stack.go index d324da705..1a6056171 100644 --- a/pkg/sentry/arch/signal_stack.go +++ b/pkg/sentry/arch/signal_stack.go @@ -17,7 +17,7 @@ package arch import ( - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/arch/stack.go b/pkg/sentry/arch/stack.go index 7472c3c61..09bceabc9 100644 --- a/pkg/sentry/arch/stack.go +++ b/pkg/sentry/arch/stack.go @@ -18,8 +18,8 @@ import ( "encoding/binary" "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/usermem" ) // Stack is a simple wrapper around a usermem.IO and an address. diff --git a/pkg/sentry/context/BUILD b/pkg/sentry/context/BUILD deleted file mode 100644 index e13a9ce20..000000000 --- a/pkg/sentry/context/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "context", - srcs = ["context.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/amutex", - "//pkg/log", - ], -) diff --git a/pkg/sentry/context/context.go b/pkg/sentry/context/context.go deleted file mode 100644 index 23e009ef3..000000000 --- a/pkg/sentry/context/context.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package context defines an internal context type. -// -// The given Context conforms to the standard Go context, but mandates -// additional methods that are specific to the kernel internals. Note however, -// that the Context described by this package carries additional constraints -// regarding concurrent access and retaining beyond the scope of a call. -// -// See the Context type for complete details. -package context - -import ( - "context" - "time" - - "gvisor.dev/gvisor/pkg/amutex" - "gvisor.dev/gvisor/pkg/log" -) - -type contextID int - -// Globally accessible values from a context. These keys are defined in the -// context package to resolve dependency cycles by not requiring the caller to -// import packages usually required to get these information. -const ( - // CtxThreadGroupID is the current thread group ID when a context represents - // a task context. The value is represented as an int32. - CtxThreadGroupID contextID = iota -) - -// ThreadGroupIDFromContext returns the current thread group ID when ctx -// represents a task context. -func ThreadGroupIDFromContext(ctx Context) (tgid int32, ok bool) { - if tgid := ctx.Value(CtxThreadGroupID); tgid != nil { - return tgid.(int32), true - } - return 0, false -} - -// A Context represents a thread of execution (hereafter "goroutine" to reflect -// Go idiosyncrasy). It carries state associated with the goroutine across API -// boundaries. -// -// While Context exists for essentially the same reasons as Go's standard -// context.Context, the standard type represents the state of an operation -// rather than that of a goroutine. This is a critical distinction: -// -// - Unlike context.Context, which "may be passed to functions running in -// different goroutines", it is *not safe* to use the same Context in multiple -// concurrent goroutines. -// -// - It is *not safe* to retain a Context passed to a function beyond the scope -// of that function call. -// -// In both cases, values extracted from the Context should be used instead. -type Context interface { - log.Logger - amutex.Sleeper - context.Context - - // UninterruptibleSleepStart indicates the beginning of an uninterruptible - // sleep state (equivalent to Linux's TASK_UNINTERRUPTIBLE). If deactivate - // is true and the Context represents a Task, the Task's AddressSpace is - // deactivated. - UninterruptibleSleepStart(deactivate bool) - - // UninterruptibleSleepFinish indicates the end of an uninterruptible sleep - // state that was begun by a previous call to UninterruptibleSleepStart. If - // activate is true and the Context represents a Task, the Task's - // AddressSpace is activated. Normally activate is the same value as the - // deactivate parameter passed to UninterruptibleSleepStart. - UninterruptibleSleepFinish(activate bool) -} - -// NoopSleeper is a noop implementation of amutex.Sleeper and UninterruptibleSleep -// methods for anonymous embedding in other types that do not implement sleeps. -type NoopSleeper struct { - amutex.NoopSleeper -} - -// UninterruptibleSleepStart does nothing. -func (NoopSleeper) UninterruptibleSleepStart(bool) {} - -// UninterruptibleSleepFinish does nothing. -func (NoopSleeper) UninterruptibleSleepFinish(bool) {} - -// Deadline returns zero values, meaning no deadline. -func (NoopSleeper) Deadline() (time.Time, bool) { - return time.Time{}, false -} - -// Done returns nil. -func (NoopSleeper) Done() <-chan struct{} { - return nil -} - -// Err returns nil. -func (NoopSleeper) Err() error { - return nil -} - -// logContext implements basic logging. -type logContext struct { - log.Logger - NoopSleeper -} - -// Value implements Context.Value. -func (logContext) Value(key interface{}) interface{} { - return nil -} - -// bgContext is the context returned by context.Background. -var bgContext = &logContext{Logger: log.Log()} - -// Background returns an empty context using the default logger. -// -// Users should be wary of using a Background context. Please tag any use with -// FIXME(b/38173783) and a note to remove this use. -// -// Generally, one should use the Task as their context when available, or avoid -// having to use a context in places where a Task is unavailable. -// -// Using a Background context for tests is fine, as long as no values are -// needed from the context in the tested code paths. -func Background() Context { - return bgContext -} diff --git a/pkg/sentry/context/contexttest/BUILD b/pkg/sentry/context/contexttest/BUILD deleted file mode 100644 index f91a6d4ed..000000000 --- a/pkg/sentry/context/contexttest/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "contexttest", - testonly = 1, - srcs = ["contexttest.go"], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/memutil", - "//pkg/sentry/context", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/platform/ptrace", - "//pkg/sentry/uniqueid", - ], -) diff --git a/pkg/sentry/context/contexttest/contexttest.go b/pkg/sentry/context/contexttest/contexttest.go deleted file mode 100644 index 15cf086a9..000000000 --- a/pkg/sentry/context/contexttest/contexttest.go +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package contexttest builds a test context.Context. -package contexttest - -import ( - "os" - "sync/atomic" - "testing" - "time" - - "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" - "gvisor.dev/gvisor/pkg/sentry/uniqueid" -) - -// Context returns a Context that may be used in tests. Uses ptrace as the -// platform.Platform. -// -// Note that some filesystems may require a minimal kernel for testing, which -// this test context does not provide. For such tests, see kernel/contexttest. -func Context(tb testing.TB) context.Context { - const memfileName = "contexttest-memory" - memfd, err := memutil.CreateMemFD(memfileName, 0) - if err != nil { - tb.Fatalf("error creating application memory file: %v", err) - } - memfile := os.NewFile(uintptr(memfd), memfileName) - mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{}) - if err != nil { - memfile.Close() - tb.Fatalf("error creating pgalloc.MemoryFile: %v", err) - } - p, err := ptrace.New() - if err != nil { - tb.Fatal(err) - } - // Test usage of context.Background is fine. - return &TestContext{ - Context: context.Background(), - l: limits.NewLimitSet(), - mf: mf, - platform: p, - creds: auth.NewAnonymousCredentials(), - otherValues: make(map[interface{}]interface{}), - } -} - -// TestContext represents a context with minimal functionality suitable for -// running tests. -type TestContext struct { - context.Context - l *limits.LimitSet - mf *pgalloc.MemoryFile - platform platform.Platform - creds *auth.Credentials - otherValues map[interface{}]interface{} -} - -// globalUniqueID tracks incremental unique identifiers for tests. -var globalUniqueID uint64 - -// globalUniqueIDProvider implements unix.UniqueIDProvider. -type globalUniqueIDProvider struct{} - -// UniqueID implements unix.UniqueIDProvider.UniqueID. -func (*globalUniqueIDProvider) UniqueID() uint64 { - return atomic.AddUint64(&globalUniqueID, 1) -} - -// lastInotifyCookie is a monotonically increasing counter for generating unique -// inotify cookies. Must be accessed using atomic ops. -var lastInotifyCookie uint32 - -// hostClock implements ktime.Clock. -type hostClock struct { - ktime.WallRateClock - ktime.NoClockEvents -} - -// Now implements ktime.Clock.Now. -func (hostClock) Now() ktime.Time { - return ktime.FromNanoseconds(time.Now().UnixNano()) -} - -// RegisterValue registers additional values with this test context. Useful for -// providing values from external packages that contexttest can't depend on. -func (t *TestContext) RegisterValue(key, value interface{}) { - t.otherValues[key] = value -} - -// Value implements context.Context. -func (t *TestContext) Value(key interface{}) interface{} { - switch key { - case auth.CtxCredentials: - return t.creds - case limits.CtxLimits: - return t.l - case pgalloc.CtxMemoryFile: - return t.mf - case pgalloc.CtxMemoryFileProvider: - return t - case platform.CtxPlatform: - return t.platform - case uniqueid.CtxGlobalUniqueID: - return (*globalUniqueIDProvider).UniqueID(nil) - case uniqueid.CtxGlobalUniqueIDProvider: - return &globalUniqueIDProvider{} - case uniqueid.CtxInotifyCookie: - return atomic.AddUint32(&lastInotifyCookie, 1) - case ktime.CtxRealtimeClock: - return hostClock{} - default: - if val, ok := t.otherValues[key]; ok { - return val - } - return t.Context.Value(key) - } -} - -// MemoryFile implements pgalloc.MemoryFileProvider.MemoryFile. -func (t *TestContext) MemoryFile() *pgalloc.MemoryFile { - return t.mf -} - -// RootContext returns a Context that may be used in tests that need root -// credentials. Uses ptrace as the platform.Platform. -func RootContext(tb testing.TB) context.Context { - return WithCreds(Context(tb), auth.NewRootCredentials(auth.NewRootUserNamespace())) -} - -// WithCreds returns a copy of ctx carrying creds. -func WithCreds(ctx context.Context, creds *auth.Credentials) context.Context { - return &authContext{ctx, creds} -} - -type authContext struct { - context.Context - creds *auth.Credentials -} - -// Value implements context.Context. -func (ac *authContext) Value(key interface{}) interface{} { - switch key { - case auth.CtxCredentials: - return ac.creds - default: - return ac.Context.Value(key) - } -} - -// WithLimitSet returns a copy of ctx carrying l. -func WithLimitSet(ctx context.Context, l *limits.LimitSet) context.Context { - return limitContext{ctx, l} -} - -type limitContext struct { - context.Context - l *limits.LimitSet -} - -// Value implements context.Context. -func (lc limitContext) Value(key interface{}) interface{} { - switch key { - case limits.CtxLimits: - return lc.l - default: - return lc.Context.Value(key) - } -} diff --git a/pkg/sentry/contexttest/BUILD b/pkg/sentry/contexttest/BUILD new file mode 100644 index 000000000..6f4c86684 --- /dev/null +++ b/pkg/sentry/contexttest/BUILD @@ -0,0 +1,21 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "contexttest", + testonly = 1, + srcs = ["contexttest.go"], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/context", + "//pkg/memutil", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/time", + "//pkg/sentry/limits", + "//pkg/sentry/pgalloc", + "//pkg/sentry/platform", + "//pkg/sentry/platform/ptrace", + "//pkg/sentry/uniqueid", + ], +) diff --git a/pkg/sentry/contexttest/contexttest.go b/pkg/sentry/contexttest/contexttest.go new file mode 100644 index 000000000..031fc64ec --- /dev/null +++ b/pkg/sentry/contexttest/contexttest.go @@ -0,0 +1,188 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package contexttest builds a test context.Context. +package contexttest + +import ( + "os" + "sync/atomic" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/memutil" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" +) + +// Context returns a Context that may be used in tests. Uses ptrace as the +// platform.Platform. +// +// Note that some filesystems may require a minimal kernel for testing, which +// this test context does not provide. For such tests, see kernel/contexttest. +func Context(tb testing.TB) context.Context { + const memfileName = "contexttest-memory" + memfd, err := memutil.CreateMemFD(memfileName, 0) + if err != nil { + tb.Fatalf("error creating application memory file: %v", err) + } + memfile := os.NewFile(uintptr(memfd), memfileName) + mf, err := pgalloc.NewMemoryFile(memfile, pgalloc.MemoryFileOpts{}) + if err != nil { + memfile.Close() + tb.Fatalf("error creating pgalloc.MemoryFile: %v", err) + } + p, err := ptrace.New() + if err != nil { + tb.Fatal(err) + } + // Test usage of context.Background is fine. + return &TestContext{ + Context: context.Background(), + l: limits.NewLimitSet(), + mf: mf, + platform: p, + creds: auth.NewAnonymousCredentials(), + otherValues: make(map[interface{}]interface{}), + } +} + +// TestContext represents a context with minimal functionality suitable for +// running tests. +type TestContext struct { + context.Context + l *limits.LimitSet + mf *pgalloc.MemoryFile + platform platform.Platform + creds *auth.Credentials + otherValues map[interface{}]interface{} +} + +// globalUniqueID tracks incremental unique identifiers for tests. +var globalUniqueID uint64 + +// globalUniqueIDProvider implements unix.UniqueIDProvider. +type globalUniqueIDProvider struct{} + +// UniqueID implements unix.UniqueIDProvider.UniqueID. +func (*globalUniqueIDProvider) UniqueID() uint64 { + return atomic.AddUint64(&globalUniqueID, 1) +} + +// lastInotifyCookie is a monotonically increasing counter for generating unique +// inotify cookies. Must be accessed using atomic ops. +var lastInotifyCookie uint32 + +// hostClock implements ktime.Clock. +type hostClock struct { + ktime.WallRateClock + ktime.NoClockEvents +} + +// Now implements ktime.Clock.Now. +func (hostClock) Now() ktime.Time { + return ktime.FromNanoseconds(time.Now().UnixNano()) +} + +// RegisterValue registers additional values with this test context. Useful for +// providing values from external packages that contexttest can't depend on. +func (t *TestContext) RegisterValue(key, value interface{}) { + t.otherValues[key] = value +} + +// Value implements context.Context. +func (t *TestContext) Value(key interface{}) interface{} { + switch key { + case auth.CtxCredentials: + return t.creds + case limits.CtxLimits: + return t.l + case pgalloc.CtxMemoryFile: + return t.mf + case pgalloc.CtxMemoryFileProvider: + return t + case platform.CtxPlatform: + return t.platform + case uniqueid.CtxGlobalUniqueID: + return (*globalUniqueIDProvider).UniqueID(nil) + case uniqueid.CtxGlobalUniqueIDProvider: + return &globalUniqueIDProvider{} + case uniqueid.CtxInotifyCookie: + return atomic.AddUint32(&lastInotifyCookie, 1) + case ktime.CtxRealtimeClock: + return hostClock{} + default: + if val, ok := t.otherValues[key]; ok { + return val + } + return t.Context.Value(key) + } +} + +// MemoryFile implements pgalloc.MemoryFileProvider.MemoryFile. +func (t *TestContext) MemoryFile() *pgalloc.MemoryFile { + return t.mf +} + +// RootContext returns a Context that may be used in tests that need root +// credentials. Uses ptrace as the platform.Platform. +func RootContext(tb testing.TB) context.Context { + return WithCreds(Context(tb), auth.NewRootCredentials(auth.NewRootUserNamespace())) +} + +// WithCreds returns a copy of ctx carrying creds. +func WithCreds(ctx context.Context, creds *auth.Credentials) context.Context { + return &authContext{ctx, creds} +} + +type authContext struct { + context.Context + creds *auth.Credentials +} + +// Value implements context.Context. +func (ac *authContext) Value(key interface{}) interface{} { + switch key { + case auth.CtxCredentials: + return ac.creds + default: + return ac.Context.Value(key) + } +} + +// WithLimitSet returns a copy of ctx carrying l. +func WithLimitSet(ctx context.Context, l *limits.LimitSet) context.Context { + return limitContext{ctx, l} +} + +type limitContext struct { + context.Context + l *limits.LimitSet +} + +// Value implements context.Context. +func (lc limitContext) Value(key interface{}) interface{} { + switch key { + case limits.CtxLimits: + return lc.l + default: + return lc.Context.Value(key) + } +} diff --git a/pkg/sentry/fs/BUILD b/pkg/sentry/fs/BUILD index 605d61dbe..ea85ab33c 100644 --- a/pkg/sentry/fs/BUILD +++ b/pkg/sentry/fs/BUILD @@ -47,13 +47,13 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/amutex", + "//pkg/context", "//pkg/log", "//pkg/metric", "//pkg/p9", "//pkg/refs", "//pkg/secio", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", @@ -64,10 +64,10 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/uniqueid", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/state", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -107,14 +107,14 @@ go_test( ], deps = [ ":fs", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/ramfs", "//pkg/sentry/fs/tmpfs", "//pkg/sentry/kernel/contexttest", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) @@ -129,7 +129,7 @@ go_test( ], library = ":fs", deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/context", + "//pkg/sentry/contexttest", ], ) diff --git a/pkg/sentry/fs/anon/BUILD b/pkg/sentry/fs/anon/BUILD index c14e5405e..aedcecfa1 100644 --- a/pkg/sentry/fs/anon/BUILD +++ b/pkg/sentry/fs/anon/BUILD @@ -11,10 +11,10 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/anon/anon.go b/pkg/sentry/fs/anon/anon.go index 7323c7222..5c421f5fb 100644 --- a/pkg/sentry/fs/anon/anon.go +++ b/pkg/sentry/fs/anon/anon.go @@ -18,10 +18,10 @@ package anon import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // NewInode constructs an anonymous Inode that is not associated diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go index 4f3d6410e..fa9e7d517 100644 --- a/pkg/sentry/fs/attr.go +++ b/pkg/sentry/fs/attr.go @@ -20,8 +20,8 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" ) diff --git a/pkg/sentry/fs/context.go b/pkg/sentry/fs/context.go index dd427de5d..0fbd60056 100644 --- a/pkg/sentry/fs/context.go +++ b/pkg/sentry/fs/context.go @@ -16,7 +16,7 @@ package fs import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index e03e3e417..f6c79e51b 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -19,12 +19,12 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // copyUp copies a file in an overlay from a lower filesystem to an diff --git a/pkg/sentry/fs/copy_up_test.go b/pkg/sentry/fs/copy_up_test.go index 738580c5f..91792d9fe 100644 --- a/pkg/sentry/fs/copy_up_test.go +++ b/pkg/sentry/fs/copy_up_test.go @@ -24,8 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index 0c7247bd7..4c4b7d5cc 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -16,8 +16,9 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/rand", - "//pkg/sentry/context", + "//pkg/safemem", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", @@ -26,9 +27,8 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/mm", "//pkg/sentry/pgalloc", - "//pkg/sentry/safemem", - "//pkg/sentry/usermem", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go index f739c476c..35bd23991 100644 --- a/pkg/sentry/fs/dev/dev.go +++ b/pkg/sentry/fs/dev/dev.go @@ -18,11 +18,11 @@ package dev import ( "math" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // Memory device numbers are from Linux's drivers/char/mem.c diff --git a/pkg/sentry/fs/dev/fs.go b/pkg/sentry/fs/dev/fs.go index 55f8af704..5e518fb63 100644 --- a/pkg/sentry/fs/dev/fs.go +++ b/pkg/sentry/fs/dev/fs.go @@ -15,7 +15,7 @@ package dev import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/dev/full.go b/pkg/sentry/fs/dev/full.go index 07e0ea010..deb9c6ad8 100644 --- a/pkg/sentry/fs/dev/full.go +++ b/pkg/sentry/fs/dev/full.go @@ -16,11 +16,11 @@ package dev import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/dev/null.go b/pkg/sentry/fs/dev/null.go index 4404b97ef..aec33d0d9 100644 --- a/pkg/sentry/fs/dev/null.go +++ b/pkg/sentry/fs/dev/null.go @@ -16,7 +16,7 @@ package dev import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" diff --git a/pkg/sentry/fs/dev/random.go b/pkg/sentry/fs/dev/random.go index 49cb92f6e..2a9bbeb18 100644 --- a/pkg/sentry/fs/dev/random.go +++ b/pkg/sentry/fs/dev/random.go @@ -16,12 +16,12 @@ package dev import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/rand" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/dev/tty.go b/pkg/sentry/fs/dev/tty.go index 87d80e292..760ca563d 100644 --- a/pkg/sentry/fs/dev/tty.go +++ b/pkg/sentry/fs/dev/tty.go @@ -16,7 +16,7 @@ package dev import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/waiter" diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index 31fc4d87b..acab0411a 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -22,8 +22,8 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/uniqueid" diff --git a/pkg/sentry/fs/dirent_refs_test.go b/pkg/sentry/fs/dirent_refs_test.go index 47bc72a88..98d69c6f2 100644 --- a/pkg/sentry/fs/dirent_refs_test.go +++ b/pkg/sentry/fs/dirent_refs_test.go @@ -18,8 +18,8 @@ import ( "syscall" "testing" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" ) func newMockDirInode(ctx context.Context, cache *DirentCache) *Inode { diff --git a/pkg/sentry/fs/fdpipe/BUILD b/pkg/sentry/fs/fdpipe/BUILD index 25ef96299..1d09e983c 100644 --- a/pkg/sentry/fs/fdpipe/BUILD +++ b/pkg/sentry/fs/fdpipe/BUILD @@ -12,17 +12,17 @@ go_library( imports = ["gvisor.dev/gvisor/pkg/sentry/fs"], visibility = ["//pkg/sentry:internal"], deps = [ + "//pkg/context", "//pkg/fd", "//pkg/fdnotifier", "//pkg/log", + "//pkg/safemem", "//pkg/secio", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", - "//pkg/sentry/safemem", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -36,13 +36,13 @@ go_test( ], library = ":fdpipe", deps = [ + "//pkg/context", "//pkg/fd", "//pkg/fdnotifier", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", - "//pkg/sentry/usermem", "//pkg/syserror", + "//pkg/usermem", "@com_github_google_uuid//:go_default_library", ], ) diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go index 5b6cfeb0a..9fce177ad 100644 --- a/pkg/sentry/fs/fdpipe/pipe.go +++ b/pkg/sentry/fs/fdpipe/pipe.go @@ -19,17 +19,17 @@ import ( "os" "syscall" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/secio" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/fdpipe/pipe_opener.go b/pkg/sentry/fs/fdpipe/pipe_opener.go index 64b558975..0c3595998 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener.go @@ -20,8 +20,8 @@ import ( "syscall" "time" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go index 577445148..e556da48a 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go @@ -26,12 +26,12 @@ import ( "github.com/google/uuid" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) type hostOpener struct { diff --git a/pkg/sentry/fs/fdpipe/pipe_state.go b/pkg/sentry/fs/fdpipe/pipe_state.go index cee87f726..af8230a7d 100644 --- a/pkg/sentry/fs/fdpipe/pipe_state.go +++ b/pkg/sentry/fs/fdpipe/pipe_state.go @@ -18,7 +18,7 @@ import ( "fmt" "io/ioutil" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sync" ) diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go index 69abc1e71..5aff0cc95 100644 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_test.go @@ -23,10 +23,10 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) func singlePipeFD() (int, error) { diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index 7c4586296..ca3466f4f 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -20,16 +20,16 @@ import ( "time" "gvisor.dev/gvisor/pkg/amutex" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/uniqueid" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go index b88303f17..beba0f771 100644 --- a/pkg/sentry/fs/file_operations.go +++ b/pkg/sentry/fs/file_operations.go @@ -17,10 +17,10 @@ package fs import ( "io" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go index 8991207b4..dcc1df38f 100644 --- a/pkg/sentry/fs/file_overlay.go +++ b/pkg/sentry/fs/file_overlay.go @@ -17,13 +17,13 @@ package fs import ( "io" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go index 2fb824d5c..02538bb4f 100644 --- a/pkg/sentry/fs/file_overlay_test.go +++ b/pkg/sentry/fs/file_overlay_test.go @@ -18,7 +18,7 @@ import ( "reflect" "testing" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" diff --git a/pkg/sentry/fs/filesystems.go b/pkg/sentry/fs/filesystems.go index c5b51620a..084da2a8d 100644 --- a/pkg/sentry/fs/filesystems.go +++ b/pkg/sentry/fs/filesystems.go @@ -19,7 +19,7 @@ import ( "sort" "strings" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" ) diff --git a/pkg/sentry/fs/filetest/BUILD b/pkg/sentry/fs/filetest/BUILD index 9a7608cae..a8000e010 100644 --- a/pkg/sentry/fs/filetest/BUILD +++ b/pkg/sentry/fs/filetest/BUILD @@ -8,12 +8,12 @@ go_library( srcs = ["filetest.go"], visibility = ["//pkg/sentry:internal"], deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/context", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", - "//pkg/sentry/usermem", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/fs/filetest/filetest.go b/pkg/sentry/fs/filetest/filetest.go index 22270a494..8049538f2 100644 --- a/pkg/sentry/fs/filetest/filetest.go +++ b/pkg/sentry/fs/filetest/filetest.go @@ -19,12 +19,12 @@ import ( "fmt" "testing" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go index 26abf49e2..bdba6efe5 100644 --- a/pkg/sentry/fs/fs.go +++ b/pkg/sentry/fs/fs.go @@ -54,8 +54,8 @@ package fs import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sync" ) diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 9142f5bdf..4ab2a384f 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -77,22 +77,22 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/log", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/sentry/safemem", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/state", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -106,13 +106,13 @@ go_test( ], library = ":fsutil", deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/context", + "//pkg/safemem", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", - "//pkg/sentry/safemem", - "//pkg/sentry/usermem", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go index 12132680b..c6cd45087 100644 --- a/pkg/sentry/fs/fsutil/dirty_set.go +++ b/pkg/sentry/fs/fsutil/dirty_set.go @@ -17,11 +17,11 @@ package fsutil import ( "math" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // DirtySet maps offsets into a memmap.Mappable to DirtyInfo. It is used to diff --git a/pkg/sentry/fs/fsutil/dirty_set_test.go b/pkg/sentry/fs/fsutil/dirty_set_test.go index 75575d994..e3579c23c 100644 --- a/pkg/sentry/fs/fsutil/dirty_set_test.go +++ b/pkg/sentry/fs/fsutil/dirty_set_test.go @@ -19,7 +19,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) func TestDirtySet(t *testing.T) { diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go index fc5b3b1a1..08695391c 100644 --- a/pkg/sentry/fs/fsutil/file.go +++ b/pkg/sentry/fs/fsutil/file.go @@ -17,12 +17,12 @@ package fsutil import ( "io" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go index f52d712e3..5643cdac9 100644 --- a/pkg/sentry/fs/fsutil/file_range_set.go +++ b/pkg/sentry/fs/fsutil/file_range_set.go @@ -19,13 +19,13 @@ import ( "io" "math" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // FileRangeSet maps offsets into a memmap.Mappable to offsets into a diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go index 837fc70b5..67278aa86 100644 --- a/pkg/sentry/fs/fsutil/host_file_mapper.go +++ b/pkg/sentry/fs/fsutil/host_file_mapper.go @@ -19,11 +19,11 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // HostFileMapper caches mappings of an arbitrary host file descriptor. It is diff --git a/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go index ad11a0573..2d4778d64 100644 --- a/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go +++ b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go @@ -17,7 +17,7 @@ package fsutil import ( "unsafe" - "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/safemem" ) func (*HostFileMapper) unsafeBlockFromChunkMapping(addr uintptr) safemem.Block { diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go index a625f0e26..78fec553e 100644 --- a/pkg/sentry/fs/fsutil/host_mappable.go +++ b/pkg/sentry/fs/fsutil/host_mappable.go @@ -17,13 +17,13 @@ package fsutil import ( "math" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // HostMappable implements memmap.Mappable and platform.File over a diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index df7b74855..252830572 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -16,7 +16,7 @@ package fsutil import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 20a014402..573b8586e 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -18,18 +18,18 @@ import ( "fmt" "io" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/time" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // Lock order (compare the lock order model in mm/mm.go): diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go index 129f314c8..1547584c5 100644 --- a/pkg/sentry/fs/fsutil/inode_cached_test.go +++ b/pkg/sentry/fs/fsutil/inode_cached_test.go @@ -19,14 +19,14 @@ import ( "io" "testing" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) type noopBackingFile struct{} diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index cf48e7c03..971d3718e 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -24,13 +24,14 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/fd", "//pkg/log", "//pkg/metric", "//pkg/p9", "//pkg/refs", + "//pkg/safemem", "//pkg/secio", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fdpipe", @@ -39,13 +40,12 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", - "//pkg/sentry/safemem", "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/unet", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -56,10 +56,10 @@ go_test( srcs = ["gofer_test.go"], library = ":gofer", deps = [ + "//pkg/context", "//pkg/p9", "//pkg/p9/p9test", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", ], ) diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go index 4848e2374..71cccdc34 100644 --- a/pkg/sentry/fs/gofer/attr.go +++ b/pkg/sentry/fs/gofer/attr.go @@ -17,12 +17,12 @@ package gofer import ( "syscall" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // getattr returns the 9p attributes of the p9.File. On success, Mode, Size, and RDev diff --git a/pkg/sentry/fs/gofer/cache_policy.go b/pkg/sentry/fs/gofer/cache_policy.go index cc11c6339..ebea03c42 100644 --- a/pkg/sentry/fs/gofer/cache_policy.go +++ b/pkg/sentry/fs/gofer/cache_policy.go @@ -17,7 +17,7 @@ package gofer import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go index 2125dafef..3da818aed 100644 --- a/pkg/sentry/fs/gofer/context_file.go +++ b/pkg/sentry/fs/gofer/context_file.go @@ -15,9 +15,9 @@ package gofer import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" ) // contextFile is a wrapper around p9.File that notifies the context that diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go index 7960b9c7b..23296f246 100644 --- a/pkg/sentry/fs/gofer/file.go +++ b/pkg/sentry/fs/gofer/file.go @@ -19,16 +19,16 @@ import ( "syscall" "time" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/gofer/file_state.go b/pkg/sentry/fs/gofer/file_state.go index bb8312849..ff96b28ba 100644 --- a/pkg/sentry/fs/gofer/file_state.go +++ b/pkg/sentry/fs/gofer/file_state.go @@ -17,7 +17,7 @@ package gofer import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go index cf96dd9fa..9d41fcbdb 100644 --- a/pkg/sentry/fs/gofer/fs.go +++ b/pkg/sentry/fs/gofer/fs.go @@ -20,8 +20,8 @@ import ( "fmt" "strconv" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 7fc3c32ae..0c2f89ae8 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -20,10 +20,10 @@ import ( "testing" "time" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/p9/p9test" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/gofer/handles.go b/pkg/sentry/fs/gofer/handles.go index b86c49b39..9f7c3e89f 100644 --- a/pkg/sentry/fs/gofer/handles.go +++ b/pkg/sentry/fs/gofer/handles.go @@ -17,14 +17,14 @@ package gofer import ( "io" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/secio" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/safemem" ) // handles are the open handles of a gofer file. They are reference counted to diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 98d1a8a48..ac28174d2 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -19,17 +19,17 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fdpipe" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/host" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go index 0b2eedb7c..238f7804c 100644 --- a/pkg/sentry/fs/gofer/inode_state.go +++ b/pkg/sentry/fs/gofer/inode_state.go @@ -20,8 +20,8 @@ import ( "path/filepath" "strings" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/time" diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index c09f3b71c..0c1be05ef 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -18,9 +18,9 @@ import ( "fmt" "syscall" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index edc796ce0..498c4645a 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -17,9 +17,9 @@ package gofer import ( "fmt" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index d045e04ff..0285c5361 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -17,8 +17,8 @@ package gofer import ( "fmt" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/unet" ) diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index a45a8f36c..376cfce2c 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -16,9 +16,9 @@ package gofer import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" diff --git a/pkg/sentry/fs/gofer/util.go b/pkg/sentry/fs/gofer/util.go index 848e6812b..2d8d3a2ea 100644 --- a/pkg/sentry/fs/gofer/util.go +++ b/pkg/sentry/fs/gofer/util.go @@ -17,8 +17,8 @@ package gofer import ( "syscall" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index f586f47c1..21003ea45 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -27,13 +27,14 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/fd", "//pkg/fdnotifier", "//pkg/log", "//pkg/refs", + "//pkg/safemem", "//pkg/secio", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", @@ -41,18 +42,17 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", - "//pkg/sentry/safemem", "//pkg/sentry/socket/control", "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/unimpl", "//pkg/sentry/uniqueid", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", "//pkg/unet", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -69,17 +69,17 @@ go_test( ], library = ":host", deps = [ + "//pkg/context", "//pkg/fd", "//pkg/fdnotifier", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/kernel/time", "//pkg/sentry/socket", "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", "//pkg/syserr", "//pkg/tcpip", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go index 5532ff5a0..1658979fc 100644 --- a/pkg/sentry/fs/host/control.go +++ b/pkg/sentry/fs/host/control.go @@ -17,7 +17,7 @@ package host import ( "syscall" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/socket/control" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go index f6c626f2c..e08f56d04 100644 --- a/pkg/sentry/fs/host/file.go +++ b/pkg/sentry/fs/host/file.go @@ -18,17 +18,17 @@ import ( "fmt" "syscall" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/secio" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/host/fs.go b/pkg/sentry/fs/host/fs.go index 68d2697c0..d3e8e3a36 100644 --- a/pkg/sentry/fs/host/fs.go +++ b/pkg/sentry/fs/host/fs.go @@ -23,8 +23,8 @@ import ( "strconv" "strings" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/host/fs_test.go b/pkg/sentry/fs/host/fs_test.go index c6852ee30..3111d2df9 100644 --- a/pkg/sentry/fs/host/fs_test.go +++ b/pkg/sentry/fs/host/fs_test.go @@ -23,8 +23,8 @@ import ( "sort" "testing" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 873a1c52d..6fa39caab 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -18,14 +18,14 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/secio" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" diff --git a/pkg/sentry/fs/host/inode_state.go b/pkg/sentry/fs/host/inode_state.go index b267ec305..299e0e0b0 100644 --- a/pkg/sentry/fs/host/inode_state.go +++ b/pkg/sentry/fs/host/inode_state.go @@ -18,7 +18,7 @@ import ( "fmt" "syscall" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go index 2d959f10d..7221bc825 100644 --- a/pkg/sentry/fs/host/inode_test.go +++ b/pkg/sentry/fs/host/inode_test.go @@ -21,7 +21,7 @@ import ( "syscall" "testing" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index c076d5bdd..06fc2d80a 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -19,11 +19,11 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/socket/control" unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix" diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go index 68b38fd1c..eb4afe520 100644 --- a/pkg/sentry/fs/host/socket_test.go +++ b/pkg/sentry/fs/host/socket_test.go @@ -21,13 +21,13 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 753ef8cd6..3f218b4a7 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -16,14 +16,14 @@ package host import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/unimpl" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // TTYFileOperations implements fs.FileOperations for a host file descriptor diff --git a/pkg/sentry/fs/host/wait_test.go b/pkg/sentry/fs/host/wait_test.go index 88d24d693..d49c3a635 100644 --- a/pkg/sentry/fs/host/wait_test.go +++ b/pkg/sentry/fs/host/wait_test.go @@ -19,7 +19,7 @@ import ( "testing" "time" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index e4cf5a570..b66c091ab 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -16,10 +16,10 @@ package fs import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index 13261cb81..70f2eae96 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -17,7 +17,7 @@ package fs import ( "errors" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index c477de837..4729b4aac 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -19,8 +19,8 @@ import ( "strings" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go index 493d98c36..389c219d6 100644 --- a/pkg/sentry/fs/inode_overlay_test.go +++ b/pkg/sentry/fs/inode_overlay_test.go @@ -17,7 +17,7 @@ package fs_test import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index cc7dd1c92..928c90aa0 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -19,13 +19,13 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/uniqueid" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/inotify_event.go b/pkg/sentry/fs/inotify_event.go index 9f70a3e82..686e1b1cd 100644 --- a/pkg/sentry/fs/inotify_event.go +++ b/pkg/sentry/fs/inotify_event.go @@ -18,8 +18,8 @@ import ( "bytes" "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/usermem" ) // inotifyEventBaseSize is the base size of linux's struct inotify_event. This diff --git a/pkg/sentry/fs/mock.go b/pkg/sentry/fs/mock.go index 7a24c6f1b..1d6ea5736 100644 --- a/pkg/sentry/fs/mock.go +++ b/pkg/sentry/fs/mock.go @@ -15,7 +15,7 @@ package fs import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go index 7a9692800..37bae6810 100644 --- a/pkg/sentry/fs/mount.go +++ b/pkg/sentry/fs/mount.go @@ -19,8 +19,8 @@ import ( "fmt" "sync/atomic" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" ) // DirentOperations provide file systems greater control over how long a Dirent diff --git a/pkg/sentry/fs/mount_overlay.go b/pkg/sentry/fs/mount_overlay.go index 299712cd7..78e35b1e6 100644 --- a/pkg/sentry/fs/mount_overlay.go +++ b/pkg/sentry/fs/mount_overlay.go @@ -15,7 +15,7 @@ package fs import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // overlayMountSourceOperations implements MountSourceOperations for an overlay diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go index 0b84732aa..e672a438c 100644 --- a/pkg/sentry/fs/mount_test.go +++ b/pkg/sentry/fs/mount_test.go @@ -18,7 +18,7 @@ import ( "fmt" "testing" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" ) // cacheReallyContains iterates through the dirent cache to determine whether diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index a9627a9d1..574a2cc91 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -22,9 +22,9 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" diff --git a/pkg/sentry/fs/mounts_test.go b/pkg/sentry/fs/mounts_test.go index c4c771f2c..a69b41468 100644 --- a/pkg/sentry/fs/mounts_test.go +++ b/pkg/sentry/fs/mounts_test.go @@ -17,7 +17,7 @@ package fs_test import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" diff --git a/pkg/sentry/fs/offset.go b/pkg/sentry/fs/offset.go index f7d844ce7..53b5df175 100644 --- a/pkg/sentry/fs/offset.go +++ b/pkg/sentry/fs/offset.go @@ -17,7 +17,7 @@ package fs import ( "math" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // OffsetPageEnd returns the file offset rounded up to the nearest diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index f7702f8f4..a8ae7d81d 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -18,12 +18,12 @@ import ( "fmt" "strings" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // The virtual filesystem implements an overlay configuration. For a high-level diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index b06bead41..280093c5e 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -29,8 +29,8 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/log", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/proc/device", @@ -46,10 +46,10 @@ go_library( "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", "//pkg/tcpip/header", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -64,8 +64,8 @@ go_test( library = ":proc", deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/inet", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/proc/cgroup.go b/pkg/sentry/fs/proc/cgroup.go index c4abe319d..7c1d9e7e9 100644 --- a/pkg/sentry/fs/proc/cgroup.go +++ b/pkg/sentry/fs/proc/cgroup.go @@ -17,7 +17,7 @@ package proc import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/proc/cpuinfo.go b/pkg/sentry/fs/proc/cpuinfo.go index df0c4e3a7..c96533401 100644 --- a/pkg/sentry/fs/proc/cpuinfo.go +++ b/pkg/sentry/fs/proc/cpuinfo.go @@ -17,7 +17,7 @@ package proc import ( "bytes" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" ) diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go index 9aaeb780b..8fe626e1c 100644 --- a/pkg/sentry/fs/proc/exec_args.go +++ b/pkg/sentry/fs/proc/exec_args.go @@ -20,12 +20,12 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go index 2fa3cfa7d..35972e23c 100644 --- a/pkg/sentry/fs/proc/fds.go +++ b/pkg/sentry/fs/proc/fds.go @@ -19,7 +19,7 @@ import ( "sort" "strconv" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" diff --git a/pkg/sentry/fs/proc/filesystems.go b/pkg/sentry/fs/proc/filesystems.go index 7b3b974ab..0a58ac34c 100644 --- a/pkg/sentry/fs/proc/filesystems.go +++ b/pkg/sentry/fs/proc/filesystems.go @@ -18,7 +18,7 @@ import ( "bytes" "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" ) diff --git a/pkg/sentry/fs/proc/fs.go b/pkg/sentry/fs/proc/fs.go index 761d24462..daf1ba781 100644 --- a/pkg/sentry/fs/proc/fs.go +++ b/pkg/sentry/fs/proc/fs.go @@ -17,7 +17,7 @@ package proc import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go index 723f6b661..d2859a4c2 100644 --- a/pkg/sentry/fs/proc/inode.go +++ b/pkg/sentry/fs/proc/inode.go @@ -16,14 +16,14 @@ package proc import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange diff --git a/pkg/sentry/fs/proc/loadavg.go b/pkg/sentry/fs/proc/loadavg.go index d7d2afcb7..139d49c34 100644 --- a/pkg/sentry/fs/proc/loadavg.go +++ b/pkg/sentry/fs/proc/loadavg.go @@ -18,7 +18,7 @@ import ( "bytes" "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" ) diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go index 313c6a32b..465b47da9 100644 --- a/pkg/sentry/fs/proc/meminfo.go +++ b/pkg/sentry/fs/proc/meminfo.go @@ -18,11 +18,11 @@ import ( "bytes" "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go index d4efc86e0..c10888100 100644 --- a/pkg/sentry/fs/proc/mounts.go +++ b/pkg/sentry/fs/proc/mounts.go @@ -20,7 +20,7 @@ import ( "sort" "strings" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/kernel" diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index bad445f3f..6f2775344 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -22,8 +22,8 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" @@ -33,9 +33,9 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/usermem" ) // LINT.IfChange diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index 29867dc3a..c8abb5052 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -20,7 +20,7 @@ import ( "sort" "strconv" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" diff --git a/pkg/sentry/fs/proc/seqfile/BUILD b/pkg/sentry/fs/proc/seqfile/BUILD index 310d8dd52..21338d912 100644 --- a/pkg/sentry/fs/proc/seqfile/BUILD +++ b/pkg/sentry/fs/proc/seqfile/BUILD @@ -8,14 +8,14 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/proc/device", "//pkg/sentry/kernel/time", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -26,10 +26,10 @@ go_test( srcs = ["seqfile_test.go"], library = ":seqfile", deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/context", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/fs/ramfs", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/proc/seqfile/seqfile.go b/pkg/sentry/fs/proc/seqfile/seqfile.go index f9af191d5..6121f0e95 100644 --- a/pkg/sentry/fs/proc/seqfile/seqfile.go +++ b/pkg/sentry/fs/proc/seqfile/seqfile.go @@ -19,14 +19,14 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/proc/seqfile/seqfile_test.go b/pkg/sentry/fs/proc/seqfile/seqfile_test.go index ebfeee835..98e394569 100644 --- a/pkg/sentry/fs/proc/seqfile/seqfile_test.go +++ b/pkg/sentry/fs/proc/seqfile/seqfile_test.go @@ -20,11 +20,11 @@ import ( "io" "testing" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) type seqTest struct { diff --git a/pkg/sentry/fs/proc/stat.go b/pkg/sentry/fs/proc/stat.go index bc5b2bc7b..d4fbd76ac 100644 --- a/pkg/sentry/fs/proc/stat.go +++ b/pkg/sentry/fs/proc/stat.go @@ -19,7 +19,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/kernel" ) diff --git a/pkg/sentry/fs/proc/sys.go b/pkg/sentry/fs/proc/sys.go index 2bdcf5f70..f8aad2dbd 100644 --- a/pkg/sentry/fs/proc/sys.go +++ b/pkg/sentry/fs/proc/sys.go @@ -20,13 +20,13 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index b9e8ef35f..0772d4ae4 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -19,14 +19,14 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/proc/sys_net_test.go b/pkg/sentry/fs/proc/sys_net_test.go index 6abae7a60..355e83d47 100644 --- a/pkg/sentry/fs/proc/sys_net_test.go +++ b/pkg/sentry/fs/proc/sys_net_test.go @@ -17,9 +17,9 @@ package proc import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) func TestQuerySendBufferSize(t *testing.T) { diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 7358d6ef9..ca020e11e 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -22,7 +22,7 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" @@ -32,8 +32,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/proc/uid_gid_map.go b/pkg/sentry/fs/proc/uid_gid_map.go index 3eacc9265..8d9517b95 100644 --- a/pkg/sentry/fs/proc/uid_gid_map.go +++ b/pkg/sentry/fs/proc/uid_gid_map.go @@ -20,13 +20,13 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/proc/uptime.go b/pkg/sentry/fs/proc/uptime.go index adfe58adb..c0f6fb802 100644 --- a/pkg/sentry/fs/proc/uptime.go +++ b/pkg/sentry/fs/proc/uptime.go @@ -19,12 +19,12 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/proc/version.go b/pkg/sentry/fs/proc/version.go index 27fd5b1cb..35e258ff6 100644 --- a/pkg/sentry/fs/proc/version.go +++ b/pkg/sentry/fs/proc/version.go @@ -17,7 +17,7 @@ package proc import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/kernel" ) diff --git a/pkg/sentry/fs/ramfs/BUILD b/pkg/sentry/fs/ramfs/BUILD index 39c4b84f8..8ca823fb3 100644 --- a/pkg/sentry/fs/ramfs/BUILD +++ b/pkg/sentry/fs/ramfs/BUILD @@ -13,14 +13,14 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -31,7 +31,7 @@ go_test( srcs = ["tree_test.go"], library = ":ramfs", deps = [ - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", ], ) diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index dcbb8eb2e..bfa304552 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -20,7 +20,7 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" diff --git a/pkg/sentry/fs/ramfs/socket.go b/pkg/sentry/fs/ramfs/socket.go index a24fe2ea2..29ff004f2 100644 --- a/pkg/sentry/fs/ramfs/socket.go +++ b/pkg/sentry/fs/ramfs/socket.go @@ -16,7 +16,7 @@ package ramfs import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" diff --git a/pkg/sentry/fs/ramfs/symlink.go b/pkg/sentry/fs/ramfs/symlink.go index fcfaa29aa..d988349aa 100644 --- a/pkg/sentry/fs/ramfs/symlink.go +++ b/pkg/sentry/fs/ramfs/symlink.go @@ -16,7 +16,7 @@ package ramfs import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/waiter" diff --git a/pkg/sentry/fs/ramfs/tree.go b/pkg/sentry/fs/ramfs/tree.go index 702cc4a1e..dfc9d3453 100644 --- a/pkg/sentry/fs/ramfs/tree.go +++ b/pkg/sentry/fs/ramfs/tree.go @@ -19,10 +19,10 @@ import ( "path" "strings" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // MakeDirectoryTree constructs a ramfs tree of all directories containing diff --git a/pkg/sentry/fs/ramfs/tree_test.go b/pkg/sentry/fs/ramfs/tree_test.go index 61a7e2900..a6ed8b2c5 100644 --- a/pkg/sentry/fs/ramfs/tree_test.go +++ b/pkg/sentry/fs/ramfs/tree_test.go @@ -17,7 +17,7 @@ package ramfs import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index 389c330a0..791d1526c 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -18,7 +18,7 @@ import ( "io" "sync/atomic" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fs/sys/BUILD b/pkg/sentry/fs/sys/BUILD index cc6b3bfbf..f2e8b9932 100644 --- a/pkg/sentry/fs/sys/BUILD +++ b/pkg/sentry/fs/sys/BUILD @@ -13,12 +13,12 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/ramfs", "//pkg/sentry/kernel", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/sys/devices.go b/pkg/sentry/fs/sys/devices.go index 4f78ca8d2..b67065956 100644 --- a/pkg/sentry/fs/sys/devices.go +++ b/pkg/sentry/fs/sys/devices.go @@ -18,7 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" diff --git a/pkg/sentry/fs/sys/fs.go b/pkg/sentry/fs/sys/fs.go index e60b63e75..fd03a4e38 100644 --- a/pkg/sentry/fs/sys/fs.go +++ b/pkg/sentry/fs/sys/fs.go @@ -15,7 +15,7 @@ package sys import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" ) diff --git a/pkg/sentry/fs/sys/sys.go b/pkg/sentry/fs/sys/sys.go index b14bf3f55..0891645e4 100644 --- a/pkg/sentry/fs/sys/sys.go +++ b/pkg/sentry/fs/sys/sys.go @@ -16,10 +16,10 @@ package sys import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) func newFile(ctx context.Context, node fs.InodeOperations, msrc *fs.MountSource) *fs.Inode { diff --git a/pkg/sentry/fs/timerfd/BUILD b/pkg/sentry/fs/timerfd/BUILD index 092668e8d..d16cdb4df 100644 --- a/pkg/sentry/fs/timerfd/BUILD +++ b/pkg/sentry/fs/timerfd/BUILD @@ -7,13 +7,13 @@ go_library( srcs = ["timerfd.go"], visibility = ["//pkg/sentry:internal"], deps = [ - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel/time", - "//pkg/sentry/usermem", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go index f8bf663bb..88c344089 100644 --- a/pkg/sentry/fs/timerfd/timerfd.go +++ b/pkg/sentry/fs/timerfd/timerfd.go @@ -19,13 +19,13 @@ package timerfd import ( "sync/atomic" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/tmpfs/BUILD b/pkg/sentry/fs/tmpfs/BUILD index 04776555f..aa7199014 100644 --- a/pkg/sentry/fs/tmpfs/BUILD +++ b/pkg/sentry/fs/tmpfs/BUILD @@ -14,8 +14,9 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/metric", - "//pkg/sentry/context", + "//pkg/safemem", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", @@ -25,12 +26,11 @@ go_library( "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", - "//pkg/sentry/safemem", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -41,10 +41,10 @@ go_test( srcs = ["file_test.go"], library = ":tmpfs", deps = [ - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/fs", "//pkg/sentry/kernel/contexttest", "//pkg/sentry/usage", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/tmpfs/file_regular.go b/pkg/sentry/fs/tmpfs/file_regular.go index 9a6943fe4..614f8f8a1 100644 --- a/pkg/sentry/fs/tmpfs/file_regular.go +++ b/pkg/sentry/fs/tmpfs/file_regular.go @@ -15,11 +15,11 @@ package tmpfs import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go index 0075ef023..aaba35502 100644 --- a/pkg/sentry/fs/tmpfs/file_test.go +++ b/pkg/sentry/fs/tmpfs/file_test.go @@ -18,11 +18,11 @@ import ( "bytes" "testing" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) func newFileInode(ctx context.Context) *fs.Inode { diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go index be98ad751..d5be56c3f 100644 --- a/pkg/sentry/fs/tmpfs/fs.go +++ b/pkg/sentry/fs/tmpfs/fs.go @@ -19,7 +19,7 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index f1c87fe41..dabc10662 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -20,18 +20,18 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/metric" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) var ( diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index 0f718e236..c00cef0a5 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -17,7 +17,7 @@ package tmpfs import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" @@ -25,8 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) var fsInfo = fs.Info{ diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD index 29f804c6c..5cb0e0417 100644 --- a/pkg/sentry/fs/tty/BUILD +++ b/pkg/sentry/fs/tty/BUILD @@ -16,20 +16,20 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/refs", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", - "//pkg/sentry/safemem", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/unimpl", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -41,7 +41,7 @@ go_test( library = ":tty", deps = [ "//pkg/abi/linux", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/usermem", + "//pkg/sentry/contexttest", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index 88aa66b24..108654827 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -21,14 +21,14 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go index edee56c12..8fe05ebe5 100644 --- a/pkg/sentry/fs/tty/fs.go +++ b/pkg/sentry/fs/tty/fs.go @@ -15,7 +15,7 @@ package tty import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/syserror" diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go index 9fe02657e..12b1c6097 100644 --- a/pkg/sentry/fs/tty/line_discipline.go +++ b/pkg/sentry/fs/tty/line_discipline.go @@ -19,11 +19,11 @@ import ( "unicode/utf8" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index 6b07f6bf2..f62da49bd 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -16,13 +16,13 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/unimpl" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index 21ccc6f32..1ca79c0b2 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -16,12 +16,12 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index 2a51e6bab..db55cdc48 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -16,12 +16,12 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go index 917f90cc0..5883f26db 100644 --- a/pkg/sentry/fs/tty/terminal.go +++ b/pkg/sentry/fs/tty/terminal.go @@ -16,11 +16,11 @@ package tty import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // Terminal is a pseudoterminal. diff --git a/pkg/sentry/fs/tty/tty_test.go b/pkg/sentry/fs/tty/tty_test.go index 59f07ff8e..2cbc05678 100644 --- a/pkg/sentry/fs/tty/tty_test.go +++ b/pkg/sentry/fs/tty/tty_test.go @@ -18,8 +18,8 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/contexttest" + "gvisor.dev/gvisor/pkg/usermem" ) func TestSimpleMasterToSlave(t *testing.T) { diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index a718920d5..6f78f478f 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -35,21 +35,21 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/context", "//pkg/fd", "//pkg/fspath", "//pkg/log", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fsimpl/ext/disklayout", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", - "//pkg/sentry/safemem", "//pkg/sentry/syscalls/linux", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -73,14 +73,14 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/context", "//pkg/fspath", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fsimpl/ext/disklayout", "//pkg/sentry/kernel/auth", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/syserror", + "//pkg/usermem", "//runsc/testutil", "@com_github_google_go-cmp//cmp:go_default_library", "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", diff --git a/pkg/sentry/fsimpl/ext/benchmark/BUILD b/pkg/sentry/fsimpl/ext/benchmark/BUILD index 12f3990c1..6c5a559fd 100644 --- a/pkg/sentry/fsimpl/ext/benchmark/BUILD +++ b/pkg/sentry/fsimpl/ext/benchmark/BUILD @@ -7,9 +7,9 @@ go_test( size = "small", srcs = ["benchmark_test.go"], deps = [ + "//pkg/context", "//pkg/fspath", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fsimpl/ext", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go index a56b03711..d1436b943 100644 --- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go +++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go @@ -24,9 +24,9 @@ import ( "strings" "testing" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go index 8944171c8..ebb72b75e 100644 --- a/pkg/sentry/fsimpl/ext/directory.go +++ b/pkg/sentry/fsimpl/ext/directory.go @@ -17,8 +17,8 @@ package ext import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/memmap" diff --git a/pkg/sentry/fsimpl/ext/ext.go b/pkg/sentry/fsimpl/ext/ext.go index 4b7d17dc6..373d23b74 100644 --- a/pkg/sentry/fsimpl/ext/ext.go +++ b/pkg/sentry/fsimpl/ext/ext.go @@ -21,9 +21,9 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go index 6c14a1e2d..05f992826 100644 --- a/pkg/sentry/fsimpl/ext/ext_test.go +++ b/pkg/sentry/fsimpl/ext/ext_test.go @@ -25,14 +25,14 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/runsc/testutil" ) diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go index 841274daf..92f7da40d 100644 --- a/pkg/sentry/fsimpl/ext/file_description.go +++ b/pkg/sentry/fsimpl/ext/file_description.go @@ -16,7 +16,7 @@ package ext import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index 9afb1a84c..07bf58953 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -19,8 +19,8 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go index d11153c90..30135ddb0 100644 --- a/pkg/sentry/fsimpl/ext/regular_file.go +++ b/pkg/sentry/fsimpl/ext/regular_file.go @@ -18,13 +18,13 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // regularFile represents a regular file's inode. This too follows the diff --git a/pkg/sentry/fsimpl/ext/symlink.go b/pkg/sentry/fsimpl/ext/symlink.go index bdf8705c1..1447a4dc1 100644 --- a/pkg/sentry/fsimpl/ext/symlink.go +++ b/pkg/sentry/fsimpl/ext/symlink.go @@ -15,11 +15,11 @@ package ext import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // symlink represents a symlink inode. diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index 7bf83ccba..e73f1f857 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -29,16 +29,16 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/fspath", "//pkg/log", "//pkg/refs", - "//pkg/sentry/context", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) @@ -49,13 +49,13 @@ go_test( deps = [ ":kernfs", "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/context", + "//pkg/sentry/contexttest", "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/kernel/auth", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/syserror", + "//pkg/usermem", "@com_github_google_go-cmp//cmp:go_default_library", ], ) diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index 75624e0b1..373f801ff 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -18,11 +18,11 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // DynamicBytesFile implements kernfs.Inode and represents a read-only diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 5fa1fa67b..6104751c8 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -16,11 +16,11 @@ package kernfs import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // GenericDirectoryFD implements vfs.FileDescriptionImpl for a generic directory diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index a4600ad47..9d65d0179 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -20,8 +20,8 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 1700fffd9..adca2313f 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -19,8 +19,8 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 85bcdcc57..79ebea8a5 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -56,8 +56,8 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index fade59491..ee65cf491 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -21,14 +21,14 @@ import ( "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const defaultMode linux.FileMode = 01777 diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go index f19f12854..0ee7eb9b7 100644 --- a/pkg/sentry/fsimpl/kernfs/symlink.go +++ b/pkg/sentry/fsimpl/kernfs/symlink.go @@ -16,7 +16,7 @@ package kernfs import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 3768f55b2..12aac2e6a 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -16,8 +16,9 @@ go_library( ], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/log", - "//pkg/sentry/context", + "//pkg/safemem", "//pkg/sentry/fs", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/inet", @@ -26,15 +27,14 @@ go_library( "//pkg/sentry/kernel/time", "//pkg/sentry/limits", "//pkg/sentry/mm", - "//pkg/sentry/safemem", "//pkg/sentry/socket", "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/syserror", "//pkg/tcpip/header", + "//pkg/usermem", ], ) @@ -48,15 +48,15 @@ go_test( library = ":proc", deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/fspath", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index f49819187..11477b6a9 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -19,7 +19,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index 91eded415..353e37195 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -19,7 +19,7 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/vfs" diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index a0580f20d..eb5bc62c0 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -19,7 +19,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 7bc352ae9..efd3b3453 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -20,17 +20,17 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // mm gets the kernel task's MemoryManager. No additional reference is taken on diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index 51f634716..e0cb9c47b 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -20,7 +20,7 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index ad3760e39..434998910 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -20,14 +20,14 @@ import ( "strconv" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) type selfSymlink struct { diff --git a/pkg/sentry/fsimpl/proc/tasks_net.go b/pkg/sentry/fsimpl/proc/tasks_net.go index 4aaf23e97..608fec017 100644 --- a/pkg/sentry/fsimpl/proc/tasks_net.go +++ b/pkg/sentry/fsimpl/proc/tasks_net.go @@ -22,8 +22,8 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -32,9 +32,9 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/usermem" ) func newNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry { diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index aabf2bf0c..ad963870b 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -19,7 +19,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" diff --git a/pkg/sentry/fsimpl/proc/tasks_sys_test.go b/pkg/sentry/fsimpl/proc/tasks_sys_test.go index 0a1d3f34b..be54897bb 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys_test.go @@ -20,7 +20,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/inet" ) diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go index 2c1635f33..6fc3524db 100644 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_test.go @@ -22,14 +22,14 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) var ( diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index beda141f1..66c0d8bc8 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -9,7 +9,7 @@ go_library( ], deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index 1305ad01d..e35d52d17 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -20,7 +20,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD index 12053a5b6..efd5974c4 100644 --- a/pkg/sentry/fsimpl/testutil/BUILD +++ b/pkg/sentry/fsimpl/testutil/BUILD @@ -12,10 +12,10 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/cpuid", "//pkg/fspath", "//pkg/memutil", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", @@ -27,9 +27,9 @@ go_library( "//pkg/sentry/platform/kvm", "//pkg/sentry/platform/ptrace", "//pkg/sentry/time", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/sync", + "//pkg/usermem", "@com_github_google_go-cmp//cmp:go_default_library", ], ) diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go index 295da2d52..89f8c4915 100644 --- a/pkg/sentry/fsimpl/testutil/kernel.go +++ b/pkg/sentry/fsimpl/testutil/kernel.go @@ -21,9 +21,9 @@ import ( "runtime" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" diff --git a/pkg/sentry/fsimpl/testutil/testutil.go b/pkg/sentry/fsimpl/testutil/testutil.go index 2a723a89f..1c98335c1 100644 --- a/pkg/sentry/fsimpl/testutil/testutil.go +++ b/pkg/sentry/fsimpl/testutil/testutil.go @@ -24,12 +24,12 @@ import ( "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // System represents the context for a single test. diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index 857e98bc5..fb436860c 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -30,10 +30,11 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/amutex", + "//pkg/context", "//pkg/fspath", "//pkg/log", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", @@ -43,12 +44,11 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/sentry/safemem", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) @@ -59,10 +59,10 @@ go_test( deps = [ ":tmpfs", "//pkg/abi/linux", + "//pkg/context", "//pkg/fspath", "//pkg/refs", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/fs/tmpfs", "//pkg/sentry/kernel/auth", @@ -82,13 +82,13 @@ go_test( library = ":tmpfs", deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/fspath", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/contexttest", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go index d88c83499..54241c8e8 100644 --- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go +++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go @@ -21,10 +21,10 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index 887ca2619..dc0d27cf9 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -16,7 +16,7 @@ package tmpfs import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index d726f03c5..5ee9cf1e9 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -19,8 +19,8 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go index 482aabd52..0c57fdca3 100644 --- a/pkg/sentry/fsimpl/tmpfs/named_pipe.go +++ b/pkg/sentry/fsimpl/tmpfs/named_pipe.go @@ -16,11 +16,11 @@ package tmpfs import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/usermem" ) type namedPipe struct { diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go index 70b42a6ec..5ee7f2a72 100644 --- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go +++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go @@ -19,13 +19,13 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const fileName = "mypipe" diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 7c633c1b0..e9e6faf67 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -20,17 +20,17 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) type regularFile struct { diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go index 034a29fdb..32552e261 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go @@ -22,12 +22,12 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/usermem" ) // nextFileID is used to generate unique file names. diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 515f033f2..88dbd6e35 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -29,7 +29,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/pgalloc" diff --git a/pkg/sentry/hostmm/BUILD b/pkg/sentry/hostmm/BUILD index a145a5ca3..61c78569d 100644 --- a/pkg/sentry/hostmm/BUILD +++ b/pkg/sentry/hostmm/BUILD @@ -12,6 +12,6 @@ go_library( deps = [ "//pkg/fd", "//pkg/log", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/hostmm/hostmm.go b/pkg/sentry/hostmm/hostmm.go index 19335ca73..506c7864a 100644 --- a/pkg/sentry/hostmm/hostmm.go +++ b/pkg/sentry/hostmm/hostmm.go @@ -24,7 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // NotifyCurrentMemcgPressureCallback requests that f is called whenever the diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD index aa621b724..334432abf 100644 --- a/pkg/sentry/inet/BUILD +++ b/pkg/sentry/inet/BUILD @@ -13,7 +13,7 @@ go_library( "test_stack.go", ], deps = [ - "//pkg/sentry/context", + "//pkg/context", "//pkg/tcpip/stack", ], ) diff --git a/pkg/sentry/inet/context.go b/pkg/sentry/inet/context.go index 4eda7dd1f..e8cc1bffd 100644 --- a/pkg/sentry/inet/context.go +++ b/pkg/sentry/inet/context.go @@ -15,7 +15,7 @@ package inet import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // contextID is the inet package's type for context.Context.Value keys. diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index cebaccd92..0738946d9 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -153,14 +153,15 @@ go_library( "//pkg/binary", "//pkg/bits", "//pkg/bpf", + "//pkg/context", "//pkg/cpuid", "//pkg/eventchannel", "//pkg/log", "//pkg/metric", "//pkg/refs", + "//pkg/safemem", "//pkg/secio", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/lock", @@ -180,7 +181,6 @@ go_library( "//pkg/sentry/mm", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/sentry/safemem", "//pkg/sentry/socket/netlink/port", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/time", @@ -188,7 +188,6 @@ go_library( "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", "//pkg/sentry/uniqueid", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/state", "//pkg/state/statefile", "//pkg/sync", @@ -196,6 +195,7 @@ go_library( "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/stack", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -212,9 +212,9 @@ go_test( library = ":kernel", deps = [ "//pkg/abi", + "//pkg/context", "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/fs/filetest", "//pkg/sentry/kernel/sched", @@ -222,8 +222,8 @@ go_test( "//pkg/sentry/pgalloc", "//pkg/sentry/time", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD index 64537c9be..2bc49483a 100644 --- a/pkg/sentry/kernel/auth/BUILD +++ b/pkg/sentry/kernel/auth/BUILD @@ -61,8 +61,8 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/bits", + "//pkg/context", "//pkg/log", - "//pkg/sentry/context", "//pkg/sync", "//pkg/syserror", ], diff --git a/pkg/sentry/kernel/auth/context.go b/pkg/sentry/kernel/auth/context.go index 5c0e7d6b6..ef5723127 100644 --- a/pkg/sentry/kernel/auth/context.go +++ b/pkg/sentry/kernel/auth/context.go @@ -15,7 +15,7 @@ package auth import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // contextID is the auth package's type for context.Context.Value keys. diff --git a/pkg/sentry/kernel/auth/id_map.go b/pkg/sentry/kernel/auth/id_map.go index 3d74bc610..28cbe159d 100644 --- a/pkg/sentry/kernel/auth/id_map.go +++ b/pkg/sentry/kernel/auth/id_map.go @@ -16,7 +16,7 @@ package auth import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/kernel/context.go b/pkg/sentry/kernel/context.go index 3c9dceaba..0c40bf315 100644 --- a/pkg/sentry/kernel/context.go +++ b/pkg/sentry/kernel/context.go @@ -17,8 +17,8 @@ package kernel import ( "time" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is the kernel package's type for context.Context.Value keys. diff --git a/pkg/sentry/kernel/contexttest/BUILD b/pkg/sentry/kernel/contexttest/BUILD index daff608d7..9d26392c0 100644 --- a/pkg/sentry/kernel/contexttest/BUILD +++ b/pkg/sentry/kernel/contexttest/BUILD @@ -8,8 +8,8 @@ go_library( srcs = ["contexttest.go"], visibility = ["//pkg/sentry:internal"], deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/context", + "//pkg/sentry/contexttest", "//pkg/sentry/kernel", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", diff --git a/pkg/sentry/kernel/contexttest/contexttest.go b/pkg/sentry/kernel/contexttest/contexttest.go index 82f9d8922..22c340e56 100644 --- a/pkg/sentry/kernel/contexttest/contexttest.go +++ b/pkg/sentry/kernel/contexttest/contexttest.go @@ -19,8 +19,8 @@ package contexttest import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" diff --git a/pkg/sentry/kernel/epoll/BUILD b/pkg/sentry/kernel/epoll/BUILD index 19e16ab3a..dedf0fa15 100644 --- a/pkg/sentry/kernel/epoll/BUILD +++ b/pkg/sentry/kernel/epoll/BUILD @@ -24,13 +24,13 @@ go_library( ], visibility = ["//pkg/sentry:internal"], deps = [ + "//pkg/context", "//pkg/refs", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", - "//pkg/sentry/usermem", "//pkg/sync", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -43,7 +43,7 @@ go_test( ], library = ":epoll", deps = [ - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fs/filetest", "//pkg/waiter", ], diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go index e84742993..8bffb78fc 100644 --- a/pkg/sentry/kernel/epoll/epoll.go +++ b/pkg/sentry/kernel/epoll/epoll.go @@ -20,13 +20,13 @@ import ( "fmt" "syscall" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/epoll/epoll_test.go b/pkg/sentry/kernel/epoll/epoll_test.go index 4a20d4c82..22630e9c5 100644 --- a/pkg/sentry/kernel/epoll/epoll_test.go +++ b/pkg/sentry/kernel/epoll/epoll_test.go @@ -17,7 +17,7 @@ package epoll import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs/filetest" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD index ee2d74864..9983a32e5 100644 --- a/pkg/sentry/kernel/eventfd/BUILD +++ b/pkg/sentry/kernel/eventfd/BUILD @@ -8,14 +8,14 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/fdnotifier", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -26,8 +26,8 @@ go_test( srcs = ["eventfd_test.go"], library = ":eventfd", deps = [ - "//pkg/sentry/context/contexttest", - "//pkg/sentry/usermem", + "//pkg/sentry/contexttest", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go index 687690679..87951adeb 100644 --- a/pkg/sentry/kernel/eventfd/eventfd.go +++ b/pkg/sentry/kernel/eventfd/eventfd.go @@ -21,14 +21,14 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/eventfd/eventfd_test.go b/pkg/sentry/kernel/eventfd/eventfd_test.go index 018c7f3ef..9b4892f74 100644 --- a/pkg/sentry/kernel/eventfd/eventfd_test.go +++ b/pkg/sentry/kernel/eventfd/eventfd_test.go @@ -17,8 +17,8 @@ package eventfd import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/contexttest" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 0ad4135b3..9460bb235 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -22,8 +22,8 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/limits" diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go index 86164df49..261b815f2 100644 --- a/pkg/sentry/kernel/fd_table_test.go +++ b/pkg/sentry/kernel/fd_table_test.go @@ -18,8 +18,8 @@ import ( "runtime" "testing" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/filetest" "gvisor.dev/gvisor/pkg/sentry/limits" diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index f413d8ae2..c5021f2db 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -36,12 +36,12 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/log", - "//pkg/sentry/context", "//pkg/sentry/memmap", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) @@ -51,7 +51,7 @@ go_test( srcs = ["futex_test.go"], library = ":futex", deps = [ - "//pkg/sentry/usermem", "//pkg/sync", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index d1931c8f4..732e66da4 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -20,9 +20,9 @@ package futex import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // KeyKind indicates the type of a Key. diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go index c23126ca5..7c5c7665b 100644 --- a/pkg/sentry/kernel/futex/futex_test.go +++ b/pkg/sentry/kernel/futex/futex_test.go @@ -22,8 +22,8 @@ import ( "testing" "unsafe" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // testData implements the Target interface, and allows us to diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index c85e97fef..7b90fac5a 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -40,12 +40,12 @@ import ( "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/eventchannel" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/timerfd" "gvisor.dev/gvisor/pkg/sentry/hostcpu" diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 2c7b6206f..4c049d5b4 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -33,16 +33,16 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/amutex", + "//pkg/context", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", - "//pkg/sentry/safemem", - "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -57,11 +57,11 @@ go_test( ], library = ":pipe", deps = [ - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/context", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", - "//pkg/sentry/usermem", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/kernel/pipe/buffer.go b/pkg/sentry/kernel/pipe/buffer.go index 1c0f34269..fe3be5dbd 100644 --- a/pkg/sentry/kernel/pipe/buffer.go +++ b/pkg/sentry/kernel/pipe/buffer.go @@ -17,7 +17,7 @@ package pipe import ( "io" - "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sync" ) diff --git a/pkg/sentry/kernel/pipe/buffer_test.go b/pkg/sentry/kernel/pipe/buffer_test.go index ee1b90115..4d54b8b8f 100644 --- a/pkg/sentry/kernel/pipe/buffer_test.go +++ b/pkg/sentry/kernel/pipe/buffer_test.go @@ -18,7 +18,7 @@ import ( "testing" "unsafe" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) func TestBufferSize(t *testing.T) { diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go index 716f589af..4b688c627 100644 --- a/pkg/sentry/kernel/pipe/node.go +++ b/pkg/sentry/kernel/pipe/node.go @@ -16,7 +16,7 @@ package pipe import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sync" diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go index 16fa80abe..ab75a87ff 100644 --- a/pkg/sentry/kernel/pipe/node_test.go +++ b/pkg/sentry/kernel/pipe/node_test.go @@ -18,11 +18,11 @@ import ( "testing" "time" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) type sleeper struct { diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index e4fd7d420..08410283f 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -20,7 +20,7 @@ import ( "sync/atomic" "syscall" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go index e3a14b665..bda739dbe 100644 --- a/pkg/sentry/kernel/pipe/pipe_test.go +++ b/pkg/sentry/kernel/pipe/pipe_test.go @@ -18,9 +18,9 @@ import ( "bytes" "testing" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go index 8394eb78b..80158239e 100644 --- a/pkg/sentry/kernel/pipe/pipe_util.go +++ b/pkg/sentry/kernel/pipe/pipe_util.go @@ -21,10 +21,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/amutex" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/pipe/reader_writer.go b/pkg/sentry/kernel/pipe/reader_writer.go index b4d29fc77..b2b5691ee 100644 --- a/pkg/sentry/kernel/pipe/reader_writer.go +++ b/pkg/sentry/kernel/pipe/reader_writer.go @@ -17,11 +17,11 @@ package pipe import ( "io" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // ReaderWriter satisfies the FileOperations interface and services both diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 6f83e3cee..a5675bd70 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -16,12 +16,12 @@ package pipe import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index 3be171cdc..35ad97d5d 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // ptraceOptions are the subset of options controlling a task's ptrace behavior diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go index 5514cf432..cef1276ec 100644 --- a/pkg/sentry/kernel/ptrace_amd64.go +++ b/pkg/sentry/kernel/ptrace_amd64.go @@ -18,8 +18,8 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // ptraceArch implements arch-specific ptrace commands. diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go index 61e412911..d971b96b3 100644 --- a/pkg/sentry/kernel/ptrace_arm64.go +++ b/pkg/sentry/kernel/ptrace_arm64.go @@ -17,8 +17,8 @@ package kernel import ( - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // ptraceArch implements arch-specific ptrace commands. diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go index b14429854..efebfd872 100644 --- a/pkg/sentry/kernel/rseq.go +++ b/pkg/sentry/kernel/rseq.go @@ -19,8 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/hostcpu" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Restartable sequences. diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go index 2347dcf36..c38c5a40c 100644 --- a/pkg/sentry/kernel/seccomp.go +++ b/pkg/sentry/kernel/seccomp.go @@ -21,8 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const maxSyscallFilterInstructions = 1 << 15 diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD index 76e19b551..65e5427c1 100644 --- a/pkg/sentry/kernel/semaphore/BUILD +++ b/pkg/sentry/kernel/semaphore/BUILD @@ -24,8 +24,8 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/log", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", @@ -41,8 +41,8 @@ go_test( library = ":semaphore", deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/context", + "//pkg/sentry/contexttest", "//pkg/sentry/kernel/auth", "//pkg/syserror", ], diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index 18299814e..1000f3287 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -19,8 +19,8 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go index c235f6ca4..e47acefdf 100644 --- a/pkg/sentry/kernel/semaphore/semaphore_test.go +++ b/pkg/sentry/kernel/semaphore/semaphore_test.go @@ -18,8 +18,8 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index 5547c5abf..bfd779837 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -11,9 +11,9 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/log", "//pkg/refs", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", @@ -22,8 +22,8 @@ go_library( "//pkg/sentry/pgalloc", "//pkg/sentry/platform", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index 8ddef7eb8..208569057 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -37,9 +37,9 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -47,9 +47,9 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Key represents a shm segment key. Analogous to a file name. diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD index 5d44773d4..3eb78e91b 100644 --- a/pkg/sentry/kernel/signalfd/BUILD +++ b/pkg/sentry/kernel/signalfd/BUILD @@ -9,14 +9,14 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go index 28be4a939..8243bb93e 100644 --- a/pkg/sentry/kernel/signalfd/signalfd.go +++ b/pkg/sentry/kernel/signalfd/signalfd.go @@ -18,14 +18,14 @@ package signalfd import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go index d2d01add4..93c4fe969 100644 --- a/pkg/sentry/kernel/syscalls.go +++ b/pkg/sentry/kernel/syscalls.go @@ -21,8 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/bits" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // maxSyscallNum is the highest supported syscall number. diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 978d66da8..95adf2778 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -21,8 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -35,8 +35,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index 247bd4aba..53d4d211b 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -17,8 +17,8 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // SharingOptions controls what resources are shared by a new task created by diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go index bb5560acf..2d6e7733c 100644 --- a/pkg/sentry/kernel/task_context.go +++ b/pkg/sentry/kernel/task_context.go @@ -18,13 +18,13 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/loader" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/usermem" ) var errNoSyscalls = syserr.New("no syscall table found", linux.ENOEXEC) diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go index c211b5b74..a53e77c9f 100644 --- a/pkg/sentry/kernel/task_futex.go +++ b/pkg/sentry/kernel/task_futex.go @@ -16,7 +16,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/sentry/kernel/futex" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // Futex returns t's futex manager. diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go index 0fb3661de..41259210c 100644 --- a/pkg/sentry/kernel/task_log.go +++ b/pkg/sentry/kernel/task_log.go @@ -20,7 +20,7 @@ import ( "sort" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go index 6357273d3..5568c91bc 100644 --- a/pkg/sentry/kernel/task_run.go +++ b/pkg/sentry/kernel/task_run.go @@ -26,7 +26,7 @@ import ( ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // A taskRunState is a reified state in the task state machine. See README.md diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index 39cd1340d..8802db142 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -26,8 +26,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index 58af16ee2..de838beef 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -21,8 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // TaskConfig defines the configuration of a new Task (see below). diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go index 3180f5560..d555d69a8 100644 --- a/pkg/sentry/kernel/task_syscall.go +++ b/pkg/sentry/kernel/task_syscall.go @@ -25,8 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // SyscallRestartErrno represents a ERESTART* errno defined in the Linux's kernel diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go index 518bfe1bd..2bf3ce8a8 100644 --- a/pkg/sentry/kernel/task_usermem.go +++ b/pkg/sentry/kernel/task_usermem.go @@ -18,8 +18,8 @@ import ( "math" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // MAX_RW_COUNT is the maximum size in bytes of a single read or write. diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD index d49594d9f..7ba7dc50c 100644 --- a/pkg/sentry/kernel/time/BUILD +++ b/pkg/sentry/kernel/time/BUILD @@ -11,7 +11,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sync", "//pkg/syserror", "//pkg/waiter", diff --git a/pkg/sentry/kernel/time/context.go b/pkg/sentry/kernel/time/context.go index 8ef483dd3..00b729d88 100644 --- a/pkg/sentry/kernel/time/context.go +++ b/pkg/sentry/kernel/time/context.go @@ -15,7 +15,7 @@ package time import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // contextID is the time package's type for context.Context.Value keys. diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go index 849c5b646..cf2f7ca72 100644 --- a/pkg/sentry/kernel/timekeeper_test.go +++ b/pkg/sentry/kernel/timekeeper_test.go @@ -17,12 +17,12 @@ package kernel import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/pgalloc" sentrytime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // mockClocks is a sentrytime.Clocks that simply returns the times in the diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go index fdd10c56c..f1b3c212c 100644 --- a/pkg/sentry/kernel/vdso.go +++ b/pkg/sentry/kernel/vdso.go @@ -18,10 +18,10 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // vdsoParams are the parameters exposed to the VDSO. diff --git a/pkg/sentry/limits/BUILD b/pkg/sentry/limits/BUILD index 67869757f..cf591c4c1 100644 --- a/pkg/sentry/limits/BUILD +++ b/pkg/sentry/limits/BUILD @@ -12,7 +12,7 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sync", ], ) diff --git a/pkg/sentry/limits/context.go b/pkg/sentry/limits/context.go index 6972749ed..77e1fe217 100644 --- a/pkg/sentry/limits/context.go +++ b/pkg/sentry/limits/context.go @@ -15,7 +15,7 @@ package limits import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // contextID is the limit package's type for context.Context.Value keys. diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD index d4ad2bd6c..23790378a 100644 --- a/pkg/sentry/loader/BUILD +++ b/pkg/sentry/loader/BUILD @@ -24,11 +24,12 @@ go_library( "//pkg/abi", "//pkg/abi/linux", "//pkg/binary", + "//pkg/context", "//pkg/cpuid", "//pkg/log", "//pkg/rand", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", @@ -37,12 +38,11 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/mm", "//pkg/sentry/pgalloc", - "//pkg/sentry/safemem", "//pkg/sentry/uniqueid", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/syserr", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go index 6299a3e2f..122ed05c2 100644 --- a/pkg/sentry/loader/elf.go +++ b/pkg/sentry/loader/elf.go @@ -23,16 +23,16 @@ import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go index ccf909cac..098a45d36 100644 --- a/pkg/sentry/loader/interpreter.go +++ b/pkg/sentry/loader/interpreter.go @@ -18,10 +18,10 @@ import ( "bytes" "io" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go index b03eeb005..9a613d6b7 100644 --- a/pkg/sentry/loader/loader.go +++ b/pkg/sentry/loader/loader.go @@ -24,16 +24,16 @@ import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // LoadArgs holds specifications for an executable file to be loaded. diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go index df8a81907..52f446ed7 100644 --- a/pkg/sentry/loader/vdso.go +++ b/pkg/sentry/loader/vdso.go @@ -20,20 +20,20 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/uniqueid" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD index f9a65f086..a98b66de1 100644 --- a/pkg/sentry/memmap/BUILD +++ b/pkg/sentry/memmap/BUILD @@ -38,11 +38,11 @@ go_library( ], visibility = ["//pkg/sentry:internal"], deps = [ + "//pkg/context", "//pkg/log", - "//pkg/sentry/context", "//pkg/sentry/platform", - "//pkg/sentry/usermem", "//pkg/syserror", + "//pkg/usermem", ], ) @@ -51,5 +51,5 @@ go_test( size = "small", srcs = ["mapping_set_test.go"], library = ":memmap", - deps = ["//pkg/sentry/usermem"], + deps = ["//pkg/usermem"], ) diff --git a/pkg/sentry/memmap/mapping_set.go b/pkg/sentry/memmap/mapping_set.go index 0a5b7ce45..d609c1ae0 100644 --- a/pkg/sentry/memmap/mapping_set.go +++ b/pkg/sentry/memmap/mapping_set.go @@ -18,7 +18,7 @@ import ( "fmt" "math" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // MappingSet maps offsets into a Mappable to mappings of those offsets. It is diff --git a/pkg/sentry/memmap/mapping_set_test.go b/pkg/sentry/memmap/mapping_set_test.go index f9b11a59c..d39efe38f 100644 --- a/pkg/sentry/memmap/mapping_set_test.go +++ b/pkg/sentry/memmap/mapping_set_test.go @@ -18,7 +18,7 @@ import ( "reflect" "testing" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) type testMappingSpace struct { diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go index 16a722a13..c6db9fc8f 100644 --- a/pkg/sentry/memmap/memmap.go +++ b/pkg/sentry/memmap/memmap.go @@ -18,9 +18,9 @@ package memmap import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // Mappable represents a memory-mappable object, a mutable mapping from uint64 diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index bd6399fa2..e5729ced5 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -27,7 +27,7 @@ go_template_instance( "minDegree": "8", }, imports = { - "usermem": "gvisor.dev/gvisor/pkg/sentry/usermem", + "usermem": "gvisor.dev/gvisor/pkg/usermem", }, package = "mm", prefix = "vma", @@ -47,7 +47,7 @@ go_template_instance( "minDegree": "8", }, imports = { - "usermem": "gvisor.dev/gvisor/pkg/sentry/usermem", + "usermem": "gvisor.dev/gvisor/pkg/usermem", }, package = "mm", prefix = "pma", @@ -99,10 +99,12 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/atomicbitops", + "//pkg/context", "//pkg/log", "//pkg/refs", + "//pkg/safecopy", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/proc/seqfile", "//pkg/sentry/kernel/auth", @@ -112,13 +114,11 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/safemem", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", "//pkg/tcpip/buffer", + "//pkg/usermem", ], ) @@ -128,14 +128,14 @@ go_test( srcs = ["mm_test.go"], library = ":mm", deps = [ + "//pkg/context", "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/limits", "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/sentry/usermem", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go index cfebcfd42..e58a63deb 100644 --- a/pkg/sentry/mm/address_space.go +++ b/pkg/sentry/mm/address_space.go @@ -20,7 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // AddressSpace returns the platform.AddressSpace bound to mm. diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 4b48866ad..cb29d94b0 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -16,15 +16,15 @@ package mm import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // aioManager creates and manages asynchronous I/O contexts. diff --git a/pkg/sentry/mm/debug.go b/pkg/sentry/mm/debug.go index df9adf708..c273c982e 100644 --- a/pkg/sentry/mm/debug.go +++ b/pkg/sentry/mm/debug.go @@ -18,7 +18,7 @@ import ( "bytes" "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) const ( diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go index b03e7d020..fa776f9c6 100644 --- a/pkg/sentry/mm/io.go +++ b/pkg/sentry/mm/io.go @@ -15,11 +15,11 @@ package mm import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // There are two supported ways to copy data to/from application virtual diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go index 4e9ca1de6..47b8fbf43 100644 --- a/pkg/sentry/mm/lifecycle.go +++ b/pkg/sentry/mm/lifecycle.go @@ -19,13 +19,13 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // NewMemoryManager returns a new MemoryManager with no mappings and 1 user. diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go index d2a01d48a..f550acae0 100644 --- a/pkg/sentry/mm/metadata.go +++ b/pkg/sentry/mm/metadata.go @@ -17,7 +17,7 @@ package mm import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // Dumpability describes if and how core dumps should be created. diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index 78cc9e6e4..09e582dd3 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -35,14 +35,14 @@ package mm import ( + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // MemoryManager implements a virtual address space. diff --git a/pkg/sentry/mm/mm_test.go b/pkg/sentry/mm/mm_test.go index 4d2bfaaed..edacca741 100644 --- a/pkg/sentry/mm/mm_test.go +++ b/pkg/sentry/mm/mm_test.go @@ -17,15 +17,15 @@ package mm import ( "testing" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) func testMemoryManager(ctx context.Context) *MemoryManager { diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go index c976c6f45..62e4c20af 100644 --- a/pkg/sentry/mm/pma.go +++ b/pkg/sentry/mm/pma.go @@ -17,14 +17,14 @@ package mm import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safecopy" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/safecopy" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // existingPMAsLocked checks that pmas exist for all addresses in ar, and diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go index 79610acb7..1ab92f046 100644 --- a/pkg/sentry/mm/procfs.go +++ b/pkg/sentry/mm/procfs.go @@ -19,10 +19,10 @@ import ( "fmt" "strings" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/mm/save_restore.go b/pkg/sentry/mm/save_restore.go index 93259c5a3..f56215d9a 100644 --- a/pkg/sentry/mm/save_restore.go +++ b/pkg/sentry/mm/save_restore.go @@ -17,7 +17,7 @@ package mm import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // InvalidateUnsavable invokes memmap.Mappable.InvalidateUnsavable on all diff --git a/pkg/sentry/mm/shm.go b/pkg/sentry/mm/shm.go index b9f2d23e5..6432731d4 100644 --- a/pkg/sentry/mm/shm.go +++ b/pkg/sentry/mm/shm.go @@ -15,10 +15,10 @@ package mm import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/shm" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // DetachShm unmaps a sysv shared memory segment. diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index ea2d7af74..9ad52082d 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -15,14 +15,14 @@ package mm import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // SpecialMappable implements memmap.MappingIdentity and memmap.Mappable with diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index c2466c988..c5dfa5972 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -19,14 +19,14 @@ import ( mrand "math/rand" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // HandleUserFault handles an application page fault. sp is the faulting diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go index f2fd70799..9a14e69e6 100644 --- a/pkg/sentry/mm/vma.go +++ b/pkg/sentry/mm/vma.go @@ -18,13 +18,13 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Preconditions: mm.mappingMu must be locked for writing. opts must be valid diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index 02385a3ce..1eeb9f317 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -61,18 +61,18 @@ go_library( ], visibility = ["//pkg/sentry:internal"], deps = [ + "//pkg/context", "//pkg/log", "//pkg/memutil", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/hostmm", "//pkg/sentry/platform", - "//pkg/sentry/safemem", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/state", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) @@ -81,5 +81,5 @@ go_test( size = "small", srcs = ["pgalloc_test.go"], library = ":pgalloc", - deps = ["//pkg/sentry/usermem"], + deps = ["//pkg/usermem"], ) diff --git a/pkg/sentry/pgalloc/context.go b/pkg/sentry/pgalloc/context.go index 11ccf897b..d25215418 100644 --- a/pkg/sentry/pgalloc/context.go +++ b/pkg/sentry/pgalloc/context.go @@ -15,7 +15,7 @@ package pgalloc import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // contextID is this package's type for context.Context.Value keys. diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index c99e023d9..577e9306a 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -29,15 +29,15 @@ import ( "syscall" "time" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/hostmm" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // MemoryFile is a platform.File whose pages may be allocated to arbitrary diff --git a/pkg/sentry/pgalloc/pgalloc_test.go b/pkg/sentry/pgalloc/pgalloc_test.go index 428e6a859..293f22c6b 100644 --- a/pkg/sentry/pgalloc/pgalloc_test.go +++ b/pkg/sentry/pgalloc/pgalloc_test.go @@ -17,7 +17,7 @@ package pgalloc import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/pgalloc/save_restore.go b/pkg/sentry/pgalloc/save_restore.go index aafce1d00..f8385c146 100644 --- a/pkg/sentry/pgalloc/save_restore.go +++ b/pkg/sentry/pgalloc/save_restore.go @@ -25,8 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/usermem" ) // SaveTo writes f's state to the given stream. diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD index 006450b2d..453241eca 100644 --- a/pkg/sentry/platform/BUILD +++ b/pkg/sentry/platform/BUILD @@ -26,14 +26,14 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/atomicbitops", + "//pkg/context", "//pkg/log", + "//pkg/safecopy", + "//pkg/safemem", "//pkg/seccomp", "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/safemem", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/platform/context.go b/pkg/sentry/platform/context.go index e29bc4485..6759cda65 100644 --- a/pkg/sentry/platform/context.go +++ b/pkg/sentry/platform/context.go @@ -15,7 +15,7 @@ package platform import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // contextID is the auth package's type for context.Context.Value keys. diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index a4532a766..159f7eafd 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -44,16 +44,16 @@ go_library( "//pkg/cpuid", "//pkg/log", "//pkg/procid", + "//pkg/safecopy", "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", "//pkg/sentry/platform/ring0", "//pkg/sentry/platform/ring0/pagetables", - "//pkg/sentry/platform/safecopy", "//pkg/sentry/time", - "//pkg/sentry/usermem", "//pkg/sync", + "//pkg/usermem", ], ) @@ -75,6 +75,6 @@ go_test( "//pkg/sentry/platform/kvm/testutil", "//pkg/sentry/platform/ring0", "//pkg/sentry/platform/ring0/pagetables", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index a25f3c449..be213bfe8 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // dirtySet tracks vCPUs for invalidation. diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go index 30dbb74d6..35cd55fef 100644 --- a/pkg/sentry/platform/kvm/bluepill.go +++ b/pkg/sentry/platform/kvm/bluepill.go @@ -19,9 +19,9 @@ import ( "reflect" "syscall" + "gvisor.dev/gvisor/pkg/safecopy" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/platform/safecopy" ) // bluepill enters guest mode. diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go index f6459cda9..e34f46aeb 100644 --- a/pkg/sentry/platform/kvm/bluepill_fault.go +++ b/pkg/sentry/platform/kvm/bluepill_fault.go @@ -18,7 +18,7 @@ import ( "sync/atomic" "syscall" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go index 99450d22d..c769ac7b4 100644 --- a/pkg/sentry/platform/kvm/context.go +++ b/pkg/sentry/platform/kvm/context.go @@ -19,7 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/interrupt" "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // context is an implementation of the platform context. diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index d337c5c7c..972ba85c3 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/ring0" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // KVM represents a lightweight VM context. diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index 30df725d4..c42752d50 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -27,7 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" "gvisor.dev/gvisor/pkg/sentry/platform/ring0" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) var dummyFPState = (*byte)(arch.NewFloatingPointData()) diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index e6d912168..8076c7529 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -25,8 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/procid" "gvisor.dev/gvisor/pkg/sentry/platform/ring0" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // machine contains state associated with the VM as a whole. diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 873e39dc7..923ce3909 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -26,7 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/ring0" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // initArchState initializes architecture-specific state. diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go index 3b1f20219..09552837a 100644 --- a/pkg/sentry/platform/kvm/machine_arm64.go +++ b/pkg/sentry/platform/kvm/machine_arm64.go @@ -20,7 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) type vCPUArchState struct { diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index 3f2f97a6b..1c8384e6b 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -26,7 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // setMemoryRegion initializes a region. diff --git a/pkg/sentry/platform/kvm/physical_map.go b/pkg/sentry/platform/kvm/physical_map.go index 91de5dab1..f7fa2f98d 100644 --- a/pkg/sentry/platform/kvm/physical_map.go +++ b/pkg/sentry/platform/kvm/physical_map.go @@ -21,7 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/platform/ring0" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) type region struct { diff --git a/pkg/sentry/platform/kvm/virtual_map.go b/pkg/sentry/platform/kvm/virtual_map.go index 2d68855ef..c8897d34f 100644 --- a/pkg/sentry/platform/kvm/virtual_map.go +++ b/pkg/sentry/platform/kvm/virtual_map.go @@ -22,7 +22,7 @@ import ( "regexp" "strconv" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) type virtualRegion struct { diff --git a/pkg/sentry/platform/kvm/virtual_map_test.go b/pkg/sentry/platform/kvm/virtual_map_test.go index 6a2f145be..327e2be4f 100644 --- a/pkg/sentry/platform/kvm/virtual_map_test.go +++ b/pkg/sentry/platform/kvm/virtual_map_test.go @@ -18,7 +18,7 @@ import ( "syscall" "testing" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) type checker struct { diff --git a/pkg/sentry/platform/mmap_min_addr.go b/pkg/sentry/platform/mmap_min_addr.go index 999787462..091c2e365 100644 --- a/pkg/sentry/platform/mmap_min_addr.go +++ b/pkg/sentry/platform/mmap_min_addr.go @@ -20,7 +20,7 @@ import ( "strconv" "strings" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // systemMMapMinAddrSource is the source file. diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go index ec22dbf87..2ca696382 100644 --- a/pkg/sentry/platform/platform.go +++ b/pkg/sentry/platform/platform.go @@ -22,10 +22,10 @@ import ( "os" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/seccomp" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // Platform provides abstractions for execution contexts (Context, diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD index 3bcc5e040..95abd321e 100644 --- a/pkg/sentry/platform/ptrace/BUILD +++ b/pkg/sentry/platform/ptrace/BUILD @@ -25,14 +25,14 @@ go_library( "//pkg/abi/linux", "//pkg/log", "//pkg/procid", + "//pkg/safecopy", "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/hostcpu", "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/usermem", "//pkg/sync", + "//pkg/usermem", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go index bb0e03880..03adb624b 100644 --- a/pkg/sentry/platform/ptrace/ptrace.go +++ b/pkg/sentry/platform/ptrace/ptrace.go @@ -51,8 +51,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/interrupt" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) var ( diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go index 72c7ec564..6c0ed7b3e 100644 --- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go +++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go @@ -20,7 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // getRegs gets the general purpose register set. diff --git a/pkg/sentry/platform/ptrace/stub_unsafe.go b/pkg/sentry/platform/ptrace/stub_unsafe.go index aa1b87237..341dde143 100644 --- a/pkg/sentry/platform/ptrace/stub_unsafe.go +++ b/pkg/sentry/platform/ptrace/stub_unsafe.go @@ -19,8 +19,8 @@ import ( "syscall" "unsafe" - "gvisor.dev/gvisor/pkg/sentry/platform/safecopy" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/safecopy" + "gvisor.dev/gvisor/pkg/usermem" ) // stub is defined in arch-specific assembly. diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go index 15dc46a5b..31b7cec53 100644 --- a/pkg/sentry/platform/ptrace/subprocess.go +++ b/pkg/sentry/platform/ptrace/subprocess.go @@ -25,8 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/procid" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" ) // Linux kernel errnos which "should never be seen by user programs", but will diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD index 6dee8fcc5..934b6fbcd 100644 --- a/pkg/sentry/platform/ring0/BUILD +++ b/pkg/sentry/platform/ring0/BUILD @@ -78,6 +78,6 @@ go_library( deps = [ "//pkg/cpuid", "//pkg/sentry/platform/ring0/pagetables", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go index 9dae0dccb..9c6c2cf5c 100644 --- a/pkg/sentry/platform/ring0/defs_amd64.go +++ b/pkg/sentry/platform/ring0/defs_amd64.go @@ -18,7 +18,7 @@ package ring0 import ( "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) var ( diff --git a/pkg/sentry/platform/ring0/defs_arm64.go b/pkg/sentry/platform/ring0/defs_arm64.go index a850ce6cf..1583dda12 100644 --- a/pkg/sentry/platform/ring0/defs_arm64.go +++ b/pkg/sentry/platform/ring0/defs_arm64.go @@ -18,7 +18,7 @@ package ring0 import ( "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) var ( diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/sentry/platform/ring0/gen_offsets/BUILD index 147311ed3..4cae10459 100644 --- a/pkg/sentry/platform/ring0/gen_offsets/BUILD +++ b/pkg/sentry/platform/ring0/gen_offsets/BUILD @@ -28,6 +28,6 @@ go_binary( deps = [ "//pkg/cpuid", "//pkg/sentry/platform/ring0/pagetables", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index 8b5cdd6c1..971eed7fa 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -93,8 +93,8 @@ go_library( "//pkg/sentry/platform/ring0:__subpackages__", ], deps = [ - "//pkg/sentry/usermem", "//pkg/sync", + "//pkg/usermem", ], ) @@ -108,5 +108,5 @@ go_test( "walker_check.go", ], library = ":pagetables", - deps = ["//pkg/sentry/usermem"], + deps = ["//pkg/usermem"], ) diff --git a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go index a90394a33..d08bfdeb3 100644 --- a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go +++ b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go @@ -17,7 +17,7 @@ package pagetables import ( "unsafe" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // newAlignedPTEs returns a set of aligned PTEs. diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go index 30c64a372..87e88e97d 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go @@ -21,7 +21,7 @@ package pagetables import ( - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // PageTables is a set of page tables. diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go index e78424766..78510ebed 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go @@ -19,7 +19,7 @@ package pagetables import ( "sync/atomic" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // archPageTables is architecture-specific data. diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go index 35e917526..54e8e554f 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go @@ -19,7 +19,7 @@ package pagetables import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) func Test2MAnd4K(t *testing.T) { diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go index 254116233..2f73d424f 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go @@ -19,7 +19,7 @@ package pagetables import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) func Test2MAnd4K(t *testing.T) { diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go index 6e95ad2b9..5c88d087d 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go @@ -17,7 +17,7 @@ package pagetables import ( "testing" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) type mapping struct { diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go index 3e2383c5e..dcf061df9 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go @@ -19,7 +19,7 @@ package pagetables import ( "sync/atomic" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // archPageTables is architecture-specific data. diff --git a/pkg/sentry/platform/safecopy/BUILD b/pkg/sentry/platform/safecopy/BUILD deleted file mode 100644 index b8747585b..000000000 --- a/pkg/sentry/platform/safecopy/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "safecopy", - srcs = [ - "atomic_amd64.s", - "atomic_arm64.s", - "memclr_amd64.s", - "memclr_arm64.s", - "memcpy_amd64.s", - "memcpy_arm64.s", - "safecopy.go", - "safecopy_unsafe.go", - "sighandler_amd64.s", - "sighandler_arm64.s", - ], - visibility = ["//pkg/sentry:internal"], - deps = ["//pkg/syserror"], -) - -go_test( - name = "safecopy_test", - srcs = [ - "safecopy_test.go", - ], - library = ":safecopy", -) diff --git a/pkg/sentry/platform/safecopy/LICENSE b/pkg/sentry/platform/safecopy/LICENSE deleted file mode 100644 index 6a66aea5e..000000000 --- a/pkg/sentry/platform/safecopy/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pkg/sentry/platform/safecopy/atomic_amd64.s b/pkg/sentry/platform/safecopy/atomic_amd64.s deleted file mode 100644 index a0cd78f33..000000000 --- a/pkg/sentry/platform/safecopy/atomic_amd64.s +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "textflag.h" - -// handleSwapUint32Fault returns the value stored in DI. Control is transferred -// to it when swapUint32 below receives SIGSEGV or SIGBUS, with the signal -// number stored in DI. -// -// It must have the same frame configuration as swapUint32 so that it can undo -// any potential call frame set up by the assembler. -TEXT handleSwapUint32Fault(SB), NOSPLIT, $0-24 - MOVL DI, sig+20(FP) - RET - -// swapUint32 atomically stores new into *addr and returns (the previous *addr -// value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the -// value of old is unspecified, and sig is the number of the signal that was -// received. -// -// Preconditions: addr must be aligned to a 4-byte boundary. -// -//func swapUint32(ptr unsafe.Pointer, new uint32) (old uint32, sig int32) -TEXT ·swapUint32(SB), NOSPLIT, $0-24 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleSwapUint32Fault will store a different value in this address. - MOVL $0, sig+20(FP) - - MOVQ addr+0(FP), DI - MOVL new+8(FP), AX - XCHGL AX, 0(DI) - MOVL AX, old+16(FP) - RET - -// handleSwapUint64Fault returns the value stored in DI. Control is transferred -// to it when swapUint64 below receives SIGSEGV or SIGBUS, with the signal -// number stored in DI. -// -// It must have the same frame configuration as swapUint64 so that it can undo -// any potential call frame set up by the assembler. -TEXT handleSwapUint64Fault(SB), NOSPLIT, $0-28 - MOVL DI, sig+24(FP) - RET - -// swapUint64 atomically stores new into *addr and returns (the previous *addr -// value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the -// value of old is unspecified, and sig is the number of the signal that was -// received. -// -// Preconditions: addr must be aligned to a 8-byte boundary. -// -//func swapUint64(ptr unsafe.Pointer, new uint64) (old uint64, sig int32) -TEXT ·swapUint64(SB), NOSPLIT, $0-28 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleSwapUint64Fault will store a different value in this address. - MOVL $0, sig+24(FP) - - MOVQ addr+0(FP), DI - MOVQ new+8(FP), AX - XCHGQ AX, 0(DI) - MOVQ AX, old+16(FP) - RET - -// handleCompareAndSwapUint32Fault returns the value stored in DI. Control is -// transferred to it when swapUint64 below receives SIGSEGV or SIGBUS, with the -// signal number stored in DI. -// -// It must have the same frame configuration as compareAndSwapUint32 so that it -// can undo any potential call frame set up by the assembler. -TEXT handleCompareAndSwapUint32Fault(SB), NOSPLIT, $0-24 - MOVL DI, sig+20(FP) - RET - -// compareAndSwapUint32 is like sync/atomic.CompareAndSwapUint32, but returns -// (the value previously stored at addr, 0). If a SIGSEGV or SIGBUS signal is -// received during the operation, the value of prev is unspecified, and sig is -// the number of the signal that was received. -// -// Preconditions: addr must be aligned to a 4-byte boundary. -// -//func compareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (prev uint32, sig int32) -TEXT ·compareAndSwapUint32(SB), NOSPLIT, $0-24 - // Store 0 as the returned signal number. If we run to completion, this is - // the value the caller will see; if a signal is received, - // handleCompareAndSwapUint32Fault will store a different value in this - // address. - MOVL $0, sig+20(FP) - - MOVQ addr+0(FP), DI - MOVL old+8(FP), AX - MOVL new+12(FP), DX - LOCK - CMPXCHGL DX, 0(DI) - MOVL AX, prev+16(FP) - RET - -// handleLoadUint32Fault returns the value stored in DI. Control is transferred -// to it when LoadUint32 below receives SIGSEGV or SIGBUS, with the signal -// number stored in DI. -// -// It must have the same frame configuration as loadUint32 so that it can undo -// any potential call frame set up by the assembler. -TEXT handleLoadUint32Fault(SB), NOSPLIT, $0-16 - MOVL DI, sig+12(FP) - RET - -// loadUint32 atomically loads *addr and returns it. If a SIGSEGV or SIGBUS -// signal is received, the value returned is unspecified, and sig is the number -// of the signal that was received. -// -// Preconditions: addr must be aligned to a 4-byte boundary. -// -//func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32) -TEXT ·loadUint32(SB), NOSPLIT, $0-16 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleLoadUint32Fault will store a different value in this address. - MOVL $0, sig+12(FP) - - MOVQ addr+0(FP), AX - MOVL (AX), BX - MOVL BX, val+8(FP) - RET diff --git a/pkg/sentry/platform/safecopy/atomic_arm64.s b/pkg/sentry/platform/safecopy/atomic_arm64.s deleted file mode 100644 index d58ed71f7..000000000 --- a/pkg/sentry/platform/safecopy/atomic_arm64.s +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "textflag.h" - -// handleSwapUint32Fault returns the value stored in R1. Control is transferred -// to it when swapUint32 below receives SIGSEGV or SIGBUS, with the signal -// number stored in R1. -// -// It must have the same frame configuration as swapUint32 so that it can undo -// any potential call frame set up by the assembler. -TEXT handleSwapUint32Fault(SB), NOSPLIT, $0-24 - MOVW R1, sig+20(FP) - RET - -// See the corresponding doc in safecopy_unsafe.go -// -// The code is derived from Go source runtime/internal/atomic.Xchg. -// -//func swapUint32(ptr unsafe.Pointer, new uint32) (old uint32, sig int32) -TEXT ·swapUint32(SB), NOSPLIT, $0-24 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleSwapUint32Fault will store a different value in this address. - MOVW $0, sig+20(FP) -again: - MOVD addr+0(FP), R0 - MOVW new+8(FP), R1 - LDAXRW (R0), R2 - STLXRW R1, (R0), R3 - CBNZ R3, again - MOVW R2, old+16(FP) - RET - -// handleSwapUint64Fault returns the value stored in R1. Control is transferred -// to it when swapUint64 below receives SIGSEGV or SIGBUS, with the signal -// number stored in R1. -// -// It must have the same frame configuration as swapUint64 so that it can undo -// any potential call frame set up by the assembler. -TEXT handleSwapUint64Fault(SB), NOSPLIT, $0-28 - MOVW R1, sig+24(FP) - RET - -// See the corresponding doc in safecopy_unsafe.go -// -// The code is derived from Go source runtime/internal/atomic.Xchg64. -// -//func swapUint64(ptr unsafe.Pointer, new uint64) (old uint64, sig int32) -TEXT ·swapUint64(SB), NOSPLIT, $0-28 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleSwapUint64Fault will store a different value in this address. - MOVW $0, sig+24(FP) -again: - MOVD addr+0(FP), R0 - MOVD new+8(FP), R1 - LDAXR (R0), R2 - STLXR R1, (R0), R3 - CBNZ R3, again - MOVD R2, old+16(FP) - RET - -// handleCompareAndSwapUint32Fault returns the value stored in R1. Control is -// transferred to it when compareAndSwapUint32 below receives SIGSEGV or SIGBUS, -// with the signal number stored in R1. -// -// It must have the same frame configuration as compareAndSwapUint32 so that it -// can undo any potential call frame set up by the assembler. -TEXT handleCompareAndSwapUint32Fault(SB), NOSPLIT, $0-24 - MOVW R1, sig+20(FP) - RET - -// See the corresponding doc in safecopy_unsafe.go -// -// The code is derived from Go source runtime/internal/atomic.Cas. -// -//func compareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (prev uint32, sig int32) -TEXT ·compareAndSwapUint32(SB), NOSPLIT, $0-24 - // Store 0 as the returned signal number. If we run to completion, this is - // the value the caller will see; if a signal is received, - // handleCompareAndSwapUint32Fault will store a different value in this - // address. - MOVW $0, sig+20(FP) - - MOVD addr+0(FP), R0 - MOVW old+8(FP), R1 - MOVW new+12(FP), R2 -again: - LDAXRW (R0), R3 - CMPW R1, R3 - BNE done - STLXRW R2, (R0), R4 - CBNZ R4, again -done: - MOVW R3, prev+16(FP) - RET - -// handleLoadUint32Fault returns the value stored in DI. Control is transferred -// to it when LoadUint32 below receives SIGSEGV or SIGBUS, with the signal -// number stored in DI. -// -// It must have the same frame configuration as loadUint32 so that it can undo -// any potential call frame set up by the assembler. -TEXT handleLoadUint32Fault(SB), NOSPLIT, $0-16 - MOVW R1, sig+12(FP) - RET - -// loadUint32 atomically loads *addr and returns it. If a SIGSEGV or SIGBUS -// signal is received, the value returned is unspecified, and sig is the number -// of the signal that was received. -// -// Preconditions: addr must be aligned to a 4-byte boundary. -// -//func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32) -TEXT ·loadUint32(SB), NOSPLIT, $0-16 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleLoadUint32Fault will store a different value in this address. - MOVW $0, sig+12(FP) - - MOVD addr+0(FP), R0 - LDARW (R0), R1 - MOVW R1, val+8(FP) - RET diff --git a/pkg/sentry/platform/safecopy/memclr_amd64.s b/pkg/sentry/platform/safecopy/memclr_amd64.s deleted file mode 100644 index 64cf32f05..000000000 --- a/pkg/sentry/platform/safecopy/memclr_amd64.s +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "textflag.h" - -// handleMemclrFault returns (the value stored in AX, the value stored in DI). -// Control is transferred to it when memclr below receives SIGSEGV or SIGBUS, -// with the faulting address stored in AX and the signal number stored in DI. -// -// It must have the same frame configuration as memclr so that it can undo any -// potential call frame set up by the assembler. -TEXT handleMemclrFault(SB), NOSPLIT, $0-28 - MOVQ AX, addr+16(FP) - MOVL DI, sig+24(FP) - RET - -// memclr sets the n bytes following ptr to zeroes. If a SIGSEGV or SIGBUS -// signal is received during the write, it returns the address that caused the -// fault and the number of the signal that was received. Otherwise, it returns -// an unspecified address and a signal number of 0. -// -// Data is written in order, such that if a fault happens at address p, it is -// safe to assume that all data before p-maxRegisterSize has already been -// successfully written. -// -// The code is derived from runtime.memclrNoHeapPointers. -// -// func memclr(ptr unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) -TEXT ·memclr(SB), NOSPLIT, $0-28 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleMemclrFault will store a different value in this address. - MOVL $0, sig+24(FP) - - MOVQ ptr+0(FP), DI - MOVQ n+8(FP), BX - XORQ AX, AX - - // MOVOU seems always faster than REP STOSQ. -tail: - TESTQ BX, BX - JEQ _0 - CMPQ BX, $2 - JBE _1or2 - CMPQ BX, $4 - JBE _3or4 - CMPQ BX, $8 - JB _5through7 - JE _8 - CMPQ BX, $16 - JBE _9through16 - PXOR X0, X0 - CMPQ BX, $32 - JBE _17through32 - CMPQ BX, $64 - JBE _33through64 - CMPQ BX, $128 - JBE _65through128 - CMPQ BX, $256 - JBE _129through256 - // TODO: use branch table and BSR to make this just a single dispatch - // TODO: for really big clears, use MOVNTDQ, even without AVX2. - -loop: - MOVOU X0, 0(DI) - MOVOU X0, 16(DI) - MOVOU X0, 32(DI) - MOVOU X0, 48(DI) - MOVOU X0, 64(DI) - MOVOU X0, 80(DI) - MOVOU X0, 96(DI) - MOVOU X0, 112(DI) - MOVOU X0, 128(DI) - MOVOU X0, 144(DI) - MOVOU X0, 160(DI) - MOVOU X0, 176(DI) - MOVOU X0, 192(DI) - MOVOU X0, 208(DI) - MOVOU X0, 224(DI) - MOVOU X0, 240(DI) - SUBQ $256, BX - ADDQ $256, DI - CMPQ BX, $256 - JAE loop - JMP tail - -_1or2: - MOVB AX, (DI) - MOVB AX, -1(DI)(BX*1) - RET -_0: - RET -_3or4: - MOVW AX, (DI) - MOVW AX, -2(DI)(BX*1) - RET -_5through7: - MOVL AX, (DI) - MOVL AX, -4(DI)(BX*1) - RET -_8: - // We need a separate case for 8 to make sure we clear pointers atomically. - MOVQ AX, (DI) - RET -_9through16: - MOVQ AX, (DI) - MOVQ AX, -8(DI)(BX*1) - RET -_17through32: - MOVOU X0, (DI) - MOVOU X0, -16(DI)(BX*1) - RET -_33through64: - MOVOU X0, (DI) - MOVOU X0, 16(DI) - MOVOU X0, -32(DI)(BX*1) - MOVOU X0, -16(DI)(BX*1) - RET -_65through128: - MOVOU X0, (DI) - MOVOU X0, 16(DI) - MOVOU X0, 32(DI) - MOVOU X0, 48(DI) - MOVOU X0, -64(DI)(BX*1) - MOVOU X0, -48(DI)(BX*1) - MOVOU X0, -32(DI)(BX*1) - MOVOU X0, -16(DI)(BX*1) - RET -_129through256: - MOVOU X0, (DI) - MOVOU X0, 16(DI) - MOVOU X0, 32(DI) - MOVOU X0, 48(DI) - MOVOU X0, 64(DI) - MOVOU X0, 80(DI) - MOVOU X0, 96(DI) - MOVOU X0, 112(DI) - MOVOU X0, -128(DI)(BX*1) - MOVOU X0, -112(DI)(BX*1) - MOVOU X0, -96(DI)(BX*1) - MOVOU X0, -80(DI)(BX*1) - MOVOU X0, -64(DI)(BX*1) - MOVOU X0, -48(DI)(BX*1) - MOVOU X0, -32(DI)(BX*1) - MOVOU X0, -16(DI)(BX*1) - RET diff --git a/pkg/sentry/platform/safecopy/memclr_arm64.s b/pkg/sentry/platform/safecopy/memclr_arm64.s deleted file mode 100644 index 7361b9067..000000000 --- a/pkg/sentry/platform/safecopy/memclr_arm64.s +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "textflag.h" - -// handleMemclrFault returns (the value stored in R0, the value stored in R1). -// Control is transferred to it when memclr below receives SIGSEGV or SIGBUS, -// with the faulting address stored in R0 and the signal number stored in R1. -// -// It must have the same frame configuration as memclr so that it can undo any -// potential call frame set up by the assembler. -TEXT handleMemclrFault(SB), NOSPLIT, $0-28 - MOVD R0, addr+16(FP) - MOVW R1, sig+24(FP) - RET - -// See the corresponding doc in safecopy_unsafe.go -// -// The code is derived from runtime.memclrNoHeapPointers. -// -// func memclr(ptr unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) -TEXT ·memclr(SB), NOSPLIT, $0-28 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleMemclrFault will store a different value in this address. - MOVW $0, sig+24(FP) - MOVD ptr+0(FP), R0 - MOVD n+8(FP), R1 - - // If size is less than 16 bytes, use tail_zero to zero what remains - CMP $16, R1 - BLT tail_zero - // Get buffer offset into 16 byte aligned address for better performance - ANDS $15, R0, ZR - BNE unaligned_to_16 -aligned_to_16: - LSR $4, R1, R2 -zero_by_16: - STP.P (ZR, ZR), 16(R0) // Store pair with post index. - SUBS $1, R2, R2 - BNE zero_by_16 - ANDS $15, R1, R1 - BEQ end - - // Zero buffer with size=R1 < 16 -tail_zero: - TBZ $3, R1, tail_zero_4 - MOVD.P ZR, 8(R0) -tail_zero_4: - TBZ $2, R1, tail_zero_2 - MOVW.P ZR, 4(R0) -tail_zero_2: - TBZ $1, R1, tail_zero_1 - MOVH.P ZR, 2(R0) -tail_zero_1: - TBZ $0, R1, end - MOVB ZR, (R0) -end: - RET - -unaligned_to_16: - MOVD R0, R2 -head_loop: - MOVBU.P ZR, 1(R0) - ANDS $15, R0, ZR - BNE head_loop - // Adjust length for what remains - SUB R2, R0, R3 - SUB R3, R1 - // If size is less than 16 bytes, use tail_zero to zero what remains - CMP $16, R1 - BLT tail_zero - B aligned_to_16 diff --git a/pkg/sentry/platform/safecopy/memcpy_amd64.s b/pkg/sentry/platform/safecopy/memcpy_amd64.s deleted file mode 100644 index 129691d68..000000000 --- a/pkg/sentry/platform/safecopy/memcpy_amd64.s +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. -// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. -// Portions Copyright 2009 The Go Authors. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include "textflag.h" - -// handleMemcpyFault returns (the value stored in AX, the value stored in DI). -// Control is transferred to it when memcpy below receives SIGSEGV or SIGBUS, -// with the faulting address stored in AX and the signal number stored in DI. -// -// It must have the same frame configuration as memcpy so that it can undo any -// potential call frame set up by the assembler. -TEXT handleMemcpyFault(SB), NOSPLIT, $0-36 - MOVQ AX, addr+24(FP) - MOVL DI, sig+32(FP) - RET - -// memcpy copies data from src to dst. If a SIGSEGV or SIGBUS signal is received -// during the copy, it returns the address that caused the fault and the number -// of the signal that was received. Otherwise, it returns an unspecified address -// and a signal number of 0. -// -// Data is copied in order, such that if a fault happens at address p, it is -// safe to assume that all data before p-maxRegisterSize has already been -// successfully copied. -// -// The code is derived from the forward copying part of runtime.memmove. -// -// func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) -TEXT ·memcpy(SB), NOSPLIT, $0-36 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleMemcpyFault will store a different value in this address. - MOVL $0, sig+32(FP) - - MOVQ to+0(FP), DI - MOVQ from+8(FP), SI - MOVQ n+16(FP), BX - - // REP instructions have a high startup cost, so we handle small sizes - // with some straightline code. The REP MOVSQ instruction is really fast - // for large sizes. The cutover is approximately 2K. -tail: - // move_129through256 or smaller work whether or not the source and the - // destination memory regions overlap because they load all data into - // registers before writing it back. move_256through2048 on the other - // hand can be used only when the memory regions don't overlap or the copy - // direction is forward. - TESTQ BX, BX - JEQ move_0 - CMPQ BX, $2 - JBE move_1or2 - CMPQ BX, $4 - JBE move_3or4 - CMPQ BX, $8 - JB move_5through7 - JE move_8 - CMPQ BX, $16 - JBE move_9through16 - CMPQ BX, $32 - JBE move_17through32 - CMPQ BX, $64 - JBE move_33through64 - CMPQ BX, $128 - JBE move_65through128 - CMPQ BX, $256 - JBE move_129through256 - // TODO: use branch table and BSR to make this just a single dispatch - -/* - * forward copy loop - */ - CMPQ BX, $2048 - JLS move_256through2048 - - // Check alignment - MOVL SI, AX - ORL DI, AX - TESTL $7, AX - JEQ fwdBy8 - - // Do 1 byte at a time - MOVQ BX, CX - REP; MOVSB - RET - -fwdBy8: - // Do 8 bytes at a time - MOVQ BX, CX - SHRQ $3, CX - ANDQ $7, BX - REP; MOVSQ - JMP tail - -move_1or2: - MOVB (SI), AX - MOVB AX, (DI) - MOVB -1(SI)(BX*1), CX - MOVB CX, -1(DI)(BX*1) - RET -move_0: - RET -move_3or4: - MOVW (SI), AX - MOVW AX, (DI) - MOVW -2(SI)(BX*1), CX - MOVW CX, -2(DI)(BX*1) - RET -move_5through7: - MOVL (SI), AX - MOVL AX, (DI) - MOVL -4(SI)(BX*1), CX - MOVL CX, -4(DI)(BX*1) - RET -move_8: - // We need a separate case for 8 to make sure we write pointers atomically. - MOVQ (SI), AX - MOVQ AX, (DI) - RET -move_9through16: - MOVQ (SI), AX - MOVQ AX, (DI) - MOVQ -8(SI)(BX*1), CX - MOVQ CX, -8(DI)(BX*1) - RET -move_17through32: - MOVOU (SI), X0 - MOVOU X0, (DI) - MOVOU -16(SI)(BX*1), X1 - MOVOU X1, -16(DI)(BX*1) - RET -move_33through64: - MOVOU (SI), X0 - MOVOU X0, (DI) - MOVOU 16(SI), X1 - MOVOU X1, 16(DI) - MOVOU -32(SI)(BX*1), X2 - MOVOU X2, -32(DI)(BX*1) - MOVOU -16(SI)(BX*1), X3 - MOVOU X3, -16(DI)(BX*1) - RET -move_65through128: - MOVOU (SI), X0 - MOVOU X0, (DI) - MOVOU 16(SI), X1 - MOVOU X1, 16(DI) - MOVOU 32(SI), X2 - MOVOU X2, 32(DI) - MOVOU 48(SI), X3 - MOVOU X3, 48(DI) - MOVOU -64(SI)(BX*1), X4 - MOVOU X4, -64(DI)(BX*1) - MOVOU -48(SI)(BX*1), X5 - MOVOU X5, -48(DI)(BX*1) - MOVOU -32(SI)(BX*1), X6 - MOVOU X6, -32(DI)(BX*1) - MOVOU -16(SI)(BX*1), X7 - MOVOU X7, -16(DI)(BX*1) - RET -move_129through256: - MOVOU (SI), X0 - MOVOU X0, (DI) - MOVOU 16(SI), X1 - MOVOU X1, 16(DI) - MOVOU 32(SI), X2 - MOVOU X2, 32(DI) - MOVOU 48(SI), X3 - MOVOU X3, 48(DI) - MOVOU 64(SI), X4 - MOVOU X4, 64(DI) - MOVOU 80(SI), X5 - MOVOU X5, 80(DI) - MOVOU 96(SI), X6 - MOVOU X6, 96(DI) - MOVOU 112(SI), X7 - MOVOU X7, 112(DI) - MOVOU -128(SI)(BX*1), X8 - MOVOU X8, -128(DI)(BX*1) - MOVOU -112(SI)(BX*1), X9 - MOVOU X9, -112(DI)(BX*1) - MOVOU -96(SI)(BX*1), X10 - MOVOU X10, -96(DI)(BX*1) - MOVOU -80(SI)(BX*1), X11 - MOVOU X11, -80(DI)(BX*1) - MOVOU -64(SI)(BX*1), X12 - MOVOU X12, -64(DI)(BX*1) - MOVOU -48(SI)(BX*1), X13 - MOVOU X13, -48(DI)(BX*1) - MOVOU -32(SI)(BX*1), X14 - MOVOU X14, -32(DI)(BX*1) - MOVOU -16(SI)(BX*1), X15 - MOVOU X15, -16(DI)(BX*1) - RET -move_256through2048: - SUBQ $256, BX - MOVOU (SI), X0 - MOVOU X0, (DI) - MOVOU 16(SI), X1 - MOVOU X1, 16(DI) - MOVOU 32(SI), X2 - MOVOU X2, 32(DI) - MOVOU 48(SI), X3 - MOVOU X3, 48(DI) - MOVOU 64(SI), X4 - MOVOU X4, 64(DI) - MOVOU 80(SI), X5 - MOVOU X5, 80(DI) - MOVOU 96(SI), X6 - MOVOU X6, 96(DI) - MOVOU 112(SI), X7 - MOVOU X7, 112(DI) - MOVOU 128(SI), X8 - MOVOU X8, 128(DI) - MOVOU 144(SI), X9 - MOVOU X9, 144(DI) - MOVOU 160(SI), X10 - MOVOU X10, 160(DI) - MOVOU 176(SI), X11 - MOVOU X11, 176(DI) - MOVOU 192(SI), X12 - MOVOU X12, 192(DI) - MOVOU 208(SI), X13 - MOVOU X13, 208(DI) - MOVOU 224(SI), X14 - MOVOU X14, 224(DI) - MOVOU 240(SI), X15 - MOVOU X15, 240(DI) - CMPQ BX, $256 - LEAQ 256(SI), SI - LEAQ 256(DI), DI - JGE move_256through2048 - JMP tail diff --git a/pkg/sentry/platform/safecopy/memcpy_arm64.s b/pkg/sentry/platform/safecopy/memcpy_arm64.s deleted file mode 100644 index e7e541565..000000000 --- a/pkg/sentry/platform/safecopy/memcpy_arm64.s +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "textflag.h" - -// handleMemcpyFault returns (the value stored in R0, the value stored in R1). -// Control is transferred to it when memcpy below receives SIGSEGV or SIGBUS, -// with the faulting address stored in R0 and the signal number stored in R1. -// -// It must have the same frame configuration as memcpy so that it can undo any -// potential call frame set up by the assembler. -TEXT handleMemcpyFault(SB), NOSPLIT, $0-36 - MOVD R0, addr+24(FP) - MOVW R1, sig+32(FP) - RET - -// memcpy copies data from src to dst. If a SIGSEGV or SIGBUS signal is received -// during the copy, it returns the address that caused the fault and the number -// of the signal that was received. Otherwise, it returns an unspecified address -// and a signal number of 0. -// -// Data is copied in order, such that if a fault happens at address p, it is -// safe to assume that all data before p-maxRegisterSize has already been -// successfully copied. -// -// The code is derived from the Go source runtime.memmove. -// -// func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) -TEXT ·memcpy(SB), NOSPLIT, $-8-36 - // Store 0 as the returned signal number. If we run to completion, - // this is the value the caller will see; if a signal is received, - // handleMemcpyFault will store a different value in this address. - MOVW $0, sig+32(FP) - - MOVD to+0(FP), R3 - MOVD from+8(FP), R4 - MOVD n+16(FP), R5 - CMP $0, R5 - BNE check - RET - -check: - AND $~7, R5, R7 // R7 is N&~7. - SUB R7, R5, R6 // R6 is N&7. - - // Copying forward proceeds by copying R7/8 words then copying R6 bytes. - // R3 and R4 are advanced as we copy. - - // (There may be implementations of armv8 where copying by bytes until - // at least one of source or dest is word aligned is a worthwhile - // optimization, but the on the one tested so far (xgene) it did not - // make a significance difference.) - - CMP $0, R7 // Do we need to do any word-by-word copying? - BEQ noforwardlarge - ADD R3, R7, R9 // R9 points just past where we copy by word. - -forwardlargeloop: - MOVD.P 8(R4), R8 // R8 is just a scratch register. - MOVD.P R8, 8(R3) - CMP R3, R9 - BNE forwardlargeloop - -noforwardlarge: - CMP $0, R6 // Do we need to do any byte-by-byte copying? - BNE forwardtail - RET - -forwardtail: - ADD R3, R6, R9 // R9 points just past the destination memory. - -forwardtailloop: - MOVBU.P 1(R4), R8 - MOVBU.P R8, 1(R3) - CMP R3, R9 - BNE forwardtailloop - RET diff --git a/pkg/sentry/platform/safecopy/safecopy.go b/pkg/sentry/platform/safecopy/safecopy.go deleted file mode 100644 index 2fb7e5809..000000000 --- a/pkg/sentry/platform/safecopy/safecopy.go +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package safecopy provides an efficient implementation of functions to access -// memory that may result in SIGSEGV or SIGBUS being sent to the accessor. -package safecopy - -import ( - "fmt" - "reflect" - "runtime" - "syscall" - - "gvisor.dev/gvisor/pkg/syserror" -) - -// SegvError is returned when a safecopy function receives SIGSEGV. -type SegvError struct { - // Addr is the address at which the SIGSEGV occurred. - Addr uintptr -} - -// Error implements error.Error. -func (e SegvError) Error() string { - return fmt.Sprintf("SIGSEGV at %#x", e.Addr) -} - -// BusError is returned when a safecopy function receives SIGBUS. -type BusError struct { - // Addr is the address at which the SIGBUS occurred. - Addr uintptr -} - -// Error implements error.Error. -func (e BusError) Error() string { - return fmt.Sprintf("SIGBUS at %#x", e.Addr) -} - -// AlignmentError is returned when a safecopy function is passed an address -// that does not meet alignment requirements. -type AlignmentError struct { - // Addr is the invalid address. - Addr uintptr - - // Alignment is the required alignment. - Alignment uintptr -} - -// Error implements error.Error. -func (e AlignmentError) Error() string { - return fmt.Sprintf("address %#x is not aligned to a %d-byte boundary", e.Addr, e.Alignment) -} - -var ( - // The begin and end addresses below are for the functions that are - // checked by the signal handler. - memcpyBegin uintptr - memcpyEnd uintptr - memclrBegin uintptr - memclrEnd uintptr - swapUint32Begin uintptr - swapUint32End uintptr - swapUint64Begin uintptr - swapUint64End uintptr - compareAndSwapUint32Begin uintptr - compareAndSwapUint32End uintptr - loadUint32Begin uintptr - loadUint32End uintptr - - // savedSigSegVHandler is a pointer to the SIGSEGV handler that was - // configured before we replaced it with our own. We still call into it - // when we get a SIGSEGV that is not interesting to us. - savedSigSegVHandler uintptr - - // same a above, but for SIGBUS signals. - savedSigBusHandler uintptr -) - -// signalHandler is our replacement signal handler for SIGSEGV and SIGBUS -// signals. -func signalHandler() - -// FindEndAddress returns the end address (one byte beyond the last) of the -// function that contains the specified address (begin). -func FindEndAddress(begin uintptr) uintptr { - f := runtime.FuncForPC(begin) - if f != nil { - for p := begin; ; p++ { - g := runtime.FuncForPC(p) - if f != g { - return p - } - } - } - return begin -} - -// initializeAddresses initializes the addresses used by the signal handler. -func initializeAddresses() { - // The following functions are written in assembly language, so they won't - // be inlined by the existing compiler/linker. Tests will fail if this - // assumption is violated. - memcpyBegin = reflect.ValueOf(memcpy).Pointer() - memcpyEnd = FindEndAddress(memcpyBegin) - memclrBegin = reflect.ValueOf(memclr).Pointer() - memclrEnd = FindEndAddress(memclrBegin) - swapUint32Begin = reflect.ValueOf(swapUint32).Pointer() - swapUint32End = FindEndAddress(swapUint32Begin) - swapUint64Begin = reflect.ValueOf(swapUint64).Pointer() - swapUint64End = FindEndAddress(swapUint64Begin) - compareAndSwapUint32Begin = reflect.ValueOf(compareAndSwapUint32).Pointer() - compareAndSwapUint32End = FindEndAddress(compareAndSwapUint32Begin) - loadUint32Begin = reflect.ValueOf(loadUint32).Pointer() - loadUint32End = FindEndAddress(loadUint32Begin) -} - -func init() { - initializeAddresses() - if err := ReplaceSignalHandler(syscall.SIGSEGV, reflect.ValueOf(signalHandler).Pointer(), &savedSigSegVHandler); err != nil { - panic(fmt.Sprintf("Unable to set handler for SIGSEGV: %v", err)) - } - if err := ReplaceSignalHandler(syscall.SIGBUS, reflect.ValueOf(signalHandler).Pointer(), &savedSigBusHandler); err != nil { - panic(fmt.Sprintf("Unable to set handler for SIGBUS: %v", err)) - } - syserror.AddErrorUnwrapper(func(e error) (syscall.Errno, bool) { - switch e.(type) { - case SegvError, BusError, AlignmentError: - return syscall.EFAULT, true - default: - return 0, false - } - }) -} diff --git a/pkg/sentry/platform/safecopy/safecopy_test.go b/pkg/sentry/platform/safecopy/safecopy_test.go deleted file mode 100644 index 5818f7f9b..000000000 --- a/pkg/sentry/platform/safecopy/safecopy_test.go +++ /dev/null @@ -1,617 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safecopy - -import ( - "bytes" - "fmt" - "io/ioutil" - "math/rand" - "os" - "runtime/debug" - "syscall" - "testing" - "unsafe" -) - -// Size of a page in bytes. Cloned from usermem.PageSize to avoid a circular -// dependency. -const pageSize = 4096 - -func initRandom(b []byte) { - for i := range b { - b[i] = byte(rand.Intn(256)) - } -} - -func randBuf(size int) []byte { - b := make([]byte, size) - initRandom(b) - return b -} - -func TestCopyInSuccess(t *testing.T) { - // Test that CopyIn does not return an error when all pages are accessible. - const bufLen = 8192 - a := randBuf(bufLen) - b := make([]byte, bufLen) - - n, err := CopyIn(b, unsafe.Pointer(&a[0])) - if n != bufLen { - t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) - } - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !bytes.Equal(a, b) { - t.Errorf("Buffers are not equal when they should be: %v %v", a, b) - } -} - -func TestCopyOutSuccess(t *testing.T) { - // Test that CopyOut does not return an error when all pages are - // accessible. - const bufLen = 8192 - a := randBuf(bufLen) - b := make([]byte, bufLen) - - n, err := CopyOut(unsafe.Pointer(&b[0]), a) - if n != bufLen { - t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) - } - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !bytes.Equal(a, b) { - t.Errorf("Buffers are not equal when they should be: %v %v", a, b) - } -} - -func TestCopySuccess(t *testing.T) { - // Test that Copy does not return an error when all pages are accessible. - const bufLen = 8192 - a := randBuf(bufLen) - b := make([]byte, bufLen) - - n, err := Copy(unsafe.Pointer(&b[0]), unsafe.Pointer(&a[0]), bufLen) - if n != bufLen { - t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) - } - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !bytes.Equal(a, b) { - t.Errorf("Buffers are not equal when they should be: %v %v", a, b) - } -} - -func TestZeroOutSuccess(t *testing.T) { - // Test that ZeroOut does not return an error when all pages are - // accessible. - const bufLen = 8192 - a := make([]byte, bufLen) - b := randBuf(bufLen) - - n, err := ZeroOut(unsafe.Pointer(&b[0]), bufLen) - if n != bufLen { - t.Errorf("Unexpected copy length, got %v, want %v", n, bufLen) - } - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if !bytes.Equal(a, b) { - t.Errorf("Buffers are not equal when they should be: %v %v", a, b) - } -} - -func TestSwapUint32Success(t *testing.T) { - // Test that SwapUint32 does not return an error when the page is - // accessible. - before := uint32(rand.Int31()) - after := uint32(rand.Int31()) - val := before - - old, err := SwapUint32(unsafe.Pointer(&val), after) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if old != before { - t.Errorf("Unexpected old value: got %v, want %v", old, before) - } - if val != after { - t.Errorf("Unexpected new value: got %v, want %v", val, after) - } -} - -func TestSwapUint32AlignmentError(t *testing.T) { - // Test that SwapUint32 returns an AlignmentError when passed an unaligned - // address. - data := new(struct{ val uint64 }) - addr := uintptr(unsafe.Pointer(&data.val)) + 1 - want := AlignmentError{Addr: addr, Alignment: 4} - if _, err := SwapUint32(unsafe.Pointer(addr), 1); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } -} - -func TestSwapUint64Success(t *testing.T) { - // Test that SwapUint64 does not return an error when the page is - // accessible. - before := uint64(rand.Int63()) - after := uint64(rand.Int63()) - // "The first word in ... an allocated struct or slice can be relied upon - // to be 64-bit aligned." - sync/atomic docs - data := new(struct{ val uint64 }) - data.val = before - - old, err := SwapUint64(unsafe.Pointer(&data.val), after) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if old != before { - t.Errorf("Unexpected old value: got %v, want %v", old, before) - } - if data.val != after { - t.Errorf("Unexpected new value: got %v, want %v", data.val, after) - } -} - -func TestSwapUint64AlignmentError(t *testing.T) { - // Test that SwapUint64 returns an AlignmentError when passed an unaligned - // address. - data := new(struct{ val1, val2 uint64 }) - addr := uintptr(unsafe.Pointer(&data.val1)) + 1 - want := AlignmentError{Addr: addr, Alignment: 8} - if _, err := SwapUint64(unsafe.Pointer(addr), 1); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } -} - -func TestCompareAndSwapUint32Success(t *testing.T) { - // Test that CompareAndSwapUint32 does not return an error when the page is - // accessible. - before := uint32(rand.Int31()) - after := uint32(rand.Int31()) - val := before - - old, err := CompareAndSwapUint32(unsafe.Pointer(&val), before, after) - if err != nil { - t.Errorf("Unexpected error: %v", err) - } - if old != before { - t.Errorf("Unexpected old value: got %v, want %v", old, before) - } - if val != after { - t.Errorf("Unexpected new value: got %v, want %v", val, after) - } -} - -func TestCompareAndSwapUint32AlignmentError(t *testing.T) { - // Test that CompareAndSwapUint32 returns an AlignmentError when passed an - // unaligned address. - data := new(struct{ val uint64 }) - addr := uintptr(unsafe.Pointer(&data.val)) + 1 - want := AlignmentError{Addr: addr, Alignment: 4} - if _, err := CompareAndSwapUint32(unsafe.Pointer(addr), 0, 1); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } -} - -// withSegvErrorTestMapping calls fn with a two-page mapping. The first page -// contains random data, and the second page generates SIGSEGV when accessed. -func withSegvErrorTestMapping(t *testing.T, fn func(m []byte)) { - mapping, err := syscall.Mmap(-1, 0, 2*pageSize, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_ANONYMOUS|syscall.MAP_PRIVATE) - if err != nil { - t.Fatalf("Mmap failed: %v", err) - } - defer syscall.Munmap(mapping) - if err := syscall.Mprotect(mapping[pageSize:], syscall.PROT_NONE); err != nil { - t.Fatalf("Mprotect failed: %v", err) - } - initRandom(mapping[:pageSize]) - - fn(mapping) -} - -// withBusErrorTestMapping calls fn with a two-page mapping. The first page -// contains random data, and the second page generates SIGBUS when accessed. -func withBusErrorTestMapping(t *testing.T, fn func(m []byte)) { - f, err := ioutil.TempFile("", "sigbus_test") - if err != nil { - t.Fatalf("TempFile failed: %v", err) - } - defer f.Close() - if err := f.Truncate(pageSize); err != nil { - t.Fatalf("Truncate failed: %v", err) - } - mapping, err := syscall.Mmap(int(f.Fd()), 0, 2*pageSize, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) - if err != nil { - t.Fatalf("Mmap failed: %v", err) - } - defer syscall.Munmap(mapping) - initRandom(mapping[:pageSize]) - - fn(mapping) -} - -func TestCopyInSegvError(t *testing.T) { - // Test that CopyIn returns a SegvError when reaching a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - dst := randBuf(pageSize) - n, err := CopyIn(dst, src) - if n != bytesBeforeFault { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyInBusError(t *testing.T) { - // Test that CopyIn returns a BusError when reaching a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - dst := randBuf(pageSize) - n, err := CopyIn(dst, src) - if n != bytesBeforeFault { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyOutSegvError(t *testing.T) { - // Test that CopyOut returns a SegvError when reaching a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - src := randBuf(pageSize) - n, err := CopyOut(dst, src) - if n != bytesBeforeFault { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyOutBusError(t *testing.T) { - // Test that CopyOut returns a BusError when reaching a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - src := randBuf(pageSize) - n, err := CopyOut(dst, src) - if n != bytesBeforeFault { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopySourceSegvError(t *testing.T) { - // Test that Copy returns a SegvError when copying from a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - dst := randBuf(pageSize) - n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopySourceBusError(t *testing.T) { - // Test that Copy returns a BusError when copying from a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - src := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - dst := randBuf(pageSize) - n, err := Copy(unsafe.Pointer(&dst[0]), src, pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := dst[:bytesBeforeFault], mapping[pageSize-bytesBeforeFault:pageSize]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyDestinationSegvError(t *testing.T) { - // Test that Copy returns a SegvError when copying to a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - src := randBuf(pageSize) - n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestCopyDestinationBusError(t *testing.T) { - // Test that Copy returns a BusError when copying to a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting copy %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - src := randBuf(pageSize) - n, err := Copy(dst, unsafe.Pointer(&src[0]), pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected copy length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], src[:bytesBeforeFault]; !bytes.Equal(got, want) { - t.Errorf("Buffers are not equal when they should be: %v %v", got, want) - } - }) - }) - } -} - -func TestZeroOutSegvError(t *testing.T) { - // Test that ZeroOut returns a SegvError when reaching a page that signals - // SIGSEGV. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting write %d bytes before SIGSEGV", bytesBeforeFault), func(t *testing.T) { - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - n, err := ZeroOut(dst, pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault) - } - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], make([]byte, bytesBeforeFault); !bytes.Equal(got, want) { - t.Errorf("Non-zero bytes in written part of mapping: %v", got) - } - }) - }) - } -} - -func TestZeroOutBusError(t *testing.T) { - // Test that ZeroOut returns a BusError when reaching a page that signals - // SIGBUS. - for bytesBeforeFault := 0; bytesBeforeFault <= 2*maxRegisterSize; bytesBeforeFault++ { - t.Run(fmt.Sprintf("starting write %d bytes before SIGBUS", bytesBeforeFault), func(t *testing.T) { - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - dst := unsafe.Pointer(secondPage - uintptr(bytesBeforeFault)) - n, err := ZeroOut(dst, pageSize) - if n != uintptr(bytesBeforeFault) { - t.Errorf("Unexpected write length: got %v, want %v", n, bytesBeforeFault) - } - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - if got, want := mapping[pageSize-bytesBeforeFault:pageSize], make([]byte, bytesBeforeFault); !bytes.Equal(got, want) { - t.Errorf("Non-zero bytes in written part of mapping: %v", got) - } - }) - }) - } -} - -func TestSwapUint32SegvError(t *testing.T) { - // Test that SwapUint32 returns a SegvError when reaching a page that - // signals SIGSEGV. - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := SwapUint32(unsafe.Pointer(secondPage), 1) - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestSwapUint32BusError(t *testing.T) { - // Test that SwapUint32 returns a BusError when reaching a page that - // signals SIGBUS. - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := SwapUint32(unsafe.Pointer(secondPage), 1) - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestSwapUint64SegvError(t *testing.T) { - // Test that SwapUint64 returns a SegvError when reaching a page that - // signals SIGSEGV. - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := SwapUint64(unsafe.Pointer(secondPage), 1) - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestSwapUint64BusError(t *testing.T) { - // Test that SwapUint64 returns a BusError when reaching a page that - // signals SIGBUS. - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := SwapUint64(unsafe.Pointer(secondPage), 1) - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestCompareAndSwapUint32SegvError(t *testing.T) { - // Test that CompareAndSwapUint32 returns a SegvError when reaching a page - // that signals SIGSEGV. - withSegvErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1) - if want := (SegvError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func TestCompareAndSwapUint32BusError(t *testing.T) { - // Test that CompareAndSwapUint32 returns a BusError when reaching a page - // that signals SIGBUS. - withBusErrorTestMapping(t, func(mapping []byte) { - secondPage := uintptr(unsafe.Pointer(&mapping[0])) + pageSize - _, err := CompareAndSwapUint32(unsafe.Pointer(secondPage), 0, 1) - if want := (BusError{secondPage}); err != want { - t.Errorf("Unexpected error: got %v, want %v", err, want) - } - }) -} - -func testCopy(dst, src []byte) (panicked bool) { - defer func() { - if r := recover(); r != nil { - panicked = true - } - }() - debug.SetPanicOnFault(true) - copy(dst, src) - return -} - -func TestSegVOnMemmove(t *testing.T) { - // Test that SIGSEGVs received by runtime.memmove when *not* doing - // CopyIn or CopyOut work gets propagated to the runtime. - const bufLen = pageSize - a, err := syscall.Mmap(-1, 0, bufLen, syscall.PROT_NONE, syscall.MAP_ANON|syscall.MAP_PRIVATE) - if err != nil { - t.Fatalf("Mmap failed: %v", err) - - } - defer syscall.Munmap(a) - b := randBuf(bufLen) - - if !testCopy(b, a) { - t.Fatalf("testCopy didn't panic when it should have") - } - - if !testCopy(a, b) { - t.Fatalf("testCopy didn't panic when it should have") - } -} - -func TestSigbusOnMemmove(t *testing.T) { - // Test that SIGBUS received by runtime.memmove when *not* doing - // CopyIn or CopyOut work gets propagated to the runtime. - const bufLen = pageSize - f, err := ioutil.TempFile("", "sigbus_test") - if err != nil { - t.Fatalf("TempFile failed: %v", err) - } - os.Remove(f.Name()) - defer f.Close() - - a, err := syscall.Mmap(int(f.Fd()), 0, bufLen, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) - if err != nil { - t.Fatalf("Mmap failed: %v", err) - - } - defer syscall.Munmap(a) - b := randBuf(bufLen) - - if !testCopy(b, a) { - t.Fatalf("testCopy didn't panic when it should have") - } - - if !testCopy(a, b) { - t.Fatalf("testCopy didn't panic when it should have") - } -} diff --git a/pkg/sentry/platform/safecopy/safecopy_unsafe.go b/pkg/sentry/platform/safecopy/safecopy_unsafe.go deleted file mode 100644 index eef028e68..000000000 --- a/pkg/sentry/platform/safecopy/safecopy_unsafe.go +++ /dev/null @@ -1,335 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safecopy - -import ( - "fmt" - "syscall" - "unsafe" -) - -// maxRegisterSize is the maximum register size used in memcpy and memclr. It -// is used to decide by how much to rewind the copy (for memcpy) or zeroing -// (for memclr) before proceeding. -const maxRegisterSize = 16 - -// memcpy copies data from src to dst. If a SIGSEGV or SIGBUS signal is received -// during the copy, it returns the address that caused the fault and the number -// of the signal that was received. Otherwise, it returns an unspecified address -// and a signal number of 0. -// -// Data is copied in order, such that if a fault happens at address p, it is -// safe to assume that all data before p-maxRegisterSize has already been -// successfully copied. -// -//go:noescape -func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) - -// memclr sets the n bytes following ptr to zeroes. If a SIGSEGV or SIGBUS -// signal is received during the write, it returns the address that caused the -// fault and the number of the signal that was received. Otherwise, it returns -// an unspecified address and a signal number of 0. -// -// Data is written in order, such that if a fault happens at address p, it is -// safe to assume that all data before p-maxRegisterSize has already been -// successfully written. -// -//go:noescape -func memclr(ptr unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32) - -// swapUint32 atomically stores new into *ptr and returns (the previous *ptr -// value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the -// value of old is unspecified, and sig is the number of the signal that was -// received. -// -// Preconditions: ptr must be aligned to a 4-byte boundary. -// -//go:noescape -func swapUint32(ptr unsafe.Pointer, new uint32) (old uint32, sig int32) - -// swapUint64 atomically stores new into *ptr and returns (the previous *ptr -// value, 0). If a SIGSEGV or SIGBUS signal is received during the swap, the -// value of old is unspecified, and sig is the number of the signal that was -// received. -// -// Preconditions: ptr must be aligned to a 8-byte boundary. -// -//go:noescape -func swapUint64(ptr unsafe.Pointer, new uint64) (old uint64, sig int32) - -// compareAndSwapUint32 is like sync/atomic.CompareAndSwapUint32, but returns -// (the value previously stored at ptr, 0). If a SIGSEGV or SIGBUS signal is -// received during the operation, the value of prev is unspecified, and sig is -// the number of the signal that was received. -// -// Preconditions: ptr must be aligned to a 4-byte boundary. -// -//go:noescape -func compareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (prev uint32, sig int32) - -// LoadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It -// may fail with SIGSEGV or SIGBUS if it is received while reading from ptr. -// -// Preconditions: ptr must be aligned to a 4-byte boundary. -// -//go:noescape -func loadUint32(ptr unsafe.Pointer) (val uint32, sig int32) - -// CopyIn copies len(dst) bytes from src to dst. It returns the number of bytes -// copied and an error if SIGSEGV or SIGBUS is received while reading from src. -func CopyIn(dst []byte, src unsafe.Pointer) (int, error) { - toCopy := uintptr(len(dst)) - if len(dst) == 0 { - return 0, nil - } - - fault, sig := memcpy(unsafe.Pointer(&dst[0]), src, toCopy) - if sig == 0 { - return len(dst), nil - } - - faultN, srcN := uintptr(fault), uintptr(src) - if faultN < srcN || faultN >= srcN+toCopy { - panic(fmt.Sprintf("CopyIn raised signal %d at %#x, which is outside source [%#x, %#x)", sig, faultN, srcN, srcN+toCopy)) - } - - // memcpy might have ended the copy up to maxRegisterSize bytes before - // fault, if an instruction caused a memory access that straddled two - // pages, and the second one faulted. Try to copy up to the fault. - var done int - if faultN-srcN > maxRegisterSize { - done = int(faultN - srcN - maxRegisterSize) - } - n, err := CopyIn(dst[done:int(faultN-srcN)], unsafe.Pointer(srcN+uintptr(done))) - done += n - if err != nil { - return done, err - } - return done, errorFromFaultSignal(fault, sig) -} - -// CopyOut copies len(src) bytes from src to dst. If returns the number of -// bytes done and an error if SIGSEGV or SIGBUS is received while writing to -// dst. -func CopyOut(dst unsafe.Pointer, src []byte) (int, error) { - toCopy := uintptr(len(src)) - if toCopy == 0 { - return 0, nil - } - - fault, sig := memcpy(dst, unsafe.Pointer(&src[0]), toCopy) - if sig == 0 { - return len(src), nil - } - - faultN, dstN := uintptr(fault), uintptr(dst) - if faultN < dstN || faultN >= dstN+toCopy { - panic(fmt.Sprintf("CopyOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, faultN, dstN, dstN+toCopy)) - } - - // memcpy might have ended the copy up to maxRegisterSize bytes before - // fault, if an instruction caused a memory access that straddled two - // pages, and the second one faulted. Try to copy up to the fault. - var done int - if faultN-dstN > maxRegisterSize { - done = int(faultN - dstN - maxRegisterSize) - } - n, err := CopyOut(unsafe.Pointer(dstN+uintptr(done)), src[done:int(faultN-dstN)]) - done += n - if err != nil { - return done, err - } - return done, errorFromFaultSignal(fault, sig) -} - -// Copy copies toCopy bytes from src to dst. It returns the number of bytes -// copied and an error if SIGSEGV or SIGBUS is received while reading from src -// or writing to dst. -// -// Data is copied in order; if [src, src+toCopy) and [dst, dst+toCopy) overlap, -// the resulting contents of dst are unspecified. -func Copy(dst, src unsafe.Pointer, toCopy uintptr) (uintptr, error) { - if toCopy == 0 { - return 0, nil - } - - fault, sig := memcpy(dst, src, toCopy) - if sig == 0 { - return toCopy, nil - } - - // Did the fault occur while reading from src or writing to dst? - faultN, srcN, dstN := uintptr(fault), uintptr(src), uintptr(dst) - faultAfterSrc := ^uintptr(0) - if faultN >= srcN { - faultAfterSrc = faultN - srcN - } - faultAfterDst := ^uintptr(0) - if faultN >= dstN { - faultAfterDst = faultN - dstN - } - if faultAfterSrc >= toCopy && faultAfterDst >= toCopy { - panic(fmt.Sprintf("Copy raised signal %d at %#x, which is outside source [%#x, %#x) and destination [%#x, %#x)", sig, faultN, srcN, srcN+toCopy, dstN, dstN+toCopy)) - } - faultedAfter := faultAfterSrc - if faultedAfter > faultAfterDst { - faultedAfter = faultAfterDst - } - - // memcpy might have ended the copy up to maxRegisterSize bytes before - // fault, if an instruction caused a memory access that straddled two - // pages, and the second one faulted. Try to copy up to the fault. - var done uintptr - if faultedAfter > maxRegisterSize { - done = faultedAfter - maxRegisterSize - } - n, err := Copy(unsafe.Pointer(dstN+done), unsafe.Pointer(srcN+done), faultedAfter-done) - done += n - if err != nil { - return done, err - } - return done, errorFromFaultSignal(fault, sig) -} - -// ZeroOut writes toZero zero bytes to dst. It returns the number of bytes -// written and an error if SIGSEGV or SIGBUS is received while writing to dst. -func ZeroOut(dst unsafe.Pointer, toZero uintptr) (uintptr, error) { - if toZero == 0 { - return 0, nil - } - - fault, sig := memclr(dst, toZero) - if sig == 0 { - return toZero, nil - } - - faultN, dstN := uintptr(fault), uintptr(dst) - if faultN < dstN || faultN >= dstN+toZero { - panic(fmt.Sprintf("ZeroOut raised signal %d at %#x, which is outside destination [%#x, %#x)", sig, faultN, dstN, dstN+toZero)) - } - - // memclr might have ended the write up to maxRegisterSize bytes before - // fault, if an instruction caused a memory access that straddled two - // pages, and the second one faulted. Try to write up to the fault. - var done uintptr - if faultN-dstN > maxRegisterSize { - done = faultN - dstN - maxRegisterSize - } - n, err := ZeroOut(unsafe.Pointer(dstN+done), faultN-dstN-done) - done += n - if err != nil { - return done, err - } - return done, errorFromFaultSignal(fault, sig) -} - -// SwapUint32 is equivalent to sync/atomic.SwapUint32, except that it returns -// an error if SIGSEGV or SIGBUS is received while accessing ptr, or if ptr is -// not aligned to a 4-byte boundary. -func SwapUint32(ptr unsafe.Pointer, new uint32) (uint32, error) { - if addr := uintptr(ptr); addr&3 != 0 { - return 0, AlignmentError{addr, 4} - } - old, sig := swapUint32(ptr, new) - return old, errorFromFaultSignal(ptr, sig) -} - -// SwapUint64 is equivalent to sync/atomic.SwapUint64, except that it returns -// an error if SIGSEGV or SIGBUS is received while accessing ptr, or if ptr is -// not aligned to an 8-byte boundary. -func SwapUint64(ptr unsafe.Pointer, new uint64) (uint64, error) { - if addr := uintptr(ptr); addr&7 != 0 { - return 0, AlignmentError{addr, 8} - } - old, sig := swapUint64(ptr, new) - return old, errorFromFaultSignal(ptr, sig) -} - -// CompareAndSwapUint32 is equivalent to atomicbitops.CompareAndSwapUint32, -// except that it returns an error if SIGSEGV or SIGBUS is received while -// accessing ptr, or if ptr is not aligned to a 4-byte boundary. -func CompareAndSwapUint32(ptr unsafe.Pointer, old, new uint32) (uint32, error) { - if addr := uintptr(ptr); addr&3 != 0 { - return 0, AlignmentError{addr, 4} - } - prev, sig := compareAndSwapUint32(ptr, old, new) - return prev, errorFromFaultSignal(ptr, sig) -} - -// LoadUint32 is like sync/atomic.LoadUint32, but operates with user memory. It -// may fail with SIGSEGV or SIGBUS if it is received while reading from ptr. -// -// Preconditions: ptr must be aligned to a 4-byte boundary. -func LoadUint32(ptr unsafe.Pointer) (uint32, error) { - if addr := uintptr(ptr); addr&3 != 0 { - return 0, AlignmentError{addr, 4} - } - val, sig := loadUint32(ptr) - return val, errorFromFaultSignal(ptr, sig) -} - -func errorFromFaultSignal(addr unsafe.Pointer, sig int32) error { - switch sig { - case 0: - return nil - case int32(syscall.SIGSEGV): - return SegvError{uintptr(addr)} - case int32(syscall.SIGBUS): - return BusError{uintptr(addr)} - default: - panic(fmt.Sprintf("safecopy got unexpected signal %d at address %#x", sig, addr)) - } -} - -// ReplaceSignalHandler replaces the existing signal handler for the provided -// signal with the one that handles faults in safecopy-protected functions. -// -// It stores the value of the previously set handler in previous. -// -// This function will be called on initialization in order to install safecopy -// handlers for appropriate signals. These handlers will call the previous -// handler however, and if this is function is being used externally then the -// same courtesy is expected. -func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr) error { - var sa struct { - handler uintptr - flags uint64 - restorer uintptr - mask uint64 - } - const maskLen = 8 - - // Get the existing signal handler information, and save the current - // handler. Once we replace it, we will use this pointer to fall back to - // it when we receive other signals. - if _, _, e := syscall.RawSyscall6(syscall.SYS_RT_SIGACTION, uintptr(sig), 0, uintptr(unsafe.Pointer(&sa)), maskLen, 0, 0); e != 0 { - return e - } - - // Fail if there isn't a previous handler. - if sa.handler == 0 { - return fmt.Errorf("previous handler for signal %x isn't set", sig) - } - - *previous = sa.handler - - // Install our own handler. - sa.handler = handler - if _, _, e := syscall.RawSyscall6(syscall.SYS_RT_SIGACTION, uintptr(sig), uintptr(unsafe.Pointer(&sa)), 0, maskLen, 0, 0); e != 0 { - return e - } - - return nil -} diff --git a/pkg/sentry/platform/safecopy/sighandler_amd64.s b/pkg/sentry/platform/safecopy/sighandler_amd64.s deleted file mode 100644 index 475ae48e9..000000000 --- a/pkg/sentry/platform/safecopy/sighandler_amd64.s +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "textflag.h" - -// The signals handled by sigHandler. -#define SIGBUS 7 -#define SIGSEGV 11 - -// Offsets to the registers in context->uc_mcontext.gregs[]. -#define REG_RDI 0x68 -#define REG_RAX 0x90 -#define REG_IP 0xa8 - -// Offset to the si_addr field of siginfo. -#define SI_CODE 0x08 -#define SI_ADDR 0x10 - -// signalHandler is the signal handler for SIGSEGV and SIGBUS signals. It must -// not be set up as a handler to any other signals. -// -// If the instruction causing the signal is within a safecopy-protected -// function, the signal is handled such that execution resumes in the -// appropriate fault handling stub with AX containing the faulting address and -// DI containing the signal number. Otherwise control is transferred to the -// previously configured signal handler (savedSigSegvHandler or -// savedSigBusHandler). -// -// This function cannot be written in go because it runs whenever a signal is -// received by the thread (preempting whatever was running), which includes when -// garbage collector has stopped or isn't expecting any interactions (like -// barriers). -// -// The arguments are the following: -// DI - The signal number. -// SI - Pointer to siginfo_t structure. -// DX - Pointer to ucontext structure. -TEXT ·signalHandler(SB),NOSPLIT,$0 - // Check if the signal is from the kernel. - MOVQ $0x0, CX - CMPL CX, SI_CODE(SI) - JGE original_handler - - // Check if RIP is within the area we care about. - MOVQ REG_IP(DX), CX - CMPQ CX, ·memcpyBegin(SB) - JB not_memcpy - CMPQ CX, ·memcpyEnd(SB) - JAE not_memcpy - - // Modify the context such that execution will resume in the fault - // handler. - LEAQ handleMemcpyFault(SB), CX - JMP handle_fault - -not_memcpy: - CMPQ CX, ·memclrBegin(SB) - JB not_memclr - CMPQ CX, ·memclrEnd(SB) - JAE not_memclr - - LEAQ handleMemclrFault(SB), CX - JMP handle_fault - -not_memclr: - CMPQ CX, ·swapUint32Begin(SB) - JB not_swapuint32 - CMPQ CX, ·swapUint32End(SB) - JAE not_swapuint32 - - LEAQ handleSwapUint32Fault(SB), CX - JMP handle_fault - -not_swapuint32: - CMPQ CX, ·swapUint64Begin(SB) - JB not_swapuint64 - CMPQ CX, ·swapUint64End(SB) - JAE not_swapuint64 - - LEAQ handleSwapUint64Fault(SB), CX - JMP handle_fault - -not_swapuint64: - CMPQ CX, ·compareAndSwapUint32Begin(SB) - JB not_casuint32 - CMPQ CX, ·compareAndSwapUint32End(SB) - JAE not_casuint32 - - LEAQ handleCompareAndSwapUint32Fault(SB), CX - JMP handle_fault - -not_casuint32: - CMPQ CX, ·loadUint32Begin(SB) - JB not_loaduint32 - CMPQ CX, ·loadUint32End(SB) - JAE not_loaduint32 - - LEAQ handleLoadUint32Fault(SB), CX - JMP handle_fault - -not_loaduint32: -original_handler: - // Jump to the previous signal handler, which is likely the golang one. - XORQ CX, CX - MOVQ ·savedSigBusHandler(SB), AX - CMPL DI, $SIGSEGV - CMOVQEQ ·savedSigSegVHandler(SB), AX - JMP AX - -handle_fault: - // Entered with the address of the fault handler in RCX; store it in - // RIP. - MOVQ CX, REG_IP(DX) - - // Store the faulting address in RAX. - MOVQ SI_ADDR(SI), CX - MOVQ CX, REG_RAX(DX) - - // Store the signal number in EDI. - MOVL DI, REG_RDI(DX) - - RET diff --git a/pkg/sentry/platform/safecopy/sighandler_arm64.s b/pkg/sentry/platform/safecopy/sighandler_arm64.s deleted file mode 100644 index 53e4ac2c1..000000000 --- a/pkg/sentry/platform/safecopy/sighandler_arm64.s +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "textflag.h" - -// The signals handled by sigHandler. -#define SIGBUS 7 -#define SIGSEGV 11 - -// Offsets to the registers in context->uc_mcontext.gregs[]. -#define REG_R0 0xB8 -#define REG_R1 0xC0 -#define REG_PC 0x1B8 - -// Offset to the si_addr field of siginfo. -#define SI_CODE 0x08 -#define SI_ADDR 0x10 - -// signalHandler is the signal handler for SIGSEGV and SIGBUS signals. It must -// not be set up as a handler to any other signals. -// -// If the instruction causing the signal is within a safecopy-protected -// function, the signal is handled such that execution resumes in the -// appropriate fault handling stub with R0 containing the faulting address and -// R1 containing the signal number. Otherwise control is transferred to the -// previously configured signal handler (savedSigSegvHandler or -// savedSigBusHandler). -// -// This function cannot be written in go because it runs whenever a signal is -// received by the thread (preempting whatever was running), which includes when -// garbage collector has stopped or isn't expecting any interactions (like -// barriers). -// -// The arguments are the following: -// R0 - The signal number. -// R1 - Pointer to siginfo_t structure. -// R2 - Pointer to ucontext structure. -TEXT ·signalHandler(SB),NOSPLIT,$0 - // Check if the signal is from the kernel, si_code > 0 means a kernel signal. - MOVD SI_CODE(R1), R7 - CMPW $0x0, R7 - BLE original_handler - - // Check if PC is within the area we care about. - MOVD REG_PC(R2), R7 - MOVD ·memcpyBegin(SB), R8 - CMP R8, R7 - BLO not_memcpy - MOVD ·memcpyEnd(SB), R8 - CMP R8, R7 - BHS not_memcpy - - // Modify the context such that execution will resume in the fault handler. - MOVD $handleMemcpyFault(SB), R7 - B handle_fault - -not_memcpy: - MOVD ·memclrBegin(SB), R8 - CMP R8, R7 - BLO not_memclr - MOVD ·memclrEnd(SB), R8 - CMP R8, R7 - BHS not_memclr - - MOVD $handleMemclrFault(SB), R7 - B handle_fault - -not_memclr: - MOVD ·swapUint32Begin(SB), R8 - CMP R8, R7 - BLO not_swapuint32 - MOVD ·swapUint32End(SB), R8 - CMP R8, R7 - BHS not_swapuint32 - - MOVD $handleSwapUint32Fault(SB), R7 - B handle_fault - -not_swapuint32: - MOVD ·swapUint64Begin(SB), R8 - CMP R8, R7 - BLO not_swapuint64 - MOVD ·swapUint64End(SB), R8 - CMP R8, R7 - BHS not_swapuint64 - - MOVD $handleSwapUint64Fault(SB), R7 - B handle_fault - -not_swapuint64: - MOVD ·compareAndSwapUint32Begin(SB), R8 - CMP R8, R7 - BLO not_casuint32 - MOVD ·compareAndSwapUint32End(SB), R8 - CMP R8, R7 - BHS not_casuint32 - - MOVD $handleCompareAndSwapUint32Fault(SB), R7 - B handle_fault - -not_casuint32: - MOVD ·loadUint32Begin(SB), R8 - CMP R8, R7 - BLO not_loaduint32 - MOVD ·loadUint32End(SB), R8 - CMP R8, R7 - BHS not_loaduint32 - - MOVD $handleLoadUint32Fault(SB), R7 - B handle_fault - -not_loaduint32: -original_handler: - // Jump to the previous signal handler, which is likely the golang one. - MOVD ·savedSigBusHandler(SB), R7 - MOVD ·savedSigSegVHandler(SB), R8 - CMPW $SIGSEGV, R0 - CSEL EQ, R8, R7, R7 - B (R7) - -handle_fault: - // Entered with the address of the fault handler in R7; store it in PC. - MOVD R7, REG_PC(R2) - - // Store the faulting address in R0. - MOVD SI_ADDR(R1), R7 - MOVD R7, REG_R0(R2) - - // Store the signal number in R1. - MOVW R0, REG_R1(R2) - - RET diff --git a/pkg/sentry/safemem/BUILD b/pkg/sentry/safemem/BUILD deleted file mode 100644 index 3ab76da97..000000000 --- a/pkg/sentry/safemem/BUILD +++ /dev/null @@ -1,27 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "safemem", - srcs = [ - "block_unsafe.go", - "io.go", - "safemem.go", - "seq_unsafe.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/sentry/platform/safecopy", - ], -) - -go_test( - name = "safemem_test", - size = "small", - srcs = [ - "io_test.go", - "seq_test.go", - ], - library = ":safemem", -) diff --git a/pkg/sentry/safemem/block_unsafe.go b/pkg/sentry/safemem/block_unsafe.go deleted file mode 100644 index 6f03c94bf..000000000 --- a/pkg/sentry/safemem/block_unsafe.go +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safemem - -import ( - "fmt" - "reflect" - "unsafe" - - "gvisor.dev/gvisor/pkg/sentry/platform/safecopy" -) - -// A Block is a range of contiguous bytes, similar to []byte but with the -// following differences: -// -// - The memory represented by a Block may require the use of safecopy to -// access. -// -// - Block does not carry a capacity and cannot be expanded. -// -// Blocks are immutable and may be copied by value. The zero value of Block -// represents an empty range, analogous to a nil []byte. -type Block struct { - // [start, start+length) is the represented memory. - // - // start is an unsafe.Pointer to ensure that Block prevents the represented - // memory from being garbage-collected. - start unsafe.Pointer - length int - - // needSafecopy is true if accessing the represented memory requires the - // use of safecopy. - needSafecopy bool -} - -// BlockFromSafeSlice returns a Block equivalent to slice, which is safe to -// access without safecopy. -func BlockFromSafeSlice(slice []byte) Block { - return blockFromSlice(slice, false) -} - -// BlockFromUnsafeSlice returns a Block equivalent to bs, which is not safe to -// access without safecopy. -func BlockFromUnsafeSlice(slice []byte) Block { - return blockFromSlice(slice, true) -} - -func blockFromSlice(slice []byte, needSafecopy bool) Block { - if len(slice) == 0 { - return Block{} - } - return Block{ - start: unsafe.Pointer(&slice[0]), - length: len(slice), - needSafecopy: needSafecopy, - } -} - -// BlockFromSafePointer returns a Block equivalent to [ptr, ptr+len), which is -// safe to access without safecopy. -// -// Preconditions: ptr+len does not overflow. -func BlockFromSafePointer(ptr unsafe.Pointer, len int) Block { - return blockFromPointer(ptr, len, false) -} - -// BlockFromUnsafePointer returns a Block equivalent to [ptr, ptr+len), which -// is not safe to access without safecopy. -// -// Preconditions: ptr+len does not overflow. -func BlockFromUnsafePointer(ptr unsafe.Pointer, len int) Block { - return blockFromPointer(ptr, len, true) -} - -func blockFromPointer(ptr unsafe.Pointer, len int, needSafecopy bool) Block { - if uptr := uintptr(ptr); uptr+uintptr(len) < uptr { - panic(fmt.Sprintf("ptr %#x + len %#x overflows", ptr, len)) - } - return Block{ - start: ptr, - length: len, - needSafecopy: needSafecopy, - } -} - -// DropFirst returns a Block equivalent to b, but with the first n bytes -// omitted. It is analogous to the [n:] operation on a slice, except that if n -// > b.Len(), DropFirst returns an empty Block instead of panicking. -// -// Preconditions: n >= 0. -func (b Block) DropFirst(n int) Block { - if n < 0 { - panic(fmt.Sprintf("invalid n: %d", n)) - } - return b.DropFirst64(uint64(n)) -} - -// DropFirst64 is equivalent to DropFirst but takes a uint64. -func (b Block) DropFirst64(n uint64) Block { - if n >= uint64(b.length) { - return Block{} - } - return Block{ - start: unsafe.Pointer(uintptr(b.start) + uintptr(n)), - length: b.length - int(n), - needSafecopy: b.needSafecopy, - } -} - -// TakeFirst returns a Block equivalent to the first n bytes of b. It is -// analogous to the [:n] operation on a slice, except that if n > b.Len(), -// TakeFirst returns a copy of b instead of panicking. -// -// Preconditions: n >= 0. -func (b Block) TakeFirst(n int) Block { - if n < 0 { - panic(fmt.Sprintf("invalid n: %d", n)) - } - return b.TakeFirst64(uint64(n)) -} - -// TakeFirst64 is equivalent to TakeFirst but takes a uint64. -func (b Block) TakeFirst64(n uint64) Block { - if n == 0 { - return Block{} - } - if n >= uint64(b.length) { - return b - } - return Block{ - start: b.start, - length: int(n), - needSafecopy: b.needSafecopy, - } -} - -// ToSlice returns a []byte equivalent to b. -func (b Block) ToSlice() []byte { - var bs []byte - hdr := (*reflect.SliceHeader)(unsafe.Pointer(&bs)) - hdr.Data = uintptr(b.start) - hdr.Len = b.length - hdr.Cap = b.length - return bs -} - -// Addr returns b's start address as a uintptr. It returns uintptr instead of -// unsafe.Pointer so that code using safemem cannot obtain unsafe.Pointers -// without importing the unsafe package explicitly. -// -// Note that a uintptr is not recognized as a pointer by the garbage collector, -// such that if there are no uses of b after a call to b.Addr() and the address -// is to Go-managed memory, the returned uintptr does not prevent garbage -// collection of the pointee. -func (b Block) Addr() uintptr { - return uintptr(b.start) -} - -// Len returns b's length in bytes. -func (b Block) Len() int { - return b.length -} - -// NeedSafecopy returns true if accessing b.ToSlice() requires the use of safecopy. -func (b Block) NeedSafecopy() bool { - return b.needSafecopy -} - -// String implements fmt.Stringer.String. -func (b Block) String() string { - if uintptr(b.start) == 0 && b.length == 0 { - return "" - } - var suffix string - if b.needSafecopy { - suffix = "*" - } - return fmt.Sprintf("[%#x-%#x)%s", uintptr(b.start), uintptr(b.start)+uintptr(b.length), suffix) -} - -// Copy copies src.Len() or dst.Len() bytes, whichever is less, from src -// to dst and returns the number of bytes copied. -// -// If src and dst overlap, the data stored in dst is unspecified. -func Copy(dst, src Block) (int, error) { - if !dst.needSafecopy && !src.needSafecopy { - return copy(dst.ToSlice(), src.ToSlice()), nil - } - - n := dst.length - if n > src.length { - n = src.length - } - if n == 0 { - return 0, nil - } - - switch { - case dst.needSafecopy && !src.needSafecopy: - return safecopy.CopyOut(dst.start, src.TakeFirst(n).ToSlice()) - case !dst.needSafecopy && src.needSafecopy: - return safecopy.CopyIn(dst.TakeFirst(n).ToSlice(), src.start) - case dst.needSafecopy && src.needSafecopy: - n64, err := safecopy.Copy(dst.start, src.start, uintptr(n)) - return int(n64), err - default: - panic("unreachable") - } -} - -// Zero sets all bytes in dst to 0 and returns the number of bytes zeroed. -func Zero(dst Block) (int, error) { - if !dst.needSafecopy { - bs := dst.ToSlice() - for i := range bs { - bs[i] = 0 - } - return len(bs), nil - } - - n64, err := safecopy.ZeroOut(dst.start, uintptr(dst.length)) - return int(n64), err -} - -// Safecopy atomics are no slower than non-safecopy atomics, so use the former -// even when !b.needSafecopy to get consistent alignment checking. - -// SwapUint32 invokes safecopy.SwapUint32 on the first 4 bytes of b. -// -// Preconditions: b.Len() >= 4. -func SwapUint32(b Block, new uint32) (uint32, error) { - if b.length < 4 { - panic(fmt.Sprintf("insufficient length: %d", b.length)) - } - return safecopy.SwapUint32(b.start, new) -} - -// SwapUint64 invokes safecopy.SwapUint64 on the first 8 bytes of b. -// -// Preconditions: b.Len() >= 8. -func SwapUint64(b Block, new uint64) (uint64, error) { - if b.length < 8 { - panic(fmt.Sprintf("insufficient length: %d", b.length)) - } - return safecopy.SwapUint64(b.start, new) -} - -// CompareAndSwapUint32 invokes safecopy.CompareAndSwapUint32 on the first 4 -// bytes of b. -// -// Preconditions: b.Len() >= 4. -func CompareAndSwapUint32(b Block, old, new uint32) (uint32, error) { - if b.length < 4 { - panic(fmt.Sprintf("insufficient length: %d", b.length)) - } - return safecopy.CompareAndSwapUint32(b.start, old, new) -} - -// LoadUint32 invokes safecopy.LoadUint32 on the first 4 bytes of b. -// -// Preconditions: b.Len() >= 4. -func LoadUint32(b Block) (uint32, error) { - if b.length < 4 { - panic(fmt.Sprintf("insufficient length: %d", b.length)) - } - return safecopy.LoadUint32(b.start) -} diff --git a/pkg/sentry/safemem/io.go b/pkg/sentry/safemem/io.go deleted file mode 100644 index f039a5c34..000000000 --- a/pkg/sentry/safemem/io.go +++ /dev/null @@ -1,392 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safemem - -import ( - "errors" - "io" - "math" -) - -// ErrEndOfBlockSeq is returned by BlockSeqWriter when attempting to write -// beyond the end of the BlockSeq. -var ErrEndOfBlockSeq = errors.New("write beyond end of BlockSeq") - -// Reader represents a streaming byte source like io.Reader. -type Reader interface { - // ReadToBlocks reads up to dsts.NumBytes() bytes into dsts and returns the - // number of bytes read. It may return a partial read without an error - // (i.e. (n, nil) where 0 < n < dsts.NumBytes()). It should not return a - // full read with an error (i.e. (dsts.NumBytes(), err) where err != nil); - // note that this differs from io.Reader.Read (in particular, io.EOF should - // not be returned if ReadToBlocks successfully reads dsts.NumBytes() - // bytes.) - ReadToBlocks(dsts BlockSeq) (uint64, error) -} - -// Writer represents a streaming byte sink like io.Writer. -type Writer interface { - // WriteFromBlocks writes up to srcs.NumBytes() bytes from srcs and returns - // the number of bytes written. It may return a partial write without an - // error (i.e. (n, nil) where 0 < n < srcs.NumBytes()). It should not - // return a full write with an error (i.e. srcs.NumBytes(), err) where err - // != nil). - WriteFromBlocks(srcs BlockSeq) (uint64, error) -} - -// ReadFullToBlocks repeatedly invokes r.ReadToBlocks until dsts.NumBytes() -// bytes have been read or ReadToBlocks returns an error. -func ReadFullToBlocks(r Reader, dsts BlockSeq) (uint64, error) { - var done uint64 - for !dsts.IsEmpty() { - n, err := r.ReadToBlocks(dsts) - done += n - if err != nil { - return done, err - } - dsts = dsts.DropFirst64(n) - } - return done, nil -} - -// WriteFullFromBlocks repeatedly invokes w.WriteFromBlocks until -// srcs.NumBytes() bytes have been written or WriteFromBlocks returns an error. -func WriteFullFromBlocks(w Writer, srcs BlockSeq) (uint64, error) { - var done uint64 - for !srcs.IsEmpty() { - n, err := w.WriteFromBlocks(srcs) - done += n - if err != nil { - return done, err - } - srcs = srcs.DropFirst64(n) - } - return done, nil -} - -// BlockSeqReader implements Reader by reading from a BlockSeq. -type BlockSeqReader struct { - Blocks BlockSeq -} - -// ReadToBlocks implements Reader.ReadToBlocks. -func (r *BlockSeqReader) ReadToBlocks(dsts BlockSeq) (uint64, error) { - n, err := CopySeq(dsts, r.Blocks) - r.Blocks = r.Blocks.DropFirst64(n) - if err != nil { - return n, err - } - if n < dsts.NumBytes() { - return n, io.EOF - } - return n, nil -} - -// BlockSeqWriter implements Writer by writing to a BlockSeq. -type BlockSeqWriter struct { - Blocks BlockSeq -} - -// WriteFromBlocks implements Writer.WriteFromBlocks. -func (w *BlockSeqWriter) WriteFromBlocks(srcs BlockSeq) (uint64, error) { - n, err := CopySeq(w.Blocks, srcs) - w.Blocks = w.Blocks.DropFirst64(n) - if err != nil { - return n, err - } - if n < srcs.NumBytes() { - return n, ErrEndOfBlockSeq - } - return n, nil -} - -// ReaderFunc implements Reader for a function with the semantics of -// Reader.ReadToBlocks. -type ReaderFunc func(dsts BlockSeq) (uint64, error) - -// ReadToBlocks implements Reader.ReadToBlocks. -func (f ReaderFunc) ReadToBlocks(dsts BlockSeq) (uint64, error) { - return f(dsts) -} - -// WriterFunc implements Writer for a function with the semantics of -// Writer.WriteFromBlocks. -type WriterFunc func(srcs BlockSeq) (uint64, error) - -// WriteFromBlocks implements Writer.WriteFromBlocks. -func (f WriterFunc) WriteFromBlocks(srcs BlockSeq) (uint64, error) { - return f(srcs) -} - -// ToIOReader implements io.Reader for a (safemem.)Reader. -// -// ToIOReader will return a successful partial read iff Reader.ReadToBlocks does -// so. -type ToIOReader struct { - Reader Reader -} - -// Read implements io.Reader.Read. -func (r ToIOReader) Read(dst []byte) (int, error) { - n, err := r.Reader.ReadToBlocks(BlockSeqOf(BlockFromSafeSlice(dst))) - return int(n), err -} - -// ToIOWriter implements io.Writer for a (safemem.)Writer. -type ToIOWriter struct { - Writer Writer -} - -// Write implements io.Writer.Write. -func (w ToIOWriter) Write(src []byte) (int, error) { - // io.Writer does not permit partial writes. - n, err := WriteFullFromBlocks(w.Writer, BlockSeqOf(BlockFromSafeSlice(src))) - return int(n), err -} - -// FromIOReader implements Reader for an io.Reader by repeatedly invoking -// io.Reader.Read until it returns an error or partial read. This is not -// thread-safe. -// -// FromIOReader will return a successful partial read iff Reader.Read does so. -type FromIOReader struct { - Reader io.Reader -} - -// ReadToBlocks implements Reader.ReadToBlocks. -func (r FromIOReader) ReadToBlocks(dsts BlockSeq) (uint64, error) { - var buf []byte - var done uint64 - for !dsts.IsEmpty() { - dst := dsts.Head() - var n int - var err error - n, buf, err = r.readToBlock(dst, buf) - done += uint64(n) - if n != dst.Len() { - return done, err - } - dsts = dsts.Tail() - if err != nil { - if dsts.IsEmpty() && err == io.EOF { - return done, nil - } - return done, err - } - } - return done, nil -} - -func (r FromIOReader) readToBlock(dst Block, buf []byte) (int, []byte, error) { - // io.Reader isn't safecopy-aware, so we have to buffer Blocks that require - // safecopy. - if !dst.NeedSafecopy() { - n, err := r.Reader.Read(dst.ToSlice()) - return n, buf, err - } - if len(buf) < dst.Len() { - buf = make([]byte, dst.Len()) - } - rn, rerr := r.Reader.Read(buf[:dst.Len()]) - wbn, wberr := Copy(dst, BlockFromSafeSlice(buf[:rn])) - if wberr != nil { - return wbn, buf, wberr - } - return wbn, buf, rerr -} - -// FromIOReaderAt implements Reader for an io.ReaderAt. Does not repeatedly -// invoke io.ReaderAt.ReadAt because ReadAt is more strict than Read. A partial -// read indicates an error. This is not thread-safe. -type FromIOReaderAt struct { - ReaderAt io.ReaderAt - Offset int64 -} - -// ReadToBlocks implements Reader.ReadToBlocks. -func (r FromIOReaderAt) ReadToBlocks(dsts BlockSeq) (uint64, error) { - var buf []byte - var done uint64 - for !dsts.IsEmpty() { - dst := dsts.Head() - var n int - var err error - n, buf, err = r.readToBlock(dst, buf) - done += uint64(n) - if n != dst.Len() { - return done, err - } - dsts = dsts.Tail() - if err != nil { - if dsts.IsEmpty() && err == io.EOF { - return done, nil - } - return done, err - } - } - return done, nil -} - -func (r FromIOReaderAt) readToBlock(dst Block, buf []byte) (int, []byte, error) { - // io.Reader isn't safecopy-aware, so we have to buffer Blocks that require - // safecopy. - if !dst.NeedSafecopy() { - n, err := r.ReaderAt.ReadAt(dst.ToSlice(), r.Offset) - r.Offset += int64(n) - return n, buf, err - } - if len(buf) < dst.Len() { - buf = make([]byte, dst.Len()) - } - rn, rerr := r.ReaderAt.ReadAt(buf[:dst.Len()], r.Offset) - r.Offset += int64(rn) - wbn, wberr := Copy(dst, BlockFromSafeSlice(buf[:rn])) - if wberr != nil { - return wbn, buf, wberr - } - return wbn, buf, rerr -} - -// FromIOWriter implements Writer for an io.Writer by repeatedly invoking -// io.Writer.Write until it returns an error or partial write. -// -// FromIOWriter will tolerate implementations of io.Writer.Write that return -// partial writes with a nil error in contravention of io.Writer's -// requirements, since Writer is permitted to do so. FromIOWriter will return a -// successful partial write iff Writer.Write does so. -type FromIOWriter struct { - Writer io.Writer -} - -// WriteFromBlocks implements Writer.WriteFromBlocks. -func (w FromIOWriter) WriteFromBlocks(srcs BlockSeq) (uint64, error) { - var buf []byte - var done uint64 - for !srcs.IsEmpty() { - src := srcs.Head() - var n int - var err error - n, buf, err = w.writeFromBlock(src, buf) - done += uint64(n) - if n != src.Len() || err != nil { - return done, err - } - srcs = srcs.Tail() - } - return done, nil -} - -func (w FromIOWriter) writeFromBlock(src Block, buf []byte) (int, []byte, error) { - // io.Writer isn't safecopy-aware, so we have to buffer Blocks that require - // safecopy. - if !src.NeedSafecopy() { - n, err := w.Writer.Write(src.ToSlice()) - return n, buf, err - } - if len(buf) < src.Len() { - buf = make([]byte, src.Len()) - } - bufn, buferr := Copy(BlockFromSafeSlice(buf[:src.Len()]), src) - wn, werr := w.Writer.Write(buf[:bufn]) - if werr != nil { - return wn, buf, werr - } - return wn, buf, buferr -} - -// FromVecReaderFunc implements Reader for a function that reads data into a -// [][]byte and returns the number of bytes read as an int64. -type FromVecReaderFunc struct { - ReadVec func(dsts [][]byte) (int64, error) -} - -// ReadToBlocks implements Reader.ReadToBlocks. -// -// ReadToBlocks calls r.ReadVec at most once. -func (r FromVecReaderFunc) ReadToBlocks(dsts BlockSeq) (uint64, error) { - if dsts.IsEmpty() { - return 0, nil - } - // Ensure that we don't pass a [][]byte with a total length > MaxInt64. - dsts = dsts.TakeFirst64(uint64(math.MaxInt64)) - dstSlices := make([][]byte, 0, dsts.NumBlocks()) - // Buffer Blocks that require safecopy. - for tmp := dsts; !tmp.IsEmpty(); tmp = tmp.Tail() { - dst := tmp.Head() - if dst.NeedSafecopy() { - dstSlices = append(dstSlices, make([]byte, dst.Len())) - } else { - dstSlices = append(dstSlices, dst.ToSlice()) - } - } - rn, rerr := r.ReadVec(dstSlices) - dsts = dsts.TakeFirst64(uint64(rn)) - var done uint64 - var i int - for !dsts.IsEmpty() { - dst := dsts.Head() - if dst.NeedSafecopy() { - n, err := Copy(dst, BlockFromSafeSlice(dstSlices[i])) - done += uint64(n) - if err != nil { - return done, err - } - } else { - done += uint64(dst.Len()) - } - dsts = dsts.Tail() - i++ - } - return done, rerr -} - -// FromVecWriterFunc implements Writer for a function that writes data from a -// [][]byte and returns the number of bytes written. -type FromVecWriterFunc struct { - WriteVec func(srcs [][]byte) (int64, error) -} - -// WriteFromBlocks implements Writer.WriteFromBlocks. -// -// WriteFromBlocks calls w.WriteVec at most once. -func (w FromVecWriterFunc) WriteFromBlocks(srcs BlockSeq) (uint64, error) { - if srcs.IsEmpty() { - return 0, nil - } - // Ensure that we don't pass a [][]byte with a total length > MaxInt64. - srcs = srcs.TakeFirst64(uint64(math.MaxInt64)) - srcSlices := make([][]byte, 0, srcs.NumBlocks()) - // Buffer Blocks that require safecopy. - var buferr error - for tmp := srcs; !tmp.IsEmpty(); tmp = tmp.Tail() { - src := tmp.Head() - if src.NeedSafecopy() { - slice := make([]byte, src.Len()) - n, err := Copy(BlockFromSafeSlice(slice), src) - srcSlices = append(srcSlices, slice[:n]) - if err != nil { - buferr = err - break - } - } else { - srcSlices = append(srcSlices, src.ToSlice()) - } - } - n, err := w.WriteVec(srcSlices) - if err != nil { - return uint64(n), err - } - return uint64(n), buferr -} diff --git a/pkg/sentry/safemem/io_test.go b/pkg/sentry/safemem/io_test.go deleted file mode 100644 index 629741bee..000000000 --- a/pkg/sentry/safemem/io_test.go +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safemem - -import ( - "bytes" - "io" - "testing" -) - -func makeBlocks(slices ...[]byte) []Block { - blocks := make([]Block, 0, len(slices)) - for _, s := range slices { - blocks = append(blocks, BlockFromSafeSlice(s)) - } - return blocks -} - -func TestFromIOReaderFullRead(t *testing.T) { - r := FromIOReader{bytes.NewBufferString("foobar")} - dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) - n, err := r.ReadToBlocks(BlockSeqFromSlice(dsts)) - if wantN := uint64(6); n != wantN || err != nil { - t.Errorf("ReadToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - for i, want := range [][]byte{[]byte("foo"), []byte("bar")} { - if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { - t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) - } - } -} - -type eofHidingReader struct { - Reader io.Reader -} - -func (r eofHidingReader) Read(dst []byte) (int, error) { - n, err := r.Reader.Read(dst) - if err == io.EOF { - return n, nil - } - return n, err -} - -func TestFromIOReaderPartialRead(t *testing.T) { - r := FromIOReader{eofHidingReader{bytes.NewBufferString("foob")}} - dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) - n, err := r.ReadToBlocks(BlockSeqFromSlice(dsts)) - // FromIOReader should stop after the eofHidingReader returns (1, nil) - // for a 3-byte read. - if wantN := uint64(4); n != wantN || err != nil { - t.Errorf("ReadToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - for i, want := range [][]byte{[]byte("foo"), []byte("b\x00\x00")} { - if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { - t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) - } - } -} - -type singleByteReader struct { - Reader io.Reader -} - -func (r singleByteReader) Read(dst []byte) (int, error) { - if len(dst) == 0 { - return r.Reader.Read(dst) - } - return r.Reader.Read(dst[:1]) -} - -func TestSingleByteReader(t *testing.T) { - r := FromIOReader{singleByteReader{bytes.NewBufferString("foobar")}} - dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) - n, err := r.ReadToBlocks(BlockSeqFromSlice(dsts)) - // FromIOReader should stop after the singleByteReader returns (1, nil) - // for a 3-byte read. - if wantN := uint64(1); n != wantN || err != nil { - t.Errorf("ReadToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - for i, want := range [][]byte{[]byte("f\x00\x00"), []byte("\x00\x00\x00")} { - if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { - t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) - } - } -} - -func TestReadFullToBlocks(t *testing.T) { - r := FromIOReader{singleByteReader{bytes.NewBufferString("foobar")}} - dsts := makeBlocks(make([]byte, 3), make([]byte, 3)) - n, err := ReadFullToBlocks(r, BlockSeqFromSlice(dsts)) - // ReadFullToBlocks should call into FromIOReader => singleByteReader - // repeatedly until dsts is exhausted. - if wantN := uint64(6); n != wantN || err != nil { - t.Errorf("ReadFullToBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - for i, want := range [][]byte{[]byte("foo"), []byte("bar")} { - if got := dsts[i].ToSlice(); !bytes.Equal(got, want) { - t.Errorf("dsts[%d]: got %q, wanted %q", i, got, want) - } - } -} - -func TestFromIOWriterFullWrite(t *testing.T) { - srcs := makeBlocks([]byte("foo"), []byte("bar")) - var dst bytes.Buffer - w := FromIOWriter{&dst} - n, err := w.WriteFromBlocks(BlockSeqFromSlice(srcs)) - if wantN := uint64(6); n != wantN || err != nil { - t.Errorf("WriteFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("foobar"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -type limitedWriter struct { - Writer io.Writer - Done int - Limit int -} - -func (w *limitedWriter) Write(src []byte) (int, error) { - count := len(src) - if count > (w.Limit - w.Done) { - count = w.Limit - w.Done - } - n, err := w.Writer.Write(src[:count]) - w.Done += n - return n, err -} - -func TestFromIOWriterPartialWrite(t *testing.T) { - srcs := makeBlocks([]byte("foo"), []byte("bar")) - var dst bytes.Buffer - w := FromIOWriter{&limitedWriter{&dst, 0, 4}} - n, err := w.WriteFromBlocks(BlockSeqFromSlice(srcs)) - // FromIOWriter should stop after the limitedWriter returns (1, nil) for a - // 3-byte write. - if wantN := uint64(4); n != wantN || err != nil { - t.Errorf("WriteFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("foob"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -type singleByteWriter struct { - Writer io.Writer -} - -func (w singleByteWriter) Write(src []byte) (int, error) { - if len(src) == 0 { - return w.Writer.Write(src) - } - return w.Writer.Write(src[:1]) -} - -func TestSingleByteWriter(t *testing.T) { - srcs := makeBlocks([]byte("foo"), []byte("bar")) - var dst bytes.Buffer - w := FromIOWriter{singleByteWriter{&dst}} - n, err := w.WriteFromBlocks(BlockSeqFromSlice(srcs)) - // FromIOWriter should stop after the singleByteWriter returns (1, nil) - // for a 3-byte write. - if wantN := uint64(1); n != wantN || err != nil { - t.Errorf("WriteFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("f"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -func TestWriteFullToBlocks(t *testing.T) { - srcs := makeBlocks([]byte("foo"), []byte("bar")) - var dst bytes.Buffer - w := FromIOWriter{singleByteWriter{&dst}} - n, err := WriteFullFromBlocks(w, BlockSeqFromSlice(srcs)) - // WriteFullToBlocks should call into FromIOWriter => singleByteWriter - // repeatedly until srcs is exhausted. - if wantN := uint64(6); n != wantN || err != nil { - t.Errorf("WriteFullFromBlocks: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("foobar"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} diff --git a/pkg/sentry/safemem/safemem.go b/pkg/sentry/safemem/safemem.go deleted file mode 100644 index 3e70d33a2..000000000 --- a/pkg/sentry/safemem/safemem.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package safemem provides the Block and BlockSeq types. -package safemem diff --git a/pkg/sentry/safemem/seq_test.go b/pkg/sentry/safemem/seq_test.go deleted file mode 100644 index eba4bb535..000000000 --- a/pkg/sentry/safemem/seq_test.go +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safemem - -import ( - "bytes" - "reflect" - "testing" -) - -type blockSeqTest struct { - desc string - - pieces []string - haveOffset bool - offset uint64 - haveLimit bool - limit uint64 - - want string -} - -func (t blockSeqTest) NonEmptyByteSlices() [][]byte { - // t is a value, so we can mutate it freely. - slices := make([][]byte, 0, len(t.pieces)) - for _, str := range t.pieces { - if t.haveOffset { - strOff := t.offset - if strOff > uint64(len(str)) { - strOff = uint64(len(str)) - } - str = str[strOff:] - t.offset -= strOff - } - if t.haveLimit { - strLim := t.limit - if strLim > uint64(len(str)) { - strLim = uint64(len(str)) - } - str = str[:strLim] - t.limit -= strLim - } - if len(str) != 0 { - slices = append(slices, []byte(str)) - } - } - return slices -} - -func (t blockSeqTest) BlockSeq() BlockSeq { - blocks := make([]Block, 0, len(t.pieces)) - for _, str := range t.pieces { - blocks = append(blocks, BlockFromSafeSlice([]byte(str))) - } - bs := BlockSeqFromSlice(blocks) - if t.haveOffset { - bs = bs.DropFirst64(t.offset) - } - if t.haveLimit { - bs = bs.TakeFirst64(t.limit) - } - return bs -} - -var blockSeqTests = []blockSeqTest{ - { - desc: "Empty sequence", - }, - { - desc: "Sequence of length 1", - pieces: []string{"foobar"}, - want: "foobar", - }, - { - desc: "Sequence of length 2", - pieces: []string{"foo", "bar"}, - want: "foobar", - }, - { - desc: "Empty Blocks", - pieces: []string{"", "foo", "", "", "bar", ""}, - want: "foobar", - }, - { - desc: "Sequence with non-zero offset", - pieces: []string{"foo", "bar"}, - haveOffset: true, - offset: 2, - want: "obar", - }, - { - desc: "Sequence with non-maximal limit", - pieces: []string{"foo", "bar"}, - haveLimit: true, - limit: 5, - want: "fooba", - }, - { - desc: "Sequence with offset and limit", - pieces: []string{"foo", "bar"}, - haveOffset: true, - offset: 2, - haveLimit: true, - limit: 3, - want: "oba", - }, -} - -func TestBlockSeqNumBytes(t *testing.T) { - for _, test := range blockSeqTests { - t.Run(test.desc, func(t *testing.T) { - if got, want := test.BlockSeq().NumBytes(), uint64(len(test.want)); got != want { - t.Errorf("NumBytes: got %d, wanted %d", got, want) - } - }) - } -} - -func TestBlockSeqIterBlocks(t *testing.T) { - // Tests BlockSeq iteration using Head/Tail. - for _, test := range blockSeqTests { - t.Run(test.desc, func(t *testing.T) { - srcs := test.BlockSeq() - // "Note that a non-nil empty slice and a nil slice ... are not - // deeply equal." - reflect - slices := make([][]byte, 0, 0) - for !srcs.IsEmpty() { - src := srcs.Head() - slices = append(slices, src.ToSlice()) - nextSrcs := srcs.Tail() - if got, want := nextSrcs.NumBytes(), srcs.NumBytes()-uint64(src.Len()); got != want { - t.Fatalf("%v.Tail(): got %v (%d bytes), wanted %d bytes", srcs, nextSrcs, got, want) - } - srcs = nextSrcs - } - if wantSlices := test.NonEmptyByteSlices(); !reflect.DeepEqual(slices, wantSlices) { - t.Errorf("Accumulated slices: got %v, wanted %v", slices, wantSlices) - } - }) - } -} - -func TestBlockSeqIterBytes(t *testing.T) { - // Tests BlockSeq iteration using Head/DropFirst. - for _, test := range blockSeqTests { - t.Run(test.desc, func(t *testing.T) { - srcs := test.BlockSeq() - var dst bytes.Buffer - for !srcs.IsEmpty() { - src := srcs.Head() - var b [1]byte - n, err := Copy(BlockFromSafeSlice(b[:]), src) - if n != 1 || err != nil { - t.Fatalf("Copy: got (%v, %v), wanted (1, nil)", n, err) - } - dst.WriteByte(b[0]) - nextSrcs := srcs.DropFirst(1) - if got, want := nextSrcs.NumBytes(), srcs.NumBytes()-1; got != want { - t.Fatalf("%v.DropFirst(1): got %v (%d bytes), wanted %d bytes", srcs, nextSrcs, got, want) - } - srcs = nextSrcs - } - if got := string(dst.Bytes()); got != test.want { - t.Errorf("Copied string: got %q, wanted %q", got, test.want) - } - }) - } -} - -func TestBlockSeqDropBeyondLimit(t *testing.T) { - blocks := []Block{BlockFromSafeSlice([]byte("123")), BlockFromSafeSlice([]byte("4"))} - bs := BlockSeqFromSlice(blocks) - if got, want := bs.NumBytes(), uint64(4); got != want { - t.Errorf("%v.NumBytes(): got %d, wanted %d", bs, got, want) - } - bs = bs.TakeFirst(1) - if got, want := bs.NumBytes(), uint64(1); got != want { - t.Errorf("%v.NumBytes(): got %d, wanted %d", bs, got, want) - } - bs = bs.DropFirst(2) - if got, want := bs.NumBytes(), uint64(0); got != want { - t.Errorf("%v.NumBytes(): got %d, wanted %d", bs, got, want) - } -} diff --git a/pkg/sentry/safemem/seq_unsafe.go b/pkg/sentry/safemem/seq_unsafe.go deleted file mode 100644 index 354a95dde..000000000 --- a/pkg/sentry/safemem/seq_unsafe.go +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package safemem - -import ( - "bytes" - "fmt" - "reflect" - "unsafe" -) - -// A BlockSeq represents a sequence of Blocks, each of which has non-zero -// length. -// -// BlockSeqs are immutable and may be copied by value. The zero value of -// BlockSeq represents an empty sequence. -type BlockSeq struct { - // If length is 0, then the BlockSeq is empty. Invariants: data == 0; - // offset == 0; limit == 0. - // - // If length is -1, then the BlockSeq represents the single Block{data, - // limit, false}. Invariants: offset == 0; limit > 0; limit does not - // overflow the range of an int. - // - // If length is -2, then the BlockSeq represents the single Block{data, - // limit, true}. Invariants: offset == 0; limit > 0; limit does not - // overflow the range of an int. - // - // Otherwise, length >= 2, and the BlockSeq represents the `length` Blocks - // in the array of Blocks starting at address `data`, starting at `offset` - // bytes into the first Block and limited to the following `limit` bytes. - // Invariants: data != 0; offset < len(data[0]); limit > 0; offset+limit <= - // the combined length of all Blocks in the array; the first Block in the - // array has non-zero length. - // - // length is never 1; sequences consisting of a single Block are always - // stored inline (with length < 0). - data unsafe.Pointer - length int - offset int - limit uint64 -} - -// BlockSeqOf returns a BlockSeq representing the single Block b. -func BlockSeqOf(b Block) BlockSeq { - bs := BlockSeq{ - data: b.start, - length: -1, - limit: uint64(b.length), - } - if b.needSafecopy { - bs.length = -2 - } - return bs -} - -// BlockSeqFromSlice returns a BlockSeq representing all Blocks in slice. -// If slice contains Blocks with zero length, BlockSeq will skip them during -// iteration. -// -// Whether the returned BlockSeq shares memory with slice is unspecified; -// clients should avoid mutating slices passed to BlockSeqFromSlice. -// -// Preconditions: The combined length of all Blocks in slice <= math.MaxUint64. -func BlockSeqFromSlice(slice []Block) BlockSeq { - slice = skipEmpty(slice) - var limit uint64 - for _, b := range slice { - sum := limit + uint64(b.Len()) - if sum < limit { - panic("BlockSeq length overflows uint64") - } - limit = sum - } - return blockSeqFromSliceLimited(slice, limit) -} - -// Preconditions: The combined length of all Blocks in slice <= limit. If -// len(slice) != 0, the first Block in slice has non-zero length, and limit > -// 0. -func blockSeqFromSliceLimited(slice []Block, limit uint64) BlockSeq { - switch len(slice) { - case 0: - return BlockSeq{} - case 1: - return BlockSeqOf(slice[0].TakeFirst64(limit)) - default: - return BlockSeq{ - data: unsafe.Pointer(&slice[0]), - length: len(slice), - limit: limit, - } - } -} - -func skipEmpty(slice []Block) []Block { - for i, b := range slice { - if b.Len() != 0 { - return slice[i:] - } - } - return nil -} - -// IsEmpty returns true if bs contains no Blocks. -// -// Invariants: bs.IsEmpty() == (bs.NumBlocks() == 0) == (bs.NumBytes() == 0). -// (Of these, prefer to use bs.IsEmpty().) -func (bs BlockSeq) IsEmpty() bool { - return bs.length == 0 -} - -// NumBlocks returns the number of Blocks in bs. -func (bs BlockSeq) NumBlocks() int { - // In general, we have to count: if bs represents a windowed slice then the - // slice may contain Blocks with zero length, and bs.length may be larger - // than the actual number of Blocks due to bs.limit. - var n int - for !bs.IsEmpty() { - n++ - bs = bs.Tail() - } - return n -} - -// NumBytes returns the sum of Block.Len() for all Blocks in bs. -func (bs BlockSeq) NumBytes() uint64 { - return bs.limit -} - -// Head returns the first Block in bs. -// -// Preconditions: !bs.IsEmpty(). -func (bs BlockSeq) Head() Block { - if bs.length == 0 { - panic("empty BlockSeq") - } - if bs.length < 0 { - return bs.internalBlock() - } - return (*Block)(bs.data).DropFirst(bs.offset).TakeFirst64(bs.limit) -} - -// Preconditions: bs.length < 0. -func (bs BlockSeq) internalBlock() Block { - return Block{ - start: bs.data, - length: int(bs.limit), - needSafecopy: bs.length == -2, - } -} - -// Tail returns a BlockSeq consisting of all Blocks in bs after the first. -// -// Preconditions: !bs.IsEmpty(). -func (bs BlockSeq) Tail() BlockSeq { - if bs.length == 0 { - panic("empty BlockSeq") - } - if bs.length < 0 { - return BlockSeq{} - } - head := (*Block)(bs.data).DropFirst(bs.offset) - headLen := uint64(head.Len()) - if headLen >= bs.limit { - // The head Block exhausts the limit, so the tail is empty. - return BlockSeq{} - } - var extSlice []Block - extSliceHdr := (*reflect.SliceHeader)(unsafe.Pointer(&extSlice)) - extSliceHdr.Data = uintptr(bs.data) - extSliceHdr.Len = bs.length - extSliceHdr.Cap = bs.length - tailSlice := skipEmpty(extSlice[1:]) - tailLimit := bs.limit - headLen - return blockSeqFromSliceLimited(tailSlice, tailLimit) -} - -// DropFirst returns a BlockSeq equivalent to bs, but with the first n bytes -// omitted. If n > bs.NumBytes(), DropFirst returns an empty BlockSeq. -// -// Preconditions: n >= 0. -func (bs BlockSeq) DropFirst(n int) BlockSeq { - if n < 0 { - panic(fmt.Sprintf("invalid n: %d", n)) - } - return bs.DropFirst64(uint64(n)) -} - -// DropFirst64 is equivalent to DropFirst but takes an uint64. -func (bs BlockSeq) DropFirst64(n uint64) BlockSeq { - if n >= bs.limit { - return BlockSeq{} - } - for { - // Calling bs.Head() here is surprisingly expensive, so inline getting - // the head's length. - var headLen uint64 - if bs.length < 0 { - headLen = bs.limit - } else { - headLen = uint64((*Block)(bs.data).Len() - bs.offset) - } - if n < headLen { - // Dropping ends partway through the head Block. - if bs.length < 0 { - return BlockSeqOf(bs.internalBlock().DropFirst64(n)) - } - bs.offset += int(n) - bs.limit -= n - return bs - } - n -= headLen - bs = bs.Tail() - } -} - -// TakeFirst returns a BlockSeq equivalent to the first n bytes of bs. If n > -// bs.NumBytes(), TakeFirst returns a BlockSeq equivalent to bs. -// -// Preconditions: n >= 0. -func (bs BlockSeq) TakeFirst(n int) BlockSeq { - if n < 0 { - panic(fmt.Sprintf("invalid n: %d", n)) - } - return bs.TakeFirst64(uint64(n)) -} - -// TakeFirst64 is equivalent to TakeFirst but takes a uint64. -func (bs BlockSeq) TakeFirst64(n uint64) BlockSeq { - if n == 0 { - return BlockSeq{} - } - if bs.limit > n { - bs.limit = n - } - return bs -} - -// String implements fmt.Stringer.String. -func (bs BlockSeq) String() string { - var buf bytes.Buffer - buf.WriteByte('[') - var sep string - for !bs.IsEmpty() { - buf.WriteString(sep) - sep = " " - buf.WriteString(bs.Head().String()) - bs = bs.Tail() - } - buf.WriteByte(']') - return buf.String() -} - -// CopySeq copies srcs.NumBytes() or dsts.NumBytes() bytes, whichever is less, -// from srcs to dsts and returns the number of bytes copied. -// -// If srcs and dsts overlap, the data stored in dsts is unspecified. -func CopySeq(dsts, srcs BlockSeq) (uint64, error) { - var done uint64 - for !dsts.IsEmpty() && !srcs.IsEmpty() { - dst := dsts.Head() - src := srcs.Head() - n, err := Copy(dst, src) - done += uint64(n) - if err != nil { - return done, err - } - dsts = dsts.DropFirst(n) - srcs = srcs.DropFirst(n) - } - return done, nil -} - -// ZeroSeq sets all bytes in dsts to 0 and returns the number of bytes zeroed. -func ZeroSeq(dsts BlockSeq) (uint64, error) { - var done uint64 - for !dsts.IsEmpty() { - n, err := Zero(dsts.Head()) - done += uint64(n) - if err != nil { - return done, err - } - dsts = dsts.DropFirst(n) - } - return done, nil -} diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD index 8e2b97afb..611fa22c3 100644 --- a/pkg/sentry/socket/BUILD +++ b/pkg/sentry/socket/BUILD @@ -9,15 +9,15 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", "//pkg/sentry/kernel/time", "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", "//pkg/syserr", "//pkg/tcpip", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD index 3850f6345..79e16d6e8 100644 --- a/pkg/sentry/socket/control/BUILD +++ b/pkg/sentry/socket/control/BUILD @@ -12,13 +12,13 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/socket", "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 1684dfc24..00265f15b 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -19,14 +19,14 @@ package control import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const maxInt = int(^uint(0) >> 1) diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index 42bf7be6a..5a07d5d0e 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -16,23 +16,23 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/context", "//pkg/fdnotifier", "//pkg/log", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/time", - "//pkg/sentry/safemem", "//pkg/sentry/socket", "//pkg/sentry/socket/control", - "//pkg/sentry/usermem", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip/stack", + "//pkg/usermem", "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index c957b0f1d..bde4c7a1e 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -21,19 +21,19 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/control" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/socket/hostinet/socket_unsafe.go b/pkg/sentry/socket/hostinet/socket_unsafe.go index e69ec38c2..cd67234d2 100644 --- a/pkg/sentry/socket/hostinet/socket_unsafe.go +++ b/pkg/sentry/socket/hostinet/socket_unsafe.go @@ -19,14 +19,14 @@ import ( "unsafe" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) func firstBytePtr(bs []byte) unsafe.Pointer { diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index e67b46c9e..034eca676 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -25,13 +25,13 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/usermem" ) var defaultRecvBufSize = inet.TCPBufferSize{ diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD index ed34a8308..fa2a2cb66 100644 --- a/pkg/sentry/socket/netfilter/BUILD +++ b/pkg/sentry/socket/netfilter/BUILD @@ -15,10 +15,10 @@ go_library( "//pkg/binary", "//pkg/log", "//pkg/sentry/kernel", - "//pkg/sentry/usermem", "//pkg/syserr", "//pkg/tcpip", "//pkg/tcpip/iptables", "//pkg/tcpip/stack", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index c65c36081..6ef740463 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -23,11 +23,11 @@ import ( "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/iptables" "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/usermem" ) // errorTargetName is used to mark targets as error targets. Error targets diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index baaac13c6..f8b8e467d 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -13,8 +13,8 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/context", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", @@ -25,11 +25,11 @@ go_library( "//pkg/sentry/socket/netlink/port", "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/socket/netlink/message.go b/pkg/sentry/socket/netlink/message.go index ce0a1afd0..b21e0ca4b 100644 --- a/pkg/sentry/socket/netlink/message.go +++ b/pkg/sentry/socket/netlink/message.go @@ -20,7 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // alignUp rounds a length up to an alignment. diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go index be005df24..07f860a49 100644 --- a/pkg/sentry/socket/netlink/provider.go +++ b/pkg/sentry/socket/netlink/provider.go @@ -18,7 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD index 2137c7aeb..0234aadde 100644 --- a/pkg/sentry/socket/netlink/route/BUILD +++ b/pkg/sentry/socket/netlink/route/BUILD @@ -8,7 +8,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go index 6b4a0ecf4..80a15d6cb 100644 --- a/pkg/sentry/socket/netlink/route/protocol.go +++ b/pkg/sentry/socket/netlink/route/protocol.go @@ -19,7 +19,7 @@ import ( "bytes" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index cea56f4ed..c4b95debb 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" @@ -32,11 +32,11 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port" "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/socket/netlink/uevent/BUILD b/pkg/sentry/socket/netlink/uevent/BUILD index 73fbdf1eb..b6434923c 100644 --- a/pkg/sentry/socket/netlink/uevent/BUILD +++ b/pkg/sentry/socket/netlink/uevent/BUILD @@ -8,7 +8,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/kernel", "//pkg/sentry/socket/netlink", "//pkg/syserr", diff --git a/pkg/sentry/socket/netlink/uevent/protocol.go b/pkg/sentry/socket/netlink/uevent/protocol.go index b5d7808d7..1ee4296bc 100644 --- a/pkg/sentry/socket/netlink/uevent/protocol.go +++ b/pkg/sentry/socket/netlink/uevent/protocol.go @@ -20,7 +20,7 @@ package uevent import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket/netlink" "gvisor.dev/gvisor/pkg/syserr" diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index e3d1f90cb..ab01cb4fa 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -17,10 +17,11 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/context", "//pkg/log", "//pkg/metric", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", @@ -28,11 +29,9 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", - "//pkg/sentry/safemem", "//pkg/sentry/socket", "//pkg/sentry/socket/netfilter", "//pkg/sentry/unimpl", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", @@ -45,6 +44,7 @@ go_library( "//pkg/tcpip/stack", "//pkg/tcpip/transport/tcp", "//pkg/tcpip/transport/udp", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 318acbeff..8619cc506 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -34,20 +34,19 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/safemem" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/unimpl" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" @@ -57,6 +56,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" "gvisor.dev/gvisor/pkg/tcpip/transport/udp" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/socket/netstack/provider.go b/pkg/sentry/socket/netstack/provider.go index 2d2c1ba2a..5afff2564 100644 --- a/pkg/sentry/socket/netstack/provider.go +++ b/pkg/sentry/socket/netstack/provider.go @@ -18,7 +18,7 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index 2389a9cdb..50d9744e6 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -24,16 +24,16 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/usermem" ) // ControlMessages represents the union of unix control messages and tcpip diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index bade18686..08743deba 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -12,23 +12,23 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/refs", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/kernel", "//pkg/sentry/kernel/time", - "//pkg/sentry/safemem", "//pkg/sentry/socket", "//pkg/sentry/socket/control", "//pkg/sentry/socket/netstack", "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usermem", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/socket/unix/io.go b/pkg/sentry/socket/unix/io.go index 2447f24ef..129949990 100644 --- a/pkg/sentry/socket/unix/io.go +++ b/pkg/sentry/socket/unix/io.go @@ -15,8 +15,8 @@ package unix import ( - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/tcpip" ) diff --git a/pkg/sentry/socket/unix/transport/BUILD b/pkg/sentry/socket/unix/transport/BUILD index 4bdfc9208..74bcd6300 100644 --- a/pkg/sentry/socket/unix/transport/BUILD +++ b/pkg/sentry/socket/unix/transport/BUILD @@ -28,9 +28,9 @@ go_library( visibility = ["//:sandbox"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/ilist", "//pkg/refs", - "//pkg/sentry/context", "//pkg/sync", "//pkg/syserr", "//pkg/tcpip", diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go index 9e6fbc111..ce5b94ee7 100644 --- a/pkg/sentry/socket/unix/transport/connectioned.go +++ b/pkg/sentry/socket/unix/transport/connectioned.go @@ -16,7 +16,7 @@ package transport import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go index 0322dec0b..4b06d63ac 100644 --- a/pkg/sentry/socket/unix/transport/connectionless.go +++ b/pkg/sentry/socket/unix/transport/connectionless.go @@ -16,7 +16,7 @@ package transport import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/waiter" diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go index fcc0da332..dcbafe0e5 100644 --- a/pkg/sentry/socket/unix/transport/unix.go +++ b/pkg/sentry/socket/unix/transport/unix.go @@ -19,7 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/tcpip" diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 7f49ba864..4d30aa714 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -22,9 +22,9 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -33,10 +33,10 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/control" "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/strace/BUILD b/pkg/sentry/strace/BUILD index ff6fafa63..762a946fe 100644 --- a/pkg/sentry/strace/BUILD +++ b/pkg/sentry/strace/BUILD @@ -34,7 +34,7 @@ go_library( "//pkg/sentry/socket/netlink", "//pkg/sentry/socket/netstack", "//pkg/sentry/syscalls/linux", - "//pkg/sentry/usermem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/strace/poll.go b/pkg/sentry/strace/poll.go index 5187594a7..074e80f9b 100644 --- a/pkg/sentry/strace/poll.go +++ b/pkg/sentry/strace/poll.go @@ -22,7 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/kernel" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // PollEventSet is the set of poll(2) event flags. diff --git a/pkg/sentry/strace/select.go b/pkg/sentry/strace/select.go index c77d418e6..3a4c32aa0 100644 --- a/pkg/sentry/strace/select.go +++ b/pkg/sentry/strace/select.go @@ -19,7 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) func fdsFromSet(t *kernel.Task, set []byte) []int { diff --git a/pkg/sentry/strace/signal.go b/pkg/sentry/strace/signal.go index 5656d53eb..c41f36e3f 100644 --- a/pkg/sentry/strace/signal.go +++ b/pkg/sentry/strace/signal.go @@ -21,7 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // signalNames contains the names of all named signals. diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go index b6d7177f4..d2079c85f 100644 --- a/pkg/sentry/strace/socket.go +++ b/pkg/sentry/strace/socket.go @@ -26,7 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netlink" "gvisor.dev/gvisor/pkg/sentry/socket/netstack" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // SocketFamily are the possible socket(2) families. diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go index 629c1f308..3fc4a47fc 100644 --- a/pkg/sentry/strace/strace.go +++ b/pkg/sentry/strace/strace.go @@ -33,7 +33,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" pb "gvisor.dev/gvisor/pkg/sentry/strace/strace_go_proto" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // DefaultLogMaximumSize is the default LogMaximumSize. diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 7d74e0f70..8d6c52850 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -63,11 +63,12 @@ go_library( "//pkg/abi/linux", "//pkg/binary", "//pkg/bpf", + "//pkg/context", "//pkg/log", "//pkg/metric", "//pkg/rand", + "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/lock", @@ -87,16 +88,15 @@ go_library( "//pkg/sentry/loader", "//pkg/sentry/memmap", "//pkg/sentry/mm", - "//pkg/sentry/safemem", "//pkg/sentry/socket", "//pkg/sentry/socket/control", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/syscalls", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index c76771a54..7435b50bf 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/syscalls" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // AMD64 is a table of Linux amd64 syscall API with the corresponding syscall diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index d3587fda6..03a39fe65 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/syscalls" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // ARM64 is a table of Linux arm64 syscall API with the corresponding syscall diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go index 333013d8c..2ddb2b146 100644 --- a/pkg/sentry/syscalls/linux/sigset.go +++ b/pkg/sentry/syscalls/linux/sigset.go @@ -17,8 +17,8 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // copyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go index f56411bfe..b401978db 100644 --- a/pkg/sentry/syscalls/linux/sys_aio.go +++ b/pkg/sentry/syscalls/linux/sys_aio.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/eventfd" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // I/O commands. diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go index 65b4a227b..5f11b496c 100644 --- a/pkg/sentry/syscalls/linux/sys_epoll.go +++ b/pkg/sentry/syscalls/linux/sys_epoll.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" "gvisor.dev/gvisor/pkg/sentry/syscalls" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 9bc2445a5..c54735148 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -18,8 +18,8 @@ import ( "syscall" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" @@ -28,8 +28,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/fasync" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // fileOpAt performs an operation on the second last component in the path. diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go index bde17a767..b68261f72 100644 --- a/pkg/sentry/syscalls/linux/sys_futex.go +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -21,8 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // futexWaitRestartBlock encapsulates the state required to restart futex(2) diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go index 912cbe4ff..f66f4ffde 100644 --- a/pkg/sentry/syscalls/linux/sys_getdents.go +++ b/pkg/sentry/syscalls/linux/sys_getdents.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Getdents implements linux syscall getdents(2) for 64bit systems. diff --git a/pkg/sentry/syscalls/linux/sys_mempolicy.go b/pkg/sentry/syscalls/linux/sys_mempolicy.go index f5a519d8a..ac934dc6f 100644 --- a/pkg/sentry/syscalls/linux/sys_mempolicy.go +++ b/pkg/sentry/syscalls/linux/sys_mempolicy.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // We unconditionally report a single NUMA node. This also means that our diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go index 58a05b5bb..9959f6e61 100644 --- a/pkg/sentry/syscalls/linux/sys_mmap.go +++ b/pkg/sentry/syscalls/linux/sys_mmap.go @@ -22,8 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Brk implements linux syscall brk(2). diff --git a/pkg/sentry/syscalls/linux/sys_mount.go b/pkg/sentry/syscalls/linux/sys_mount.go index 8c13e2d82..eb5ff48f5 100644 --- a/pkg/sentry/syscalls/linux/sys_mount.go +++ b/pkg/sentry/syscalls/linux/sys_mount.go @@ -19,8 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Mount implements Linux syscall mount(2). diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go index 418d7fa5f..798344042 100644 --- a/pkg/sentry/syscalls/linux/sys_pipe.go +++ b/pkg/sentry/syscalls/linux/sys_pipe.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // pipe2 implements the actual system call with flags. diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index 2b2df989a..4f8762d7d 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/syscalls/linux/sys_random.go b/pkg/sentry/syscalls/linux/sys_random.go index bc4c588bf..c0aa0fd60 100644 --- a/pkg/sentry/syscalls/linux/sys_random.go +++ b/pkg/sentry/syscalls/linux/sys_random.go @@ -19,11 +19,11 @@ import ( "math" "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go index cd31e0649..f9f594190 100644 --- a/pkg/sentry/syscalls/linux/sys_read.go +++ b/pkg/sentry/syscalls/linux/sys_read.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go index 51e3f836b..e08c333d6 100644 --- a/pkg/sentry/syscalls/linux/sys_rlimit.go +++ b/pkg/sentry/syscalls/linux/sys_rlimit.go @@ -19,8 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // rlimit describes an implementation of 'struct rlimit', which may vary from diff --git a/pkg/sentry/syscalls/linux/sys_seccomp.go b/pkg/sentry/syscalls/linux/sys_seccomp.go index 18510ead8..5b7a66f4d 100644 --- a/pkg/sentry/syscalls/linux/sys_seccomp.go +++ b/pkg/sentry/syscalls/linux/sys_seccomp.go @@ -19,8 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // userSockFprog is equivalent to Linux's struct sock_fprog on amd64. diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go index cde3b54e7..5f54f2456 100644 --- a/pkg/sentry/syscalls/linux/sys_sem.go +++ b/pkg/sentry/syscalls/linux/sys_sem.go @@ -22,8 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const opsMax = 500 // SEMOPM diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go index fb6efd5d8..209be2990 100644 --- a/pkg/sentry/syscalls/linux/sys_signal.go +++ b/pkg/sentry/syscalls/linux/sys_signal.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/signalfd" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // "For a process to have permission to send a signal it must diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index cda517a81..2919228d0 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -26,9 +26,9 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/control" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // minListenBacklog is the minimum reasonable backlog for listening sockets. diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go index 69b17b799..c841abccb 100644 --- a/pkg/sentry/syscalls/linux/sys_stat.go +++ b/pkg/sentry/syscalls/linux/sys_stat.go @@ -19,8 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Stat implements linux syscall stat(2). diff --git a/pkg/sentry/syscalls/linux/sys_stat_amd64.go b/pkg/sentry/syscalls/linux/sys_stat_amd64.go index 58afb4a9a..75a567bd4 100644 --- a/pkg/sentry/syscalls/linux/sys_stat_amd64.go +++ b/pkg/sentry/syscalls/linux/sys_stat_amd64.go @@ -21,7 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // copyOutStat copies the attributes (sattr, uattr) to the struct stat at diff --git a/pkg/sentry/syscalls/linux/sys_stat_arm64.go b/pkg/sentry/syscalls/linux/sys_stat_arm64.go index 3e1251e0b..80c98d05c 100644 --- a/pkg/sentry/syscalls/linux/sys_stat_arm64.go +++ b/pkg/sentry/syscalls/linux/sys_stat_arm64.go @@ -21,7 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) // copyOutStat copies the attributes (sattr, uattr) to the struct stat at diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index b47c3b5c4..0c9e2255d 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -24,8 +24,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" "gvisor.dev/gvisor/pkg/sentry/loader" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const ( diff --git a/pkg/sentry/syscalls/linux/sys_time.go b/pkg/sentry/syscalls/linux/sys_time.go index b887fa9d7..2d2aa0819 100644 --- a/pkg/sentry/syscalls/linux/sys_time.go +++ b/pkg/sentry/syscalls/linux/sys_time.go @@ -22,8 +22,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // The most significant 29 bits hold either a pid or a file descriptor. diff --git a/pkg/sentry/syscalls/linux/sys_timer.go b/pkg/sentry/syscalls/linux/sys_timer.go index d4134207b..432351917 100644 --- a/pkg/sentry/syscalls/linux/sys_timer.go +++ b/pkg/sentry/syscalls/linux/sys_timer.go @@ -20,8 +20,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) const nsecPerSec = int64(time.Second) diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go index ad4b67806..aba892939 100644 --- a/pkg/sentry/syscalls/linux/sys_write.go +++ b/pkg/sentry/syscalls/linux/sys_write.go @@ -23,8 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index 77deb8980..efb95555c 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -21,8 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // GetXattr implements linux syscall getxattr(2). diff --git a/pkg/sentry/syscalls/linux/timespec.go b/pkg/sentry/syscalls/linux/timespec.go index 4ff8f9234..ddc3ee26e 100644 --- a/pkg/sentry/syscalls/linux/timespec.go +++ b/pkg/sentry/syscalls/linux/timespec.go @@ -19,8 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // copyTimespecIn copies a Timespec from the untrusted app range to the kernel. diff --git a/pkg/sentry/unimpl/BUILD b/pkg/sentry/unimpl/BUILD index 370fa6ec5..5d4aa3a63 100644 --- a/pkg/sentry/unimpl/BUILD +++ b/pkg/sentry/unimpl/BUILD @@ -14,7 +14,7 @@ go_library( srcs = ["events.go"], visibility = ["//:sandbox"], deps = [ + "//pkg/context", "//pkg/log", - "//pkg/sentry/context", ], ) diff --git a/pkg/sentry/unimpl/events.go b/pkg/sentry/unimpl/events.go index 79b5de9e4..73ed9372f 100644 --- a/pkg/sentry/unimpl/events.go +++ b/pkg/sentry/unimpl/events.go @@ -17,8 +17,8 @@ package unimpl import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" ) // contextID is the events package's type for context.Context.Value keys. diff --git a/pkg/sentry/uniqueid/BUILD b/pkg/sentry/uniqueid/BUILD index e9c18f170..7467e6398 100644 --- a/pkg/sentry/uniqueid/BUILD +++ b/pkg/sentry/uniqueid/BUILD @@ -7,7 +7,7 @@ go_library( srcs = ["context.go"], visibility = ["//pkg/sentry:internal"], deps = [ - "//pkg/sentry/context", + "//pkg/context", "//pkg/sentry/socket/unix/transport", ], ) diff --git a/pkg/sentry/uniqueid/context.go b/pkg/sentry/uniqueid/context.go index 4e466d66d..1fb884a90 100644 --- a/pkg/sentry/uniqueid/context.go +++ b/pkg/sentry/uniqueid/context.go @@ -17,7 +17,7 @@ package uniqueid import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" ) diff --git a/pkg/sentry/usermem/BUILD b/pkg/sentry/usermem/BUILD deleted file mode 100644 index c8322e29e..000000000 --- a/pkg/sentry/usermem/BUILD +++ /dev/null @@ -1,55 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "addr_range", - out = "addr_range.go", - package = "usermem", - prefix = "Addr", - template = "//pkg/segment:generic_range", - types = { - "T": "Addr", - }, -) - -go_library( - name = "usermem", - srcs = [ - "access_type.go", - "addr.go", - "addr_range.go", - "addr_range_seq_unsafe.go", - "bytes_io.go", - "bytes_io_unsafe.go", - "usermem.go", - "usermem_arm64.go", - "usermem_unsafe.go", - "usermem_x86.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/atomicbitops", - "//pkg/binary", - "//pkg/log", - "//pkg/sentry/context", - "//pkg/sentry/safemem", - "//pkg/syserror", - ], -) - -go_test( - name = "usermem_test", - size = "small", - srcs = [ - "addr_range_seq_test.go", - "usermem_test.go", - ], - library = ":usermem", - deps = [ - "//pkg/sentry/context", - "//pkg/sentry/safemem", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/usermem/README.md b/pkg/sentry/usermem/README.md deleted file mode 100644 index f6d2137eb..000000000 --- a/pkg/sentry/usermem/README.md +++ /dev/null @@ -1,31 +0,0 @@ -This package defines primitives for sentry access to application memory. - -Major types: - -- The `IO` interface represents a virtual address space and provides I/O - methods on that address space. `IO` is the lowest-level primitive. The - primary implementation of the `IO` interface is `mm.MemoryManager`. - -- `IOSequence` represents a collection of individually-contiguous address - ranges in a `IO` that is operated on sequentially, analogous to Linux's - `struct iov_iter`. - -Major usage patterns: - -- Access to a task's virtual memory, subject to the application's memory - protections and while running on that task's goroutine, from a context that - is at or above the level of the `kernel` package (e.g. most syscall - implementations in `syscalls/linux`); use the `kernel.Task.Copy*` wrappers - defined in `kernel/task_usermem.go`. - -- Access to a task's virtual memory, from a context that is at or above the - level of the `kernel` package, but where any of the above constraints does - not hold (e.g. `PTRACE_POKEDATA`, which ignores application memory - protections); obtain the task's `mm.MemoryManager` by calling - `kernel.Task.MemoryManager`, and call its `IO` methods directly. - -- Access to a task's virtual memory, from a context that is below the level of - the `kernel` package (e.g. filesystem I/O); clients must pass I/O arguments - from higher layers, usually in the form of an `IOSequence`. The - `kernel.Task.SingleIOSequence` and `kernel.Task.IovecsIOSequence` functions - in `kernel/task_usermem.go` are convenience functions for doing so. diff --git a/pkg/sentry/usermem/access_type.go b/pkg/sentry/usermem/access_type.go deleted file mode 100644 index 9c1742a59..000000000 --- a/pkg/sentry/usermem/access_type.go +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "syscall" -) - -// AccessType specifies memory access types. This is used for -// setting mapping permissions, as well as communicating faults. -// -// +stateify savable -type AccessType struct { - // Read is read access. - Read bool - - // Write is write access. - Write bool - - // Execute is executable access. - Execute bool -} - -// String returns a pretty representation of access. This looks like the -// familiar r-x, rw-, etc. and can be relied on as such. -func (a AccessType) String() string { - bits := [3]byte{'-', '-', '-'} - if a.Read { - bits[0] = 'r' - } - if a.Write { - bits[1] = 'w' - } - if a.Execute { - bits[2] = 'x' - } - return string(bits[:]) -} - -// Any returns true iff at least one of Read, Write or Execute is true. -func (a AccessType) Any() bool { - return a.Read || a.Write || a.Execute -} - -// Prot returns the system prot (syscall.PROT_READ, etc.) for this access. -func (a AccessType) Prot() int { - var prot int - if a.Read { - prot |= syscall.PROT_READ - } - if a.Write { - prot |= syscall.PROT_WRITE - } - if a.Execute { - prot |= syscall.PROT_EXEC - } - return prot -} - -// SupersetOf returns true iff the access types in a are a superset of the -// access types in other. -func (a AccessType) SupersetOf(other AccessType) bool { - if !a.Read && other.Read { - return false - } - if !a.Write && other.Write { - return false - } - if !a.Execute && other.Execute { - return false - } - return true -} - -// Intersect returns the access types set in both a and other. -func (a AccessType) Intersect(other AccessType) AccessType { - return AccessType{ - Read: a.Read && other.Read, - Write: a.Write && other.Write, - Execute: a.Execute && other.Execute, - } -} - -// Union returns the access types set in either a or other. -func (a AccessType) Union(other AccessType) AccessType { - return AccessType{ - Read: a.Read || other.Read, - Write: a.Write || other.Write, - Execute: a.Execute || other.Execute, - } -} - -// Effective returns the set of effective access types allowed by a, even if -// some types are not explicitly allowed. -func (a AccessType) Effective() AccessType { - // In Linux, Write and Execute access generally imply Read access. See - // mm/mmap.c:protection_map. - // - // The notable exception is get_user_pages, which only checks against - // the original vma flags. That said, most user memory accesses do not - // use GUP. - if a.Write || a.Execute { - a.Read = true - } - return a -} - -// Convenient access types. -var ( - NoAccess = AccessType{} - Read = AccessType{Read: true} - Write = AccessType{Write: true} - Execute = AccessType{Execute: true} - ReadWrite = AccessType{Read: true, Write: true} - AnyAccess = AccessType{Read: true, Write: true, Execute: true} -) diff --git a/pkg/sentry/usermem/addr.go b/pkg/sentry/usermem/addr.go deleted file mode 100644 index e79210804..000000000 --- a/pkg/sentry/usermem/addr.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "fmt" -) - -// Addr represents a generic virtual address. -// -// +stateify savable -type Addr uintptr - -// AddLength adds the given length to start and returns the result. ok is true -// iff adding the length did not overflow the range of Addr. -// -// Note: This function is usually used to get the end of an address range -// defined by its start address and length. Since the resulting end is -// exclusive, end == 0 is technically valid, and corresponds to a range that -// extends to the end of the address space, but ok will be false. This isn't -// expected to ever come up in practice. -func (v Addr) AddLength(length uint64) (end Addr, ok bool) { - end = v + Addr(length) - // The second half of the following check is needed in case uintptr is - // smaller than 64 bits. - ok = end >= v && length <= uint64(^Addr(0)) - return -} - -// RoundDown returns the address rounded down to the nearest page boundary. -func (v Addr) RoundDown() Addr { - return v & ^Addr(PageSize-1) -} - -// RoundUp returns the address rounded up to the nearest page boundary. ok is -// true iff rounding up did not wrap around. -func (v Addr) RoundUp() (addr Addr, ok bool) { - addr = Addr(v + PageSize - 1).RoundDown() - ok = addr >= v - return -} - -// MustRoundUp is equivalent to RoundUp, but panics if rounding up wraps -// around. -func (v Addr) MustRoundUp() Addr { - addr, ok := v.RoundUp() - if !ok { - panic(fmt.Sprintf("usermem.Addr(%d).RoundUp() wraps", v)) - } - return addr -} - -// HugeRoundDown returns the address rounded down to the nearest huge page -// boundary. -func (v Addr) HugeRoundDown() Addr { - return v & ^Addr(HugePageSize-1) -} - -// HugeRoundUp returns the address rounded up to the nearest huge page boundary. -// ok is true iff rounding up did not wrap around. -func (v Addr) HugeRoundUp() (addr Addr, ok bool) { - addr = Addr(v + HugePageSize - 1).HugeRoundDown() - ok = addr >= v - return -} - -// PageOffset returns the offset of v into the current page. -func (v Addr) PageOffset() uint64 { - return uint64(v & Addr(PageSize-1)) -} - -// IsPageAligned returns true if v.PageOffset() == 0. -func (v Addr) IsPageAligned() bool { - return v.PageOffset() == 0 -} - -// AddrRange is a range of Addrs. -// -// type AddrRange - -// ToRange returns [v, v+length). -func (v Addr) ToRange(length uint64) (AddrRange, bool) { - end, ok := v.AddLength(length) - return AddrRange{v, end}, ok -} - -// IsPageAligned returns true if ar.Start.IsPageAligned() and -// ar.End.IsPageAligned(). -func (ar AddrRange) IsPageAligned() bool { - return ar.Start.IsPageAligned() && ar.End.IsPageAligned() -} - -// String implements fmt.Stringer.String. -func (ar AddrRange) String() string { - return fmt.Sprintf("[%#x, %#x)", ar.Start, ar.End) -} diff --git a/pkg/sentry/usermem/addr_range_seq_test.go b/pkg/sentry/usermem/addr_range_seq_test.go deleted file mode 100644 index 82f735026..000000000 --- a/pkg/sentry/usermem/addr_range_seq_test.go +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "testing" -) - -var addrRangeSeqTests = []struct { - desc string - ranges []AddrRange -}{ - { - desc: "Empty sequence", - }, - { - desc: "Single empty AddrRange", - ranges: []AddrRange{ - {0x10, 0x10}, - }, - }, - { - desc: "Single non-empty AddrRange of length 1", - ranges: []AddrRange{ - {0x10, 0x11}, - }, - }, - { - desc: "Single non-empty AddrRange of length 2", - ranges: []AddrRange{ - {0x10, 0x12}, - }, - }, - { - desc: "Multiple non-empty AddrRanges", - ranges: []AddrRange{ - {0x10, 0x11}, - {0x20, 0x22}, - }, - }, - { - desc: "Multiple AddrRanges including empty AddrRanges", - ranges: []AddrRange{ - {0x10, 0x10}, - {0x20, 0x20}, - {0x30, 0x33}, - {0x40, 0x44}, - {0x50, 0x50}, - {0x60, 0x60}, - {0x70, 0x77}, - {0x80, 0x88}, - {0x90, 0x90}, - {0xa0, 0xa0}, - }, - }, -} - -func testAddrRangeSeqEqualityWithTailIteration(t *testing.T, ars AddrRangeSeq, wantRanges []AddrRange) { - var wantLen int64 - for _, ar := range wantRanges { - wantLen += int64(ar.Length()) - } - - var i int - for !ars.IsEmpty() { - if gotLen := ars.NumBytes(); gotLen != wantLen { - t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d", i, ars, gotLen, wantLen) - } - if gotN, wantN := ars.NumRanges(), len(wantRanges)-i; gotN != wantN { - t.Errorf("Iteration %d: %v.NumRanges(): got %d, wanted %d", i, ars, gotN, wantN) - } - got := ars.Head() - if i >= len(wantRanges) { - t.Errorf("Iteration %d: %v.Head(): got %s, wanted ", i, ars, got) - } else if want := wantRanges[i]; got != want { - t.Errorf("Iteration %d: %v.Head(): got %s, wanted %s", i, ars, got, want) - } - ars = ars.Tail() - wantLen -= int64(got.Length()) - i++ - } - if gotLen := ars.NumBytes(); gotLen != 0 || wantLen != 0 { - t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d (which should be 0)", i, ars, gotLen, wantLen) - } - if gotN := ars.NumRanges(); gotN != 0 { - t.Errorf("Iteration %d: %v.NumRanges(): got %d, wanted 0", i, ars, gotN) - } -} - -func TestAddrRangeSeqTailIteration(t *testing.T) { - for _, test := range addrRangeSeqTests { - t.Run(test.desc, func(t *testing.T) { - testAddrRangeSeqEqualityWithTailIteration(t, AddrRangeSeqFromSlice(test.ranges), test.ranges) - }) - } -} - -func TestAddrRangeSeqDropFirstEmpty(t *testing.T) { - var ars AddrRangeSeq - if got, want := ars.DropFirst(1), ars; got != want { - t.Errorf("%v.DropFirst(1): got %v, wanted %v", ars, got, want) - } -} - -func TestAddrRangeSeqDropSingleByteIteration(t *testing.T) { - // Tests AddrRangeSeq iteration using Head/DropFirst, simulating - // I/O-per-AddrRange. - for _, test := range addrRangeSeqTests { - t.Run(test.desc, func(t *testing.T) { - // Figure out what AddrRanges we expect to see. - var wantLen int64 - var wantRanges []AddrRange - for _, ar := range test.ranges { - wantLen += int64(ar.Length()) - wantRanges = append(wantRanges, ar) - if ar.Length() == 0 { - // We "do" 0 bytes of I/O and then call DropFirst(0), - // advancing to the next AddrRange. - continue - } - // Otherwise we "do" 1 byte of I/O and then call DropFirst(1), - // advancing the AddrRange by 1 byte, or to the next AddrRange - // if this one is exhausted. - for ar.Start++; ar.Length() != 0; ar.Start++ { - wantRanges = append(wantRanges, ar) - } - } - t.Logf("Expected AddrRanges: %s (%d bytes)", wantRanges, wantLen) - - ars := AddrRangeSeqFromSlice(test.ranges) - var i int - for !ars.IsEmpty() { - if gotLen := ars.NumBytes(); gotLen != wantLen { - t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d", i, ars, gotLen, wantLen) - } - got := ars.Head() - if i >= len(wantRanges) { - t.Errorf("Iteration %d: %v.Head(): got %s, wanted ", i, ars, got) - } else if want := wantRanges[i]; got != want { - t.Errorf("Iteration %d: %v.Head(): got %s, wanted %s", i, ars, got, want) - } - if got.Length() == 0 { - ars = ars.DropFirst(0) - } else { - ars = ars.DropFirst(1) - wantLen-- - } - i++ - } - if gotLen := ars.NumBytes(); gotLen != 0 || wantLen != 0 { - t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d (which should be 0)", i, ars, gotLen, wantLen) - } - }) - } -} - -func TestAddrRangeSeqTakeFirstEmpty(t *testing.T) { - var ars AddrRangeSeq - if got, want := ars.TakeFirst(1), ars; got != want { - t.Errorf("%v.TakeFirst(1): got %v, wanted %v", ars, got, want) - } -} - -func TestAddrRangeSeqTakeFirst(t *testing.T) { - ranges := []AddrRange{ - {0x10, 0x11}, - {0x20, 0x22}, - {0x30, 0x30}, - {0x40, 0x44}, - {0x50, 0x55}, - {0x60, 0x60}, - {0x70, 0x77}, - } - ars := AddrRangeSeqFromSlice(ranges).TakeFirst(5) - want := []AddrRange{ - {0x10, 0x11}, // +1 byte (total 1 byte), not truncated - {0x20, 0x22}, // +2 bytes (total 3 bytes), not truncated - {0x30, 0x30}, // +0 bytes (total 3 bytes), no change - {0x40, 0x42}, // +2 bytes (total 5 bytes), partially truncated - {0x50, 0x50}, // +0 bytes (total 5 bytes), fully truncated - {0x60, 0x60}, // +0 bytes (total 5 bytes), "fully truncated" (no change) - {0x70, 0x70}, // +0 bytes (total 5 bytes), fully truncated - } - testAddrRangeSeqEqualityWithTailIteration(t, ars, want) -} diff --git a/pkg/sentry/usermem/addr_range_seq_unsafe.go b/pkg/sentry/usermem/addr_range_seq_unsafe.go deleted file mode 100644 index c09337c15..000000000 --- a/pkg/sentry/usermem/addr_range_seq_unsafe.go +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "bytes" - "fmt" - "reflect" - "unsafe" -) - -// An AddrRangeSeq represents a sequence of AddrRanges. -// -// AddrRangeSeqs are immutable and may be copied by value. The zero value of -// AddrRangeSeq represents an empty sequence. -// -// An AddrRangeSeq may contain AddrRanges with a length of 0. This is necessary -// since zero-length AddrRanges are significant to MM bounds checks. -type AddrRangeSeq struct { - // If length is 0, then the AddrRangeSeq represents no AddrRanges. - // Invariants: data == 0; offset == 0; limit == 0. - // - // If length is 1, then the AddrRangeSeq represents the single - // AddrRange{offset, offset+limit}. Invariants: data == 0. - // - // Otherwise, length >= 2, and the AddrRangeSeq represents the `length` - // AddrRanges in the array of AddrRanges starting at address `data`, - // starting at `offset` bytes into the first AddrRange and limited to the - // following `limit` bytes. (AddrRanges after `limit` are still iterated, - // but are truncated to a length of 0.) Invariants: data != 0; offset <= - // data[0].Length(); limit > 0; offset+limit <= the combined length of all - // AddrRanges in the array. - data unsafe.Pointer - length int - offset Addr - limit Addr -} - -// AddrRangeSeqOf returns an AddrRangeSeq representing the single AddrRange ar. -func AddrRangeSeqOf(ar AddrRange) AddrRangeSeq { - return AddrRangeSeq{ - length: 1, - offset: ar.Start, - limit: ar.Length(), - } -} - -// AddrRangeSeqFromSlice returns an AddrRangeSeq representing all AddrRanges in -// slice. -// -// Whether the returned AddrRangeSeq shares memory with slice is unspecified; -// clients should avoid mutating slices passed to AddrRangeSeqFromSlice. -// -// Preconditions: The combined length of all AddrRanges in slice <= -// math.MaxInt64. -func AddrRangeSeqFromSlice(slice []AddrRange) AddrRangeSeq { - var limit int64 - for _, ar := range slice { - len64 := int64(ar.Length()) - if len64 < 0 { - panic(fmt.Sprintf("Length of AddrRange %v overflows int64", ar)) - } - sum := limit + len64 - if sum < limit { - panic(fmt.Sprintf("Total length of AddrRanges %v overflows int64", slice)) - } - limit = sum - } - return addrRangeSeqFromSliceLimited(slice, limit) -} - -// Preconditions: The combined length of all AddrRanges in slice <= limit. -// limit >= 0. If len(slice) != 0, then limit > 0. -func addrRangeSeqFromSliceLimited(slice []AddrRange, limit int64) AddrRangeSeq { - switch len(slice) { - case 0: - return AddrRangeSeq{} - case 1: - return AddrRangeSeq{ - length: 1, - offset: slice[0].Start, - limit: Addr(limit), - } - default: - return AddrRangeSeq{ - data: unsafe.Pointer(&slice[0]), - length: len(slice), - limit: Addr(limit), - } - } -} - -// IsEmpty returns true if ars.NumRanges() == 0. -// -// Note that since AddrRangeSeq may contain AddrRanges with a length of zero, -// an AddrRange representing 0 bytes (AddrRangeSeq.NumBytes() == 0) is not -// necessarily empty. -func (ars AddrRangeSeq) IsEmpty() bool { - return ars.length == 0 -} - -// NumRanges returns the number of AddrRanges in ars. -func (ars AddrRangeSeq) NumRanges() int { - return ars.length -} - -// NumBytes returns the number of bytes represented by ars. -func (ars AddrRangeSeq) NumBytes() int64 { - return int64(ars.limit) -} - -// Head returns the first AddrRange in ars. -// -// Preconditions: !ars.IsEmpty(). -func (ars AddrRangeSeq) Head() AddrRange { - if ars.length == 0 { - panic("empty AddrRangeSeq") - } - if ars.length == 1 { - return AddrRange{ars.offset, ars.offset + ars.limit} - } - ar := *(*AddrRange)(ars.data) - ar.Start += ars.offset - if ar.Length() > ars.limit { - ar.End = ar.Start + ars.limit - } - return ar -} - -// Tail returns an AddrRangeSeq consisting of all AddrRanges in ars after the -// first. -// -// Preconditions: !ars.IsEmpty(). -func (ars AddrRangeSeq) Tail() AddrRangeSeq { - if ars.length == 0 { - panic("empty AddrRangeSeq") - } - if ars.length == 1 { - return AddrRangeSeq{} - } - return ars.externalTail() -} - -// Preconditions: ars.length >= 2. -func (ars AddrRangeSeq) externalTail() AddrRangeSeq { - headLen := (*AddrRange)(ars.data).Length() - ars.offset - var tailLimit int64 - if ars.limit > headLen { - tailLimit = int64(ars.limit - headLen) - } - var extSlice []AddrRange - extSliceHdr := (*reflect.SliceHeader)(unsafe.Pointer(&extSlice)) - extSliceHdr.Data = uintptr(ars.data) - extSliceHdr.Len = ars.length - extSliceHdr.Cap = ars.length - return addrRangeSeqFromSliceLimited(extSlice[1:], tailLimit) -} - -// DropFirst returns an AddrRangeSeq equivalent to ars, but with the first n -// bytes omitted. If n > ars.NumBytes(), DropFirst returns an empty -// AddrRangeSeq. -// -// If !ars.IsEmpty() and ars.Head().Length() == 0, DropFirst will always omit -// at least ars.Head(), even if n == 0. This guarantees that the basic pattern -// of: -// -// for !ars.IsEmpty() { -// n, err = doIOWith(ars.Head()) -// if err != nil { -// return err -// } -// ars = ars.DropFirst(n) -// } -// -// works even in the presence of zero-length AddrRanges. -// -// Preconditions: n >= 0. -func (ars AddrRangeSeq) DropFirst(n int) AddrRangeSeq { - if n < 0 { - panic(fmt.Sprintf("invalid n: %d", n)) - } - return ars.DropFirst64(int64(n)) -} - -// DropFirst64 is equivalent to DropFirst but takes an int64. -func (ars AddrRangeSeq) DropFirst64(n int64) AddrRangeSeq { - if n < 0 { - panic(fmt.Sprintf("invalid n: %d", n)) - } - if Addr(n) > ars.limit { - return AddrRangeSeq{} - } - // Handle initial empty AddrRange. - switch ars.length { - case 0: - return AddrRangeSeq{} - case 1: - if ars.limit == 0 { - return AddrRangeSeq{} - } - default: - if rawHeadLen := (*AddrRange)(ars.data).Length(); ars.offset == rawHeadLen { - ars = ars.externalTail() - } - } - for n != 0 { - // Calling ars.Head() here is surprisingly expensive, so inline getting - // the head's length. - var headLen Addr - if ars.length == 1 { - headLen = ars.limit - } else { - headLen = (*AddrRange)(ars.data).Length() - ars.offset - } - if Addr(n) < headLen { - // Dropping ends partway through the head AddrRange. - ars.offset += Addr(n) - ars.limit -= Addr(n) - return ars - } - n -= int64(headLen) - ars = ars.Tail() - } - return ars -} - -// TakeFirst returns an AddrRangeSeq equivalent to ars, but iterating at most n -// bytes. TakeFirst never removes AddrRanges from ars; AddrRanges beyond the -// first n bytes are reduced to a length of zero, but will still be iterated. -// -// Preconditions: n >= 0. -func (ars AddrRangeSeq) TakeFirst(n int) AddrRangeSeq { - if n < 0 { - panic(fmt.Sprintf("invalid n: %d", n)) - } - return ars.TakeFirst64(int64(n)) -} - -// TakeFirst64 is equivalent to TakeFirst but takes an int64. -func (ars AddrRangeSeq) TakeFirst64(n int64) AddrRangeSeq { - if n < 0 { - panic(fmt.Sprintf("invalid n: %d", n)) - } - if ars.limit > Addr(n) { - ars.limit = Addr(n) - } - return ars -} - -// String implements fmt.Stringer.String. -func (ars AddrRangeSeq) String() string { - // This is deliberately chosen to be the same as fmt's automatic stringer - // for []AddrRange. - var buf bytes.Buffer - buf.WriteByte('[') - var sep string - for !ars.IsEmpty() { - buf.WriteString(sep) - sep = " " - buf.WriteString(ars.Head().String()) - ars = ars.Tail() - } - buf.WriteByte(']') - return buf.String() -} diff --git a/pkg/sentry/usermem/bytes_io.go b/pkg/sentry/usermem/bytes_io.go deleted file mode 100644 index 7898851b3..000000000 --- a/pkg/sentry/usermem/bytes_io.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/syserror" -) - -const maxInt = int(^uint(0) >> 1) - -// BytesIO implements IO using a byte slice. Addresses are interpreted as -// offsets into the slice. Reads and writes beyond the end of the slice return -// EFAULT. -type BytesIO struct { - Bytes []byte -} - -// CopyOut implements IO.CopyOut. -func (b *BytesIO) CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error) { - rngN, rngErr := b.rangeCheck(addr, len(src)) - if rngN == 0 { - return 0, rngErr - } - return copy(b.Bytes[int(addr):], src[:rngN]), rngErr -} - -// CopyIn implements IO.CopyIn. -func (b *BytesIO) CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error) { - rngN, rngErr := b.rangeCheck(addr, len(dst)) - if rngN == 0 { - return 0, rngErr - } - return copy(dst[:rngN], b.Bytes[int(addr):]), rngErr -} - -// ZeroOut implements IO.ZeroOut. -func (b *BytesIO) ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error) { - if toZero > int64(maxInt) { - return 0, syserror.EINVAL - } - rngN, rngErr := b.rangeCheck(addr, int(toZero)) - if rngN == 0 { - return 0, rngErr - } - zeroSlice := b.Bytes[int(addr) : int(addr)+rngN] - for i := range zeroSlice { - zeroSlice[i] = 0 - } - return int64(rngN), rngErr -} - -// CopyOutFrom implements IO.CopyOutFrom. -func (b *BytesIO) CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) { - dsts, rngErr := b.blocksFromAddrRanges(ars) - n, err := src.ReadToBlocks(dsts) - if err != nil { - return int64(n), err - } - return int64(n), rngErr -} - -// CopyInTo implements IO.CopyInTo. -func (b *BytesIO) CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) { - srcs, rngErr := b.blocksFromAddrRanges(ars) - n, err := dst.WriteFromBlocks(srcs) - if err != nil { - return int64(n), err - } - return int64(n), rngErr -} - -func (b *BytesIO) rangeCheck(addr Addr, length int) (int, error) { - if length == 0 { - return 0, nil - } - if length < 0 { - return 0, syserror.EINVAL - } - max := Addr(len(b.Bytes)) - if addr >= max { - return 0, syserror.EFAULT - } - end, ok := addr.AddLength(uint64(length)) - if !ok || end > max { - return int(max - addr), syserror.EFAULT - } - return length, nil -} - -func (b *BytesIO) blocksFromAddrRanges(ars AddrRangeSeq) (safemem.BlockSeq, error) { - switch ars.NumRanges() { - case 0: - return safemem.BlockSeq{}, nil - case 1: - block, err := b.blockFromAddrRange(ars.Head()) - return safemem.BlockSeqOf(block), err - default: - blocks := make([]safemem.Block, 0, ars.NumRanges()) - for !ars.IsEmpty() { - block, err := b.blockFromAddrRange(ars.Head()) - if block.Len() != 0 { - blocks = append(blocks, block) - } - if err != nil { - return safemem.BlockSeqFromSlice(blocks), err - } - ars = ars.Tail() - } - return safemem.BlockSeqFromSlice(blocks), nil - } -} - -func (b *BytesIO) blockFromAddrRange(ar AddrRange) (safemem.Block, error) { - n, err := b.rangeCheck(ar.Start, int(ar.Length())) - if n == 0 { - return safemem.Block{}, err - } - return safemem.BlockFromSafeSlice(b.Bytes[int(ar.Start) : int(ar.Start)+n]), err -} - -// BytesIOSequence returns an IOSequence representing the given byte slice. -func BytesIOSequence(buf []byte) IOSequence { - return IOSequence{ - IO: &BytesIO{buf}, - Addrs: AddrRangeSeqOf(AddrRange{0, Addr(len(buf))}), - } -} diff --git a/pkg/sentry/usermem/bytes_io_unsafe.go b/pkg/sentry/usermem/bytes_io_unsafe.go deleted file mode 100644 index fca5952f4..000000000 --- a/pkg/sentry/usermem/bytes_io_unsafe.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "sync/atomic" - "unsafe" - - "gvisor.dev/gvisor/pkg/atomicbitops" - "gvisor.dev/gvisor/pkg/sentry/context" -) - -// SwapUint32 implements IO.SwapUint32. -func (b *BytesIO) SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error) { - if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil { - return 0, rngErr - } - return atomic.SwapUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)])), new), nil -} - -// CompareAndSwapUint32 implements IO.CompareAndSwapUint32. -func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error) { - if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil { - return 0, rngErr - } - return atomicbitops.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)])), old, new), nil -} - -// LoadUint32 implements IO.LoadUint32. -func (b *BytesIO) LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) { - if _, err := b.rangeCheck(addr, 4); err != nil { - return 0, err - } - return atomic.LoadUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)]))), nil -} diff --git a/pkg/sentry/usermem/usermem.go b/pkg/sentry/usermem/usermem.go deleted file mode 100644 index 7b1f312b1..000000000 --- a/pkg/sentry/usermem/usermem.go +++ /dev/null @@ -1,597 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package usermem governs access to user memory. -package usermem - -import ( - "bytes" - "errors" - "io" - "strconv" - - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/syserror" -) - -// IO provides access to the contents of a virtual memory space. -// -// FIXME(b/38173783): Implementations of IO cannot expect ctx to contain any -// meaningful data. -type IO interface { - // CopyOut copies len(src) bytes from src to the memory mapped at addr. It - // returns the number of bytes copied. If the number of bytes copied is < - // len(src), it returns a non-nil error explaining why. - // - // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or - // any following locks in the lock order. - // - // Postconditions: CopyOut does not retain src. - CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error) - - // CopyIn copies len(dst) bytes from the memory mapped at addr to dst. - // It returns the number of bytes copied. If the number of bytes copied is - // < len(dst), it returns a non-nil error explaining why. - // - // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or - // any following locks in the lock order. - // - // Postconditions: CopyIn does not retain dst. - CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error) - - // ZeroOut sets toZero bytes to 0, starting at addr. It returns the number - // of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a - // non-nil error explaining why. - // - // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or - // any following locks in the lock order. toZero >= 0. - ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error) - - // CopyOutFrom copies ars.NumBytes() bytes from src to the memory mapped at - // ars. It returns the number of bytes copied, which may be less than the - // number of bytes read from src if copying fails. CopyOutFrom may return a - // partial copy without an error iff src.ReadToBlocks returns a partial - // read without an error. - // - // CopyOutFrom calls src.ReadToBlocks at most once. - // - // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or - // any following locks in the lock order. src.ReadToBlocks must not block - // on mm.MemoryManager.activeMu or any preceding locks in the lock order. - CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) - - // CopyInTo copies ars.NumBytes() bytes from the memory mapped at ars to - // dst. It returns the number of bytes copied. CopyInTo may return a - // partial copy without an error iff dst.WriteFromBlocks returns a partial - // write without an error. - // - // CopyInTo calls dst.WriteFromBlocks at most once. - // - // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or - // any following locks in the lock order. dst.WriteFromBlocks must not - // block on mm.MemoryManager.activeMu or any preceding locks in the lock - // order. - CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) - - // TODO(jamieliu): The requirement that CopyOutFrom/CopyInTo call src/dst - // at most once, which is unnecessary in most cases, forces implementations - // to gather safemem.Blocks into a single slice to pass to src/dst. Add - // CopyOutFromIter/CopyInToIter, which relaxes this restriction, to avoid - // this allocation. - - // SwapUint32 atomically sets the uint32 value at addr to new and - // returns the previous value. - // - // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or - // any following locks in the lock order. addr must be aligned to a 4-byte - // boundary. - SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error) - - // CompareAndSwapUint32 atomically compares the uint32 value at addr to - // old; if they are equal, the value in memory is replaced by new. In - // either case, the previous value stored in memory is returned. - // - // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or - // any following locks in the lock order. addr must be aligned to a 4-byte - // boundary. - CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error) - - // LoadUint32 atomically loads the uint32 value at addr and returns it. - // - // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or - // any following locks in the lock order. addr must be aligned to a 4-byte - // boundary. - LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) -} - -// IOOpts contains options applicable to all IO methods. -type IOOpts struct { - // If IgnorePermissions is true, application-defined memory protections set - // by mmap(2) or mprotect(2) will be ignored. (Memory protections required - // by the target of the mapping are never ignored.) - IgnorePermissions bool - - // If AddressSpaceActive is true, the IO implementation may assume that it - // has an active AddressSpace and can therefore use AddressSpace copying - // without performing activation. See mm/io.go for details. - AddressSpaceActive bool -} - -// IOReadWriter is an io.ReadWriter that reads from / writes to addresses -// starting at addr in IO. The preconditions that apply to IO.CopyIn and -// IO.CopyOut also apply to IOReadWriter.Read and IOReadWriter.Write -// respectively. -type IOReadWriter struct { - Ctx context.Context - IO IO - Addr Addr - Opts IOOpts -} - -// Read implements io.Reader.Read. -// -// Note that an address space does not have an "end of file", so Read can only -// return io.EOF if IO.CopyIn returns io.EOF. Attempts to read unmapped or -// unreadable memory, or beyond the end of the address space, should return -// EFAULT. -func (rw *IOReadWriter) Read(dst []byte) (int, error) { - n, err := rw.IO.CopyIn(rw.Ctx, rw.Addr, dst, rw.Opts) - end, ok := rw.Addr.AddLength(uint64(n)) - if ok { - rw.Addr = end - } else { - // Disallow wraparound. - rw.Addr = ^Addr(0) - if err != nil { - err = syserror.EFAULT - } - } - return n, err -} - -// Writer implements io.Writer.Write. -func (rw *IOReadWriter) Write(src []byte) (int, error) { - n, err := rw.IO.CopyOut(rw.Ctx, rw.Addr, src, rw.Opts) - end, ok := rw.Addr.AddLength(uint64(n)) - if ok { - rw.Addr = end - } else { - // Disallow wraparound. - rw.Addr = ^Addr(0) - if err != nil { - err = syserror.EFAULT - } - } - return n, err -} - -// CopyObjectOut copies a fixed-size value or slice of fixed-size values from -// src to the memory mapped at addr in uio. It returns the number of bytes -// copied. -// -// CopyObjectOut must use reflection to encode src; performance-sensitive -// clients should do encoding manually and use uio.CopyOut directly. -// -// Preconditions: As for IO.CopyOut. -func CopyObjectOut(ctx context.Context, uio IO, addr Addr, src interface{}, opts IOOpts) (int, error) { - w := &IOReadWriter{ - Ctx: ctx, - IO: uio, - Addr: addr, - Opts: opts, - } - // Allocate a byte slice the size of the object being marshaled. This - // adds an extra reflection call, but avoids needing to grow the slice - // during encoding, which can result in many heap-allocated slices. - b := make([]byte, 0, binary.Size(src)) - return w.Write(binary.Marshal(b, ByteOrder, src)) -} - -// CopyObjectIn copies a fixed-size value or slice of fixed-size values from -// the memory mapped at addr in uio to dst. It returns the number of bytes -// copied. -// -// CopyObjectIn must use reflection to decode dst; performance-sensitive -// clients should use uio.CopyIn directly and do decoding manually. -// -// Preconditions: As for IO.CopyIn. -func CopyObjectIn(ctx context.Context, uio IO, addr Addr, dst interface{}, opts IOOpts) (int, error) { - r := &IOReadWriter{ - Ctx: ctx, - IO: uio, - Addr: addr, - Opts: opts, - } - buf := make([]byte, binary.Size(dst)) - if _, err := io.ReadFull(r, buf); err != nil { - return 0, err - } - binary.Unmarshal(buf, ByteOrder, dst) - return int(r.Addr - addr), nil -} - -// CopyStringIn tuning parameters, defined outside that function for tests. -const ( - copyStringIncrement = 64 - copyStringMaxInitBufLen = 256 -) - -// CopyStringIn copies a NUL-terminated string of unknown length from the -// memory mapped at addr in uio and returns it as a string (not including the -// trailing NUL). If the length of the string, including the terminating NUL, -// would exceed maxlen, CopyStringIn returns the string truncated to maxlen and -// ENAMETOOLONG. -// -// Preconditions: As for IO.CopyFromUser. maxlen >= 0. -func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpts) (string, error) { - initLen := maxlen - if initLen > copyStringMaxInitBufLen { - initLen = copyStringMaxInitBufLen - } - buf := make([]byte, initLen) - var done int - for done < maxlen { - // Read up to copyStringIncrement bytes at a time. - readlen := copyStringIncrement - if readlen > maxlen-done { - readlen = maxlen - done - } - end, ok := addr.AddLength(uint64(readlen)) - if !ok { - return stringFromImmutableBytes(buf[:done]), syserror.EFAULT - } - // Shorten the read to avoid crossing page boundaries, since faulting - // in a page unnecessarily is expensive. This also ensures that partial - // copies up to the end of application-mappable memory succeed. - if addr.RoundDown() != end.RoundDown() { - end = end.RoundDown() - readlen = int(end - addr) - } - // Ensure that our buffer is large enough to accommodate the read. - if done+readlen > len(buf) { - newBufLen := len(buf) * 2 - if newBufLen > maxlen { - newBufLen = maxlen - } - buf = append(buf, make([]byte, newBufLen-len(buf))...) - } - n, err := uio.CopyIn(ctx, addr, buf[done:done+readlen], opts) - // Look for the terminating zero byte, which may have occurred before - // hitting err. - if i := bytes.IndexByte(buf[done:done+n], byte(0)); i >= 0 { - return stringFromImmutableBytes(buf[:done+i]), nil - } - - done += n - if err != nil { - return stringFromImmutableBytes(buf[:done]), err - } - addr = end - } - return stringFromImmutableBytes(buf), syserror.ENAMETOOLONG -} - -// CopyOutVec copies bytes from src to the memory mapped at ars in uio. The -// maximum number of bytes copied is ars.NumBytes() or len(src), whichever is -// less. CopyOutVec returns the number of bytes copied; if this is less than -// the maximum, it returns a non-nil error explaining why. -// -// Preconditions: As for IO.CopyOut. -func CopyOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, src []byte, opts IOOpts) (int, error) { - var done int - for !ars.IsEmpty() && done < len(src) { - ar := ars.Head() - cplen := len(src) - done - if Addr(cplen) >= ar.Length() { - cplen = int(ar.Length()) - } - n, err := uio.CopyOut(ctx, ar.Start, src[done:done+cplen], opts) - done += n - if err != nil { - return done, err - } - ars = ars.DropFirst(n) - } - return done, nil -} - -// CopyInVec copies bytes from the memory mapped at ars in uio to dst. The -// maximum number of bytes copied is ars.NumBytes() or len(dst), whichever is -// less. CopyInVec returns the number of bytes copied; if this is less than the -// maximum, it returns a non-nil error explaining why. -// -// Preconditions: As for IO.CopyIn. -func CopyInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst []byte, opts IOOpts) (int, error) { - var done int - for !ars.IsEmpty() && done < len(dst) { - ar := ars.Head() - cplen := len(dst) - done - if Addr(cplen) >= ar.Length() { - cplen = int(ar.Length()) - } - n, err := uio.CopyIn(ctx, ar.Start, dst[done:done+cplen], opts) - done += n - if err != nil { - return done, err - } - ars = ars.DropFirst(n) - } - return done, nil -} - -// ZeroOutVec writes zeroes to the memory mapped at ars in uio. The maximum -// number of bytes written is ars.NumBytes() or toZero, whichever is less. -// ZeroOutVec returns the number of bytes written; if this is less than the -// maximum, it returns a non-nil error explaining why. -// -// Preconditions: As for IO.ZeroOut. -func ZeroOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, toZero int64, opts IOOpts) (int64, error) { - var done int64 - for !ars.IsEmpty() && done < toZero { - ar := ars.Head() - cplen := toZero - done - if Addr(cplen) >= ar.Length() { - cplen = int64(ar.Length()) - } - n, err := uio.ZeroOut(ctx, ar.Start, cplen, opts) - done += n - if err != nil { - return done, err - } - ars = ars.DropFirst64(n) - } - return done, nil -} - -func isASCIIWhitespace(b byte) bool { - // Compare Linux include/linux/ctype.h, lib/ctype.c. - // 9 => horizontal tab '\t' - // 10 => line feed '\n' - // 11 => vertical tab '\v' - // 12 => form feed '\c' - // 13 => carriage return '\r' - return b == ' ' || (b >= 9 && b <= 13) -} - -// CopyInt32StringsInVec copies up to len(dsts) whitespace-separated decimal -// strings from the memory mapped at ars in uio and converts them to int32 -// values in dsts. It returns the number of bytes read. -// -// CopyInt32StringsInVec shares the following properties with Linux's -// kernel/sysctl.c:proc_dointvec(write=1): -// -// - If any read value overflows the range of int32, or any invalid characters -// are encountered during the read, CopyInt32StringsInVec returns EINVAL. -// -// - If, upon reaching the end of ars, fewer than len(dsts) values have been -// read, CopyInt32StringsInVec returns no error if at least 1 value was read -// and EINVAL otherwise. -// -// - Trailing whitespace after the last successfully read value is counted in -// the number of bytes read. -// -// Unlike proc_dointvec(): -// -// - CopyInt32StringsInVec does not implicitly limit ars.NumBytes() to -// PageSize-1; callers that require this must do so explicitly. -// -// - CopyInt32StringsInVec returns EINVAL if ars.NumBytes() == 0. -// -// Preconditions: As for CopyInVec. -func CopyInt32StringsInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dsts []int32, opts IOOpts) (int64, error) { - if len(dsts) == 0 { - return 0, nil - } - - buf := make([]byte, ars.NumBytes()) - n, cperr := CopyInVec(ctx, uio, ars, buf, opts) - buf = buf[:n] - - var i, j int - for ; j < len(dsts); j++ { - // Skip leading whitespace. - for i < len(buf) && isASCIIWhitespace(buf[i]) { - i++ - } - if i == len(buf) { - break - } - - // Find the end of the value to be parsed (next whitespace or end of string). - nextI := i + 1 - for nextI < len(buf) && !isASCIIWhitespace(buf[nextI]) { - nextI++ - } - - // Parse a single value. - val, err := strconv.ParseInt(string(buf[i:nextI]), 10, 32) - if err != nil { - return int64(i), syserror.EINVAL - } - dsts[j] = int32(val) - - i = nextI - } - - // Skip trailing whitespace. - for i < len(buf) && isASCIIWhitespace(buf[i]) { - i++ - } - - if cperr != nil { - return int64(i), cperr - } - if j == 0 { - return int64(i), syserror.EINVAL - } - return int64(i), nil -} - -// CopyInt32StringInVec is equivalent to CopyInt32StringsInVec, but copies at -// most one int32. -func CopyInt32StringInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst *int32, opts IOOpts) (int64, error) { - dsts := [1]int32{*dst} - n, err := CopyInt32StringsInVec(ctx, uio, ars, dsts[:], opts) - *dst = dsts[0] - return n, err -} - -// IOSequence holds arguments to IO methods. -type IOSequence struct { - IO IO - Addrs AddrRangeSeq - Opts IOOpts -} - -// NumBytes returns s.Addrs.NumBytes(). -// -// Note that NumBytes() may return 0 even if !s.Addrs.IsEmpty(), since -// s.Addrs may contain a non-zero number of zero-length AddrRanges. -// Many clients of -// IOSequence currently do something like: -// -// if ioseq.NumBytes() == 0 { -// return 0, nil -// } -// if f.availableBytes == 0 { -// return 0, syserror.ErrWouldBlock -// } -// return ioseq.CopyOutFrom(..., reader) -// -// In such cases, using s.Addrs.IsEmpty() will cause them to have the wrong -// behavior for zero-length I/O. However, using s.NumBytes() == 0 instead means -// that we will return success for zero-length I/O in cases where Linux would -// return EFAULT due to a failed access_ok() check, so in the long term we -// should move checks for ErrWouldBlock etc. into the body of -// reader.ReadToBlocks and use s.Addrs.IsEmpty() instead. -func (s IOSequence) NumBytes() int64 { - return s.Addrs.NumBytes() -} - -// DropFirst returns a copy of s with s.Addrs.DropFirst(n). -// -// Preconditions: As for AddrRangeSeq.DropFirst. -func (s IOSequence) DropFirst(n int) IOSequence { - return IOSequence{s.IO, s.Addrs.DropFirst(n), s.Opts} -} - -// DropFirst64 returns a copy of s with s.Addrs.DropFirst64(n). -// -// Preconditions: As for AddrRangeSeq.DropFirst64. -func (s IOSequence) DropFirst64(n int64) IOSequence { - return IOSequence{s.IO, s.Addrs.DropFirst64(n), s.Opts} -} - -// TakeFirst returns a copy of s with s.Addrs.TakeFirst(n). -// -// Preconditions: As for AddrRangeSeq.TakeFirst. -func (s IOSequence) TakeFirst(n int) IOSequence { - return IOSequence{s.IO, s.Addrs.TakeFirst(n), s.Opts} -} - -// TakeFirst64 returns a copy of s with s.Addrs.TakeFirst64(n). -// -// Preconditions: As for AddrRangeSeq.TakeFirst64. -func (s IOSequence) TakeFirst64(n int64) IOSequence { - return IOSequence{s.IO, s.Addrs.TakeFirst64(n), s.Opts} -} - -// CopyOut invokes CopyOutVec over s.Addrs. -// -// As with CopyOutVec, if s.NumBytes() < len(src), the copy will be truncated -// to s.NumBytes(), and a nil error will be returned. -// -// Preconditions: As for CopyOutVec. -func (s IOSequence) CopyOut(ctx context.Context, src []byte) (int, error) { - return CopyOutVec(ctx, s.IO, s.Addrs, src, s.Opts) -} - -// CopyIn invokes CopyInVec over s.Addrs. -// -// As with CopyInVec, if s.NumBytes() < len(dst), the copy will be truncated to -// s.NumBytes(), and a nil error will be returned. -// -// Preconditions: As for CopyInVec. -func (s IOSequence) CopyIn(ctx context.Context, dst []byte) (int, error) { - return CopyInVec(ctx, s.IO, s.Addrs, dst, s.Opts) -} - -// ZeroOut invokes ZeroOutVec over s.Addrs. -// -// As with ZeroOutVec, if s.NumBytes() < toZero, the write will be truncated -// to s.NumBytes(), and a nil error will be returned. -// -// Preconditions: As for ZeroOutVec. -func (s IOSequence) ZeroOut(ctx context.Context, toZero int64) (int64, error) { - return ZeroOutVec(ctx, s.IO, s.Addrs, toZero, s.Opts) -} - -// CopyOutFrom invokes s.CopyOutFrom over s.Addrs. -// -// Preconditions: As for IO.CopyOutFrom. -func (s IOSequence) CopyOutFrom(ctx context.Context, src safemem.Reader) (int64, error) { - return s.IO.CopyOutFrom(ctx, s.Addrs, src, s.Opts) -} - -// CopyInTo invokes s.CopyInTo over s.Addrs. -// -// Preconditions: As for IO.CopyInTo. -func (s IOSequence) CopyInTo(ctx context.Context, dst safemem.Writer) (int64, error) { - return s.IO.CopyInTo(ctx, s.Addrs, dst, s.Opts) -} - -// Reader returns an io.Reader that reads from s. Reads beyond the end of s -// return io.EOF. The preconditions that apply to s.CopyIn also apply to the -// returned io.Reader.Read. -func (s IOSequence) Reader(ctx context.Context) io.Reader { - return &ioSequenceReadWriter{ctx, s} -} - -// Writer returns an io.Writer that writes to s. Writes beyond the end of s -// return ErrEndOfIOSequence. The preconditions that apply to s.CopyOut also -// apply to the returned io.Writer.Write. -func (s IOSequence) Writer(ctx context.Context) io.Writer { - return &ioSequenceReadWriter{ctx, s} -} - -// ErrEndOfIOSequence is returned by IOSequence.Writer().Write() when -// attempting to write beyond the end of the IOSequence. -var ErrEndOfIOSequence = errors.New("write beyond end of IOSequence") - -type ioSequenceReadWriter struct { - ctx context.Context - s IOSequence -} - -// Read implements io.Reader.Read. -func (rw *ioSequenceReadWriter) Read(dst []byte) (int, error) { - n, err := rw.s.CopyIn(rw.ctx, dst) - rw.s = rw.s.DropFirst(n) - if err == nil && rw.s.NumBytes() == 0 { - err = io.EOF - } - return n, err -} - -// Write implements io.Writer.Write. -func (rw *ioSequenceReadWriter) Write(src []byte) (int, error) { - n, err := rw.s.CopyOut(rw.ctx, src) - rw.s = rw.s.DropFirst(n) - if err == nil && n < len(src) { - err = ErrEndOfIOSequence - } - return n, err -} diff --git a/pkg/sentry/usermem/usermem_arm64.go b/pkg/sentry/usermem/usermem_arm64.go deleted file mode 100644 index fdfc30a66..000000000 --- a/pkg/sentry/usermem/usermem_arm64.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build arm64 - -package usermem - -import ( - "encoding/binary" - "syscall" -) - -const ( - // PageSize is the system page size. - // arm64 support 4K/16K/64K page size, - // which can be get by syscall.Getpagesize(). - // Currently, only 4K page size is supported. - PageSize = 1 << PageShift - - // HugePageSize is the system huge page size. - HugePageSize = 1 << HugePageShift - - // PageShift is the binary log of the system page size. - PageShift = 12 - - // HugePageShift is the binary log of the system huge page size. - // Should be calculated by "PageShift + (PageShift - 3)" - // when multiple page size support is ready. - HugePageShift = 21 -) - -var ( - // ByteOrder is the native byte order (little endian). - ByteOrder = binary.LittleEndian -) - -func init() { - // Make sure the page size is 4K on arm64 platform. - if size := syscall.Getpagesize(); size != PageSize { - panic("Only 4K page size is supported on arm64!") - } -} diff --git a/pkg/sentry/usermem/usermem_test.go b/pkg/sentry/usermem/usermem_test.go deleted file mode 100644 index 299f64754..000000000 --- a/pkg/sentry/usermem/usermem_test.go +++ /dev/null @@ -1,424 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "bytes" - "encoding/binary" - "fmt" - "reflect" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/syserror" -) - -// newContext returns a context.Context that we can use in these tests (we -// can't use contexttest because it depends on usermem). -func newContext() context.Context { - return context.Background() -} - -func newBytesIOString(s string) *BytesIO { - return &BytesIO{[]byte(s)} -} - -func TestBytesIOCopyOutSuccess(t *testing.T) { - b := newBytesIOString("ABCDE") - n, err := b.CopyOut(newContext(), 1, []byte("foo"), IOOpts{}) - if wantN := 3; n != wantN || err != nil { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := b.Bytes, []byte("AfooE"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyOutFailure(t *testing.T) { - b := newBytesIOString("ABC") - n, err := b.CopyOut(newContext(), 1, []byte("foo"), IOOpts{}) - if wantN, wantErr := 2, syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := b.Bytes, []byte("Afo"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyInSuccess(t *testing.T) { - b := newBytesIOString("AfooE") - var dst [3]byte - n, err := b.CopyIn(newContext(), 1, dst[:], IOOpts{}) - if wantN := 3; n != wantN || err != nil { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst[:], []byte("foo"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyInFailure(t *testing.T) { - b := newBytesIOString("Afo") - var dst [3]byte - n, err := b.CopyIn(newContext(), 1, dst[:], IOOpts{}) - if wantN, wantErr := 2, syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := dst[:], []byte("fo\x00"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } -} - -func TestBytesIOZeroOutSuccess(t *testing.T) { - b := newBytesIOString("ABCD") - n, err := b.ZeroOut(newContext(), 1, 2, IOOpts{}) - if wantN := int64(2); n != wantN || err != nil { - t.Errorf("ZeroOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := b.Bytes, []byte("A\x00\x00D"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOZeroOutFailure(t *testing.T) { - b := newBytesIOString("ABC") - n, err := b.ZeroOut(newContext(), 1, 3, IOOpts{}) - if wantN, wantErr := int64(2), syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("ZeroOut: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := b.Bytes, []byte("A\x00\x00"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyOutFromSuccess(t *testing.T) { - b := newBytesIOString("ABCDEFGH") - n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{ - {Start: 4, End: 7}, - {Start: 1, End: 4}, - }), safemem.FromIOReader{bytes.NewBufferString("barfoo")}, IOOpts{}) - if wantN := int64(6); n != wantN || err != nil { - t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := b.Bytes, []byte("AfoobarH"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyOutFromFailure(t *testing.T) { - b := newBytesIOString("ABCDE") - n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{ - {Start: 1, End: 4}, - {Start: 4, End: 7}, - }), safemem.FromIOReader{bytes.NewBufferString("foobar")}, IOOpts{}) - if wantN, wantErr := int64(4), syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := b.Bytes, []byte("Afoob"); !bytes.Equal(got, want) { - t.Errorf("Bytes: got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyInToSuccess(t *testing.T) { - b := newBytesIOString("AfoobarH") - var dst bytes.Buffer - n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{ - {Start: 4, End: 7}, - {Start: 1, End: 4}, - }), safemem.FromIOWriter{&dst}, IOOpts{}) - if wantN := int64(6); n != wantN || err != nil { - t.Errorf("CopyInTo: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst.Bytes(), []byte("barfoo"); !bytes.Equal(got, want) { - t.Errorf("dst.Bytes(): got %q, wanted %q", got, want) - } -} - -func TestBytesIOCopyInToFailure(t *testing.T) { - b := newBytesIOString("Afoob") - var dst bytes.Buffer - n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{ - {Start: 1, End: 4}, - {Start: 4, End: 7}, - }), safemem.FromIOWriter{&dst}, IOOpts{}) - if wantN, wantErr := int64(4), syserror.EFAULT; n != wantN || err != wantErr { - t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) - } - if got, want := dst.Bytes(), []byte("foob"); !bytes.Equal(got, want) { - t.Errorf("dst.Bytes(): got %q, wanted %q", got, want) - } -} - -type testStruct struct { - Int8 int8 - Uint8 uint8 - Int16 int16 - Uint16 uint16 - Int32 int32 - Uint32 uint32 - Int64 int64 - Uint64 uint64 -} - -func TestCopyObject(t *testing.T) { - wantObj := testStruct{1, 2, 3, 4, 5, 6, 7, 8} - wantN := binary.Size(wantObj) - b := &BytesIO{make([]byte, wantN)} - ctx := newContext() - if n, err := CopyObjectOut(ctx, b, 0, &wantObj, IOOpts{}); n != wantN || err != nil { - t.Fatalf("CopyObjectOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - var gotObj testStruct - if n, err := CopyObjectIn(ctx, b, 0, &gotObj, IOOpts{}); n != wantN || err != nil { - t.Errorf("CopyObjectIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if gotObj != wantObj { - t.Errorf("CopyObject round trip: got %+v, wanted %+v", gotObj, wantObj) - } -} - -func TestCopyStringInShort(t *testing.T) { - // Tests for string length <= copyStringIncrement. - want := strings.Repeat("A", copyStringIncrement-2) - mem := want + "\x00" - if got, err := CopyStringIn(newContext(), newBytesIOString(mem), 0, 2*copyStringIncrement, IOOpts{}); got != want || err != nil { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, nil)", got, err, want) - } -} - -func TestCopyStringInLong(t *testing.T) { - // Tests for copyStringIncrement < string length <= copyStringMaxInitBufLen - // (requiring multiple calls to IO.CopyIn()). - want := strings.Repeat("A", copyStringIncrement*3/4) + strings.Repeat("B", copyStringIncrement*3/4) - mem := want + "\x00" - if got, err := CopyStringIn(newContext(), newBytesIOString(mem), 0, 2*copyStringIncrement, IOOpts{}); got != want || err != nil { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, nil)", got, err, want) - } -} - -func TestCopyStringInVeryLong(t *testing.T) { - // Tests for string length > copyStringMaxInitBufLen (requiring buffer - // reallocation). - want := strings.Repeat("A", copyStringMaxInitBufLen*3/4) + strings.Repeat("B", copyStringMaxInitBufLen*3/4) - mem := want + "\x00" - if got, err := CopyStringIn(newContext(), newBytesIOString(mem), 0, 2*copyStringMaxInitBufLen, IOOpts{}); got != want || err != nil { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, nil)", got, err, want) - } -} - -func TestCopyStringInNoTerminatingZeroByte(t *testing.T) { - want := strings.Repeat("A", copyStringIncrement-1) - got, err := CopyStringIn(newContext(), newBytesIOString(want), 0, 2*copyStringIncrement, IOOpts{}) - if wantErr := syserror.EFAULT; got != want || err != wantErr { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, %v)", got, err, want, wantErr) - } -} - -func TestCopyStringInTruncatedByMaxlen(t *testing.T) { - got, err := CopyStringIn(newContext(), newBytesIOString(strings.Repeat("A", 10)), 0, 5, IOOpts{}) - if want, wantErr := strings.Repeat("A", 5), syserror.ENAMETOOLONG; got != want || err != wantErr { - t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, %v)", got, err, want, wantErr) - } -} - -func TestCopyInt32StringsInVec(t *testing.T) { - for _, test := range []struct { - str string - n int - initial []int32 - final []int32 - }{ - { - str: "100 200", - n: len("100 200"), - initial: []int32{1, 2}, - final: []int32{100, 200}, - }, - { - // Fewer values ok - str: "100", - n: len("100"), - initial: []int32{1, 2}, - final: []int32{100, 2}, - }, - { - // Extra values ok - str: "100 200 300", - n: len("100 200 "), - initial: []int32{1, 2}, - final: []int32{100, 200}, - }, - { - // Leading and trailing whitespace ok - str: " 100\t200\n", - n: len(" 100\t200\n"), - initial: []int32{1, 2}, - final: []int32{100, 200}, - }, - } { - t.Run(fmt.Sprintf("%q", test.str), func(t *testing.T) { - src := BytesIOSequence([]byte(test.str)) - dsts := append([]int32(nil), test.initial...) - if n, err := CopyInt32StringsInVec(newContext(), src.IO, src.Addrs, dsts, src.Opts); n != int64(test.n) || err != nil { - t.Errorf("CopyInt32StringsInVec: got (%d, %v), wanted (%d, nil)", n, err, test.n) - } - if !reflect.DeepEqual(dsts, test.final) { - t.Errorf("dsts: got %v, wanted %v", dsts, test.final) - } - }) - } -} - -func TestCopyInt32StringsInVecRequiresOneValidValue(t *testing.T) { - for _, s := range []string{"", "\n", "a123"} { - t.Run(fmt.Sprintf("%q", s), func(t *testing.T) { - src := BytesIOSequence([]byte(s)) - initial := []int32{1, 2} - dsts := append([]int32(nil), initial...) - if n, err := CopyInt32StringsInVec(newContext(), src.IO, src.Addrs, dsts, src.Opts); err != syserror.EINVAL { - t.Errorf("CopyInt32StringsInVec: got (%d, %v), wanted (_, %v)", n, err, syserror.EINVAL) - } - if !reflect.DeepEqual(dsts, initial) { - t.Errorf("dsts: got %v, wanted %v", dsts, initial) - } - }) - } -} - -func TestIOSequenceCopyOut(t *testing.T) { - buf := []byte("ABCD") - s := BytesIOSequence(buf) - - // CopyOut limited by len(src). - n, err := s.CopyOut(newContext(), []byte("fo")) - if wantN := 2; n != wantN || err != nil { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("foCD"); !bytes.Equal(buf, want) { - t.Errorf("buf: got %q, wanted %q", buf, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(2); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - // CopyOut limited by s.NumBytes(). - n, err = s.CopyOut(newContext(), []byte("obar")) - if wantN := 2; n != wantN || err != nil { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("foob"); !bytes.Equal(buf, want) { - t.Errorf("buf: got %q, wanted %q", buf, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(0); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } -} - -func TestIOSequenceCopyIn(t *testing.T) { - s := BytesIOSequence([]byte("foob")) - dst := []byte("ABCDEF") - - // CopyIn limited by len(dst). - n, err := s.CopyIn(newContext(), dst[:2]) - if wantN := 2; n != wantN || err != nil { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("foCDEF"); !bytes.Equal(dst, want) { - t.Errorf("dst: got %q, wanted %q", dst, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(2); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - // CopyIn limited by s.Remaining(). - n, err = s.CopyIn(newContext(), dst[2:]) - if wantN := 2; n != wantN || err != nil { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("foobEF"); !bytes.Equal(dst, want) { - t.Errorf("dst: got %q, wanted %q", dst, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(0); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } -} - -func TestIOSequenceZeroOut(t *testing.T) { - buf := []byte("ABCD") - s := BytesIOSequence(buf) - - // ZeroOut limited by toZero. - n, err := s.ZeroOut(newContext(), 2) - if wantN := int64(2); n != wantN || err != nil { - t.Errorf("ZeroOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("\x00\x00CD"); !bytes.Equal(buf, want) { - t.Errorf("buf: got %q, wanted %q", buf, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(2); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - // ZeroOut limited by s.NumBytes(). - n, err = s.ZeroOut(newContext(), 4) - if wantN := int64(2); n != wantN || err != nil { - t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if want := []byte("\x00\x00\x00\x00"); !bytes.Equal(buf, want) { - t.Errorf("buf: got %q, wanted %q", buf, want) - } - s = s.DropFirst(2) - if got, want := s.NumBytes(), int64(0); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } -} - -func TestIOSequenceTakeFirst(t *testing.T) { - s := BytesIOSequence([]byte("foobar")) - if got, want := s.NumBytes(), int64(6); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - s = s.TakeFirst(3) - if got, want := s.NumBytes(), int64(3); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - // TakeFirst(n) where n > s.NumBytes() is a no-op. - s = s.TakeFirst(9) - if got, want := s.NumBytes(), int64(3); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } - - var dst [3]byte - n, err := s.CopyIn(newContext(), dst[:]) - if wantN := 3; n != wantN || err != nil { - t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) - } - if got, want := dst[:], []byte("foo"); !bytes.Equal(got, want) { - t.Errorf("dst: got %q, wanted %q", got, want) - } - s = s.DropFirst(3) - if got, want := s.NumBytes(), int64(0); got != want { - t.Errorf("NumBytes: got %v, wanted %v", got, want) - } -} diff --git a/pkg/sentry/usermem/usermem_unsafe.go b/pkg/sentry/usermem/usermem_unsafe.go deleted file mode 100644 index 876783e78..000000000 --- a/pkg/sentry/usermem/usermem_unsafe.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package usermem - -import ( - "unsafe" -) - -// stringFromImmutableBytes is equivalent to string(bs), except that it never -// copies even if escape analysis can't prove that bs does not escape. This is -// only valid if bs is never mutated after stringFromImmutableBytes returns. -func stringFromImmutableBytes(bs []byte) string { - // Compare strings.Builder.String(). - return *(*string)(unsafe.Pointer(&bs)) -} diff --git a/pkg/sentry/usermem/usermem_x86.go b/pkg/sentry/usermem/usermem_x86.go deleted file mode 100644 index 8059b72d2..000000000 --- a/pkg/sentry/usermem/usermem_x86.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build amd64 i386 - -package usermem - -import "encoding/binary" - -const ( - // PageSize is the system page size. - PageSize = 1 << PageShift - - // HugePageSize is the system huge page size. - HugePageSize = 1 << HugePageShift - - // PageShift is the binary log of the system page size. - PageShift = 12 - - // HugePageShift is the binary log of the system huge page size. - HugePageShift = 21 -) - -var ( - // ByteOrder is the native byte order (little endian). - ByteOrder = binary.LittleEndian -) diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 51acdc4e9..6b1009328 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -26,14 +26,14 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/fspath", "//pkg/sentry/arch", - "//pkg/sentry/context", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", "//pkg/waiter", ], ) @@ -48,11 +48,11 @@ go_test( library = ":vfs", deps = [ "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", + "//pkg/context", + "//pkg/sentry/contexttest", "//pkg/sentry/kernel/auth", - "//pkg/sentry/usermem", "//pkg/sync", "//pkg/syserror", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/vfs/context.go b/pkg/sentry/vfs/context.go index 705194ebc..d97362b9a 100644 --- a/pkg/sentry/vfs/context.go +++ b/pkg/sentry/vfs/context.go @@ -15,7 +15,7 @@ package vfs import ( - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" ) // contextID is this package's type for context.Context.Value keys. diff --git a/pkg/sentry/vfs/device.go b/pkg/sentry/vfs/device.go index 9f9d6e783..3af2aa58d 100644 --- a/pkg/sentry/vfs/device.go +++ b/pkg/sentry/vfs/device.go @@ -17,7 +17,7 @@ package vfs import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 51c95c2d9..225024463 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -18,12 +18,12 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index c00b3c84b..fb9b87fdc 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -19,12 +19,12 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go index 9ed58512f..1720d325d 100644 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ b/pkg/sentry/vfs/file_description_impl_util_test.go @@ -22,11 +22,11 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // fileDescription is the common fd struct which a filesystem implementation diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index ea78f555b..a06a6caf3 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -18,8 +18,8 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" ) // A Filesystem is a tree of nodes represented by Dentries, which forms part of diff --git a/pkg/sentry/vfs/filesystem_type.go b/pkg/sentry/vfs/filesystem_type.go index 023301780..c58b70728 100644 --- a/pkg/sentry/vfs/filesystem_type.go +++ b/pkg/sentry/vfs/filesystem_type.go @@ -18,7 +18,7 @@ import ( "bytes" "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 00177b371..d39528051 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -19,7 +19,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/vfs/pathname.go b/pkg/sentry/vfs/pathname.go index cf80df90e..b318c681a 100644 --- a/pkg/sentry/vfs/pathname.go +++ b/pkg/sentry/vfs/pathname.go @@ -15,8 +15,8 @@ package vfs import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/vfs/testutil.go b/pkg/sentry/vfs/testutil.go index ee5c8b9e2..392c7611e 100644 --- a/pkg/sentry/vfs/testutil.go +++ b/pkg/sentry/vfs/testutil.go @@ -18,8 +18,8 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" ) diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 1f6f56293..b2bf48853 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -31,8 +31,8 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" diff --git a/pkg/usermem/BUILD b/pkg/usermem/BUILD new file mode 100644 index 000000000..ff8b9e91a --- /dev/null +++ b/pkg/usermem/BUILD @@ -0,0 +1,55 @@ +load("//tools:defs.bzl", "go_library", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +package(licenses = ["notice"]) + +go_template_instance( + name = "addr_range", + out = "addr_range.go", + package = "usermem", + prefix = "Addr", + template = "//pkg/segment:generic_range", + types = { + "T": "Addr", + }, +) + +go_library( + name = "usermem", + srcs = [ + "access_type.go", + "addr.go", + "addr_range.go", + "addr_range_seq_unsafe.go", + "bytes_io.go", + "bytes_io_unsafe.go", + "usermem.go", + "usermem_arm64.go", + "usermem_unsafe.go", + "usermem_x86.go", + ], + visibility = ["//:sandbox"], + deps = [ + "//pkg/atomicbitops", + "//pkg/binary", + "//pkg/context", + "//pkg/log", + "//pkg/safemem", + "//pkg/syserror", + ], +) + +go_test( + name = "usermem_test", + size = "small", + srcs = [ + "addr_range_seq_test.go", + "usermem_test.go", + ], + library = ":usermem", + deps = [ + "//pkg/context", + "//pkg/safemem", + "//pkg/syserror", + ], +) diff --git a/pkg/usermem/README.md b/pkg/usermem/README.md new file mode 100644 index 000000000..f6d2137eb --- /dev/null +++ b/pkg/usermem/README.md @@ -0,0 +1,31 @@ +This package defines primitives for sentry access to application memory. + +Major types: + +- The `IO` interface represents a virtual address space and provides I/O + methods on that address space. `IO` is the lowest-level primitive. The + primary implementation of the `IO` interface is `mm.MemoryManager`. + +- `IOSequence` represents a collection of individually-contiguous address + ranges in a `IO` that is operated on sequentially, analogous to Linux's + `struct iov_iter`. + +Major usage patterns: + +- Access to a task's virtual memory, subject to the application's memory + protections and while running on that task's goroutine, from a context that + is at or above the level of the `kernel` package (e.g. most syscall + implementations in `syscalls/linux`); use the `kernel.Task.Copy*` wrappers + defined in `kernel/task_usermem.go`. + +- Access to a task's virtual memory, from a context that is at or above the + level of the `kernel` package, but where any of the above constraints does + not hold (e.g. `PTRACE_POKEDATA`, which ignores application memory + protections); obtain the task's `mm.MemoryManager` by calling + `kernel.Task.MemoryManager`, and call its `IO` methods directly. + +- Access to a task's virtual memory, from a context that is below the level of + the `kernel` package (e.g. filesystem I/O); clients must pass I/O arguments + from higher layers, usually in the form of an `IOSequence`. The + `kernel.Task.SingleIOSequence` and `kernel.Task.IovecsIOSequence` functions + in `kernel/task_usermem.go` are convenience functions for doing so. diff --git a/pkg/usermem/access_type.go b/pkg/usermem/access_type.go new file mode 100644 index 000000000..9c1742a59 --- /dev/null +++ b/pkg/usermem/access_type.go @@ -0,0 +1,128 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package usermem + +import ( + "syscall" +) + +// AccessType specifies memory access types. This is used for +// setting mapping permissions, as well as communicating faults. +// +// +stateify savable +type AccessType struct { + // Read is read access. + Read bool + + // Write is write access. + Write bool + + // Execute is executable access. + Execute bool +} + +// String returns a pretty representation of access. This looks like the +// familiar r-x, rw-, etc. and can be relied on as such. +func (a AccessType) String() string { + bits := [3]byte{'-', '-', '-'} + if a.Read { + bits[0] = 'r' + } + if a.Write { + bits[1] = 'w' + } + if a.Execute { + bits[2] = 'x' + } + return string(bits[:]) +} + +// Any returns true iff at least one of Read, Write or Execute is true. +func (a AccessType) Any() bool { + return a.Read || a.Write || a.Execute +} + +// Prot returns the system prot (syscall.PROT_READ, etc.) for this access. +func (a AccessType) Prot() int { + var prot int + if a.Read { + prot |= syscall.PROT_READ + } + if a.Write { + prot |= syscall.PROT_WRITE + } + if a.Execute { + prot |= syscall.PROT_EXEC + } + return prot +} + +// SupersetOf returns true iff the access types in a are a superset of the +// access types in other. +func (a AccessType) SupersetOf(other AccessType) bool { + if !a.Read && other.Read { + return false + } + if !a.Write && other.Write { + return false + } + if !a.Execute && other.Execute { + return false + } + return true +} + +// Intersect returns the access types set in both a and other. +func (a AccessType) Intersect(other AccessType) AccessType { + return AccessType{ + Read: a.Read && other.Read, + Write: a.Write && other.Write, + Execute: a.Execute && other.Execute, + } +} + +// Union returns the access types set in either a or other. +func (a AccessType) Union(other AccessType) AccessType { + return AccessType{ + Read: a.Read || other.Read, + Write: a.Write || other.Write, + Execute: a.Execute || other.Execute, + } +} + +// Effective returns the set of effective access types allowed by a, even if +// some types are not explicitly allowed. +func (a AccessType) Effective() AccessType { + // In Linux, Write and Execute access generally imply Read access. See + // mm/mmap.c:protection_map. + // + // The notable exception is get_user_pages, which only checks against + // the original vma flags. That said, most user memory accesses do not + // use GUP. + if a.Write || a.Execute { + a.Read = true + } + return a +} + +// Convenient access types. +var ( + NoAccess = AccessType{} + Read = AccessType{Read: true} + Write = AccessType{Write: true} + Execute = AccessType{Execute: true} + ReadWrite = AccessType{Read: true, Write: true} + AnyAccess = AccessType{Read: true, Write: true, Execute: true} +) diff --git a/pkg/usermem/addr.go b/pkg/usermem/addr.go new file mode 100644 index 000000000..e79210804 --- /dev/null +++ b/pkg/usermem/addr.go @@ -0,0 +1,108 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package usermem + +import ( + "fmt" +) + +// Addr represents a generic virtual address. +// +// +stateify savable +type Addr uintptr + +// AddLength adds the given length to start and returns the result. ok is true +// iff adding the length did not overflow the range of Addr. +// +// Note: This function is usually used to get the end of an address range +// defined by its start address and length. Since the resulting end is +// exclusive, end == 0 is technically valid, and corresponds to a range that +// extends to the end of the address space, but ok will be false. This isn't +// expected to ever come up in practice. +func (v Addr) AddLength(length uint64) (end Addr, ok bool) { + end = v + Addr(length) + // The second half of the following check is needed in case uintptr is + // smaller than 64 bits. + ok = end >= v && length <= uint64(^Addr(0)) + return +} + +// RoundDown returns the address rounded down to the nearest page boundary. +func (v Addr) RoundDown() Addr { + return v & ^Addr(PageSize-1) +} + +// RoundUp returns the address rounded up to the nearest page boundary. ok is +// true iff rounding up did not wrap around. +func (v Addr) RoundUp() (addr Addr, ok bool) { + addr = Addr(v + PageSize - 1).RoundDown() + ok = addr >= v + return +} + +// MustRoundUp is equivalent to RoundUp, but panics if rounding up wraps +// around. +func (v Addr) MustRoundUp() Addr { + addr, ok := v.RoundUp() + if !ok { + panic(fmt.Sprintf("usermem.Addr(%d).RoundUp() wraps", v)) + } + return addr +} + +// HugeRoundDown returns the address rounded down to the nearest huge page +// boundary. +func (v Addr) HugeRoundDown() Addr { + return v & ^Addr(HugePageSize-1) +} + +// HugeRoundUp returns the address rounded up to the nearest huge page boundary. +// ok is true iff rounding up did not wrap around. +func (v Addr) HugeRoundUp() (addr Addr, ok bool) { + addr = Addr(v + HugePageSize - 1).HugeRoundDown() + ok = addr >= v + return +} + +// PageOffset returns the offset of v into the current page. +func (v Addr) PageOffset() uint64 { + return uint64(v & Addr(PageSize-1)) +} + +// IsPageAligned returns true if v.PageOffset() == 0. +func (v Addr) IsPageAligned() bool { + return v.PageOffset() == 0 +} + +// AddrRange is a range of Addrs. +// +// type AddrRange + +// ToRange returns [v, v+length). +func (v Addr) ToRange(length uint64) (AddrRange, bool) { + end, ok := v.AddLength(length) + return AddrRange{v, end}, ok +} + +// IsPageAligned returns true if ar.Start.IsPageAligned() and +// ar.End.IsPageAligned(). +func (ar AddrRange) IsPageAligned() bool { + return ar.Start.IsPageAligned() && ar.End.IsPageAligned() +} + +// String implements fmt.Stringer.String. +func (ar AddrRange) String() string { + return fmt.Sprintf("[%#x, %#x)", ar.Start, ar.End) +} diff --git a/pkg/usermem/addr_range_seq_test.go b/pkg/usermem/addr_range_seq_test.go new file mode 100644 index 000000000..82f735026 --- /dev/null +++ b/pkg/usermem/addr_range_seq_test.go @@ -0,0 +1,197 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package usermem + +import ( + "testing" +) + +var addrRangeSeqTests = []struct { + desc string + ranges []AddrRange +}{ + { + desc: "Empty sequence", + }, + { + desc: "Single empty AddrRange", + ranges: []AddrRange{ + {0x10, 0x10}, + }, + }, + { + desc: "Single non-empty AddrRange of length 1", + ranges: []AddrRange{ + {0x10, 0x11}, + }, + }, + { + desc: "Single non-empty AddrRange of length 2", + ranges: []AddrRange{ + {0x10, 0x12}, + }, + }, + { + desc: "Multiple non-empty AddrRanges", + ranges: []AddrRange{ + {0x10, 0x11}, + {0x20, 0x22}, + }, + }, + { + desc: "Multiple AddrRanges including empty AddrRanges", + ranges: []AddrRange{ + {0x10, 0x10}, + {0x20, 0x20}, + {0x30, 0x33}, + {0x40, 0x44}, + {0x50, 0x50}, + {0x60, 0x60}, + {0x70, 0x77}, + {0x80, 0x88}, + {0x90, 0x90}, + {0xa0, 0xa0}, + }, + }, +} + +func testAddrRangeSeqEqualityWithTailIteration(t *testing.T, ars AddrRangeSeq, wantRanges []AddrRange) { + var wantLen int64 + for _, ar := range wantRanges { + wantLen += int64(ar.Length()) + } + + var i int + for !ars.IsEmpty() { + if gotLen := ars.NumBytes(); gotLen != wantLen { + t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d", i, ars, gotLen, wantLen) + } + if gotN, wantN := ars.NumRanges(), len(wantRanges)-i; gotN != wantN { + t.Errorf("Iteration %d: %v.NumRanges(): got %d, wanted %d", i, ars, gotN, wantN) + } + got := ars.Head() + if i >= len(wantRanges) { + t.Errorf("Iteration %d: %v.Head(): got %s, wanted ", i, ars, got) + } else if want := wantRanges[i]; got != want { + t.Errorf("Iteration %d: %v.Head(): got %s, wanted %s", i, ars, got, want) + } + ars = ars.Tail() + wantLen -= int64(got.Length()) + i++ + } + if gotLen := ars.NumBytes(); gotLen != 0 || wantLen != 0 { + t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d (which should be 0)", i, ars, gotLen, wantLen) + } + if gotN := ars.NumRanges(); gotN != 0 { + t.Errorf("Iteration %d: %v.NumRanges(): got %d, wanted 0", i, ars, gotN) + } +} + +func TestAddrRangeSeqTailIteration(t *testing.T) { + for _, test := range addrRangeSeqTests { + t.Run(test.desc, func(t *testing.T) { + testAddrRangeSeqEqualityWithTailIteration(t, AddrRangeSeqFromSlice(test.ranges), test.ranges) + }) + } +} + +func TestAddrRangeSeqDropFirstEmpty(t *testing.T) { + var ars AddrRangeSeq + if got, want := ars.DropFirst(1), ars; got != want { + t.Errorf("%v.DropFirst(1): got %v, wanted %v", ars, got, want) + } +} + +func TestAddrRangeSeqDropSingleByteIteration(t *testing.T) { + // Tests AddrRangeSeq iteration using Head/DropFirst, simulating + // I/O-per-AddrRange. + for _, test := range addrRangeSeqTests { + t.Run(test.desc, func(t *testing.T) { + // Figure out what AddrRanges we expect to see. + var wantLen int64 + var wantRanges []AddrRange + for _, ar := range test.ranges { + wantLen += int64(ar.Length()) + wantRanges = append(wantRanges, ar) + if ar.Length() == 0 { + // We "do" 0 bytes of I/O and then call DropFirst(0), + // advancing to the next AddrRange. + continue + } + // Otherwise we "do" 1 byte of I/O and then call DropFirst(1), + // advancing the AddrRange by 1 byte, or to the next AddrRange + // if this one is exhausted. + for ar.Start++; ar.Length() != 0; ar.Start++ { + wantRanges = append(wantRanges, ar) + } + } + t.Logf("Expected AddrRanges: %s (%d bytes)", wantRanges, wantLen) + + ars := AddrRangeSeqFromSlice(test.ranges) + var i int + for !ars.IsEmpty() { + if gotLen := ars.NumBytes(); gotLen != wantLen { + t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d", i, ars, gotLen, wantLen) + } + got := ars.Head() + if i >= len(wantRanges) { + t.Errorf("Iteration %d: %v.Head(): got %s, wanted ", i, ars, got) + } else if want := wantRanges[i]; got != want { + t.Errorf("Iteration %d: %v.Head(): got %s, wanted %s", i, ars, got, want) + } + if got.Length() == 0 { + ars = ars.DropFirst(0) + } else { + ars = ars.DropFirst(1) + wantLen-- + } + i++ + } + if gotLen := ars.NumBytes(); gotLen != 0 || wantLen != 0 { + t.Errorf("Iteration %d: %v.NumBytes(): got %d, wanted %d (which should be 0)", i, ars, gotLen, wantLen) + } + }) + } +} + +func TestAddrRangeSeqTakeFirstEmpty(t *testing.T) { + var ars AddrRangeSeq + if got, want := ars.TakeFirst(1), ars; got != want { + t.Errorf("%v.TakeFirst(1): got %v, wanted %v", ars, got, want) + } +} + +func TestAddrRangeSeqTakeFirst(t *testing.T) { + ranges := []AddrRange{ + {0x10, 0x11}, + {0x20, 0x22}, + {0x30, 0x30}, + {0x40, 0x44}, + {0x50, 0x55}, + {0x60, 0x60}, + {0x70, 0x77}, + } + ars := AddrRangeSeqFromSlice(ranges).TakeFirst(5) + want := []AddrRange{ + {0x10, 0x11}, // +1 byte (total 1 byte), not truncated + {0x20, 0x22}, // +2 bytes (total 3 bytes), not truncated + {0x30, 0x30}, // +0 bytes (total 3 bytes), no change + {0x40, 0x42}, // +2 bytes (total 5 bytes), partially truncated + {0x50, 0x50}, // +0 bytes (total 5 bytes), fully truncated + {0x60, 0x60}, // +0 bytes (total 5 bytes), "fully truncated" (no change) + {0x70, 0x70}, // +0 bytes (total 5 bytes), fully truncated + } + testAddrRangeSeqEqualityWithTailIteration(t, ars, want) +} diff --git a/pkg/usermem/addr_range_seq_unsafe.go b/pkg/usermem/addr_range_seq_unsafe.go new file mode 100644 index 000000000..c09337c15 --- /dev/null +++ b/pkg/usermem/addr_range_seq_unsafe.go @@ -0,0 +1,277 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package usermem + +import ( + "bytes" + "fmt" + "reflect" + "unsafe" +) + +// An AddrRangeSeq represents a sequence of AddrRanges. +// +// AddrRangeSeqs are immutable and may be copied by value. The zero value of +// AddrRangeSeq represents an empty sequence. +// +// An AddrRangeSeq may contain AddrRanges with a length of 0. This is necessary +// since zero-length AddrRanges are significant to MM bounds checks. +type AddrRangeSeq struct { + // If length is 0, then the AddrRangeSeq represents no AddrRanges. + // Invariants: data == 0; offset == 0; limit == 0. + // + // If length is 1, then the AddrRangeSeq represents the single + // AddrRange{offset, offset+limit}. Invariants: data == 0. + // + // Otherwise, length >= 2, and the AddrRangeSeq represents the `length` + // AddrRanges in the array of AddrRanges starting at address `data`, + // starting at `offset` bytes into the first AddrRange and limited to the + // following `limit` bytes. (AddrRanges after `limit` are still iterated, + // but are truncated to a length of 0.) Invariants: data != 0; offset <= + // data[0].Length(); limit > 0; offset+limit <= the combined length of all + // AddrRanges in the array. + data unsafe.Pointer + length int + offset Addr + limit Addr +} + +// AddrRangeSeqOf returns an AddrRangeSeq representing the single AddrRange ar. +func AddrRangeSeqOf(ar AddrRange) AddrRangeSeq { + return AddrRangeSeq{ + length: 1, + offset: ar.Start, + limit: ar.Length(), + } +} + +// AddrRangeSeqFromSlice returns an AddrRangeSeq representing all AddrRanges in +// slice. +// +// Whether the returned AddrRangeSeq shares memory with slice is unspecified; +// clients should avoid mutating slices passed to AddrRangeSeqFromSlice. +// +// Preconditions: The combined length of all AddrRanges in slice <= +// math.MaxInt64. +func AddrRangeSeqFromSlice(slice []AddrRange) AddrRangeSeq { + var limit int64 + for _, ar := range slice { + len64 := int64(ar.Length()) + if len64 < 0 { + panic(fmt.Sprintf("Length of AddrRange %v overflows int64", ar)) + } + sum := limit + len64 + if sum < limit { + panic(fmt.Sprintf("Total length of AddrRanges %v overflows int64", slice)) + } + limit = sum + } + return addrRangeSeqFromSliceLimited(slice, limit) +} + +// Preconditions: The combined length of all AddrRanges in slice <= limit. +// limit >= 0. If len(slice) != 0, then limit > 0. +func addrRangeSeqFromSliceLimited(slice []AddrRange, limit int64) AddrRangeSeq { + switch len(slice) { + case 0: + return AddrRangeSeq{} + case 1: + return AddrRangeSeq{ + length: 1, + offset: slice[0].Start, + limit: Addr(limit), + } + default: + return AddrRangeSeq{ + data: unsafe.Pointer(&slice[0]), + length: len(slice), + limit: Addr(limit), + } + } +} + +// IsEmpty returns true if ars.NumRanges() == 0. +// +// Note that since AddrRangeSeq may contain AddrRanges with a length of zero, +// an AddrRange representing 0 bytes (AddrRangeSeq.NumBytes() == 0) is not +// necessarily empty. +func (ars AddrRangeSeq) IsEmpty() bool { + return ars.length == 0 +} + +// NumRanges returns the number of AddrRanges in ars. +func (ars AddrRangeSeq) NumRanges() int { + return ars.length +} + +// NumBytes returns the number of bytes represented by ars. +func (ars AddrRangeSeq) NumBytes() int64 { + return int64(ars.limit) +} + +// Head returns the first AddrRange in ars. +// +// Preconditions: !ars.IsEmpty(). +func (ars AddrRangeSeq) Head() AddrRange { + if ars.length == 0 { + panic("empty AddrRangeSeq") + } + if ars.length == 1 { + return AddrRange{ars.offset, ars.offset + ars.limit} + } + ar := *(*AddrRange)(ars.data) + ar.Start += ars.offset + if ar.Length() > ars.limit { + ar.End = ar.Start + ars.limit + } + return ar +} + +// Tail returns an AddrRangeSeq consisting of all AddrRanges in ars after the +// first. +// +// Preconditions: !ars.IsEmpty(). +func (ars AddrRangeSeq) Tail() AddrRangeSeq { + if ars.length == 0 { + panic("empty AddrRangeSeq") + } + if ars.length == 1 { + return AddrRangeSeq{} + } + return ars.externalTail() +} + +// Preconditions: ars.length >= 2. +func (ars AddrRangeSeq) externalTail() AddrRangeSeq { + headLen := (*AddrRange)(ars.data).Length() - ars.offset + var tailLimit int64 + if ars.limit > headLen { + tailLimit = int64(ars.limit - headLen) + } + var extSlice []AddrRange + extSliceHdr := (*reflect.SliceHeader)(unsafe.Pointer(&extSlice)) + extSliceHdr.Data = uintptr(ars.data) + extSliceHdr.Len = ars.length + extSliceHdr.Cap = ars.length + return addrRangeSeqFromSliceLimited(extSlice[1:], tailLimit) +} + +// DropFirst returns an AddrRangeSeq equivalent to ars, but with the first n +// bytes omitted. If n > ars.NumBytes(), DropFirst returns an empty +// AddrRangeSeq. +// +// If !ars.IsEmpty() and ars.Head().Length() == 0, DropFirst will always omit +// at least ars.Head(), even if n == 0. This guarantees that the basic pattern +// of: +// +// for !ars.IsEmpty() { +// n, err = doIOWith(ars.Head()) +// if err != nil { +// return err +// } +// ars = ars.DropFirst(n) +// } +// +// works even in the presence of zero-length AddrRanges. +// +// Preconditions: n >= 0. +func (ars AddrRangeSeq) DropFirst(n int) AddrRangeSeq { + if n < 0 { + panic(fmt.Sprintf("invalid n: %d", n)) + } + return ars.DropFirst64(int64(n)) +} + +// DropFirst64 is equivalent to DropFirst but takes an int64. +func (ars AddrRangeSeq) DropFirst64(n int64) AddrRangeSeq { + if n < 0 { + panic(fmt.Sprintf("invalid n: %d", n)) + } + if Addr(n) > ars.limit { + return AddrRangeSeq{} + } + // Handle initial empty AddrRange. + switch ars.length { + case 0: + return AddrRangeSeq{} + case 1: + if ars.limit == 0 { + return AddrRangeSeq{} + } + default: + if rawHeadLen := (*AddrRange)(ars.data).Length(); ars.offset == rawHeadLen { + ars = ars.externalTail() + } + } + for n != 0 { + // Calling ars.Head() here is surprisingly expensive, so inline getting + // the head's length. + var headLen Addr + if ars.length == 1 { + headLen = ars.limit + } else { + headLen = (*AddrRange)(ars.data).Length() - ars.offset + } + if Addr(n) < headLen { + // Dropping ends partway through the head AddrRange. + ars.offset += Addr(n) + ars.limit -= Addr(n) + return ars + } + n -= int64(headLen) + ars = ars.Tail() + } + return ars +} + +// TakeFirst returns an AddrRangeSeq equivalent to ars, but iterating at most n +// bytes. TakeFirst never removes AddrRanges from ars; AddrRanges beyond the +// first n bytes are reduced to a length of zero, but will still be iterated. +// +// Preconditions: n >= 0. +func (ars AddrRangeSeq) TakeFirst(n int) AddrRangeSeq { + if n < 0 { + panic(fmt.Sprintf("invalid n: %d", n)) + } + return ars.TakeFirst64(int64(n)) +} + +// TakeFirst64 is equivalent to TakeFirst but takes an int64. +func (ars AddrRangeSeq) TakeFirst64(n int64) AddrRangeSeq { + if n < 0 { + panic(fmt.Sprintf("invalid n: %d", n)) + } + if ars.limit > Addr(n) { + ars.limit = Addr(n) + } + return ars +} + +// String implements fmt.Stringer.String. +func (ars AddrRangeSeq) String() string { + // This is deliberately chosen to be the same as fmt's automatic stringer + // for []AddrRange. + var buf bytes.Buffer + buf.WriteByte('[') + var sep string + for !ars.IsEmpty() { + buf.WriteString(sep) + sep = " " + buf.WriteString(ars.Head().String()) + ars = ars.Tail() + } + buf.WriteByte(']') + return buf.String() +} diff --git a/pkg/usermem/bytes_io.go b/pkg/usermem/bytes_io.go new file mode 100644 index 000000000..e177d30eb --- /dev/null +++ b/pkg/usermem/bytes_io.go @@ -0,0 +1,141 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package usermem + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/syserror" +) + +const maxInt = int(^uint(0) >> 1) + +// BytesIO implements IO using a byte slice. Addresses are interpreted as +// offsets into the slice. Reads and writes beyond the end of the slice return +// EFAULT. +type BytesIO struct { + Bytes []byte +} + +// CopyOut implements IO.CopyOut. +func (b *BytesIO) CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error) { + rngN, rngErr := b.rangeCheck(addr, len(src)) + if rngN == 0 { + return 0, rngErr + } + return copy(b.Bytes[int(addr):], src[:rngN]), rngErr +} + +// CopyIn implements IO.CopyIn. +func (b *BytesIO) CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error) { + rngN, rngErr := b.rangeCheck(addr, len(dst)) + if rngN == 0 { + return 0, rngErr + } + return copy(dst[:rngN], b.Bytes[int(addr):]), rngErr +} + +// ZeroOut implements IO.ZeroOut. +func (b *BytesIO) ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error) { + if toZero > int64(maxInt) { + return 0, syserror.EINVAL + } + rngN, rngErr := b.rangeCheck(addr, int(toZero)) + if rngN == 0 { + return 0, rngErr + } + zeroSlice := b.Bytes[int(addr) : int(addr)+rngN] + for i := range zeroSlice { + zeroSlice[i] = 0 + } + return int64(rngN), rngErr +} + +// CopyOutFrom implements IO.CopyOutFrom. +func (b *BytesIO) CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) { + dsts, rngErr := b.blocksFromAddrRanges(ars) + n, err := src.ReadToBlocks(dsts) + if err != nil { + return int64(n), err + } + return int64(n), rngErr +} + +// CopyInTo implements IO.CopyInTo. +func (b *BytesIO) CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) { + srcs, rngErr := b.blocksFromAddrRanges(ars) + n, err := dst.WriteFromBlocks(srcs) + if err != nil { + return int64(n), err + } + return int64(n), rngErr +} + +func (b *BytesIO) rangeCheck(addr Addr, length int) (int, error) { + if length == 0 { + return 0, nil + } + if length < 0 { + return 0, syserror.EINVAL + } + max := Addr(len(b.Bytes)) + if addr >= max { + return 0, syserror.EFAULT + } + end, ok := addr.AddLength(uint64(length)) + if !ok || end > max { + return int(max - addr), syserror.EFAULT + } + return length, nil +} + +func (b *BytesIO) blocksFromAddrRanges(ars AddrRangeSeq) (safemem.BlockSeq, error) { + switch ars.NumRanges() { + case 0: + return safemem.BlockSeq{}, nil + case 1: + block, err := b.blockFromAddrRange(ars.Head()) + return safemem.BlockSeqOf(block), err + default: + blocks := make([]safemem.Block, 0, ars.NumRanges()) + for !ars.IsEmpty() { + block, err := b.blockFromAddrRange(ars.Head()) + if block.Len() != 0 { + blocks = append(blocks, block) + } + if err != nil { + return safemem.BlockSeqFromSlice(blocks), err + } + ars = ars.Tail() + } + return safemem.BlockSeqFromSlice(blocks), nil + } +} + +func (b *BytesIO) blockFromAddrRange(ar AddrRange) (safemem.Block, error) { + n, err := b.rangeCheck(ar.Start, int(ar.Length())) + if n == 0 { + return safemem.Block{}, err + } + return safemem.BlockFromSafeSlice(b.Bytes[int(ar.Start) : int(ar.Start)+n]), err +} + +// BytesIOSequence returns an IOSequence representing the given byte slice. +func BytesIOSequence(buf []byte) IOSequence { + return IOSequence{ + IO: &BytesIO{buf}, + Addrs: AddrRangeSeqOf(AddrRange{0, Addr(len(buf))}), + } +} diff --git a/pkg/usermem/bytes_io_unsafe.go b/pkg/usermem/bytes_io_unsafe.go new file mode 100644 index 000000000..20de5037d --- /dev/null +++ b/pkg/usermem/bytes_io_unsafe.go @@ -0,0 +1,47 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package usermem + +import ( + "sync/atomic" + "unsafe" + + "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/context" +) + +// SwapUint32 implements IO.SwapUint32. +func (b *BytesIO) SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error) { + if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil { + return 0, rngErr + } + return atomic.SwapUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)])), new), nil +} + +// CompareAndSwapUint32 implements IO.CompareAndSwapUint32. +func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error) { + if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil { + return 0, rngErr + } + return atomicbitops.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)])), old, new), nil +} + +// LoadUint32 implements IO.LoadUint32. +func (b *BytesIO) LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) { + if _, err := b.rangeCheck(addr, 4); err != nil { + return 0, err + } + return atomic.LoadUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)]))), nil +} diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go new file mode 100644 index 000000000..71fd4e155 --- /dev/null +++ b/pkg/usermem/usermem.go @@ -0,0 +1,597 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package usermem governs access to user memory. +package usermem + +import ( + "bytes" + "errors" + "io" + "strconv" + + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/syserror" +) + +// IO provides access to the contents of a virtual memory space. +// +// FIXME(b/38173783): Implementations of IO cannot expect ctx to contain any +// meaningful data. +type IO interface { + // CopyOut copies len(src) bytes from src to the memory mapped at addr. It + // returns the number of bytes copied. If the number of bytes copied is < + // len(src), it returns a non-nil error explaining why. + // + // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or + // any following locks in the lock order. + // + // Postconditions: CopyOut does not retain src. + CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error) + + // CopyIn copies len(dst) bytes from the memory mapped at addr to dst. + // It returns the number of bytes copied. If the number of bytes copied is + // < len(dst), it returns a non-nil error explaining why. + // + // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or + // any following locks in the lock order. + // + // Postconditions: CopyIn does not retain dst. + CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error) + + // ZeroOut sets toZero bytes to 0, starting at addr. It returns the number + // of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a + // non-nil error explaining why. + // + // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or + // any following locks in the lock order. toZero >= 0. + ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error) + + // CopyOutFrom copies ars.NumBytes() bytes from src to the memory mapped at + // ars. It returns the number of bytes copied, which may be less than the + // number of bytes read from src if copying fails. CopyOutFrom may return a + // partial copy without an error iff src.ReadToBlocks returns a partial + // read without an error. + // + // CopyOutFrom calls src.ReadToBlocks at most once. + // + // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or + // any following locks in the lock order. src.ReadToBlocks must not block + // on mm.MemoryManager.activeMu or any preceding locks in the lock order. + CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) + + // CopyInTo copies ars.NumBytes() bytes from the memory mapped at ars to + // dst. It returns the number of bytes copied. CopyInTo may return a + // partial copy without an error iff dst.WriteFromBlocks returns a partial + // write without an error. + // + // CopyInTo calls dst.WriteFromBlocks at most once. + // + // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or + // any following locks in the lock order. dst.WriteFromBlocks must not + // block on mm.MemoryManager.activeMu or any preceding locks in the lock + // order. + CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) + + // TODO(jamieliu): The requirement that CopyOutFrom/CopyInTo call src/dst + // at most once, which is unnecessary in most cases, forces implementations + // to gather safemem.Blocks into a single slice to pass to src/dst. Add + // CopyOutFromIter/CopyInToIter, which relaxes this restriction, to avoid + // this allocation. + + // SwapUint32 atomically sets the uint32 value at addr to new and + // returns the previous value. + // + // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or + // any following locks in the lock order. addr must be aligned to a 4-byte + // boundary. + SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error) + + // CompareAndSwapUint32 atomically compares the uint32 value at addr to + // old; if they are equal, the value in memory is replaced by new. In + // either case, the previous value stored in memory is returned. + // + // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or + // any following locks in the lock order. addr must be aligned to a 4-byte + // boundary. + CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error) + + // LoadUint32 atomically loads the uint32 value at addr and returns it. + // + // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or + // any following locks in the lock order. addr must be aligned to a 4-byte + // boundary. + LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) +} + +// IOOpts contains options applicable to all IO methods. +type IOOpts struct { + // If IgnorePermissions is true, application-defined memory protections set + // by mmap(2) or mprotect(2) will be ignored. (Memory protections required + // by the target of the mapping are never ignored.) + IgnorePermissions bool + + // If AddressSpaceActive is true, the IO implementation may assume that it + // has an active AddressSpace and can therefore use AddressSpace copying + // without performing activation. See mm/io.go for details. + AddressSpaceActive bool +} + +// IOReadWriter is an io.ReadWriter that reads from / writes to addresses +// starting at addr in IO. The preconditions that apply to IO.CopyIn and +// IO.CopyOut also apply to IOReadWriter.Read and IOReadWriter.Write +// respectively. +type IOReadWriter struct { + Ctx context.Context + IO IO + Addr Addr + Opts IOOpts +} + +// Read implements io.Reader.Read. +// +// Note that an address space does not have an "end of file", so Read can only +// return io.EOF if IO.CopyIn returns io.EOF. Attempts to read unmapped or +// unreadable memory, or beyond the end of the address space, should return +// EFAULT. +func (rw *IOReadWriter) Read(dst []byte) (int, error) { + n, err := rw.IO.CopyIn(rw.Ctx, rw.Addr, dst, rw.Opts) + end, ok := rw.Addr.AddLength(uint64(n)) + if ok { + rw.Addr = end + } else { + // Disallow wraparound. + rw.Addr = ^Addr(0) + if err != nil { + err = syserror.EFAULT + } + } + return n, err +} + +// Writer implements io.Writer.Write. +func (rw *IOReadWriter) Write(src []byte) (int, error) { + n, err := rw.IO.CopyOut(rw.Ctx, rw.Addr, src, rw.Opts) + end, ok := rw.Addr.AddLength(uint64(n)) + if ok { + rw.Addr = end + } else { + // Disallow wraparound. + rw.Addr = ^Addr(0) + if err != nil { + err = syserror.EFAULT + } + } + return n, err +} + +// CopyObjectOut copies a fixed-size value or slice of fixed-size values from +// src to the memory mapped at addr in uio. It returns the number of bytes +// copied. +// +// CopyObjectOut must use reflection to encode src; performance-sensitive +// clients should do encoding manually and use uio.CopyOut directly. +// +// Preconditions: As for IO.CopyOut. +func CopyObjectOut(ctx context.Context, uio IO, addr Addr, src interface{}, opts IOOpts) (int, error) { + w := &IOReadWriter{ + Ctx: ctx, + IO: uio, + Addr: addr, + Opts: opts, + } + // Allocate a byte slice the size of the object being marshaled. This + // adds an extra reflection call, but avoids needing to grow the slice + // during encoding, which can result in many heap-allocated slices. + b := make([]byte, 0, binary.Size(src)) + return w.Write(binary.Marshal(b, ByteOrder, src)) +} + +// CopyObjectIn copies a fixed-size value or slice of fixed-size values from +// the memory mapped at addr in uio to dst. It returns the number of bytes +// copied. +// +// CopyObjectIn must use reflection to decode dst; performance-sensitive +// clients should use uio.CopyIn directly and do decoding manually. +// +// Preconditions: As for IO.CopyIn. +func CopyObjectIn(ctx context.Context, uio IO, addr Addr, dst interface{}, opts IOOpts) (int, error) { + r := &IOReadWriter{ + Ctx: ctx, + IO: uio, + Addr: addr, + Opts: opts, + } + buf := make([]byte, binary.Size(dst)) + if _, err := io.ReadFull(r, buf); err != nil { + return 0, err + } + binary.Unmarshal(buf, ByteOrder, dst) + return int(r.Addr - addr), nil +} + +// CopyStringIn tuning parameters, defined outside that function for tests. +const ( + copyStringIncrement = 64 + copyStringMaxInitBufLen = 256 +) + +// CopyStringIn copies a NUL-terminated string of unknown length from the +// memory mapped at addr in uio and returns it as a string (not including the +// trailing NUL). If the length of the string, including the terminating NUL, +// would exceed maxlen, CopyStringIn returns the string truncated to maxlen and +// ENAMETOOLONG. +// +// Preconditions: As for IO.CopyFromUser. maxlen >= 0. +func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpts) (string, error) { + initLen := maxlen + if initLen > copyStringMaxInitBufLen { + initLen = copyStringMaxInitBufLen + } + buf := make([]byte, initLen) + var done int + for done < maxlen { + // Read up to copyStringIncrement bytes at a time. + readlen := copyStringIncrement + if readlen > maxlen-done { + readlen = maxlen - done + } + end, ok := addr.AddLength(uint64(readlen)) + if !ok { + return stringFromImmutableBytes(buf[:done]), syserror.EFAULT + } + // Shorten the read to avoid crossing page boundaries, since faulting + // in a page unnecessarily is expensive. This also ensures that partial + // copies up to the end of application-mappable memory succeed. + if addr.RoundDown() != end.RoundDown() { + end = end.RoundDown() + readlen = int(end - addr) + } + // Ensure that our buffer is large enough to accommodate the read. + if done+readlen > len(buf) { + newBufLen := len(buf) * 2 + if newBufLen > maxlen { + newBufLen = maxlen + } + buf = append(buf, make([]byte, newBufLen-len(buf))...) + } + n, err := uio.CopyIn(ctx, addr, buf[done:done+readlen], opts) + // Look for the terminating zero byte, which may have occurred before + // hitting err. + if i := bytes.IndexByte(buf[done:done+n], byte(0)); i >= 0 { + return stringFromImmutableBytes(buf[:done+i]), nil + } + + done += n + if err != nil { + return stringFromImmutableBytes(buf[:done]), err + } + addr = end + } + return stringFromImmutableBytes(buf), syserror.ENAMETOOLONG +} + +// CopyOutVec copies bytes from src to the memory mapped at ars in uio. The +// maximum number of bytes copied is ars.NumBytes() or len(src), whichever is +// less. CopyOutVec returns the number of bytes copied; if this is less than +// the maximum, it returns a non-nil error explaining why. +// +// Preconditions: As for IO.CopyOut. +func CopyOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, src []byte, opts IOOpts) (int, error) { + var done int + for !ars.IsEmpty() && done < len(src) { + ar := ars.Head() + cplen := len(src) - done + if Addr(cplen) >= ar.Length() { + cplen = int(ar.Length()) + } + n, err := uio.CopyOut(ctx, ar.Start, src[done:done+cplen], opts) + done += n + if err != nil { + return done, err + } + ars = ars.DropFirst(n) + } + return done, nil +} + +// CopyInVec copies bytes from the memory mapped at ars in uio to dst. The +// maximum number of bytes copied is ars.NumBytes() or len(dst), whichever is +// less. CopyInVec returns the number of bytes copied; if this is less than the +// maximum, it returns a non-nil error explaining why. +// +// Preconditions: As for IO.CopyIn. +func CopyInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst []byte, opts IOOpts) (int, error) { + var done int + for !ars.IsEmpty() && done < len(dst) { + ar := ars.Head() + cplen := len(dst) - done + if Addr(cplen) >= ar.Length() { + cplen = int(ar.Length()) + } + n, err := uio.CopyIn(ctx, ar.Start, dst[done:done+cplen], opts) + done += n + if err != nil { + return done, err + } + ars = ars.DropFirst(n) + } + return done, nil +} + +// ZeroOutVec writes zeroes to the memory mapped at ars in uio. The maximum +// number of bytes written is ars.NumBytes() or toZero, whichever is less. +// ZeroOutVec returns the number of bytes written; if this is less than the +// maximum, it returns a non-nil error explaining why. +// +// Preconditions: As for IO.ZeroOut. +func ZeroOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, toZero int64, opts IOOpts) (int64, error) { + var done int64 + for !ars.IsEmpty() && done < toZero { + ar := ars.Head() + cplen := toZero - done + if Addr(cplen) >= ar.Length() { + cplen = int64(ar.Length()) + } + n, err := uio.ZeroOut(ctx, ar.Start, cplen, opts) + done += n + if err != nil { + return done, err + } + ars = ars.DropFirst64(n) + } + return done, nil +} + +func isASCIIWhitespace(b byte) bool { + // Compare Linux include/linux/ctype.h, lib/ctype.c. + // 9 => horizontal tab '\t' + // 10 => line feed '\n' + // 11 => vertical tab '\v' + // 12 => form feed '\c' + // 13 => carriage return '\r' + return b == ' ' || (b >= 9 && b <= 13) +} + +// CopyInt32StringsInVec copies up to len(dsts) whitespace-separated decimal +// strings from the memory mapped at ars in uio and converts them to int32 +// values in dsts. It returns the number of bytes read. +// +// CopyInt32StringsInVec shares the following properties with Linux's +// kernel/sysctl.c:proc_dointvec(write=1): +// +// - If any read value overflows the range of int32, or any invalid characters +// are encountered during the read, CopyInt32StringsInVec returns EINVAL. +// +// - If, upon reaching the end of ars, fewer than len(dsts) values have been +// read, CopyInt32StringsInVec returns no error if at least 1 value was read +// and EINVAL otherwise. +// +// - Trailing whitespace after the last successfully read value is counted in +// the number of bytes read. +// +// Unlike proc_dointvec(): +// +// - CopyInt32StringsInVec does not implicitly limit ars.NumBytes() to +// PageSize-1; callers that require this must do so explicitly. +// +// - CopyInt32StringsInVec returns EINVAL if ars.NumBytes() == 0. +// +// Preconditions: As for CopyInVec. +func CopyInt32StringsInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dsts []int32, opts IOOpts) (int64, error) { + if len(dsts) == 0 { + return 0, nil + } + + buf := make([]byte, ars.NumBytes()) + n, cperr := CopyInVec(ctx, uio, ars, buf, opts) + buf = buf[:n] + + var i, j int + for ; j < len(dsts); j++ { + // Skip leading whitespace. + for i < len(buf) && isASCIIWhitespace(buf[i]) { + i++ + } + if i == len(buf) { + break + } + + // Find the end of the value to be parsed (next whitespace or end of string). + nextI := i + 1 + for nextI < len(buf) && !isASCIIWhitespace(buf[nextI]) { + nextI++ + } + + // Parse a single value. + val, err := strconv.ParseInt(string(buf[i:nextI]), 10, 32) + if err != nil { + return int64(i), syserror.EINVAL + } + dsts[j] = int32(val) + + i = nextI + } + + // Skip trailing whitespace. + for i < len(buf) && isASCIIWhitespace(buf[i]) { + i++ + } + + if cperr != nil { + return int64(i), cperr + } + if j == 0 { + return int64(i), syserror.EINVAL + } + return int64(i), nil +} + +// CopyInt32StringInVec is equivalent to CopyInt32StringsInVec, but copies at +// most one int32. +func CopyInt32StringInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst *int32, opts IOOpts) (int64, error) { + dsts := [1]int32{*dst} + n, err := CopyInt32StringsInVec(ctx, uio, ars, dsts[:], opts) + *dst = dsts[0] + return n, err +} + +// IOSequence holds arguments to IO methods. +type IOSequence struct { + IO IO + Addrs AddrRangeSeq + Opts IOOpts +} + +// NumBytes returns s.Addrs.NumBytes(). +// +// Note that NumBytes() may return 0 even if !s.Addrs.IsEmpty(), since +// s.Addrs may contain a non-zero number of zero-length AddrRanges. +// Many clients of +// IOSequence currently do something like: +// +// if ioseq.NumBytes() == 0 { +// return 0, nil +// } +// if f.availableBytes == 0 { +// return 0, syserror.ErrWouldBlock +// } +// return ioseq.CopyOutFrom(..., reader) +// +// In such cases, using s.Addrs.IsEmpty() will cause them to have the wrong +// behavior for zero-length I/O. However, using s.NumBytes() == 0 instead means +// that we will return success for zero-length I/O in cases where Linux would +// return EFAULT due to a failed access_ok() check, so in the long term we +// should move checks for ErrWouldBlock etc. into the body of +// reader.ReadToBlocks and use s.Addrs.IsEmpty() instead. +func (s IOSequence) NumBytes() int64 { + return s.Addrs.NumBytes() +} + +// DropFirst returns a copy of s with s.Addrs.DropFirst(n). +// +// Preconditions: As for AddrRangeSeq.DropFirst. +func (s IOSequence) DropFirst(n int) IOSequence { + return IOSequence{s.IO, s.Addrs.DropFirst(n), s.Opts} +} + +// DropFirst64 returns a copy of s with s.Addrs.DropFirst64(n). +// +// Preconditions: As for AddrRangeSeq.DropFirst64. +func (s IOSequence) DropFirst64(n int64) IOSequence { + return IOSequence{s.IO, s.Addrs.DropFirst64(n), s.Opts} +} + +// TakeFirst returns a copy of s with s.Addrs.TakeFirst(n). +// +// Preconditions: As for AddrRangeSeq.TakeFirst. +func (s IOSequence) TakeFirst(n int) IOSequence { + return IOSequence{s.IO, s.Addrs.TakeFirst(n), s.Opts} +} + +// TakeFirst64 returns a copy of s with s.Addrs.TakeFirst64(n). +// +// Preconditions: As for AddrRangeSeq.TakeFirst64. +func (s IOSequence) TakeFirst64(n int64) IOSequence { + return IOSequence{s.IO, s.Addrs.TakeFirst64(n), s.Opts} +} + +// CopyOut invokes CopyOutVec over s.Addrs. +// +// As with CopyOutVec, if s.NumBytes() < len(src), the copy will be truncated +// to s.NumBytes(), and a nil error will be returned. +// +// Preconditions: As for CopyOutVec. +func (s IOSequence) CopyOut(ctx context.Context, src []byte) (int, error) { + return CopyOutVec(ctx, s.IO, s.Addrs, src, s.Opts) +} + +// CopyIn invokes CopyInVec over s.Addrs. +// +// As with CopyInVec, if s.NumBytes() < len(dst), the copy will be truncated to +// s.NumBytes(), and a nil error will be returned. +// +// Preconditions: As for CopyInVec. +func (s IOSequence) CopyIn(ctx context.Context, dst []byte) (int, error) { + return CopyInVec(ctx, s.IO, s.Addrs, dst, s.Opts) +} + +// ZeroOut invokes ZeroOutVec over s.Addrs. +// +// As with ZeroOutVec, if s.NumBytes() < toZero, the write will be truncated +// to s.NumBytes(), and a nil error will be returned. +// +// Preconditions: As for ZeroOutVec. +func (s IOSequence) ZeroOut(ctx context.Context, toZero int64) (int64, error) { + return ZeroOutVec(ctx, s.IO, s.Addrs, toZero, s.Opts) +} + +// CopyOutFrom invokes s.CopyOutFrom over s.Addrs. +// +// Preconditions: As for IO.CopyOutFrom. +func (s IOSequence) CopyOutFrom(ctx context.Context, src safemem.Reader) (int64, error) { + return s.IO.CopyOutFrom(ctx, s.Addrs, src, s.Opts) +} + +// CopyInTo invokes s.CopyInTo over s.Addrs. +// +// Preconditions: As for IO.CopyInTo. +func (s IOSequence) CopyInTo(ctx context.Context, dst safemem.Writer) (int64, error) { + return s.IO.CopyInTo(ctx, s.Addrs, dst, s.Opts) +} + +// Reader returns an io.Reader that reads from s. Reads beyond the end of s +// return io.EOF. The preconditions that apply to s.CopyIn also apply to the +// returned io.Reader.Read. +func (s IOSequence) Reader(ctx context.Context) io.Reader { + return &ioSequenceReadWriter{ctx, s} +} + +// Writer returns an io.Writer that writes to s. Writes beyond the end of s +// return ErrEndOfIOSequence. The preconditions that apply to s.CopyOut also +// apply to the returned io.Writer.Write. +func (s IOSequence) Writer(ctx context.Context) io.Writer { + return &ioSequenceReadWriter{ctx, s} +} + +// ErrEndOfIOSequence is returned by IOSequence.Writer().Write() when +// attempting to write beyond the end of the IOSequence. +var ErrEndOfIOSequence = errors.New("write beyond end of IOSequence") + +type ioSequenceReadWriter struct { + ctx context.Context + s IOSequence +} + +// Read implements io.Reader.Read. +func (rw *ioSequenceReadWriter) Read(dst []byte) (int, error) { + n, err := rw.s.CopyIn(rw.ctx, dst) + rw.s = rw.s.DropFirst(n) + if err == nil && rw.s.NumBytes() == 0 { + err = io.EOF + } + return n, err +} + +// Write implements io.Writer.Write. +func (rw *ioSequenceReadWriter) Write(src []byte) (int, error) { + n, err := rw.s.CopyOut(rw.ctx, src) + rw.s = rw.s.DropFirst(n) + if err == nil && n < len(src) { + err = ErrEndOfIOSequence + } + return n, err +} diff --git a/pkg/usermem/usermem_arm64.go b/pkg/usermem/usermem_arm64.go new file mode 100644 index 000000000..fdfc30a66 --- /dev/null +++ b/pkg/usermem/usermem_arm64.go @@ -0,0 +1,53 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package usermem + +import ( + "encoding/binary" + "syscall" +) + +const ( + // PageSize is the system page size. + // arm64 support 4K/16K/64K page size, + // which can be get by syscall.Getpagesize(). + // Currently, only 4K page size is supported. + PageSize = 1 << PageShift + + // HugePageSize is the system huge page size. + HugePageSize = 1 << HugePageShift + + // PageShift is the binary log of the system page size. + PageShift = 12 + + // HugePageShift is the binary log of the system huge page size. + // Should be calculated by "PageShift + (PageShift - 3)" + // when multiple page size support is ready. + HugePageShift = 21 +) + +var ( + // ByteOrder is the native byte order (little endian). + ByteOrder = binary.LittleEndian +) + +func init() { + // Make sure the page size is 4K on arm64 platform. + if size := syscall.Getpagesize(); size != PageSize { + panic("Only 4K page size is supported on arm64!") + } +} diff --git a/pkg/usermem/usermem_test.go b/pkg/usermem/usermem_test.go new file mode 100644 index 000000000..bf3c5df2b --- /dev/null +++ b/pkg/usermem/usermem_test.go @@ -0,0 +1,424 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package usermem + +import ( + "bytes" + "encoding/binary" + "fmt" + "reflect" + "strings" + "testing" + + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/syserror" +) + +// newContext returns a context.Context that we can use in these tests (we +// can't use contexttest because it depends on usermem). +func newContext() context.Context { + return context.Background() +} + +func newBytesIOString(s string) *BytesIO { + return &BytesIO{[]byte(s)} +} + +func TestBytesIOCopyOutSuccess(t *testing.T) { + b := newBytesIOString("ABCDE") + n, err := b.CopyOut(newContext(), 1, []byte("foo"), IOOpts{}) + if wantN := 3; n != wantN || err != nil { + t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := b.Bytes, []byte("AfooE"); !bytes.Equal(got, want) { + t.Errorf("Bytes: got %q, wanted %q", got, want) + } +} + +func TestBytesIOCopyOutFailure(t *testing.T) { + b := newBytesIOString("ABC") + n, err := b.CopyOut(newContext(), 1, []byte("foo"), IOOpts{}) + if wantN, wantErr := 2, syserror.EFAULT; n != wantN || err != wantErr { + t.Errorf("CopyOut: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) + } + if got, want := b.Bytes, []byte("Afo"); !bytes.Equal(got, want) { + t.Errorf("Bytes: got %q, wanted %q", got, want) + } +} + +func TestBytesIOCopyInSuccess(t *testing.T) { + b := newBytesIOString("AfooE") + var dst [3]byte + n, err := b.CopyIn(newContext(), 1, dst[:], IOOpts{}) + if wantN := 3; n != wantN || err != nil { + t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := dst[:], []byte("foo"); !bytes.Equal(got, want) { + t.Errorf("dst: got %q, wanted %q", got, want) + } +} + +func TestBytesIOCopyInFailure(t *testing.T) { + b := newBytesIOString("Afo") + var dst [3]byte + n, err := b.CopyIn(newContext(), 1, dst[:], IOOpts{}) + if wantN, wantErr := 2, syserror.EFAULT; n != wantN || err != wantErr { + t.Errorf("CopyIn: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) + } + if got, want := dst[:], []byte("fo\x00"); !bytes.Equal(got, want) { + t.Errorf("dst: got %q, wanted %q", got, want) + } +} + +func TestBytesIOZeroOutSuccess(t *testing.T) { + b := newBytesIOString("ABCD") + n, err := b.ZeroOut(newContext(), 1, 2, IOOpts{}) + if wantN := int64(2); n != wantN || err != nil { + t.Errorf("ZeroOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := b.Bytes, []byte("A\x00\x00D"); !bytes.Equal(got, want) { + t.Errorf("Bytes: got %q, wanted %q", got, want) + } +} + +func TestBytesIOZeroOutFailure(t *testing.T) { + b := newBytesIOString("ABC") + n, err := b.ZeroOut(newContext(), 1, 3, IOOpts{}) + if wantN, wantErr := int64(2), syserror.EFAULT; n != wantN || err != wantErr { + t.Errorf("ZeroOut: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) + } + if got, want := b.Bytes, []byte("A\x00\x00"); !bytes.Equal(got, want) { + t.Errorf("Bytes: got %q, wanted %q", got, want) + } +} + +func TestBytesIOCopyOutFromSuccess(t *testing.T) { + b := newBytesIOString("ABCDEFGH") + n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{ + {Start: 4, End: 7}, + {Start: 1, End: 4}, + }), safemem.FromIOReader{bytes.NewBufferString("barfoo")}, IOOpts{}) + if wantN := int64(6); n != wantN || err != nil { + t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := b.Bytes, []byte("AfoobarH"); !bytes.Equal(got, want) { + t.Errorf("Bytes: got %q, wanted %q", got, want) + } +} + +func TestBytesIOCopyOutFromFailure(t *testing.T) { + b := newBytesIOString("ABCDE") + n, err := b.CopyOutFrom(newContext(), AddrRangeSeqFromSlice([]AddrRange{ + {Start: 1, End: 4}, + {Start: 4, End: 7}, + }), safemem.FromIOReader{bytes.NewBufferString("foobar")}, IOOpts{}) + if wantN, wantErr := int64(4), syserror.EFAULT; n != wantN || err != wantErr { + t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) + } + if got, want := b.Bytes, []byte("Afoob"); !bytes.Equal(got, want) { + t.Errorf("Bytes: got %q, wanted %q", got, want) + } +} + +func TestBytesIOCopyInToSuccess(t *testing.T) { + b := newBytesIOString("AfoobarH") + var dst bytes.Buffer + n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{ + {Start: 4, End: 7}, + {Start: 1, End: 4}, + }), safemem.FromIOWriter{&dst}, IOOpts{}) + if wantN := int64(6); n != wantN || err != nil { + t.Errorf("CopyInTo: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := dst.Bytes(), []byte("barfoo"); !bytes.Equal(got, want) { + t.Errorf("dst.Bytes(): got %q, wanted %q", got, want) + } +} + +func TestBytesIOCopyInToFailure(t *testing.T) { + b := newBytesIOString("Afoob") + var dst bytes.Buffer + n, err := b.CopyInTo(newContext(), AddrRangeSeqFromSlice([]AddrRange{ + {Start: 1, End: 4}, + {Start: 4, End: 7}, + }), safemem.FromIOWriter{&dst}, IOOpts{}) + if wantN, wantErr := int64(4), syserror.EFAULT; n != wantN || err != wantErr { + t.Errorf("CopyOutFrom: got (%v, %v), wanted (%v, %v)", n, err, wantN, wantErr) + } + if got, want := dst.Bytes(), []byte("foob"); !bytes.Equal(got, want) { + t.Errorf("dst.Bytes(): got %q, wanted %q", got, want) + } +} + +type testStruct struct { + Int8 int8 + Uint8 uint8 + Int16 int16 + Uint16 uint16 + Int32 int32 + Uint32 uint32 + Int64 int64 + Uint64 uint64 +} + +func TestCopyObject(t *testing.T) { + wantObj := testStruct{1, 2, 3, 4, 5, 6, 7, 8} + wantN := binary.Size(wantObj) + b := &BytesIO{make([]byte, wantN)} + ctx := newContext() + if n, err := CopyObjectOut(ctx, b, 0, &wantObj, IOOpts{}); n != wantN || err != nil { + t.Fatalf("CopyObjectOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + var gotObj testStruct + if n, err := CopyObjectIn(ctx, b, 0, &gotObj, IOOpts{}); n != wantN || err != nil { + t.Errorf("CopyObjectIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if gotObj != wantObj { + t.Errorf("CopyObject round trip: got %+v, wanted %+v", gotObj, wantObj) + } +} + +func TestCopyStringInShort(t *testing.T) { + // Tests for string length <= copyStringIncrement. + want := strings.Repeat("A", copyStringIncrement-2) + mem := want + "\x00" + if got, err := CopyStringIn(newContext(), newBytesIOString(mem), 0, 2*copyStringIncrement, IOOpts{}); got != want || err != nil { + t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, nil)", got, err, want) + } +} + +func TestCopyStringInLong(t *testing.T) { + // Tests for copyStringIncrement < string length <= copyStringMaxInitBufLen + // (requiring multiple calls to IO.CopyIn()). + want := strings.Repeat("A", copyStringIncrement*3/4) + strings.Repeat("B", copyStringIncrement*3/4) + mem := want + "\x00" + if got, err := CopyStringIn(newContext(), newBytesIOString(mem), 0, 2*copyStringIncrement, IOOpts{}); got != want || err != nil { + t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, nil)", got, err, want) + } +} + +func TestCopyStringInVeryLong(t *testing.T) { + // Tests for string length > copyStringMaxInitBufLen (requiring buffer + // reallocation). + want := strings.Repeat("A", copyStringMaxInitBufLen*3/4) + strings.Repeat("B", copyStringMaxInitBufLen*3/4) + mem := want + "\x00" + if got, err := CopyStringIn(newContext(), newBytesIOString(mem), 0, 2*copyStringMaxInitBufLen, IOOpts{}); got != want || err != nil { + t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, nil)", got, err, want) + } +} + +func TestCopyStringInNoTerminatingZeroByte(t *testing.T) { + want := strings.Repeat("A", copyStringIncrement-1) + got, err := CopyStringIn(newContext(), newBytesIOString(want), 0, 2*copyStringIncrement, IOOpts{}) + if wantErr := syserror.EFAULT; got != want || err != wantErr { + t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, %v)", got, err, want, wantErr) + } +} + +func TestCopyStringInTruncatedByMaxlen(t *testing.T) { + got, err := CopyStringIn(newContext(), newBytesIOString(strings.Repeat("A", 10)), 0, 5, IOOpts{}) + if want, wantErr := strings.Repeat("A", 5), syserror.ENAMETOOLONG; got != want || err != wantErr { + t.Errorf("CopyStringIn: got (%q, %v), wanted (%q, %v)", got, err, want, wantErr) + } +} + +func TestCopyInt32StringsInVec(t *testing.T) { + for _, test := range []struct { + str string + n int + initial []int32 + final []int32 + }{ + { + str: "100 200", + n: len("100 200"), + initial: []int32{1, 2}, + final: []int32{100, 200}, + }, + { + // Fewer values ok + str: "100", + n: len("100"), + initial: []int32{1, 2}, + final: []int32{100, 2}, + }, + { + // Extra values ok + str: "100 200 300", + n: len("100 200 "), + initial: []int32{1, 2}, + final: []int32{100, 200}, + }, + { + // Leading and trailing whitespace ok + str: " 100\t200\n", + n: len(" 100\t200\n"), + initial: []int32{1, 2}, + final: []int32{100, 200}, + }, + } { + t.Run(fmt.Sprintf("%q", test.str), func(t *testing.T) { + src := BytesIOSequence([]byte(test.str)) + dsts := append([]int32(nil), test.initial...) + if n, err := CopyInt32StringsInVec(newContext(), src.IO, src.Addrs, dsts, src.Opts); n != int64(test.n) || err != nil { + t.Errorf("CopyInt32StringsInVec: got (%d, %v), wanted (%d, nil)", n, err, test.n) + } + if !reflect.DeepEqual(dsts, test.final) { + t.Errorf("dsts: got %v, wanted %v", dsts, test.final) + } + }) + } +} + +func TestCopyInt32StringsInVecRequiresOneValidValue(t *testing.T) { + for _, s := range []string{"", "\n", "a123"} { + t.Run(fmt.Sprintf("%q", s), func(t *testing.T) { + src := BytesIOSequence([]byte(s)) + initial := []int32{1, 2} + dsts := append([]int32(nil), initial...) + if n, err := CopyInt32StringsInVec(newContext(), src.IO, src.Addrs, dsts, src.Opts); err != syserror.EINVAL { + t.Errorf("CopyInt32StringsInVec: got (%d, %v), wanted (_, %v)", n, err, syserror.EINVAL) + } + if !reflect.DeepEqual(dsts, initial) { + t.Errorf("dsts: got %v, wanted %v", dsts, initial) + } + }) + } +} + +func TestIOSequenceCopyOut(t *testing.T) { + buf := []byte("ABCD") + s := BytesIOSequence(buf) + + // CopyOut limited by len(src). + n, err := s.CopyOut(newContext(), []byte("fo")) + if wantN := 2; n != wantN || err != nil { + t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if want := []byte("foCD"); !bytes.Equal(buf, want) { + t.Errorf("buf: got %q, wanted %q", buf, want) + } + s = s.DropFirst(2) + if got, want := s.NumBytes(), int64(2); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } + + // CopyOut limited by s.NumBytes(). + n, err = s.CopyOut(newContext(), []byte("obar")) + if wantN := 2; n != wantN || err != nil { + t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if want := []byte("foob"); !bytes.Equal(buf, want) { + t.Errorf("buf: got %q, wanted %q", buf, want) + } + s = s.DropFirst(2) + if got, want := s.NumBytes(), int64(0); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } +} + +func TestIOSequenceCopyIn(t *testing.T) { + s := BytesIOSequence([]byte("foob")) + dst := []byte("ABCDEF") + + // CopyIn limited by len(dst). + n, err := s.CopyIn(newContext(), dst[:2]) + if wantN := 2; n != wantN || err != nil { + t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if want := []byte("foCDEF"); !bytes.Equal(dst, want) { + t.Errorf("dst: got %q, wanted %q", dst, want) + } + s = s.DropFirst(2) + if got, want := s.NumBytes(), int64(2); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } + + // CopyIn limited by s.Remaining(). + n, err = s.CopyIn(newContext(), dst[2:]) + if wantN := 2; n != wantN || err != nil { + t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if want := []byte("foobEF"); !bytes.Equal(dst, want) { + t.Errorf("dst: got %q, wanted %q", dst, want) + } + s = s.DropFirst(2) + if got, want := s.NumBytes(), int64(0); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } +} + +func TestIOSequenceZeroOut(t *testing.T) { + buf := []byte("ABCD") + s := BytesIOSequence(buf) + + // ZeroOut limited by toZero. + n, err := s.ZeroOut(newContext(), 2) + if wantN := int64(2); n != wantN || err != nil { + t.Errorf("ZeroOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if want := []byte("\x00\x00CD"); !bytes.Equal(buf, want) { + t.Errorf("buf: got %q, wanted %q", buf, want) + } + s = s.DropFirst(2) + if got, want := s.NumBytes(), int64(2); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } + + // ZeroOut limited by s.NumBytes(). + n, err = s.ZeroOut(newContext(), 4) + if wantN := int64(2); n != wantN || err != nil { + t.Errorf("CopyOut: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if want := []byte("\x00\x00\x00\x00"); !bytes.Equal(buf, want) { + t.Errorf("buf: got %q, wanted %q", buf, want) + } + s = s.DropFirst(2) + if got, want := s.NumBytes(), int64(0); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } +} + +func TestIOSequenceTakeFirst(t *testing.T) { + s := BytesIOSequence([]byte("foobar")) + if got, want := s.NumBytes(), int64(6); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } + + s = s.TakeFirst(3) + if got, want := s.NumBytes(), int64(3); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } + + // TakeFirst(n) where n > s.NumBytes() is a no-op. + s = s.TakeFirst(9) + if got, want := s.NumBytes(), int64(3); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } + + var dst [3]byte + n, err := s.CopyIn(newContext(), dst[:]) + if wantN := 3; n != wantN || err != nil { + t.Errorf("CopyIn: got (%v, %v), wanted (%v, nil)", n, err, wantN) + } + if got, want := dst[:], []byte("foo"); !bytes.Equal(got, want) { + t.Errorf("dst: got %q, wanted %q", got, want) + } + s = s.DropFirst(3) + if got, want := s.NumBytes(), int64(0); got != want { + t.Errorf("NumBytes: got %v, wanted %v", got, want) + } +} diff --git a/pkg/usermem/usermem_unsafe.go b/pkg/usermem/usermem_unsafe.go new file mode 100644 index 000000000..876783e78 --- /dev/null +++ b/pkg/usermem/usermem_unsafe.go @@ -0,0 +1,27 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package usermem + +import ( + "unsafe" +) + +// stringFromImmutableBytes is equivalent to string(bs), except that it never +// copies even if escape analysis can't prove that bs does not escape. This is +// only valid if bs is never mutated after stringFromImmutableBytes returns. +func stringFromImmutableBytes(bs []byte) string { + // Compare strings.Builder.String(). + return *(*string)(unsafe.Pointer(&bs)) +} diff --git a/pkg/usermem/usermem_x86.go b/pkg/usermem/usermem_x86.go new file mode 100644 index 000000000..8059b72d2 --- /dev/null +++ b/pkg/usermem/usermem_x86.go @@ -0,0 +1,38 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build amd64 i386 + +package usermem + +import "encoding/binary" + +const ( + // PageSize is the system page size. + PageSize = 1 << PageShift + + // HugePageSize is the system huge page size. + HugePageSize = 1 << HugePageShift + + // PageShift is the binary log of the system page size. + PageShift = 12 + + // HugePageShift is the binary log of the system huge page size. + HugePageShift = 21 +) + +var ( + // ByteOrder is the native byte order (little endian). + ByteOrder = binary.LittleEndian +) diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index f3ebc0231..a96c80261 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -30,6 +30,7 @@ go_library( deps = [ "//pkg/abi", "//pkg/abi/linux", + "//pkg/context", "//pkg/control/server", "//pkg/cpuid", "//pkg/eventchannel", @@ -39,7 +40,6 @@ go_library( "//pkg/refs", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", - "//pkg/sentry/context", "//pkg/sentry/control", "//pkg/sentry/fs", "//pkg/sentry/fs/dev", @@ -71,7 +71,6 @@ go_library( "//pkg/sentry/time", "//pkg/sentry/unimpl:unimplemented_syscall_go_proto", "//pkg/sentry/usage", - "//pkg/sentry/usermem", "//pkg/sentry/watchdog", "//pkg/sync", "//pkg/syserror", @@ -88,6 +87,7 @@ go_library( "//pkg/tcpip/transport/tcp", "//pkg/tcpip/transport/udp", "//pkg/urpc", + "//pkg/usermem", "//runsc/boot/filter", "//runsc/boot/platforms", "//runsc/specutils", @@ -111,7 +111,7 @@ go_test( "//pkg/control/server", "//pkg/log", "//pkg/p9", - "//pkg/sentry/context/contexttest", + "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", "//pkg/sync", diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go index e5de1f3d7..417d2d5fb 100644 --- a/runsc/boot/fds.go +++ b/runsc/boot/fds.go @@ -17,7 +17,7 @@ package boot import ( "fmt" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" "gvisor.dev/gvisor/pkg/sentry/kernel" diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 421ccd255..0f62842ea 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -32,8 +32,8 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/gofer" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index bec0dc292..44aa63196 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -27,7 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/control/server" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" diff --git a/runsc/boot/user.go b/runsc/boot/user.go index 56cc12ee0..f0aa52135 100644 --- a/runsc/boot/user.go +++ b/runsc/boot/user.go @@ -22,10 +22,10 @@ import ( "strings" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" ) type fileReader struct { diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go index 9aee2ad07..fb4e13dfb 100644 --- a/runsc/boot/user_test.go +++ b/runsc/boot/user_test.go @@ -23,7 +23,7 @@ import ( "testing" specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) diff --git a/tools/go_marshal/defs.bzl b/tools/go_marshal/defs.bzl index 2918ceffe..d79786a68 100644 --- a/tools/go_marshal/defs.bzl +++ b/tools/go_marshal/defs.bzl @@ -54,8 +54,8 @@ go_marshal = rule( # marshal_deps are the dependencies requied by generated code. marshal_deps = [ "//tools/go_marshal/marshal", - "//pkg/sentry/platform/safecopy", - "//pkg/sentry/usermem", + "//pkg/safecopy", + "//pkg/usermem", ] # marshal_test_deps are required by test targets. diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go index 8392f3f6d..af90bdecb 100644 --- a/tools/go_marshal/gomarshal/generator.go +++ b/tools/go_marshal/gomarshal/generator.go @@ -27,8 +27,8 @@ import ( const ( marshalImport = "gvisor.dev/gvisor/tools/go_marshal/marshal" - usermemImport = "gvisor.dev/gvisor/pkg/sentry/usermem" - safecopyImport = "gvisor.dev/gvisor/pkg/sentry/platform/safecopy" + safecopyImport = "gvisor.dev/gvisor/pkg/safecopy" + usermemImport = "gvisor.dev/gvisor/pkg/usermem" ) // List of identifiers we use in generated code, that may conflict a diff --git a/tools/go_marshal/test/BUILD b/tools/go_marshal/test/BUILD index 38ba49fed..e345e3a8e 100644 --- a/tools/go_marshal/test/BUILD +++ b/tools/go_marshal/test/BUILD @@ -15,7 +15,7 @@ go_test( deps = [ ":test", "//pkg/binary", - "//pkg/sentry/usermem", + "//pkg/usermem", "//tools/go_marshal/analysis", ], ) diff --git a/tools/go_marshal/test/benchmark_test.go b/tools/go_marshal/test/benchmark_test.go index e70db06d8..e12403741 100644 --- a/tools/go_marshal/test/benchmark_test.go +++ b/tools/go_marshal/test/benchmark_test.go @@ -22,7 +22,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/tools/go_marshal/analysis" test "gvisor.dev/gvisor/tools/go_marshal/test" ) -- cgit v1.2.3 From f263801a74d4ccac042b068d0928c8738e40af5b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 28 Jan 2020 13:36:16 -0800 Subject: fs/splice: don't report partial errors for special files Special files can have additional requirements for granularity. For example, read from eventfd returns EINVAL if a size is less 8 bytes. Reported-by: syzbot+3905f5493bec08eb7b02@syzkaller.appspotmail.com PiperOrigin-RevId: 292002926 --- pkg/sentry/fs/attr.go | 5 +++++ pkg/sentry/fs/file.go | 7 ------- pkg/sentry/fs/splice.go | 5 ----- pkg/sentry/syscalls/linux/sys_splice.go | 19 +++++++++++++++---- test/syscalls/linux/eventfd.cc | 25 +++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 16 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go index fa9e7d517..f60bd423d 100644 --- a/pkg/sentry/fs/attr.go +++ b/pkg/sentry/fs/attr.go @@ -206,6 +206,11 @@ func IsPipe(s StableAttr) bool { return s.Type == Pipe } +// IsAnonymous returns true if StableAttr.Type matches any type of anonymous. +func IsAnonymous(s StableAttr) bool { + return s.Type == Anonymous +} + // IsSocket returns true if StableAttr.Type matches any type of socket. func IsSocket(s StableAttr) bool { return s.Type == Socket diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index ca3466f4f..78100e448 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -555,10 +555,6 @@ type lockedWriter struct { // // This applies only to Write, not WriteAt. Offset int64 - - // Err contains the first error encountered while copying. This is - // useful to determine whether Writer or Reader failed during io.Copy. - Err error } // Write implements io.Writer.Write. @@ -594,8 +590,5 @@ func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) { break } } - if w.Err == nil { - w.Err = err - } return written, err } diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index 791d1526c..33da82868 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -167,11 +167,6 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, if !srcPipe && !opts.SrcOffset { atomic.StoreInt64(&src.offset, src.offset+n) } - - // Don't report any errors if we have some progress without data loss. - if w.Err == nil { - err = nil - } } // Drop locks. diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index dd3a5807f..f43d6c155 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -211,8 +211,10 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal opts := fs.SpliceOpts{ Length: count, } + inFileAttr := inFile.Dirent.Inode.StableAttr + outFileAttr := outFile.Dirent.Inode.StableAttr switch { - case fs.IsPipe(inFile.Dirent.Inode.StableAttr) && !fs.IsPipe(outFile.Dirent.Inode.StableAttr): + case fs.IsPipe(inFileAttr) && !fs.IsPipe(outFileAttr): if inOffset != 0 { return 0, nil, syserror.ESPIPE } @@ -229,7 +231,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal opts.DstOffset = true opts.DstStart = offset } - case !fs.IsPipe(inFile.Dirent.Inode.StableAttr) && fs.IsPipe(outFile.Dirent.Inode.StableAttr): + case !fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr): if outOffset != 0 { return 0, nil, syserror.ESPIPE } @@ -246,13 +248,13 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal opts.SrcOffset = true opts.SrcStart = offset } - case fs.IsPipe(inFile.Dirent.Inode.StableAttr) && fs.IsPipe(outFile.Dirent.Inode.StableAttr): + case fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr): if inOffset != 0 || outOffset != 0 { return 0, nil, syserror.ESPIPE } // We may not refer to the same pipe; otherwise it's a continuous loop. - if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID { + if inFileAttr.InodeID == outFileAttr.InodeID { return 0, nil, syserror.EINVAL } default: @@ -262,6 +264,15 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // Splice data. n, err := doSplice(t, outFile, inFile, opts, nonBlock) + // Special files can have additional requirements for granularity. For + // example, read from eventfd returns EINVAL if a size is less 8 bytes. + // Inotify is another example. read will return EINVAL is a buffer is + // too small to return the next event, but a size of an event isn't + // fixed, it is sizeof(struct inotify_event) + {NAME_LEN} + 1. + if n != 0 && err != nil && (fs.IsAnonymous(inFileAttr) || fs.IsAnonymous(outFileAttr)) { + err = nil + } + // See above; inFile is chosen arbitrarily here. return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "splice", inFile) } diff --git a/test/syscalls/linux/eventfd.cc b/test/syscalls/linux/eventfd.cc index 367682c3d..927001eee 100644 --- a/test/syscalls/linux/eventfd.cc +++ b/test/syscalls/linux/eventfd.cc @@ -132,6 +132,31 @@ TEST(EventfdTest, BigWriteBigRead) { EXPECT_EQ(l[0], 1); } +TEST(EventfdTest, SpliceFromPipePartialSucceeds) { + int pipes[2]; + ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); + const FileDescriptor pipe_rfd(pipes[0]); + const FileDescriptor pipe_wfd(pipes[1]); + constexpr uint64_t kVal{1}; + + FileDescriptor efd = ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK)); + + uint64_t event_array[2]; + event_array[0] = kVal; + event_array[1] = kVal; + ASSERT_THAT(write(pipe_wfd.get(), event_array, sizeof(event_array)), + SyscallSucceedsWithValue(sizeof(event_array))); + EXPECT_THAT(splice(pipe_rfd.get(), /*__offin=*/nullptr, efd.get(), + /*__offout=*/nullptr, sizeof(event_array[0]) + 1, + SPLICE_F_NONBLOCK), + SyscallSucceedsWithValue(sizeof(event_array[0]))); + + uint64_t val; + ASSERT_THAT(read(efd.get(), &val, sizeof(val)), + SyscallSucceedsWithValue(sizeof(val))); + EXPECT_EQ(val, kVal); +} + // NotifyNonZero is inherently racy, so random save is disabled. TEST(EventfdTest, NotifyNonZero_NoRandomSave) { // Waits will time out at 10 seconds. -- cgit v1.2.3 From ede8dfab3760afc8063c3418f217e52f7ec70d42 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 30 Jan 2020 09:13:36 -0800 Subject: Enforce splice offset limits Splice must not allow negative offsets. Writes also must not allow offset + size to overflow int64. Reads are similarly broken, but not just in splice (b/148095030). Reported-by: syzbot+0e1ff0b95fb2859b4190@syzkaller.appspotmail.com PiperOrigin-RevId: 292361208 --- pkg/sentry/fs/tmpfs/inode_file.go | 10 ++++-- pkg/sentry/syscalls/linux/sys_splice.go | 16 ++++------ test/syscalls/linux/splice.cc | 56 +++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 12 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index dabc10662..25abbc151 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -17,6 +17,7 @@ package tmpfs import ( "fmt" "io" + "math" "time" "gvisor.dev/gvisor/pkg/abi/linux" @@ -444,10 +445,15 @@ func (rw *fileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) defer rw.f.dataMu.Unlock() // Compute the range to write. - end := fs.WriteEndOffset(rw.offset, int64(srcs.NumBytes())) - if end == rw.offset { // srcs.NumBytes() == 0? + if srcs.NumBytes() == 0 { + // Nothing to do. return 0, nil } + end := fs.WriteEndOffset(rw.offset, int64(srcs.NumBytes())) + if end == math.MaxInt64 { + // Overflow. + return 0, syserror.EINVAL + } // Check if seals prevent either file growth or all writes. switch { diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index f43d6c155..fd642834b 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -25,6 +25,10 @@ import ( // doSplice implements a blocking splice operation. func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) { + if opts.Length < 0 || opts.SrcStart < 0 || opts.DstStart < 0 { + return 0, syserror.EINVAL + } + var ( total int64 n int64 @@ -82,11 +86,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc offsetAddr := args[2].Pointer() count := int64(args[3].SizeT()) - // Don't send a negative number of bytes. - if count < 0 { - return 0, nil, syserror.EINVAL - } - // Get files. inFile := t.GetFile(inFD) if inFile == nil { @@ -136,11 +135,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, err } - // The offset must be valid. - if offset < 0 { - return 0, nil, syserror.EINVAL - } - // Do the splice. n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{ Length: count, @@ -227,6 +221,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if _, err := t.CopyIn(outOffset, &offset); err != nil { return 0, nil, err } + // Use the destination offset. opts.DstOffset = true opts.DstStart = offset @@ -244,6 +239,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if _, err := t.CopyIn(inOffset, &offset); err != nil { return 0, nil, err } + // Use the source offset. opts.SrcOffset = true opts.SrcStart = offset diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index 85232cb1f..faa1247f6 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -60,6 +60,62 @@ TEST(SpliceTest, TwoRegularFiles) { SyscallFailsWithErrno(EINVAL)); } +int memfd_create(const std::string& name, unsigned int flags) { + return syscall(__NR_memfd_create, name.c_str(), flags); +} + +TEST(SpliceTest, NegativeOffset) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill the pipe. + std::vector buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Open the output file as write only. + int fd; + EXPECT_THAT(fd = memfd_create("negative", 0), SyscallSucceeds()); + const FileDescriptor out_fd(fd); + + loff_t out_offset = 0xffffffffffffffffull; + constexpr int kSize = 2; + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Write offset + size overflows int64. +// +// This is a regression test for b/148041624. +TEST(SpliceTest, WriteOverflow) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill the pipe. + std::vector buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + // Open the output file. + int fd; + EXPECT_THAT(fd = memfd_create("overflow", 0), SyscallSucceeds()); + const FileDescriptor out_fd(fd); + + // out_offset + kSize overflows INT64_MAX. + loff_t out_offset = 0x7ffffffffffffffeull; + constexpr int kSize = 3; + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), &out_offset, kSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + TEST(SpliceTest, SamePipe) { // Create a new pipe. int fds[2]; -- cgit v1.2.3 From bc3a24d62788bd6b881afeda230f1a5550a5709a Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Fri, 31 Jan 2020 11:49:37 -0800 Subject: Internal change. PiperOrigin-RevId: 292587459 --- kokoro/kythe/generate_xrefs.sh | 2 +- pkg/sentry/fs/g3doc/inotify.md | 16 ++++++++-------- pkg/sentry/mm/README.md | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/kokoro/kythe/generate_xrefs.sh b/kokoro/kythe/generate_xrefs.sh index 7a0fbb3cd..323b0f77b 100644 --- a/kokoro/kythe/generate_xrefs.sh +++ b/kokoro/kythe/generate_xrefs.sh @@ -23,7 +23,7 @@ bazel version python3 -V -readonly KYTHE_VERSION='v0.0.39' +readonly KYTHE_VERSION='v0.0.41' readonly WORKDIR="$(mktemp -d)" readonly KYTHE_DIR="${WORKDIR}/kythe-${KYTHE_VERSION}" if [[ -n "$KOKORO_GIT_COMMIT" ]]; then diff --git a/pkg/sentry/fs/g3doc/inotify.md b/pkg/sentry/fs/g3doc/inotify.md index 71a577d9d..85063d4e6 100644 --- a/pkg/sentry/fs/g3doc/inotify.md +++ b/pkg/sentry/fs/g3doc/inotify.md @@ -112,11 +112,11 @@ attempts to queue a new event, it is already holding `fs.Watches.mu`. If we used `Inotify.mu` to also protect the event queue, this would violate the above lock ordering. -[dirent]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/dirent.go -[event]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_event.go -[fd_table]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/kernel/fd_table.go -[inode]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode.go -[inode_watches]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inode_inotify.go -[inotify]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify.go -[syscall_dir]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/syscalls/linux/ -[watch]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/fs/inotify_watch.go +[dirent]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/dirent.go +[event]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_event.go +[fd_table]: https://github.com/google/gvisor/blob/master/pkg/sentry/kernel/fd_table.go +[inode]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode.go +[inode_watches]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inode_inotify.go +[inotify]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify.go +[syscall_dir]: https://github.com/google/gvisor/blob/master/pkg/sentry/syscalls/linux/ +[watch]: https://github.com/google/gvisor/blob/master/pkg/sentry/fs/inotify_watch.go diff --git a/pkg/sentry/mm/README.md b/pkg/sentry/mm/README.md index e1322e373..f4d43d927 100644 --- a/pkg/sentry/mm/README.md +++ b/pkg/sentry/mm/README.md @@ -274,7 +274,7 @@ In the sentry: methods [`platform.AddressSpace.MapFile` and `platform.AddressSpace.Unmap`][platform]. -[memmap]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/memmap/memmap.go -[mm]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/mm/mm.go -[pgalloc]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/pgalloc/pgalloc.go -[platform]: https://github.com/google/gvisor/blob/master/+/master/pkg/sentry/platform/platform.go +[memmap]: https://github.com/google/gvisor/blob/master/pkg/sentry/memmap/memmap.go +[mm]: https://github.com/google/gvisor/blob/master/pkg/sentry/mm/mm.go +[pgalloc]: https://github.com/google/gvisor/blob/master/pkg/sentry/pgalloc/pgalloc.go +[platform]: https://github.com/google/gvisor/blob/master/pkg/sentry/platform/platform.go -- cgit v1.2.3 From d7cd484091543827678f1548b8e5668a7a86e13f Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 4 Feb 2020 08:20:10 -0800 Subject: Add support for sentry internal pipe for gofer mounts Internal pipes are supported similarly to how internal UDS is done. It is also controlled by the same flag. Fixes #1102 PiperOrigin-RevId: 293150045 --- pkg/sentry/fs/gofer/BUILD | 2 + pkg/sentry/fs/gofer/cache_policy.go | 3 + pkg/sentry/fs/gofer/fifo.go | 40 ++++++++ pkg/sentry/fs/gofer/gofer_test.go | 2 +- pkg/sentry/fs/gofer/path.go | 183 +++++++++++++++++++++++------------ pkg/sentry/fs/gofer/session.go | 183 ++++++++++++++++++++++------------- pkg/sentry/fs/gofer/session_state.go | 12 +-- pkg/sentry/fs/gofer/socket.go | 8 +- test/syscalls/BUILD | 1 - 9 files changed, 293 insertions(+), 141 deletions(-) create mode 100644 pkg/sentry/fs/gofer/fifo.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/BUILD b/pkg/sentry/fs/gofer/BUILD index 971d3718e..fea135eea 100644 --- a/pkg/sentry/fs/gofer/BUILD +++ b/pkg/sentry/fs/gofer/BUILD @@ -9,6 +9,7 @@ go_library( "cache_policy.go", "context_file.go", "device.go", + "fifo.go", "file.go", "file_state.go", "fs.go", @@ -38,6 +39,7 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/host", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", diff --git a/pkg/sentry/fs/gofer/cache_policy.go b/pkg/sentry/fs/gofer/cache_policy.go index ebea03c42..07a564e92 100644 --- a/pkg/sentry/fs/gofer/cache_policy.go +++ b/pkg/sentry/fs/gofer/cache_policy.go @@ -127,6 +127,9 @@ func (cp cachePolicy) revalidate(ctx context.Context, name string, parent, child childIops, ok := child.InodeOperations.(*inodeOperations) if !ok { + if _, ok := child.InodeOperations.(*fifo); ok { + return false + } panic(fmt.Sprintf("revalidating inode operations of unknown type %T", child.InodeOperations)) } parentIops, ok := parent.InodeOperations.(*inodeOperations) diff --git a/pkg/sentry/fs/gofer/fifo.go b/pkg/sentry/fs/gofer/fifo.go new file mode 100644 index 000000000..456557058 --- /dev/null +++ b/pkg/sentry/fs/gofer/fifo.go @@ -0,0 +1,40 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fs" +) + +// +stateify savable +type fifo struct { + fs.InodeOperations + fileIops *inodeOperations +} + +var _ fs.InodeOperations = (*fifo)(nil) + +// Rename implements fs.InodeOperations. It forwards the call to the underlying +// file inode to handle the file rename. Note that file key remains the same +// after the rename to keep the endpoint mapping. +func (i *fifo) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { + return i.fileIops.Rename(ctx, inode, oldParent, oldName, newParent, newName, replacement) +} + +// StatFS implements fs.InodeOperations. +func (i *fifo) StatFS(ctx context.Context) (fs.Info, error) { + return i.fileIops.StatFS(ctx) +} diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 0c2f89ae8..2df2fe889 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -61,7 +61,7 @@ func rootTest(t *testing.T, name string, cp cachePolicy, fn func(context.Context ctx := contexttest.Context(t) sattr, rootInodeOperations := newInodeOperations(ctx, s, contextFile{ file: rootFile, - }, root.QID, p9.AttrMaskAll(), root.Attr, false /* socket */) + }, root.QID, p9.AttrMaskAll(), root.Attr) m := fs.NewMountSource(ctx, s, &filesystem{}, fs.MountSourceFlags{}) rootInode := fs.NewInode(ctx, rootInodeOperations, m, sattr) diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index 0c1be05ef..a35c3a23d 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -23,14 +23,24 @@ import ( "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // maxFilenameLen is the maximum length of a filename. This is dictated by 9P's // encoding of strings, which uses 2 bytes for the length prefix. const maxFilenameLen = (1 << 16) - 1 +func changeType(mode p9.FileMode, newType p9.FileMode) p9.FileMode { + if newType&^p9.FileModeMask != 0 { + panic(fmt.Sprintf("newType contained more bits than just file mode: %x", newType)) + } + clear := mode &^ p9.FileModeMask + return clear | newType +} + // Lookup loads an Inode at name into a Dirent based on the session's cache // policy. func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { @@ -69,8 +79,25 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string return nil, err } + if i.session().overrides != nil { + // Check if file belongs to a internal named pipe. Note that it doesn't need + // to check for sockets because it's done in newInodeOperations below. + deviceKey := device.MultiDeviceKey{ + Device: p9attr.RDev, + SecondaryDevice: i.session().connID, + Inode: qids[0].Path, + } + unlock := i.session().overrides.lock() + if pipeInode := i.session().overrides.getPipe(deviceKey); pipeInode != nil { + unlock() + pipeInode.IncRef() + return fs.NewDirent(ctx, pipeInode, name), nil + } + unlock() + } + // Construct the Inode operations. - sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr, false) + sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr) // Construct a positive Dirent. return fs.NewDirent(ctx, fs.NewInode(ctx, node, dir.MountSource, sattr), name), nil @@ -138,7 +165,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string qid := qids[0] // Construct the InodeOperations. - sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr, false) + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr) // Construct the positive Dirent. d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) @@ -223,82 +250,115 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, return nil, syserror.ENAMETOOLONG } - if i.session().endpoints == nil { + if i.session().overrides == nil { return nil, syscall.EOPNOTSUPP } - // Create replaces the directory fid with the newly created/opened - // file, so clone this directory so it doesn't change out from under - // this node. - _, newFile, err := i.fileState.file.walk(ctx, nil) + // Stabilize the override map while creation is in progress. + unlock := i.session().overrides.lock() + defer unlock() + + sattr, iops, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket) if err != nil { return nil, err } - // We're not going to use newFile after return. - defer newFile.close(ctx) - // Stabilize the endpoint map while creation is in progress. - unlock := i.session().endpoints.lock() - defer unlock() + // Construct the positive Dirent. + childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) + i.session().overrides.addBoundEndpoint(iops.fileState.key, childDir, ep) + return childDir, nil +} - // Create a regular file in the gofer and then mark it as a socket by - // adding this inode key in the 'endpoints' map. - owner := fs.FileOwnerFromContext(ctx) - hostFile, err := newFile.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) - if err != nil { - return nil, err +// CreateFifo implements fs.InodeOperations.CreateFifo. +func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { + if len(name) > maxFilenameLen { + return syserror.ENAMETOOLONG } - // We're not going to use this file. - hostFile.Close() - i.touchModificationAndStatusChangeTime(ctx, dir) + owner := fs.FileOwnerFromContext(ctx) + mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe - // Get the attributes of the file to create inode key. - qid, mask, attr, err := getattr(ctx, newFile) - if err != nil { - return nil, err + // N.B. FIFOs use major/minor numbers 0. + if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { + if i.session().overrides == nil || err != syscall.EPERM { + return err + } + // If gofer doesn't support mknod, check if we can create an internal fifo. + return i.createInternalFifo(ctx, dir, name, owner, perm) } - key := device.MultiDeviceKey{ - Device: attr.RDev, - SecondaryDevice: i.session().connID, - Inode: qid.Path, + i.touchModificationAndStatusChangeTime(ctx, dir) + return nil +} + +func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error { + if i.session().overrides == nil { + return syserror.EPERM } - // Create child dirent. + // Stabilize the override map while creation is in progress. + unlock := i.session().overrides.lock() + defer unlock() - // Get an unopened p9.File for the file we created so that it can be - // cloned and re-opened multiple times after creation. - _, unopened, err := i.fileState.file.walk(ctx, []string{name}) + sattr, fileOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe) if err != nil { - return nil, err + return err } - // Construct the InodeOperations. - sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr, true) + // First create a pipe. + p := pipe.NewPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize) + + // Wrap the fileOps with our Fifo. + iops := &fifo{ + InodeOperations: pipe.NewInodeOperations(ctx, perm, p), + fileIops: fileOps, + } + inode := fs.NewInode(ctx, iops, dir.MountSource, sattr) // Construct the positive Dirent. childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) - i.session().endpoints.add(key, childDir, ep) - return childDir, nil + i.session().overrides.addPipe(fileOps.fileState.key, childDir, inode) + return nil } -// CreateFifo implements fs.InodeOperations.CreateFifo. -func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { - if len(name) > maxFilenameLen { - return syserror.ENAMETOOLONG +// Caller must hold Session.endpoint lock. +func (i *inodeOperations) createEndpointFile(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions, fileType p9.FileMode) (fs.StableAttr, *inodeOperations, error) { + _, dirClone, err := i.fileState.file.walk(ctx, nil) + if err != nil { + return fs.StableAttr{}, nil, err } + // We're not going to use dirClone after return. + defer dirClone.close(ctx) + // Create a regular file in the gofer and then mark it as a socket by + // adding this inode key in the 'overrides' map. owner := fs.FileOwnerFromContext(ctx) - mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe - - // N.B. FIFOs use major/minor numbers 0. - if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { - return err + hostFile, err := dirClone.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)) + if err != nil { + return fs.StableAttr{}, nil, err } + // We're not going to use this file. + hostFile.Close() i.touchModificationAndStatusChangeTime(ctx, dir) - return nil + + // Get the attributes of the file to create inode key. + qid, mask, attr, err := getattr(ctx, dirClone) + if err != nil { + return fs.StableAttr{}, nil, err + } + + // Get an unopened p9.File for the file we created so that it can be + // cloned and re-opened multiple times after creation. + _, unopened, err := i.fileState.file.walk(ctx, []string{name}) + if err != nil { + return fs.StableAttr{}, nil, err + } + + // Construct new inode with file type overridden. + attr.Mode = changeType(attr.Mode, fileType) + sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr) + return sattr, iops, nil } // Remove implements InodeOperations.Remove. @@ -307,20 +367,23 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string return syserror.ENAMETOOLONG } - var key device.MultiDeviceKey - removeSocket := false - if i.session().endpoints != nil { - // Find out if file being deleted is a socket that needs to be + var key *device.MultiDeviceKey + if i.session().overrides != nil { + // Find out if file being deleted is a socket or pipe that needs to be // removed from endpoint map. if d, err := i.Lookup(ctx, dir, name); err == nil { defer d.DecRef() - if fs.IsSocket(d.Inode.StableAttr) { - child := d.Inode.InodeOperations.(*inodeOperations) - key = child.fileState.key - removeSocket = true - // Stabilize the endpoint map while deletion is in progress. - unlock := i.session().endpoints.lock() + if fs.IsSocket(d.Inode.StableAttr) || fs.IsPipe(d.Inode.StableAttr) { + switch iops := d.Inode.InodeOperations.(type) { + case *inodeOperations: + key = &iops.fileState.key + case *fifo: + key = &iops.fileIops.fileState.key + } + + // Stabilize the override map while deletion is in progress. + unlock := i.session().overrides.lock() defer unlock() } } @@ -329,8 +392,8 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string if err := i.fileState.file.unlinkAt(ctx, name, 0); err != nil { return err } - if removeSocket { - i.session().endpoints.remove(key) + if key != nil { + i.session().overrides.remove(*key) } i.touchModificationAndStatusChangeTime(ctx, dir) diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 498c4645a..f6b3ef178 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -33,60 +33,107 @@ import ( var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize // +stateify savable -type endpointMaps struct { - // mu protexts the direntMap, the keyMap, and the pathMap below. - mu sync.RWMutex `state:"nosave"` +type overrideInfo struct { + dirent *fs.Dirent + + // endpoint is set when dirent points to a socket. inode must not be set. + endpoint transport.BoundEndpoint + + // inode is set when dirent points to a pipe. endpoint must not be set. + inode *fs.Inode +} - // direntMap links sockets to their dirents. - // It is filled concurrently with the keyMap and is stored upon save. - // Before saving, this map is used to populate the pathMap. - direntMap map[transport.BoundEndpoint]*fs.Dirent +func (l *overrideInfo) inodeType() fs.InodeType { + switch { + case l.endpoint != nil: + return fs.Socket + case l.inode != nil: + return fs.Pipe + } + panic("endpoint or node must be set") +} - // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets. +// +stateify savable +type overrideMaps struct { + // mu protexts the keyMap, and the pathMap below. + mu sync.RWMutex `state:"nosave"` + + // keyMap links MultiDeviceKeys (containing inode IDs) to their sockets/pipes. // It is not stored during save because the inode ID may change upon restore. - keyMap map[device.MultiDeviceKey]transport.BoundEndpoint `state:"nosave"` + keyMap map[device.MultiDeviceKey]*overrideInfo `state:"nosave"` - // pathMap links the sockets to their paths. + // pathMap links the sockets/pipes to their paths. // It is filled before saving from the direntMap and is stored upon save. // Upon restore, this map is used to re-populate the keyMap. - pathMap map[transport.BoundEndpoint]string + pathMap map[*overrideInfo]string +} + +// addBoundEndpoint adds the bound endpoint to the map. +// A reference is taken on the dirent argument. +// +// Precondition: maps must have been locked with 'lock'. +func (e *overrideMaps) addBoundEndpoint(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) { + d.IncRef() + e.keyMap[key] = &overrideInfo{dirent: d, endpoint: ep} } -// add adds the endpoint to the maps. +// addPipe adds the pipe inode to the map. // A reference is taken on the dirent argument. // // Precondition: maps must have been locked with 'lock'. -func (e *endpointMaps) add(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) { - e.keyMap[key] = ep +func (e *overrideMaps) addPipe(key device.MultiDeviceKey, d *fs.Dirent, inode *fs.Inode) { d.IncRef() - e.direntMap[ep] = d + e.keyMap[key] = &overrideInfo{dirent: d, inode: inode} } // remove deletes the key from the maps. // // Precondition: maps must have been locked with 'lock'. -func (e *endpointMaps) remove(key device.MultiDeviceKey) { - endpoint := e.get(key) +func (e *overrideMaps) remove(key device.MultiDeviceKey) { + endpoint := e.keyMap[key] delete(e.keyMap, key) - - d := e.direntMap[endpoint] - d.DecRef() - delete(e.direntMap, endpoint) + endpoint.dirent.DecRef() } // lock blocks other addition and removal operations from happening while // the backing file is being created or deleted. Returns a function that unlocks // the endpoint map. -func (e *endpointMaps) lock() func() { +func (e *overrideMaps) lock() func() { e.mu.Lock() return func() { e.mu.Unlock() } } -// get returns the endpoint mapped to the given key. +// getBoundEndpoint returns the bound endpoint mapped to the given key. // -// Precondition: maps must have been locked for reading. -func (e *endpointMaps) get(key device.MultiDeviceKey) transport.BoundEndpoint { - return e.keyMap[key] +// Precondition: maps must have been locked. +func (e *overrideMaps) getBoundEndpoint(key device.MultiDeviceKey) transport.BoundEndpoint { + if v := e.keyMap[key]; v != nil { + return v.endpoint + } + return nil +} + +// getPipe returns the pipe inode mapped to the given key. +// +// Precondition: maps must have been locked. +func (e *overrideMaps) getPipe(key device.MultiDeviceKey) *fs.Inode { + if v := e.keyMap[key]; v != nil { + return v.inode + } + return nil +} + +// getType returns the inode type if there is a corresponding endpoint for the +// given key. Returns false otherwise. +func (e *overrideMaps) getType(key device.MultiDeviceKey) (fs.InodeType, bool) { + e.mu.Lock() + v := e.keyMap[key] + e.mu.Unlock() + + if v != nil { + return v.inodeType(), true + } + return 0, false } // session holds state for each 9p session established during sys_mount. @@ -137,16 +184,16 @@ type session struct { // mounter is the EUID/EGID that mounted this file system. mounter fs.FileOwner `state:"wait"` - // endpoints is used to map inodes that represent socket files to their - // corresponding endpoint. Socket files are created as regular files in the - // gofer and their presence in this map indicate that they should indeed be - // socket files. This allows unix domain sockets to be used with paths that - // belong to a gofer. + // overrides is used to map inodes that represent socket/pipes files to their + // corresponding endpoint/iops. These files are created as regular files in + // the gofer and their presence in this map indicate that they should indeed + // be socket/pipe files. This allows unix domain sockets and named pipes to + // be used with paths that belong to a gofer. // // TODO(gvisor.dev/issue/1200): there are few possible races with someone // stat'ing the file and another deleting it concurrently, where the file // will not be reported as socket file. - endpoints *endpointMaps `state:"wait"` + overrides *overrideMaps `state:"wait"` } // Destroy tears down the session. @@ -179,15 +226,21 @@ func (s *session) SaveInodeMapping(inode *fs.Inode, path string) { // This is very unintuitive. We *CANNOT* trust the inode's StableAttrs, // because overlay copyUp may have changed them out from under us. // So much for "immutable". - sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr - s.inodeMappings[sattr.InodeID] = path + switch iops := inode.InodeOperations.(type) { + case *inodeOperations: + s.inodeMappings[iops.fileState.sattr.InodeID] = path + case *fifo: + s.inodeMappings[iops.fileIops.fileState.sattr.InodeID] = path + default: + panic(fmt.Sprintf("Invalid type: %T", iops)) + } } -// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File and attributes -// (p9.QID, p9.AttrMask, p9.Attr). +// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File +// and attributes (p9.QID, p9.AttrMask, p9.Attr). // // Endpoints lock must not be held if socket == false. -func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr, socket bool) (fs.StableAttr, *inodeOperations) { +func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr) (fs.StableAttr, *inodeOperations) { deviceKey := device.MultiDeviceKey{ Device: attr.RDev, SecondaryDevice: s.connID, @@ -201,17 +254,11 @@ func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p BlockSize: bsize(attr), } - if s.endpoints != nil { - if socket { - sattr.Type = fs.Socket - } else { - // If unix sockets are allowed on this filesystem, check if this file is - // supposed to be a socket file. - unlock := s.endpoints.lock() - if s.endpoints.get(deviceKey) != nil { - sattr.Type = fs.Socket - } - unlock() + if s.overrides != nil && sattr.Type == fs.RegularFile { + // If overrides are allowed on this filesystem, check if this file is + // supposed to be of a different type, e.g. socket. + if t, ok := s.overrides.getType(deviceKey); ok { + sattr.Type = t } } @@ -267,7 +314,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF s.EnableLeakCheck("gofer.session") if o.privateunixsocket { - s.endpoints = newEndpointMaps() + s.overrides = newOverrideMaps() } // Construct the MountSource with the session and superBlockFlags. @@ -305,26 +352,24 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF return nil, err } - sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr, false) + sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr) return fs.NewInode(ctx, iops, m, sattr), nil } -// newEndpointMaps creates a new endpointMaps. -func newEndpointMaps() *endpointMaps { - return &endpointMaps{ - direntMap: make(map[transport.BoundEndpoint]*fs.Dirent), - keyMap: make(map[device.MultiDeviceKey]transport.BoundEndpoint), - pathMap: make(map[transport.BoundEndpoint]string), +// newOverrideMaps creates a new overrideMaps. +func newOverrideMaps() *overrideMaps { + return &overrideMaps{ + keyMap: make(map[device.MultiDeviceKey]*overrideInfo), + pathMap: make(map[*overrideInfo]string), } } -// fillKeyMap populates key and dirent maps upon restore from saved -// pathmap. +// fillKeyMap populates key and dirent maps upon restore from saved pathmap. func (s *session) fillKeyMap(ctx context.Context) error { - unlock := s.endpoints.lock() + unlock := s.overrides.lock() defer unlock() - for ep, dirPath := range s.endpoints.pathMap { + for ep, dirPath := range s.overrides.pathMap { _, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath)) if err != nil { return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err) @@ -341,25 +386,25 @@ func (s *session) fillKeyMap(ctx context.Context) error { Inode: qid.Path, } - s.endpoints.keyMap[key] = ep + s.overrides.keyMap[key] = ep } return nil } -// fillPathMap populates paths for endpoints from dirents in direntMap +// fillPathMap populates paths for overrides from dirents in direntMap // before save. func (s *session) fillPathMap() error { - unlock := s.endpoints.lock() + unlock := s.overrides.lock() defer unlock() - for ep, dir := range s.endpoints.direntMap { - mountRoot := dir.MountRoot() + for _, endpoint := range s.overrides.keyMap { + mountRoot := endpoint.dirent.MountRoot() defer mountRoot.DecRef() - dirPath, _ := dir.FullName(mountRoot) + dirPath, _ := endpoint.dirent.FullName(mountRoot) if dirPath == "" { return fmt.Errorf("error getting path from dirent") } - s.endpoints.pathMap[ep] = dirPath + s.overrides.pathMap[endpoint] = dirPath } return nil } @@ -368,7 +413,7 @@ func (s *session) fillPathMap() error { func (s *session) restoreEndpointMaps(ctx context.Context) error { // When restoring, only need to create the keyMap because the dirent and path // maps got stored through the save. - s.endpoints.keyMap = make(map[device.MultiDeviceKey]transport.BoundEndpoint) + s.overrides.keyMap = make(map[device.MultiDeviceKey]*overrideInfo) if err := s.fillKeyMap(ctx); err != nil { return fmt.Errorf("failed to insert sockets into endpoint map: %v", err) } @@ -376,6 +421,6 @@ func (s *session) restoreEndpointMaps(ctx context.Context) error { // Re-create pathMap because it can no longer be trusted as socket paths can // change while process continues to run. Empty pathMap will be re-filled upon // next save. - s.endpoints.pathMap = make(map[transport.BoundEndpoint]string) + s.overrides.pathMap = make(map[*overrideInfo]string) return nil } diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index 0285c5361..111da59f9 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -25,9 +25,9 @@ import ( // beforeSave is invoked by stateify. func (s *session) beforeSave() { - if s.endpoints != nil { + if s.overrides != nil { if err := s.fillPathMap(); err != nil { - panic("failed to save paths to endpoint map before saving" + err.Error()) + panic("failed to save paths to override map before saving" + err.Error()) } } } @@ -74,10 +74,10 @@ func (s *session) afterLoad() { panic(fmt.Sprintf("new attach name %v, want %v", opts.aname, s.aname)) } - // Check if endpointMaps exist when uds sockets are enabled - // (only pathmap will actualy have been saved). - if opts.privateunixsocket != (s.endpoints != nil) { - panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.endpoints != nil)) + // Check if overrideMaps exist when uds sockets are enabled (only pathmaps + // will actually have been saved). + if opts.privateunixsocket != (s.overrides != nil) { + panic(fmt.Sprintf("new privateunixsocket option %v, want %v", opts.privateunixsocket, s.overrides != nil)) } if args.Flags != s.superBlockFlags { panic(fmt.Sprintf("new mount flags %v, want %v", args.Flags, s.superBlockFlags)) diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 376cfce2c..10ba2f5f0 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -32,15 +32,15 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport. return nil } - if i.session().endpoints != nil { - unlock := i.session().endpoints.lock() + if i.session().overrides != nil { + unlock := i.session().overrides.lock() defer unlock() - ep := i.session().endpoints.get(i.fileState.key) + ep := i.session().overrides.getBoundEndpoint(i.fileState.key) if ep != nil { return ep } - // Not found in endpoints map, it may be a gofer backed unix socket... + // Not found in overrides map, it may be a gofer backed unix socket... } inode.IncRef() diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 40e974314..8f2b75a1c 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -225,7 +225,6 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:mknod_test", - use_tmpfs = True, # mknod is not supported over gofer. ) syscall_test( -- cgit v1.2.3 From 492229d0176c1af2ab4ea4cf91bf211e940b5b12 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Tue, 4 Feb 2020 11:28:36 -0800 Subject: VFS2 gofer client Updates #1198 Opening host pipes (by spinning in fdpipe) and host sockets is not yet complete, and will be done in a future CL. Major differences from VFS1 gofer client (sentry/fs/gofer), with varying levels of backportability: - "Cache policies" are replaced by InteropMode, which control the behavior of timestamps in addition to caching. Under InteropModeExclusive (analogous to cacheAll) and InteropModeWritethrough (analogous to cacheAllWritethrough), client timestamps are *not* written back to the server (it is not possible in 9P or Linux for clients to set ctime, so writing back client-authoritative timestamps results in incoherence between atime/mtime and ctime). Under InteropModeShared (analogous to cacheRemoteRevalidating), client timestamps are not used at all (remote filesystem clocks are authoritative). cacheNone is translated to InteropModeShared + new option filesystemOptions.specialRegularFiles. - Under InteropModeShared, "unstable attribute" reloading for permission checks, lookup, and revalidation are fused, which is feasible in VFS2 since gofer.filesystem controls path resolution. This results in a ~33% reduction in RPCs for filesystem operations compared to cacheRemoteRevalidating. For example, consider stat("/foo/bar/baz") where "/foo/bar/baz" fails revalidation, resulting in the instantiation of a new dentry: VFS1 RPCs: getattr("/") // fs.MountNamespace.FindLink() => fs.Inode.CheckPermission() => gofer.inodeOperations.check() => gofer.inodeOperations.UnstableAttr() walkgetattr("/", "foo") = fid1 // fs.Dirent.walk() => gofer.session.Revalidate() => gofer.cachePolicy.Revalidate() clunk(fid1) getattr("/foo") // CheckPermission walkgetattr("/foo", "bar") = fid2 // Revalidate clunk(fid2) getattr("/foo/bar") // CheckPermission walkgetattr("/foo/bar", "baz") = fid3 // Revalidate clunk(fid3) walkgetattr("/foo/bar", "baz") = fid4 // fs.Dirent.walk() => gofer.inodeOperations.Lookup getattr("/foo/bar/baz") // linux.stat() => gofer.inodeOperations.UnstableAttr() VFS2 RPCs: getattr("/") // gofer.filesystem.walkExistingLocked() walkgetattr("/", "foo") = fid1 // gofer.filesystem.stepExistingLocked() clunk(fid1) // No getattr: walkgetattr already updated metadata for permission check walkgetattr("/foo", "bar") = fid2 clunk(fid2) walkgetattr("/foo/bar", "baz") = fid3 // No clunk: fid3 used for new gofer.dentry // No getattr: walkgetattr already updated metadata for stat() - gofer.filesystem.unlinkAt() does not require instantiation of a dentry that represents the file to be deleted. Updates #898. - gofer.regularFileFD.OnClose() skips Tflushf for regular files under InteropModeExclusive, as it's nonsensical to request a remote file flush without flushing locally-buffered writes to that remote file first. - Symlink targets are cached when InteropModeShared is not in effect. - p9.QID.Path (which is already required to be unique for each file within a server, and is accordingly already synthesized from device/inode numbers in all known gofers) is used as-is for inode numbers, rather than being mapped along with attr.RDev in the client to yet another synthetic inode number. - Relevant parts of fsutil.CachingInodeOperations are inlined directly into gofer package code. This avoids having to duplicate part of its functionality in fsutil.HostMappable. PiperOrigin-RevId: 293190213 --- pkg/safemem/seq_unsafe.go | 17 + pkg/sentry/fs/fsutil/BUILD | 4 +- pkg/sentry/fs/fsutil/frame_ref_set.go | 13 +- pkg/sentry/fs/fsutil/inode_cached.go | 2 +- pkg/sentry/fsimpl/gofer/BUILD | 55 ++ pkg/sentry/fsimpl/gofer/directory.go | 190 +++++ pkg/sentry/fsimpl/gofer/filesystem.go | 1087 ++++++++++++++++++++++++++++ pkg/sentry/fsimpl/gofer/gofer.go | 1147 ++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/gofer/handle.go | 135 ++++ pkg/sentry/fsimpl/gofer/handle_unsafe.go | 66 ++ pkg/sentry/fsimpl/gofer/p9file.go | 219 ++++++ pkg/sentry/fsimpl/gofer/pagemath.go | 31 + pkg/sentry/fsimpl/gofer/regular_file.go | 860 ++++++++++++++++++++++ pkg/sentry/fsimpl/gofer/special_file.go | 159 +++++ pkg/sentry/fsimpl/gofer/symlink.go | 47 ++ pkg/sentry/fsimpl/gofer/time.go | 75 ++ pkg/sentry/fsimpl/tmpfs/filesystem.go | 2 +- pkg/sentry/socket/hostinet/socket.go | 23 +- 18 files changed, 4103 insertions(+), 29 deletions(-) create mode 100644 pkg/sentry/fsimpl/gofer/BUILD create mode 100644 pkg/sentry/fsimpl/gofer/directory.go create mode 100644 pkg/sentry/fsimpl/gofer/filesystem.go create mode 100644 pkg/sentry/fsimpl/gofer/gofer.go create mode 100644 pkg/sentry/fsimpl/gofer/handle.go create mode 100644 pkg/sentry/fsimpl/gofer/handle_unsafe.go create mode 100644 pkg/sentry/fsimpl/gofer/p9file.go create mode 100644 pkg/sentry/fsimpl/gofer/pagemath.go create mode 100644 pkg/sentry/fsimpl/gofer/regular_file.go create mode 100644 pkg/sentry/fsimpl/gofer/special_file.go create mode 100644 pkg/sentry/fsimpl/gofer/symlink.go create mode 100644 pkg/sentry/fsimpl/gofer/time.go (limited to 'pkg/sentry/fs') diff --git a/pkg/safemem/seq_unsafe.go b/pkg/safemem/seq_unsafe.go index 354a95dde..dcdfc9600 100644 --- a/pkg/safemem/seq_unsafe.go +++ b/pkg/safemem/seq_unsafe.go @@ -18,6 +18,7 @@ import ( "bytes" "fmt" "reflect" + "syscall" "unsafe" ) @@ -297,3 +298,19 @@ func ZeroSeq(dsts BlockSeq) (uint64, error) { } return done, nil } + +// IovecsFromBlockSeq returns a []syscall.Iovec representing seq. +func IovecsFromBlockSeq(bs BlockSeq) []syscall.Iovec { + iovs := make([]syscall.Iovec, 0, bs.NumBlocks()) + for ; !bs.IsEmpty(); bs = bs.Tail() { + b := bs.Head() + iovs = append(iovs, syscall.Iovec{ + Base: &b.ToSlice()[0], + Len: uint64(b.Len()), + }) + // We don't need to care about b.NeedSafecopy(), because the host + // kernel will handle such address ranges just fine (by returning + // EFAULT). + } + return iovs +} diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 4ab2a384f..789369220 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -28,13 +28,13 @@ go_template_instance( "platform": "gvisor.dev/gvisor/pkg/sentry/platform", }, package = "fsutil", - prefix = "frameRef", + prefix = "FrameRef", template = "//pkg/segment:generic_set", types = { "Key": "uint64", "Range": "platform.FileRange", "Value": "uint64", - "Functions": "frameRefSetFunctions", + "Functions": "FrameRefSetFunctions", }, ) diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go index dd63db32b..6564fd0c6 100644 --- a/pkg/sentry/fs/fsutil/frame_ref_set.go +++ b/pkg/sentry/fs/fsutil/frame_ref_set.go @@ -20,24 +20,25 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform" ) -type frameRefSetFunctions struct{} +// FrameRefSetFunctions implements segment.Functions for FrameRefSet. +type FrameRefSetFunctions struct{} // MinKey implements segment.Functions.MinKey. -func (frameRefSetFunctions) MinKey() uint64 { +func (FrameRefSetFunctions) MinKey() uint64 { return 0 } // MaxKey implements segment.Functions.MaxKey. -func (frameRefSetFunctions) MaxKey() uint64 { +func (FrameRefSetFunctions) MaxKey() uint64 { return math.MaxUint64 } // ClearValue implements segment.Functions.ClearValue. -func (frameRefSetFunctions) ClearValue(val *uint64) { +func (FrameRefSetFunctions) ClearValue(val *uint64) { } // Merge implements segment.Functions.Merge. -func (frameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) { +func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) { if val1 != val2 { return 0, false } @@ -45,6 +46,6 @@ func (frameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform. } // Split implements segment.Functions.Split. -func (frameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) { +func (FrameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) { return val, val } diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 573b8586e..800c8b4e1 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -111,7 +111,7 @@ type CachingInodeOperations struct { // refs tracks active references to data in the cache. // // refs is protected by dataMu. - refs frameRefSet + refs FrameRefSet } // CachingInodeOperationsOptions configures a CachingInodeOperations. diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD new file mode 100644 index 000000000..4ba76a1e8 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -0,0 +1,55 @@ +load("//tools:defs.bzl", "go_library") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +licenses(["notice"]) + +go_template_instance( + name = "dentry_list", + out = "dentry_list.go", + package = "gofer", + prefix = "dentry", + template = "//pkg/ilist:generic_list", + types = { + "Element": "*dentry", + "Linker": "*dentry", + }, +) + +go_library( + name = "gofer", + srcs = [ + "dentry_list.go", + "directory.go", + "filesystem.go", + "gofer.go", + "handle.go", + "handle_unsafe.go", + "p9file.go", + "pagemath.go", + "regular_file.go", + "special_file.go", + "symlink.go", + "time.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/fd", + "//pkg/fspath", + "//pkg/log", + "//pkg/p9", + "//pkg/safemem", + "//pkg/sentry/fs/fsutil", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/time", + "//pkg/sentry/memmap", + "//pkg/sentry/pgalloc", + "//pkg/sentry/platform", + "//pkg/sentry/usage", + "//pkg/sentry/vfs", + "//pkg/syserror", + "//pkg/unet", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go new file mode 100644 index 000000000..baa2cdd8e --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -0,0 +1,190 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "sync" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +func (d *dentry) isDir() bool { + return d.fileType() == linux.S_IFDIR +} + +// Preconditions: d.dirMu must be locked. d.isDir(). fs.opts.interop != +// InteropModeShared. +func (d *dentry) cacheNegativeChildLocked(name string) { + if d.negativeChildren == nil { + d.negativeChildren = make(map[string]struct{}) + } + d.negativeChildren[name] = struct{}{} +} + +type directoryFD struct { + fileDescription + vfs.DirectoryFileDescriptionDefaultImpl + + mu sync.Mutex + off int64 + dirents []vfs.Dirent +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *directoryFD) Release() { +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { + fd.mu.Lock() + defer fd.mu.Unlock() + + if fd.dirents == nil { + ds, err := fd.dentry().getDirents(ctx) + if err != nil { + return err + } + fd.dirents = ds + } + + for fd.off < int64(len(fd.dirents)) { + if !cb.Handle(fd.dirents[fd.off]) { + return nil + } + fd.off++ + } + return nil +} + +// Preconditions: d.isDir(). There exists at least one directoryFD representing d. +func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) { + // 9P2000.L's readdir does not specify behavior in the presence of + // concurrent mutation of an iterated directory, so implementations may + // duplicate or omit entries in this case, which violates POSIX semantics. + // Thus we read all directory entries while holding d.dirMu to exclude + // directory mutations. (Note that it is impossible for the client to + // exclude concurrent mutation from other remote filesystem users. Since + // there is no way to detect if the server has incorrectly omitted + // directory entries, we simply assume that the server is well-behaved + // under InteropModeShared.) This is inconsistent with Linux (which appears + // to assume that directory fids have the correct semantics, and translates + // struct file_operations::readdir calls directly to readdir RPCs), but is + // consistent with VFS1. + + d.fs.renameMu.RLock() + defer d.fs.renameMu.RUnlock() + d.dirMu.Lock() + defer d.dirMu.Unlock() + if d.dirents != nil { + return d.dirents, nil + } + + // It's not clear if 9P2000.L's readdir is expected to return "." and "..", + // so we generate them here. + parent := d.vfsd.ParentOrSelf().Impl().(*dentry) + dirents := []vfs.Dirent{ + { + Name: ".", + Type: linux.DT_DIR, + Ino: d.ino, + NextOff: 1, + }, + { + Name: "..", + Type: uint8(atomic.LoadUint32(&parent.mode) >> 12), + Ino: parent.ino, + NextOff: 2, + }, + } + off := uint64(0) + const count = 64 * 1024 // for consistency with the vfs1 client + d.handleMu.RLock() + defer d.handleMu.RUnlock() + if !d.handleReadable { + // This should not be possible because a readable handle should have + // been opened when the calling directoryFD was opened. + panic("gofer.dentry.getDirents called without a readable handle") + } + for { + p9ds, err := d.handle.file.readdir(ctx, off, count) + if err != nil { + return nil, err + } + if len(p9ds) == 0 { + // Cache dirents for future directoryFDs if permitted. + if d.fs.opts.interop != InteropModeShared { + d.dirents = dirents + } + return dirents, nil + } + for _, p9d := range p9ds { + if p9d.Name == "." || p9d.Name == ".." { + continue + } + dirent := vfs.Dirent{ + Name: p9d.Name, + Ino: p9d.QID.Path, + NextOff: int64(len(dirents) + 1), + } + // p9 does not expose 9P2000.U's DMDEVICE, DMNAMEDPIPE, or + // DMSOCKET. + switch p9d.Type { + case p9.TypeSymlink: + dirent.Type = linux.DT_LNK + case p9.TypeDir: + dirent.Type = linux.DT_DIR + default: + dirent.Type = linux.DT_REG + } + dirents = append(dirents, dirent) + } + off = p9ds[len(p9ds)-1].Offset + } +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + fd.mu.Lock() + defer fd.mu.Unlock() + + switch whence { + case linux.SEEK_SET: + if offset < 0 { + return 0, syserror.EINVAL + } + if offset == 0 { + // Ensure that the next call to fd.IterDirents() calls + // fd.dentry().getDirents(). + fd.dirents = nil + } + fd.off = offset + return fd.off, nil + case linux.SEEK_CUR: + offset += fd.off + if offset < 0 { + return 0, syserror.EINVAL + } + // Don't clear fd.dirents in this case, even if offset == 0. + fd.off = offset + return fd.off, nil + default: + return 0, syserror.EINVAL + } +} diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go new file mode 100644 index 000000000..8eb61debf --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -0,0 +1,1087 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Sync implements vfs.FilesystemImpl.Sync. +func (fs *filesystem) Sync(ctx context.Context) error { + // Snapshot current dentries and special files. + fs.syncMu.Lock() + ds := make([]*dentry, 0, len(fs.dentries)) + for d := range fs.dentries { + ds = append(ds, d) + } + sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs)) + for sffd := range fs.specialFileFDs { + sffds = append(sffds, sffd) + } + fs.syncMu.Unlock() + + // Return the first error we encounter, but sync everything we can + // regardless. + var retErr error + + // Sync regular files. + for _, d := range ds { + if !d.TryIncRef() { + continue + } + err := d.syncSharedHandle(ctx) + d.DecRef() + if err != nil && retErr == nil { + retErr = err + } + } + + // Sync special files, which may be writable but do not use dentry shared + // handles (so they won't be synced by the above). + for _, sffd := range sffds { + if !sffd.vfsfd.TryIncRef() { + continue + } + err := sffd.Sync(ctx) + sffd.vfsfd.DecRef() + if err != nil && retErr == nil { + retErr = err + } + } + + return retErr +} + +// maxFilenameLen is the maximum length of a filename. This is dictated by 9P's +// encoding of strings, which uses 2 bytes for the length prefix. +const maxFilenameLen = (1 << 16) - 1 + +// dentrySlicePool is a pool of *[]*dentry used to store dentries for which +// dentry.checkCachingLocked() must be called. The pool holds pointers to +// slices because Go lacks generics, so sync.Pool operates on interface{}, so +// every call to (what should be) sync.Pool<[]*dentry>.Put() allocates a copy +// of the slice header on the heap. +var dentrySlicePool = sync.Pool{ + New: func() interface{} { + ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity + return &ds + }, +} + +func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry { + if ds == nil { + ds = dentrySlicePool.Get().(*[]*dentry) + } + *ds = append(*ds, d) + return ds +} + +// Preconditions: ds != nil. +func putDentrySlice(ds *[]*dentry) { + // Allow dentries to be GC'd. + for i := range *ds { + (*ds)[i] = nil + } + *ds = (*ds)[:0] + dentrySlicePool.Put(ds) +} + +// stepLocked resolves rp.Component() to an existing file, starting from the +// given directory. +// +// Dentries which may become cached as a result of the traversal are appended +// to *ds. +// +// Preconditions: fs.renameMu must be locked. d.dirMu must be locked. +// !rp.Done(). If fs.opts.interop == InteropModeShared, then d's cached +// metadata must be up to date. +func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) { + if !d.isDir() { + return nil, syserror.ENOTDIR + } + if err := d.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err + } +afterSymlink: + name := rp.Component() + if name == "." { + rp.Advance() + return d, nil + } + if name == ".." { + parentVFSD, err := rp.ResolveParent(&d.vfsd) + if err != nil { + return nil, err + } + parent := parentVFSD.Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // We must assume that parentVFSD is correct, because if d has been + // moved elsewhere in the remote filesystem so that its parent has + // changed, we have no way of determining its new parent's location + // in the filesystem. Get updated metadata for parentVFSD. + _, attrMask, attr, err := parent.file.getAttr(ctx, dentryAttrMask()) + if err != nil { + return nil, err + } + parent.updateFromP9Attrs(attrMask, &attr) + } + rp.Advance() + return parent, nil + } + childVFSD, err := rp.ResolveChild(&d.vfsd, name) + if err != nil { + return nil, err + } + // FIXME(jamieliu): Linux performs revalidation before mount lookup + // (fs/namei.c:lookup_fast() => __d_lookup_rcu(), d_revalidate(), + // __follow_mount_rcu()). + child, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, childVFSD, ds) + if err != nil { + return nil, err + } + if child == nil { + return nil, syserror.ENOENT + } + if child.isSymlink() && rp.ShouldFollowSymlink() { + target, err := child.readlink(ctx, rp.Mount()) + if err != nil { + return nil, err + } + if err := rp.HandleSymlink(target); err != nil { + return nil, err + } + goto afterSymlink // don't check the current directory again + } + rp.Advance() + return child, nil +} + +// revalidateChildLocked must be called after a call to parent.vfsd.Child(name) +// or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be +// nil) to verify that the returned child (or lack thereof) is correct. If no file +// exists at name, revalidateChildLocked returns (nil, nil). +// +// Preconditions: fs.renameMu must be locked. parent.dirMu must be locked. +// parent.isDir(). name is not "." or "..". +// +// Postconditions: If revalidateChildLocked returns a non-nil dentry, its +// cached metadata is up to date. +func (fs *filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *dentry, name string, childVFSD *vfs.Dentry, ds **[]*dentry) (*dentry, error) { + if childVFSD != nil && fs.opts.interop != InteropModeShared { + // We have a cached dentry that is assumed to be correct. + return childVFSD.Impl().(*dentry), nil + } + // We either don't have a cached dentry or need to verify that it's still + // correct, either of which requires a remote lookup. Check if this name is + // valid before performing the lookup. + if len(name) > maxFilenameLen { + return nil, syserror.ENAMETOOLONG + } + // Check if we've already cached this lookup with a negative result. + if _, ok := parent.negativeChildren[name]; ok { + return nil, nil + } + // Perform the remote lookup. + qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name) + if err != nil && err != syserror.ENOENT { + return nil, err + } + if childVFSD != nil { + child := childVFSD.Impl().(*dentry) + if !file.isNil() && qid.Path == child.ino { + // The file at this path hasn't changed. Just update cached + // metadata. + file.close(ctx) + child.updateFromP9Attrs(attrMask, &attr) + return child, nil + } + // The file at this path has changed or no longer exists. Remove + // the stale dentry from the tree, and re-evaluate its caching + // status (i.e. if it has 0 references, drop it). + vfsObj.ForceDeleteDentry(childVFSD) + *ds = appendDentry(*ds, child) + childVFSD = nil + } + if file.isNil() { + // No file exists at this path now. Cache the negative lookup if + // allowed. + if fs.opts.interop != InteropModeShared { + parent.cacheNegativeChildLocked(name) + } + return nil, nil + } + // Create a new dentry representing the file. + child, err := fs.newDentry(ctx, file, qid, attrMask, &attr) + if err != nil { + file.close(ctx) + return nil, err + } + parent.IncRef() // reference held by child on its parent + parent.vfsd.InsertChild(&child.vfsd, name) + // For now, child has 0 references, so our caller should call + // child.checkCachingLocked(). + *ds = appendDentry(*ds, child) + return child, nil +} + +// walkParentDirLocked resolves all but the last path component of rp to an +// existing directory, starting from the given directory (which is usually +// rp.Start().Impl().(*dentry)). It does not check that the returned directory +// is searchable by the provider of rp. +// +// Preconditions: fs.renameMu must be locked. !rp.Done(). If fs.opts.interop == +// InteropModeShared, then d's cached metadata must be up to date. +func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) { + for !rp.Final() { + d.dirMu.Lock() + next, err := fs.stepLocked(ctx, rp, d, ds) + d.dirMu.Unlock() + if err != nil { + return nil, err + } + d = next + } + if !d.isDir() { + return nil, syserror.ENOTDIR + } + return d, nil +} + +// resolveLocked resolves rp to an existing file. +// +// Preconditions: fs.renameMu must be locked. +func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) { + d := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for rp.Start() as required by fs.stepLocked(). + if err := d.updateFromGetattr(ctx); err != nil { + return nil, err + } + } + for !rp.Done() { + d.dirMu.Lock() + next, err := fs.stepLocked(ctx, rp, d, ds) + d.dirMu.Unlock() + if err != nil { + return nil, err + } + d = next + } + if rp.MustBeDir() && !d.isDir() { + return nil, syserror.ENOTDIR + } + return d, nil +} + +// doCreateAt checks that creating a file at rp is permitted, then invokes +// create to do so. +// +// Preconditions: !rp.Done(). For the final path component in rp, +// !rp.ShouldFollowSymlink(). +func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + start := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for start as required by + // fs.walkParentDirLocked(). + if err := start.updateFromGetattr(ctx); err != nil { + return err + } + } + parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) + if err != nil { + return err + } + if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { + return err + } + if parent.isDeleted() { + return syserror.ENOENT + } + name := rp.Component() + if name == "." || name == ".." { + return syserror.EEXIST + } + if len(name) > maxFilenameLen { + return syserror.ENAMETOOLONG + } + if !dir && rp.MustBeDir() { + return syserror.ENOENT + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + parent.dirMu.Lock() + defer parent.dirMu.Unlock() + if fs.opts.interop == InteropModeShared { + // The existence of a dentry at name would be inconclusive because the + // file it represents may have been deleted from the remote filesystem, + // so we would need to make an RPC to revalidate the dentry. Just + // attempt the file creation RPC instead. If a file does exist, the RPC + // will fail with EEXIST like we would have. If the RPC succeeds, and a + // stale dentry exists, the dentry will fail revalidation next time + // it's used. + return create(parent, name) + } + if parent.vfsd.Child(name) != nil { + return syserror.EEXIST + } + // No cached dentry exists; however, there might still be an existing file + // at name. As above, we attempt the file creation RPC anyway. + if err := create(parent, name); err != nil { + return err + } + parent.touchCMtime(ctx) + delete(parent.negativeChildren, name) + parent.dirents = nil + return nil +} + +// Preconditions: !rp.Done(). +func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + start := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for start as required by + // fs.walkParentDirLocked(). + if err := start.updateFromGetattr(ctx); err != nil { + return err + } + } + parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) + if err != nil { + return err + } + if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { + return err + } + if err := rp.Mount().CheckBeginWrite(); err != nil { + return err + } + defer rp.Mount().EndWrite() + + name := rp.Component() + if dir { + if name == "." { + return syserror.EINVAL + } + if name == ".." { + return syserror.ENOTEMPTY + } + } else { + if name == "." || name == ".." { + return syserror.EISDIR + } + } + vfsObj := rp.VirtualFilesystem() + mntns := vfs.MountNamespaceFromContext(ctx) + parent.dirMu.Lock() + defer parent.dirMu.Unlock() + childVFSD := parent.vfsd.Child(name) + var child *dentry + // We only need a dentry representing the file at name if it can be a mount + // point. If childVFSD is nil, then it can't be a mount point. If childVFSD + // is non-nil but stale, the actual file can't be a mount point either; we + // detect this case by just speculatively calling PrepareDeleteDentry and + // only revalidating the dentry if that fails (indicating that the existing + // dentry is a mount point). + if childVFSD != nil { + child = childVFSD.Impl().(*dentry) + if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil { + child, err = fs.revalidateChildLocked(ctx, vfsObj, parent, name, childVFSD, &ds) + if err != nil { + return err + } + if child != nil { + childVFSD = &child.vfsd + if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil { + return err + } + } else { + childVFSD = nil + } + } + } else if _, ok := parent.negativeChildren[name]; ok { + return syserror.ENOENT + } + flags := uint32(0) + if dir { + if child != nil && !child.isDir() { + return syserror.ENOTDIR + } + flags = linux.AT_REMOVEDIR + } else { + if child != nil && child.isDir() { + return syserror.EISDIR + } + if rp.MustBeDir() { + return syserror.ENOTDIR + } + } + err = parent.file.unlinkAt(ctx, name, flags) + if err != nil { + if childVFSD != nil { + vfsObj.AbortDeleteDentry(childVFSD) + } + return err + } + if fs.opts.interop != InteropModeShared { + parent.touchCMtime(ctx) + parent.cacheNegativeChildLocked(name) + parent.dirents = nil + } + if child != nil { + child.setDeleted() + vfsObj.CommitDeleteDentry(childVFSD) + ds = appendDentry(ds, child) + } + return nil +} + +// renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls +// dentry.checkCachingLocked on all dentries in *ds with fs.renameMu locked for +// writing. +// +// ds is a pointer-to-pointer since defer evaluates its arguments immediately, +// but dentry slices are allocated lazily, and it's much easier to say "defer +// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() { +// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this. +func (fs *filesystem) renameMuRUnlockAndCheckCaching(ds **[]*dentry) { + fs.renameMu.RUnlock() + if *ds == nil { + return + } + if len(**ds) != 0 { + fs.renameMu.Lock() + for _, d := range **ds { + d.checkCachingLocked() + } + fs.renameMu.Unlock() + } + putDentrySlice(*ds) +} + +func (fs *filesystem) renameMuUnlockAndCheckCaching(ds **[]*dentry) { + if *ds == nil { + fs.renameMu.Unlock() + return + } + for _, d := range **ds { + d.checkCachingLocked() + } + fs.renameMu.Unlock() + putDentrySlice(*ds) +} + +// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. +func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return nil, err + } + if opts.CheckSearchable { + if !d.isDir() { + return nil, syserror.ENOTDIR + } + if err := d.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err + } + } + d.IncRef() + return &d.vfsd, nil +} + +// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. +func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + start := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for start as required by + // fs.walkParentDirLocked(). + if err := start.updateFromGetattr(ctx); err != nil { + return nil, err + } + } + d, err := fs.walkParentDirLocked(ctx, rp, start, &ds) + if err != nil { + return nil, err + } + d.IncRef() + return &d.vfsd, nil +} + +// LinkAt implements vfs.FilesystemImpl.LinkAt. +func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { + return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string) error { + if rp.Mount() != vd.Mount() { + return syserror.EXDEV + } + // 9P2000.L supports hard links, but we don't. + return syserror.EPERM + }) +} + +// MkdirAt implements vfs.FilesystemImpl.MkdirAt. +func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { + return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string) error { + creds := rp.Credentials() + _, err := parent.file.mkdir(ctx, name, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + return err + }) +} + +// MknodAt implements vfs.FilesystemImpl.MknodAt. +func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { + return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error { + creds := rp.Credentials() + _, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + return err + }) +} + +// OpenAt implements vfs.FilesystemImpl.OpenAt. +func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + // Reject O_TMPFILE, which is not supported; supporting it correctly in the + // presence of other remote filesystem users requires remote filesystem + // support, and it isn't clear that there's any way to implement this in + // 9P. + if opts.Flags&linux.O_TMPFILE != 0 { + return nil, syserror.EOPNOTSUPP + } + mayCreate := opts.Flags&linux.O_CREAT != 0 + mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL) + + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + + start := rp.Start().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + // Get updated metadata for start as required by fs.stepLocked(). + if err := start.updateFromGetattr(ctx); err != nil { + return nil, err + } + } + if rp.Done() { + return start.openLocked(ctx, rp, opts.Flags) + } + +afterTrailingSymlink: + parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) + if err != nil { + return nil, err + } + // Check for search permission in the parent directory. + if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err + } + // Determine whether or not we need to create a file. + parent.dirMu.Lock() + child, err := fs.stepLocked(ctx, rp, parent, &ds) + if err == syserror.ENOENT && mayCreate { + fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts) + parent.dirMu.Unlock() + return fd, err + } + if err != nil { + parent.dirMu.Unlock() + return nil, err + } + // Open existing child or follow symlink. + parent.dirMu.Unlock() + if mustCreate { + return nil, syserror.EEXIST + } + if child.isSymlink() && rp.ShouldFollowSymlink() { + target, err := child.readlink(ctx, rp.Mount()) + if err != nil { + return nil, err + } + if err := rp.HandleSymlink(target); err != nil { + return nil, err + } + start = parent + goto afterTrailingSymlink + } + return child.openLocked(ctx, rp, opts.Flags) +} + +// Preconditions: fs.renameMu must be locked. +func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, flags uint32) (*vfs.FileDescription, error) { + ats := vfs.AccessTypesForOpenFlags(flags) + if err := d.checkPermissions(rp.Credentials(), ats, d.isDir()); err != nil { + return nil, err + } + mnt := rp.Mount() + filetype := d.fileType() + switch { + case filetype == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD: + if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, flags&linux.O_TRUNC != 0); err != nil { + return nil, err + } + fd := ®ularFileFD{} + if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ + AllowDirectIO: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil + case filetype == linux.S_IFDIR: + // Can't open directories with O_CREAT. + if flags&linux.O_CREAT != 0 { + return nil, syserror.EISDIR + } + // Can't open directories writably. + if ats&vfs.MayWrite != 0 { + return nil, syserror.EISDIR + } + if flags&linux.O_DIRECT != 0 { + return nil, syserror.EINVAL + } + if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil { + return nil, err + } + fd := &directoryFD{} + if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } + return &fd.vfsfd, nil + case filetype == linux.S_IFLNK: + // Can't open symlinks without O_PATH (which is unimplemented). + return nil, syserror.ELOOP + default: + if flags&linux.O_DIRECT != 0 { + return nil, syserror.EINVAL + } + h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, flags&linux.O_TRUNC != 0) + if err != nil { + return nil, err + } + fd := &specialFileFD{ + handle: h, + } + if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + h.close(ctx) + return nil, err + } + return &fd.vfsfd, nil + } +} + +// Preconditions: d.fs.renameMu must be locked. d.dirMu must be locked. +func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { + if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { + return nil, err + } + if d.isDeleted() { + return nil, syserror.ENOENT + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return nil, err + } + defer mnt.EndWrite() + + // 9P2000.L's lcreate takes a fid representing the parent directory, and + // converts it into an open fid representing the created file, so we need + // to duplicate the directory fid first. + _, dirfile, err := d.file.walk(ctx, nil) + if err != nil { + return nil, err + } + creds := rp.Credentials() + name := rp.Component() + fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, (p9.OpenFlags)(opts.Flags), (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + if err != nil { + dirfile.close(ctx) + return nil, err + } + // Then we need to walk to the file we just created to get a non-open fid + // representing it, and to get its metadata. This must use d.file since, as + // explained above, dirfile was invalidated by dirfile.Create(). + walkQID, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name) + if err != nil { + openFile.close(ctx) + if fdobj != nil { + fdobj.Close() + } + return nil, err + } + // Sanity-check that we walked to the file we created. + if createQID.Path != walkQID.Path { + // Probably due to concurrent remote filesystem mutation? + ctx.Warningf("gofer.dentry.createAndOpenChildLocked: created file has QID %v before walk, QID %v after (interop=%v)", createQID, walkQID, d.fs.opts.interop) + nonOpenFile.close(ctx) + openFile.close(ctx) + if fdobj != nil { + fdobj.Close() + } + return nil, syserror.EAGAIN + } + + // Construct the new dentry. + child, err := d.fs.newDentry(ctx, nonOpenFile, createQID, attrMask, &attr) + if err != nil { + nonOpenFile.close(ctx) + openFile.close(ctx) + if fdobj != nil { + fdobj.Close() + } + return nil, err + } + // Incorporate the fid that was opened by lcreate. + useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD + if useRegularFileFD { + child.handleMu.Lock() + child.handle.file = openFile + if fdobj != nil { + child.handle.fd = int32(fdobj.Release()) + } + child.handleReadable = vfs.MayReadFileWithOpenFlags(opts.Flags) + child.handleWritable = vfs.MayWriteFileWithOpenFlags(opts.Flags) + child.handleMu.Unlock() + } + // Take a reference on the new dentry to be held by the new file + // description. (This reference also means that the new dentry is not + // eligible for caching yet, so we don't need to append to a dentry slice.) + child.refs = 1 + // Insert the dentry into the tree. + d.IncRef() // reference held by child on its parent d + d.vfsd.InsertChild(&child.vfsd, name) + if d.fs.opts.interop != InteropModeShared { + d.touchCMtime(ctx) + delete(d.negativeChildren, name) + d.dirents = nil + } + + // Finally, construct a file description representing the created file. + var childVFSFD *vfs.FileDescription + mnt.IncRef() + if useRegularFileFD { + fd := ®ularFileFD{} + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{ + AllowDirectIO: true, + }); err != nil { + return nil, err + } + childVFSFD = &fd.vfsfd + } else { + fd := &specialFileFD{ + handle: handle{ + file: openFile, + fd: -1, + }, + } + if fdobj != nil { + fd.handle.fd = int32(fdobj.Release()) + } + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + fd.handle.close(ctx) + return nil, err + } + childVFSFD = &fd.vfsfd + } + return childVFSFD, nil +} + +// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. +func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return "", err + } + if !d.isSymlink() { + return "", syserror.EINVAL + } + return d.readlink(ctx, rp.Mount()) +} + +// RenameAt implements vfs.FilesystemImpl.RenameAt. +func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { + if opts.Flags != 0 { + // Requires 9P support. + return syserror.EINVAL + } + + var ds *[]*dentry + fs.renameMu.Lock() + defer fs.renameMuUnlockAndCheckCaching(&ds) + newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds) + if err != nil { + return err + } + newName := rp.Component() + if newName == "." || newName == ".." { + return syserror.EBUSY + } + mnt := rp.Mount() + if mnt != oldParentVD.Mount() { + return syserror.EXDEV + } + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + + oldParent := oldParentVD.Dentry().Impl().(*dentry) + if fs.opts.interop == InteropModeShared { + if err := oldParent.updateFromGetattr(ctx); err != nil { + return err + } + } + if err := oldParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { + return err + } + vfsObj := rp.VirtualFilesystem() + // We need a dentry representing the renamed file since, if it's a + // directory, we need to check for write permission on it. + oldParent.dirMu.Lock() + defer oldParent.dirMu.Unlock() + renamed, err := fs.revalidateChildLocked(ctx, vfsObj, oldParent, oldName, oldParent.vfsd.Child(oldName), &ds) + if err != nil { + return err + } + if renamed == nil { + return syserror.ENOENT + } + if renamed.isDir() { + if renamed == newParent || renamed.vfsd.IsAncestorOf(&newParent.vfsd) { + return syserror.EINVAL + } + if oldParent != newParent { + if err := renamed.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { + return err + } + } + } else { + if opts.MustBeDir || rp.MustBeDir() { + return syserror.ENOTDIR + } + } + + if oldParent != newParent { + if err := newParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { + return err + } + newParent.dirMu.Lock() + defer newParent.dirMu.Unlock() + } + if newParent.isDeleted() { + return syserror.ENOENT + } + replacedVFSD := newParent.vfsd.Child(newName) + var replaced *dentry + // This is similar to unlinkAt, except: + // + // - We revalidate the replaced dentry unconditionally for simplicity. + // + // - If rp.MustBeDir(), then we need a dentry representing the replaced + // file regardless to confirm that it's a directory. + if replacedVFSD != nil || rp.MustBeDir() { + replaced, err = fs.revalidateChildLocked(ctx, vfsObj, newParent, newName, replacedVFSD, &ds) + if err != nil { + return err + } + if replaced != nil { + if replaced.isDir() { + if !renamed.isDir() { + return syserror.EISDIR + } + } else { + if rp.MustBeDir() || renamed.isDir() { + return syserror.ENOTDIR + } + } + replacedVFSD = &replaced.vfsd + } else { + replacedVFSD = nil + } + } + + if oldParent == newParent && oldName == newName { + return nil + } + if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), &renamed.vfsd, replacedVFSD); err != nil { + return err + } + if err := renamed.file.rename(ctx, newParent.file, newName); err != nil { + vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD) + return err + } + if fs.opts.interop != InteropModeShared { + oldParent.cacheNegativeChildLocked(oldName) + oldParent.dirents = nil + delete(newParent.negativeChildren, newName) + newParent.dirents = nil + } + vfsObj.CommitRenameReplaceDentry(&renamed.vfsd, &newParent.vfsd, newName, replacedVFSD) + return nil +} + +// RmdirAt implements vfs.FilesystemImpl.RmdirAt. +func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { + return fs.unlinkAt(ctx, rp, true /* dir */) +} + +// SetStatAt implements vfs.FilesystemImpl.SetStatAt. +func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return err + } + return d.setStat(ctx, rp.Credentials(), &opts.Stat, rp.Mount()) +} + +// StatAt implements vfs.FilesystemImpl.StatAt. +func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return linux.Statx{}, err + } + // Since walking updates metadata for all traversed dentries under + // InteropModeShared, including the returned one, we can return cached + // metadata here regardless of fs.opts.interop. + var stat linux.Statx + d.statTo(&stat) + return stat, nil +} + +// StatFSAt implements vfs.FilesystemImpl.StatFSAt. +func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return linux.Statfs{}, err + } + fsstat, err := d.file.statFS(ctx) + if err != nil { + return linux.Statfs{}, err + } + nameLen := uint64(fsstat.NameLength) + if nameLen > maxFilenameLen { + nameLen = maxFilenameLen + } + return linux.Statfs{ + // This is primarily for distinguishing a gofer file system in + // tests. Testing is important, so instead of defining + // something completely random, use a standard value. + Type: linux.V9FS_MAGIC, + BlockSize: int64(fsstat.BlockSize), + Blocks: fsstat.Blocks, + BlocksFree: fsstat.BlocksFree, + BlocksAvailable: fsstat.BlocksAvailable, + Files: fsstat.Files, + FilesFree: fsstat.FilesFree, + NameLength: nameLen, + }, nil +} + +// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. +func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { + return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error { + creds := rp.Credentials() + _, err := parent.file.symlink(ctx, target, name, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + return err + }) +} + +// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. +func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { + return fs.unlinkAt(ctx, rp, false /* dir */) +} + +// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. +func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return nil, err + } + return d.listxattr(ctx) +} + +// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. +func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return "", err + } + return d.getxattr(ctx, name) +} + +// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. +func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return err + } + return d.setxattr(ctx, &opts) +} + +// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. +func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { + var ds *[]*dentry + fs.renameMu.RLock() + defer fs.renameMuRUnlockAndCheckCaching(&ds) + d, err := fs.resolveLocked(ctx, rp, &ds) + if err != nil { + return err + } + return d.removexattr(ctx, name) +} + +// PrependPath implements vfs.FilesystemImpl.PrependPath. +func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { + fs.renameMu.RLock() + defer fs.renameMu.RUnlock() + return vfs.GenericPrependPath(vfsroot, vd, b) +} diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go new file mode 100644 index 000000000..d0552bd99 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -0,0 +1,1147 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package gofer provides a filesystem implementation that is backed by a 9p +// server, interchangably referred to as "gofers" throughout this package. +// +// Lock order: +// regularFileFD/directoryFD.mu +// filesystem.renameMu +// dentry.dirMu +// filesystem.syncMu +// dentry.metadataMu +// *** "memmap.Mappable locks" below this point +// dentry.mapsMu +// *** "memmap.Mappable locks taken by Translate" below this point +// dentry.handleMu +// dentry.dataMu +// +// Locking dentry.dirMu in multiple dentries requires holding +// filesystem.renameMu for writing. +package gofer + +import ( + "fmt" + "strconv" + "sync" + "sync/atomic" + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/usermem" +) + +// FilesystemType implements vfs.FilesystemType. +type FilesystemType struct{} + +// filesystem implements vfs.FilesystemImpl. +type filesystem struct { + vfsfs vfs.Filesystem + + // mfp is used to allocate memory that caches regular file contents. mfp is + // immutable. + mfp pgalloc.MemoryFileProvider + + // Immutable options. + opts filesystemOptions + + // client is the client used by this filesystem. client is immutable. + client *p9.Client + + // uid and gid are the effective KUID and KGID of the filesystem's creator, + // and are used as the owner and group for files that don't specify one. + // uid and gid are immutable. + uid auth.KUID + gid auth.KGID + + // renameMu serves two purposes: + // + // - It synchronizes path resolution with renaming initiated by this + // client. + // + // - It is held by path resolution to ensure that reachable dentries remain + // valid. A dentry is reachable by path resolution if it has a non-zero + // reference count (such that it is usable as vfs.ResolvingPath.Start() or + // is reachable from its children), or if it is a child dentry (such that + // it is reachable from its parent). + renameMu sync.RWMutex + + // cachedDentries contains all dentries with 0 references. (Due to race + // conditions, it may also contain dentries with non-zero references.) + // cachedDentriesLen is the number of dentries in cachedDentries. These + // fields are protected by renameMu. + cachedDentries dentryList + cachedDentriesLen uint64 + + // dentries contains all dentries in this filesystem. specialFileFDs + // contains all open specialFileFDs. These fields are protected by syncMu. + syncMu sync.Mutex + dentries map[*dentry]struct{} + specialFileFDs map[*specialFileFD]struct{} +} + +type filesystemOptions struct { + // "Standard" 9P options. + fd int + aname string + interop InteropMode // derived from the "cache" mount option + msize uint32 + version string + + // maxCachedDentries is the maximum number of dentries with 0 references + // retained by the client. + maxCachedDentries uint64 + + // If forcePageCache is true, host FDs may not be used for application + // memory mappings even if available; instead, the client must perform its + // own caching of regular file pages. This is primarily useful for testing. + forcePageCache bool + + // If limitHostFDTranslation is true, apply maxFillRange() constraints to + // host FD mappings returned by dentry.(memmap.Mappable).Translate(). This + // makes memory accounting behavior more consistent between cases where + // host FDs are / are not available, but may increase the frequency of + // sentry-handled page faults on files for which a host FD is available. + limitHostFDTranslation bool + + // If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote + // filesystem may not be coherent with writable host FDs opened later, so + // mappings of the former must be replaced by mappings of the latter. This + // is usually only the case when the remote filesystem is an overlayfs + // mount on Linux < 4.19. + overlayfsStaleRead bool + + // If regularFilesUseSpecialFileFD is true, application FDs representing + // regular files will use distinct file handles for each FD, in the same + // way that application FDs representing "special files" such as sockets + // do. Note that this disables client caching and mmap for regular files. + regularFilesUseSpecialFileFD bool +} + +// InteropMode controls the client's interaction with other remote filesystem +// users. +type InteropMode uint32 + +const ( + // InteropModeExclusive is appropriate when the filesystem client is the + // only user of the remote filesystem. + // + // - The client may cache arbitrary filesystem state (file data, metadata, + // filesystem structure, etc.). + // + // - Client changes to filesystem state may be sent to the remote + // filesystem asynchronously, except when server permission checks are + // necessary. + // + // - File timestamps are based on client clocks. This ensures that users of + // the client observe timestamps that are coherent with their own clocks + // and consistent with Linux's semantics. However, since it is not always + // possible for clients to set arbitrary atimes and mtimes, and never + // possible for clients to set arbitrary ctimes, file timestamp changes are + // stored in the client only and never sent to the remote filesystem. + InteropModeExclusive InteropMode = iota + + // InteropModeWritethrough is appropriate when there are read-only users of + // the remote filesystem that expect to observe changes made by the + // filesystem client. + // + // - The client may cache arbitrary filesystem state. + // + // - Client changes to filesystem state must be sent to the remote + // filesystem synchronously. + // + // - File timestamps are based on client clocks. As a corollary, access + // timestamp changes from other remote filesystem users will not be visible + // to the client. + InteropModeWritethrough + + // InteropModeShared is appropriate when there are users of the remote + // filesystem that may mutate its state other than the client. + // + // - The client must verify cached filesystem state before using it. + // + // - Client changes to filesystem state must be sent to the remote + // filesystem synchronously. + // + // - File timestamps are based on server clocks. This is necessary to + // ensure that timestamp changes are synchronized between remote filesystem + // users. + // + // Note that the correctness of InteropModeShared depends on the server + // correctly implementing 9P fids (i.e. each fid immutably represents a + // single filesystem object), even in the presence of remote filesystem + // mutations from other users. If this is violated, the behavior of the + // client is undefined. + InteropModeShared +) + +// GetFilesystem implements vfs.FilesystemType.GetFilesystem. +func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + mfp := pgalloc.MemoryFileProviderFromContext(ctx) + if mfp == nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: context does not provide a pgalloc.MemoryFileProvider") + return nil, nil, syserror.EINVAL + } + + mopts := vfs.GenericParseMountOptions(opts.Data) + var fsopts filesystemOptions + + // Check that the transport is "fd". + trans, ok := mopts["trans"] + if !ok { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: transport must be specified as 'trans=fd'") + return nil, nil, syserror.EINVAL + } + delete(mopts, "trans") + if trans != "fd" { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: unsupported transport: trans=%s", trans) + return nil, nil, syserror.EINVAL + } + + // Check that read and write FDs are provided and identical. + rfdstr, ok := mopts["rfdno"] + if !ok { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: read FD must be specified as 'rfdno=") + return nil, nil, syserror.EINVAL + } + delete(mopts, "rfdno") + rfd, err := strconv.Atoi(rfdstr) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid read FD: rfdno=%s", rfdstr) + return nil, nil, syserror.EINVAL + } + wfdstr, ok := mopts["wfdno"] + if !ok { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: write FD must be specified as 'wfdno=") + return nil, nil, syserror.EINVAL + } + delete(mopts, "wfdno") + wfd, err := strconv.Atoi(wfdstr) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid write FD: wfdno=%s", wfdstr) + return nil, nil, syserror.EINVAL + } + if rfd != wfd { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: read FD (%d) and write FD (%d) must be equal", rfd, wfd) + return nil, nil, syserror.EINVAL + } + fsopts.fd = rfd + + // Get the attach name. + fsopts.aname = "/" + if aname, ok := mopts["aname"]; ok { + delete(mopts, "aname") + fsopts.aname = aname + } + + // Parse the cache policy. For historical reasons, this defaults to the + // least generally-applicable option, InteropModeExclusive. + fsopts.interop = InteropModeExclusive + if cache, ok := mopts["cache"]; ok { + delete(mopts, "cache") + switch cache { + case "fscache": + fsopts.interop = InteropModeExclusive + case "fscache_writethrough": + fsopts.interop = InteropModeWritethrough + case "none": + fsopts.regularFilesUseSpecialFileFD = true + fallthrough + case "remote_revalidating": + fsopts.interop = InteropModeShared + default: + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: cache=%s", cache) + return nil, nil, syserror.EINVAL + } + } + + // Parse the 9P message size. + fsopts.msize = 1024 * 1024 // 1M, tested to give good enough performance up to 64M + if msizestr, ok := mopts["msize"]; ok { + delete(mopts, "msize") + msize, err := strconv.ParseUint(msizestr, 10, 32) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: msize=%s", msizestr) + return nil, nil, syserror.EINVAL + } + fsopts.msize = uint32(msize) + } + + // Parse the 9P protocol version. + fsopts.version = p9.HighestVersionString() + if version, ok := mopts["version"]; ok { + delete(mopts, "version") + fsopts.version = version + } + + // Parse the dentry cache limit. + fsopts.maxCachedDentries = 1000 + if str, ok := mopts["dentry_cache_limit"]; ok { + delete(mopts, "dentry_cache_limit") + maxCachedDentries, err := strconv.ParseUint(str, 10, 64) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str) + return nil, nil, syserror.EINVAL + } + fsopts.maxCachedDentries = maxCachedDentries + } + + // Handle simple flags. + if _, ok := mopts["force_page_cache"]; ok { + delete(mopts, "force_page_cache") + fsopts.forcePageCache = true + } + if _, ok := mopts["limit_host_fd_translation"]; ok { + delete(mopts, "limit_host_fd_translation") + fsopts.limitHostFDTranslation = true + } + if _, ok := mopts["overlayfs_stale_read"]; ok { + delete(mopts, "overlayfs_stale_read") + fsopts.overlayfsStaleRead = true + } + // fsopts.regularFilesUseSpecialFileFD can only be enabled by specifying + // "cache=none". + + // Check for unparsed options. + if len(mopts) != 0 { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: unknown options: %v", mopts) + return nil, nil, syserror.EINVAL + } + + // Establish a connection with the server. + conn, err := unet.NewSocket(fsopts.fd) + if err != nil { + return nil, nil, err + } + + // Perform version negotiation with the server. + ctx.UninterruptibleSleepStart(false) + client, err := p9.NewClient(conn, fsopts.msize, fsopts.version) + ctx.UninterruptibleSleepFinish(false) + if err != nil { + conn.Close() + return nil, nil, err + } + // Ownership of conn has been transferred to client. + + // Perform attach to obtain the filesystem root. + ctx.UninterruptibleSleepStart(false) + attached, err := client.Attach(fsopts.aname) + ctx.UninterruptibleSleepFinish(false) + if err != nil { + client.Close() + return nil, nil, err + } + attachFile := p9file{attached} + qid, attrMask, attr, err := attachFile.getAttr(ctx, dentryAttrMask()) + if err != nil { + attachFile.close(ctx) + client.Close() + return nil, nil, err + } + + // Construct the filesystem object. + fs := &filesystem{ + mfp: mfp, + opts: fsopts, + uid: creds.EffectiveKUID, + gid: creds.EffectiveKGID, + client: client, + dentries: make(map[*dentry]struct{}), + specialFileFDs: make(map[*specialFileFD]struct{}), + } + fs.vfsfs.Init(vfsObj, fs) + + // Construct the root dentry. + root, err := fs.newDentry(ctx, attachFile, qid, attrMask, &attr) + if err != nil { + attachFile.close(ctx) + fs.vfsfs.DecRef() + return nil, nil, err + } + // Set the root's reference count to 2. One reference is returned to the + // caller, and the other is deliberately leaked to prevent the root from + // being "cached" and subsequently evicted. Its resources will still be + // cleaned up by fs.Release(). + root.refs = 2 + + return &fs.vfsfs, &root.vfsd, nil +} + +// Release implements vfs.FilesystemImpl.Release. +func (fs *filesystem) Release() { + ctx := context.Background() + mf := fs.mfp.MemoryFile() + + fs.syncMu.Lock() + for d := range fs.dentries { + d.handleMu.Lock() + d.dataMu.Lock() + if d.handleWritable { + // Write dirty cached data to the remote file. + if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt); err != nil { + log.Warningf("gofer.filesystem.Release: failed to flush dentry: %v", err) + } + // TODO(jamieliu): Do we need to flushf/fsync d? + } + // Discard cached pages. + d.cache.DropAll(mf) + d.dirty.RemoveAll() + d.dataMu.Unlock() + // Close the host fd if one exists. + if d.handle.fd >= 0 { + syscall.Close(int(d.handle.fd)) + d.handle.fd = -1 + } + d.handleMu.Unlock() + } + // There can't be any specialFileFDs still using fs, since each such + // FileDescription would hold a reference on a Mount holding a reference on + // fs. + fs.syncMu.Unlock() + + // Close the connection to the server. This implicitly clunks all fids. + fs.client.Close() +} + +// dentry implements vfs.DentryImpl. +type dentry struct { + vfsd vfs.Dentry + + // refs is the reference count. Each dentry holds a reference on its + // parent, even if disowned. refs is accessed using atomic memory + // operations. + refs int64 + + // fs is the owning filesystem. fs is immutable. + fs *filesystem + + // We don't support hard links, so each dentry maps 1:1 to an inode. + + // file is the unopened p9.File that backs this dentry. file is immutable. + file p9file + + // If deleted is non-zero, the file represented by this dentry has been + // deleted. deleted is accessed using atomic memory operations. + deleted uint32 + + // If cached is true, dentryEntry links dentry into + // filesystem.cachedDentries. cached and dentryEntry are protected by + // filesystem.renameMu. + cached bool + dentryEntry + + dirMu sync.Mutex + + // If this dentry represents a directory, and InteropModeShared is not in + // effect, negativeChildren is a set of child names in this directory that + // are known not to exist. negativeChildren is protected by dirMu. + negativeChildren map[string]struct{} + + // If this dentry represents a directory, InteropModeShared is not in + // effect, and dirents is not nil, it is a cache of all entries in the + // directory, in the order they were returned by the server. dirents is + // protected by dirMu. + dirents []vfs.Dirent + + // Cached metadata; protected by metadataMu and accessed using atomic + // memory operations unless otherwise specified. + metadataMu sync.Mutex + ino uint64 // immutable + mode uint32 // type is immutable, perms are mutable + uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic + gid uint32 // auth.KGID, but ... + blockSize uint32 // 0 if unknown + // Timestamps, all nsecs from the Unix epoch. + atime int64 + mtime int64 + ctime int64 + btime int64 + // File size, protected by both metadataMu and dataMu (i.e. both must be + // locked to mutate it). + size uint64 + + mapsMu sync.Mutex + + // If this dentry represents a regular file, mappings tracks mappings of + // the file into memmap.MappingSpaces. mappings is protected by mapsMu. + mappings memmap.MappingSet + + // If this dentry represents a regular file or directory: + // + // - handle is the I/O handle used by all regularFileFDs/directoryFDs + // representing this dentry. + // + // - handleReadable is true if handle is readable. + // + // - handleWritable is true if handle is writable. + // + // Invariants: + // + // - If handleReadable == handleWritable == false, then handle.file == nil + // (i.e. there is no open handle). Conversely, if handleReadable || + // handleWritable == true, then handle.file != nil (i.e. there is an open + // handle). + // + // - handleReadable and handleWritable cannot transition from true to false + // (i.e. handles may not be downgraded). + // + // These fields are protected by handleMu. + handleMu sync.RWMutex + handle handle + handleReadable bool + handleWritable bool + + dataMu sync.RWMutex + + // If this dentry represents a regular file that is client-cached, cache + // maps offsets into the cached file to offsets into + // filesystem.mfp.MemoryFile() that store the file's data. cache is + // protected by dataMu. + cache fsutil.FileRangeSet + + // If this dentry represents a regular file that is client-cached, dirty + // tracks dirty segments in cache. dirty is protected by dataMu. + dirty fsutil.DirtySet + + // pf implements platform.File for mappings of handle.fd. + pf dentryPlatformFile + + // If this dentry represents a symbolic link, InteropModeShared is not in + // effect, and haveTarget is true, target is the symlink target. haveTarget + // and target are protected by dataMu. + haveTarget bool + target string +} + +// dentryAttrMask returns a p9.AttrMask enabling all attributes used by the +// gofer client. +func dentryAttrMask() p9.AttrMask { + return p9.AttrMask{ + Mode: true, + UID: true, + GID: true, + ATime: true, + MTime: true, + CTime: true, + Size: true, + BTime: true, + } +} + +// newDentry creates a new dentry representing the given file. The dentry +// initially has no references, but is not cached; it is the caller's +// responsibility to set the dentry's reference count and/or call +// dentry.checkCachingLocked() as appropriate. +func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, mask p9.AttrMask, attr *p9.Attr) (*dentry, error) { + if !mask.Mode { + ctx.Warningf("can't create gofer.dentry without file type") + return nil, syserror.EIO + } + if attr.Mode.FileType() == p9.ModeRegular && !mask.Size { + ctx.Warningf("can't create regular file gofer.dentry without file size") + return nil, syserror.EIO + } + + d := &dentry{ + fs: fs, + file: file, + ino: qid.Path, + mode: uint32(attr.Mode), + uid: uint32(fs.uid), + gid: uint32(fs.gid), + blockSize: usermem.PageSize, + handle: handle{ + fd: -1, + }, + } + d.pf.dentry = d + if mask.UID { + d.uid = uint32(attr.UID) + } + if mask.GID { + d.gid = uint32(attr.GID) + } + if mask.Size { + d.size = attr.Size + } + if attr.BlockSize != 0 { + d.blockSize = uint32(attr.BlockSize) + } + if mask.ATime { + d.atime = dentryTimestampFromP9(attr.ATimeSeconds, attr.ATimeNanoSeconds) + } + if mask.MTime { + d.mtime = dentryTimestampFromP9(attr.MTimeSeconds, attr.MTimeNanoSeconds) + } + if mask.CTime { + d.ctime = dentryTimestampFromP9(attr.CTimeSeconds, attr.CTimeNanoSeconds) + } + if mask.BTime { + d.btime = dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds) + } + d.vfsd.Init(d) + + fs.syncMu.Lock() + fs.dentries[d] = struct{}{} + fs.syncMu.Unlock() + return d, nil +} + +// updateFromP9Attrs is called to update d's metadata after an update from the +// remote filesystem. +func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) { + d.metadataMu.Lock() + if mask.Mode { + if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want { + d.metadataMu.Unlock() + panic(fmt.Sprintf("gofer.dentry file type changed from %#o to %#o", want, got)) + } + atomic.StoreUint32(&d.mode, uint32(attr.Mode)) + } + if mask.UID { + atomic.StoreUint32(&d.uid, uint32(attr.UID)) + } + if mask.GID { + atomic.StoreUint32(&d.gid, uint32(attr.GID)) + } + // There is no P9_GETATTR_* bit for I/O block size. + if attr.BlockSize != 0 { + atomic.StoreUint32(&d.blockSize, uint32(attr.BlockSize)) + } + if mask.ATime { + atomic.StoreInt64(&d.atime, dentryTimestampFromP9(attr.ATimeSeconds, attr.ATimeNanoSeconds)) + } + if mask.MTime { + atomic.StoreInt64(&d.mtime, dentryTimestampFromP9(attr.MTimeSeconds, attr.MTimeNanoSeconds)) + } + if mask.CTime { + atomic.StoreInt64(&d.ctime, dentryTimestampFromP9(attr.CTimeSeconds, attr.CTimeNanoSeconds)) + } + if mask.BTime { + atomic.StoreInt64(&d.btime, dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds)) + } + if mask.Size { + d.dataMu.Lock() + atomic.StoreUint64(&d.size, attr.Size) + d.dataMu.Unlock() + } + d.metadataMu.Unlock() +} + +func (d *dentry) updateFromGetattr(ctx context.Context) error { + // Use d.handle.file, which represents a 9P fid that has been opened, in + // preference to d.file, which represents a 9P fid that has not. This may + // be significantly more efficient in some implementations. + var ( + file p9file + handleMuRLocked bool + ) + d.handleMu.RLock() + if !d.handle.file.isNil() { + file = d.handle.file + handleMuRLocked = true + } else { + file = d.file + d.handleMu.RUnlock() + } + _, attrMask, attr, err := file.getAttr(ctx, dentryAttrMask()) + if handleMuRLocked { + d.handleMu.RUnlock() + } + if err != nil { + return err + } + d.updateFromP9Attrs(attrMask, &attr) + return nil +} + +func (d *dentry) fileType() uint32 { + return atomic.LoadUint32(&d.mode) & linux.S_IFMT +} + +func (d *dentry) statTo(stat *linux.Statx) { + stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS | linux.STATX_BTIME + stat.Blksize = atomic.LoadUint32(&d.blockSize) + stat.Nlink = 1 + if d.isDir() { + stat.Nlink = 2 + } + stat.UID = atomic.LoadUint32(&d.uid) + stat.GID = atomic.LoadUint32(&d.gid) + stat.Mode = uint16(atomic.LoadUint32(&d.mode)) + stat.Ino = d.ino + stat.Size = atomic.LoadUint64(&d.size) + // This is consistent with regularFileFD.Seek(), which treats regular files + // as having no holes. + stat.Blocks = (stat.Size + 511) / 512 + stat.Atime = statxTimestampFromDentry(atomic.LoadInt64(&d.atime)) + stat.Btime = statxTimestampFromDentry(atomic.LoadInt64(&d.btime)) + stat.Ctime = statxTimestampFromDentry(atomic.LoadInt64(&d.ctime)) + stat.Mtime = statxTimestampFromDentry(atomic.LoadInt64(&d.mtime)) + // TODO(jamieliu): device number +} + +func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mnt *vfs.Mount) error { + if stat.Mask == 0 { + return nil + } + if stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_SIZE) != 0 { + return syserror.EPERM + } + if err := vfs.CheckSetStat(creds, stat, uint16(atomic.LoadUint32(&d.mode))&^linux.S_IFMT, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil { + return err + } + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + setLocalAtime := false + setLocalMtime := false + if d.fs.opts.interop != InteropModeShared { + // Timestamp updates will be handled locally. + setLocalAtime = stat.Mask&linux.STATX_ATIME != 0 + setLocalMtime = stat.Mask&linux.STATX_MTIME != 0 + stat.Mask &^= linux.STATX_ATIME | linux.STATX_MTIME + if !setLocalMtime && (stat.Mask&linux.STATX_SIZE != 0) { + // Truncate updates mtime. + setLocalMtime = true + stat.Mtime.Nsec = linux.UTIME_NOW + } + } + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + if stat.Mask != 0 { + if err := d.file.setAttr(ctx, p9.SetAttrMask{ + Permissions: stat.Mask&linux.STATX_MODE != 0, + UID: stat.Mask&linux.STATX_UID != 0, + GID: stat.Mask&linux.STATX_GID != 0, + Size: stat.Mask&linux.STATX_SIZE != 0, + ATime: stat.Mask&linux.STATX_ATIME != 0, + MTime: stat.Mask&linux.STATX_MTIME != 0, + ATimeNotSystemTime: stat.Atime.Nsec != linux.UTIME_NOW, + MTimeNotSystemTime: stat.Mtime.Nsec != linux.UTIME_NOW, + }, p9.SetAttr{ + Permissions: p9.FileMode(stat.Mode), + UID: p9.UID(stat.UID), + GID: p9.GID(stat.GID), + Size: stat.Size, + ATimeSeconds: uint64(stat.Atime.Sec), + ATimeNanoSeconds: uint64(stat.Atime.Nsec), + MTimeSeconds: uint64(stat.Mtime.Sec), + MTimeNanoSeconds: uint64(stat.Mtime.Nsec), + }); err != nil { + return err + } + } + if d.fs.opts.interop == InteropModeShared { + // There's no point to updating d's metadata in this case since it'll + // be overwritten by revalidation before the next time it's used + // anyway. (InteropModeShared inhibits client caching of regular file + // data, so there's no cache to truncate either.) + return nil + } + now, haveNow := nowFromContext(ctx) + if !haveNow { + ctx.Warningf("gofer.dentry.setStat: current time not available") + } + if stat.Mask&linux.STATX_MODE != 0 { + atomic.StoreUint32(&d.mode, d.fileType()|uint32(stat.Mode)) + } + if stat.Mask&linux.STATX_UID != 0 { + atomic.StoreUint32(&d.uid, stat.UID) + } + if stat.Mask&linux.STATX_GID != 0 { + atomic.StoreUint32(&d.gid, stat.GID) + } + if setLocalAtime { + if stat.Atime.Nsec == linux.UTIME_NOW { + if haveNow { + atomic.StoreInt64(&d.atime, now) + } + } else { + atomic.StoreInt64(&d.atime, dentryTimestampFromStatx(stat.Atime)) + } + } + if setLocalMtime { + if stat.Mtime.Nsec == linux.UTIME_NOW { + if haveNow { + atomic.StoreInt64(&d.mtime, now) + } + } else { + atomic.StoreInt64(&d.mtime, dentryTimestampFromStatx(stat.Mtime)) + } + } + if haveNow { + atomic.StoreInt64(&d.ctime, now) + } + if stat.Mask&linux.STATX_SIZE != 0 { + d.dataMu.Lock() + oldSize := d.size + d.size = stat.Size + // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings + // below. This allows concurrent calls to Read/Translate/etc. These + // functions synchronize with truncation by refusing to use cache + // contents beyond the new d.size. (We are still holding d.metadataMu, + // so we can't race with Write or another truncate.) + d.dataMu.Unlock() + if d.size < oldSize { + oldpgend := pageRoundUp(oldSize) + newpgend := pageRoundUp(d.size) + if oldpgend != newpgend { + d.mapsMu.Lock() + d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ + // Compare Linux's mm/truncate.c:truncate_setsize() => + // truncate_pagecache() => + // mm/memory.c:unmap_mapping_range(evencows=1). + InvalidatePrivate: true, + }) + d.mapsMu.Unlock() + } + // We are now guaranteed that there are no translations of + // truncated pages, and can remove them from the cache. Since + // truncated pages have been removed from the remote file, they + // should be dropped without being written back. + d.dataMu.Lock() + d.cache.Truncate(d.size, d.fs.mfp.MemoryFile()) + d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend}) + d.dataMu.Unlock() + } + } + return nil +} + +func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error { + return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&d.mode))&0777, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))) +} + +// IncRef implements vfs.DentryImpl.IncRef. +func (d *dentry) IncRef() { + // d.refs may be 0 if d.fs.renameMu is locked, which serializes against + // d.checkCachingLocked(). + atomic.AddInt64(&d.refs, 1) +} + +// TryIncRef implements vfs.DentryImpl.TryIncRef. +func (d *dentry) TryIncRef() bool { + for { + refs := atomic.LoadInt64(&d.refs) + if refs == 0 { + return false + } + if atomic.CompareAndSwapInt64(&d.refs, refs, refs+1) { + return true + } + } +} + +// DecRef implements vfs.DentryImpl.DecRef. +func (d *dentry) DecRef() { + if refs := atomic.AddInt64(&d.refs, -1); refs == 0 { + d.fs.renameMu.Lock() + d.checkCachingLocked() + d.fs.renameMu.Unlock() + } else if refs < 0 { + panic("gofer.dentry.DecRef() called without holding a reference") + } +} + +// checkCachingLocked should be called after d's reference count becomes 0 or it +// becomes disowned. +// +// Preconditions: d.fs.renameMu must be locked for writing. +func (d *dentry) checkCachingLocked() { + // Dentries with a non-zero reference count must be retained. (The only way + // to obtain a reference on a dentry with zero references is via path + // resolution, which requires renameMu, so if d.refs is zero then it will + // remain zero while we hold renameMu for writing.) + if atomic.LoadInt64(&d.refs) != 0 { + if d.cached { + d.fs.cachedDentries.Remove(d) + d.fs.cachedDentriesLen-- + d.cached = false + } + return + } + // Non-child dentries with zero references are no longer reachable by path + // resolution and should be dropped immediately. + if d.vfsd.Parent() == nil || d.vfsd.IsDisowned() { + if d.cached { + d.fs.cachedDentries.Remove(d) + d.fs.cachedDentriesLen-- + d.cached = false + } + d.destroyLocked() + return + } + // If d is already cached, just move it to the front of the LRU. + if d.cached { + d.fs.cachedDentries.Remove(d) + d.fs.cachedDentries.PushFront(d) + return + } + // Cache the dentry, then evict the least recently used cached dentry if + // the cache becomes over-full. + d.fs.cachedDentries.PushFront(d) + d.fs.cachedDentriesLen++ + d.cached = true + if d.fs.cachedDentriesLen > d.fs.opts.maxCachedDentries { + victim := d.fs.cachedDentries.Back() + d.fs.cachedDentries.Remove(victim) + d.fs.cachedDentriesLen-- + victim.cached = false + // victim.refs may have become non-zero from an earlier path + // resolution since it was inserted into fs.cachedDentries; see + // dentry.incRefLocked(). Either way, we brought + // fs.cachedDentriesLen back down to fs.opts.maxCachedDentries, so + // we don't loop. + if atomic.LoadInt64(&victim.refs) == 0 { + if victimParentVFSD := victim.vfsd.Parent(); victimParentVFSD != nil { + victimParent := victimParentVFSD.Impl().(*dentry) + victimParent.dirMu.Lock() + if !victim.vfsd.IsDisowned() { + // victim can't be a mount point (in any mount + // namespace), since VFS holds references on mount + // points. + d.fs.vfsfs.VirtualFilesystem().ForceDeleteDentry(&victim.vfsd) + // We're only deleting the dentry, not the file it + // represents, so we don't need to update + // victimParent.dirents etc. + } + victimParent.dirMu.Unlock() + } + victim.destroyLocked() + } + } +} + +// Preconditions: d.fs.renameMu must be locked for writing. d.refs == 0. d is +// not a child dentry. +func (d *dentry) destroyLocked() { + ctx := context.Background() + d.handleMu.Lock() + if !d.handle.file.isNil() { + mf := d.fs.mfp.MemoryFile() + d.dataMu.Lock() + // Write dirty pages back to the remote filesystem. + if d.handleWritable { + if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { + log.Warningf("gofer.dentry.DecRef: failed to write dirty data back: %v", err) + } + } + // Discard cached data. + d.cache.DropAll(mf) + d.dirty.RemoveAll() + d.dataMu.Unlock() + // Clunk open fids and close open host FDs. + d.handle.close(ctx) + } + d.handleMu.Unlock() + d.file.close(ctx) + // Remove d from the set of all dentries. + d.fs.syncMu.Lock() + delete(d.fs.dentries, d) + d.fs.syncMu.Unlock() + // Drop the reference held by d on its parent. + if parentVFSD := d.vfsd.Parent(); parentVFSD != nil { + parent := parentVFSD.Impl().(*dentry) + // This is parent.DecRef() without recursive locking of d.fs.renameMu. + if refs := atomic.AddInt64(&parent.refs, -1); refs == 0 { + parent.checkCachingLocked() + } else if refs < 0 { + panic("gofer.dentry.DecRef() called without holding a reference") + } + } +} + +func (d *dentry) isDeleted() bool { + return atomic.LoadUint32(&d.deleted) != 0 +} + +func (d *dentry) setDeleted() { + atomic.StoreUint32(&d.deleted, 1) +} + +func (d *dentry) listxattr(ctx context.Context) ([]string, error) { + return nil, syserror.ENOTSUP +} + +func (d *dentry) getxattr(ctx context.Context, name string) (string, error) { + // TODO(jamieliu): add vfs.GetxattrOptions.Size + return d.file.getXattr(ctx, name, linux.XATTR_SIZE_MAX) +} + +func (d *dentry) setxattr(ctx context.Context, opts *vfs.SetxattrOptions) error { + return d.file.setXattr(ctx, opts.Name, opts.Value, opts.Flags) +} + +func (d *dentry) removexattr(ctx context.Context, name string) error { + return syserror.ENOTSUP +} + +// Preconditions: d.isRegularFile() || d.isDirectory(). +func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool) error { + // O_TRUNC unconditionally requires us to obtain a new handle (opened with + // O_TRUNC). + if !trunc { + d.handleMu.RLock() + if (!read || d.handleReadable) && (!write || d.handleWritable) { + // The current handle is sufficient. + d.handleMu.RUnlock() + return nil + } + d.handleMu.RUnlock() + } + + haveOldFD := false + d.handleMu.Lock() + if (read && !d.handleReadable) || (write && !d.handleWritable) || trunc { + // Get a new handle. + wantReadable := d.handleReadable || read + wantWritable := d.handleWritable || write + h, err := openHandle(ctx, d.file, wantReadable, wantWritable, trunc) + if err != nil { + d.handleMu.Unlock() + return err + } + if !d.handle.file.isNil() { + // Check that old and new handles are compatible: If the old handle + // includes a host file descriptor but the new one does not, or + // vice versa, old and new memory mappings may be incoherent. + haveOldFD = d.handle.fd >= 0 + haveNewFD := h.fd >= 0 + if haveOldFD != haveNewFD { + d.handleMu.Unlock() + ctx.Warningf("gofer.dentry.ensureSharedHandle: can't change host FD availability from %v to %v across dentry handle upgrade", haveOldFD, haveNewFD) + h.close(ctx) + return syserror.EIO + } + if haveOldFD { + // We may have raced with callers of d.pf.FD() that are now + // using the old file descriptor, preventing us from safely + // closing it. We could handle this by invalidating existing + // memmap.Translations, but this is expensive. Instead, use + // dup2() to make the old file descriptor refer to the new file + // description, then close the new file descriptor (which is no + // longer needed). Racing callers may use the old or new file + // description, but this doesn't matter since they refer to the + // same file (unless d.fs.opts.overlayfsStaleRead is true, + // which we handle separately). + if err := syscall.Dup2(int(h.fd), int(d.handle.fd)); err != nil { + d.handleMu.Unlock() + ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to dup fd %d to fd %d: %v", h.fd, d.handle.fd, err) + h.close(ctx) + return err + } + syscall.Close(int(h.fd)) + h.fd = d.handle.fd + if d.fs.opts.overlayfsStaleRead { + // Replace sentry mappings of the old FD with mappings of + // the new FD, since the two are not necessarily coherent. + if err := d.pf.hostFileMapper.RegenerateMappings(int(h.fd)); err != nil { + d.handleMu.Unlock() + ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to replace sentry mappings of old FD with mappings of new FD: %v", err) + h.close(ctx) + return err + } + } + // Clunk the old fid before making the new handle visible (by + // unlocking d.handleMu). + d.handle.file.close(ctx) + } + } + // Switch to the new handle. + d.handle = h + d.handleReadable = wantReadable + d.handleWritable = wantWritable + } + d.handleMu.Unlock() + + if d.fs.opts.overlayfsStaleRead && haveOldFD { + // Invalidate application mappings that may be using the old FD; they + // will be replaced with mappings using the new FD after future calls + // to d.Translate(). This requires holding d.mapsMu, which precedes + // d.handleMu in the lock order. + d.mapsMu.Lock() + d.mappings.InvalidateAll(memmap.InvalidateOpts{}) + d.mapsMu.Unlock() + } + + return nil +} + +// fileDescription is embedded by gofer implementations of +// vfs.FileDescriptionImpl. +type fileDescription struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl +} + +func (fd *fileDescription) filesystem() *filesystem { + return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) +} + +func (fd *fileDescription) dentry() *dentry { + return fd.vfsfd.Dentry().Impl().(*dentry) +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + d := fd.dentry() + if d.fs.opts.interop == InteropModeShared && opts.Mask&(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME|linux.STATX_SIZE|linux.STATX_BLOCKS|linux.STATX_BTIME) != 0 && opts.Sync != linux.AT_STATX_DONT_SYNC { + // TODO(jamieliu): Use specialFileFD.handle.file for the getattr if + // available? + if err := d.updateFromGetattr(ctx); err != nil { + return linux.Statx{}, err + } + } + var stat linux.Statx + d.statTo(&stat) + return stat, nil +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + return fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, fd.vfsfd.Mount()) +} + +// Listxattr implements vfs.FileDescriptionImpl.Listxattr. +func (fd *fileDescription) Listxattr(ctx context.Context) ([]string, error) { + return fd.dentry().listxattr(ctx) +} + +// Getxattr implements vfs.FileDescriptionImpl.Getxattr. +func (fd *fileDescription) Getxattr(ctx context.Context, name string) (string, error) { + return fd.dentry().getxattr(ctx, name) +} + +// Setxattr implements vfs.FileDescriptionImpl.Setxattr. +func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error { + return fd.dentry().setxattr(ctx, &opts) +} + +// Removexattr implements vfs.FileDescriptionImpl.Removexattr. +func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { + return fd.dentry().removexattr(ctx, name) +} diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go new file mode 100644 index 000000000..cfe66f797 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/handle.go @@ -0,0 +1,135 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/safemem" +) + +// handle represents a remote "open file descriptor", consisting of an opened +// fid (p9.File) and optionally a host file descriptor. +type handle struct { + file p9file + fd int32 // -1 if unavailable +} + +// Preconditions: read || write. +func openHandle(ctx context.Context, file p9file, read, write, trunc bool) (handle, error) { + _, newfile, err := file.walk(ctx, nil) + if err != nil { + return handle{fd: -1}, err + } + var flags p9.OpenFlags + switch { + case read && !write: + flags = p9.ReadOnly + case !read && write: + flags = p9.WriteOnly + case read && write: + flags = p9.ReadWrite + } + if trunc { + flags |= p9.OpenTruncate + } + fdobj, _, _, err := newfile.open(ctx, flags) + if err != nil { + newfile.close(ctx) + return handle{fd: -1}, err + } + fd := int32(-1) + if fdobj != nil { + fd = int32(fdobj.Release()) + } + return handle{ + file: newfile, + fd: fd, + }, nil +} + +func (h *handle) close(ctx context.Context) { + h.file.close(ctx) + h.file = p9file{} + if h.fd >= 0 { + syscall.Close(int(h.fd)) + h.fd = -1 + } +} + +func (h *handle) readToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { + if dsts.IsEmpty() { + return 0, nil + } + if h.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + n, err := hostPreadv(h.fd, dsts, int64(offset)) + ctx.UninterruptibleSleepFinish(false) + return n, err + } + if dsts.NumBlocks() == 1 && !dsts.Head().NeedSafecopy() { + n, err := h.file.readAt(ctx, dsts.Head().ToSlice(), offset) + return uint64(n), err + } + // Buffer the read since p9.File.ReadAt() takes []byte. + buf := make([]byte, dsts.NumBytes()) + n, err := h.file.readAt(ctx, buf, offset) + if n == 0 { + return 0, err + } + if cp, cperr := safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:n]))); cperr != nil { + return cp, cperr + } + return uint64(n), err +} + +func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { + if srcs.IsEmpty() { + return 0, nil + } + if h.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + n, err := hostPwritev(h.fd, srcs, int64(offset)) + ctx.UninterruptibleSleepFinish(false) + return n, err + } + if srcs.NumBlocks() == 1 && !srcs.Head().NeedSafecopy() { + n, err := h.file.writeAt(ctx, srcs.Head().ToSlice(), offset) + return uint64(n), err + } + // Buffer the write since p9.File.WriteAt() takes []byte. + buf := make([]byte, srcs.NumBytes()) + cp, cperr := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), srcs) + if cp == 0 { + return 0, cperr + } + n, err := h.file.writeAt(ctx, buf[:cp], offset) + if err != nil { + return uint64(n), err + } + return cp, cperr +} + +func (h *handle) sync(ctx context.Context) error { + if h.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + err := syscall.Fsync(int(h.fd)) + ctx.UninterruptibleSleepFinish(false) + return err + } + return h.file.fsync(ctx) +} diff --git a/pkg/sentry/fsimpl/gofer/handle_unsafe.go b/pkg/sentry/fsimpl/gofer/handle_unsafe.go new file mode 100644 index 000000000..19560ab26 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/handle_unsafe.go @@ -0,0 +1,66 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "syscall" + "unsafe" + + "gvisor.dev/gvisor/pkg/safemem" +) + +// Preconditions: !dsts.IsEmpty(). +func hostPreadv(fd int32, dsts safemem.BlockSeq, off int64) (uint64, error) { + // No buffering is necessary regardless of safecopy; host syscalls will + // return EFAULT if appropriate, instead of raising SIGBUS. + if dsts.NumBlocks() == 1 { + // Use pread() instead of preadv() to avoid iovec allocation and + // copying. + dst := dsts.Head() + n, _, e := syscall.Syscall6(syscall.SYS_PREAD64, uintptr(fd), dst.Addr(), uintptr(dst.Len()), uintptr(off), 0, 0) + if e != 0 { + return 0, e + } + return uint64(n), nil + } + iovs := safemem.IovecsFromBlockSeq(dsts) + n, _, e := syscall.Syscall6(syscall.SYS_PREADV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(off), 0, 0) + if e != 0 { + return 0, e + } + return uint64(n), nil +} + +// Preconditions: !srcs.IsEmpty(). +func hostPwritev(fd int32, srcs safemem.BlockSeq, off int64) (uint64, error) { + // No buffering is necessary regardless of safecopy; host syscalls will + // return EFAULT if appropriate, instead of raising SIGBUS. + if srcs.NumBlocks() == 1 { + // Use pwrite() instead of pwritev() to avoid iovec allocation and + // copying. + src := srcs.Head() + n, _, e := syscall.Syscall6(syscall.SYS_PWRITE64, uintptr(fd), src.Addr(), uintptr(src.Len()), uintptr(off), 0, 0) + if e != 0 { + return 0, e + } + return uint64(n), nil + } + iovs := safemem.IovecsFromBlockSeq(srcs) + n, _, e := syscall.Syscall6(syscall.SYS_PWRITEV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(off), 0, 0) + if e != 0 { + return 0, e + } + return uint64(n), nil +} diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go new file mode 100644 index 000000000..755ac2985 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/p9file.go @@ -0,0 +1,219 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/syserror" +) + +// p9file is a wrapper around p9.File that provides methods that are +// Context-aware. +type p9file struct { + file p9.File +} + +func (f p9file) isNil() bool { + return f.file == nil +} + +func (f p9file) walk(ctx context.Context, names []string) ([]p9.QID, p9file, error) { + ctx.UninterruptibleSleepStart(false) + qids, newfile, err := f.file.Walk(names) + ctx.UninterruptibleSleepFinish(false) + return qids, p9file{newfile}, err +} + +func (f p9file) walkGetAttr(ctx context.Context, names []string) ([]p9.QID, p9file, p9.AttrMask, p9.Attr, error) { + ctx.UninterruptibleSleepStart(false) + qids, newfile, attrMask, attr, err := f.file.WalkGetAttr(names) + ctx.UninterruptibleSleepFinish(false) + return qids, p9file{newfile}, attrMask, attr, err +} + +// walkGetAttrOne is a wrapper around p9.File.WalkGetAttr that takes a single +// path component and returns a single qid. +func (f p9file) walkGetAttrOne(ctx context.Context, name string) (p9.QID, p9file, p9.AttrMask, p9.Attr, error) { + ctx.UninterruptibleSleepStart(false) + qids, newfile, attrMask, attr, err := f.file.WalkGetAttr([]string{name}) + ctx.UninterruptibleSleepFinish(false) + if err != nil { + return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, err + } + if len(qids) != 1 { + ctx.Warningf("p9.File.WalkGetAttr returned %d qids (%v), wanted 1", len(qids), qids) + if newfile != nil { + p9file{newfile}.close(ctx) + } + return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, syserror.EIO + } + return qids[0], p9file{newfile}, attrMask, attr, nil +} + +func (f p9file) statFS(ctx context.Context) (p9.FSStat, error) { + ctx.UninterruptibleSleepStart(false) + fsstat, err := f.file.StatFS() + ctx.UninterruptibleSleepFinish(false) + return fsstat, err +} + +func (f p9file) getAttr(ctx context.Context, req p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) { + ctx.UninterruptibleSleepStart(false) + qid, attrMask, attr, err := f.file.GetAttr(req) + ctx.UninterruptibleSleepFinish(false) + return qid, attrMask, attr, err +} + +func (f p9file) setAttr(ctx context.Context, valid p9.SetAttrMask, attr p9.SetAttr) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.SetAttr(valid, attr) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) getXattr(ctx context.Context, name string, size uint64) (string, error) { + ctx.UninterruptibleSleepStart(false) + val, err := f.file.GetXattr(name, size) + ctx.UninterruptibleSleepFinish(false) + return val, err +} + +func (f p9file) setXattr(ctx context.Context, name, value string, flags uint32) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.SetXattr(name, value, flags) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Allocate(mode, offset, length) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) close(ctx context.Context) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Close() + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) open(ctx context.Context, flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { + ctx.UninterruptibleSleepStart(false) + fdobj, qid, iounit, err := f.file.Open(flags) + ctx.UninterruptibleSleepFinish(false) + return fdobj, qid, iounit, err +} + +func (f p9file) readAt(ctx context.Context, p []byte, offset uint64) (int, error) { + ctx.UninterruptibleSleepStart(false) + n, err := f.file.ReadAt(p, offset) + ctx.UninterruptibleSleepFinish(false) + return n, err +} + +func (f p9file) writeAt(ctx context.Context, p []byte, offset uint64) (int, error) { + ctx.UninterruptibleSleepStart(false) + n, err := f.file.WriteAt(p, offset) + ctx.UninterruptibleSleepFinish(false) + return n, err +} + +func (f p9file) fsync(ctx context.Context) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.FSync() + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) create(ctx context.Context, name string, flags p9.OpenFlags, permissions p9.FileMode, uid p9.UID, gid p9.GID) (*fd.FD, p9file, p9.QID, uint32, error) { + ctx.UninterruptibleSleepStart(false) + fdobj, newfile, qid, iounit, err := f.file.Create(name, flags, permissions, uid, gid) + ctx.UninterruptibleSleepFinish(false) + return fdobj, p9file{newfile}, qid, iounit, err +} + +func (f p9file) mkdir(ctx context.Context, name string, permissions p9.FileMode, uid p9.UID, gid p9.GID) (p9.QID, error) { + ctx.UninterruptibleSleepStart(false) + qid, err := f.file.Mkdir(name, permissions, uid, gid) + ctx.UninterruptibleSleepFinish(false) + return qid, err +} + +func (f p9file) symlink(ctx context.Context, oldName string, newName string, uid p9.UID, gid p9.GID) (p9.QID, error) { + ctx.UninterruptibleSleepStart(false) + qid, err := f.file.Symlink(oldName, newName, uid, gid) + ctx.UninterruptibleSleepFinish(false) + return qid, err +} + +func (f p9file) link(ctx context.Context, target p9file, newName string) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Link(target.file, newName) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) mknod(ctx context.Context, name string, mode p9.FileMode, major uint32, minor uint32, uid p9.UID, gid p9.GID) (p9.QID, error) { + ctx.UninterruptibleSleepStart(false) + qid, err := f.file.Mknod(name, mode, major, minor, uid, gid) + ctx.UninterruptibleSleepFinish(false) + return qid, err +} + +func (f p9file) rename(ctx context.Context, newDir p9file, newName string) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Rename(newDir.file, newName) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) unlinkAt(ctx context.Context, name string, flags uint32) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.UnlinkAt(name, flags) + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) readdir(ctx context.Context, offset uint64, count uint32) ([]p9.Dirent, error) { + ctx.UninterruptibleSleepStart(false) + dirents, err := f.file.Readdir(offset, count) + ctx.UninterruptibleSleepFinish(false) + return dirents, err +} + +func (f p9file) readlink(ctx context.Context) (string, error) { + ctx.UninterruptibleSleepStart(false) + target, err := f.file.Readlink() + ctx.UninterruptibleSleepFinish(false) + return target, err +} + +func (f p9file) flush(ctx context.Context) error { + ctx.UninterruptibleSleepStart(false) + err := f.file.Flush() + ctx.UninterruptibleSleepFinish(false) + return err +} + +func (f p9file) connect(ctx context.Context, flags p9.ConnectFlags) (*fd.FD, error) { + ctx.UninterruptibleSleepStart(false) + fdobj, err := f.file.Connect(flags) + ctx.UninterruptibleSleepFinish(false) + return fdobj, err +} diff --git a/pkg/sentry/fsimpl/gofer/pagemath.go b/pkg/sentry/fsimpl/gofer/pagemath.go new file mode 100644 index 000000000..847cb0784 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/pagemath.go @@ -0,0 +1,31 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/usermem" +) + +// This are equivalent to usermem.Addr.RoundDown/Up, but without the +// potentially truncating conversion to usermem.Addr. This is necessary because +// there is no way to define generic "PageRoundDown/Up" functions in Go. + +func pageRoundDown(x uint64) uint64 { + return x &^ (usermem.PageSize - 1) +} + +func pageRoundUp(x uint64) uint64 { + return pageRoundDown(x + usermem.PageSize - 1) +} diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go new file mode 100644 index 000000000..8e11e06b3 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -0,0 +1,860 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "fmt" + "io" + "math" + "sync" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +func (d *dentry) isRegularFile() bool { + return d.fileType() == linux.S_IFREG +} + +type regularFileFD struct { + fileDescription + + // off is the file offset. off is protected by mu. + mu sync.Mutex + off int64 +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *regularFileFD) Release() { +} + +// OnClose implements vfs.FileDescriptionImpl.OnClose. +func (fd *regularFileFD) OnClose(ctx context.Context) error { + if !fd.vfsfd.IsWritable() { + return nil + } + // Skip flushing if writes may be buffered by the client, since (as with + // the VFS1 client) we don't flush buffered writes on close anyway. + d := fd.dentry() + if d.fs.opts.interop == InteropModeExclusive { + return nil + } + d.handleMu.RLock() + defer d.handleMu.RUnlock() + return d.handle.file.flush(ctx) +} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + // Check for reading at EOF before calling into MM (but not under + // InteropModeShared, which makes d.size unreliable). + d := fd.dentry() + if d.fs.opts.interop != InteropModeShared && uint64(offset) >= atomic.LoadUint64(&d.size) { + return 0, io.EOF + } + + if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { + // Lock d.metadataMu for the rest of the read to prevent d.size from + // changing. + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + // Write dirty cached pages that will be touched by the read back to + // the remote file. + if err := d.writeback(ctx, offset, dst.NumBytes()); err != nil { + return 0, err + } + } + + rw := getDentryReadWriter(ctx, d, offset) + if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { + // Require the read to go to the remote file. + rw.direct = true + } + n, err := dst.CopyOutFrom(ctx, rw) + putDentryReadWriter(rw) + if d.fs.opts.interop != InteropModeShared { + // Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed(). + d.touchAtime(ctx, fd.vfsfd.Mount()) + } + return n, err +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.PRead(ctx, dst, fd.off, opts) + fd.off += n + fd.mu.Unlock() + return n, err +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + d := fd.dentry() + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + if d.fs.opts.interop != InteropModeShared { + // Compare Linux's mm/filemap.c:__generic_file_write_iter() => + // file_update_time(). This is d.touchCMtime(), but without locking + // d.metadataMu (recursively). + if now, ok := nowFromContext(ctx); ok { + atomic.StoreInt64(&d.mtime, now) + atomic.StoreInt64(&d.ctime, now) + } + } + if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { + // Write dirty cached pages that will be touched by the write back to + // the remote file. + if err := d.writeback(ctx, offset, src.NumBytes()); err != nil { + return 0, err + } + // Remove touched pages from the cache. + pgstart := pageRoundDown(uint64(offset)) + pgend := pageRoundUp(uint64(offset + src.NumBytes())) + if pgend < pgstart { + return 0, syserror.EINVAL + } + mr := memmap.MappableRange{pgstart, pgend} + var freed []platform.FileRange + d.dataMu.Lock() + cseg := d.cache.LowerBoundSegment(mr.Start) + for cseg.Ok() && cseg.Start() < mr.End { + cseg = d.cache.Isolate(cseg, mr) + freed = append(freed, platform.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()}) + cseg = d.cache.Remove(cseg).NextSegment() + } + d.dataMu.Unlock() + // Invalidate mappings of removed pages. + d.mapsMu.Lock() + d.mappings.Invalidate(mr, memmap.InvalidateOpts{}) + d.mapsMu.Unlock() + // Finally free pages removed from the cache. + mf := d.fs.mfp.MemoryFile() + for _, freedFR := range freed { + mf.DecRef(freedFR) + } + } + rw := getDentryReadWriter(ctx, d, offset) + if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 { + // Require the write to go to the remote file. + rw.direct = true + } + n, err := src.CopyInTo(ctx, rw) + putDentryReadWriter(rw) + if n != 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 { + // Write dirty cached pages touched by the write back to the remote + // file. + if err := d.writeback(ctx, offset, src.NumBytes()); err != nil { + return 0, err + } + // Request the remote filesystem to sync the remote file. + if err := d.handle.file.fsync(ctx); err != nil { + return 0, err + } + } + return n, err +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.PWrite(ctx, src, fd.off, opts) + fd.off += n + fd.mu.Unlock() + return n, err +} + +type dentryReadWriter struct { + ctx context.Context + d *dentry + off uint64 + direct bool +} + +var dentryReadWriterPool = sync.Pool{ + New: func() interface{} { + return &dentryReadWriter{} + }, +} + +func getDentryReadWriter(ctx context.Context, d *dentry, offset int64) *dentryReadWriter { + rw := dentryReadWriterPool.Get().(*dentryReadWriter) + rw.ctx = ctx + rw.d = d + rw.off = uint64(offset) + rw.direct = false + return rw +} + +func putDentryReadWriter(rw *dentryReadWriter) { + rw.ctx = nil + rw.d = nil + dentryReadWriterPool.Put(rw) +} + +// ReadToBlocks implements safemem.Reader.ReadToBlocks. +func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { + if dsts.IsEmpty() { + return 0, nil + } + + // If we have a mmappable host FD (which must be used here to ensure + // coherence with memory-mapped I/O), or if InteropModeShared is in effect + // (which prevents us from caching file contents and makes dentry.size + // unreliable), or if the file was opened O_DIRECT, read directly from + // dentry.handle without locking dentry.dataMu. + rw.d.handleMu.RLock() + if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { + n, err := rw.d.handle.readToBlocksAt(rw.ctx, dsts, rw.off) + rw.d.handleMu.RUnlock() + rw.off += n + return n, err + } + + // Otherwise read from/through the cache. + mf := rw.d.fs.mfp.MemoryFile() + fillCache := mf.ShouldCacheEvictable() + var dataMuUnlock func() + if fillCache { + rw.d.dataMu.Lock() + dataMuUnlock = rw.d.dataMu.Unlock + } else { + rw.d.dataMu.RLock() + dataMuUnlock = rw.d.dataMu.RUnlock + } + + // Compute the range to read (limited by file size and overflow-checked). + if rw.off >= rw.d.size { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return 0, io.EOF + } + end := rw.d.size + if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end { + end = rend + } + + var done uint64 + seg, gap := rw.d.cache.Find(rw.off) + for rw.off < end { + mr := memmap.MappableRange{rw.off, end} + switch { + case seg.Ok(): + // Get internal mappings from the cache. + ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) + if err != nil { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, err + } + + // Copy from internal mappings. + n, err := safemem.CopySeq(dsts, ims) + done += n + rw.off += n + dsts = dsts.DropFirst64(n) + if err != nil { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, err + } + + // Continue. + seg, gap = seg.NextNonEmpty() + + case gap.Ok(): + gapMR := gap.Range().Intersect(mr) + if fillCache { + // Read into the cache, then re-enter the loop to read from the + // cache. + reqMR := memmap.MappableRange{ + Start: pageRoundDown(gapMR.Start), + End: pageRoundUp(gapMR.End), + } + optMR := gap.Range() + err := rw.d.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mf, usage.PageCache, rw.d.handle.readToBlocksAt) + mf.MarkEvictable(rw.d, pgalloc.EvictableRange{optMR.Start, optMR.End}) + seg, gap = rw.d.cache.Find(rw.off) + if !seg.Ok() { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, err + } + // err might have occurred in part of gap.Range() outside + // gapMR. Forget about it for now; if the error matters and + // persists, we'll run into it again in a later iteration of + // this loop. + } else { + // Read directly from the file. + gapDsts := dsts.TakeFirst64(gapMR.Length()) + n, err := rw.d.handle.readToBlocksAt(rw.ctx, gapDsts, gapMR.Start) + done += n + rw.off += n + dsts = dsts.DropFirst64(n) + // Partial reads are fine. But we must stop reading. + if n != gapDsts.NumBytes() || err != nil { + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, err + } + + // Continue. + seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{} + } + } + } + dataMuUnlock() + rw.d.handleMu.RUnlock() + return done, nil +} + +// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. +// +// Preconditions: rw.d.metadataMu must be locked. +func (rw *dentryReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { + if srcs.IsEmpty() { + return 0, nil + } + + // If we have a mmappable host FD (which must be used here to ensure + // coherence with memory-mapped I/O), or if InteropModeShared is in effect + // (which prevents us from caching file contents), or if the file was + // opened with O_DIRECT, write directly to dentry.handle without locking + // dentry.dataMu. + rw.d.handleMu.RLock() + if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct { + n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, srcs, rw.off) + rw.d.handleMu.RUnlock() + rw.off += n + return n, err + } + + // Otherwise write to/through the cache. + mf := rw.d.fs.mfp.MemoryFile() + rw.d.dataMu.Lock() + + // Compute the range to write (overflow-checked). + start := rw.off + end := rw.off + srcs.NumBytes() + if end <= rw.off { + end = math.MaxInt64 + } + + var ( + done uint64 + retErr error + ) + seg, gap := rw.d.cache.Find(rw.off) + for rw.off < end { + mr := memmap.MappableRange{rw.off, end} + switch { + case seg.Ok(): + // Get internal mappings from the cache. + segMR := seg.Range().Intersect(mr) + ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write) + if err != nil { + retErr = err + goto exitLoop + } + + // Copy to internal mappings. + n, err := safemem.CopySeq(ims, srcs) + done += n + rw.off += n + srcs = srcs.DropFirst64(n) + rw.d.dirty.MarkDirty(segMR) + if err != nil { + retErr = err + goto exitLoop + } + + // Continue. + seg, gap = seg.NextNonEmpty() + + case gap.Ok(): + // Write directly to the file. At present, we never fill the cache + // when writing, since doing so can convert small writes into + // inefficient read-modify-write cycles, and we have no mechanism + // for detecting or avoiding this. + gapMR := gap.Range().Intersect(mr) + gapSrcs := srcs.TakeFirst64(gapMR.Length()) + n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, gapSrcs, gapMR.Start) + done += n + rw.off += n + srcs = srcs.DropFirst64(n) + // Partial writes are fine. But we must stop writing. + if n != gapSrcs.NumBytes() || err != nil { + retErr = err + goto exitLoop + } + + // Continue. + seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{} + } + } +exitLoop: + if rw.off > rw.d.size { + atomic.StoreUint64(&rw.d.size, rw.off) + // The remote file's size will implicitly be extended to the correct + // value when we write back to it. + } + // If InteropModeWritethrough is in effect, flush written data back to the + // remote filesystem. + if rw.d.fs.opts.interop == InteropModeWritethrough && done != 0 { + if err := fsutil.SyncDirty(rw.ctx, memmap.MappableRange{ + Start: start, + End: rw.off, + }, &rw.d.cache, &rw.d.dirty, rw.d.size, mf, rw.d.handle.writeFromBlocksAt); err != nil { + // We have no idea how many bytes were actually flushed. + rw.off = start + done = 0 + retErr = err + } + } + rw.d.dataMu.Unlock() + rw.d.handleMu.RUnlock() + return done, retErr +} + +func (d *dentry) writeback(ctx context.Context, offset, size int64) error { + if size == 0 { + return nil + } + d.handleMu.RLock() + defer d.handleMu.RUnlock() + d.dataMu.Lock() + defer d.dataMu.Unlock() + // Compute the range of valid bytes (overflow-checked). + if uint64(offset) >= d.size { + return nil + } + end := int64(d.size) + if rend := offset + size; rend > offset && rend < end { + end = rend + } + return fsutil.SyncDirty(ctx, memmap.MappableRange{ + Start: uint64(offset), + End: uint64(end), + }, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt) +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + fd.mu.Lock() + defer fd.mu.Unlock() + switch whence { + case linux.SEEK_SET: + // Use offset as specified. + case linux.SEEK_CUR: + offset += fd.off + case linux.SEEK_END, linux.SEEK_DATA, linux.SEEK_HOLE: + // Ensure file size is up to date. + d := fd.dentry() + if fd.filesystem().opts.interop == InteropModeShared { + if err := d.updateFromGetattr(ctx); err != nil { + return 0, err + } + } + size := int64(atomic.LoadUint64(&d.size)) + // For SEEK_DATA and SEEK_HOLE, treat the file as a single contiguous + // block of data. + switch whence { + case linux.SEEK_END: + offset += size + case linux.SEEK_DATA: + if offset > size { + return 0, syserror.ENXIO + } + // Use offset as specified. + case linux.SEEK_HOLE: + if offset > size { + return 0, syserror.ENXIO + } + offset = size + } + default: + return 0, syserror.EINVAL + } + if offset < 0 { + return 0, syserror.EINVAL + } + fd.off = offset + return offset, nil +} + +// Sync implements vfs.FileDescriptionImpl.Sync. +func (fd *regularFileFD) Sync(ctx context.Context) error { + return fd.dentry().syncSharedHandle(ctx) +} + +func (d *dentry) syncSharedHandle(ctx context.Context) error { + d.handleMu.RLock() + if !d.handleWritable { + d.handleMu.RUnlock() + return nil + } + d.dataMu.Lock() + // Write dirty cached data to the remote file. + err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt) + d.dataMu.Unlock() + if err == nil { + // Sync the remote file. + err = d.handle.sync(ctx) + } + d.handleMu.RUnlock() + return err +} + +// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. +func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + d := fd.dentry() + switch d.fs.opts.interop { + case InteropModeExclusive: + // Any mapping is fine. + case InteropModeWritethrough: + // Shared writable mappings require a host FD, since otherwise we can't + // synchronously flush memory-mapped writes to the remote file. + if opts.Private || !opts.MaxPerms.Write { + break + } + fallthrough + case InteropModeShared: + // All mappings require a host FD to be coherent with other filesystem + // users. + if d.fs.opts.forcePageCache { + // Whether or not we have a host FD, we're not allowed to use it. + return syserror.ENODEV + } + d.handleMu.RLock() + haveFD := d.handle.fd >= 0 + d.handleMu.RUnlock() + if !haveFD { + return syserror.ENODEV + } + default: + panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop)) + } + return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts) +} + +func (d *dentry) mayCachePages() bool { + if d.fs.opts.interop == InteropModeShared { + return false + } + if d.fs.opts.forcePageCache { + return true + } + d.handleMu.RLock() + haveFD := d.handle.fd >= 0 + d.handleMu.RUnlock() + return haveFD +} + +// AddMapping implements memmap.Mappable.AddMapping. +func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { + d.mapsMu.Lock() + mapped := d.mappings.AddMapping(ms, ar, offset, writable) + // Do this unconditionally since whether we have a host FD can change + // across save/restore. + for _, r := range mapped { + d.pf.hostFileMapper.IncRefOn(r) + } + if d.mayCachePages() { + // d.Evict() will refuse to evict memory-mapped pages, so tell the + // MemoryFile to not bother trying. + mf := d.fs.mfp.MemoryFile() + for _, r := range mapped { + mf.MarkUnevictable(d, pgalloc.EvictableRange{r.Start, r.End}) + } + } + d.mapsMu.Unlock() + return nil +} + +// RemoveMapping implements memmap.Mappable.RemoveMapping. +func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { + d.mapsMu.Lock() + unmapped := d.mappings.RemoveMapping(ms, ar, offset, writable) + for _, r := range unmapped { + d.pf.hostFileMapper.DecRefOn(r) + } + if d.mayCachePages() { + // Pages that are no longer referenced by any application memory + // mappings are now considered unused; allow MemoryFile to evict them + // when necessary. + mf := d.fs.mfp.MemoryFile() + d.dataMu.Lock() + for _, r := range unmapped { + // Since these pages are no longer mapped, they are no longer + // concurrently dirtyable by a writable memory mapping. + d.dirty.AllowClean(r) + mf.MarkEvictable(d, pgalloc.EvictableRange{r.Start, r.End}) + } + d.dataMu.Unlock() + } + d.mapsMu.Unlock() +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { + return d.AddMapping(ctx, ms, dstAR, offset, writable) +} + +// Translate implements memmap.Mappable.Translate. +func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { + d.handleMu.RLock() + if d.handle.fd >= 0 && !d.fs.opts.forcePageCache { + d.handleMu.RUnlock() + mr := optional + if d.fs.opts.limitHostFDTranslation { + mr = maxFillRange(required, optional) + } + return []memmap.Translation{ + { + Source: mr, + File: &d.pf, + Offset: mr.Start, + Perms: usermem.AnyAccess, + }, + }, nil + } + + d.dataMu.Lock() + + // Constrain translations to d.size (rounded up) to prevent translation to + // pages that may be concurrently truncated. + pgend := pageRoundUp(d.size) + var beyondEOF bool + if required.End > pgend { + if required.Start >= pgend { + d.dataMu.Unlock() + d.handleMu.RUnlock() + return nil, &memmap.BusError{io.EOF} + } + beyondEOF = true + required.End = pgend + } + if optional.End > pgend { + optional.End = pgend + } + + mf := d.fs.mfp.MemoryFile() + cerr := d.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, d.handle.readToBlocksAt) + + var ts []memmap.Translation + var translatedEnd uint64 + for seg := d.cache.FindSegment(required.Start); seg.Ok() && seg.Start() < required.End; seg, _ = seg.NextNonEmpty() { + segMR := seg.Range().Intersect(optional) + // TODO(jamieliu): Make Translations writable even if writability is + // not required if already kept-dirty by another writable translation. + perms := usermem.AccessType{ + Read: true, + Execute: true, + } + if at.Write { + // From this point forward, this memory can be dirtied through the + // mapping at any time. + d.dirty.KeepDirty(segMR) + perms.Write = true + } + ts = append(ts, memmap.Translation{ + Source: segMR, + File: mf, + Offset: seg.FileRangeOf(segMR).Start, + Perms: perms, + }) + translatedEnd = segMR.End + } + + d.dataMu.Unlock() + d.handleMu.RUnlock() + + // Don't return the error returned by c.cache.Fill if it occurred outside + // of required. + if translatedEnd < required.End && cerr != nil { + return ts, &memmap.BusError{cerr} + } + if beyondEOF { + return ts, &memmap.BusError{io.EOF} + } + return ts, nil +} + +func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange { + const maxReadahead = 64 << 10 // 64 KB, chosen arbitrarily + if required.Length() >= maxReadahead { + return required + } + if optional.Length() <= maxReadahead { + return optional + } + optional.Start = required.Start + if optional.Length() <= maxReadahead { + return optional + } + optional.End = optional.Start + maxReadahead + return optional +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +func (d *dentry) InvalidateUnsavable(ctx context.Context) error { + // Whether we have a host fd (and consequently what platform.File is + // mapped) can change across save/restore, so invalidate all translations + // unconditionally. + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.mappings.InvalidateAll(memmap.InvalidateOpts{}) + + // Write the cache's contents back to the remote file so that if we have a + // host fd after restore, the remote file's contents are coherent. + mf := d.fs.mfp.MemoryFile() + d.dataMu.Lock() + defer d.dataMu.Unlock() + if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { + return err + } + + // Discard the cache so that it's not stored in saved state. This is safe + // because per InvalidateUnsavable invariants, no new translations can have + // been returned after we invalidated all existing translations above. + d.cache.DropAll(mf) + d.dirty.RemoveAll() + + return nil +} + +// Evict implements pgalloc.EvictableMemoryUser.Evict. +func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.dataMu.Lock() + defer d.dataMu.Unlock() + + mr := memmap.MappableRange{er.Start, er.End} + mf := d.fs.mfp.MemoryFile() + // Only allow pages that are no longer memory-mapped to be evicted. + for mgap := d.mappings.LowerBoundGap(mr.Start); mgap.Ok() && mgap.Start() < mr.End; mgap = mgap.NextGap() { + mgapMR := mgap.Range().Intersect(mr) + if mgapMR.Length() == 0 { + continue + } + if err := fsutil.SyncDirty(ctx, mgapMR, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil { + log.Warningf("Failed to writeback cached data %v: %v", mgapMR, err) + } + d.cache.Drop(mgapMR, mf) + d.dirty.KeepClean(mgapMR) + } +} + +// dentryPlatformFile implements platform.File. It exists solely because dentry +// cannot implement both vfs.DentryImpl.IncRef and platform.File.IncRef. +// +// dentryPlatformFile is only used when a host FD representing the remote file +// is available (i.e. dentry.handle.fd >= 0), and that FD is used for +// application memory mappings (i.e. !filesystem.opts.forcePageCache). +type dentryPlatformFile struct { + *dentry + + // fdRefs counts references on platform.File offsets. fdRefs is protected + // by dentry.dataMu. + fdRefs fsutil.FrameRefSet + + // If this dentry represents a regular file, and handle.fd >= 0, + // hostFileMapper caches mappings of handle.fd. + hostFileMapper fsutil.HostFileMapper +} + +// IncRef implements platform.File.IncRef. +func (d *dentryPlatformFile) IncRef(fr platform.FileRange) { + d.dataMu.Lock() + seg, gap := d.fdRefs.Find(fr.Start) + for { + switch { + case seg.Ok() && seg.Start() < fr.End: + seg = d.fdRefs.Isolate(seg, fr) + seg.SetValue(seg.Value() + 1) + seg, gap = seg.NextNonEmpty() + case gap.Ok() && gap.Start() < fr.End: + newRange := gap.Range().Intersect(fr) + usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped) + seg, gap = d.fdRefs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty() + default: + d.fdRefs.MergeAdjacent(fr) + d.dataMu.Unlock() + return + } + } +} + +// DecRef implements platform.File.DecRef. +func (d *dentryPlatformFile) DecRef(fr platform.FileRange) { + d.dataMu.Lock() + seg := d.fdRefs.FindSegment(fr.Start) + + for seg.Ok() && seg.Start() < fr.End { + seg = d.fdRefs.Isolate(seg, fr) + if old := seg.Value(); old == 1 { + usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped) + seg = d.fdRefs.Remove(seg).NextSegment() + } else { + seg.SetValue(old - 1) + seg = seg.NextSegment() + } + } + d.fdRefs.MergeAdjacent(fr) + d.dataMu.Unlock() + +} + +// MapInternal implements platform.File.MapInternal. +func (d *dentryPlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { + d.handleMu.RLock() + bs, err := d.hostFileMapper.MapInternal(fr, int(d.handle.fd), at.Write) + d.handleMu.RUnlock() + return bs, err +} + +// FD implements platform.File.FD. +func (d *dentryPlatformFile) FD() int { + d.handleMu.RLock() + fd := d.handle.fd + d.handleMu.RUnlock() + return int(fd) +} diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go new file mode 100644 index 000000000..08c691c47 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -0,0 +1,159 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// specialFileFD implements vfs.FileDescriptionImpl for files other than +// regular files, directories, and symlinks: pipes, sockets, etc. It is also +// used for regular files when filesystemOptions.specialRegularFiles is in +// effect. specialFileFD differs from regularFileFD by using per-FD handles +// instead of shared per-dentry handles, and never buffering I/O. +type specialFileFD struct { + fileDescription + + // handle is immutable. + handle handle + + // off is the file offset. off is protected by mu. (POSIX 2.9.7 only + // requires operations using the file offset to be atomic for regular files + // and symlinks; however, since specialFileFD may be used for regular + // files, we apply this atomicity unconditionally.) + mu sync.Mutex + off int64 +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *specialFileFD) Release() { + fd.handle.close(context.Background()) + fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) + fs.syncMu.Lock() + delete(fs.specialFileFDs, fd) + fs.syncMu.Unlock() +} + +// OnClose implements vfs.FileDescriptionImpl.OnClose. +func (fd *specialFileFD) OnClose(ctx context.Context) error { + if !fd.vfsfd.IsWritable() { + return nil + } + return fd.handle.file.flush(ctx) +} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + // Going through dst.CopyOutFrom() holds MM locks around file operations of + // unknown duration. For regularFileFD, doing so is necessary to support + // mmap due to lock ordering; MM locks precede dentry.dataMu. That doesn't + // hold here since specialFileFD doesn't client-cache data. Just buffer the + // read instead. + if d := fd.dentry(); d.fs.opts.interop != InteropModeShared { + d.touchAtime(ctx, fd.vfsfd.Mount()) + } + buf := make([]byte, dst.NumBytes()) + n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) + if n == 0 { + return 0, err + } + if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil { + return int64(cp), cperr + } + return int64(n), err +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.PRead(ctx, dst, fd.off, opts) + fd.off += n + fd.mu.Unlock() + return n, err +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if opts.Flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + // Do a buffered write. See rationale in PRead. + if d := fd.dentry(); d.fs.opts.interop != InteropModeShared { + d.touchCMtime(ctx) + } + buf := make([]byte, src.NumBytes()) + // Don't do partial writes if we get a partial read from src. + if _, err := src.CopyIn(ctx, buf); err != nil { + return 0, err + } + n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) + return int64(n), err +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + fd.mu.Lock() + n, err := fd.PWrite(ctx, src, fd.off, opts) + fd.off += n + fd.mu.Unlock() + return n, err +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + fd.mu.Lock() + defer fd.mu.Unlock() + switch whence { + case linux.SEEK_SET: + // Use offset as given. + case linux.SEEK_CUR: + offset += fd.off + default: + // SEEK_END, SEEK_DATA, and SEEK_HOLE aren't supported since it's not + // clear that file size is even meaningful for these files. + return 0, syserror.EINVAL + } + if offset < 0 { + return 0, syserror.EINVAL + } + fd.off = offset + return offset, nil +} + +// Sync implements vfs.FileDescriptionImpl.Sync. +func (fd *specialFileFD) Sync(ctx context.Context) error { + if !fd.vfsfd.IsWritable() { + return nil + } + return fd.handle.sync(ctx) +} diff --git a/pkg/sentry/fsimpl/gofer/symlink.go b/pkg/sentry/fsimpl/gofer/symlink.go new file mode 100644 index 000000000..adf43be60 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/symlink.go @@ -0,0 +1,47 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +func (d *dentry) isSymlink() bool { + return d.fileType() == linux.S_IFLNK +} + +// Precondition: d.isSymlink(). +func (d *dentry) readlink(ctx context.Context, mnt *vfs.Mount) (string, error) { + if d.fs.opts.interop != InteropModeShared { + d.touchAtime(ctx, mnt) + d.dataMu.Lock() + if d.haveTarget { + target := d.target + d.dataMu.Unlock() + return target, nil + } + } + target, err := d.file.readlink(ctx) + if d.fs.opts.interop != InteropModeShared { + if err == nil { + d.haveTarget = true + d.target = target + } + d.dataMu.Unlock() + } + return target, err +} diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go new file mode 100644 index 000000000..7598ec6a8 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/time.go @@ -0,0 +1,75 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +func dentryTimestampFromP9(s, ns uint64) int64 { + return int64(s*1e9 + ns) +} + +func dentryTimestampFromStatx(ts linux.StatxTimestamp) int64 { + return ts.Sec*1e9 + int64(ts.Nsec) +} + +func statxTimestampFromDentry(ns int64) linux.StatxTimestamp { + return linux.StatxTimestamp{ + Sec: ns / 1e9, + Nsec: uint32(ns % 1e9), + } +} + +func nowFromContext(ctx context.Context) (int64, bool) { + if clock := ktime.RealtimeClockFromContext(ctx); clock != nil { + return clock.Now().Nanoseconds(), true + } + return 0, false +} + +// Preconditions: fs.interop != InteropModeShared. +func (d *dentry) touchAtime(ctx context.Context, mnt *vfs.Mount) { + if err := mnt.CheckBeginWrite(); err != nil { + return + } + now, ok := nowFromContext(ctx) + if !ok { + mnt.EndWrite() + return + } + d.metadataMu.Lock() + atomic.StoreInt64(&d.atime, now) + d.metadataMu.Unlock() + mnt.EndWrite() +} + +// Preconditions: fs.interop != InteropModeShared. The caller has successfully +// called vfs.Mount.CheckBeginWrite(). +func (d *dentry) touchCMtime(ctx context.Context) { + now, ok := nowFromContext(ctx) + if !ok { + return + } + d.metadataMu.Lock() + atomic.StoreInt64(&d.mtime, now) + atomic.StoreInt64(&d.ctime, now) + d.metadataMu.Unlock() +} diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 5ee9cf1e9..72bc15264 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -622,7 +622,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error if child.inode.isDir() { return syserror.EISDIR } - if !rp.MustBeDir() { + if rp.MustBeDir() { return syserror.ENOTDIR } mnt := rp.Mount() diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index bde4c7a1e..34f63986f 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -126,7 +126,7 @@ func (s *socketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOS } return uint64(n), nil } - return readv(s.fd, iovecsFromBlockSeq(dsts)) + return readv(s.fd, safemem.IovecsFromBlockSeq(dsts)) })) return int64(n), err } @@ -149,7 +149,7 @@ func (s *socketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO } return uint64(n), nil } - return writev(s.fd, iovecsFromBlockSeq(srcs)) + return writev(s.fd, safemem.IovecsFromBlockSeq(srcs)) })) return int64(n), err } @@ -402,7 +402,7 @@ func (s *socketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags // We always do a non-blocking recv*(). sysflags := flags | syscall.MSG_DONTWAIT - iovs := iovecsFromBlockSeq(dsts) + iovs := safemem.IovecsFromBlockSeq(dsts) msg := syscall.Msghdr{ Iov: &iovs[0], Iovlen: uint64(len(iovs)), @@ -522,7 +522,7 @@ func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] return uint64(n), nil } - iovs := iovecsFromBlockSeq(srcs) + iovs := safemem.IovecsFromBlockSeq(srcs) msg := syscall.Msghdr{ Iov: &iovs[0], Iovlen: uint64(len(iovs)), @@ -567,21 +567,6 @@ func (s *socketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to [] return int(n), syserr.FromError(err) } -func iovecsFromBlockSeq(bs safemem.BlockSeq) []syscall.Iovec { - iovs := make([]syscall.Iovec, 0, bs.NumBlocks()) - for ; !bs.IsEmpty(); bs = bs.Tail() { - b := bs.Head() - iovs = append(iovs, syscall.Iovec{ - Base: &b.ToSlice()[0], - Len: uint64(b.Len()), - }) - // We don't need to care about b.NeedSafecopy(), because the host - // kernel will handle such address ranges just fine (by returning - // EFAULT). - } - return iovs -} - func translateIOSyscallError(err error) error { if err == syscall.EAGAIN || err == syscall.EWOULDBLOCK { return syserror.ErrWouldBlock -- cgit v1.2.3 From 1b6a12a768216a99a5e0428c42ea4faf79cf3b50 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Wed, 5 Feb 2020 22:45:44 -0800 Subject: Add notes to relevant tests. These were out-of-band notes that can help provide additional context and simplify automated imports. PiperOrigin-RevId: 293525915 --- pkg/metric/metric.go | 1 - pkg/sentry/arch/arch_x86.go | 4 ++ pkg/sentry/arch/signal_amd64.go | 2 +- pkg/sentry/fs/file_overlay_test.go | 1 + pkg/sentry/fs/proc/README.md | 4 ++ pkg/sentry/kernel/BUILD | 1 + pkg/sentry/kernel/kernel.go | 3 ++ pkg/sentry/kernel/kernel_opts.go | 20 +++++++ pkg/sentry/socket/hostinet/BUILD | 1 + pkg/sentry/socket/hostinet/socket.go | 5 +- pkg/sentry/socket/hostinet/sockopt_impl.go | 27 ++++++++++ pkg/tcpip/transport/tcp/endpoint.go | 3 ++ runsc/boot/filter/BUILD | 1 + runsc/boot/filter/config.go | 13 ----- runsc/boot/filter/config_profile.go | 34 ++++++++++++ runsc/container/console_test.go | 5 +- runsc/dockerutil/dockerutil.go | 11 ++-- runsc/testutil/BUILD | 5 +- runsc/testutil/testutil.go | 54 ------------------- runsc/testutil/testutil_runfiles.go | 75 +++++++++++++++++++++++++++ test/image/image_test.go | 8 +-- test/syscalls/build_defs.bzl | 35 +++++++++++-- test/syscalls/linux/chroot.cc | 2 +- test/syscalls/linux/concurrency.cc | 3 +- test/syscalls/linux/exec_proc_exe_workload.cc | 6 +++ test/syscalls/linux/fork.cc | 5 +- test/syscalls/linux/mmap.cc | 8 +-- test/syscalls/linux/open_create.cc | 1 + test/syscalls/linux/preadv.cc | 1 + test/syscalls/linux/proc.cc | 46 +++++++++++++--- test/syscalls/linux/readv.cc | 4 +- test/syscalls/linux/rseq.cc | 2 +- test/syscalls/linux/select.cc | 2 +- test/syscalls/linux/shm.cc | 2 +- test/syscalls/linux/sigprocmask.cc | 2 +- test/syscalls/linux/socket_unix_non_stream.cc | 4 +- test/syscalls/linux/symlink.cc | 2 +- test/syscalls/linux/tcp_socket.cc | 3 +- test/syscalls/linux/time.cc | 1 + test/syscalls/linux/tkill.cc | 2 +- test/util/temp_path.cc | 1 + tools/build/tags.bzl | 4 ++ tools/defs.bzl | 17 +++++- 43 files changed, 318 insertions(+), 113 deletions(-) create mode 100644 pkg/sentry/kernel/kernel_opts.go create mode 100644 pkg/sentry/socket/hostinet/sockopt_impl.go create mode 100644 runsc/boot/filter/config_profile.go create mode 100644 runsc/testutil/testutil_runfiles.go (limited to 'pkg/sentry/fs') diff --git a/pkg/metric/metric.go b/pkg/metric/metric.go index 93d4f2b8c..006fcd9ab 100644 --- a/pkg/metric/metric.go +++ b/pkg/metric/metric.go @@ -46,7 +46,6 @@ var ( // // TODO(b/67298402): Support non-cumulative metrics. // TODO(b/67298427): Support metric fields. -// type Uint64Metric struct { // value is the actual value of the metric. It must be accessed // atomically. diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go index a18093155..3db8bd34b 100644 --- a/pkg/sentry/arch/arch_x86.go +++ b/pkg/sentry/arch/arch_x86.go @@ -114,6 +114,10 @@ func newX86FPStateSlice() []byte { size, align := cpuid.HostFeatureSet().ExtendedStateSize() capacity := size // Always use at least 4096 bytes. + // + // For the KVM platform, this state is a fixed 4096 bytes, so make sure + // that the underlying array is at _least_ that size otherwise we will + // corrupt random memory. This is not a pleasant thing to debug. if capacity < 4096 { capacity = 4096 } diff --git a/pkg/sentry/arch/signal_amd64.go b/pkg/sentry/arch/signal_amd64.go index 81b92bb43..6fb756f0e 100644 --- a/pkg/sentry/arch/signal_amd64.go +++ b/pkg/sentry/arch/signal_amd64.go @@ -55,7 +55,7 @@ type SignalContext64 struct { Trapno uint64 Oldmask linux.SignalSet Cr2 uint64 - // Pointer to a struct _fpstate. + // Pointer to a struct _fpstate. See b/33003106#comment8. Fpstate uint64 Reserved [8]uint64 } diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go index 02538bb4f..a76d87e3a 100644 --- a/pkg/sentry/fs/file_overlay_test.go +++ b/pkg/sentry/fs/file_overlay_test.go @@ -177,6 +177,7 @@ func TestReaddirRevalidation(t *testing.T) { // TestReaddirOverlayFrozen tests that calling Readdir on an overlay file with // a frozen dirent tree does not make Readdir calls to the underlying files. +// This is a regression test for b/114808269. func TestReaddirOverlayFrozen(t *testing.T) { ctx := contexttest.Context(t) diff --git a/pkg/sentry/fs/proc/README.md b/pkg/sentry/fs/proc/README.md index 5d4ec6c7b..6667a0916 100644 --- a/pkg/sentry/fs/proc/README.md +++ b/pkg/sentry/fs/proc/README.md @@ -11,6 +11,8 @@ inconsistency, please file a bug. The following files are implemented: + + | File /proc/ | Content | | :------------------------ | :---------------------------------------------------- | | [cpuinfo](#cpuinfo) | Info about the CPU | @@ -22,6 +24,8 @@ The following files are implemented: | [uptime](#uptime) | Wall clock since boot, combined idle time of all cpus | | [version](#version) | Kernel version | + + ### cpuinfo ```bash diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index a27628c0a..2231d6973 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -91,6 +91,7 @@ go_library( "fs_context.go", "ipc_namespace.go", "kernel.go", + "kernel_opts.go", "kernel_state.go", "pending_signals.go", "pending_signals_list.go", diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index dcd6e91c4..3ee760ba2 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -235,6 +235,9 @@ type Kernel struct { // events. This is initialized lazily on the first unimplemented // syscall. unimplementedSyscallEmitter eventchannel.Emitter `state:"nosave"` + + // SpecialOpts contains special kernel options. + SpecialOpts } // InitKernelArgs holds arguments to Init. diff --git a/pkg/sentry/kernel/kernel_opts.go b/pkg/sentry/kernel/kernel_opts.go new file mode 100644 index 000000000..2e66ec587 --- /dev/null +++ b/pkg/sentry/kernel/kernel_opts.go @@ -0,0 +1,20 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +// SpecialOpts contains non-standard options for the kernel. +// +// +stateify savable +type SpecialOpts struct{} diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index 5a07d5d0e..023bad156 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -10,6 +10,7 @@ go_library( "save_restore.go", "socket.go", "socket_unsafe.go", + "sockopt_impl.go", "stack.go", ], visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index 34f63986f..de76388ac 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -285,7 +285,7 @@ func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outPt } // Whitelist options and constrain option length. - var optlen int + optlen := getSockOptLen(t, level, name) switch level { case linux.SOL_IP: switch name { @@ -330,7 +330,7 @@ func (s *socketOperations) GetSockOpt(t *kernel.Task, level int, name int, outPt // SetSockOpt implements socket.Socket.SetSockOpt. func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error { // Whitelist options and constrain option length. - var optlen int + optlen := setSockOptLen(t, level, name) switch level { case linux.SOL_IP: switch name { @@ -353,6 +353,7 @@ func (s *socketOperations) SetSockOpt(t *kernel.Task, level int, name int, opt [ optlen = sizeofInt32 } } + if optlen == 0 { // Pretend to accept socket options we don't understand. This seems // dangerous, but it's what netstack does... diff --git a/pkg/sentry/socket/hostinet/sockopt_impl.go b/pkg/sentry/socket/hostinet/sockopt_impl.go new file mode 100644 index 000000000..8a783712e --- /dev/null +++ b/pkg/sentry/socket/hostinet/sockopt_impl.go @@ -0,0 +1,27 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hostinet + +import ( + "gvisor.dev/gvisor/pkg/sentry/kernel" +) + +func getSockOptLen(t *kernel.Task, level, name int) int { + return 0 // No custom options. +} + +func setSockOptLen(t *kernel.Task, level, name int) int { + return 0 // No custom options. +} diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index e4a6b1b8b..f2be0e651 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2166,6 +2166,9 @@ func (e *endpoint) listen(backlog int) *tcpip.Error { e.isRegistered = true e.setEndpointState(StateListen) + // The channel may be non-nil when we're restoring the endpoint, and it + // may be pre-populated with some previously accepted (but not Accepted) + // endpoints. if e.acceptedChan == nil { e.acceptedChan = make(chan *endpoint, backlog) } diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index ce30f6c53..ed18f0047 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -8,6 +8,7 @@ go_library( "config.go", "config_amd64.go", "config_arm64.go", + "config_profile.go", "extra_filters.go", "extra_filters_msan.go", "extra_filters_race.go", diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index f8d351c7b..c69f4c602 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -536,16 +536,3 @@ func controlServerFilters(fd int) seccomp.SyscallRules { }, } } - -// profileFilters returns extra syscalls made by runtime/pprof package. -func profileFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - syscall.SYS_OPENAT: []seccomp.Rule{ - { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), - }, - }, - } -} diff --git a/runsc/boot/filter/config_profile.go b/runsc/boot/filter/config_profile.go new file mode 100644 index 000000000..194952a7b --- /dev/null +++ b/runsc/boot/filter/config_profile.go @@ -0,0 +1,34 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filter + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/seccomp" +) + +// profileFilters returns extra syscalls made by runtime/pprof package. +func profileFilters() seccomp.SyscallRules { + return seccomp.SyscallRules{ + syscall.SYS_OPENAT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(syscall.O_RDONLY | syscall.O_LARGEFILE | syscall.O_CLOEXEC), + }, + }, + } +} diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 060b63bf3..c2518d52b 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -196,7 +196,10 @@ func TestJobControlSignalExec(t *testing.T) { defer ptyMaster.Close() defer ptySlave.Close() - // Exec bash and attach a terminal. + // Exec bash and attach a terminal. Note that occasionally /bin/sh + // may be a different shell or have a different configuration (such + // as disabling interactive mode and job control). Since we want to + // explicitly test interactive mode, use /bin/bash. See b/116981926. execArgs := &control.ExecArgs{ Filename: "/bin/bash", // Don't let bash execute from profile or rc files, otherwise diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go index 9b6346ca2..1ff5e8cc3 100644 --- a/runsc/dockerutil/dockerutil.go +++ b/runsc/dockerutil/dockerutil.go @@ -143,8 +143,11 @@ func PrepareFiles(names ...string) (string, error) { return "", fmt.Errorf("os.Chmod(%q, 0777) failed: %v", dir, err) } for _, name := range names { - src := getLocalPath(name) - dst := path.Join(dir, name) + src, err := testutil.FindFile(name) + if err != nil { + return "", fmt.Errorf("testutil.Preparefiles(%q) failed: %v", name, err) + } + dst := path.Join(dir, path.Base(name)) if err := testutil.Copy(src, dst); err != nil { return "", fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err) } @@ -152,10 +155,6 @@ func PrepareFiles(names ...string) (string, error) { return dir, nil } -func getLocalPath(file string) string { - return path.Join(".", file) -} - // do executes docker command. func do(args ...string) (string, error) { log.Printf("Running: docker %s\n", args) diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD index f845120b0..945405303 100644 --- a/runsc/testutil/BUILD +++ b/runsc/testutil/BUILD @@ -5,7 +5,10 @@ package(licenses = ["notice"]) go_library( name = "testutil", testonly = 1, - srcs = ["testutil.go"], + srcs = [ + "testutil.go", + "testutil_runfiles.go", + ], visibility = ["//:sandbox"], deps = [ "//pkg/log", diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go index edf2e809a..80c2c9680 100644 --- a/runsc/testutil/testutil.go +++ b/runsc/testutil/testutil.go @@ -79,60 +79,6 @@ func ConfigureExePath() error { return nil } -// FindFile searchs for a file inside the test run environment. It returns the -// full path to the file. It fails if none or more than one file is found. -func FindFile(path string) (string, error) { - wd, err := os.Getwd() - if err != nil { - return "", err - } - - // The test root is demarcated by a path element called "__main__". Search for - // it backwards from the working directory. - root := wd - for { - dir, name := filepath.Split(root) - if name == "__main__" { - break - } - if len(dir) == 0 { - return "", fmt.Errorf("directory __main__ not found in %q", wd) - } - // Remove ending slash to loop around. - root = dir[:len(dir)-1] - } - - // Annoyingly, bazel adds the build type to the directory path for go - // binaries, but not for c++ binaries. We use two different patterns to - // to find our file. - patterns := []string{ - // Try the obvious path first. - filepath.Join(root, path), - // If it was a go binary, use a wildcard to match the build - // type. The pattern is: /test-path/__main__/directories/*/file. - filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)), - } - - for _, p := range patterns { - matches, err := filepath.Glob(p) - if err != nil { - // "The only possible returned error is ErrBadPattern, - // when pattern is malformed." -godoc - return "", fmt.Errorf("error globbing %q: %v", p, err) - } - switch len(matches) { - case 0: - // Try the next pattern. - case 1: - // We found it. - return matches[0], nil - default: - return "", fmt.Errorf("more than one match found for %q: %s", path, matches) - } - } - return "", fmt.Errorf("file %q not found", path) -} - // TestConfig returns the default configuration to use in tests. Note that // 'RootDir' must be set by caller if required. func TestConfig() *boot.Config { diff --git a/runsc/testutil/testutil_runfiles.go b/runsc/testutil/testutil_runfiles.go new file mode 100644 index 000000000..ece9ea9a1 --- /dev/null +++ b/runsc/testutil/testutil_runfiles.go @@ -0,0 +1,75 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +import ( + "fmt" + "os" + "path/filepath" +) + +// FindFile searchs for a file inside the test run environment. It returns the +// full path to the file. It fails if none or more than one file is found. +func FindFile(path string) (string, error) { + wd, err := os.Getwd() + if err != nil { + return "", err + } + + // The test root is demarcated by a path element called "__main__". Search for + // it backwards from the working directory. + root := wd + for { + dir, name := filepath.Split(root) + if name == "__main__" { + break + } + if len(dir) == 0 { + return "", fmt.Errorf("directory __main__ not found in %q", wd) + } + // Remove ending slash to loop around. + root = dir[:len(dir)-1] + } + + // Annoyingly, bazel adds the build type to the directory path for go + // binaries, but not for c++ binaries. We use two different patterns to + // to find our file. + patterns := []string{ + // Try the obvious path first. + filepath.Join(root, path), + // If it was a go binary, use a wildcard to match the build + // type. The pattern is: /test-path/__main__/directories/*/file. + filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)), + } + + for _, p := range patterns { + matches, err := filepath.Glob(p) + if err != nil { + // "The only possible returned error is ErrBadPattern, + // when pattern is malformed." -godoc + return "", fmt.Errorf("error globbing %q: %v", p, err) + } + switch len(matches) { + case 0: + // Try the next pattern. + case 1: + // We found it. + return matches[0], nil + default: + return "", fmt.Errorf("more than one match found for %q: %s", path, matches) + } + } + return "", fmt.Errorf("file %q not found", path) +} diff --git a/test/image/image_test.go b/test/image/image_test.go index d0dcb1861..0a1e19d6f 100644 --- a/test/image/image_test.go +++ b/test/image/image_test.go @@ -107,7 +107,7 @@ func TestHttpd(t *testing.T) { } d := dockerutil.MakeDocker("http-test") - dir, err := dockerutil.PrepareFiles("latin10k.txt") + dir, err := dockerutil.PrepareFiles("test/image/latin10k.txt") if err != nil { t.Fatalf("PrepareFiles() failed: %v", err) } @@ -139,7 +139,7 @@ func TestNginx(t *testing.T) { } d := dockerutil.MakeDocker("net-test") - dir, err := dockerutil.PrepareFiles("latin10k.txt") + dir, err := dockerutil.PrepareFiles("test/image/latin10k.txt") if err != nil { t.Fatalf("PrepareFiles() failed: %v", err) } @@ -183,7 +183,7 @@ func TestMysql(t *testing.T) { } client := dockerutil.MakeDocker("mysql-client-test") - dir, err := dockerutil.PrepareFiles("mysql.sql") + dir, err := dockerutil.PrepareFiles("test/image/mysql.sql") if err != nil { t.Fatalf("PrepareFiles() failed: %v", err) } @@ -283,7 +283,7 @@ func TestRuby(t *testing.T) { } d := dockerutil.MakeDocker("ruby-test") - dir, err := dockerutil.PrepareFiles("ruby.rb", "ruby.sh") + dir, err := dockerutil.PrepareFiles("test/image/ruby.rb", "test/image/ruby.sh") if err != nil { t.Fatalf("PrepareFiles() failed: %v", err) } diff --git a/test/syscalls/build_defs.bzl b/test/syscalls/build_defs.bzl index 1df761dd0..cbab85ef7 100644 --- a/test/syscalls/build_defs.bzl +++ b/test/syscalls/build_defs.bzl @@ -2,8 +2,6 @@ load("//tools:defs.bzl", "loopback") -# syscall_test is a macro that will create targets to run the given test target -# on the host (native) and runsc. def syscall_test( test, shard_count = 5, @@ -13,6 +11,19 @@ def syscall_test( add_uds_tree = False, add_hostinet = False, tags = None): + """syscall_test is a macro that will create targets for all platforms. + + Args: + test: the test target. + shard_count: shards for defined tests. + size: the defined test size. + use_tmpfs: use tmpfs in the defined tests. + add_overlay: add an overlay test. + add_uds_tree: add a UDS test. + add_hostinet: add a hostinet test. + tags: starting test tags. + """ + _syscall_test( test = test, shard_count = shard_count, @@ -111,6 +122,19 @@ def _syscall_test( # all the tests on a specific flavor. Use --test_tag_filters=ptrace,file_shared. tags += [full_platform, "file_" + file_access] + # Hash this target into one of 15 buckets. This can be used to + # randomly split targets between different workflows. + hash15 = hash(native.package_name() + name) % 15 + tags.append("hash15:" + str(hash15)) + + # TODO(b/139838000): Tests using hostinet must be disabled on Guitar until + # we figure out how to request ipv4 sockets on Guitar machines. + if network == "host": + tags.append("noguitar") + + # Disable off-host networking. + tags.append("requires-net:loopback") + # Add tag to prevent the tests from running in a Bazel sandbox. # TODO(b/120560048): Make the tests run without this tag. tags.append("no-sandbox") @@ -118,8 +142,11 @@ def _syscall_test( # TODO(b/112165693): KVM tests are tagged "manual" to until the platform is # more stable. if platform == "kvm": - tags += ["manual"] - tags += ["requires-kvm"] + tags.append("manual") + tags.append("requires-kvm") + + # TODO(b/112165693): Remove when tests pass reliably. + tags.append("notap") args = [ # Arguments are passed directly to syscall_test_runner binary. diff --git a/test/syscalls/linux/chroot.cc b/test/syscalls/linux/chroot.cc index 0a2d44a2c..85ec013d5 100644 --- a/test/syscalls/linux/chroot.cc +++ b/test/syscalls/linux/chroot.cc @@ -167,7 +167,7 @@ TEST(ChrootTest, DotDotFromOpenFD) { } // Test that link resolution in a chroot can escape the root by following an -// open proc fd. +// open proc fd. Regression test for b/32316719. TEST(ChrootTest, ProcFdLinkResolutionInChroot) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_CHROOT))); diff --git a/test/syscalls/linux/concurrency.cc b/test/syscalls/linux/concurrency.cc index f41f99900..7cd6a75bd 100644 --- a/test/syscalls/linux/concurrency.cc +++ b/test/syscalls/linux/concurrency.cc @@ -46,7 +46,8 @@ TEST(ConcurrencyTest, SingleProcessMultithreaded) { } // Test that multiple threads in this process continue to execute in parallel, -// even if an unrelated second process is spawned. +// even if an unrelated second process is spawned. Regression test for +// b/32119508. TEST(ConcurrencyTest, MultiProcessMultithreaded) { // In PID 1, start TIDs 1 and 2, and put both to sleep. // diff --git a/test/syscalls/linux/exec_proc_exe_workload.cc b/test/syscalls/linux/exec_proc_exe_workload.cc index b790fe5be..2989379b7 100644 --- a/test/syscalls/linux/exec_proc_exe_workload.cc +++ b/test/syscalls/linux/exec_proc_exe_workload.cc @@ -21,6 +21,12 @@ #include "test/util/posix_error.h" int main(int argc, char** argv, char** envp) { + // This is annoying. Because remote build systems may put these binaries + // in a content-addressable-store, you may wind up with /proc/self/exe + // pointing to some random path (but with a sensible argv[0]). + // + // Therefore, this test simply checks that the /proc/self/exe + // is absolute and *doesn't* match argv[1]. std::string exe = gvisor::testing::ProcessExePath(getpid()).ValueOrDie(); if (exe[0] != '/') { diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index 906f3358d..ff8bdfeb0 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -271,7 +271,7 @@ TEST_F(ForkTest, Alarm) { EXPECT_EQ(0, alarmed); } -// Child cannot affect parent private memory. +// Child cannot affect parent private memory. Regression test for b/24137240. TEST_F(ForkTest, PrivateMemory) { std::atomic local(0); @@ -298,6 +298,9 @@ TEST_F(ForkTest, PrivateMemory) { } // Kernel-accessed buffers should remain coherent across COW. +// +// The buffer must be >= usermem.ZeroCopyMinBytes, as UnsafeAccess operates +// differently. Regression test for b/33811887. TEST_F(ForkTest, COWSegment) { constexpr int kBufSize = 1024; char* read_buf = private_; diff --git a/test/syscalls/linux/mmap.cc b/test/syscalls/linux/mmap.cc index 1c4d9f1c7..11fb1b457 100644 --- a/test/syscalls/linux/mmap.cc +++ b/test/syscalls/linux/mmap.cc @@ -1418,7 +1418,7 @@ TEST_P(MMapFileParamTest, NoSigBusOnPageContainingEOF) { // // On most platforms this is trivial, but when the file is mapped via the sentry // page cache (which does not yet support writing to shared mappings), a bug -// caused reads to fail unnecessarily on such mappings. +// caused reads to fail unnecessarily on such mappings. See b/28913513. TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { uintptr_t addr; size_t len = strlen(kFileContents); @@ -1435,7 +1435,7 @@ TEST_F(MMapFileTest, ReadingWritableSharedFilePageSucceeds) { // Tests that EFAULT is returned when invoking a syscall that requires the OS to // read past end of file (resulting in a fault in sentry context in the gVisor -// case). +// case). See b/28913513. TEST_F(MMapFileTest, InternalSigBus) { uintptr_t addr; ASSERT_THAT(addr = Map(0, 2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, @@ -1578,7 +1578,7 @@ TEST_F(MMapFileTest, Bug38498194) { } // Tests that reading from a file to a memory mapping of the same file does not -// deadlock. +// deadlock. See b/34813270. TEST_F(MMapFileTest, SelfRead) { uintptr_t addr; ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ | PROT_WRITE, MAP_SHARED, @@ -1590,7 +1590,7 @@ TEST_F(MMapFileTest, SelfRead) { } // Tests that writing to a file from a memory mapping of the same file does not -// deadlock. +// deadlock. Regression test for b/34813270. TEST_F(MMapFileTest, SelfWrite) { uintptr_t addr; ASSERT_THAT(addr = Map(0, kPageSize, PROT_READ, MAP_SHARED, fd_.get(), 0), diff --git a/test/syscalls/linux/open_create.cc b/test/syscalls/linux/open_create.cc index 431733dbe..902d0a0dc 100644 --- a/test/syscalls/linux/open_create.cc +++ b/test/syscalls/linux/open_create.cc @@ -132,6 +132,7 @@ TEST(CreateTest, CreateFailsOnDirWithoutWritePerms) { } // A file originally created RW, but opened RO can later be opened RW. +// Regression test for b/65385065. TEST(CreateTest, OpenCreateROThenRW) { TempPath file(NewTempAbsPath()); diff --git a/test/syscalls/linux/preadv.cc b/test/syscalls/linux/preadv.cc index f7ea44054..5b0743fe9 100644 --- a/test/syscalls/linux/preadv.cc +++ b/test/syscalls/linux/preadv.cc @@ -37,6 +37,7 @@ namespace testing { namespace { +// Stress copy-on-write. Attempts to reproduce b/38430174. TEST(PreadvTest, MMConcurrencyStress) { // Fill a one-page file with zeroes (the contents don't really matter). const auto f = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 169b723eb..a23fdb58d 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1352,13 +1352,19 @@ TEST(ProcPidSymlink, SubprocessZombied) { // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux // on proc files. - // 4.17 & gVisor: Syscall succeeds and returns 1 + // + // ~4.3: Syscall fails with EACCES. + // 4.17 & gVisor: Syscall succeeds and returns 1. + // // EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), // SyscallFailsWithErrno(EACCES)); // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux // on proc files. - // 4.17 & gVisor: Syscall succeeds and returns 1. + // + // ~4.3: Syscall fails with EACCES. + // 4.17 & gVisor: Syscall succeeds and returns 1. + // // EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), // SyscallFailsWithErrno(EACCES)); } @@ -1431,8 +1437,12 @@ TEST(ProcPidFile, SubprocessRunning) { TEST(ProcPidFile, SubprocessZombie) { char buf[1]; - // 4.17: Succeeds and returns 1 - // gVisor: Succeeds and returns 0 + // FIXME(gvisor.dev/issue/164): Loosen requirement due to inconsistent + // behavior on different kernels. + // + // ~4.3: Succeds and returns 0. + // 4.17: Succeeds and returns 1. + // gVisor: Succeeds and returns 0. EXPECT_THAT(ReadWhileZombied("auxv", buf, sizeof(buf)), SyscallSucceeds()); EXPECT_THAT(ReadWhileZombied("cmdline", buf, sizeof(buf)), @@ -1458,7 +1468,10 @@ TEST(ProcPidFile, SubprocessZombie) { // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux // on proc files. + // + // ~4.3: Fails and returns EACCES. // gVisor & 4.17: Succeeds and returns 1. + // // EXPECT_THAT(ReadWhileZombied("io", buf, sizeof(buf)), // SyscallFailsWithErrno(EACCES)); } @@ -1467,9 +1480,12 @@ TEST(ProcPidFile, SubprocessZombie) { TEST(ProcPidFile, SubprocessExited) { char buf[1]; - // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between kernels. + // + // ~4.3: Fails and returns ESRCH. // gVisor: Fails with ESRCH. // 4.17: Succeeds and returns 1. + // // EXPECT_THAT(ReadWhileExited("auxv", buf, sizeof(buf)), // SyscallFailsWithErrno(ESRCH)); @@ -1641,7 +1657,7 @@ TEST(ProcTask, KilledThreadsDisappear) { EXPECT_NO_ERRNO(DirContainsExactly("/proc/self/task", TaskFiles(initial, {child1.Tid()}))); - // Stat child1's task file. + // Stat child1's task file. Regression test for b/32097707. struct stat statbuf; const std::string child1_task_file = absl::StrCat("/proc/self/task/", child1.Tid()); @@ -1669,7 +1685,7 @@ TEST(ProcTask, KilledThreadsDisappear) { EXPECT_NO_ERRNO(EventuallyDirContainsExactly( "/proc/self/task", TaskFiles(initial, {child3.Tid(), child5.Tid()}))); - // Stat child1's task file again. This time it should fail. + // Stat child1's task file again. This time it should fail. See b/32097707. EXPECT_THAT(stat(child1_task_file.c_str(), &statbuf), SyscallFailsWithErrno(ENOENT)); @@ -1824,7 +1840,7 @@ TEST(ProcSysVmOvercommitMemory, HasNumericValue) { } // Check that link for proc fd entries point the target node, not the -// symlink itself. +// symlink itself. Regression test for b/31155070. TEST(ProcTaskFd, FstatatFollowsSymlink) { const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); const FileDescriptor fd = @@ -1883,6 +1899,20 @@ TEST(ProcMounts, IsSymlink) { EXPECT_EQ(link, "self/mounts"); } +TEST(ProcSelfMountinfo, RequiredFieldsArePresent) { + auto mountinfo = + ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mountinfo")); + EXPECT_THAT( + mountinfo, + AllOf( + // Root mount. + ContainsRegex( + R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / / (rw|ro).*- \S+ \S+ (rw|ro)\S*)"), + // Proc mount - always rw. + ContainsRegex( + R"([0-9]+ [0-9]+ [0-9]+:[0-9]+ / /proc rw.*- \S+ \S+ rw\S*)"))); +} + // Check that /proc/self/mounts looks something like a real mounts file. TEST(ProcSelfMounts, RequiredFieldsArePresent) { auto mounts = ASSERT_NO_ERRNO_AND_VALUE(GetContents("/proc/self/mounts")); diff --git a/test/syscalls/linux/readv.cc b/test/syscalls/linux/readv.cc index 4069cbc7e..baaf9f757 100644 --- a/test/syscalls/linux/readv.cc +++ b/test/syscalls/linux/readv.cc @@ -254,7 +254,9 @@ TEST_F(ReadvTest, IovecOutsideTaskAddressRangeInNonemptyArray) { // This test depends on the maximum extent of a single readv() syscall, so // we can't tolerate interruption from saving. TEST(ReadvTestNoFixture, TruncatedAtMax_NoRandomSave) { - // Ensure that we won't be interrupted by ITIMER_PROF. + // Ensure that we won't be interrupted by ITIMER_PROF. This is particularly + // important in environments where automated profiling tools may start + // ITIMER_PROF automatically. struct itimerval itv = {}; auto const cleanup_itimer = ASSERT_NO_ERRNO_AND_VALUE(ScopedItimer(ITIMER_PROF, itv)); diff --git a/test/syscalls/linux/rseq.cc b/test/syscalls/linux/rseq.cc index 106c045e3..4bfb1ff56 100644 --- a/test/syscalls/linux/rseq.cc +++ b/test/syscalls/linux/rseq.cc @@ -36,7 +36,7 @@ namespace { // We must be very careful about how these tests are written. Each thread may // only have one struct rseq registration, which may be done automatically at // thread start (as of 2019-11-13, glibc does *not* support rseq and thus does -// not do so). +// not do so, but other libraries do). // // Testing of rseq is thus done primarily in a child process with no // registration. This means exec'ing a nostdlib binary, as rseq registration can diff --git a/test/syscalls/linux/select.cc b/test/syscalls/linux/select.cc index 424e2a67f..be2364fb8 100644 --- a/test/syscalls/linux/select.cc +++ b/test/syscalls/linux/select.cc @@ -146,7 +146,7 @@ TEST_F(SelectTest, IgnoreBitsAboveNfds) { // This test illustrates Linux's behavior of 'select' calls passing after // setrlimit RLIMIT_NOFILE is called. In particular, versions of sshd rely on -// this behavior. +// this behavior. See b/122318458. TEST_F(SelectTest, SetrlimitCallNOFILE) { fd_set read_set; FD_ZERO(&read_set); diff --git a/test/syscalls/linux/shm.cc b/test/syscalls/linux/shm.cc index 7ba752599..c7fdbb924 100644 --- a/test/syscalls/linux/shm.cc +++ b/test/syscalls/linux/shm.cc @@ -473,7 +473,7 @@ TEST(ShmTest, PartialUnmap) { } // Check that sentry does not panic when asked for a zero-length private shm -// segment. +// segment. Regression test for b/110694797. TEST(ShmTest, GracefullyFailOnZeroLenSegmentCreation) { EXPECT_THAT(Shmget(IPC_PRIVATE, 0, 0), PosixErrorIs(EINVAL, _)); } diff --git a/test/syscalls/linux/sigprocmask.cc b/test/syscalls/linux/sigprocmask.cc index 654c6a47f..a603fc1d1 100644 --- a/test/syscalls/linux/sigprocmask.cc +++ b/test/syscalls/linux/sigprocmask.cc @@ -237,7 +237,7 @@ TEST_F(SigProcMaskTest, SignalHandler) { } // Check that sigprocmask correctly handles aliasing of the set and oldset -// pointers. +// pointers. Regression test for b/30502311. TEST_F(SigProcMaskTest, AliasedSets) { sigset_t mask; diff --git a/test/syscalls/linux/socket_unix_non_stream.cc b/test/syscalls/linux/socket_unix_non_stream.cc index 276a94eb8..884319e1d 100644 --- a/test/syscalls/linux/socket_unix_non_stream.cc +++ b/test/syscalls/linux/socket_unix_non_stream.cc @@ -109,7 +109,7 @@ PosixErrorOr> CreateFragmentedRegion(const int size, } // A contiguous iov that is heavily fragmented in FileMem can still be sent -// successfully. +// successfully. See b/115833655. TEST_P(UnixNonStreamSocketPairTest, FragmentedSendMsg) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); @@ -165,7 +165,7 @@ TEST_P(UnixNonStreamSocketPairTest, FragmentedSendMsg) { } // A contiguous iov that is heavily fragmented in FileMem can still be received -// into successfully. +// into successfully. Regression test for b/115833655. TEST_P(UnixNonStreamSocketPairTest, FragmentedRecvMsg) { auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); diff --git a/test/syscalls/linux/symlink.cc b/test/syscalls/linux/symlink.cc index b249ff91f..03ee1250d 100644 --- a/test/syscalls/linux/symlink.cc +++ b/test/syscalls/linux/symlink.cc @@ -38,7 +38,7 @@ mode_t FilePermission(const std::string& path) { } // Test that name collisions are checked on the new link path, not the source -// path. +// path. Regression test for b/31782115. TEST(SymlinkTest, CanCreateSymlinkWithCachedSourceDirent) { const std::string srcname = NewTempAbsPath(); const std::string newname = NewTempAbsPath(); diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 8a8b68e75..c4591a3b9 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -244,7 +244,8 @@ TEST_P(TcpSocketTest, ZeroWriteAllowed) { } // Test that a non-blocking write with a buffer that is larger than the send -// buffer size will not actually write the whole thing at once. +// buffer size will not actually write the whole thing at once. Regression test +// for b/64438887. TEST_P(TcpSocketTest, NonblockingLargeWrite) { // Set the FD to O_NONBLOCK. int opts; diff --git a/test/syscalls/linux/time.cc b/test/syscalls/linux/time.cc index c7eead17e..1ccb95733 100644 --- a/test/syscalls/linux/time.cc +++ b/test/syscalls/linux/time.cc @@ -62,6 +62,7 @@ TEST(TimeTest, VsyscallTime_InvalidAddressSIGSEGV) { ::testing::KilledBySignal(SIGSEGV), ""); } +// Mimics the gettimeofday(2) wrapper from the Go runtime <= 1.2. int vsyscall_gettimeofday(struct timeval* tv, struct timezone* tz) { constexpr uint64_t kVsyscallGettimeofdayEntry = 0xffffffffff600000; return reinterpret_cast( diff --git a/test/syscalls/linux/tkill.cc b/test/syscalls/linux/tkill.cc index bae377c69..8d8ebbb24 100644 --- a/test/syscalls/linux/tkill.cc +++ b/test/syscalls/linux/tkill.cc @@ -54,7 +54,7 @@ void SigHandler(int sig, siginfo_t* info, void* context) { TEST_CHECK(info->si_code == SI_TKILL); } -// Test with a real signal. +// Test with a real signal. Regression test for b/24790092. TEST(TkillTest, ValidTIDAndRealSignal) { struct sigaction sa; sa.sa_sigaction = SigHandler; diff --git a/test/util/temp_path.cc b/test/util/temp_path.cc index 35aacb172..9c10b6674 100644 --- a/test/util/temp_path.cc +++ b/test/util/temp_path.cc @@ -77,6 +77,7 @@ std::string NewTempAbsPath() { std::string NewTempRelPath() { return NextTempBasename(); } std::string GetAbsoluteTestTmpdir() { + // Note that TEST_TMPDIR is guaranteed to be set. char* env_tmpdir = getenv("TEST_TMPDIR"); std::string tmp_dir = env_tmpdir != nullptr ? std::string(env_tmpdir) : "/tmp"; diff --git a/tools/build/tags.bzl b/tools/build/tags.bzl index e99c87f81..a6db44e47 100644 --- a/tools/build/tags.bzl +++ b/tools/build/tags.bzl @@ -33,4 +33,8 @@ go_suffixes = [ "_wasm_unsafe", "_linux", "_linux_unsafe", + "_opts", + "_opts_unsafe", + "_impl", + "_impl_unsafe", ] diff --git a/tools/defs.bzl b/tools/defs.bzl index 5d5fa134a..c03b557ae 100644 --- a/tools/defs.bzl +++ b/tools/defs.bzl @@ -73,6 +73,16 @@ def calculate_sets(srcs): result[target].append(file) return result +def go_imports(name, src, out): + """Simplify a single Go source file by eliminating unused imports.""" + native.genrule( + name = name, + srcs = [src], + outs = [out], + tools = ["@org_golang_x_tools//cmd/goimports:goimports"], + cmd = ("$(location @org_golang_x_tools//cmd/goimports:goimports) $(SRCS) > $@"), + ) + def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = False, **kwargs): """Wraps the standard go_library and does stateification and marshalling. @@ -107,10 +117,15 @@ def go_library(name, srcs, deps = [], imports = [], stateify = True, marshal = F state_sets = calculate_sets(srcs) for (suffix, srcs) in state_sets.items(): go_stateify( - name = name + suffix + "_state_autogen", + name = name + suffix + "_state_autogen_with_imports", srcs = srcs, imports = imports, package = name, + out = name + suffix + "_state_autogen_with_imports.go", + ) + go_imports( + name = name + suffix + "_state_autogen", + src = name + suffix + "_state_autogen_with_imports.go", out = name + suffix + "_state_autogen.go", ) all_srcs = all_srcs + [ -- cgit v1.2.3 From 17b9f5e66238bde1e4ed3bd9e5fb67342c8b58ec Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 7 Feb 2020 14:46:24 -0800 Subject: Support listxattr and removexattr syscalls. Note that these are only implemented for tmpfs, and other impls will still return EOPNOTSUPP. PiperOrigin-RevId: 293899385 --- pkg/p9/client_file.go | 33 ++++ pkg/p9/file.go | 16 ++ pkg/p9/handlers.go | 33 ++++ pkg/p9/messages.go | 199 +++++++++++++++---- pkg/p9/p9.go | 4 + pkg/p9/version.go | 8 +- pkg/sentry/fs/copy_up.go | 2 +- pkg/sentry/fs/fsutil/inode.go | 20 +- pkg/sentry/fs/gofer/context_file.go | 14 ++ pkg/sentry/fs/gofer/inode.go | 13 +- pkg/sentry/fs/inode.go | 14 +- pkg/sentry/fs/inode_operations.go | 13 +- pkg/sentry/fs/inode_overlay.go | 18 +- pkg/sentry/fs/tmpfs/tmpfs.go | 9 +- pkg/sentry/syscalls/linux/linux64_amd64.go | 27 ++- pkg/sentry/syscalls/linux/linux64_arm64.go | 37 ++-- pkg/sentry/syscalls/linux/sys_xattr.go | 200 +++++++++++++++++++- runsc/fsgofer/fsgofer.go | 16 +- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/xattr.cc | 294 ++++++++++++++++------------- 20 files changed, 733 insertions(+), 238 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go index 0254e4ccc..2ee07b664 100644 --- a/pkg/p9/client_file.go +++ b/pkg/p9/client_file.go @@ -194,6 +194,39 @@ func (c *clientFile) SetXattr(name, value string, flags uint32) error { return c.client.sendRecv(&Tsetxattr{FID: c.fid, Name: name, Value: value, Flags: flags}, &Rsetxattr{}) } +// ListXattr implements File.ListXattr. +func (c *clientFile) ListXattr(size uint64) (map[string]struct{}, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return nil, syscall.EBADF + } + if !versionSupportsListRemoveXattr(c.client.version) { + return nil, syscall.EOPNOTSUPP + } + + rlistxattr := Rlistxattr{} + if err := c.client.sendRecv(&Tlistxattr{FID: c.fid, Size: size}, &rlistxattr); err != nil { + return nil, err + } + + xattrs := make(map[string]struct{}, len(rlistxattr.Xattrs)) + for _, x := range rlistxattr.Xattrs { + xattrs[x] = struct{}{} + } + return xattrs, nil +} + +// RemoveXattr implements File.RemoveXattr. +func (c *clientFile) RemoveXattr(name string) error { + if atomic.LoadUint32(&c.closed) != 0 { + return syscall.EBADF + } + if !versionSupportsListRemoveXattr(c.client.version) { + return syscall.EOPNOTSUPP + } + + return c.client.sendRecv(&Tremovexattr{FID: c.fid, Name: name}, &Rremovexattr{}) +} + // Allocate implements File.Allocate. func (c *clientFile) Allocate(mode AllocateMode, offset, length uint64) error { if atomic.LoadUint32(&c.closed) != 0 { diff --git a/pkg/p9/file.go b/pkg/p9/file.go index 4607cfcdf..d4ffbc8e3 100644 --- a/pkg/p9/file.go +++ b/pkg/p9/file.go @@ -105,6 +105,22 @@ type File interface { // TODO(b/127675828): Determine concurrency guarantees once implemented. SetXattr(name, value string, flags uint32) error + // ListXattr lists the names of the extended attributes on this node. + // + // Size indicates the size of the buffer that has been allocated to hold the + // attribute list. If the list would be larger than size, implementations may + // return ERANGE to indicate that the buffer is too small, but they are also + // free to ignore the hint entirely (i.e. the value returned may be larger + // than size). All size checking is done independently at the syscall layer. + // + // TODO(b/148303075): Determine concurrency guarantees once implemented. + ListXattr(size uint64) (map[string]struct{}, error) + + // RemoveXattr removes extended attributes on this node. + // + // TODO(b/148303075): Determine concurrency guarantees once implemented. + RemoveXattr(name string) error + // Allocate allows the caller to directly manipulate the allocated disk space // for the file. See fallocate(2) for more details. Allocate(mode AllocateMode, offset, length uint64) error diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go index 7d6653a07..2ac45eb80 100644 --- a/pkg/p9/handlers.go +++ b/pkg/p9/handlers.go @@ -941,6 +941,39 @@ func (t *Tsetxattr) handle(cs *connState) message { return &Rsetxattr{} } +// handle implements handler.handle. +func (t *Tlistxattr) handle(cs *connState) message { + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + xattrs, err := ref.file.ListXattr(t.Size) + if err != nil { + return newErr(err) + } + xattrList := make([]string, 0, len(xattrs)) + for x := range xattrs { + xattrList = append(xattrList, x) + } + return &Rlistxattr{Xattrs: xattrList} +} + +// handle implements handler.handle. +func (t *Tremovexattr) handle(cs *connState) message { + ref, ok := cs.LookupFID(t.FID) + if !ok { + return newErr(syscall.EBADF) + } + defer ref.DecRef() + + if err := ref.file.RemoveXattr(t.Name); err != nil { + return newErr(err) + } + return &Rremovexattr{} +} + // handle implements handler.handle. func (t *Treaddir) handle(cs *connState) message { ref, ok := cs.LookupFID(t.Directory) diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go index ceb723d86..b1cede5f5 100644 --- a/pkg/p9/messages.go +++ b/pkg/p9/messages.go @@ -174,11 +174,11 @@ type Rflush struct { } // Decode implements encoder.Decode. -func (*Rflush) Decode(b *buffer) { +func (*Rflush) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rflush) Encode(b *buffer) { +func (*Rflush) Encode(*buffer) { } // Type implements message.Type. @@ -188,7 +188,7 @@ func (*Rflush) Type() MsgType { // String implements fmt.Stringer. func (r *Rflush) String() string { - return fmt.Sprintf("RFlush{}") + return "RFlush{}" } // Twalk is a walk request. @@ -300,11 +300,11 @@ type Rclunk struct { } // Decode implements encoder.Decode. -func (*Rclunk) Decode(b *buffer) { +func (*Rclunk) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rclunk) Encode(b *buffer) { +func (*Rclunk) Encode(*buffer) { } // Type implements message.Type. @@ -314,7 +314,7 @@ func (*Rclunk) Type() MsgType { // String implements fmt.Stringer. func (r *Rclunk) String() string { - return fmt.Sprintf("Rclunk{}") + return "Rclunk{}" } // Tremove is a remove request. @@ -350,11 +350,11 @@ type Rremove struct { } // Decode implements encoder.Decode. -func (*Rremove) Decode(b *buffer) { +func (*Rremove) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rremove) Encode(b *buffer) { +func (*Rremove) Encode(*buffer) { } // Type implements message.Type. @@ -364,7 +364,7 @@ func (*Rremove) Type() MsgType { // String implements fmt.Stringer. func (r *Rremove) String() string { - return fmt.Sprintf("Rremove{}") + return "Rremove{}" } // Rlerror is an error response. @@ -745,16 +745,16 @@ func (*Rlink) Type() MsgType { } // Decode implements encoder.Decode. -func (*Rlink) Decode(b *buffer) { +func (*Rlink) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rlink) Encode(b *buffer) { +func (*Rlink) Encode(*buffer) { } // String implements fmt.Stringer. func (r *Rlink) String() string { - return fmt.Sprintf("Rlink{}") + return "Rlink{}" } // Trenameat is a rename request. @@ -803,11 +803,11 @@ type Rrenameat struct { } // Decode implements encoder.Decode. -func (*Rrenameat) Decode(b *buffer) { +func (*Rrenameat) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rrenameat) Encode(b *buffer) { +func (*Rrenameat) Encode(*buffer) { } // Type implements message.Type. @@ -817,7 +817,7 @@ func (*Rrenameat) Type() MsgType { // String implements fmt.Stringer. func (r *Rrenameat) String() string { - return fmt.Sprintf("Rrenameat{}") + return "Rrenameat{}" } // Tunlinkat is an unlink request. @@ -861,11 +861,11 @@ type Runlinkat struct { } // Decode implements encoder.Decode. -func (*Runlinkat) Decode(b *buffer) { +func (*Runlinkat) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Runlinkat) Encode(b *buffer) { +func (*Runlinkat) Encode(*buffer) { } // Type implements message.Type. @@ -875,7 +875,7 @@ func (*Runlinkat) Type() MsgType { // String implements fmt.Stringer. func (r *Runlinkat) String() string { - return fmt.Sprintf("Runlinkat{}") + return "Runlinkat{}" } // Trename is a rename request. @@ -922,11 +922,11 @@ type Rrename struct { } // Decode implements encoder.Decode. -func (*Rrename) Decode(b *buffer) { +func (*Rrename) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rrename) Encode(b *buffer) { +func (*Rrename) Encode(*buffer) { } // Type implements message.Type. @@ -936,7 +936,7 @@ func (*Rrename) Type() MsgType { // String implements fmt.Stringer. func (r *Rrename) String() string { - return fmt.Sprintf("Rrename{}") + return "Rrename{}" } // Treadlink is a readlink request. @@ -1409,11 +1409,11 @@ type Rsetattr struct { } // Decode implements encoder.Decode. -func (*Rsetattr) Decode(b *buffer) { +func (*Rsetattr) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rsetattr) Encode(b *buffer) { +func (*Rsetattr) Encode(*buffer) { } // Type implements message.Type. @@ -1423,7 +1423,7 @@ func (*Rsetattr) Type() MsgType { // String implements fmt.Stringer. func (r *Rsetattr) String() string { - return fmt.Sprintf("Rsetattr{}") + return "Rsetattr{}" } // Tallocate is an allocate request. This is an extension to 9P protocol, not @@ -1466,11 +1466,11 @@ type Rallocate struct { } // Decode implements encoder.Decode. -func (*Rallocate) Decode(b *buffer) { +func (*Rallocate) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rallocate) Encode(b *buffer) { +func (*Rallocate) Encode(*buffer) { } // Type implements message.Type. @@ -1480,7 +1480,71 @@ func (*Rallocate) Type() MsgType { // String implements fmt.Stringer. func (r *Rallocate) String() string { - return fmt.Sprintf("Rallocate{}") + return "Rallocate{}" +} + +// Tlistxattr is a listxattr request. +type Tlistxattr struct { + // FID refers to the file on which to list xattrs. + FID FID + + // Size is the buffer size for the xattr list. + Size uint64 +} + +// Decode implements encoder.Decode. +func (t *Tlistxattr) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Size = b.Read64() +} + +// Encode implements encoder.Encode. +func (t *Tlistxattr) Encode(b *buffer) { + b.WriteFID(t.FID) + b.Write64(t.Size) +} + +// Type implements message.Type. +func (*Tlistxattr) Type() MsgType { + return MsgTlistxattr +} + +// String implements fmt.Stringer. +func (t *Tlistxattr) String() string { + return fmt.Sprintf("Tlistxattr{FID: %d, Size: %d}", t.FID, t.Size) +} + +// Rlistxattr is a listxattr response. +type Rlistxattr struct { + // Xattrs is a list of extended attribute names. + Xattrs []string +} + +// Decode implements encoder.Decode. +func (r *Rlistxattr) Decode(b *buffer) { + n := b.Read16() + r.Xattrs = r.Xattrs[:0] + for i := 0; i < int(n); i++ { + r.Xattrs = append(r.Xattrs, b.ReadString()) + } +} + +// Encode implements encoder.Encode. +func (r *Rlistxattr) Encode(b *buffer) { + b.Write16(uint16(len(r.Xattrs))) + for _, x := range r.Xattrs { + b.WriteString(x) + } +} + +// Type implements message.Type. +func (*Rlistxattr) Type() MsgType { + return MsgRlistxattr +} + +// String implements fmt.Stringer. +func (r *Rlistxattr) String() string { + return fmt.Sprintf("Rlistxattr{Xattrs: %v}", r.Xattrs) } // Txattrwalk walks extended attributes. @@ -1594,11 +1658,11 @@ type Rxattrcreate struct { } // Decode implements encoder.Decode. -func (r *Rxattrcreate) Decode(b *buffer) { +func (r *Rxattrcreate) Decode(*buffer) { } // Encode implements encoder.Encode. -func (r *Rxattrcreate) Encode(b *buffer) { +func (r *Rxattrcreate) Encode(*buffer) { } // Type implements message.Type. @@ -1608,7 +1672,7 @@ func (*Rxattrcreate) Type() MsgType { // String implements fmt.Stringer. func (r *Rxattrcreate) String() string { - return fmt.Sprintf("Rxattrcreate{}") + return "Rxattrcreate{}" } // Tgetxattr is a getxattr request. @@ -1719,11 +1783,11 @@ type Rsetxattr struct { } // Decode implements encoder.Decode. -func (r *Rsetxattr) Decode(b *buffer) { +func (r *Rsetxattr) Decode(*buffer) { } // Encode implements encoder.Encode. -func (r *Rsetxattr) Encode(b *buffer) { +func (r *Rsetxattr) Encode(*buffer) { } // Type implements message.Type. @@ -1733,7 +1797,60 @@ func (*Rsetxattr) Type() MsgType { // String implements fmt.Stringer. func (r *Rsetxattr) String() string { - return fmt.Sprintf("Rsetxattr{}") + return "Rsetxattr{}" +} + +// Tremovexattr is a removexattr request. +type Tremovexattr struct { + // FID refers to the file on which to set xattrs. + FID FID + + // Name is the attribute name. + Name string +} + +// Decode implements encoder.Decode. +func (t *Tremovexattr) Decode(b *buffer) { + t.FID = b.ReadFID() + t.Name = b.ReadString() +} + +// Encode implements encoder.Encode. +func (t *Tremovexattr) Encode(b *buffer) { + b.WriteFID(t.FID) + b.WriteString(t.Name) +} + +// Type implements message.Type. +func (*Tremovexattr) Type() MsgType { + return MsgTremovexattr +} + +// String implements fmt.Stringer. +func (t *Tremovexattr) String() string { + return fmt.Sprintf("Tremovexattr{FID: %d, Name: %s}", t.FID, t.Name) +} + +// Rremovexattr is a removexattr response. +type Rremovexattr struct { +} + +// Decode implements encoder.Decode. +func (r *Rremovexattr) Decode(*buffer) { +} + +// Encode implements encoder.Encode. +func (r *Rremovexattr) Encode(*buffer) { +} + +// Type implements message.Type. +func (*Rremovexattr) Type() MsgType { + return MsgRremovexattr +} + +// String implements fmt.Stringer. +func (r *Rremovexattr) String() string { + return "Rremovexattr{}" } // Treaddir is a readdir request. @@ -1880,11 +1997,11 @@ type Rfsync struct { } // Decode implements encoder.Decode. -func (*Rfsync) Decode(b *buffer) { +func (*Rfsync) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rfsync) Encode(b *buffer) { +func (*Rfsync) Encode(*buffer) { } // Type implements message.Type. @@ -1894,7 +2011,7 @@ func (*Rfsync) Type() MsgType { // String implements fmt.Stringer. func (r *Rfsync) String() string { - return fmt.Sprintf("Rfsync{}") + return "Rfsync{}" } // Tstatfs is a stat request. @@ -1980,11 +2097,11 @@ type Rflushf struct { } // Decode implements encoder.Decode. -func (*Rflushf) Decode(b *buffer) { +func (*Rflushf) Decode(*buffer) { } // Encode implements encoder.Encode. -func (*Rflushf) Encode(b *buffer) { +func (*Rflushf) Encode(*buffer) { } // Type implements message.Type. @@ -1994,7 +2111,7 @@ func (*Rflushf) Type() MsgType { // String implements fmt.Stringer. func (*Rflushf) String() string { - return fmt.Sprintf("Rflushf{}") + return "Rflushf{}" } // Twalkgetattr is a walk request. @@ -2484,6 +2601,8 @@ func init() { msgRegistry.register(MsgRgetattr, func() message { return &Rgetattr{} }) msgRegistry.register(MsgTsetattr, func() message { return &Tsetattr{} }) msgRegistry.register(MsgRsetattr, func() message { return &Rsetattr{} }) + msgRegistry.register(MsgTlistxattr, func() message { return &Tlistxattr{} }) + msgRegistry.register(MsgRlistxattr, func() message { return &Rlistxattr{} }) msgRegistry.register(MsgTxattrwalk, func() message { return &Txattrwalk{} }) msgRegistry.register(MsgRxattrwalk, func() message { return &Rxattrwalk{} }) msgRegistry.register(MsgTxattrcreate, func() message { return &Txattrcreate{} }) @@ -2492,6 +2611,8 @@ func init() { msgRegistry.register(MsgRgetxattr, func() message { return &Rgetxattr{} }) msgRegistry.register(MsgTsetxattr, func() message { return &Tsetxattr{} }) msgRegistry.register(MsgRsetxattr, func() message { return &Rsetxattr{} }) + msgRegistry.register(MsgTremovexattr, func() message { return &Tremovexattr{} }) + msgRegistry.register(MsgRremovexattr, func() message { return &Rremovexattr{} }) msgRegistry.register(MsgTreaddir, func() message { return &Treaddir{} }) msgRegistry.register(MsgRreaddir, func() message { return &Rreaddir{} }) msgRegistry.register(MsgTfsync, func() message { return &Tfsync{} }) diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go index 5ab00d625..20ab31f7a 100644 --- a/pkg/p9/p9.go +++ b/pkg/p9/p9.go @@ -335,6 +335,8 @@ const ( MsgRgetattr = 25 MsgTsetattr = 26 MsgRsetattr = 27 + MsgTlistxattr = 28 + MsgRlistxattr = 29 MsgTxattrwalk = 30 MsgRxattrwalk = 31 MsgTxattrcreate = 32 @@ -343,6 +345,8 @@ const ( MsgRgetxattr = 35 MsgTsetxattr = 36 MsgRsetxattr = 37 + MsgTremovexattr = 38 + MsgRremovexattr = 39 MsgTreaddir = 40 MsgRreaddir = 41 MsgTfsync = 50 diff --git a/pkg/p9/version.go b/pkg/p9/version.go index 34a15eb55..09cde9f5a 100644 --- a/pkg/p9/version.go +++ b/pkg/p9/version.go @@ -26,7 +26,7 @@ const ( // // Clients are expected to start requesting this version number and // to continuously decrement it until a Tversion request succeeds. - highestSupportedVersion uint32 = 10 + highestSupportedVersion uint32 = 11 // lowestSupportedVersion is the lowest supported version X in a // version string of the format 9P2000.L.Google.X. @@ -167,3 +167,9 @@ func VersionSupportsOpenTruncateFlag(v uint32) bool { func versionSupportsGetSetXattr(v uint32) bool { return v >= 10 } + +// versionSupportsListRemoveXattr returns true if version v supports +// the Tlistxattr and Tremovexattr messages. +func versionSupportsListRemoveXattr(v uint32) bool { + return v >= 11 +} diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index f6c79e51b..b060a12ff 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -401,7 +401,7 @@ func copyAttributesLocked(ctx context.Context, upper *Inode, lower *Inode) error if err != nil { return err } - lowerXattr, err := lower.ListXattr(ctx) + lowerXattr, err := lower.ListXattr(ctx, linux.XATTR_SIZE_MAX) if err != nil && err != syserror.EOPNOTSUPP { return err } diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index 252830572..daecc4ffe 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -247,7 +247,7 @@ func (i *InodeSimpleExtendedAttributes) SetXattr(_ context.Context, _ *fs.Inode, } // ListXattr implements fs.InodeOperations.ListXattr. -func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { +func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) { i.mu.RLock() names := make(map[string]struct{}, len(i.xattrs)) for name := range i.xattrs { @@ -257,6 +257,17 @@ func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (m return names, nil } +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (i *InodeSimpleExtendedAttributes) RemoveXattr(_ context.Context, _ *fs.Inode, name string) error { + i.mu.RLock() + defer i.mu.RUnlock() + if _, ok := i.xattrs[name]; ok { + delete(i.xattrs, name) + return nil + } + return syserror.ENOATTR +} + // staticFile is a file with static contents. It is returned by // InodeStaticFileGetter.GetFile. // @@ -460,10 +471,15 @@ func (InodeNoExtendedAttributes) SetXattr(context.Context, *fs.Inode, string, st } // ListXattr implements fs.InodeOperations.ListXattr. -func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { +func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) { return nil, syserror.EOPNOTSUPP } +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (InodeNoExtendedAttributes) RemoveXattr(context.Context, *fs.Inode, string) error { + return syserror.EOPNOTSUPP +} + // InodeNoopRelease implements fs.InodeOperations.Release as a noop. type InodeNoopRelease struct{} diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go index 3da818aed..125907d70 100644 --- a/pkg/sentry/fs/gofer/context_file.go +++ b/pkg/sentry/fs/gofer/context_file.go @@ -73,6 +73,20 @@ func (c *contextFile) setXattr(ctx context.Context, name, value string, flags ui return err } +func (c *contextFile) listXattr(ctx context.Context, size uint64) (map[string]struct{}, error) { + ctx.UninterruptibleSleepStart(false) + xattrs, err := c.file.ListXattr(size) + ctx.UninterruptibleSleepFinish(false) + return xattrs, err +} + +func (c *contextFile) removeXattr(ctx context.Context, name string) error { + ctx.UninterruptibleSleepStart(false) + err := c.file.RemoveXattr(name) + ctx.UninterruptibleSleepFinish(false) + return err +} + func (c *contextFile) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error { ctx.UninterruptibleSleepStart(false) err := c.file.Allocate(mode, offset, length) diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index ac28174d2..1c934981b 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -604,18 +604,23 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length } // GetXattr implements fs.InodeOperations.GetXattr. -func (i *inodeOperations) GetXattr(ctx context.Context, inode *fs.Inode, name string, size uint64) (string, error) { +func (i *inodeOperations) GetXattr(ctx context.Context, _ *fs.Inode, name string, size uint64) (string, error) { return i.fileState.file.getXattr(ctx, name, size) } // SetXattr implements fs.InodeOperations.SetXattr. -func (i *inodeOperations) SetXattr(ctx context.Context, inode *fs.Inode, name string, value string, flags uint32) error { +func (i *inodeOperations) SetXattr(ctx context.Context, _ *fs.Inode, name string, value string, flags uint32) error { return i.fileState.file.setXattr(ctx, name, value, flags) } // ListXattr implements fs.InodeOperations.ListXattr. -func (i *inodeOperations) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) { - return nil, syscall.EOPNOTSUPP +func (i *inodeOperations) ListXattr(ctx context.Context, _ *fs.Inode, size uint64) (map[string]struct{}, error) { + return i.fileState.file.listXattr(ctx, size) +} + +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (i *inodeOperations) RemoveXattr(ctx context.Context, _ *fs.Inode, name string) error { + return i.fileState.file.removeXattr(ctx, name) } // Allocate implements fs.InodeOperations.Allocate. diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index b66c091ab..55fb71c16 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -278,11 +278,19 @@ func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, fla } // ListXattr calls i.InodeOperations.ListXattr with i as the Inode. -func (i *Inode) ListXattr(ctx context.Context) (map[string]struct{}, error) { +func (i *Inode) ListXattr(ctx context.Context, size uint64) (map[string]struct{}, error) { if i.overlay != nil { - return overlayListXattr(ctx, i.overlay) + return overlayListXattr(ctx, i.overlay, size) } - return i.InodeOperations.ListXattr(ctx, i) + return i.InodeOperations.ListXattr(ctx, i, size) +} + +// RemoveXattr calls i.InodeOperations.RemoveXattr with i as the Inode. +func (i *Inode) RemoveXattr(ctx context.Context, d *Dirent, name string) error { + if i.overlay != nil { + return overlayRemoveXattr(ctx, i.overlay, d, name) + } + return i.InodeOperations.RemoveXattr(ctx, i, name) } // CheckPermission will check if the caller may access this file in the diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go index 70f2eae96..2bbfb72ef 100644 --- a/pkg/sentry/fs/inode_operations.go +++ b/pkg/sentry/fs/inode_operations.go @@ -190,7 +190,18 @@ type InodeOperations interface { // ListXattr returns the set of all extended attributes names that // have values. Inodes that do not support extended attributes return // EOPNOTSUPP. - ListXattr(ctx context.Context, inode *Inode) (map[string]struct{}, error) + // + // If this is called through the listxattr(2) syscall, size indicates the + // size of the buffer that the application has allocated to hold the + // attribute list. If the list would be larger than size, implementations may + // return ERANGE to indicate that the buffer is too small, but they are also + // free to ignore the hint entirely. All size checking is done independently + // at the syscall layer. + ListXattr(ctx context.Context, inode *Inode, size uint64) (map[string]struct{}, error) + + // RemoveXattr removes an extended attribute specified by name. Inodes that + // do not support extended attributes return EOPNOTSUPP. + RemoveXattr(ctx context.Context, inode *Inode, name string) error // Check determines whether an Inode can be accessed with the // requested permission mask using the context (which gives access diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 4729b4aac..5ada33a32 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -564,15 +564,15 @@ func overlaySetxattr(ctx context.Context, o *overlayEntry, d *Dirent, name, valu return o.upper.SetXattr(ctx, d, name, value, flags) } -func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{}, error) { +func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[string]struct{}, error) { o.copyMu.RLock() defer o.copyMu.RUnlock() var names map[string]struct{} var err error if o.upper != nil { - names, err = o.upper.ListXattr(ctx) + names, err = o.upper.ListXattr(ctx, size) } else { - names, err = o.lower.ListXattr(ctx) + names, err = o.lower.ListXattr(ctx, size) } for name := range names { // Same as overlayGetXattr, we shouldn't forward along @@ -584,6 +584,18 @@ func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{} return names, err } +func overlayRemoveXattr(ctx context.Context, o *overlayEntry, d *Dirent, name string) error { + // Don't allow changes to overlay xattrs through a removexattr syscall. + if strings.HasPrefix(XattrOverlayPrefix, name) { + return syserror.EPERM + } + + if err := copyUp(ctx, d); err != nil { + return err + } + return o.upper.RemoveXattr(ctx, d, name) +} + func overlayCheck(ctx context.Context, o *overlayEntry, p PermMask) error { o.copyMu.RLock() // Hot path. Avoid defers. diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index c00cef0a5..3c2b583ae 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -159,8 +159,13 @@ func (d *Dir) SetXattr(ctx context.Context, i *fs.Inode, name, value string, fla } // ListXattr implements fs.InodeOperations.ListXattr. -func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode) (map[string]struct{}, error) { - return d.ramfsDir.ListXattr(ctx, i) +func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode, size uint64) (map[string]struct{}, error) { + return d.ramfsDir.ListXattr(ctx, i, size) +} + +// RemoveXattr implements fs.InodeOperations.RemoveXattr. +func (d *Dir) RemoveXattr(ctx context.Context, i *fs.Inode, name string) error { + return d.ramfsDir.RemoveXattr(ctx, i, name) } // Lookup implements fs.InodeOperations.Lookup. diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go index 588f8b087..79066ad2a 100644 --- a/pkg/sentry/syscalls/linux/linux64_amd64.go +++ b/pkg/sentry/syscalls/linux/linux64_amd64.go @@ -228,21 +228,18 @@ var AMD64 = &kernel.SyscallTable{ 185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil), 186: syscalls.Supported("gettid", Gettid), 187: syscalls.Supported("readahead", Readahead), - // TODO(b/148303075): Enable set/getxattr (in their various - // forms) once we also have list and removexattr. The JVM - // assumes that if get/set exist, then list and remove do too. - 188: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 189: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 190: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), + 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), + 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), + 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), + 194: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), + 195: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), + 196: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil), + 197: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), + 198: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), + 199: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), 200: syscalls.Supported("tkill", Tkill), 201: syscalls.Supported("time", Time), 202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go index 06e5ee401..7421619de 100644 --- a/pkg/sentry/syscalls/linux/linux64_arm64.go +++ b/pkg/sentry/syscalls/linux/linux64_arm64.go @@ -36,26 +36,23 @@ var ARM64 = &kernel.SyscallTable{ }, AuditNumber: linux.AUDIT_ARCH_AARCH64, Table: map[uintptr]kernel.Syscall{ - 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), - // TODO(b/148303075): Enable set/getxattr (in their various - // forms) once we also have list and removexattr. The JVM - // assumes that if get/set exist, then list and remove do too. - 5: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 6: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 7: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 13: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), - 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}), + 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}), + 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil), + 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil), + 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil), + 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil), + 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil), + 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil), + 11: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil), + 12: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil), + 13: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil), + 14: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil), + 15: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil), + 16: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil), 17: syscalls.Supported("getcwd", Getcwd), 18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil), 19: syscalls.Supported("eventfd2", Eventfd2), diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index efb95555c..342337726 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -72,7 +72,7 @@ func getXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink } valueLen := 0 - err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -172,7 +172,7 @@ func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink return 0, nil, err } - return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error { + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { if dirPath && !fs.IsDir(d.Inode.StableAttr) { return syserror.ENOTDIR } @@ -187,12 +187,12 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si return syserror.EINVAL } - if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { + name, err := copyInXattrName(t, nameAddr) + if err != nil { return err } - name, err := copyInXattrName(t, nameAddr) - if err != nil { + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { return err } @@ -226,12 +226,18 @@ func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { return name, nil } +// Restrict xattrs to regular files and directories. +// +// TODO(b/148380782): In Linux, this restriction technically only applies to +// xattrs in the "user.*" namespace. Make file type checks specific to the +// namespace once we allow other xattr prefixes. +func xattrFileTypeOk(i *fs.Inode) bool { + return fs.IsRegular(i.StableAttr) || fs.IsDir(i.StableAttr) +} + func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error { // Restrict xattrs to regular files and directories. - // - // In Linux, this restriction technically only applies to xattrs in the - // "user.*" namespace, but we don't allow any other xattr prefixes anyway. - if !fs.IsRegular(i.StableAttr) && !fs.IsDir(i.StableAttr) { + if !xattrFileTypeOk(i) { if perms.Write { return syserror.EPERM } @@ -240,3 +246,179 @@ func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error return i.CheckPermission(t, perms) } + +// ListXattr implements linux syscall listxattr(2). +func ListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return listXattrFromPath(t, args, true) +} + +// LListXattr implements linux syscall llistxattr(2). +func LListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return listXattrFromPath(t, args, false) +} + +// FListXattr implements linux syscall flistxattr(2). +func FListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + listAddr := args[1].Pointer() + size := uint64(args[2].SizeT()) + + // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. + f := t.GetFile(fd) + if f == nil { + return 0, nil, syserror.EBADF + } + defer f.DecRef() + + n, err := listXattr(t, f.Dirent, listAddr, size) + if err != nil { + return 0, nil, err + } + + return uintptr(n), nil, nil +} + +func listXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + listAddr := args[1].Pointer() + size := uint64(args[2].SizeT()) + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + n := 0 + err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + + n, err = listXattr(t, d, listAddr, size) + return err + }) + if err != nil { + return 0, nil, err + } + + return uintptr(n), nil, nil +} + +func listXattr(t *kernel.Task, d *fs.Dirent, addr usermem.Addr, size uint64) (int, error) { + if !xattrFileTypeOk(d.Inode) { + return 0, nil + } + + // If listxattr(2) is called with size 0, the buffer size needed to contain + // the xattr list will be returned successfully even if it is nonzero. In + // that case, we need to retrieve the entire list so we can compute and + // return the correct size. + requestedSize := size + if size == 0 || size > linux.XATTR_SIZE_MAX { + requestedSize = linux.XATTR_SIZE_MAX + } + xattrs, err := d.Inode.ListXattr(t, requestedSize) + if err != nil { + return 0, err + } + + // TODO(b/148380782): support namespaces other than "user". + for x := range xattrs { + if !strings.HasPrefix(x, linux.XATTR_USER_PREFIX) { + delete(xattrs, x) + } + } + + listSize := xattrListSize(xattrs) + if listSize > linux.XATTR_SIZE_MAX { + return 0, syserror.E2BIG + } + if uint64(listSize) > requestedSize { + return 0, syserror.ERANGE + } + + // Don't copy out the attributes if size is 0. + if size == 0 { + return listSize, nil + } + + buf := make([]byte, 0, listSize) + for x := range xattrs { + buf = append(buf, []byte(x)...) + buf = append(buf, 0) + } + if _, err := t.CopyOutBytes(addr, buf); err != nil { + return 0, err + } + + return len(buf), nil +} + +func xattrListSize(xattrs map[string]struct{}) int { + size := 0 + for x := range xattrs { + size += len(x) + 1 + } + return size +} + +// RemoveXattr implements linux syscall removexattr(2). +func RemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return removeXattrFromPath(t, args, true) +} + +// LRemoveXattr implements linux syscall lremovexattr(2). +func LRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + return removeXattrFromPath(t, args, false) +} + +// FRemoveXattr implements linux syscall fremovexattr(2). +func FRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + nameAddr := args[1].Pointer() + + // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH. + f := t.GetFile(fd) + if f == nil { + return 0, nil, syserror.EBADF + } + defer f.DecRef() + + return 0, nil, removeXattr(t, f.Dirent, nameAddr) +} + +func removeXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) { + pathAddr := args[0].Pointer() + nameAddr := args[1].Pointer() + + path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */) + if err != nil { + return 0, nil, err + } + + return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error { + if dirPath && !fs.IsDir(d.Inode.StableAttr) { + return syserror.ENOTDIR + } + + return removeXattr(t, d, nameAddr) + }) +} + +// removeXattr implements removexattr(2) from the given *fs.Dirent. +func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error { + name, err := copyInXattrName(t, nameAddr) + if err != nil { + return err + } + + if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil { + return err + } + + if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) { + return syserror.EOPNOTSUPP + } + + return d.Inode.RemoveXattr(t, d, name) +} diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 4d84ad999..cadd83273 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -768,12 +768,22 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { } // TODO(b/127675828): support getxattr. -func (l *localFile) GetXattr(name string, size uint64) (string, error) { +func (*localFile) GetXattr(string, uint64) (string, error) { return "", syscall.EOPNOTSUPP } // TODO(b/127675828): support setxattr. -func (l *localFile) SetXattr(name, value string, flags uint32) error { +func (*localFile) SetXattr(string, string, uint32) error { + return syscall.EOPNOTSUPP +} + +// TODO(b/148303075): support listxattr. +func (*localFile) ListXattr(uint64) (map[string]struct{}, error) { + return nil, syscall.EOPNOTSUPP +} + +// TODO(b/148303075): support removexattr. +func (*localFile) RemoveXattr(string) error { return syscall.EOPNOTSUPP } @@ -790,7 +800,7 @@ func (l *localFile) Allocate(mode p9.AllocateMode, offset, length uint64) error } // Rename implements p9.File; this should never be called. -func (l *localFile) Rename(p9.File, string) error { +func (*localFile) Rename(p9.File, string) error { panic("rename called directly") } diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 12d389c3e..ca1af209a 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3782,6 +3782,7 @@ cc_binary( "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", gtest, "//test/util:posix_error", diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 85eb31847..8b00ef44c 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -24,6 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" #include "test/syscalls/linux/file_base.h" #include "test/util/capability_util.h" #include "test/util/file_descriptor.h" @@ -38,36 +39,36 @@ namespace { class XattrTest : public FileTest {}; -TEST_F(XattrTest, XattrNullName) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); +TEST_F(XattrTest, XattrNonexistentFile) { + const char* path = "/does/not/exist"; + EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), + SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(getxattr(path, nullptr, nullptr, 0), + SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(listxattr(path, nullptr, 0), SyscallFailsWithErrno(ENOENT)); + EXPECT_THAT(removexattr(path, nullptr), SyscallFailsWithErrno(ENOENT)); +} +TEST_F(XattrTest, XattrNullName) { const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EFAULT)); EXPECT_THAT(getxattr(path, nullptr, nullptr, 0), SyscallFailsWithErrno(EFAULT)); + EXPECT_THAT(removexattr(path, nullptr), SyscallFailsWithErrno(EFAULT)); } TEST_F(XattrTest, XattrEmptyName) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); EXPECT_THAT(setxattr(path, "", nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(ERANGE)); EXPECT_THAT(getxattr(path, "", nullptr, 0), SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(removexattr(path, ""), SyscallFailsWithErrno(ERANGE)); } TEST_F(XattrTest, XattrLargeName) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); std::string name = "user."; name += std::string(XATTR_NAME_MAX - name.length(), 'a'); @@ -86,28 +87,23 @@ TEST_F(XattrTest, XattrLargeName) { SyscallFailsWithErrno(ERANGE)); EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), SyscallFailsWithErrno(ERANGE)); + EXPECT_THAT(removexattr(path, name.c_str()), SyscallFailsWithErrno(ERANGE)); } TEST_F(XattrTest, XattrInvalidPrefix) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); std::string name(XATTR_NAME_MAX, 'a'); EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EOPNOTSUPP)); EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), SyscallFailsWithErrno(EOPNOTSUPP)); + EXPECT_THAT(removexattr(path, name.c_str()), + SyscallFailsWithErrno(EOPNOTSUPP)); } // Do not allow save/restore cycles after making the test file read-only, as // the restore will fail to open it with r/w permissions. TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -124,19 +120,21 @@ TEST_F(XattrTest, XattrReadOnly_NoRandomSave) { EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EACCES)); char buf = '-'; EXPECT_THAT(getxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); EXPECT_EQ(buf, val); + + char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); } // Do not allow save/restore cycles after making the test file write-only, as // the restore will fail to open it with r/w permissions. TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - // Drop capabilities that allow us to override file and directory permissions. ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -152,6 +150,14 @@ TEST_F(XattrTest, XattrWriteOnly_NoRandomSave) { EXPECT_THAT(setxattr(path, name, &val, size, /*flags=*/0), SyscallSucceeds()); EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(EACCES)); + + // listxattr will succeed even without read permissions. + char list[sizeof(name)]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(removexattr(path, name), SyscallSucceeds()); } TEST_F(XattrTest, XattrTrustedWithNonadmin) { @@ -163,64 +169,66 @@ TEST_F(XattrTest, XattrTrustedWithNonadmin) { const char name[] = "trusted.abc"; EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM)); EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } TEST_F(XattrTest, XattrOnDirectory) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const char name[] = "user.test"; - EXPECT_THAT(setxattr(dir.path().c_str(), name, NULL, 0, /*flags=*/0), + EXPECT_THAT(setxattr(dir.path().c_str(), name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); - EXPECT_THAT(getxattr(dir.path().c_str(), name, NULL, 0), + EXPECT_THAT(getxattr(dir.path().c_str(), name, nullptr, 0), SyscallSucceedsWithValue(0)); + + char list[sizeof(name)]; + EXPECT_THAT(listxattr(dir.path().c_str(), list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(removexattr(dir.path().c_str(), name), SyscallSucceeds()); } TEST_F(XattrTest, XattrOnSymlink) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); const char name[] = "user.test"; - EXPECT_THAT(setxattr(link.path().c_str(), name, NULL, 0, /*flags=*/0), + EXPECT_THAT(setxattr(link.path().c_str(), name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); - EXPECT_THAT(getxattr(link.path().c_str(), name, NULL, 0), + EXPECT_THAT(getxattr(link.path().c_str(), name, nullptr, 0), SyscallSucceedsWithValue(0)); + + char list[sizeof(name)]; + EXPECT_THAT(listxattr(link.path().c_str(), list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(removexattr(link.path().c_str(), name), SyscallSucceeds()); } TEST_F(XattrTest, XattrOnInvalidFileTypes) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char name[] = "user.test"; char char_device[] = "/dev/zero"; - EXPECT_THAT(setxattr(char_device, name, NULL, 0, /*flags=*/0), + EXPECT_THAT(setxattr(char_device, name, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(getxattr(char_device, name, NULL, 0), + EXPECT_THAT(getxattr(char_device, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(listxattr(char_device, nullptr, 0), SyscallSucceedsWithValue(0)); // Use tmpfs, where creation of named pipes is supported. const std::string fifo = NewTempAbsPathInDir("/dev/shm"); const char* path = fifo.c_str(); EXPECT_THAT(mknod(path, S_IFIFO | S_IRUSR | S_IWUSR, 0), SyscallSucceeds()); - EXPECT_THAT(setxattr(path, name, NULL, 0, /*flags=*/0), + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(getxattr(path, name, NULL, 0), SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(listxattr(path, nullptr, 0), SyscallSucceedsWithValue(0)); + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(EPERM)); } TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -236,10 +244,6 @@ TEST_F(XattrTest, SetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, SetxattrZeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -252,10 +256,6 @@ TEST_F(XattrTest, SetxattrZeroSize) { } TEST_F(XattrTest, SetxattrSizeTooLarge) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; @@ -271,10 +271,6 @@ TEST_F(XattrTest, SetxattrSizeTooLarge) { } TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 1, /*flags=*/0), @@ -284,10 +280,6 @@ TEST_F(XattrTest, SetxattrNullValueAndNonzeroSize) { } TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); @@ -296,10 +288,6 @@ TEST_F(XattrTest, SetxattrNullValueAndZeroSize) { } TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val(XATTR_SIZE_MAX + 1); @@ -316,10 +304,6 @@ TEST_F(XattrTest, SetxattrValueTooLargeButOKSize) { } TEST_F(XattrTest, SetxattrReplaceWithSmaller) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -335,10 +319,6 @@ TEST_F(XattrTest, SetxattrReplaceWithSmaller) { } TEST_F(XattrTest, SetxattrReplaceWithLarger) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -353,10 +333,6 @@ TEST_F(XattrTest, SetxattrReplaceWithLarger) { } TEST_F(XattrTest, SetxattrCreateFlag) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_CREATE), @@ -368,10 +344,6 @@ TEST_F(XattrTest, SetxattrCreateFlag) { } TEST_F(XattrTest, SetxattrReplaceFlag) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; EXPECT_THAT(setxattr(path, name, nullptr, 0, XATTR_REPLACE), @@ -384,10 +356,6 @@ TEST_F(XattrTest, SetxattrReplaceFlag) { } TEST_F(XattrTest, SetxattrInvalidFlags) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); int invalid_flags = 0xff; EXPECT_THAT(setxattr(path, nullptr, nullptr, 0, invalid_flags), @@ -395,10 +363,6 @@ TEST_F(XattrTest, SetxattrInvalidFlags) { } TEST_F(XattrTest, Getxattr) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; int val = 1234; @@ -411,10 +375,6 @@ TEST_F(XattrTest, Getxattr) { } TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; std::vector val = {'a', 'a'}; @@ -427,10 +387,6 @@ TEST_F(XattrTest, GetxattrSizeSmallerThanValue) { } TEST_F(XattrTest, GetxattrSizeLargerThanValue) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -446,10 +402,6 @@ TEST_F(XattrTest, GetxattrSizeLargerThanValue) { } TEST_F(XattrTest, GetxattrZeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -463,10 +415,6 @@ TEST_F(XattrTest, GetxattrZeroSize) { } TEST_F(XattrTest, GetxattrSizeTooLarge) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -483,10 +431,6 @@ TEST_F(XattrTest, GetxattrSizeTooLarge) { } TEST_F(XattrTest, GetxattrNullValue) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -498,10 +442,6 @@ TEST_F(XattrTest, GetxattrNullValue) { } TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - const char* path = test_file_name_.c_str(); const char name[] = "user.test"; char val = 'a'; @@ -518,35 +458,109 @@ TEST_F(XattrTest, GetxattrNullValueAndZeroSize) { } TEST_F(XattrTest, GetxattrNonexistentName) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); +} + +TEST_F(XattrTest, Listxattr) { + const char* path = test_file_name_.c_str(); + const std::string name = "user.test"; + const std::string name2 = "user.test2"; + const std::string name3 = "user.test3"; + EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name2.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + EXPECT_THAT(setxattr(path, name3.c_str(), nullptr, 0, /*flags=*/0), + SyscallSucceeds()); + std::vector list(name.size() + 1 + name2.size() + 1 + name3.size() + 1); + char* buf = list.data(); + EXPECT_THAT(listxattr(path, buf, XATTR_SIZE_MAX), + SyscallSucceedsWithValue(list.size())); + + absl::flat_hash_set got = {}; + for (char* p = buf; p < buf + list.size(); p += strlen(p) + 1) { + got.insert(std::string{p}); + } + + absl::flat_hash_set expected = {name, name2, name3}; + EXPECT_EQ(got, expected); +} + +TEST_F(XattrTest, ListxattrNoXattrs) { + const char* path = test_file_name_.c_str(); + + std::vector list, expected; + EXPECT_THAT(listxattr(path, list.data(), sizeof(list)), + SyscallSucceedsWithValue(0)); + EXPECT_EQ(list, expected); + + // Listxattr should succeed if there are no attributes, even if the buffer + // passed in is a nullptr. + EXPECT_THAT(listxattr(path, nullptr, sizeof(list)), + SyscallSucceedsWithValue(0)); +} + +TEST_F(XattrTest, ListxattrNullBuffer) { const char* path = test_file_name_.c_str(); const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + + EXPECT_THAT(listxattr(path, nullptr, sizeof(name)), + SyscallFailsWithErrno(EFAULT)); +} + +TEST_F(XattrTest, ListxattrSizeTooSmall) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + + char list[sizeof(name) - 1]; + EXPECT_THAT(listxattr(path, list, sizeof(list)), + SyscallFailsWithErrno(ERANGE)); +} + +TEST_F(XattrTest, ListxattrZeroSize) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + EXPECT_THAT(listxattr(path, nullptr, 0), + SyscallSucceedsWithValue(sizeof(name))); +} + +TEST_F(XattrTest, RemoveXattr) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(setxattr(path, name, nullptr, 0, /*flags=*/0), SyscallSucceeds()); + EXPECT_THAT(removexattr(path, name), SyscallSucceeds()); EXPECT_THAT(getxattr(path, name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); } -TEST_F(XattrTest, LGetSetxattrOnSymlink) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); +TEST_F(XattrTest, RemoveXattrNonexistentName) { + const char* path = test_file_name_.c_str(); + const char name[] = "user.test"; + EXPECT_THAT(removexattr(path, name), SyscallFailsWithErrno(ENODATA)); +} +TEST_F(XattrTest, LXattrOnSymlink) { + const char name[] = "user.test"; TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); TempPath link = ASSERT_NO_ERRNO_AND_VALUE( TempPath::CreateSymlinkTo(dir.path(), test_file_name_)); - EXPECT_THAT(lsetxattr(link.path().c_str(), nullptr, nullptr, 0, 0), + EXPECT_THAT(lsetxattr(link.path().c_str(), name, nullptr, 0, 0), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(lgetxattr(link.path().c_str(), nullptr, nullptr, 0), + EXPECT_THAT(lgetxattr(link.path().c_str(), name, nullptr, 0), SyscallFailsWithErrno(ENODATA)); + EXPECT_THAT(llistxattr(link.path().c_str(), nullptr, 0), + SyscallSucceedsWithValue(0)); + EXPECT_THAT(lremovexattr(link.path().c_str(), name), + SyscallFailsWithErrno(EPERM)); } -TEST_F(XattrTest, LGetSetxattrOnNonsymlink) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); - +TEST_F(XattrTest, LXattrOnNonsymlink) { const char* path = test_file_name_.c_str(); const char name[] = "user.test"; int val = 1234; @@ -558,13 +572,16 @@ TEST_F(XattrTest, LGetSetxattrOnNonsymlink) { EXPECT_THAT(lgetxattr(path, name, &buf, size), SyscallSucceedsWithValue(size)); EXPECT_EQ(buf, val); -} -TEST_F(XattrTest, FGetSetxattr) { - // TODO(gvisor.dev/issue/1636): Re-enable once list/remove xattr are - // supported, and get/set have been added pack to the syscall table. - SKIP_IF(IsRunningOnGvisor()); + char list[sizeof(name)]; + EXPECT_THAT(llistxattr(path, list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + EXPECT_THAT(lremovexattr(path, name), SyscallSucceeds()); +} + +TEST_F(XattrTest, XattrWithFD) { const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_.c_str(), 0)); const char name[] = "user.test"; @@ -577,6 +594,13 @@ TEST_F(XattrTest, FGetSetxattr) { EXPECT_THAT(fgetxattr(fd.get(), name, &buf, size), SyscallSucceedsWithValue(size)); EXPECT_EQ(buf, val); + + char list[sizeof(name)]; + EXPECT_THAT(flistxattr(fd.get(), list, sizeof(list)), + SyscallSucceedsWithValue(sizeof(name))); + EXPECT_STREQ(list, name); + + EXPECT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds()); } } // namespace -- cgit v1.2.3 From a6f9361c2f7c5b46a200de1dc891a0ce059ad90e Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 10 Feb 2020 13:51:26 -0800 Subject: Add context to comments. PiperOrigin-RevId: 294295852 --- pkg/sentry/fs/gofer/attr.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go index 71cccdc34..6db4b762d 100644 --- a/pkg/sentry/fs/gofer/attr.go +++ b/pkg/sentry/fs/gofer/attr.go @@ -88,8 +88,9 @@ func bsize(pattr p9.Attr) int64 { if pattr.BlockSize > 0 { return int64(pattr.BlockSize) } - // Some files may have no clue of their block size. Better not to report - // something misleading or buggy and have a safe default. + // Some files, particularly those that are not on a local file system, + // may have no clue of their block size. Better not to report something + // misleading or buggy and have a safe default. return usermem.PageSize } @@ -149,6 +150,7 @@ func links(valid p9.AttrMask, pattr p9.Attr) uint64 { } // This node is likely backed by a file system that doesn't support links. + // // We could readdir() and count children directories to provide an accurate // link count. However this may be expensive since the gofer may be backed by remote // storage. Instead, simply return 2 links for directories and 1 for everything else -- cgit v1.2.3 From 115898e368e4afe5418a7290d9545fafc7f6f25e Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 11 Feb 2020 11:37:12 -0800 Subject: Prevent DATA RACE in UnstableAttr. The slaveInodeOperations is currently copying the object when truncate is called (which is a no-op). This may result in a (unconsequential) data race when being modified concurrently. PiperOrigin-RevId: 294484276 --- pkg/sentry/fs/tty/slave.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index db55cdc48..6a2dbc576 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -73,7 +73,7 @@ func (si *slaveInodeOperations) Release(ctx context.Context) { } // Truncate implements fs.InodeOperations.Truncate. -func (slaveInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { +func (*slaveInodeOperations) Truncate(context.Context, *fs.Inode, int64) error { return nil } -- cgit v1.2.3 From 6dced977eab69401a114257e386addb9cb03a39d Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 11 Feb 2020 17:38:04 -0800 Subject: Ensure fsimpl/gofer.dentryPlatformFile.hostFileMapper is initialized. Fixes #1812. (The more direct cause of the deadlock is panic unsafety because the historically high cost of defer means that we avoid it in hot paths, including much of MM; defer is much cheaper as of Go 1.14, but still a measurable overhead.) PiperOrigin-RevId: 294560316 --- pkg/sentry/fs/fsutil/host_file_mapper.go | 17 +++++++++++------ pkg/sentry/fsimpl/gofer/regular_file.go | 5 +++++ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go index 67278aa86..e82afd112 100644 --- a/pkg/sentry/fs/fsutil/host_file_mapper.go +++ b/pkg/sentry/fs/fsutil/host_file_mapper.go @@ -65,13 +65,18 @@ type mapping struct { writable bool } -// NewHostFileMapper returns a HostFileMapper with no references or cached -// mappings. +// Init must be called on zero-value HostFileMappers before first use. +func (f *HostFileMapper) Init() { + f.refs = make(map[uint64]int32) + f.mappings = make(map[uint64]mapping) +} + +// NewHostFileMapper returns an initialized HostFileMapper allocated on the +// heap with no references or cached mappings. func NewHostFileMapper() *HostFileMapper { - return &HostFileMapper{ - refs: make(map[uint64]int32), - mappings: make(map[uint64]mapping), - } + f := &HostFileMapper{} + f.Init() + return f } // IncRefOn increments the reference count on all offsets in mr. diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 8e11e06b3..54c1031a7 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -571,6 +571,8 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt default: panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop)) } + // After this point, d may be used as a memmap.Mappable. + d.pf.hostFileMapperInitOnce.Do(d.pf.hostFileMapper.Init) return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts) } @@ -799,6 +801,9 @@ type dentryPlatformFile struct { // If this dentry represents a regular file, and handle.fd >= 0, // hostFileMapper caches mappings of handle.fd. hostFileMapper fsutil.HostFileMapper + + // hostFileMapperInitOnce is used to lazily initialize hostFileMapper. + hostFileMapperInitOnce sync.Once } // IncRef implements platform.File.IncRef. -- cgit v1.2.3 From 4075de11be44372c454aae7f9650cdc814c52229 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Fri, 14 Feb 2020 11:11:55 -0800 Subject: Plumb VFS2 inside the Sentry - Added fsbridge package with interface that can be used to open and read from VFS1 and VFS2 files. - Converted ELF loader to use fsbridge - Added VFS2 types to FSContext - Added vfs.MountNamespace to ThreadGroup Updates #1623 PiperOrigin-RevId: 295183950 --- pkg/sentry/control/BUILD | 5 + pkg/sentry/control/proc.go | 127 +++++++++++++-- pkg/sentry/fs/proc/BUILD | 1 + pkg/sentry/fs/proc/task.go | 17 +- pkg/sentry/fsbridge/BUILD | 24 +++ pkg/sentry/fsbridge/bridge.go | 54 ++++++ pkg/sentry/fsbridge/fs.go | 181 +++++++++++++++++++++ pkg/sentry/fsbridge/vfs.go | 134 +++++++++++++++ pkg/sentry/fsimpl/devtmpfs/devtmpfs.go | 4 + pkg/sentry/fsimpl/gofer/filesystem.go | 5 +- pkg/sentry/fsimpl/gofer/gofer.go | 3 + pkg/sentry/fsimpl/kernfs/filesystem.go | 10 +- pkg/sentry/fsimpl/proc/BUILD | 1 + pkg/sentry/fsimpl/proc/filesystem.go | 18 +- pkg/sentry/fsimpl/proc/tasks_test.go | 17 +- pkg/sentry/fsimpl/sys/BUILD | 1 + pkg/sentry/fsimpl/sys/sys.go | 3 + pkg/sentry/fsimpl/sys/sys_test.go | 7 +- pkg/sentry/fsimpl/testutil/BUILD | 2 +- pkg/sentry/fsimpl/testutil/kernel.go | 24 +-- pkg/sentry/fsimpl/testutil/testutil.go | 12 +- pkg/sentry/fsimpl/tmpfs/filesystem.go | 12 +- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 3 + pkg/sentry/kernel/BUILD | 2 + pkg/sentry/kernel/fs_context.go | 98 +++++++++-- pkg/sentry/kernel/kernel.go | 145 +++++++++++++---- pkg/sentry/kernel/task.go | 27 +++ pkg/sentry/kernel/task_clone.go | 11 +- pkg/sentry/kernel/task_context.go | 2 +- pkg/sentry/kernel/task_exit.go | 7 + pkg/sentry/kernel/task_log.go | 15 +- pkg/sentry/kernel/task_start.go | 49 +++--- pkg/sentry/kernel/thread_group.go | 6 +- pkg/sentry/loader/BUILD | 2 + pkg/sentry/loader/elf.go | 28 ++-- pkg/sentry/loader/interpreter.go | 6 +- pkg/sentry/loader/loader.go | 179 ++++++-------------- pkg/sentry/loader/vdso.go | 7 +- pkg/sentry/mm/BUILD | 2 +- pkg/sentry/mm/metadata.go | 10 +- pkg/sentry/mm/mm.go | 4 +- pkg/sentry/strace/strace.go | 28 ++++ pkg/sentry/syscalls/linux/BUILD | 1 + pkg/sentry/syscalls/linux/sys_prctl.go | 3 +- pkg/sentry/syscalls/linux/sys_thread.go | 17 +- .../syscalls/linux/vfs2/linux64_override_amd64.go | 106 ++++++++++++ pkg/sentry/vfs/BUILD | 1 + pkg/sentry/vfs/context.go | 7 +- pkg/sentry/vfs/mount.go | 10 +- pkg/sentry/vfs/options.go | 2 +- pkg/sentry/vfs/vfs.go | 5 +- runsc/boot/loader.go | 11 +- 52 files changed, 1134 insertions(+), 322 deletions(-) create mode 100644 pkg/sentry/fsbridge/BUILD create mode 100644 pkg/sentry/fsbridge/bridge.go create mode 100644 pkg/sentry/fsbridge/fs.go create mode 100644 pkg/sentry/fsbridge/vfs.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index e69496477..d16d78aa5 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -16,10 +16,13 @@ go_library( ], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/fd", + "//pkg/fspath", "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/fs/host", + "//pkg/sentry/fsbridge", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/time", @@ -27,8 +30,10 @@ go_library( "//pkg/sentry/state", "//pkg/sentry/strace", "//pkg/sentry/usage", + "//pkg/sentry/vfs", "//pkg/sentry/watchdog", "//pkg/sync", + "//pkg/syserror", "//pkg/tcpip/link/sniffer", "//pkg/urpc", ], diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index ced51c66c..8973754c8 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -18,19 +18,26 @@ import ( "bytes" "encoding/json" "fmt" + "path" "sort" "strings" "text/tabwriter" "time" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/urpc" ) @@ -60,6 +67,12 @@ type ExecArgs struct { // process's MountNamespace. MountNamespace *fs.MountNamespace + // MountNamespaceVFS2 is the mount namespace to execute the new process in. + // A reference on MountNamespace must be held for the lifetime of the + // ExecArgs. If MountNamespace is nil, it will default to the init + // process's MountNamespace. + MountNamespaceVFS2 *vfs.MountNamespace + // WorkingDirectory defines the working directory for the new process. WorkingDirectory string `json:"wd"` @@ -150,6 +163,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI Envv: args.Envv, WorkingDirectory: args.WorkingDirectory, MountNamespace: args.MountNamespace, + MountNamespaceVFS2: args.MountNamespaceVFS2, Credentials: creds, FDTable: fdTable, Umask: 0022, @@ -166,24 +180,53 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI // be donated to the new process in CreateProcess. initArgs.MountNamespace.IncRef() } + if initArgs.MountNamespaceVFS2 != nil { + // initArgs must hold a reference on MountNamespaceVFS2, which will + // be donated to the new process in CreateProcess. + initArgs.MountNamespaceVFS2.IncRef() + } ctx := initArgs.NewContext(proc.Kernel) if initArgs.Filename == "" { - // Get the full path to the filename from the PATH env variable. - paths := fs.GetPath(initArgs.Envv) - mns := initArgs.MountNamespace - if mns == nil { - mns = proc.Kernel.GlobalInit().Leader().MountNamespace() - } - f, err := mns.ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths) - if err != nil { - return nil, 0, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) + if kernel.VFS2Enabled { + // Get the full path to the filename from the PATH env variable. + if initArgs.MountNamespaceVFS2 == nil { + // Set initArgs so that 'ctx' returns the namespace. + // + // MountNamespaceVFS2 adds a reference to the namespace, which is + // transferred to the new process. + initArgs.MountNamespaceVFS2 = proc.Kernel.GlobalInit().Leader().MountNamespaceVFS2() + } + + paths := fs.GetPath(initArgs.Envv) + vfsObj := proc.Kernel.VFS + file, err := ResolveExecutablePath(ctx, vfsObj, initArgs.WorkingDirectory, initArgs.Argv[0], paths) + if err != nil { + return nil, 0, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) + } + initArgs.File = fsbridge.NewVFSFile(file) + } else { + // Get the full path to the filename from the PATH env variable. + paths := fs.GetPath(initArgs.Envv) + if initArgs.MountNamespace == nil { + // Set initArgs so that 'ctx' returns the namespace. + initArgs.MountNamespace = proc.Kernel.GlobalInit().Leader().MountNamespace() + + // initArgs must hold a reference on MountNamespace, which will + // be donated to the new process in CreateProcess. + initArgs.MountNamespaceVFS2.IncRef() + } + f, err := initArgs.MountNamespace.ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths) + if err != nil { + return nil, 0, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) + } + initArgs.Filename = f } - initArgs.Filename = f } mounter := fs.FileOwnerFromContext(ctx) + // TODO(gvisor.dev/issue/1623): Use host FD when supported in VFS2. var ttyFile *fs.File for appFD, hostFile := range args.FilePayload.Files { var appFile *fs.File @@ -411,3 +454,67 @@ func ttyName(tty *kernel.TTY) string { } return fmt.Sprintf("pts/%d", tty.Index) } + +// ResolveExecutablePath resolves the given executable name given a set of +// paths that might contain it. +func ResolveExecutablePath(ctx context.Context, vfsObj *vfs.VirtualFilesystem, wd, name string, paths []string) (*vfs.FileDescription, error) { + root := vfs.RootFromContext(ctx) + defer root.DecRef() + creds := auth.CredentialsFromContext(ctx) + + // Absolute paths can be used directly. + if path.IsAbs(name) { + return openExecutable(ctx, vfsObj, creds, root, name) + } + + // Paths with '/' in them should be joined to the working directory, or + // to the root if working directory is not set. + if strings.IndexByte(name, '/') > 0 { + if len(wd) == 0 { + wd = "/" + } + if !path.IsAbs(wd) { + return nil, fmt.Errorf("working directory %q must be absolute", wd) + } + return openExecutable(ctx, vfsObj, creds, root, path.Join(wd, name)) + } + + // Otherwise, we must lookup the name in the paths, starting from the + // calling context's root directory. + for _, p := range paths { + if !path.IsAbs(p) { + // Relative paths aren't safe, no one should be using them. + log.Warningf("Skipping relative path %q in $PATH", p) + continue + } + + binPath := path.Join(p, name) + f, err := openExecutable(ctx, vfsObj, creds, root, binPath) + if err != nil { + return nil, err + } + if f == nil { + continue // Not found/no access. + } + return f, nil + } + return nil, syserror.ENOENT +} + +func openExecutable(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, root vfs.VirtualDentry, path string) (*vfs.FileDescription, error) { + pop := vfs.PathOperation{ + Root: root, + Start: root, // binPath is absolute, Start can be anything. + Path: fspath.Parse(path), + FollowFinalSymlink: true, + } + opts := &vfs.OpenOptions{ + Flags: linux.O_RDONLY, + FileExec: true, + } + f, err := vfsObj.OpenAt(ctx, creds, &pop, opts) + if err == syserror.ENOENT || err == syserror.EACCES { + return nil, nil + } + return f, err +} diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD index 280093c5e..77c2c5c0e 100644 --- a/pkg/sentry/fs/proc/BUILD +++ b/pkg/sentry/fs/proc/BUILD @@ -36,6 +36,7 @@ go_library( "//pkg/sentry/fs/proc/device", "//pkg/sentry/fs/proc/seqfile", "//pkg/sentry/fs/ramfs", + "//pkg/sentry/fsbridge", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index ca020e11e..8ab8d8a02 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -28,6 +28,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/proc/device" "gvisor.dev/gvisor/pkg/sentry/fs/proc/seqfile" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/mm" @@ -249,7 +250,7 @@ func newExe(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { return newProcInode(t, exeSymlink, msrc, fs.Symlink, t) } -func (e *exe) executable() (d *fs.Dirent, err error) { +func (e *exe) executable() (file fsbridge.File, err error) { e.t.WithMuLocked(func(t *kernel.Task) { mm := t.MemoryManager() if mm == nil { @@ -262,8 +263,8 @@ func (e *exe) executable() (d *fs.Dirent, err error) { // The MemoryManager may be destroyed, in which case // MemoryManager.destroy will simply set the executable to nil // (with locks held). - d = mm.Executable() - if d == nil { + file = mm.Executable() + if file == nil { err = syserror.ENOENT } }) @@ -283,15 +284,7 @@ func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { } defer exec.DecRef() - root := fs.RootFromContext(ctx) - if root == nil { - // This doesn't correspond to anything in Linux because the vfs is - // global there. - return "", syserror.EINVAL - } - defer root.DecRef() - n, _ := exec.FullName(root) - return n, nil + return exec.PathnameWithDeleted(ctx), nil } // namespaceSymlink represents a symlink in the namespacefs, such as the files diff --git a/pkg/sentry/fsbridge/BUILD b/pkg/sentry/fsbridge/BUILD new file mode 100644 index 000000000..6c798f0bd --- /dev/null +++ b/pkg/sentry/fsbridge/BUILD @@ -0,0 +1,24 @@ +load("//tools:defs.bzl", "go_library") + +licenses(["notice"]) + +go_library( + name = "fsbridge", + srcs = [ + "bridge.go", + "fs.go", + "vfs.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/fspath", + "//pkg/sentry/fs", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/memmap", + "//pkg/sentry/vfs", + "//pkg/syserror", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/fsbridge/bridge.go b/pkg/sentry/fsbridge/bridge.go new file mode 100644 index 000000000..8e7590721 --- /dev/null +++ b/pkg/sentry/fsbridge/bridge.go @@ -0,0 +1,54 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fsbridge provides common interfaces to bridge between VFS1 and VFS2 +// files. +package fsbridge + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/usermem" +) + +// File provides a common interface to bridge between VFS1 and VFS2 files. +type File interface { + // PathnameWithDeleted returns an absolute pathname to vd, consistent with + // Linux's d_path(). In particular, if vd.Dentry() has been disowned, + // PathnameWithDeleted appends " (deleted)" to the returned pathname. + PathnameWithDeleted(ctx context.Context) string + + // ReadFull read all contents from the file. + ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) + + // ConfigureMMap mutates opts to implement mmap(2) for the file. + ConfigureMMap(context.Context, *memmap.MMapOpts) error + + // Type returns the file type, e.g. linux.S_IFREG. + Type(context.Context) (linux.FileMode, error) + + // IncRef increments reference. + IncRef() + + // DecRef decrements reference. + DecRef() +} + +// Lookup provides a common interface to open files. +type Lookup interface { + // OpenPath opens a file. + OpenPath(ctx context.Context, path string, opts vfs.OpenOptions, remainingTraversals *uint, resolveFinal bool) (File, error) +} diff --git a/pkg/sentry/fsbridge/fs.go b/pkg/sentry/fsbridge/fs.go new file mode 100644 index 000000000..093ce1fb3 --- /dev/null +++ b/pkg/sentry/fsbridge/fs.go @@ -0,0 +1,181 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fsbridge + +import ( + "io" + "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// fsFile implements File interface over fs.File. +// +// +stateify savable +type fsFile struct { + file *fs.File +} + +var _ File = (*fsFile)(nil) + +// NewFSFile creates a new File over fs.File. +func NewFSFile(file *fs.File) File { + return &fsFile{file: file} +} + +// PathnameWithDeleted implements File. +func (f *fsFile) PathnameWithDeleted(ctx context.Context) string { + root := fs.RootFromContext(ctx) + if root == nil { + // This doesn't correspond to anything in Linux because the vfs is + // global there. + return "" + } + defer root.DecRef() + + name, _ := f.file.Dirent.FullName(root) + return name +} + +// ReadFull implements File. +func (f *fsFile) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { + var total int64 + for dst.NumBytes() > 0 { + n, err := f.file.Preadv(ctx, dst, offset+total) + total += n + if err == io.EOF && total != 0 { + return total, io.ErrUnexpectedEOF + } else if err != nil { + return total, err + } + dst = dst.DropFirst64(n) + } + return total, nil +} + +// ConfigureMMap implements File. +func (f *fsFile) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + return f.file.ConfigureMMap(ctx, opts) +} + +// Type implements File. +func (f *fsFile) Type(context.Context) (linux.FileMode, error) { + return linux.FileMode(f.file.Dirent.Inode.StableAttr.Type.LinuxType()), nil +} + +// IncRef implements File. +func (f *fsFile) IncRef() { + f.file.IncRef() +} + +// DecRef implements File. +func (f *fsFile) DecRef() { + f.file.DecRef() +} + +// fsLookup implements Lookup interface using fs.File. +// +// +stateify savable +type fsLookup struct { + mntns *fs.MountNamespace + + root *fs.Dirent + workingDir *fs.Dirent +} + +var _ Lookup = (*fsLookup)(nil) + +// NewFSLookup creates a new Lookup using VFS1. +func NewFSLookup(mntns *fs.MountNamespace, root, workingDir *fs.Dirent) Lookup { + return &fsLookup{ + mntns: mntns, + root: root, + workingDir: workingDir, + } +} + +// OpenPath implements Lookup. +func (l *fsLookup) OpenPath(ctx context.Context, path string, opts vfs.OpenOptions, remainingTraversals *uint, resolveFinal bool) (File, error) { + var d *fs.Dirent + var err error + if resolveFinal { + d, err = l.mntns.FindInode(ctx, l.root, l.workingDir, path, remainingTraversals) + } else { + d, err = l.mntns.FindLink(ctx, l.root, l.workingDir, path, remainingTraversals) + } + if err != nil { + return nil, err + } + defer d.DecRef() + + if !resolveFinal && fs.IsSymlink(d.Inode.StableAttr) { + return nil, syserror.ELOOP + } + + fsPerm := openOptionsToPermMask(&opts) + if err := d.Inode.CheckPermission(ctx, fsPerm); err != nil { + return nil, err + } + + // If they claim it's a directory, then make sure. + if strings.HasSuffix(path, "/") { + if d.Inode.StableAttr.Type != fs.Directory { + return nil, syserror.ENOTDIR + } + } + + if opts.FileExec && d.Inode.StableAttr.Type != fs.RegularFile { + ctx.Infof("%q is not a regular file: %v", path, d.Inode.StableAttr.Type) + return nil, syserror.EACCES + } + + f, err := d.Inode.GetFile(ctx, d, flagsToFileFlags(opts.Flags)) + if err != nil { + return nil, err + } + + return &fsFile{file: f}, nil +} + +func openOptionsToPermMask(opts *vfs.OpenOptions) fs.PermMask { + mode := opts.Flags & linux.O_ACCMODE + return fs.PermMask{ + Read: mode == linux.O_RDONLY || mode == linux.O_RDWR, + Write: mode == linux.O_WRONLY || mode == linux.O_RDWR, + Execute: opts.FileExec, + } +} + +func flagsToFileFlags(flags uint32) fs.FileFlags { + return fs.FileFlags{ + Direct: flags&linux.O_DIRECT != 0, + DSync: flags&(linux.O_DSYNC|linux.O_SYNC) != 0, + Sync: flags&linux.O_SYNC != 0, + NonBlocking: flags&linux.O_NONBLOCK != 0, + Read: (flags & linux.O_ACCMODE) != linux.O_WRONLY, + Write: (flags & linux.O_ACCMODE) != linux.O_RDONLY, + Append: flags&linux.O_APPEND != 0, + Directory: flags&linux.O_DIRECTORY != 0, + Async: flags&linux.O_ASYNC != 0, + LargeFile: flags&linux.O_LARGEFILE != 0, + Truncate: flags&linux.O_TRUNC != 0, + } +} diff --git a/pkg/sentry/fsbridge/vfs.go b/pkg/sentry/fsbridge/vfs.go new file mode 100644 index 000000000..e657c39bc --- /dev/null +++ b/pkg/sentry/fsbridge/vfs.go @@ -0,0 +1,134 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fsbridge + +import ( + "io" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/usermem" +) + +// fsFile implements File interface over vfs.FileDescription. +// +// +stateify savable +type vfsFile struct { + file *vfs.FileDescription +} + +var _ File = (*vfsFile)(nil) + +// NewVFSFile creates a new File over fs.File. +func NewVFSFile(file *vfs.FileDescription) File { + return &vfsFile{file: file} +} + +// PathnameWithDeleted implements File. +func (f *vfsFile) PathnameWithDeleted(ctx context.Context) string { + root := vfs.RootFromContext(ctx) + defer root.DecRef() + + vfsObj := f.file.VirtualDentry().Mount().Filesystem().VirtualFilesystem() + name, _ := vfsObj.PathnameWithDeleted(ctx, root, f.file.VirtualDentry()) + return name +} + +// ReadFull implements File. +func (f *vfsFile) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { + var total int64 + for dst.NumBytes() > 0 { + n, err := f.file.PRead(ctx, dst, offset+total, vfs.ReadOptions{}) + total += n + if err == io.EOF && total != 0 { + return total, io.ErrUnexpectedEOF + } else if err != nil { + return total, err + } + dst = dst.DropFirst64(n) + } + return total, nil +} + +// ConfigureMMap implements File. +func (f *vfsFile) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + return f.file.ConfigureMMap(ctx, opts) +} + +// Type implements File. +func (f *vfsFile) Type(ctx context.Context) (linux.FileMode, error) { + stat, err := f.file.Stat(ctx, vfs.StatOptions{}) + if err != nil { + return 0, err + } + return linux.FileMode(stat.Mode).FileType(), nil +} + +// IncRef implements File. +func (f *vfsFile) IncRef() { + f.file.IncRef() +} + +// DecRef implements File. +func (f *vfsFile) DecRef() { + f.file.DecRef() +} + +// fsLookup implements Lookup interface using fs.File. +// +// +stateify savable +type vfsLookup struct { + mntns *vfs.MountNamespace + + root vfs.VirtualDentry + workingDir vfs.VirtualDentry +} + +var _ Lookup = (*vfsLookup)(nil) + +// NewVFSLookup creates a new Lookup using VFS2. +func NewVFSLookup(mntns *vfs.MountNamespace, root, workingDir vfs.VirtualDentry) Lookup { + return &vfsLookup{ + mntns: mntns, + root: root, + workingDir: workingDir, + } +} + +// OpenPath implements Lookup. +// +// remainingTraversals is not configurable in VFS2, all callers are using the +// default anyways. +// +// TODO(gvisor.dev/issue/1623): Check mount has read and exec permission. +func (l *vfsLookup) OpenPath(ctx context.Context, path string, opts vfs.OpenOptions, _ *uint, resolveFinal bool) (File, error) { + vfsObj := l.mntns.Root().Mount().Filesystem().VirtualFilesystem() + creds := auth.CredentialsFromContext(ctx) + pop := &vfs.PathOperation{ + Root: l.root, + Start: l.root, + Path: fspath.Parse(path), + FollowFinalSymlink: resolveFinal, + } + fd, err := vfsObj.OpenAt(ctx, creds, pop, &opts) + if err != nil { + return nil, err + } + return &vfsFile{file: fd}, nil +} diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go index e03a0c665..abd4f24e7 100644 --- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go +++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go @@ -28,6 +28,9 @@ import ( "gvisor.dev/gvisor/pkg/sync" ) +// Name is the default filesystem name. +const Name = "devtmpfs" + // FilesystemType implements vfs.FilesystemType. type FilesystemType struct { initOnce sync.Once @@ -107,6 +110,7 @@ func (a *Accessor) wrapContext(ctx context.Context) *accessorContext { func (ac *accessorContext) Value(key interface{}) interface{} { switch key { case vfs.CtxMountNamespace: + ac.a.mntns.IncRef() return ac.a.mntns case vfs.CtxRoot: ac.a.root.IncRef() diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 138adb9f7..5cfb0dc4c 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -400,6 +400,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b } vfsObj := rp.VirtualFilesystem() mntns := vfs.MountNamespaceFromContext(ctx) + defer mntns.DecRef() parent.dirMu.Lock() defer parent.dirMu.Unlock() childVFSD := parent.vfsd.Child(name) @@ -934,7 +935,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if oldParent == newParent && oldName == newName { return nil } - if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), &renamed.vfsd, replacedVFSD); err != nil { + mntns := vfs.MountNamespaceFromContext(ctx) + defer mntns.DecRef() + if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil { return err } if err := renamed.file.rename(ctx, newParent.file, newName); err != nil { diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index d0552bd99..d00850e25 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -52,6 +52,9 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) +// Name is the default filesystem name. +const Name = "9p" + // FilesystemType implements vfs.FilesystemType. type FilesystemType struct{} diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index ee98eb66a..292f58afd 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -544,6 +544,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } mntns := vfs.MountNamespaceFromContext(ctx) + defer mntns.DecRef() virtfs := rp.VirtualFilesystem() srcDirDentry := srcDirVFSD.Impl().(*Dentry) @@ -595,7 +596,10 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error parentDentry := vfsd.Parent().Impl().(*Dentry) parentDentry.dirMu.Lock() defer parentDentry.dirMu.Unlock() - if err := virtfs.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { + + mntns := vfs.MountNamespaceFromContext(ctx) + defer mntns.DecRef() + if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { return err } if err := parentDentry.inode.RmDir(ctx, rp.Component(), vfsd); err != nil { @@ -697,7 +701,9 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error parentDentry := vfsd.Parent().Impl().(*Dentry) parentDentry.dirMu.Lock() defer parentDentry.dirMu.Unlock() - if err := virtfs.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { + mntns := vfs.MountNamespaceFromContext(ctx) + defer mntns.DecRef() + if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { return err } if err := parentDentry.inode.Unlink(ctx, rp.Component(), vfsd); err != nil { diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 12aac2e6a..a83245866 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -14,6 +14,7 @@ go_library( "tasks_net.go", "tasks_sys.go", ], + visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index 11477b6a9..5c19d5522 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -26,15 +26,18 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" ) -// procFSType is the factory class for procfs. +// Name is the default filesystem name. +const Name = "proc" + +// FilesystemType is the factory class for procfs. // // +stateify savable -type procFSType struct{} +type FilesystemType struct{} -var _ vfs.FilesystemType = (*procFSType)(nil) +var _ vfs.FilesystemType = (*FilesystemType)(nil) // GetFilesystem implements vfs.FilesystemType. -func (ft *procFSType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { +func (ft *FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { k := kernel.KernelFromContext(ctx) if k == nil { return nil, nil, fmt.Errorf("procfs requires a kernel") @@ -47,12 +50,13 @@ func (ft *procFSType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFile procfs := &kernfs.Filesystem{} procfs.VFSFilesystem().Init(vfsObj, procfs) - var data *InternalData + var cgroups map[string]string if opts.InternalData != nil { - data = opts.InternalData.(*InternalData) + data := opts.InternalData.(*InternalData) + cgroups = data.Cgroups } - _, dentry := newTasksInode(procfs, k, pidns, data.Cgroups) + _, dentry := newTasksInode(procfs, k, pidns, cgroups) return procfs.VFSFilesystem(), dentry.VFSDentry(), nil } diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go index 6fc3524db..96c72cbc9 100644 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_test.go @@ -90,8 +90,7 @@ func setup(t *testing.T) *testutil.System { ctx := k.SupervisorContext() creds := auth.CredentialsFromContext(ctx) - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("procfs", &procFSType{}, &vfs.RegisterFilesystemTypeOptions{ + k.VFS.MustRegisterFilesystemType(Name, &FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, }) fsOpts := vfs.GetFilesystemOptions{ @@ -102,11 +101,11 @@ func setup(t *testing.T) *testutil.System { }, }, } - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "procfs", &fsOpts) + mntns, err := k.VFS.NewMountNamespace(ctx, creds, "", Name, &fsOpts) if err != nil { t.Fatalf("NewMountNamespace(): %v", err) } - return testutil.NewSystem(ctx, t, vfsObj, mntns) + return testutil.NewSystem(ctx, t, k.VFS, mntns) } func TestTasksEmpty(t *testing.T) { @@ -131,7 +130,7 @@ func TestTasks(t *testing.T) { var tasks []*kernel.Task for i := 0; i < 5; i++ { tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc) + task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root) if err != nil { t.Fatalf("CreateTask(): %v", err) } @@ -213,7 +212,7 @@ func TestTasksOffset(t *testing.T) { k := kernel.KernelFromContext(s.Ctx) for i := 0; i < 3; i++ { tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - if _, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc); err != nil { + if _, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root); err != nil { t.Fatalf("CreateTask(): %v", err) } } @@ -337,7 +336,7 @@ func TestTask(t *testing.T) { k := kernel.KernelFromContext(s.Ctx) tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - _, err := testutil.CreateTask(s.Ctx, "name", tc) + _, err := testutil.CreateTask(s.Ctx, "name", tc, s.MntNs, s.Root, s.Root) if err != nil { t.Fatalf("CreateTask(): %v", err) } @@ -352,7 +351,7 @@ func TestProcSelf(t *testing.T) { k := kernel.KernelFromContext(s.Ctx) tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(s.Ctx, "name", tc) + task, err := testutil.CreateTask(s.Ctx, "name", tc, s.MntNs, s.Root, s.Root) if err != nil { t.Fatalf("CreateTask(): %v", err) } @@ -433,7 +432,7 @@ func TestTree(t *testing.T) { var tasks []*kernel.Task for i := 0; i < 5; i++ { tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc) + task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root) if err != nil { t.Fatalf("CreateTask(): %v", err) } diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index 66c0d8bc8..a741e2bb6 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -7,6 +7,7 @@ go_library( srcs = [ "sys.go", ], + visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index d693fceae..c36c4fa11 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -28,6 +28,9 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) +// Name is the default filesystem name. +const Name = "sysfs" + // FilesystemType implements vfs.FilesystemType. type FilesystemType struct{} diff --git a/pkg/sentry/fsimpl/sys/sys_test.go b/pkg/sentry/fsimpl/sys/sys_test.go index 8b1cf0bd0..5d1ba5867 100644 --- a/pkg/sentry/fsimpl/sys/sys_test.go +++ b/pkg/sentry/fsimpl/sys/sys_test.go @@ -34,16 +34,15 @@ func newTestSystem(t *testing.T) *testutil.System { } ctx := k.SupervisorContext() creds := auth.CredentialsFromContext(ctx) - v := vfs.New() - v.MustRegisterFilesystemType("sysfs", sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + k.VFS.MustRegisterFilesystemType(sys.Name, sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, }) - mns, err := v.NewMountNamespace(ctx, creds, "", "sysfs", &vfs.GetFilesystemOptions{}) + mns, err := k.VFS.NewMountNamespace(ctx, creds, "", sys.Name, &vfs.GetFilesystemOptions{}) if err != nil { t.Fatalf("Failed to create new mount namespace: %v", err) } - return testutil.NewSystem(ctx, t, v, mns) + return testutil.NewSystem(ctx, t, k.VFS, mns) } func TestReadCPUFile(t *testing.T) { diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD index efd5974c4..e4f36f4ae 100644 --- a/pkg/sentry/fsimpl/testutil/BUILD +++ b/pkg/sentry/fsimpl/testutil/BUILD @@ -16,7 +16,7 @@ go_library( "//pkg/cpuid", "//pkg/fspath", "//pkg/memutil", - "//pkg/sentry/fs", + "//pkg/sentry/fsimpl/tmpfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/sched", diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go index 89f8c4915..a91b3ec4d 100644 --- a/pkg/sentry/fsimpl/testutil/kernel.go +++ b/pkg/sentry/fsimpl/testutil/kernel.go @@ -24,7 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/memutil" - "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" @@ -33,6 +33,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/vfs" // Platforms are plugable. _ "gvisor.dev/gvisor/pkg/sentry/platform/kvm" @@ -99,26 +100,27 @@ func Boot() (*kernel.Kernel, error) { return nil, fmt.Errorf("initializing kernel: %v", err) } - ctx := k.SupervisorContext() + kernel.VFS2Enabled = true + + vfsObj := vfs.New() + vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) + k.VFS = vfsObj - // Create mount namespace without root as it's the minimum required to create - // the global thread group. - mntns, err := fs.NewMountNamespace(ctx, nil) - if err != nil { - return nil, err - } ls, err := limits.NewLinuxLimitSet() if err != nil { return nil, err } - tg := k.NewThreadGroup(mntns, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, ls) + tg := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, ls) k.TestOnly_SetGlobalInit(tg) return k, nil } // CreateTask creates a new bare bones task for tests. -func CreateTask(ctx context.Context, name string, tc *kernel.ThreadGroup) (*kernel.Task, error) { +func CreateTask(ctx context.Context, name string, tc *kernel.ThreadGroup, mntns *vfs.MountNamespace, root, cwd vfs.VirtualDentry) (*kernel.Task, error) { k := kernel.KernelFromContext(ctx) config := &kernel.TaskConfig{ Kernel: k, @@ -129,6 +131,8 @@ func CreateTask(ctx context.Context, name string, tc *kernel.ThreadGroup) (*kern UTSNamespace: kernel.UTSNamespaceFromContext(ctx), IPCNamespace: kernel.IPCNamespaceFromContext(ctx), AbstractSocketNamespace: kernel.NewAbstractSocketNamespace(), + MountNamespaceVFS2: mntns, + FSContext: kernel.NewFSContextVFS2(root, cwd, 0022), } return k.TaskSet().NewTask(config) } diff --git a/pkg/sentry/fsimpl/testutil/testutil.go b/pkg/sentry/fsimpl/testutil/testutil.go index 69fd84ddd..b97e3534a 100644 --- a/pkg/sentry/fsimpl/testutil/testutil.go +++ b/pkg/sentry/fsimpl/testutil/testutil.go @@ -41,12 +41,12 @@ type System struct { Creds *auth.Credentials VFS *vfs.VirtualFilesystem Root vfs.VirtualDentry - mns *vfs.MountNamespace + MntNs *vfs.MountNamespace } // NewSystem constructs a System. // -// Precondition: Caller must hold a reference on mns, whose ownership +// Precondition: Caller must hold a reference on MntNs, whose ownership // is transferred to the new System. func NewSystem(ctx context.Context, t *testing.T, v *vfs.VirtualFilesystem, mns *vfs.MountNamespace) *System { s := &System{ @@ -54,7 +54,7 @@ func NewSystem(ctx context.Context, t *testing.T, v *vfs.VirtualFilesystem, mns Ctx: ctx, Creds: auth.CredentialsFromContext(ctx), VFS: v, - mns: mns, + MntNs: mns, Root: mns.Root(), } return s @@ -75,7 +75,7 @@ func (s *System) WithSubtest(t *testing.T) *System { Ctx: s.Ctx, Creds: s.Creds, VFS: s.VFS, - mns: s.mns, + MntNs: s.MntNs, Root: s.Root, } } @@ -90,7 +90,7 @@ func (s *System) WithTemporaryContext(ctx context.Context) *System { Ctx: ctx, Creds: s.Creds, VFS: s.VFS, - mns: s.mns, + MntNs: s.MntNs, Root: s.Root, } } @@ -98,7 +98,7 @@ func (s *System) WithTemporaryContext(ctx context.Context) *System { // Destroy release resources associated with a test system. func (s *System) Destroy() { s.Root.DecRef() - s.mns.DecRef() // Reference on mns passed to NewSystem. + s.MntNs.DecRef() // Reference on MntNs passed to NewSystem. } // ReadToEnd reads the contents of fd until EOF to a string. diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 8785452b6..7f7b791c4 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -486,7 +486,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa vfsObj := rp.VirtualFilesystem() oldParentDir := oldParent.inode.impl.(*directory) newParentDir := newParent.inode.impl.(*directory) - if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil { + mntns := vfs.MountNamespaceFromContext(ctx) + defer mntns.DecRef() + if err := vfsObj.PrepareRenameDentry(mntns, renamedVFSD, replacedVFSD); err != nil { return err } if replaced != nil { @@ -543,7 +545,9 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error } defer mnt.EndWrite() vfsObj := rp.VirtualFilesystem() - if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { + mntns := vfs.MountNamespaceFromContext(ctx) + defer mntns.DecRef() + if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil { return err } parent.inode.impl.(*directory).childList.Remove(child) @@ -631,7 +635,9 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error } defer mnt.EndWrite() vfsObj := rp.VirtualFilesystem() - if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { + mntns := vfs.MountNamespaceFromContext(ctx) + defer mntns.DecRef() + if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil { return err } parent.inode.impl.(*directory).childList.Remove(child) diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 2108d0f4d..c5bb17562 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -40,6 +40,9 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) +// Name is the default filesystem name. +const Name = "tmpfs" + // FilesystemType implements vfs.FilesystemType. type FilesystemType struct{} diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 2231d6973..46306945f 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -157,6 +157,7 @@ go_library( "//pkg/context", "//pkg/cpuid", "//pkg/eventchannel", + "//pkg/fspath", "//pkg/log", "//pkg/metric", "//pkg/refs", @@ -167,6 +168,7 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/lock", "//pkg/sentry/fs/timerfd", + "//pkg/sentry/fsbridge", "//pkg/sentry/hostcpu", "//pkg/sentry/inet", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/kernel/fs_context.go b/pkg/sentry/kernel/fs_context.go index 2448c1d99..7218aa24e 100644 --- a/pkg/sentry/kernel/fs_context.go +++ b/pkg/sentry/kernel/fs_context.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" ) @@ -37,10 +38,16 @@ type FSContext struct { // destroyed. root *fs.Dirent + // rootVFS2 is the filesystem root. + rootVFS2 vfs.VirtualDentry + // cwd is the current working directory. Will be nil iff the FSContext // has been destroyed. cwd *fs.Dirent + // cwdVFS2 is the current working directory. + cwdVFS2 vfs.VirtualDentry + // umask is the current file mode creation mask. When a thread using this // context invokes a syscall that creates a file, bits set in umask are // removed from the permissions that the file is created with. @@ -60,6 +67,19 @@ func newFSContext(root, cwd *fs.Dirent, umask uint) *FSContext { return &f } +// NewFSContextVFS2 returns a new filesystem context. +func NewFSContextVFS2(root, cwd vfs.VirtualDentry, umask uint) *FSContext { + root.IncRef() + cwd.IncRef() + f := FSContext{ + rootVFS2: root, + cwdVFS2: cwd, + umask: umask, + } + f.EnableLeakCheck("kernel.FSContext") + return &f +} + // destroy is the destructor for an FSContext. // // This will call DecRef on both root and cwd Dirents. If either call to @@ -75,11 +95,17 @@ func (f *FSContext) destroy() { f.mu.Lock() defer f.mu.Unlock() - f.root.DecRef() - f.root = nil - - f.cwd.DecRef() - f.cwd = nil + if VFS2Enabled { + f.rootVFS2.DecRef() + f.rootVFS2 = vfs.VirtualDentry{} + f.cwdVFS2.DecRef() + f.cwdVFS2 = vfs.VirtualDentry{} + } else { + f.root.DecRef() + f.root = nil + f.cwd.DecRef() + f.cwd = nil + } } // DecRef implements RefCounter.DecRef with destructor f.destroy. @@ -93,12 +119,21 @@ func (f *FSContext) DecRef() { func (f *FSContext) Fork() *FSContext { f.mu.Lock() defer f.mu.Unlock() - f.cwd.IncRef() - f.root.IncRef() + + if VFS2Enabled { + f.cwdVFS2.IncRef() + f.rootVFS2.IncRef() + } else { + f.cwd.IncRef() + f.root.IncRef() + } + return &FSContext{ - cwd: f.cwd, - root: f.root, - umask: f.umask, + cwd: f.cwd, + root: f.root, + cwdVFS2: f.cwdVFS2, + rootVFS2: f.rootVFS2, + umask: f.umask, } } @@ -109,12 +144,23 @@ func (f *FSContext) Fork() *FSContext { func (f *FSContext) WorkingDirectory() *fs.Dirent { f.mu.Lock() defer f.mu.Unlock() - if f.cwd != nil { - f.cwd.IncRef() - } + + f.cwd.IncRef() return f.cwd } +// WorkingDirectoryVFS2 returns the current working directory. +// +// This will return nil if called after destroy(), otherwise it will return a +// Dirent with a reference taken. +func (f *FSContext) WorkingDirectoryVFS2() vfs.VirtualDentry { + f.mu.Lock() + defer f.mu.Unlock() + + f.cwdVFS2.IncRef() + return f.cwdVFS2 +} + // SetWorkingDirectory sets the current working directory. // This will take an extra reference on the Dirent. // @@ -137,6 +183,20 @@ func (f *FSContext) SetWorkingDirectory(d *fs.Dirent) { old.DecRef() } +// SetWorkingDirectoryVFS2 sets the current working directory. +// This will take an extra reference on the VirtualDentry. +// +// This is not a valid call after destroy. +func (f *FSContext) SetWorkingDirectoryVFS2(d vfs.VirtualDentry) { + f.mu.Lock() + defer f.mu.Unlock() + + old := f.cwdVFS2 + f.cwdVFS2 = d + d.IncRef() + old.DecRef() +} + // RootDirectory returns the current filesystem root. // // This will return nil if called after destroy(), otherwise it will return a @@ -150,6 +210,18 @@ func (f *FSContext) RootDirectory() *fs.Dirent { return f.root } +// RootDirectoryVFS2 returns the current filesystem root. +// +// This will return nil if called after destroy(), otherwise it will return a +// Dirent with a reference taken. +func (f *FSContext) RootDirectoryVFS2() vfs.VirtualDentry { + f.mu.Lock() + defer f.mu.Unlock() + + f.rootVFS2.IncRef() + return f.rootVFS2 +} + // SetRootDirectory sets the root directory. // This will take an extra reference on the Dirent. // diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 3ee760ba2..2665f057c 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -43,11 +43,13 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/timerfd" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/hostcpu" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -71,6 +73,10 @@ import ( "gvisor.dev/gvisor/pkg/tcpip" ) +// VFS2Enabled is set to true when VFS2 is enabled. Added as a global for allow +// easy access everywhere. To be removed once VFS2 becomes the default. +var VFS2Enabled = false + // Kernel represents an emulated Linux kernel. It must be initialized by calling // Init() or LoadFrom(). // @@ -238,6 +244,9 @@ type Kernel struct { // SpecialOpts contains special kernel options. SpecialOpts + + // VFS keeps the filesystem state used across the kernel. + VFS *vfs.VirtualFilesystem } // InitKernelArgs holds arguments to Init. @@ -624,7 +633,7 @@ type CreateProcessArgs struct { // File is a passed host FD pointing to a file to load as the init binary. // // This is checked if and only if Filename is "". - File *fs.File + File fsbridge.File // Argvv is a list of arguments. Argv []string @@ -673,6 +682,13 @@ type CreateProcessArgs struct { // increment it). MountNamespace *fs.MountNamespace + // MountNamespaceVFS2 optionally contains the mount namespace for this + // process. If nil, the init process's mount namespace is used. + // + // Anyone setting MountNamespaceVFS2 must donate a reference (i.e. + // increment it). + MountNamespaceVFS2 *vfs.MountNamespace + // ContainerID is the container that the process belongs to. ContainerID string } @@ -711,11 +727,22 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { return ctx.args.Credentials case fs.CtxRoot: if ctx.args.MountNamespace != nil { - // MountNamespace.Root() will take a reference on the root - // dirent for us. + // MountNamespace.Root() will take a reference on the root dirent for us. return ctx.args.MountNamespace.Root() } return nil + case vfs.CtxRoot: + if ctx.args.MountNamespaceVFS2 == nil { + return nil + } + // MountNamespaceVFS2.Root() takes a reference on the root dirent for us. + return ctx.args.MountNamespaceVFS2.Root() + case vfs.CtxMountNamespace: + if ctx.k.globalInit == nil { + return nil + } + // MountNamespaceVFS2 takes a reference for us. + return ctx.k.GlobalInit().Leader().MountNamespaceVFS2() case fs.CtxDirentCacheLimiter: return ctx.k.DirentCacheLimiter case ktime.CtxRealtimeClock: @@ -757,34 +784,77 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, defer k.extMu.Unlock() log.Infof("EXEC: %v", args.Argv) - // Grab the mount namespace. - mounts := args.MountNamespace - if mounts == nil { - mounts = k.GlobalInit().Leader().MountNamespace() - mounts.IncRef() - } - - tg := k.NewThreadGroup(mounts, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits) ctx := args.NewContext(k) - // Get the root directory from the MountNamespace. - root := mounts.Root() - // The call to newFSContext below will take a reference on root, so we - // don't need to hold this one. - defer root.DecRef() - - // Grab the working directory. - remainingTraversals := uint(args.MaxSymlinkTraversals) - wd := root // Default. - if args.WorkingDirectory != "" { - var err error - wd, err = mounts.FindInode(ctx, root, nil, args.WorkingDirectory, &remainingTraversals) - if err != nil { - return nil, 0, fmt.Errorf("failed to find initial working directory %q: %v", args.WorkingDirectory, err) + var ( + opener fsbridge.Lookup + fsContext *FSContext + mntns *fs.MountNamespace + ) + + if VFS2Enabled { + mntnsVFS2 := args.MountNamespaceVFS2 + if mntnsVFS2 == nil { + // MountNamespaceVFS2 adds a reference to the namespace, which is + // transferred to the new process. + mntnsVFS2 = k.GlobalInit().Leader().MountNamespaceVFS2() + } + // Get the root directory from the MountNamespace. + root := args.MountNamespaceVFS2.Root() + // The call to newFSContext below will take a reference on root, so we + // don't need to hold this one. + defer root.DecRef() + + // Grab the working directory. + wd := root // Default. + if args.WorkingDirectory != "" { + pop := vfs.PathOperation{ + Root: root, + Start: wd, + Path: fspath.Parse(args.WorkingDirectory), + FollowFinalSymlink: true, + } + var err error + wd, err = k.VFS.GetDentryAt(ctx, args.Credentials, &pop, &vfs.GetDentryOptions{ + CheckSearchable: true, + }) + if err != nil { + return nil, 0, fmt.Errorf("failed to find initial working directory %q: %v", args.WorkingDirectory, err) + } + defer wd.DecRef() + } + opener = fsbridge.NewVFSLookup(mntnsVFS2, root, wd) + fsContext = NewFSContextVFS2(root, wd, args.Umask) + + } else { + mntns = args.MountNamespace + if mntns == nil { + mntns = k.GlobalInit().Leader().MountNamespace() + mntns.IncRef() } - defer wd.DecRef() + // Get the root directory from the MountNamespace. + root := mntns.Root() + // The call to newFSContext below will take a reference on root, so we + // don't need to hold this one. + defer root.DecRef() + + // Grab the working directory. + remainingTraversals := args.MaxSymlinkTraversals + wd := root // Default. + if args.WorkingDirectory != "" { + var err error + wd, err = mntns.FindInode(ctx, root, nil, args.WorkingDirectory, &remainingTraversals) + if err != nil { + return nil, 0, fmt.Errorf("failed to find initial working directory %q: %v", args.WorkingDirectory, err) + } + defer wd.DecRef() + } + opener = fsbridge.NewFSLookup(mntns, root, wd) + fsContext = newFSContext(root, wd, args.Umask) } + tg := k.NewThreadGroup(mntns, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits) + // Check which file to start from. switch { case args.Filename != "": @@ -805,11 +875,9 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, } // Create a fresh task context. - remainingTraversals = uint(args.MaxSymlinkTraversals) + remainingTraversals := args.MaxSymlinkTraversals loadArgs := loader.LoadArgs{ - Mounts: mounts, - Root: root, - WorkingDirectory: wd, + Opener: opener, RemainingTraversals: &remainingTraversals, ResolveFinal: true, Filename: args.Filename, @@ -834,13 +902,14 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, Kernel: k, ThreadGroup: tg, TaskContext: tc, - FSContext: newFSContext(root, wd, args.Umask), + FSContext: fsContext, FDTable: args.FDTable, Credentials: args.Credentials, AllowedCPUMask: sched.NewFullCPUSet(k.applicationCores), UTSNamespace: args.UTSNamespace, IPCNamespace: args.IPCNamespace, AbstractSocketNamespace: args.AbstractSocketNamespace, + MountNamespaceVFS2: args.MountNamespaceVFS2, ContainerID: args.ContainerID, } t, err := k.tasks.NewTask(config) @@ -1378,6 +1447,20 @@ func (ctx supervisorContext) Value(key interface{}) interface{} { return ctx.k.globalInit.mounts.Root() } return nil + case vfs.CtxRoot: + if ctx.k.globalInit == nil { + return vfs.VirtualDentry{} + } + mntns := ctx.k.GlobalInit().Leader().MountNamespaceVFS2() + defer mntns.DecRef() + // Root() takes a reference on the root dirent for us. + return mntns.Root() + case vfs.CtxMountNamespace: + if ctx.k.globalInit == nil { + return nil + } + // MountNamespaceVFS2() takes a reference for us. + return ctx.k.GlobalInit().Leader().MountNamespaceVFS2() case fs.CtxDirentCacheLimiter: return ctx.k.DirentCacheLimiter case ktime.CtxRealtimeClock: diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 981e8c7fe..a3443ff21 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -424,6 +424,11 @@ type Task struct { // abstractSockets is protected by mu. abstractSockets *AbstractSocketNamespace + // mountNamespaceVFS2 is the task's mount namespace. + // + // It is protected by mu. It is owned by the task goroutine. + mountNamespaceVFS2 *vfs.MountNamespace + // parentDeathSignal is sent to this task's thread group when its parent exits. // // parentDeathSignal is protected by mu. @@ -638,6 +643,11 @@ func (t *Task) Value(key interface{}) interface{} { return int32(t.ThreadGroup().ID()) case fs.CtxRoot: return t.fsContext.RootDirectory() + case vfs.CtxRoot: + return t.fsContext.RootDirectoryVFS2() + case vfs.CtxMountNamespace: + t.mountNamespaceVFS2.IncRef() + return t.mountNamespaceVFS2 case fs.CtxDirentCacheLimiter: return t.k.DirentCacheLimiter case inet.CtxStack: @@ -701,6 +711,14 @@ func (t *Task) SyscallRestartBlock() SyscallRestartBlock { // Preconditions: The caller must be running on the task goroutine, or t.mu // must be locked. func (t *Task) IsChrooted() bool { + if VFS2Enabled { + realRoot := t.mountNamespaceVFS2.Root() + defer realRoot.DecRef() + root := t.fsContext.RootDirectoryVFS2() + defer root.DecRef() + return root != realRoot + } + realRoot := t.tg.mounts.Root() defer realRoot.DecRef() root := t.fsContext.RootDirectory() @@ -796,6 +814,15 @@ func (t *Task) MountNamespace() *fs.MountNamespace { return t.tg.mounts } +// MountNamespaceVFS2 returns t's MountNamespace. A reference is taken on the +// returned mount namespace. +func (t *Task) MountNamespaceVFS2() *vfs.MountNamespace { + t.mu.Lock() + defer t.mu.Unlock() + t.mountNamespaceVFS2.IncRef() + return t.mountNamespaceVFS2 +} + // AbstractSockets returns t's AbstractSocketNamespace. func (t *Task) AbstractSockets() *AbstractSocketNamespace { return t.abstractSockets diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index 53d4d211b..ba74b4c1c 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -199,6 +199,12 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { ipcns = NewIPCNamespace(userns) } + // TODO(b/63601033): Implement CLONE_NEWNS. + mntnsVFS2 := t.mountNamespaceVFS2 + if mntnsVFS2 != nil { + mntnsVFS2.IncRef() + } + tc, err := t.tc.Fork(t, t.k, !opts.NewAddressSpace) if err != nil { return 0, nil, err @@ -241,7 +247,9 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { rseqAddr := usermem.Addr(0) rseqSignature := uint32(0) if opts.NewThreadGroup { - tg.mounts.IncRef() + if tg.mounts != nil { + tg.mounts.IncRef() + } sh := t.tg.signalHandlers if opts.NewSignalHandlers { sh = sh.Fork() @@ -265,6 +273,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { UTSNamespace: utsns, IPCNamespace: ipcns, AbstractSocketNamespace: t.abstractSockets, + MountNamespaceVFS2: mntnsVFS2, RSeqAddr: rseqAddr, RSeqSignature: rseqSignature, ContainerID: t.ContainerID(), diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go index 2d6e7733c..2be982684 100644 --- a/pkg/sentry/kernel/task_context.go +++ b/pkg/sentry/kernel/task_context.go @@ -136,7 +136,7 @@ func (t *Task) Stack() *arch.Stack { func (k *Kernel) LoadTaskImage(ctx context.Context, args loader.LoadArgs) (*TaskContext, *syserr.Error) { // If File is not nil, we should load that instead of resolving Filename. if args.File != nil { - args.Filename = args.File.MappedName(ctx) + args.Filename = args.File.PathnameWithDeleted(ctx) } // Prepare a new user address space to load into. diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index 435761e5a..c4ade6e8e 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -269,6 +269,13 @@ func (*runExitMain) execute(t *Task) taskRunState { t.fsContext.DecRef() t.fdTable.DecRef() + t.mu.Lock() + if t.mountNamespaceVFS2 != nil { + t.mountNamespaceVFS2.DecRef() + t.mountNamespaceVFS2 = nil + } + t.mu.Unlock() + // If this is the last task to exit from the thread group, release the // thread group's resources. if lastExiter { diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go index 41259210c..6d737d3e5 100644 --- a/pkg/sentry/kernel/task_log.go +++ b/pkg/sentry/kernel/task_log.go @@ -198,18 +198,11 @@ func (t *Task) traceExecEvent(tc *TaskContext) { if !trace.IsEnabled() { return } - d := tc.MemoryManager.Executable() - if d == nil { + file := tc.MemoryManager.Executable() + if file == nil { trace.Logf(t.traceContext, traceCategory, "exec: << unknown >>") return } - defer d.DecRef() - root := t.fsContext.RootDirectory() - if root == nil { - trace.Logf(t.traceContext, traceCategory, "exec: << no root directory >>") - return - } - defer root.DecRef() - n, _ := d.FullName(root) - trace.Logf(t.traceContext, traceCategory, "exec: %s", n) + defer file.DecRef() + trace.Logf(t.traceContext, traceCategory, "exec: %s", file.PathnameWithDeleted(t)) } diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index de838beef..f9236a842 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -80,6 +81,9 @@ type TaskConfig struct { // AbstractSocketNamespace is the AbstractSocketNamespace of the new task. AbstractSocketNamespace *AbstractSocketNamespace + // MountNamespaceVFS2 is the MountNamespace of the new task. + MountNamespaceVFS2 *vfs.MountNamespace + // RSeqAddr is a pointer to the the userspace linux.RSeq structure. RSeqAddr usermem.Addr @@ -116,28 +120,29 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { parent: cfg.Parent, children: make(map[*Task]struct{}), }, - runState: (*runApp)(nil), - interruptChan: make(chan struct{}, 1), - signalMask: cfg.SignalMask, - signalStack: arch.SignalStack{Flags: arch.SignalStackFlagDisable}, - tc: *tc, - fsContext: cfg.FSContext, - fdTable: cfg.FDTable, - p: cfg.Kernel.Platform.NewContext(), - k: cfg.Kernel, - ptraceTracees: make(map[*Task]struct{}), - allowedCPUMask: cfg.AllowedCPUMask.Copy(), - ioUsage: &usage.IO{}, - niceness: cfg.Niceness, - netns: cfg.NetworkNamespaced, - utsns: cfg.UTSNamespace, - ipcns: cfg.IPCNamespace, - abstractSockets: cfg.AbstractSocketNamespace, - rseqCPU: -1, - rseqAddr: cfg.RSeqAddr, - rseqSignature: cfg.RSeqSignature, - futexWaiter: futex.NewWaiter(), - containerID: cfg.ContainerID, + runState: (*runApp)(nil), + interruptChan: make(chan struct{}, 1), + signalMask: cfg.SignalMask, + signalStack: arch.SignalStack{Flags: arch.SignalStackFlagDisable}, + tc: *tc, + fsContext: cfg.FSContext, + fdTable: cfg.FDTable, + p: cfg.Kernel.Platform.NewContext(), + k: cfg.Kernel, + ptraceTracees: make(map[*Task]struct{}), + allowedCPUMask: cfg.AllowedCPUMask.Copy(), + ioUsage: &usage.IO{}, + niceness: cfg.Niceness, + netns: cfg.NetworkNamespaced, + utsns: cfg.UTSNamespace, + ipcns: cfg.IPCNamespace, + abstractSockets: cfg.AbstractSocketNamespace, + mountNamespaceVFS2: cfg.MountNamespaceVFS2, + rseqCPU: -1, + rseqAddr: cfg.RSeqAddr, + rseqSignature: cfg.RSeqSignature, + futexWaiter: futex.NewWaiter(), + containerID: cfg.ContainerID, } t.creds.Store(cfg.Credentials) t.endStopCond.L = &t.tg.signalHandlers.mu diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index 768e958d2..268f62e9d 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -256,7 +256,7 @@ type ThreadGroup struct { tty *TTY } -// NewThreadGroup returns a new, empty thread group in PID namespace ns. The +// NewThreadGroup returns a new, empty thread group in PID namespace pidns. The // thread group leader will send its parent terminationSignal when it exits. // The new thread group isn't visible to the system until a task has been // created inside of it by a successful call to TaskSet.NewTask. @@ -317,7 +317,9 @@ func (tg *ThreadGroup) release() { for _, it := range its { it.DestroyTimer() } - tg.mounts.DecRef() + if tg.mounts != nil { + tg.mounts.DecRef() + } } // forEachChildThreadGroupLocked indicates over all child ThreadGroups. diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD index 23790378a..c6aa65f28 100644 --- a/pkg/sentry/loader/BUILD +++ b/pkg/sentry/loader/BUILD @@ -33,6 +33,7 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/anon", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fsbridge", "//pkg/sentry/kernel/auth", "//pkg/sentry/limits", "//pkg/sentry/memmap", @@ -40,6 +41,7 @@ go_library( "//pkg/sentry/pgalloc", "//pkg/sentry/uniqueid", "//pkg/sentry/usage", + "//pkg/sentry/vfs", "//pkg/syserr", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go index 122ed05c2..616fafa2c 100644 --- a/pkg/sentry/loader/elf.go +++ b/pkg/sentry/loader/elf.go @@ -27,7 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" @@ -97,11 +97,11 @@ type elfInfo struct { // accepts from the ELF, and it doesn't parse unnecessary parts of the file. // // ctx may be nil if f does not need it. -func parseHeader(ctx context.Context, f *fs.File) (elfInfo, error) { +func parseHeader(ctx context.Context, f fsbridge.File) (elfInfo, error) { // Check ident first; it will tell us the endianness of the rest of the // structs. var ident [elf.EI_NIDENT]byte - _, err := readFull(ctx, f, usermem.BytesIOSequence(ident[:]), 0) + _, err := f.ReadFull(ctx, usermem.BytesIOSequence(ident[:]), 0) if err != nil { log.Infof("Error reading ELF ident: %v", err) // The entire ident array always exists. @@ -137,7 +137,7 @@ func parseHeader(ctx context.Context, f *fs.File) (elfInfo, error) { var hdr elf.Header64 hdrBuf := make([]byte, header64Size) - _, err = readFull(ctx, f, usermem.BytesIOSequence(hdrBuf), 0) + _, err = f.ReadFull(ctx, usermem.BytesIOSequence(hdrBuf), 0) if err != nil { log.Infof("Error reading ELF header: %v", err) // The entire header always exists. @@ -187,7 +187,7 @@ func parseHeader(ctx context.Context, f *fs.File) (elfInfo, error) { } phdrBuf := make([]byte, totalPhdrSize) - _, err = readFull(ctx, f, usermem.BytesIOSequence(phdrBuf), int64(hdr.Phoff)) + _, err = f.ReadFull(ctx, usermem.BytesIOSequence(phdrBuf), int64(hdr.Phoff)) if err != nil { log.Infof("Error reading ELF phdrs: %v", err) // If phdrs were specified, they should all exist. @@ -227,7 +227,7 @@ func parseHeader(ctx context.Context, f *fs.File) (elfInfo, error) { // mapSegment maps a phdr into the Task. offset is the offset to apply to // phdr.Vaddr. -func mapSegment(ctx context.Context, m *mm.MemoryManager, f *fs.File, phdr *elf.ProgHeader, offset usermem.Addr) error { +func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr *elf.ProgHeader, offset usermem.Addr) error { // We must make a page-aligned mapping. adjust := usermem.Addr(phdr.Vaddr).PageOffset() @@ -395,7 +395,7 @@ type loadedELF struct { // // Preconditions: // * f is an ELF file -func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f *fs.File, info elfInfo, sharedLoadOffset usermem.Addr) (loadedELF, error) { +func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, info elfInfo, sharedLoadOffset usermem.Addr) (loadedELF, error) { first := true var start, end usermem.Addr var interpreter string @@ -431,7 +431,7 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f *fs.File, info el } path := make([]byte, phdr.Filesz) - _, err := readFull(ctx, f, usermem.BytesIOSequence(path), int64(phdr.Off)) + _, err := f.ReadFull(ctx, usermem.BytesIOSequence(path), int64(phdr.Off)) if err != nil { // If an interpreter was specified, it should exist. ctx.Infof("Error reading PT_INTERP path: %v", err) @@ -564,7 +564,7 @@ func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f *fs.File, info el // Preconditions: // * f is an ELF file // * f is the first ELF loaded into m -func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureSet, f *fs.File) (loadedELF, arch.Context, error) { +func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureSet, f fsbridge.File) (loadedELF, arch.Context, error) { info, err := parseHeader(ctx, f) if err != nil { ctx.Infof("Failed to parse initial ELF: %v", err) @@ -602,7 +602,7 @@ func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureS // // Preconditions: // * f is an ELF file -func loadInterpreterELF(ctx context.Context, m *mm.MemoryManager, f *fs.File, initial loadedELF) (loadedELF, error) { +func loadInterpreterELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, initial loadedELF) (loadedELF, error) { info, err := parseHeader(ctx, f) if err != nil { if err == syserror.ENOEXEC { @@ -649,16 +649,14 @@ func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error // Refresh the traversal limit. *args.RemainingTraversals = linux.MaxSymlinkTraversals args.Filename = bin.interpreter - d, i, err := openPath(ctx, args) + intFile, err := openPath(ctx, args) if err != nil { ctx.Infof("Error opening interpreter %s: %v", bin.interpreter, err) return loadedELF{}, nil, err } - defer i.DecRef() - // We don't need the Dirent. - d.DecRef() + defer intFile.DecRef() - interp, err = loadInterpreterELF(ctx, args.MemoryManager, i, bin) + interp, err = loadInterpreterELF(ctx, args.MemoryManager, intFile, bin) if err != nil { ctx.Infof("Error loading interpreter: %v", err) return loadedELF{}, nil, err diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go index 098a45d36..3886b4d33 100644 --- a/pkg/sentry/loader/interpreter.go +++ b/pkg/sentry/loader/interpreter.go @@ -19,7 +19,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -37,9 +37,9 @@ const ( ) // parseInterpreterScript returns the interpreter path and argv. -func parseInterpreterScript(ctx context.Context, filename string, f *fs.File, argv []string) (newpath string, newargv []string, err error) { +func parseInterpreterScript(ctx context.Context, filename string, f fsbridge.File, argv []string) (newpath string, newargv []string, err error) { line := make([]byte, interpMaxLineLength) - n, err := readFull(ctx, f, usermem.BytesIOSequence(line), 0) + n, err := f.ReadFull(ctx, usermem.BytesIOSequence(line), 0) // Short read is OK. if err != nil && err != io.ErrUnexpectedEOF { if err == io.EOF { diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go index 9a613d6b7..d6675b8f0 100644 --- a/pkg/sentry/loader/loader.go +++ b/pkg/sentry/loader/loader.go @@ -20,7 +20,6 @@ import ( "fmt" "io" "path" - "strings" "gvisor.dev/gvisor/pkg/abi" "gvisor.dev/gvisor/pkg/abi/linux" @@ -29,8 +28,10 @@ import ( "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -41,16 +42,6 @@ type LoadArgs struct { // MemoryManager is the memory manager to load the executable into. MemoryManager *mm.MemoryManager - // Mounts is the mount namespace in which to look up Filename. - Mounts *fs.MountNamespace - - // Root is the root directory under which to look up Filename. - Root *fs.Dirent - - // WorkingDirectory is the working directory under which to look up - // Filename. - WorkingDirectory *fs.Dirent - // RemainingTraversals is the maximum number of symlinks to follow to // resolve Filename. This counter is passed by reference to keep it // updated throughout the call stack. @@ -65,7 +56,12 @@ type LoadArgs struct { // File is an open fs.File object of the executable. If File is not // nil, then File will be loaded and Filename will be ignored. - File *fs.File + // + // The caller is responsible for checking that the user can execute this file. + File fsbridge.File + + // Opener is used to open the executable file when 'File' is nil. + Opener fsbridge.Lookup // CloseOnExec indicates that the executable (or one of its parent // directories) was opened with O_CLOEXEC. If the executable is an @@ -106,103 +102,32 @@ func readFull(ctx context.Context, f *fs.File, dst usermem.IOSequence, offset in // installed in the Task FDTable. The caller takes ownership of both. // // args.Filename must be a readable, executable, regular file. -func openPath(ctx context.Context, args LoadArgs) (*fs.Dirent, *fs.File, error) { +func openPath(ctx context.Context, args LoadArgs) (fsbridge.File, error) { if args.Filename == "" { ctx.Infof("cannot open empty name") - return nil, nil, syserror.ENOENT - } - - var d *fs.Dirent - var err error - if args.ResolveFinal { - d, err = args.Mounts.FindInode(ctx, args.Root, args.WorkingDirectory, args.Filename, args.RemainingTraversals) - } else { - d, err = args.Mounts.FindLink(ctx, args.Root, args.WorkingDirectory, args.Filename, args.RemainingTraversals) - } - if err != nil { - return nil, nil, err - } - // Defer a DecRef for the sake of failure cases. - defer d.DecRef() - - if !args.ResolveFinal && fs.IsSymlink(d.Inode.StableAttr) { - return nil, nil, syserror.ELOOP - } - - if err := checkPermission(ctx, d); err != nil { - return nil, nil, err - } - - // If they claim it's a directory, then make sure. - // - // N.B. we reject directories below, but we must first reject - // non-directories passed as directories. - if strings.HasSuffix(args.Filename, "/") && !fs.IsDir(d.Inode.StableAttr) { - return nil, nil, syserror.ENOTDIR - } - - if err := checkIsRegularFile(ctx, d, args.Filename); err != nil { - return nil, nil, err - } - - f, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) - if err != nil { - return nil, nil, err - } - // Defer a DecRef for the sake of failure cases. - defer f.DecRef() - - if err := checkPread(ctx, f, args.Filename); err != nil { - return nil, nil, err - } - - d.IncRef() - f.IncRef() - return d, f, err -} - -// checkFile performs checks on a file to be executed. -func checkFile(ctx context.Context, f *fs.File, filename string) error { - if err := checkPermission(ctx, f.Dirent); err != nil { - return err - } - - if err := checkIsRegularFile(ctx, f.Dirent, filename); err != nil { - return err + return nil, syserror.ENOENT } - return checkPread(ctx, f, filename) -} - -// checkPermission checks whether the file is readable and executable. -func checkPermission(ctx context.Context, d *fs.Dirent) error { - perms := fs.PermMask{ - // TODO(gvisor.dev/issue/160): Linux requires only execute - // permission, not read. However, our backing filesystems may - // prevent us from reading the file without read permission. - // - // Additionally, a task with a non-readable executable has - // additional constraints on access via ptrace and procfs. - Read: true, - Execute: true, + // TODO(gvisor.dev/issue/160): Linux requires only execute permission, + // not read. However, our backing filesystems may prevent us from reading + // the file without read permission. Additionally, a task with a + // non-readable executable has additional constraints on access via + // ptrace and procfs. + opts := vfs.OpenOptions{ + Flags: linux.O_RDONLY, + FileExec: true, } - return d.Inode.CheckPermission(ctx, perms) + return args.Opener.OpenPath(ctx, args.Filename, opts, args.RemainingTraversals, args.ResolveFinal) } // checkIsRegularFile prevents us from trying to execute a directory, pipe, etc. -func checkIsRegularFile(ctx context.Context, d *fs.Dirent, filename string) error { - attr := d.Inode.StableAttr - if !fs.IsRegular(attr) { - ctx.Infof("%s is not regular: %v", filename, attr) - return syserror.EACCES +func checkIsRegularFile(ctx context.Context, file fsbridge.File, filename string) error { + t, err := file.Type(ctx) + if err != nil { + return err } - return nil -} - -// checkPread checks whether we can read the file at arbitrary offsets. -func checkPread(ctx context.Context, f *fs.File, filename string) error { - if !f.Flags().Pread { - ctx.Infof("%s cannot be read at an offset: %+v", filename, f.Flags()) + if t != linux.ModeRegular { + ctx.Infof("%q is not a regular file: %v", filename, t) return syserror.EACCES } return nil @@ -224,8 +149,10 @@ const ( maxLoaderAttempts = 6 ) -// loadExecutable loads an executable that is pointed to by args.File. If nil, -// the path args.Filename is resolved and loaded. If the executable is an +// loadExecutable loads an executable that is pointed to by args.File. The +// caller is responsible for checking that the user can execute this file. +// If nil, the path args.Filename is resolved and loaded (check that the user +// can execute this file is done here in this case). If the executable is an // interpreter script rather than an ELF, the binary of the corresponding // interpreter will be loaded. // @@ -234,37 +161,27 @@ const ( // * arch.Context matching the binary arch // * fs.Dirent of the binary file // * Possibly updated args.Argv -func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, *fs.Dirent, []string, error) { +func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, fsbridge.File, []string, error) { for i := 0; i < maxLoaderAttempts; i++ { - var ( - d *fs.Dirent - err error - ) if args.File == nil { - d, args.File, err = openPath(ctx, args) - // We will return d in the successful case, but defer a DecRef for the - // sake of intermediate loops and failure cases. - if d != nil { - defer d.DecRef() - } - if args.File != nil { - defer args.File.DecRef() + var err error + args.File, err = openPath(ctx, args) + if err != nil { + ctx.Infof("Error opening %s: %v", args.Filename, err) + return loadedELF{}, nil, nil, nil, err } + // Ensure file is release in case the code loops or errors out. + defer args.File.DecRef() } else { - d = args.File.Dirent - d.IncRef() - defer d.DecRef() - err = checkFile(ctx, args.File, args.Filename) - } - if err != nil { - ctx.Infof("Error opening %s: %v", args.Filename, err) - return loadedELF{}, nil, nil, nil, err + if err := checkIsRegularFile(ctx, args.File, args.Filename); err != nil { + return loadedELF{}, nil, nil, nil, err + } } // Check the header. Is this an ELF or interpreter script? var hdr [4]uint8 // N.B. We assume that reading from a regular file cannot block. - _, err = readFull(ctx, args.File, usermem.BytesIOSequence(hdr[:]), 0) + _, err := args.File.ReadFull(ctx, usermem.BytesIOSequence(hdr[:]), 0) // Allow unexpected EOF, as a valid executable could be only three bytes // (e.g., #!a). if err != nil && err != io.ErrUnexpectedEOF { @@ -281,9 +198,10 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context ctx.Infof("Error loading ELF: %v", err) return loadedELF{}, nil, nil, nil, err } - // An ELF is always terminal. Hold on to d. - d.IncRef() - return loaded, ac, d, args.Argv, err + // An ELF is always terminal. Hold on to file. + args.File.IncRef() + return loaded, ac, args.File, args.Argv, err + case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)): if args.CloseOnExec { return loadedELF{}, nil, nil, nil, syserror.ENOENT @@ -295,6 +213,7 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context } // Refresh the traversal limit for the interpreter. *args.RemainingTraversals = linux.MaxSymlinkTraversals + default: ctx.Infof("Unknown magic: %v", hdr) return loadedELF{}, nil, nil, nil, syserror.ENOEXEC @@ -317,11 +236,11 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context // * Load is called on the Task goroutine. func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *VDSO) (abi.OS, arch.Context, string, *syserr.Error) { // Load the executable itself. - loaded, ac, d, newArgv, err := loadExecutable(ctx, args) + loaded, ac, file, newArgv, err := loadExecutable(ctx, args) if err != nil { return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux()) } - defer d.DecRef() + defer file.DecRef() // Load the VDSO. vdsoAddr, err := loadVDSO(ctx, args.MemoryManager, vdso, loaded) @@ -390,7 +309,7 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V m.SetEnvvStart(sl.EnvvStart) m.SetEnvvEnd(sl.EnvvEnd) m.SetAuxv(auxv) - m.SetExecutable(d) + m.SetExecutable(file) ac.SetIP(uintptr(loaded.entry)) ac.SetStack(uintptr(stack.Bottom)) diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go index 52f446ed7..161b28c2c 100644 --- a/pkg/sentry/loader/vdso.go +++ b/pkg/sentry/loader/vdso.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/anon" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -69,6 +70,8 @@ type byteReader struct { var _ fs.FileOperations = (*byteReader)(nil) // newByteReaderFile creates a fake file to read data from. +// +// TODO(gvisor.dev/issue/1623): Convert to VFS2. func newByteReaderFile(ctx context.Context, data []byte) *fs.File { // Create a fake inode. inode := fs.NewInode( @@ -123,7 +126,7 @@ func (b *byteReader) Write(ctx context.Context, file *fs.File, src usermem.IOSeq // * PT_LOAD segments don't extend beyond the end of the file. // // ctx may be nil if f does not need it. -func validateVDSO(ctx context.Context, f *fs.File, size uint64) (elfInfo, error) { +func validateVDSO(ctx context.Context, f fsbridge.File, size uint64) (elfInfo, error) { info, err := parseHeader(ctx, f) if err != nil { log.Infof("Unable to parse VDSO header: %v", err) @@ -221,7 +224,7 @@ type VDSO struct { // PrepareVDSO validates the system VDSO and returns a VDSO, containing the // param page for updating by the kernel. func PrepareVDSO(ctx context.Context, mfp pgalloc.MemoryFileProvider) (*VDSO, error) { - vdsoFile := newByteReaderFile(ctx, vdsoBin) + vdsoFile := fsbridge.NewFSFile(newByteReaderFile(ctx, vdsoBin)) // First make sure the VDSO is valid. vdsoFile does not use ctx, so a // nil context can be passed. diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index e5729ced5..73591dab7 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -105,8 +105,8 @@ go_library( "//pkg/safecopy", "//pkg/safemem", "//pkg/sentry/arch", - "//pkg/sentry/fs", "//pkg/sentry/fs/proc/seqfile", + "//pkg/sentry/fsbridge", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/futex", "//pkg/sentry/kernel/shm", diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go index f550acae0..6a49334f4 100644 --- a/pkg/sentry/mm/metadata.go +++ b/pkg/sentry/mm/metadata.go @@ -16,7 +16,7 @@ package mm import ( "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/usermem" ) @@ -132,7 +132,7 @@ func (mm *MemoryManager) SetAuxv(auxv arch.Auxv) { // // An additional reference will be taken in the case of a non-nil executable, // which must be released by the caller. -func (mm *MemoryManager) Executable() *fs.Dirent { +func (mm *MemoryManager) Executable() fsbridge.File { mm.metadataMu.Lock() defer mm.metadataMu.Unlock() @@ -147,15 +147,15 @@ func (mm *MemoryManager) Executable() *fs.Dirent { // SetExecutable sets the executable. // // This takes a reference on d. -func (mm *MemoryManager) SetExecutable(d *fs.Dirent) { +func (mm *MemoryManager) SetExecutable(file fsbridge.File) { mm.metadataMu.Lock() // Grab a new reference. - d.IncRef() + file.IncRef() // Set the executable. orig := mm.executable - mm.executable = d + mm.executable = file mm.metadataMu.Unlock() diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index 09e582dd3..637383c7a 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -37,7 +37,7 @@ package mm import ( "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" @@ -215,7 +215,7 @@ type MemoryManager struct { // is not nil, it holds a reference on the Dirent. // // executable is protected by metadataMu. - executable *fs.Dirent + executable fsbridge.File // dumpability describes if and how this MemoryManager may be dumped to // userspace. diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go index a796b2396..46cb2a1cc 100644 --- a/pkg/sentry/strace/strace.go +++ b/pkg/sentry/strace/strace.go @@ -141,6 +141,10 @@ func path(t *kernel.Task, addr usermem.Addr) string { } func fd(t *kernel.Task, fd int32) string { + if kernel.VFS2Enabled { + return fdVFS2(t, fd) + } + root := t.FSContext().RootDirectory() if root != nil { defer root.DecRef() @@ -169,6 +173,30 @@ func fd(t *kernel.Task, fd int32) string { return fmt.Sprintf("%#x %s", fd, name) } +func fdVFS2(t *kernel.Task, fd int32) string { + root := t.FSContext().RootDirectoryVFS2() + defer root.DecRef() + + vfsObj := root.Mount().Filesystem().VirtualFilesystem() + if fd == linux.AT_FDCWD { + wd := t.FSContext().WorkingDirectoryVFS2() + defer wd.DecRef() + + name, _ := vfsObj.PathnameWithDeleted(t, root, wd) + return fmt.Sprintf("AT_FDCWD %s", name) + } + + file := t.GetFileVFS2(fd) + if file == nil { + // Cast FD to uint64 to avoid printing negative hex. + return fmt.Sprintf("%#x (bad FD)", uint64(fd)) + } + defer file.DecRef() + + name, _ := vfsObj.PathnameWithDeleted(t, root, file.VirtualDentry()) + return fmt.Sprintf("%#x %s", fd, name) +} + func fdpair(t *kernel.Task, addr usermem.Addr) string { var fds [2]int32 _, err := t.CopyIn(addr, &fds) diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index be16ee686..0d24fd3c4 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -74,6 +74,7 @@ go_library( "//pkg/sentry/fs/lock", "//pkg/sentry/fs/timerfd", "//pkg/sentry/fs/tmpfs", + "//pkg/sentry/fsbridge", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/epoll", diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index 98db32d77..9c6728530 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" @@ -135,7 +136,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } // Set the underlying executable. - t.MemoryManager().SetExecutable(file.Dirent) + t.MemoryManager().SetExecutable(fsbridge.NewFSFile(file)) case linux.PR_SET_MM_AUXV, linux.PR_SET_MM_START_CODE, diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 0c9e2255d..00915fdde 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" "gvisor.dev/gvisor/pkg/sentry/loader" @@ -119,7 +120,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user defer root.DecRef() var wd *fs.Dirent - var executable *fs.File + var executable fsbridge.File var closeOnExec bool if dirFD == linux.AT_FDCWD || path.IsAbs(pathname) { // Even if the pathname is absolute, we may still need the wd @@ -136,7 +137,15 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user closeOnExec = fdFlags.CloseOnExec if atEmptyPath && len(pathname) == 0 { - executable = f + // TODO(gvisor.dev/issue/160): Linux requires only execute permission, + // not read. However, our backing filesystems may prevent us from reading + // the file without read permission. Additionally, a task with a + // non-readable executable has additional constraints on access via + // ptrace and procfs. + if err := f.Dirent.Inode.CheckPermission(t, fs.PermMask{Read: true, Execute: true}); err != nil { + return 0, nil, err + } + executable = fsbridge.NewFSFile(f) } else { wd = f.Dirent wd.IncRef() @@ -152,9 +161,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user // Load the new TaskContext. remainingTraversals := uint(linux.MaxSymlinkTraversals) loadArgs := loader.LoadArgs{ - Mounts: t.MountNamespace(), - Root: root, - WorkingDirectory: wd, + Opener: fsbridge.NewFSLookup(t.MountNamespace(), root, wd), RemainingTraversals: &remainingTraversals, ResolveFinal: resolveFinal, Filename: pathname, diff --git a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go b/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go index c134714ee..e0ac32b33 100644 --- a/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go +++ b/pkg/sentry/syscalls/linux/vfs2/linux64_override_amd64.go @@ -22,4 +22,110 @@ import ( // Override syscall table to add syscalls implementations from this package. func Override(table map[uintptr]kernel.Syscall) { table[0] = syscalls.Supported("read", Read) + + // Remove syscalls that haven't been converted yet. It's better to get ENOSYS + // rather than a SIGSEGV deep in the stack. + delete(table, 1) // write + delete(table, 2) // open + delete(table, 3) // close + delete(table, 4) // stat + delete(table, 5) // fstat + delete(table, 6) // lstat + delete(table, 7) // poll + delete(table, 8) // lseek + delete(table, 9) // mmap + delete(table, 16) // ioctl + delete(table, 17) // pread64 + delete(table, 18) // pwrite64 + delete(table, 19) // readv + delete(table, 20) // writev + delete(table, 21) // access + delete(table, 22) // pipe + delete(table, 32) // dup + delete(table, 33) // dup2 + delete(table, 40) // sendfile + delete(table, 59) // execve + delete(table, 72) // fcntl + delete(table, 73) // flock + delete(table, 74) // fsync + delete(table, 75) // fdatasync + delete(table, 76) // truncate + delete(table, 77) // ftruncate + delete(table, 78) // getdents + delete(table, 79) // getcwd + delete(table, 80) // chdir + delete(table, 81) // fchdir + delete(table, 82) // rename + delete(table, 83) // mkdir + delete(table, 84) // rmdir + delete(table, 85) // creat + delete(table, 86) // link + delete(table, 87) // unlink + delete(table, 88) // symlink + delete(table, 89) // readlink + delete(table, 90) // chmod + delete(table, 91) // fchmod + delete(table, 92) // chown + delete(table, 93) // fchown + delete(table, 94) // lchown + delete(table, 133) // mknod + delete(table, 137) // statfs + delete(table, 138) // fstatfs + delete(table, 161) // chroot + delete(table, 162) // sync + delete(table, 165) // mount + delete(table, 166) // umount2 + delete(table, 172) // iopl + delete(table, 173) // ioperm + delete(table, 187) // readahead + delete(table, 188) // setxattr + delete(table, 189) // lsetxattr + delete(table, 190) // fsetxattr + delete(table, 191) // getxattr + delete(table, 192) // lgetxattr + delete(table, 193) // fgetxattr + delete(table, 206) // io_setup + delete(table, 207) // io_destroy + delete(table, 208) // io_getevents + delete(table, 209) // io_submit + delete(table, 210) // io_cancel + delete(table, 213) // epoll_create + delete(table, 214) // epoll_ctl_old + delete(table, 215) // epoll_wait_old + delete(table, 216) // remap_file_pages + delete(table, 217) // getdents64 + delete(table, 232) // epoll_wait + delete(table, 233) // epoll_ctl + delete(table, 253) // inotify_init + delete(table, 254) // inotify_add_watch + delete(table, 255) // inotify_rm_watch + delete(table, 257) // openat + delete(table, 258) // mkdirat + delete(table, 259) // mknodat + delete(table, 260) // fchownat + delete(table, 261) // futimesat + delete(table, 262) // fstatat + delete(table, 263) // unlinkat + delete(table, 264) // renameat + delete(table, 265) // linkat + delete(table, 266) // symlinkat + delete(table, 267) // readlinkat + delete(table, 268) // fchmodat + delete(table, 269) // faccessat + delete(table, 270) // pselect + delete(table, 271) // ppoll + delete(table, 285) // fallocate + delete(table, 291) // epoll_create1 + delete(table, 292) // dup3 + delete(table, 293) // pipe2 + delete(table, 294) // inotify_init1 + delete(table, 295) // preadv + delete(table, 296) // pwritev + delete(table, 306) // syncfs + delete(table, 316) // renameat2 + delete(table, 319) // memfd_create + delete(table, 322) // execveat + delete(table, 327) // preadv2 + delete(table, 328) // pwritev2 + delete(table, 332) // statx } diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 14b39eb9d..0b4f18ab5 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -43,6 +43,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fspath", + "//pkg/log", "//pkg/sentry/arch", "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/vfs/context.go b/pkg/sentry/vfs/context.go index d97362b9a..82781e6d3 100644 --- a/pkg/sentry/vfs/context.go +++ b/pkg/sentry/vfs/context.go @@ -29,9 +29,10 @@ const ( CtxRoot ) -// MountNamespaceFromContext returns the MountNamespace used by ctx. It does -// not take a reference on the returned MountNamespace. If ctx is not -// associated with a MountNamespace, MountNamespaceFromContext returns nil. +// MountNamespaceFromContext returns the MountNamespace used by ctx. If ctx is +// not associated with a MountNamespace, MountNamespaceFromContext returns nil. +// +// A reference is taken on the returned MountNamespace. func MountNamespaceFromContext(ctx context.Context) *MountNamespace { if v := ctx.Value(CtxMountNamespace); v != nil { return v.(*MountNamespace) diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 1fbb420f9..ad2c9fcf4 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -114,6 +114,7 @@ type MountNamespace struct { func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *GetFilesystemOptions) (*MountNamespace, error) { rft := vfs.getFilesystemType(fsTypeName) if rft == nil { + ctx.Warningf("Unknown filesystem: %s", fsTypeName) return nil, syserror.ENODEV } fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, *opts) @@ -231,9 +232,12 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti return syserror.EINVAL } vfs.mountMu.Lock() - if mntns := MountNamespaceFromContext(ctx); mntns != nil && mntns != vd.mount.ns { - vfs.mountMu.Unlock() - return syserror.EINVAL + if mntns := MountNamespaceFromContext(ctx); mntns != nil { + defer mntns.DecRef() + if mntns != vd.mount.ns { + vfs.mountMu.Unlock() + return syserror.EINVAL + } } // TODO(jamieliu): Linux special-cases umount of the caller's root, which diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go index fdf8be157..6af7fdac1 100644 --- a/pkg/sentry/vfs/options.go +++ b/pkg/sentry/vfs/options.go @@ -61,7 +61,7 @@ type MountOptions struct { type OpenOptions struct { // Flags contains access mode and flags as specified for open(2). // - // FilesystemImpls is reponsible for implementing the following flags: + // FilesystemImpls are responsible for implementing the following flags: // O_RDONLY, O_WRONLY, O_RDWR, O_APPEND, O_CREAT, O_DIRECT, O_DSYNC, // O_EXCL, O_NOATIME, O_NOCTTY, O_NONBLOCK, O_PATH, O_SYNC, O_TMPFILE, and // O_TRUNC. VFS is responsible for handling O_DIRECTORY, O_LARGEFILE, and diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 9629afee9..51deae313 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -393,7 +393,8 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential // be executed. return nil, syserror.EACCES } - if linux.FileMode(stat.Mode).FileType() != linux.ModeRegular { + if t := linux.FileMode(stat.Mode).FileType(); t != linux.ModeRegular { + ctx.Infof("%q is not a regular file: %v", pop.Path, t) return nil, syserror.EACCES } } @@ -743,6 +744,8 @@ func (vfs *VirtualFilesystem) SyncAllFilesystems(ctx context.Context) error { // VirtualDentry methods require that a reference is held on the VirtualDentry. // // VirtualDentry is analogous to Linux's struct path. +// +// +stateify savable type VirtualDentry struct { mount *Mount dentry *Dentry diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 9f0d5d7af..239ca5302 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -795,16 +795,19 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { return 0, fmt.Errorf("container %q not started", args.ContainerID) } + // TODO(gvisor.dev/issue/1623): Add VFS2 support + // Get the container MountNamespace from the Task. tg.Leader().WithMuLocked(func(t *kernel.Task) { - // task.MountNamespace() does not take a ref, so we must do so - // ourselves. + // task.MountNamespace() does not take a ref, so we must do so ourselves. args.MountNamespace = t.MountNamespace() args.MountNamespace.IncRef() }) - defer args.MountNamespace.DecRef() + if args.MountNamespace != nil { + defer args.MountNamespace.DecRef() + } - // Add the HOME enviroment varible if it is not already set. + // Add the HOME environment variable if it is not already set. root := args.MountNamespace.Root() defer root.DecRef() ctx := fs.WithRoot(l.k.SupervisorContext(), root) -- cgit v1.2.3 From 4a73bae269ae9f52a962ae3b08a17ccaacf7ba80 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Thu, 20 Feb 2020 15:19:40 -0800 Subject: Initial network namespace support. TCP/IP will work with netstack networking. hostinet doesn't work, and sockets will have the same behavior as it is now. Before the userspace is able to create device, the default loopback device can be used to test. /proc/net and /sys/net will still be connected to the root network stack; this is the same behavior now. Issue #1833 PiperOrigin-RevId: 296309389 --- pkg/sentry/fs/proc/net.go | 5 +- pkg/sentry/fs/proc/sys_net.go | 4 +- pkg/sentry/fsimpl/proc/tasks_net.go | 5 +- pkg/sentry/fsimpl/proc/tasks_sys.go | 4 +- pkg/sentry/fsimpl/testutil/kernel.go | 1 + pkg/sentry/inet/BUILD | 1 + pkg/sentry/inet/namespace.go | 99 +++++++++++++++++++++++++ pkg/sentry/kernel/kernel.go | 26 ++++--- pkg/sentry/kernel/task.go | 9 +-- pkg/sentry/kernel/task_clone.go | 16 ++-- pkg/sentry/kernel/task_net.go | 19 +++-- pkg/sentry/kernel/task_start.go | 8 +- pkg/tcpip/time_unsafe.go | 2 + runsc/boot/BUILD | 2 +- runsc/boot/controller.go | 11 +-- runsc/boot/loader.go | 121 +++++++++++++++++++++---------- runsc/boot/network.go | 27 +++++++ runsc/boot/pprof.go | 18 ----- runsc/boot/pprof/BUILD | 11 +++ runsc/boot/pprof/pprof.go | 20 +++++ runsc/sandbox/network.go | 25 +------ test/syscalls/BUILD | 2 + test/syscalls/linux/BUILD | 17 +++++ test/syscalls/linux/network_namespace.cc | 121 +++++++++++++++++++++++++++++++ 24 files changed, 451 insertions(+), 123 deletions(-) create mode 100644 pkg/sentry/inet/namespace.go delete mode 100644 runsc/boot/pprof.go create mode 100644 runsc/boot/pprof/BUILD create mode 100644 runsc/boot/pprof/pprof.go create mode 100644 test/syscalls/linux/network_namespace.cc (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 6f2775344..95d5817ff 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -43,7 +43,10 @@ import ( // newNet creates a new proc net entry. func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSource) *fs.Inode { var contents map[string]*fs.Inode - if s := p.k.NetworkStack(); s != nil { + // TODO(gvisor.dev/issue/1833): Support for using the network stack in the + // network namespace of the calling process. We should make this per-process, + // a.k.a. /proc/PID/net, and make /proc/net a symlink to /proc/self/net. + if s := p.k.RootNetworkNamespace().Stack(); s != nil { contents = map[string]*fs.Inode{ "dev": seqfile.NewSeqFileInode(ctx, &netDev{s: s}, msrc), "snmp": seqfile.NewSeqFileInode(ctx, &netSnmp{s: s}, msrc), diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index 0772d4ae4..d4c4b533d 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -357,7 +357,9 @@ func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s ine func (p *proc) newSysNetDir(ctx context.Context, msrc *fs.MountSource) *fs.Inode { var contents map[string]*fs.Inode - if s := p.k.NetworkStack(); s != nil { + // TODO(gvisor.dev/issue/1833): Support for using the network stack in the + // network namespace of the calling process. + if s := p.k.RootNetworkNamespace().Stack(); s != nil { contents = map[string]*fs.Inode{ "ipv4": p.newSysNetIPv4Dir(ctx, msrc, s), "core": p.newSysNetCore(ctx, msrc, s), diff --git a/pkg/sentry/fsimpl/proc/tasks_net.go b/pkg/sentry/fsimpl/proc/tasks_net.go index 608fec017..d4e1812d8 100644 --- a/pkg/sentry/fsimpl/proc/tasks_net.go +++ b/pkg/sentry/fsimpl/proc/tasks_net.go @@ -39,7 +39,10 @@ import ( func newNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry { var contents map[string]*kernfs.Dentry - if stack := k.NetworkStack(); stack != nil { + // TODO(gvisor.dev/issue/1833): Support for using the network stack in the + // network namespace of the calling process. We should make this per-process, + // a.k.a. /proc/PID/net, and make /proc/net a symlink to /proc/self/net. + if stack := k.RootNetworkNamespace().Stack(); stack != nil { const ( arp = "IP address HW type Flags HW address Mask Device\n" netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n" diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index c7ce74883..3d5dc463c 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -50,7 +50,9 @@ func newSysDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *k func newSysNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry { var contents map[string]*kernfs.Dentry - if stack := k.NetworkStack(); stack != nil { + // TODO(gvisor.dev/issue/1833): Support for using the network stack in the + // network namespace of the calling process. + if stack := k.RootNetworkNamespace().Stack(); stack != nil { contents = map[string]*kernfs.Dentry{ "ipv4": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ "tcp_sack": newDentry(root, inoGen.NextIno(), 0644, &tcpSackData{stack: stack}), diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go index d0be32e72..488478e29 100644 --- a/pkg/sentry/fsimpl/testutil/kernel.go +++ b/pkg/sentry/fsimpl/testutil/kernel.go @@ -128,6 +128,7 @@ func CreateTask(ctx context.Context, name string, tc *kernel.ThreadGroup, mntns ThreadGroup: tc, TaskContext: &kernel.TaskContext{Name: name}, Credentials: auth.CredentialsFromContext(ctx), + NetworkNamespace: k.RootNetworkNamespace(), AllowedCPUMask: sched.NewFullCPUSet(k.ApplicationCores()), UTSNamespace: kernel.UTSNamespaceFromContext(ctx), IPCNamespace: kernel.IPCNamespaceFromContext(ctx), diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD index 334432abf..07bf39fed 100644 --- a/pkg/sentry/inet/BUILD +++ b/pkg/sentry/inet/BUILD @@ -10,6 +10,7 @@ go_library( srcs = [ "context.go", "inet.go", + "namespace.go", "test_stack.go", ], deps = [ diff --git a/pkg/sentry/inet/namespace.go b/pkg/sentry/inet/namespace.go new file mode 100644 index 000000000..c16667e7f --- /dev/null +++ b/pkg/sentry/inet/namespace.go @@ -0,0 +1,99 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package inet + +// Namespace represents a network namespace. See network_namespaces(7). +// +// +stateify savable +type Namespace struct { + // stack is the network stack implementation of this network namespace. + stack Stack `state:"nosave"` + + // creator allows kernel to create new network stack for network namespaces. + // If nil, no networking will function if network is namespaced. + creator NetworkStackCreator + + // isRoot indicates whether this is the root network namespace. + isRoot bool +} + +// NewRootNamespace creates the root network namespace, with creator +// allowing new network namespaces to be created. If creator is nil, no +// networking will function if the network is namespaced. +func NewRootNamespace(stack Stack, creator NetworkStackCreator) *Namespace { + return &Namespace{ + stack: stack, + creator: creator, + isRoot: true, + } +} + +// NewNamespace creates a new network namespace from the root. +func NewNamespace(root *Namespace) *Namespace { + n := &Namespace{ + creator: root.creator, + } + n.init() + return n +} + +// Stack returns the network stack of n. Stack may return nil if no network +// stack is configured. +func (n *Namespace) Stack() Stack { + return n.stack +} + +// IsRoot returns whether n is the root network namespace. +func (n *Namespace) IsRoot() bool { + return n.isRoot +} + +// RestoreRootStack restores the root network namespace with stack. This should +// only be called when restoring kernel. +func (n *Namespace) RestoreRootStack(stack Stack) { + if !n.isRoot { + panic("RestoreRootStack can only be called on root network namespace") + } + if n.stack != nil { + panic("RestoreRootStack called after a stack has already been set") + } + n.stack = stack +} + +func (n *Namespace) init() { + // Root network namespace will have stack assigned later. + if n.isRoot { + return + } + if n.creator != nil { + var err error + n.stack, err = n.creator.CreateStack() + if err != nil { + panic(err) + } + } +} + +// afterLoad is invoked by stateify. +func (n *Namespace) afterLoad() { + n.init() +} + +// NetworkStackCreator allows new instances of a network stack to be created. It +// is used by the kernel to create new network namespaces when requested. +type NetworkStackCreator interface { + // CreateStack creates a new network stack for a network namespace. + CreateStack() (Stack, error) +} diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 7da0368f1..c62fd6eb1 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -111,7 +111,7 @@ type Kernel struct { timekeeper *Timekeeper tasks *TaskSet rootUserNamespace *auth.UserNamespace - networkStack inet.Stack `state:"nosave"` + rootNetworkNamespace *inet.Namespace applicationCores uint useHostCores bool extraAuxv []arch.AuxEntry @@ -260,8 +260,9 @@ type InitKernelArgs struct { // RootUserNamespace is the root user namespace. RootUserNamespace *auth.UserNamespace - // NetworkStack is the TCP/IP network stack. NetworkStack may be nil. - NetworkStack inet.Stack + // RootNetworkNamespace is the root network namespace. If nil, no networking + // will be available. + RootNetworkNamespace *inet.Namespace // ApplicationCores is the number of logical CPUs visible to sandboxed // applications. The set of logical CPU IDs is [0, ApplicationCores); thus @@ -320,7 +321,10 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.rootUTSNamespace = args.RootUTSNamespace k.rootIPCNamespace = args.RootIPCNamespace k.rootAbstractSocketNamespace = args.RootAbstractSocketNamespace - k.networkStack = args.NetworkStack + k.rootNetworkNamespace = args.RootNetworkNamespace + if k.rootNetworkNamespace == nil { + k.rootNetworkNamespace = inet.NewRootNamespace(nil, nil) + } k.applicationCores = args.ApplicationCores if args.UseHostCores { k.useHostCores = true @@ -543,8 +547,6 @@ func (ts *TaskSet) unregisterEpollWaiters() { func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack, clocks sentrytime.Clocks) error { loadStart := time.Now() - k.networkStack = net - initAppCores := k.applicationCores // Load the pre-saved CPUID FeatureSet. @@ -575,6 +577,10 @@ func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack, clocks sentrytime.Clocks) log.Infof("Kernel load stats: %s", &stats) log.Infof("Kernel load took [%s].", time.Since(kernelStart)) + // rootNetworkNamespace should be populated after loading the state file. + // Restore the root network stack. + k.rootNetworkNamespace.RestoreRootStack(net) + // Load the memory file's state. memoryStart := time.Now() if err := k.mf.LoadFrom(k.SupervisorContext(), r); err != nil { @@ -905,6 +911,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, FSContext: fsContext, FDTable: args.FDTable, Credentials: args.Credentials, + NetworkNamespace: k.RootNetworkNamespace(), AllowedCPUMask: sched.NewFullCPUSet(k.applicationCores), UTSNamespace: args.UTSNamespace, IPCNamespace: args.IPCNamespace, @@ -1255,10 +1262,9 @@ func (k *Kernel) RootAbstractSocketNamespace() *AbstractSocketNamespace { return k.rootAbstractSocketNamespace } -// NetworkStack returns the network stack. NetworkStack may return nil if no -// network stack is available. -func (k *Kernel) NetworkStack() inet.Stack { - return k.networkStack +// RootNetworkNamespace returns the root network namespace, always non-nil. +func (k *Kernel) RootNetworkNamespace() *inet.Namespace { + return k.rootNetworkNamespace } // GlobalInit returns the thread group with ID 1 in the root PID namespace, or diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index a3443ff21..e37e23231 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -486,13 +486,10 @@ type Task struct { numaPolicy int32 numaNodeMask uint64 - // If netns is true, the task is in a non-root network namespace. Network - // namespaces aren't currently implemented in full; being in a network - // namespace simply prevents the task from observing any network devices - // (including loopback) or using abstract socket addresses (see unix(7)). + // netns is the task's network namespace. netns is never nil. // - // netns is protected by mu. netns is owned by the task goroutine. - netns bool + // netns is protected by mu. + netns *inet.Namespace // If rseqPreempted is true, before the next call to p.Switch(), // interrupt rseq critical regions as defined by rseqAddr and diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index ba74b4c1c..78866f280 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" + "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -54,8 +55,7 @@ type SharingOptions struct { NewUserNamespace bool // If NewNetworkNamespace is true, the task should have an independent - // network namespace. (Note that network namespaces are not really - // implemented; see comment on Task.netns for details.) + // network namespace. NewNetworkNamespace bool // If NewFiles is true, the task should use an independent file descriptor @@ -199,6 +199,11 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { ipcns = NewIPCNamespace(userns) } + netns := t.NetworkNamespace() + if opts.NewNetworkNamespace { + netns = inet.NewNamespace(netns) + } + // TODO(b/63601033): Implement CLONE_NEWNS. mntnsVFS2 := t.mountNamespaceVFS2 if mntnsVFS2 != nil { @@ -268,7 +273,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { FDTable: fdTable, Credentials: creds, Niceness: t.Niceness(), - NetworkNamespaced: t.netns, + NetworkNamespace: netns, AllowedCPUMask: t.CPUMask(), UTSNamespace: utsns, IPCNamespace: ipcns, @@ -283,9 +288,6 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { } else { cfg.InheritParent = t } - if opts.NewNetworkNamespace { - cfg.NetworkNamespaced = true - } nt, err := t.tg.pidns.owner.NewTask(cfg) if err != nil { if opts.NewThreadGroup { @@ -482,7 +484,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { t.mu.Unlock() return syserror.EPERM } - t.netns = true + t.netns = inet.NewNamespace(t.netns) } if opts.NewUTSNamespace { if !haveCapSysAdmin { diff --git a/pkg/sentry/kernel/task_net.go b/pkg/sentry/kernel/task_net.go index 172a31e1d..f7711232c 100644 --- a/pkg/sentry/kernel/task_net.go +++ b/pkg/sentry/kernel/task_net.go @@ -22,14 +22,23 @@ import ( func (t *Task) IsNetworkNamespaced() bool { t.mu.Lock() defer t.mu.Unlock() - return t.netns + return !t.netns.IsRoot() } // NetworkContext returns the network stack used by the task. NetworkContext // may return nil if no network stack is available. +// +// TODO(gvisor.dev/issue/1833): Migrate callers of this method to +// NetworkNamespace(). func (t *Task) NetworkContext() inet.Stack { - if t.IsNetworkNamespaced() { - return nil - } - return t.k.networkStack + t.mu.Lock() + defer t.mu.Unlock() + return t.netns.Stack() +} + +// NetworkNamespace returns the network namespace observed by the task. +func (t *Task) NetworkNamespace() *inet.Namespace { + t.mu.Lock() + defer t.mu.Unlock() + return t.netns } diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index f9236a842..a5035bb7f 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -17,6 +17,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" @@ -65,9 +66,8 @@ type TaskConfig struct { // Niceness is the niceness of the new task. Niceness int - // If NetworkNamespaced is true, the new task should observe a non-root - // network namespace. - NetworkNamespaced bool + // NetworkNamespace is the network namespace to be used for the new task. + NetworkNamespace *inet.Namespace // AllowedCPUMask contains the cpus that this task can run on. AllowedCPUMask sched.CPUSet @@ -133,7 +133,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { allowedCPUMask: cfg.AllowedCPUMask.Copy(), ioUsage: &usage.IO{}, niceness: cfg.Niceness, - netns: cfg.NetworkNamespaced, + netns: cfg.NetworkNamespace, utsns: cfg.UTSNamespace, ipcns: cfg.IPCNamespace, abstractSockets: cfg.AbstractSocketNamespace, diff --git a/pkg/tcpip/time_unsafe.go b/pkg/tcpip/time_unsafe.go index 48764b978..2f98a996f 100644 --- a/pkg/tcpip/time_unsafe.go +++ b/pkg/tcpip/time_unsafe.go @@ -25,6 +25,8 @@ import ( ) // StdClock implements Clock with the time package. +// +// +stateify savable type StdClock struct{} var _ Clock = (*StdClock)(nil) diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index ae4dd102a..26f68fe3d 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -19,7 +19,6 @@ go_library( "loader_amd64.go", "loader_arm64.go", "network.go", - "pprof.go", "strace.go", "user.go", ], @@ -91,6 +90,7 @@ go_library( "//pkg/usermem", "//runsc/boot/filter", "//runsc/boot/platforms", + "//runsc/boot/pprof", "//runsc/specutils", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 9c9e94864..17e774e0c 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -32,6 +32,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/boot/pprof" "gvisor.dev/gvisor/runsc/specutils" ) @@ -142,7 +143,7 @@ func newController(fd int, l *Loader) (*controller, error) { } srv.Register(manager) - if eps, ok := l.k.NetworkStack().(*netstack.Stack); ok { + if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok { net := &Network{ Stack: eps.Stack, } @@ -341,7 +342,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { return fmt.Errorf("creating memory file: %v", err) } k.SetMemoryFile(mf) - networkStack := cm.l.k.NetworkStack() + networkStack := cm.l.k.RootNetworkNamespace().Stack() cm.l.k = k // Set up the restore environment. @@ -365,9 +366,9 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { } if cm.l.conf.ProfileEnable { - // initializePProf opens /proc/self/maps, so has to be - // called before installing seccomp filters. - initializePProf() + // pprof.Initialize opens /proc/self/maps, so has to be called before + // installing seccomp filters. + pprof.Initialize() } // Seccomp filters have to be applied before parsing the state file. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index eef43b9df..e7ca98134 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -49,6 +49,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/link/loopback" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" "gvisor.dev/gvisor/pkg/tcpip/network/arp" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" @@ -60,6 +61,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/transport/udp" "gvisor.dev/gvisor/runsc/boot/filter" _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms. + "gvisor.dev/gvisor/runsc/boot/pprof" "gvisor.dev/gvisor/runsc/specutils" // Include supported socket providers. @@ -230,11 +232,8 @@ func New(args Args) (*Loader, error) { return nil, fmt.Errorf("enabling strace: %v", err) } - // Create an empty network stack because the network namespace may be empty at - // this point. Netns is configured before Run() is called. Netstack is - // configured using a control uRPC message. Host network is configured inside - // Run(). - networkStack, err := newEmptyNetworkStack(args.Conf, k, k) + // Create root network namespace/stack. + netns, err := newRootNetworkNamespace(args.Conf, k, k) if err != nil { return nil, fmt.Errorf("creating network: %v", err) } @@ -277,7 +276,7 @@ func New(args Args) (*Loader, error) { FeatureSet: cpuid.HostFeatureSet(), Timekeeper: tk, RootUserNamespace: creds.UserNamespace, - NetworkStack: networkStack, + RootNetworkNamespace: netns, ApplicationCores: uint(args.NumCPU), Vdso: vdso, RootUTSNamespace: kernel.NewUTSNamespace(args.Spec.Hostname, args.Spec.Hostname, creds.UserNamespace), @@ -466,7 +465,7 @@ func (l *Loader) run() error { // Delay host network configuration to this point because network namespace // is configured after the loader is created and before Run() is called. log.Debugf("Configuring host network") - stack := l.k.NetworkStack().(*hostinet.Stack) + stack := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack) if err := stack.Configure(); err != nil { return err } @@ -485,7 +484,7 @@ func (l *Loader) run() error { // l.restore is set by the container manager when a restore call is made. if !l.restore { if l.conf.ProfileEnable { - initializePProf() + pprof.Initialize() } // Finally done with all configuration. Setup filters before user code @@ -908,48 +907,92 @@ func (l *Loader) WaitExit() kernel.ExitStatus { return l.k.GlobalInit().ExitStatus() } -func newEmptyNetworkStack(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { +func newRootNetworkNamespace(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (*inet.Namespace, error) { + // Create an empty network stack because the network namespace may be empty at + // this point. Netns is configured before Run() is called. Netstack is + // configured using a control uRPC message. Host network is configured inside + // Run(). switch conf.Network { case NetworkHost: - return hostinet.NewStack(), nil + // No network namespacing support for hostinet yet, hence creator is nil. + return inet.NewRootNamespace(hostinet.NewStack(), nil), nil case NetworkNone, NetworkSandbox: - // NetworkNone sets up loopback using netstack. - netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()} - transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()} - s := netstack.Stack{stack.New(stack.Options{ - NetworkProtocols: netProtos, - TransportProtocols: transProtos, - Clock: clock, - Stats: netstack.Metrics, - HandleLocal: true, - // Enable raw sockets for users with sufficient - // privileges. - RawFactory: raw.EndpointFactory{}, - UniqueID: uniqueID, - })} - - // Enable SACK Recovery. - if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { - return nil, fmt.Errorf("failed to enable SACK: %v", err) + s, err := newEmptySandboxNetworkStack(clock, uniqueID) + if err != nil { + return nil, err } + creator := &sandboxNetstackCreator{ + clock: clock, + uniqueID: uniqueID, + } + return inet.NewRootNamespace(s, creator), nil - // Set default TTLs as required by socket/netstack. - s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) - s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + default: + panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + } - // Enable Receive Buffer Auto-Tuning. - if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { - return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) - } +} - s.FillDefaultIPTables() +func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { + netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()} + transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()} + s := netstack.Stack{stack.New(stack.Options{ + NetworkProtocols: netProtos, + TransportProtocols: transProtos, + Clock: clock, + Stats: netstack.Metrics, + HandleLocal: true, + // Enable raw sockets for users with sufficient + // privileges. + RawFactory: raw.EndpointFactory{}, + UniqueID: uniqueID, + })} - return &s, nil + // Enable SACK Recovery. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil { + return nil, fmt.Errorf("failed to enable SACK: %v", err) + } - default: - panic(fmt.Sprintf("invalid network configuration: %v", conf.Network)) + // Set default TTLs as required by socket/netstack. + s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL)) + + // Enable Receive Buffer Auto-Tuning. + if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil { + return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) + } + + s.FillDefaultIPTables() + + return &s, nil +} + +// sandboxNetstackCreator implements kernel.NetworkStackCreator. +// +// +stateify savable +type sandboxNetstackCreator struct { + clock tcpip.Clock + uniqueID stack.UniqueID +} + +// CreateStack implements kernel.NetworkStackCreator.CreateStack. +func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) { + s, err := newEmptySandboxNetworkStack(f.clock, f.uniqueID) + if err != nil { + return nil, err } + + // Setup loopback. + n := &Network{Stack: s.(*netstack.Stack).Stack} + nicID := tcpip.NICID(f.uniqueID.UniqueID()) + link := DefaultLoopbackLink + linkEP := loopback.New() + if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil { + return nil, err + } + + return s, nil } // signal sends a signal to one or more processes in a container. If PID is 0, diff --git a/runsc/boot/network.go b/runsc/boot/network.go index 6a8765ec8..bee6ee336 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -17,6 +17,7 @@ package boot import ( "fmt" "net" + "strings" "syscall" "gvisor.dev/gvisor/pkg/log" @@ -31,6 +32,32 @@ import ( "gvisor.dev/gvisor/pkg/urpc" ) +var ( + // DefaultLoopbackLink contains IP addresses and routes of "127.0.0.1/8" and + // "::1/8" on "lo" interface. + DefaultLoopbackLink = LoopbackLink{ + Name: "lo", + Addresses: []net.IP{ + net.IP("\x7f\x00\x00\x01"), + net.IPv6loopback, + }, + Routes: []Route{ + { + Destination: net.IPNet{ + IP: net.IPv4(0x7f, 0, 0, 0), + Mask: net.IPv4Mask(0xff, 0, 0, 0), + }, + }, + { + Destination: net.IPNet{ + IP: net.IPv6loopback, + Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)), + }, + }, + }, + } +) + // Network exposes methods that can be used to configure a network stack. type Network struct { Stack *stack.Stack diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof.go deleted file mode 100644 index 463362f02..000000000 --- a/runsc/boot/pprof.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -func initializePProf() { -} diff --git a/runsc/boot/pprof/BUILD b/runsc/boot/pprof/BUILD new file mode 100644 index 000000000..29cb42b2f --- /dev/null +++ b/runsc/boot/pprof/BUILD @@ -0,0 +1,11 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "pprof", + srcs = ["pprof.go"], + visibility = [ + "//runsc:__subpackages__", + ], +) diff --git a/runsc/boot/pprof/pprof.go b/runsc/boot/pprof/pprof.go new file mode 100644 index 000000000..1ded20dee --- /dev/null +++ b/runsc/boot/pprof/pprof.go @@ -0,0 +1,20 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package pprof provides a stub to initialize custom profilers. +package pprof + +// Initialize will be called at boot for initializing custom profilers. +func Initialize() { +} diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go index 99e143696..bc093fba5 100644 --- a/runsc/sandbox/network.go +++ b/runsc/sandbox/network.go @@ -21,7 +21,6 @@ import ( "path/filepath" "runtime" "strconv" - "strings" "syscall" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -75,30 +74,8 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi } func createDefaultLoopbackInterface(conn *urpc.Client) error { - link := boot.LoopbackLink{ - Name: "lo", - Addresses: []net.IP{ - net.IP("\x7f\x00\x00\x01"), - net.IPv6loopback, - }, - Routes: []boot.Route{ - { - Destination: net.IPNet{ - - IP: net.IPv4(0x7f, 0, 0, 0), - Mask: net.IPv4Mask(0xff, 0, 0, 0), - }, - }, - { - Destination: net.IPNet{ - IP: net.IPv6loopback, - Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)), - }, - }, - }, - } if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{ - LoopbackLinks: []boot.LoopbackLink{link}, + LoopbackLinks: []boot.LoopbackLink{boot.DefaultLoopbackLink}, }, nil); err != nil { return fmt.Errorf("creating loopback link and routes: %v", err) } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index d69ac8356..d1977d4de 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -258,6 +258,8 @@ syscall_test( syscall_test(test = "//test/syscalls/linux:munmap_test") +syscall_test(test = "//test/syscalls/linux:network_namespace_test") + syscall_test( add_overlay = True, test = "//test/syscalls/linux:open_create_test", diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 05a818795..aa303af84 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3639,6 +3639,23 @@ cc_binary( ], ) +cc_binary( + name = "network_namespace_test", + testonly = 1, + srcs = ["network_namespace.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + gtest, + "//test/util:capability_util", + "//test/util:memory_util", + "//test/util:test_main", + "//test/util:test_util", + "//test/util:thread_util", + "@com_google_absl//absl/synchronization", + ], +) + cc_binary( name = "semaphore_test", testonly = 1, diff --git a/test/syscalls/linux/network_namespace.cc b/test/syscalls/linux/network_namespace.cc new file mode 100644 index 000000000..6ea48c263 --- /dev/null +++ b/test/syscalls/linux/network_namespace.cc @@ -0,0 +1,121 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/synchronization/notification.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/memory_util.h" +#include "test/util/test_util.h" +#include "test/util/thread_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +using TestFunc = std::function; +using RunFunc = std::function; + +struct NamespaceStrategy { + RunFunc run; + + static NamespaceStrategy Of(RunFunc run) { + NamespaceStrategy s; + s.run = run; + return s; + } +}; + +PosixError RunWithUnshare(TestFunc fn) { + PosixError err = PosixError(-1, "function did not return a value"); + ScopedThread t([&] { + if (unshare(CLONE_NEWNET) != 0) { + err = PosixError(errno); + return; + } + err = fn(); + }); + t.Join(); + return err; +} + +PosixError RunWithClone(TestFunc fn) { + struct Args { + absl::Notification n; + TestFunc fn; + PosixError err; + }; + Args args; + args.fn = fn; + args.err = PosixError(-1, "function did not return a value"); + + ASSIGN_OR_RETURN_ERRNO( + Mapping child_stack, + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + pid_t child = clone( + +[](void *arg) { + Args *args = reinterpret_cast(arg); + args->err = args->fn(); + args->n.Notify(); + syscall(SYS_exit, 0); // Exit manually. No return address on stack. + return 0; + }, + reinterpret_cast(child_stack.addr() + kPageSize), + CLONE_NEWNET | CLONE_THREAD | CLONE_SIGHAND | CLONE_VM, &args); + if (child < 0) { + return PosixError(errno, "clone() failed"); + } + args.n.WaitForNotification(); + return args.err; +} + +class NetworkNamespaceTest + : public ::testing::TestWithParam {}; + +TEST_P(NetworkNamespaceTest, LoopbackExists) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + EXPECT_NO_ERRNO(GetParam().run([]() { + // TODO(gvisor.dev/issue/1833): Update this to test that only "lo" exists. + // Check loopback device exists. + int sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + return PosixError(errno, "socket() failed"); + } + struct ifreq ifr; + snprintf(ifr.ifr_name, IFNAMSIZ, "lo"); + if (ioctl(sock, SIOCGIFINDEX, &ifr) < 0) { + return PosixError(errno, "ioctl() failed, lo cannot be found"); + } + return NoError(); + })); +} + +INSTANTIATE_TEST_SUITE_P( + AllNetworkNamespaceTest, NetworkNamespaceTest, + ::testing::Values(NamespaceStrategy::Of(RunWithUnshare), + NamespaceStrategy::Of(RunWithClone))); + +} // namespace + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From b8f56c79be40d9c75f4e2f279c9d821d1c1c3569 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Fri, 21 Feb 2020 15:41:56 -0800 Subject: Implement tap/tun device in vfs. PiperOrigin-RevId: 296526279 --- pkg/abi/linux/BUILD | 1 + pkg/abi/linux/ioctl.go | 26 ++ pkg/abi/linux/ioctl_tun.go | 29 ++ pkg/sentry/fs/dev/BUILD | 5 + pkg/sentry/fs/dev/dev.go | 10 +- pkg/sentry/fs/dev/net_tun.go | 170 +++++++++++ pkg/syserror/syserror.go | 1 + pkg/tcpip/buffer/view.go | 6 + pkg/tcpip/link/channel/BUILD | 1 + pkg/tcpip/link/channel/channel.go | 180 +++++++++--- pkg/tcpip/link/tun/BUILD | 18 +- pkg/tcpip/link/tun/device.go | 352 +++++++++++++++++++++++ pkg/tcpip/link/tun/protocol.go | 56 ++++ pkg/tcpip/stack/nic.go | 32 +++ pkg/tcpip/stack/stack.go | 39 +++ test/syscalls/BUILD | 2 + test/syscalls/linux/BUILD | 30 ++ test/syscalls/linux/dev.cc | 7 + test/syscalls/linux/socket_netlink_route_util.cc | 163 +++++++++++ test/syscalls/linux/socket_netlink_route_util.h | 55 ++++ test/syscalls/linux/tuntap.cc | 346 ++++++++++++++++++++++ 21 files changed, 1490 insertions(+), 39 deletions(-) create mode 100644 pkg/abi/linux/ioctl_tun.go create mode 100644 pkg/sentry/fs/dev/net_tun.go create mode 100644 pkg/tcpip/link/tun/device.go create mode 100644 pkg/tcpip/link/tun/protocol.go create mode 100644 test/syscalls/linux/socket_netlink_route_util.cc create mode 100644 test/syscalls/linux/socket_netlink_route_util.h create mode 100644 test/syscalls/linux/tuntap.cc (limited to 'pkg/sentry/fs') diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index a89f34d4b..322d1ccc4 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -30,6 +30,7 @@ go_library( "futex.go", "inotify.go", "ioctl.go", + "ioctl_tun.go", "ip.go", "ipc.go", "limits.go", diff --git a/pkg/abi/linux/ioctl.go b/pkg/abi/linux/ioctl.go index 0e18db9ef..2062e6a4b 100644 --- a/pkg/abi/linux/ioctl.go +++ b/pkg/abi/linux/ioctl.go @@ -72,3 +72,29 @@ const ( SIOCGMIIPHY = 0x8947 SIOCGMIIREG = 0x8948 ) + +// ioctl(2) directions. Used to calculate requests number. +// Constants from asm-generic/ioctl.h. +const ( + _IOC_NONE = 0 + _IOC_WRITE = 1 + _IOC_READ = 2 +) + +// Constants from asm-generic/ioctl.h. +const ( + _IOC_NRBITS = 8 + _IOC_TYPEBITS = 8 + _IOC_SIZEBITS = 14 + _IOC_DIRBITS = 2 + + _IOC_NRSHIFT = 0 + _IOC_TYPESHIFT = _IOC_NRSHIFT + _IOC_NRBITS + _IOC_SIZESHIFT = _IOC_TYPESHIFT + _IOC_TYPEBITS + _IOC_DIRSHIFT = _IOC_SIZESHIFT + _IOC_SIZEBITS +) + +// IOC outputs the result of _IOC macro in asm-generic/ioctl.h. +func IOC(dir, typ, nr, size uint32) uint32 { + return uint32(dir)<<_IOC_DIRSHIFT | typ<<_IOC_TYPESHIFT | nr<<_IOC_NRSHIFT | size<<_IOC_SIZESHIFT +} diff --git a/pkg/abi/linux/ioctl_tun.go b/pkg/abi/linux/ioctl_tun.go new file mode 100644 index 000000000..c59c9c136 --- /dev/null +++ b/pkg/abi/linux/ioctl_tun.go @@ -0,0 +1,29 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// ioctl(2) request numbers from linux/if_tun.h +var ( + TUNSETIFF = IOC(_IOC_WRITE, 'T', 202, 4) + TUNGETIFF = IOC(_IOC_READ, 'T', 210, 4) +) + +// Flags from net/if_tun.h +const ( + IFF_TUN = 0x0001 + IFF_TAP = 0x0002 + IFF_NO_PI = 0x1000 + IFF_NOFILTER = 0x1000 +) diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index 4c4b7d5cc..9b6bb26d0 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -9,6 +9,7 @@ go_library( "device.go", "fs.go", "full.go", + "net_tun.go", "null.go", "random.go", "tty.go", @@ -19,15 +20,19 @@ go_library( "//pkg/context", "//pkg/rand", "//pkg/safemem", + "//pkg/sentry/arch", "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/ramfs", "//pkg/sentry/fs/tmpfs", + "//pkg/sentry/kernel", "//pkg/sentry/memmap", "//pkg/sentry/mm", "//pkg/sentry/pgalloc", + "//pkg/sentry/socket/netstack", "//pkg/syserror", + "//pkg/tcpip/link/tun", "//pkg/usermem", "//pkg/waiter", ], diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go index 35bd23991..7e66c29b0 100644 --- a/pkg/sentry/fs/dev/dev.go +++ b/pkg/sentry/fs/dev/dev.go @@ -66,8 +66,8 @@ func newMemDevice(ctx context.Context, iops fs.InodeOperations, msrc *fs.MountSo }) } -func newDirectory(ctx context.Context, msrc *fs.MountSource) *fs.Inode { - iops := ramfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)) +func newDirectory(ctx context.Context, contents map[string]*fs.Inode, msrc *fs.MountSource) *fs.Inode { + iops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) return fs.NewInode(ctx, iops, msrc, fs.StableAttr{ DeviceID: devDevice.DeviceID(), InodeID: devDevice.NextIno(), @@ -111,7 +111,7 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode { // A devpts is typically mounted at /dev/pts to provide // pseudoterminal support. Place an empty directory there for // the devpts to be mounted over. - "pts": newDirectory(ctx, msrc), + "pts": newDirectory(ctx, nil, msrc), // Similarly, applications expect a ptmx device at /dev/ptmx // connected to the terminals provided by /dev/pts/. Rather // than creating a device directly (which requires a hairy @@ -124,6 +124,10 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode { "ptmx": newSymlink(ctx, "pts/ptmx", msrc), "tty": newCharacterDevice(ctx, newTTYDevice(ctx, fs.RootOwner, 0666), msrc, ttyDevMajor, ttyDevMinor), + + "net": newDirectory(ctx, map[string]*fs.Inode{ + "tun": newCharacterDevice(ctx, newNetTunDevice(ctx, fs.RootOwner, 0666), msrc, netTunDevMajor, netTunDevMinor), + }, msrc), } iops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go new file mode 100644 index 000000000..755644488 --- /dev/null +++ b/pkg/sentry/fs/dev/net_tun.go @@ -0,0 +1,170 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dev + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/netstack" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip/link/tun" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +const ( + netTunDevMajor = 10 + netTunDevMinor = 200 +) + +// +stateify savable +type netTunInodeOperations struct { + fsutil.InodeGenericChecker `state:"nosave"` + fsutil.InodeNoExtendedAttributes `state:"nosave"` + fsutil.InodeNoopAllocate `state:"nosave"` + fsutil.InodeNoopRelease `state:"nosave"` + fsutil.InodeNoopTruncate `state:"nosave"` + fsutil.InodeNoopWriteOut `state:"nosave"` + fsutil.InodeNotDirectory `state:"nosave"` + fsutil.InodeNotMappable `state:"nosave"` + fsutil.InodeNotSocket `state:"nosave"` + fsutil.InodeNotSymlink `state:"nosave"` + fsutil.InodeVirtual `state:"nosave"` + + fsutil.InodeSimpleAttributes +} + +var _ fs.InodeOperations = (*netTunInodeOperations)(nil) + +func newNetTunDevice(ctx context.Context, owner fs.FileOwner, mode linux.FileMode) *netTunInodeOperations { + return &netTunInodeOperations{ + InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, fs.FilePermsFromMode(mode), linux.TMPFS_MAGIC), + } +} + +// GetFile implements fs.InodeOperations.GetFile. +func (iops *netTunInodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, d, flags, &netTunFileOperations{}), nil +} + +// +stateify savable +type netTunFileOperations struct { + fsutil.FileNoSeek `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + + device tun.Device +} + +var _ fs.FileOperations = (*netTunFileOperations)(nil) + +// Release implements fs.FileOperations.Release. +func (fops *netTunFileOperations) Release() { + fops.device.Release() +} + +// Ioctl implements fs.FileOperations.Ioctl. +func (fops *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + request := args[1].Uint() + data := args[2].Pointer() + + switch request { + case linux.TUNSETIFF: + t := kernel.TaskFromContext(ctx) + if t == nil { + panic("Ioctl should be called from a task context") + } + if !t.HasCapability(linux.CAP_NET_ADMIN) { + return 0, syserror.EPERM + } + stack, ok := t.NetworkContext().(*netstack.Stack) + if !ok { + return 0, syserror.EINVAL + } + + var req linux.IFReq + if _, err := usermem.CopyObjectIn(ctx, io, data, &req, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + flags := usermem.ByteOrder.Uint16(req.Data[:]) + return 0, fops.device.SetIff(stack.Stack, req.Name(), flags) + + case linux.TUNGETIFF: + var req linux.IFReq + + copy(req.IFName[:], fops.device.Name()) + + // Linux adds IFF_NOFILTER (the same value as IFF_NO_PI unfortunately) when + // there is no sk_filter. See __tun_chr_ioctl() in net/drivers/tun.c. + flags := fops.device.Flags() | linux.IFF_NOFILTER + usermem.ByteOrder.PutUint16(req.Data[:], flags) + + _, err := usermem.CopyObjectOut(ctx, io, data, &req, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + + default: + return 0, syserror.ENOTTY + } +} + +// Write implements fs.FileOperations.Write. +func (fops *netTunFileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + data := make([]byte, src.NumBytes()) + if _, err := src.CopyIn(ctx, data); err != nil { + return 0, err + } + return fops.device.Write(data) +} + +// Read implements fs.FileOperations.Read. +func (fops *netTunFileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + data, err := fops.device.Read() + if err != nil { + return 0, err + } + n, err := dst.CopyOut(ctx, data) + if n > 0 && n < len(data) { + // Not an error for partial copying. Packet truncated. + err = nil + } + return int64(n), err +} + +// Readiness implements watier.Waitable.Readiness. +func (fops *netTunFileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { + return fops.device.Readiness(mask) +} + +// EventRegister implements watier.Waitable.EventRegister. +func (fops *netTunFileOperations) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + fops.device.EventRegister(e, mask) +} + +// EventUnregister implements watier.Waitable.EventUnregister. +func (fops *netTunFileOperations) EventUnregister(e *waiter.Entry) { + fops.device.EventUnregister(e) +} diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go index 2269f6237..4b5a0fca6 100644 --- a/pkg/syserror/syserror.go +++ b/pkg/syserror/syserror.go @@ -29,6 +29,7 @@ var ( EACCES = error(syscall.EACCES) EAGAIN = error(syscall.EAGAIN) EBADF = error(syscall.EBADF) + EBADFD = error(syscall.EBADFD) EBUSY = error(syscall.EBUSY) ECHILD = error(syscall.ECHILD) ECONNREFUSED = error(syscall.ECONNREFUSED) diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go index 150310c11..17e94c562 100644 --- a/pkg/tcpip/buffer/view.go +++ b/pkg/tcpip/buffer/view.go @@ -156,3 +156,9 @@ func (vv *VectorisedView) Append(vv2 VectorisedView) { vv.views = append(vv.views, vv2.views...) vv.size += vv2.size } + +// AppendView appends the given view into this vectorised view. +func (vv *VectorisedView) AppendView(v View) { + vv.views = append(vv.views, v) + vv.size += len(v) +} diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD index 3974c464e..b8b93e78e 100644 --- a/pkg/tcpip/link/channel/BUILD +++ b/pkg/tcpip/link/channel/BUILD @@ -7,6 +7,7 @@ go_library( srcs = ["channel.go"], visibility = ["//visibility:public"], deps = [ + "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", "//pkg/tcpip/stack", diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go index 78d447acd..5944ba190 100644 --- a/pkg/tcpip/link/channel/channel.go +++ b/pkg/tcpip/link/channel/channel.go @@ -20,6 +20,7 @@ package channel import ( "context" + "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/stack" @@ -33,6 +34,118 @@ type PacketInfo struct { Route stack.Route } +// Notification is the interface for receiving notification from the packet +// queue. +type Notification interface { + // WriteNotify will be called when a write happens to the queue. + WriteNotify() +} + +// NotificationHandle is an opaque handle to the registered notification target. +// It can be used to unregister the notification when no longer interested. +// +// +stateify savable +type NotificationHandle struct { + n Notification +} + +type queue struct { + // mu protects fields below. + mu sync.RWMutex + // c is the outbound packet channel. Sending to c should hold mu. + c chan PacketInfo + numWrite int + numRead int + notify []*NotificationHandle +} + +func (q *queue) Close() { + close(q.c) +} + +func (q *queue) Read() (PacketInfo, bool) { + q.mu.Lock() + defer q.mu.Unlock() + select { + case p := <-q.c: + q.numRead++ + return p, true + default: + return PacketInfo{}, false + } +} + +func (q *queue) ReadContext(ctx context.Context) (PacketInfo, bool) { + // We have to receive from channel without holding the lock, since it can + // block indefinitely. This will cause a window that numWrite - numRead + // produces a larger number, but won't go to negative. numWrite >= numRead + // still holds. + select { + case pkt := <-q.c: + q.mu.Lock() + defer q.mu.Unlock() + q.numRead++ + return pkt, true + case <-ctx.Done(): + return PacketInfo{}, false + } +} + +func (q *queue) Write(p PacketInfo) bool { + wrote := false + + // It's important to make sure nobody can see numWrite until we increment it, + // so numWrite >= numRead holds. + q.mu.Lock() + select { + case q.c <- p: + wrote = true + q.numWrite++ + default: + } + notify := q.notify + q.mu.Unlock() + + if wrote { + // Send notification outside of lock. + for _, h := range notify { + h.n.WriteNotify() + } + } + return wrote +} + +func (q *queue) Num() int { + q.mu.RLock() + defer q.mu.RUnlock() + n := q.numWrite - q.numRead + if n < 0 { + panic("numWrite < numRead") + } + return n +} + +func (q *queue) AddNotify(notify Notification) *NotificationHandle { + q.mu.Lock() + defer q.mu.Unlock() + h := &NotificationHandle{n: notify} + q.notify = append(q.notify, h) + return h +} + +func (q *queue) RemoveNotify(handle *NotificationHandle) { + q.mu.Lock() + defer q.mu.Unlock() + // Make a copy, since we reads the array outside of lock when notifying. + notify := make([]*NotificationHandle, 0, len(q.notify)) + for _, h := range q.notify { + if h != handle { + notify = append(notify, h) + } + } + q.notify = notify +} + // Endpoint is link layer endpoint that stores outbound packets in a channel // and allows injection of inbound packets. type Endpoint struct { @@ -41,14 +154,16 @@ type Endpoint struct { linkAddr tcpip.LinkAddress LinkEPCapabilities stack.LinkEndpointCapabilities - // c is where outbound packets are queued. - c chan PacketInfo + // Outbound packet queue. + q *queue } // New creates a new channel endpoint. func New(size int, mtu uint32, linkAddr tcpip.LinkAddress) *Endpoint { return &Endpoint{ - c: make(chan PacketInfo, size), + q: &queue{ + c: make(chan PacketInfo, size), + }, mtu: mtu, linkAddr: linkAddr, } @@ -57,43 +172,36 @@ func New(size int, mtu uint32, linkAddr tcpip.LinkAddress) *Endpoint { // Close closes e. Further packet injections will panic. Reads continue to // succeed until all packets are read. func (e *Endpoint) Close() { - close(e.c) + e.q.Close() } -// Read does non-blocking read for one packet from the outbound packet queue. +// Read does non-blocking read one packet from the outbound packet queue. func (e *Endpoint) Read() (PacketInfo, bool) { - select { - case pkt := <-e.c: - return pkt, true - default: - return PacketInfo{}, false - } + return e.q.Read() } // ReadContext does blocking read for one packet from the outbound packet queue. // It can be cancelled by ctx, and in this case, it returns false. func (e *Endpoint) ReadContext(ctx context.Context) (PacketInfo, bool) { - select { - case pkt := <-e.c: - return pkt, true - case <-ctx.Done(): - return PacketInfo{}, false - } + return e.q.ReadContext(ctx) } // Drain removes all outbound packets from the channel and counts them. func (e *Endpoint) Drain() int { c := 0 for { - select { - case <-e.c: - c++ - default: + if _, ok := e.Read(); !ok { return c } + c++ } } +// NumQueued returns the number of packet queued for outbound. +func (e *Endpoint) NumQueued() int { + return e.q.Num() +} + // InjectInbound injects an inbound packet. func (e *Endpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { e.InjectLinkAddr(protocol, "", pkt) @@ -155,10 +263,7 @@ func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne Route: route, } - select { - case e.c <- p: - default: - } + e.q.Write(p) return nil } @@ -171,7 +276,6 @@ func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts []tcpip.Pac route.Release() payloadView := pkts[0].Data.ToView() n := 0 -packetLoop: for _, pkt := range pkts { off := pkt.DataOffset size := pkt.DataSize @@ -185,12 +289,10 @@ packetLoop: Route: route, } - select { - case e.c <- p: - n++ - default: - break packetLoop + if !e.q.Write(p) { + break } + n++ } return n, nil @@ -204,13 +306,21 @@ func (e *Endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error { GSO: nil, } - select { - case e.c <- p: - default: - } + e.q.Write(p) return nil } // Wait implements stack.LinkEndpoint.Wait. func (*Endpoint) Wait() {} + +// AddNotify adds a notification target for receiving event about outgoing +// packets. +func (e *Endpoint) AddNotify(notify Notification) *NotificationHandle { + return e.q.AddNotify(notify) +} + +// RemoveNotify removes handle from the list of notification targets. +func (e *Endpoint) RemoveNotify(handle *NotificationHandle) { + e.q.RemoveNotify(handle) +} diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index e5096ea38..e0db6cf54 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -4,6 +4,22 @@ package(licenses = ["notice"]) go_library( name = "tun", - srcs = ["tun_unsafe.go"], + srcs = [ + "device.go", + "protocol.go", + "tun_unsafe.go", + ], visibility = ["//visibility:public"], + deps = [ + "//pkg/abi/linux", + "//pkg/refs", + "//pkg/sync", + "//pkg/syserror", + "//pkg/tcpip", + "//pkg/tcpip/buffer", + "//pkg/tcpip/header", + "//pkg/tcpip/link/channel", + "//pkg/tcpip/stack", + "//pkg/waiter", + ], ) diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go new file mode 100644 index 000000000..6ff47a742 --- /dev/null +++ b/pkg/tcpip/link/tun/device.go @@ -0,0 +1,352 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tun + +import ( + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/link/channel" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" +) + +const ( + // drivers/net/tun.c:tun_net_init() + defaultDevMtu = 1500 + + // Queue length for outbound packet, arriving at fd side for read. Overflow + // causes packet drops. gVisor implementation-specific. + defaultDevOutQueueLen = 1024 +) + +var zeroMAC [6]byte + +// Device is an opened /dev/net/tun device. +// +// +stateify savable +type Device struct { + waiter.Queue + + mu sync.RWMutex `state:"nosave"` + endpoint *tunEndpoint + notifyHandle *channel.NotificationHandle + flags uint16 +} + +// beforeSave is invoked by stateify. +func (d *Device) beforeSave() { + d.mu.Lock() + defer d.mu.Unlock() + // TODO(b/110961832): Restore the device to stack. At this moment, the stack + // is not savable. + if d.endpoint != nil { + panic("/dev/net/tun does not support save/restore when a device is associated with it.") + } +} + +// Release implements fs.FileOperations.Release. +func (d *Device) Release() { + d.mu.Lock() + defer d.mu.Unlock() + + // Decrease refcount if there is an endpoint associated with this file. + if d.endpoint != nil { + d.endpoint.RemoveNotify(d.notifyHandle) + d.endpoint.DecRef() + d.endpoint = nil + } +} + +// SetIff services TUNSETIFF ioctl(2) request. +func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) error { + d.mu.Lock() + defer d.mu.Unlock() + + if d.endpoint != nil { + return syserror.EINVAL + } + + // Input validations. + isTun := flags&linux.IFF_TUN != 0 + isTap := flags&linux.IFF_TAP != 0 + supportedFlags := uint16(linux.IFF_TUN | linux.IFF_TAP | linux.IFF_NO_PI) + if isTap && isTun || !isTap && !isTun || flags&^supportedFlags != 0 { + return syserror.EINVAL + } + + prefix := "tun" + if isTap { + prefix = "tap" + } + + endpoint, err := attachOrCreateNIC(s, name, prefix) + if err != nil { + return syserror.EINVAL + } + + d.endpoint = endpoint + d.notifyHandle = d.endpoint.AddNotify(d) + d.flags = flags + return nil +} + +func attachOrCreateNIC(s *stack.Stack, name, prefix string) (*tunEndpoint, error) { + for { + // 1. Try to attach to an existing NIC. + if name != "" { + if nic, found := s.GetNICByName(name); found { + endpoint, ok := nic.LinkEndpoint().(*tunEndpoint) + if !ok { + // Not a NIC created by tun device. + return nil, syserror.EOPNOTSUPP + } + if !endpoint.TryIncRef() { + // Race detected: NIC got deleted in between. + continue + } + return endpoint, nil + } + } + + // 2. Creating a new NIC. + id := tcpip.NICID(s.UniqueID()) + endpoint := &tunEndpoint{ + Endpoint: channel.New(defaultDevOutQueueLen, defaultDevMtu, ""), + stack: s, + nicID: id, + name: name, + } + if endpoint.name == "" { + endpoint.name = fmt.Sprintf("%s%d", prefix, id) + } + err := s.CreateNICWithOptions(endpoint.nicID, endpoint, stack.NICOptions{ + Name: endpoint.name, + }) + switch err { + case nil: + return endpoint, nil + case tcpip.ErrDuplicateNICID: + // Race detected: A NIC has been created in between. + continue + default: + return nil, syserror.EINVAL + } + } +} + +// Write inject one inbound packet to the network interface. +func (d *Device) Write(data []byte) (int64, error) { + d.mu.RLock() + endpoint := d.endpoint + d.mu.RUnlock() + if endpoint == nil { + return 0, syserror.EBADFD + } + if !endpoint.IsAttached() { + return 0, syserror.EIO + } + + dataLen := int64(len(data)) + + // Packet information. + var pktInfoHdr PacketInfoHeader + if !d.hasFlags(linux.IFF_NO_PI) { + if len(data) < PacketInfoHeaderSize { + // Ignore bad packet. + return dataLen, nil + } + pktInfoHdr = PacketInfoHeader(data[:PacketInfoHeaderSize]) + data = data[PacketInfoHeaderSize:] + } + + // Ethernet header (TAP only). + var ethHdr header.Ethernet + if d.hasFlags(linux.IFF_TAP) { + if len(data) < header.EthernetMinimumSize { + // Ignore bad packet. + return dataLen, nil + } + ethHdr = header.Ethernet(data[:header.EthernetMinimumSize]) + data = data[header.EthernetMinimumSize:] + } + + // Try to determine network protocol number, default zero. + var protocol tcpip.NetworkProtocolNumber + switch { + case pktInfoHdr != nil: + protocol = pktInfoHdr.Protocol() + case ethHdr != nil: + protocol = ethHdr.Type() + } + + // Try to determine remote link address, default zero. + var remote tcpip.LinkAddress + switch { + case ethHdr != nil: + remote = ethHdr.SourceAddress() + default: + remote = tcpip.LinkAddress(zeroMAC[:]) + } + + pkt := tcpip.PacketBuffer{ + Data: buffer.View(data).ToVectorisedView(), + } + if ethHdr != nil { + pkt.LinkHeader = buffer.View(ethHdr) + } + endpoint.InjectLinkAddr(protocol, remote, pkt) + return dataLen, nil +} + +// Read reads one outgoing packet from the network interface. +func (d *Device) Read() ([]byte, error) { + d.mu.RLock() + endpoint := d.endpoint + d.mu.RUnlock() + if endpoint == nil { + return nil, syserror.EBADFD + } + + for { + info, ok := endpoint.Read() + if !ok { + return nil, syserror.ErrWouldBlock + } + + v, ok := d.encodePkt(&info) + if !ok { + // Ignore unsupported packet. + continue + } + return v, nil + } +} + +// encodePkt encodes packet for fd side. +func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) { + var vv buffer.VectorisedView + + // Packet information. + if !d.hasFlags(linux.IFF_NO_PI) { + hdr := make(PacketInfoHeader, PacketInfoHeaderSize) + hdr.Encode(&PacketInfoFields{ + Protocol: info.Proto, + }) + vv.AppendView(buffer.View(hdr)) + } + + // If the packet does not already have link layer header, and the route + // does not exist, we can't compute it. This is possibly a raw packet, tun + // device doesn't support this at the moment. + if info.Pkt.LinkHeader == nil && info.Route.RemoteLinkAddress == "" { + return nil, false + } + + // Ethernet header (TAP only). + if d.hasFlags(linux.IFF_TAP) { + // Add ethernet header if not provided. + if info.Pkt.LinkHeader == nil { + hdr := &header.EthernetFields{ + SrcAddr: info.Route.LocalLinkAddress, + DstAddr: info.Route.RemoteLinkAddress, + Type: info.Proto, + } + if hdr.SrcAddr == "" { + hdr.SrcAddr = d.endpoint.LinkAddress() + } + + eth := make(header.Ethernet, header.EthernetMinimumSize) + eth.Encode(hdr) + vv.AppendView(buffer.View(eth)) + } else { + vv.AppendView(info.Pkt.LinkHeader) + } + } + + // Append upper headers. + vv.AppendView(buffer.View(info.Pkt.Header.View()[len(info.Pkt.LinkHeader):])) + // Append data payload. + vv.Append(info.Pkt.Data) + + return vv.ToView(), true +} + +// Name returns the name of the attached network interface. Empty string if +// unattached. +func (d *Device) Name() string { + d.mu.RLock() + defer d.mu.RUnlock() + if d.endpoint != nil { + return d.endpoint.name + } + return "" +} + +// Flags returns the flags set for d. Zero value if unset. +func (d *Device) Flags() uint16 { + d.mu.RLock() + defer d.mu.RUnlock() + return d.flags +} + +func (d *Device) hasFlags(flags uint16) bool { + return d.flags&flags == flags +} + +// Readiness implements watier.Waitable.Readiness. +func (d *Device) Readiness(mask waiter.EventMask) waiter.EventMask { + if mask&waiter.EventIn != 0 { + d.mu.RLock() + endpoint := d.endpoint + d.mu.RUnlock() + if endpoint != nil && endpoint.NumQueued() == 0 { + mask &= ^waiter.EventIn + } + } + return mask & (waiter.EventIn | waiter.EventOut) +} + +// WriteNotify implements channel.Notification.WriteNotify. +func (d *Device) WriteNotify() { + d.Notify(waiter.EventIn) +} + +// tunEndpoint is the link endpoint for the NIC created by the tun device. +// +// It is ref-counted as multiple opening files can attach to the same NIC. +// The last owner is responsible for deleting the NIC. +type tunEndpoint struct { + *channel.Endpoint + + refs.AtomicRefCount + + stack *stack.Stack + nicID tcpip.NICID + name string +} + +// DecRef decrements refcount of e, removes NIC if refcount goes to 0. +func (e *tunEndpoint) DecRef() { + e.DecRefWithDestructor(func() { + e.stack.RemoveNIC(e.nicID) + }) +} diff --git a/pkg/tcpip/link/tun/protocol.go b/pkg/tcpip/link/tun/protocol.go new file mode 100644 index 000000000..89d9d91a9 --- /dev/null +++ b/pkg/tcpip/link/tun/protocol.go @@ -0,0 +1,56 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tun + +import ( + "encoding/binary" + + "gvisor.dev/gvisor/pkg/tcpip" +) + +const ( + // PacketInfoHeaderSize is the size of the packet information header. + PacketInfoHeaderSize = 4 + + offsetFlags = 0 + offsetProtocol = 2 +) + +// PacketInfoFields contains fields sent through the wire if IFF_NO_PI flag is +// not set. +type PacketInfoFields struct { + Flags uint16 + Protocol tcpip.NetworkProtocolNumber +} + +// PacketInfoHeader is the wire representation of the packet information sent if +// IFF_NO_PI flag is not set. +type PacketInfoHeader []byte + +// Encode encodes f into h. +func (h PacketInfoHeader) Encode(f *PacketInfoFields) { + binary.BigEndian.PutUint16(h[offsetFlags:][:2], f.Flags) + binary.BigEndian.PutUint16(h[offsetProtocol:][:2], uint16(f.Protocol)) +} + +// Flags returns the flag field in h. +func (h PacketInfoHeader) Flags() uint16 { + return binary.BigEndian.Uint16(h[offsetFlags:]) +} + +// Protocol returns the protocol field in h. +func (h PacketInfoHeader) Protocol() tcpip.NetworkProtocolNumber { + return tcpip.NetworkProtocolNumber(binary.BigEndian.Uint16(h[offsetProtocol:])) +} diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go index 862954ab2..46d3a6646 100644 --- a/pkg/tcpip/stack/nic.go +++ b/pkg/tcpip/stack/nic.go @@ -298,6 +298,33 @@ func (n *NIC) enable() *tcpip.Error { return nil } +// remove detaches NIC from the link endpoint, and marks existing referenced +// network endpoints expired. This guarantees no packets between this NIC and +// the network stack. +func (n *NIC) remove() *tcpip.Error { + n.mu.Lock() + defer n.mu.Unlock() + + // Detach from link endpoint, so no packet comes in. + n.linkEP.Attach(nil) + + // Remove permanent and permanentTentative addresses, so no packet goes out. + var errs []*tcpip.Error + for nid, ref := range n.mu.endpoints { + switch ref.getKind() { + case permanentTentative, permanent: + if err := n.removePermanentAddressLocked(nid.LocalAddress); err != nil { + errs = append(errs, err) + } + } + } + if len(errs) > 0 { + return errs[0] + } + + return nil +} + // becomeIPv6Router transitions n into an IPv6 router. // // When transitioning into an IPv6 router, host-only state (NDP discovered @@ -1302,6 +1329,11 @@ func (n *NIC) Stack() *Stack { return n.stack } +// LinkEndpoint returns the link endpoint of n. +func (n *NIC) LinkEndpoint() LinkEndpoint { + return n.linkEP +} + // isAddrTentative returns true if addr is tentative on n. // // Note that if addr is not associated with n, then this function will return diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index f0ed76fbe..900dd46c5 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -916,6 +916,18 @@ func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) *tcpip.Error { return s.CreateNICWithOptions(id, ep, NICOptions{}) } +// GetNICByName gets the NIC specified by name. +func (s *Stack) GetNICByName(name string) (*NIC, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + for _, nic := range s.nics { + if nic.Name() == name { + return nic, true + } + } + return nil, false +} + // EnableNIC enables the given NIC so that the link-layer endpoint can start // delivering packets to it. func (s *Stack) EnableNIC(id tcpip.NICID) *tcpip.Error { @@ -956,6 +968,33 @@ func (s *Stack) CheckNIC(id tcpip.NICID) bool { return nic.enabled() } +// RemoveNIC removes NIC and all related routes from the network stack. +func (s *Stack) RemoveNIC(id tcpip.NICID) *tcpip.Error { + s.mu.Lock() + defer s.mu.Unlock() + + nic, ok := s.nics[id] + if !ok { + return tcpip.ErrUnknownNICID + } + delete(s.nics, id) + + // Remove routes in-place. n tracks the number of routes written. + n := 0 + for i, r := range s.routeTable { + if r.NIC != id { + // Keep this route. + if i > n { + s.routeTable[n] = r + } + n++ + } + } + s.routeTable = s.routeTable[:n] + + return nic.remove() +} + // NICAddressRanges returns a map of NICIDs to their associated subnets. func (s *Stack) NICAddressRanges() map[tcpip.NICID][]tcpip.Subnet { s.mu.RLock() diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index d1977d4de..3518e862d 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -678,6 +678,8 @@ syscall_test( test = "//test/syscalls/linux:truncate_test", ) +syscall_test(test = "//test/syscalls/linux:tuntap_test") + syscall_test(test = "//test/syscalls/linux:udp_bind_test") syscall_test( diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index aa303af84..704bae17b 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -131,6 +131,17 @@ cc_library( ], ) +cc_library( + name = "socket_netlink_route_util", + testonly = 1, + srcs = ["socket_netlink_route_util.cc"], + hdrs = ["socket_netlink_route_util.h"], + deps = [ + ":socket_netlink_util", + "@com_google_absl//absl/types:optional", + ], +) + cc_library( name = "socket_test_util", testonly = 1, @@ -3430,6 +3441,25 @@ cc_binary( ], ) +cc_binary( + name = "tuntap_test", + testonly = 1, + srcs = ["tuntap.cc"], + linkstatic = 1, + deps = [ + ":socket_test_util", + gtest, + "//test/syscalls/linux:socket_netlink_route_util", + "//test/util:capability_util", + "//test/util:file_descriptor", + "//test/util:fs_util", + "//test/util:posix_error", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + ], +) + cc_library( name = "udp_socket_test_cases", testonly = 1, diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc index 4dd302eed..4e473268c 100644 --- a/test/syscalls/linux/dev.cc +++ b/test/syscalls/linux/dev.cc @@ -153,6 +153,13 @@ TEST(DevTest, TTYExists) { EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); } +TEST(DevTest, NetTunExists) { + struct stat statbuf = {}; + ASSERT_THAT(stat("/dev/net/tun", &statbuf), SyscallSucceeds()); + // Check that it's a character device with rw-rw-rw- permissions. + EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); +} + } // namespace } // namespace testing diff --git a/test/syscalls/linux/socket_netlink_route_util.cc b/test/syscalls/linux/socket_netlink_route_util.cc new file mode 100644 index 000000000..53eb3b6b2 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_route_util.cc @@ -0,0 +1,163 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/syscalls/linux/socket_netlink_route_util.h" + +#include +#include +#include + +#include "absl/types/optional.h" +#include "test/syscalls/linux/socket_netlink_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr uint32_t kSeq = 12345; + +} // namespace + +PosixError DumpLinks( + const FileDescriptor& fd, uint32_t seq, + const std::function& fn) { + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + }; + + struct request req = {}; + req.hdr.nlmsg_len = sizeof(req); + req.hdr.nlmsg_type = RTM_GETLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.hdr.nlmsg_seq = seq; + req.ifm.ifi_family = AF_UNSPEC; + + return NetlinkRequestResponse(fd, &req, sizeof(req), fn, false); +} + +PosixErrorOr> DumpLinks() { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + std::vector links; + RETURN_IF_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { + if (hdr->nlmsg_type != RTM_NEWLINK || + hdr->nlmsg_len < NLMSG_SPACE(sizeof(struct ifinfomsg))) { + return; + } + const struct ifinfomsg* msg = + reinterpret_cast(NLMSG_DATA(hdr)); + const auto* rta = FindRtAttr(hdr, msg, IFLA_IFNAME); + if (rta == nullptr) { + // Ignore links that do not have a name. + return; + } + + links.emplace_back(); + links.back().index = msg->ifi_index; + links.back().type = msg->ifi_type; + links.back().name = + std::string(reinterpret_cast(RTA_DATA(rta))); + })); + return links; +} + +PosixErrorOr> FindLoopbackLink() { + ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); + for (const auto& link : links) { + if (link.type == ARPHRD_LOOPBACK) { + return absl::optional(link); + } + } + return absl::optional(); +} + +PosixError LinkAddLocalAddr(int index, int family, int prefixlen, + const void* addr, int addrlen) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifaddrmsg ifaddr; + char attrbuf[512]; + }; + + struct request req = {}; + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifaddr)); + req.hdr.nlmsg_type = RTM_NEWADDR; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = kSeq; + req.ifaddr.ifa_index = index; + req.ifaddr.ifa_family = family; + req.ifaddr.ifa_prefixlen = prefixlen; + + struct rtattr* rta = reinterpret_cast( + reinterpret_cast(&req) + NLMSG_ALIGN(req.hdr.nlmsg_len)); + rta->rta_type = IFA_LOCAL; + rta->rta_len = RTA_LENGTH(addrlen); + req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + RTA_LENGTH(addrlen); + memcpy(RTA_DATA(rta), addr, addrlen); + + return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len); +} + +PosixError LinkChangeFlags(int index, unsigned int flags, unsigned int change) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifinfo; + char pad[NLMSG_ALIGNTO]; + }; + + struct request req = {}; + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo)); + req.hdr.nlmsg_type = RTM_NEWLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = kSeq; + req.ifinfo.ifi_index = index; + req.ifinfo.ifi_flags = flags; + req.ifinfo.ifi_change = change; + + return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len); +} + +PosixError LinkSetMacAddr(int index, const void* addr, int addrlen) { + ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + + struct request { + struct nlmsghdr hdr; + struct ifinfomsg ifinfo; + char attrbuf[512]; + }; + + struct request req = {}; + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo)); + req.hdr.nlmsg_type = RTM_NEWLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = kSeq; + req.ifinfo.ifi_index = index; + + struct rtattr* rta = reinterpret_cast( + reinterpret_cast(&req) + NLMSG_ALIGN(req.hdr.nlmsg_len)); + rta->rta_type = IFLA_ADDRESS; + rta->rta_len = RTA_LENGTH(addrlen); + req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) + RTA_LENGTH(addrlen); + memcpy(RTA_DATA(rta), addr, addrlen); + + return NetlinkRequestAckOrError(fd, kSeq, &req, req.hdr.nlmsg_len); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/socket_netlink_route_util.h b/test/syscalls/linux/socket_netlink_route_util.h new file mode 100644 index 000000000..2c018e487 --- /dev/null +++ b/test/syscalls/linux/socket_netlink_route_util.h @@ -0,0 +1,55 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_ +#define GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_ + +#include +#include + +#include + +#include "absl/types/optional.h" +#include "test/syscalls/linux/socket_netlink_util.h" + +namespace gvisor { +namespace testing { + +struct Link { + int index; + int16_t type; + std::string name; +}; + +PosixError DumpLinks(const FileDescriptor& fd, uint32_t seq, + const std::function& fn); + +PosixErrorOr> DumpLinks(); + +PosixErrorOr> FindLoopbackLink(); + +// LinkAddLocalAddr sets IFA_LOCAL attribute on the interface. +PosixError LinkAddLocalAddr(int index, int family, int prefixlen, + const void* addr, int addrlen); + +// LinkChangeFlags changes interface flags. E.g. IFF_UP. +PosixError LinkChangeFlags(int index, unsigned int flags, unsigned int change); + +// LinkSetMacAddr sets IFLA_ADDRESS attribute of the interface. +PosixError LinkSetMacAddr(int index, const void* addr, int addrlen); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_SYSCALLS_LINUX_SOCKET_NETLINK_ROUTE_UTIL_H_ diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc new file mode 100644 index 000000000..f6ac9d7b8 --- /dev/null +++ b/test/syscalls/linux/tuntap.cc @@ -0,0 +1,346 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_split.h" +#include "test/syscalls/linux/socket_netlink_route_util.h" +#include "test/syscalls/linux/socket_test_util.h" +#include "test/util/capability_util.h" +#include "test/util/file_descriptor.h" +#include "test/util/fs_util.h" +#include "test/util/posix_error.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { +namespace { + +constexpr int kIPLen = 4; + +constexpr const char kDevNetTun[] = "/dev/net/tun"; +constexpr const char kTapName[] = "tap0"; + +constexpr const uint8_t kMacA[ETH_ALEN] = {0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA}; +constexpr const uint8_t kMacB[ETH_ALEN] = {0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB}; + +PosixErrorOr> DumpLinkNames() { + ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); + std::set names; + for (const auto& link : links) { + names.emplace(link.name); + } + return names; +} + +PosixErrorOr> GetLinkByName(const std::string& name) { + ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks()); + for (const auto& link : links) { + if (link.name == name) { + return absl::optional(link); + } + } + return absl::optional(); +} + +struct pihdr { + uint16_t pi_flags; + uint16_t pi_protocol; +} __attribute__((packed)); + +struct ping_pkt { + pihdr pi; + struct ethhdr eth; + struct iphdr ip; + struct icmphdr icmp; + char payload[64]; +} __attribute__((packed)); + +ping_pkt CreatePingPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip, + const uint8_t dstmac[ETH_ALEN], const char* dstip) { + ping_pkt pkt = {}; + + pkt.pi.pi_protocol = htons(ETH_P_IP); + + memcpy(pkt.eth.h_dest, dstmac, sizeof(pkt.eth.h_dest)); + memcpy(pkt.eth.h_source, srcmac, sizeof(pkt.eth.h_source)); + pkt.eth.h_proto = htons(ETH_P_IP); + + pkt.ip.ihl = 5; + pkt.ip.version = 4; + pkt.ip.tos = 0; + pkt.ip.tot_len = htons(sizeof(struct iphdr) + sizeof(struct icmphdr) + + sizeof(pkt.payload)); + pkt.ip.id = 1; + pkt.ip.frag_off = 1 << 6; // Do not fragment + pkt.ip.ttl = 64; + pkt.ip.protocol = IPPROTO_ICMP; + inet_pton(AF_INET, dstip, &pkt.ip.daddr); + inet_pton(AF_INET, srcip, &pkt.ip.saddr); + pkt.ip.check = IPChecksum(pkt.ip); + + pkt.icmp.type = ICMP_ECHO; + pkt.icmp.code = 0; + pkt.icmp.checksum = 0; + pkt.icmp.un.echo.sequence = 1; + pkt.icmp.un.echo.id = 1; + + strncpy(pkt.payload, "abcd", sizeof(pkt.payload)); + pkt.icmp.checksum = ICMPChecksum(pkt.icmp, pkt.payload, sizeof(pkt.payload)); + + return pkt; +} + +struct arp_pkt { + pihdr pi; + struct ethhdr eth; + struct arphdr arp; + uint8_t arp_sha[ETH_ALEN]; + uint8_t arp_spa[kIPLen]; + uint8_t arp_tha[ETH_ALEN]; + uint8_t arp_tpa[kIPLen]; +} __attribute__((packed)); + +std::string CreateArpPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip, + const uint8_t dstmac[ETH_ALEN], const char* dstip) { + std::string buffer; + buffer.resize(sizeof(arp_pkt)); + + arp_pkt* pkt = reinterpret_cast(&buffer[0]); + { + pkt->pi.pi_protocol = htons(ETH_P_ARP); + + memcpy(pkt->eth.h_dest, kMacA, sizeof(pkt->eth.h_dest)); + memcpy(pkt->eth.h_source, kMacB, sizeof(pkt->eth.h_source)); + pkt->eth.h_proto = htons(ETH_P_ARP); + + pkt->arp.ar_hrd = htons(ARPHRD_ETHER); + pkt->arp.ar_pro = htons(ETH_P_IP); + pkt->arp.ar_hln = ETH_ALEN; + pkt->arp.ar_pln = kIPLen; + pkt->arp.ar_op = htons(ARPOP_REPLY); + + memcpy(pkt->arp_sha, srcmac, sizeof(pkt->arp_sha)); + inet_pton(AF_INET, srcip, pkt->arp_spa); + memcpy(pkt->arp_tha, dstmac, sizeof(pkt->arp_tha)); + inet_pton(AF_INET, dstip, pkt->arp_tpa); + } + return buffer; +} + +} // namespace + +class TuntapTest : public ::testing::Test { + protected: + void TearDown() override { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))) { + // Bring back capability if we had dropped it in test case. + ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, true)); + } + } +}; + +TEST_F(TuntapTest, CreateInterfaceNoCap) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + ASSERT_NO_ERRNO(SetCapability(CAP_NET_ADMIN, false)); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr = {}; + ifr.ifr_flags = IFF_TAP; + strncpy(ifr.ifr_name, kTapName, IFNAMSIZ); + + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallFailsWithErrno(EPERM)); +} + +TEST_F(TuntapTest, CreateFixedNameInterface) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr_set = {}; + ifr_set.ifr_flags = IFF_TAP; + strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ); + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set), + SyscallSucceedsWithValue(0)); + + struct ifreq ifr_get = {}; + EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get), + SyscallSucceedsWithValue(0)); + + struct ifreq ifr_expect = ifr_set; + // See __tun_chr_ioctl() in net/drivers/tun.c. + ifr_expect.ifr_flags |= IFF_NOFILTER; + + EXPECT_THAT(DumpLinkNames(), + IsPosixErrorOkAndHolds(::testing::Contains(kTapName))); + EXPECT_THAT(memcmp(&ifr_expect, &ifr_get, sizeof(ifr_get)), ::testing::Eq(0)); +} + +TEST_F(TuntapTest, CreateInterface) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr = {}; + ifr.ifr_flags = IFF_TAP; + // Empty ifr.ifr_name. Let kernel assign. + + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0)); + + struct ifreq ifr_get = {}; + EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get), + SyscallSucceedsWithValue(0)); + + std::string ifname = ifr_get.ifr_name; + EXPECT_THAT(ifname, ::testing::StartsWith("tap")); + EXPECT_THAT(DumpLinkNames(), + IsPosixErrorOkAndHolds(::testing::Contains(ifname))); +} + +TEST_F(TuntapTest, InvalidReadWrite) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + char buf[128] = {}; + EXPECT_THAT(read(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD)); + EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD)); +} + +TEST_F(TuntapTest, WriteToDownDevice) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + // FIXME: gVisor always creates enabled/up'd interfaces. + SKIP_IF(IsRunningOnGvisor()); + + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + // Device created should be down by default. + struct ifreq ifr = {}; + ifr.ifr_flags = IFF_TAP; + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0)); + + char buf[128] = {}; + EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EIO)); +} + +// This test sets up a TAP device and pings kernel by sending ICMP echo request. +// +// It works as the following: +// * Open /dev/net/tun, and create kTapName interface. +// * Use rtnetlink to do initial setup of the interface: +// * Assign IP address 10.0.0.1/24 to kernel. +// * MAC address: kMacA +// * Bring up the interface. +// * Send an ICMP echo reqest (ping) packet from 10.0.0.2 (kMacB) to kernel. +// * Loop to receive packets from TAP device/fd: +// * If packet is an ICMP echo reply, it stops and passes the test. +// * If packet is an ARP request, it responds with canned reply and resends +// the +// ICMP request packet. +TEST_F(TuntapTest, PingKernel) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + + // Interface creation. + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); + + struct ifreq ifr_set = {}; + ifr_set.ifr_flags = IFF_TAP; + strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ); + EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set), + SyscallSucceedsWithValue(0)); + + absl::optional link = + ASSERT_NO_ERRNO_AND_VALUE(GetLinkByName(kTapName)); + ASSERT_TRUE(link.has_value()); + + // Interface setup. + struct in_addr addr; + inet_pton(AF_INET, "10.0.0.1", &addr); + EXPECT_NO_ERRNO(LinkAddLocalAddr(link->index, AF_INET, /*prefixlen=*/24, + &addr, sizeof(addr))); + + if (!IsRunningOnGvisor()) { + // FIXME: gVisor doesn't support setting MAC address on interfaces yet. + EXPECT_NO_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA))); + + // FIXME: gVisor always creates enabled/up'd interfaces. + EXPECT_NO_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP)); + } + + ping_pkt ping_req = CreatePingPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1"); + std::string arp_rep = CreateArpPacket(kMacB, "10.0.0.2", kMacA, "10.0.0.1"); + + // Send ping, this would trigger an ARP request on Linux. + EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)), + SyscallSucceedsWithValue(sizeof(ping_req))); + + // Receive loop to process inbound packets. + struct inpkt { + union { + pihdr pi; + ping_pkt ping; + arp_pkt arp; + }; + }; + while (1) { + inpkt r = {}; + int n = read(fd.get(), &r, sizeof(r)); + EXPECT_THAT(n, SyscallSucceeds()); + + if (n < sizeof(pihdr)) { + std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol + << " len: " << n << std::endl; + continue; + } + + // Process ARP packet. + if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) { + // Respond with canned ARP reply. + EXPECT_THAT(write(fd.get(), arp_rep.data(), arp_rep.size()), + SyscallSucceedsWithValue(arp_rep.size())); + // First ping request might have been dropped due to mac address not in + // ARP cache. Send it again. + EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)), + SyscallSucceedsWithValue(sizeof(ping_req))); + } + + // Process ping response packet. + if (n >= sizeof(ping_pkt) && r.pi.pi_protocol == ping_req.pi.pi_protocol && + r.ping.ip.protocol == ping_req.ip.protocol && + !memcmp(&r.ping.ip.saddr, &ping_req.ip.daddr, kIPLen) && + !memcmp(&r.ping.ip.daddr, &ping_req.ip.saddr, kIPLen) && + r.ping.icmp.type == 0 && r.ping.icmp.code == 0) { + // Ends and passes the test. + break; + } + } +} + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From 53504e29ca27b8dc9e098fbb88983fdbce90cca3 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 25 Feb 2020 12:16:43 -0800 Subject: Fix mount refcount issue. Each mount is holds a reference on a root Dirent, but the mount itself may live beyond it's own reference. This means that a call to Root() can come after the associated reference has been dropped. Instead of introducing a separate layer of references for mount objects, we simply change the Root() method to use TryIncRef() and allow it to return nil if the mount is already gone. This requires updating a small number of callers and minimizes the change (since VFSv2 will replace this code shortly). PiperOrigin-RevId: 297174230 --- pkg/sentry/fs/mount_test.go | 11 ++++++----- pkg/sentry/fs/mounts.go | 10 +++++++--- pkg/sentry/fs/proc/mounts.go | 16 ++++++++++++---- 3 files changed, 25 insertions(+), 12 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go index e672a438c..a3d10770b 100644 --- a/pkg/sentry/fs/mount_test.go +++ b/pkg/sentry/fs/mount_test.go @@ -36,11 +36,12 @@ func mountPathsAre(root *Dirent, got []*Mount, want ...string) error { gotPaths := make(map[string]struct{}, len(got)) gotStr := make([]string, len(got)) for i, g := range got { - groot := g.Root() - name, _ := groot.FullName(root) - groot.DecRef() - gotStr[i] = name - gotPaths[name] = struct{}{} + if groot := g.Root(); groot != nil { + name, _ := groot.FullName(root) + groot.DecRef() + gotStr[i] = name + gotPaths[name] = struct{}{} + } } if len(got) != len(want) { return fmt.Errorf("mount paths are different, got: %q, want: %q", gotStr, want) diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index 574a2cc91..c7981f66e 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -100,10 +100,14 @@ func newUndoMount(d *Dirent) *Mount { } } -// Root returns the root dirent of this mount. Callers must call DecRef on the -// returned dirent. +// Root returns the root dirent of this mount. +// +// This may return nil if the mount has already been free. Callers must handle this +// case appropriately. If non-nil, callers must call DecRef on the returned *Dirent. func (m *Mount) Root() *Dirent { - m.root.IncRef() + if !m.root.TryIncRef() { + return nil + } return m.root } diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go index c10888100..94deb553b 100644 --- a/pkg/sentry/fs/proc/mounts.go +++ b/pkg/sentry/fs/proc/mounts.go @@ -60,13 +60,15 @@ func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) { }) for _, m := range ms { mroot := m.Root() + if mroot == nil { + continue // No longer valid. + } mountPath, desc := mroot.FullName(rootDir) mroot.DecRef() if !desc { // MountSources that are not descendants of the chroot jail are ignored. continue } - fn(mountPath, m) } } @@ -91,6 +93,12 @@ func (mif *mountInfoFile) ReadSeqFileData(ctx context.Context, handle seqfile.Se var buf bytes.Buffer forEachMount(mif.t, func(mountPath string, m *fs.Mount) { + mroot := m.Root() + if mroot == nil { + return // No longer valid. + } + defer mroot.DecRef() + // Format: // 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue // (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11) @@ -107,9 +115,6 @@ func (mif *mountInfoFile) ReadSeqFileData(ctx context.Context, handle seqfile.Se // (3) Major:Minor device ID. We don't have a superblock, so we // just use the root inode device number. - mroot := m.Root() - defer mroot.DecRef() - sa := mroot.Inode.StableAttr fmt.Fprintf(&buf, "%d:%d ", sa.DeviceFileMajor, sa.DeviceFileMinor) @@ -207,6 +212,9 @@ func (mf *mountsFile) ReadSeqFileData(ctx context.Context, handle seqfile.SeqHan // // The "needs dump"and fsck flags are always 0, which is allowed. root := m.Root() + if root == nil { + return // No longer valid. + } defer root.DecRef() flags := root.Inode.MountSource.Flags -- cgit v1.2.3 From fba479b3c78621cb122af20d1d677fe9193a971c Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 25 Feb 2020 19:03:23 -0800 Subject: Fix DATA RACE in fs.MayDelete. MayDelete must lock the directory also, otherwise concurrent renames may race. Note that this also changes the methods to be aligned with the actual Remove and RemoveDirectory methods to minimize confusion when reading the code. (It was hard to see that resolution was correct.) PiperOrigin-RevId: 297258304 --- pkg/sentry/fs/dirent.go | 25 ++++++++++++++----------- pkg/sentry/syscalls/linux/sys_file.go | 4 ++-- 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index acab0411a..e0b32e1c1 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -1438,8 +1438,8 @@ func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName }, nil } -func checkSticky(ctx context.Context, dir *Dirent, victim *Dirent) error { - uattr, err := dir.Inode.UnstableAttr(ctx) +func (d *Dirent) checkSticky(ctx context.Context, victim *Dirent) error { + uattr, err := d.Inode.UnstableAttr(ctx) if err != nil { return syserror.EPERM } @@ -1465,30 +1465,33 @@ func checkSticky(ctx context.Context, dir *Dirent, victim *Dirent) error { return syserror.EPERM } -// MayDelete determines whether `name`, a child of `dir`, can be deleted or +// MayDelete determines whether `name`, a child of `d`, can be deleted or // renamed by `ctx`. // // Compare Linux kernel fs/namei.c:may_delete. -func MayDelete(ctx context.Context, root, dir *Dirent, name string) error { - if err := dir.Inode.CheckPermission(ctx, PermMask{Write: true, Execute: true}); err != nil { +func (d *Dirent) MayDelete(ctx context.Context, root *Dirent, name string) error { + if err := d.Inode.CheckPermission(ctx, PermMask{Write: true, Execute: true}); err != nil { return err } - victim, err := dir.Walk(ctx, root, name) + unlock := d.lockDirectory() + defer unlock() + + victim, err := d.walk(ctx, root, name, true /* may unlock */) if err != nil { return err } defer victim.DecRef() - return mayDelete(ctx, dir, victim) + return d.mayDelete(ctx, victim) } // mayDelete determines whether `victim`, a child of `dir`, can be deleted or // renamed by `ctx`. // // Preconditions: `dir` is writable and executable by `ctx`. -func mayDelete(ctx context.Context, dir, victim *Dirent) error { - if err := checkSticky(ctx, dir, victim); err != nil { +func (d *Dirent) mayDelete(ctx context.Context, victim *Dirent) error { + if err := d.checkSticky(ctx, victim); err != nil { return err } @@ -1542,7 +1545,7 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string defer renamed.DecRef() // Check that the renamed dirent is deletable. - if err := mayDelete(ctx, oldParent, renamed); err != nil { + if err := oldParent.mayDelete(ctx, renamed); err != nil { return err } @@ -1580,7 +1583,7 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string // across the Rename, so must call DecRef manually (no defer). // Check that we can delete replaced. - if err := mayDelete(ctx, newParent, replaced); err != nil { + if err := newParent.mayDelete(ctx, replaced); err != nil { replaced.DecRef() return err } diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index c21f14dc0..d10a9bed8 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -1236,7 +1236,7 @@ func rmdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error { return syserror.ENOTEMPTY } - if err := fs.MayDelete(t, root, d, name); err != nil { + if err := d.MayDelete(t, root, name); err != nil { return err } @@ -1517,7 +1517,7 @@ func unlinkAt(t *kernel.Task, dirFD int32, addr usermem.Addr) error { return syserror.ENOTDIR } - if err := fs.MayDelete(t, root, d, name); err != nil { + if err := d.MayDelete(t, root, name); err != nil { return err } -- cgit v1.2.3 From 6b4d36e3253238dd72d0861ac1220d147e1de8dd Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Fri, 28 Feb 2020 10:37:52 -0800 Subject: Hide /dev/net/tun when using hostinet. /dev/net/tun does not currently work with hostinet. This has caused some program starts failing because it thinks the feature exists. PiperOrigin-RevId: 297876196 --- pkg/sentry/fs/dev/BUILD | 1 + pkg/sentry/fs/dev/dev.go | 7 +++++-- pkg/sentry/fs/dev/net_tun.go | 7 +++++++ pkg/sentry/kernel/kernel.go | 4 ++++ test/syscalls/BUILD | 5 +++++ test/syscalls/linux/BUILD | 12 +++++++++++ test/syscalls/linux/dev.cc | 7 ------- test/syscalls/linux/tuntap.cc | 7 +++++++ test/syscalls/linux/tuntap_hostinet.cc | 37 ++++++++++++++++++++++++++++++++++ 9 files changed, 78 insertions(+), 9 deletions(-) create mode 100644 test/syscalls/linux/tuntap_hostinet.cc (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/dev/BUILD b/pkg/sentry/fs/dev/BUILD index 9b6bb26d0..9379a4d7b 100644 --- a/pkg/sentry/fs/dev/BUILD +++ b/pkg/sentry/fs/dev/BUILD @@ -26,6 +26,7 @@ go_library( "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/ramfs", "//pkg/sentry/fs/tmpfs", + "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/memmap", "//pkg/sentry/mm", diff --git a/pkg/sentry/fs/dev/dev.go b/pkg/sentry/fs/dev/dev.go index 7e66c29b0..acbd401a0 100644 --- a/pkg/sentry/fs/dev/dev.go +++ b/pkg/sentry/fs/dev/dev.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/usermem" ) @@ -124,10 +125,12 @@ func New(ctx context.Context, msrc *fs.MountSource) *fs.Inode { "ptmx": newSymlink(ctx, "pts/ptmx", msrc), "tty": newCharacterDevice(ctx, newTTYDevice(ctx, fs.RootOwner, 0666), msrc, ttyDevMajor, ttyDevMinor), + } - "net": newDirectory(ctx, map[string]*fs.Inode{ + if isNetTunSupported(inet.StackFromContext(ctx)) { + contents["net"] = newDirectory(ctx, map[string]*fs.Inode{ "tun": newCharacterDevice(ctx, newNetTunDevice(ctx, fs.RootOwner, 0666), msrc, netTunDevMajor, netTunDevMinor), - }, msrc), + }, msrc) } iops := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go index 755644488..dc7ad075a 100644 --- a/pkg/sentry/fs/dev/net_tun.go +++ b/pkg/sentry/fs/dev/net_tun.go @@ -20,6 +20,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/syserror" @@ -168,3 +169,9 @@ func (fops *netTunFileOperations) EventRegister(e *waiter.Entry, mask waiter.Eve func (fops *netTunFileOperations) EventUnregister(e *waiter.Entry) { fops.device.EventUnregister(e) } + +// isNetTunSupported returns whether /dev/net/tun device is supported for s. +func isNetTunSupported(s inet.Stack) bool { + _, ok := s.(*netstack.Stack) + return ok +} diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 8b76750e9..1d627564f 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -755,6 +755,8 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { return ctx.k.GlobalInit().Leader().MountNamespaceVFS2() case fs.CtxDirentCacheLimiter: return ctx.k.DirentCacheLimiter + case inet.CtxStack: + return ctx.k.RootNetworkNamespace().Stack() case ktime.CtxRealtimeClock: return ctx.k.RealtimeClock() case limits.CtxLimits: @@ -1481,6 +1483,8 @@ func (ctx supervisorContext) Value(key interface{}) interface{} { return ctx.k.GlobalInit().Leader().MountNamespaceVFS2() case fs.CtxDirentCacheLimiter: return ctx.k.DirentCacheLimiter + case inet.CtxStack: + return ctx.k.RootNetworkNamespace().Stack() case ktime.CtxRealtimeClock: return ctx.k.RealtimeClock() case limits.CtxLimits: diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 3518e862d..a69b0ce13 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -680,6 +680,11 @@ syscall_test( syscall_test(test = "//test/syscalls/linux:tuntap_test") +syscall_test( + add_hostinet = True, + test = "//test/syscalls/linux:tuntap_hostinet_test", +) + syscall_test(test = "//test/syscalls/linux:udp_bind_test") syscall_test( diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 704bae17b..70c120e42 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -3460,6 +3460,18 @@ cc_binary( ], ) +cc_binary( + name = "tuntap_hostinet_test", + testonly = 1, + srcs = ["tuntap_hostinet.cc"], + linkstatic = 1, + deps = [ + gtest, + "//test/util:test_main", + "//test/util:test_util", + ], +) + cc_library( name = "udp_socket_test_cases", testonly = 1, diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc index 4e473268c..4dd302eed 100644 --- a/test/syscalls/linux/dev.cc +++ b/test/syscalls/linux/dev.cc @@ -153,13 +153,6 @@ TEST(DevTest, TTYExists) { EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); } -TEST(DevTest, NetTunExists) { - struct stat statbuf = {}; - ASSERT_THAT(stat("/dev/net/tun", &statbuf), SyscallSucceeds()); - // Check that it's a character device with rw-rw-rw- permissions. - EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); -} - } // namespace } // namespace testing diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc index f6ac9d7b8..f734511d6 100644 --- a/test/syscalls/linux/tuntap.cc +++ b/test/syscalls/linux/tuntap.cc @@ -153,6 +153,13 @@ std::string CreateArpPacket(const uint8_t srcmac[ETH_ALEN], const char* srcip, } // namespace +TEST(TuntapStaticTest, NetTunExists) { + struct stat statbuf; + ASSERT_THAT(stat(kDevNetTun, &statbuf), SyscallSucceeds()); + // Check that it's a character device with rw-rw-rw- permissions. + EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666); +} + class TuntapTest : public ::testing::Test { protected: void TearDown() override { diff --git a/test/syscalls/linux/tuntap_hostinet.cc b/test/syscalls/linux/tuntap_hostinet.cc new file mode 100644 index 000000000..0c527419e --- /dev/null +++ b/test/syscalls/linux/tuntap_hostinet.cc @@ -0,0 +1,37 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +TEST(TuntapHostInetTest, NoNetTun) { + SKIP_IF(!IsRunningOnGvisor()); + + struct stat statbuf; + ASSERT_THAT(stat("/dev/net/tun", &statbuf), SyscallFailsWithErrno(ENOENT)); +} + +} // namespace +} // namespace testing + +} // namespace gvisor -- cgit v1.2.3 From 62bd3ca8a37543c060292ca86669e537b9ebf36d Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Mon, 2 Mar 2020 10:05:52 -0800 Subject: Take write lock when removing xattr PiperOrigin-RevId: 298380654 --- pkg/sentry/fs/fsutil/inode.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go index daecc4ffe..1922ff08c 100644 --- a/pkg/sentry/fs/fsutil/inode.go +++ b/pkg/sentry/fs/fsutil/inode.go @@ -259,8 +259,8 @@ func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode, ui // RemoveXattr implements fs.InodeOperations.RemoveXattr. func (i *InodeSimpleExtendedAttributes) RemoveXattr(_ context.Context, _ *fs.Inode, name string) error { - i.mu.RLock() - defer i.mu.RUnlock() + i.mu.Lock() + defer i.mu.Unlock() if _, ok := i.xattrs[name]; ok { delete(i.xattrs, name) return nil -- cgit v1.2.3 From da48fc6cca23a38faef51c5b5f8ae609940773a0 Mon Sep 17 00:00:00 2001 From: Ian Lewis Date: Thu, 5 Mar 2020 18:21:39 -0800 Subject: Stub oom_score_adj and oom_score. Adds an oom_score_adj and oom_score proc file stub. oom_score_adj accepts writes of values -1000 to 1000 and persists the value with the task. New tasks inherit the parent's oom_score_adj. oom_score is a read-only stub that always returns the value '0'. Issue #202 PiperOrigin-RevId: 299245355 --- pkg/sentry/fs/proc/task.go | 126 ++++++++++++++++++++++++++----- pkg/sentry/fsimpl/proc/task.go | 12 +-- pkg/sentry/fsimpl/proc/task_files.go | 43 +++++++++++ pkg/sentry/fsimpl/proc/tasks_test.go | 32 ++++---- pkg/sentry/kernel/task.go | 33 ++++++++ pkg/sentry/kernel/task_clone.go | 6 ++ pkg/sentry/kernel/task_start.go | 4 + test/syscalls/BUILD | 8 +- test/syscalls/linux/BUILD | 13 ++++ test/syscalls/linux/proc.cc | 21 ++++++ test/syscalls/linux/proc_pid_oomscore.cc | 72 ++++++++++++++++++ 11 files changed, 330 insertions(+), 40 deletions(-) create mode 100644 test/syscalls/linux/proc_pid_oomscore.cc (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 8ab8d8a02..4e9b0fc00 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -72,24 +72,26 @@ var _ fs.InodeOperations = (*taskDir)(nil) // newTaskDir creates a new proc task entry. func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode { contents := map[string]*fs.Inode{ - "auxv": newAuxvec(t, msrc), - "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), - "comm": newComm(t, msrc), - "environ": newExecArgInode(t, msrc, environExecArg), - "exe": newExe(t, msrc), - "fd": newFdDir(t, msrc), - "fdinfo": newFdInfoDir(t, msrc), - "gid_map": newGIDMap(t, msrc), - "io": newIO(t, msrc, isThreadGroup), - "maps": newMaps(t, msrc), - "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), - "mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), - "ns": newNamespaceDir(t, msrc), - "smaps": newSmaps(t, msrc), - "stat": newTaskStat(t, msrc, isThreadGroup, p.pidns), - "statm": newStatm(t, msrc), - "status": newStatus(t, msrc, p.pidns), - "uid_map": newUIDMap(t, msrc), + "auxv": newAuxvec(t, msrc), + "cmdline": newExecArgInode(t, msrc, cmdlineExecArg), + "comm": newComm(t, msrc), + "environ": newExecArgInode(t, msrc, environExecArg), + "exe": newExe(t, msrc), + "fd": newFdDir(t, msrc), + "fdinfo": newFdInfoDir(t, msrc), + "gid_map": newGIDMap(t, msrc), + "io": newIO(t, msrc, isThreadGroup), + "maps": newMaps(t, msrc), + "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), + "mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), + "ns": newNamespaceDir(t, msrc), + "oom_score": newOOMScore(t, msrc), + "oom_score_adj": newOOMScoreAdj(t, msrc), + "smaps": newSmaps(t, msrc), + "stat": newTaskStat(t, msrc, isThreadGroup, p.pidns), + "statm": newStatm(t, msrc), + "status": newStatus(t, msrc, p.pidns), + "uid_map": newUIDMap(t, msrc), } if isThreadGroup { contents["task"] = p.newSubtasks(t, msrc) @@ -796,4 +798,92 @@ func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequenc return int64(n), err } +// newOOMScore returns a oom_score file. It is a stub that always returns 0. +// TODO(gvisor.dev/issue/1967) +func newOOMScore(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + return newStaticProcInode(t, msrc, []byte("0\n")) +} + +// oomScoreAdj is a file containing the oom_score adjustment for a task. +// +// +stateify savable +type oomScoreAdj struct { + fsutil.SimpleFileInode + + t *kernel.Task +} + +// +stateify savable +type oomScoreAdjFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + t *kernel.Task +} + +// newOOMScoreAdj returns a oom_score_adj file. +func newOOMScoreAdj(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + i := &oomScoreAdj{ + SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), + t: t, + } + return newProcInode(t, i, msrc, fs.SpecialFile, t) +} + +// Truncate implements fs.InodeOperations.Truncate. Truncate is called when +// O_TRUNC is specified for any kind of existing Dirent but is not called via +// (f)truncate for proc files. +func (*oomScoreAdj) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + +// GetFile implements fs.InodeOperations.GetFile. +func (o *oomScoreAdj) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + return fs.NewFile(ctx, dirent, flags, &oomScoreAdjFile{t: o.t}), nil +} + +// Read implements fs.FileOperations.Read. +func (f *oomScoreAdjFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + adj, err := f.t.OOMScoreAdj() + if err != nil { + return 0, err + } + adjBytes := []byte(strconv.FormatInt(int64(adj), 10) + "\n") + n, err := dst.CopyOut(ctx, adjBytes) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. +func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit input size so as not to impact performance if input size is large. + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + + if err := f.t.SetOOMScoreAdj(v); err != nil { + return 0, err + } + + return n, nil +} + // LINT.ThenChange(../../fsimpl/proc/task.go|../../fsimpl/proc/task_files.go) diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 2d814668a..18e5cd6f6 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -62,11 +62,13 @@ func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNames "pid": newNamespaceSymlink(task, inoGen.NextIno(), "pid"), "user": newNamespaceSymlink(task, inoGen.NextIno(), "user"), }), - "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}), - "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), - "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{task: task}), - "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{task: task, pidns: pidns}), - "uid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: false}), + "oom_score": newTaskOwnedFile(task, inoGen.NextIno(), 0444, newStaticFile("0\n")), + "oom_score_adj": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &oomScoreAdj{task: task}), + "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}), + "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), + "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{task: task}), + "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{task: task, pidns: pidns}), + "uid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: false}), } if isThreadGroup { contents["task"] = newSubtasks(task, pidns, inoGen, cgroupControllers) diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index efd3b3453..5a231ac86 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -525,3 +525,46 @@ func (i *ioData) Generate(ctx context.Context, buf *bytes.Buffer) error { fmt.Fprintf(buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled) return nil } + +// oomScoreAdj is a stub of the /proc//oom_score_adj file. +// +// +stateify savable +type oomScoreAdj struct { + kernfs.DynamicBytesFile + + task *kernel.Task +} + +var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error { + adj, err := o.task.OOMScoreAdj() + if err != nil { + return err + } + fmt.Fprintf(buf, "%d\n", adj) + return nil +} + +// Write implements vfs.WritableDynamicBytesSource.Write. +func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit input size so as not to impact performance if input size is large. + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + + if err := o.task.SetOOMScoreAdj(v); err != nil { + return 0, err + } + + return n, nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go index c5d531fe0..0eb401619 100644 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_test.go @@ -63,21 +63,23 @@ var ( "thread-self": threadSelfLink.NextOff, } taskStaticFiles = map[string]testutil.DirentType{ - "auxv": linux.DT_REG, - "cgroup": linux.DT_REG, - "cmdline": linux.DT_REG, - "comm": linux.DT_REG, - "environ": linux.DT_REG, - "gid_map": linux.DT_REG, - "io": linux.DT_REG, - "maps": linux.DT_REG, - "ns": linux.DT_DIR, - "smaps": linux.DT_REG, - "stat": linux.DT_REG, - "statm": linux.DT_REG, - "status": linux.DT_REG, - "task": linux.DT_DIR, - "uid_map": linux.DT_REG, + "auxv": linux.DT_REG, + "cgroup": linux.DT_REG, + "cmdline": linux.DT_REG, + "comm": linux.DT_REG, + "environ": linux.DT_REG, + "gid_map": linux.DT_REG, + "io": linux.DT_REG, + "maps": linux.DT_REG, + "ns": linux.DT_DIR, + "oom_score": linux.DT_REG, + "oom_score_adj": linux.DT_REG, + "smaps": linux.DT_REG, + "stat": linux.DT_REG, + "statm": linux.DT_REG, + "status": linux.DT_REG, + "task": linux.DT_DIR, + "uid_map": linux.DT_REG, } ) diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index 2cee2e6ed..c0dbbe890 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -37,6 +37,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" ) @@ -554,6 +555,13 @@ type Task struct { // // startTime is protected by mu. startTime ktime.Time + + // oomScoreAdj is the task's OOM score adjustment. This is currently not + // used but is maintained for consistency. + // TODO(gvisor.dev/issue/1967) + // + // oomScoreAdj is protected by mu, and is owned by the task goroutine. + oomScoreAdj int32 } func (t *Task) savePtraceTracer() *Task { @@ -847,3 +855,28 @@ func (t *Task) AbstractSockets() *AbstractSocketNamespace { func (t *Task) ContainerID() string { return t.containerID } + +// OOMScoreAdj gets the task's OOM score adjustment. +func (t *Task) OOMScoreAdj() (int32, error) { + t.mu.Lock() + defer t.mu.Unlock() + if t.ExitState() == TaskExitDead { + return 0, syserror.ESRCH + } + return t.oomScoreAdj, nil +} + +// SetOOMScoreAdj sets the task's OOM score adjustment. The value should be +// between -1000 and 1000 inclusive. +func (t *Task) SetOOMScoreAdj(adj int32) error { + t.mu.Lock() + defer t.mu.Unlock() + if t.ExitState() == TaskExitDead { + return syserror.ESRCH + } + if adj > 1000 || adj < -1000 { + return syserror.EINVAL + } + t.oomScoreAdj = adj + return nil +} diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index 78866f280..dda502bb8 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -264,6 +264,11 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { rseqSignature = t.rseqSignature } + adj, err := t.OOMScoreAdj() + if err != nil { + return 0, nil, err + } + cfg := &TaskConfig{ Kernel: t.k, ThreadGroup: tg, @@ -282,6 +287,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { RSeqAddr: rseqAddr, RSeqSignature: rseqSignature, ContainerID: t.ContainerID(), + OOMScoreAdj: adj, } if opts.NewThreadGroup { cfg.Parent = t diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index a5035bb7f..2bbf48bb8 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -93,6 +93,9 @@ type TaskConfig struct { // ContainerID is the container the new task belongs to. ContainerID string + + // oomScoreAdj is the task's OOM score adjustment. + OOMScoreAdj int32 } // NewTask creates a new task defined by cfg. @@ -143,6 +146,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { rseqSignature: cfg.RSeqSignature, futexWaiter: futex.NewWaiter(), containerID: cfg.ContainerID, + oomScoreAdj: cfg.OOMScoreAdj, } t.creds.Store(cfg.Credentials) t.endStopCond.L = &t.tg.signalHandlers.mu diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index a69b0ce13..9800a0cdf 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -318,10 +318,14 @@ syscall_test( test = "//test/syscalls/linux:proc_test", ) -syscall_test(test = "//test/syscalls/linux:proc_pid_uid_gid_map_test") - syscall_test(test = "//test/syscalls/linux:proc_net_test") +syscall_test(test = "//test/syscalls/linux:proc_pid_oomscore_test") + +syscall_test(test = "//test/syscalls/linux:proc_pid_smaps_test") + +syscall_test(test = "//test/syscalls/linux:proc_pid_uid_gid_map_test") + syscall_test( size = "medium", test = "//test/syscalls/linux:pselect_test", diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 0fbd556de..43455f1a3 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1631,6 +1631,19 @@ cc_binary( ], ) +cc_binary( + name = "proc_pid_oomscore_test", + testonly = 1, + srcs = ["proc_pid_oomscore.cc"], + linkstatic = 1, + deps = [ + "//test/util:fs_util", + "//test/util:test_main", + "//test/util:test_util", + "@com_google_absl//absl/strings", + ], +) + cc_binary( name = "proc_pid_smaps_test", testonly = 1, diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index f91187e75..5a70f6c3b 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1431,6 +1431,12 @@ TEST(ProcPidFile, SubprocessRunning) { EXPECT_THAT(ReadWhileRunning("uid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("oom_score", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileRunning("oom_score_adj", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); } // Test whether /proc/PID/ files can be read for a zombie process. @@ -1466,6 +1472,12 @@ TEST(ProcPidFile, SubprocessZombie) { EXPECT_THAT(ReadWhileZombied("uid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); + EXPECT_THAT(ReadWhileZombied("oom_score", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + + EXPECT_THAT(ReadWhileZombied("oom_score_adj", buf, sizeof(buf)), + SyscallSucceedsWithValue(sizeof(buf))); + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux // on proc files. // @@ -1527,6 +1539,15 @@ TEST(ProcPidFile, SubprocessExited) { EXPECT_THAT(ReadWhileExited("uid_map", buf, sizeof(buf)), SyscallSucceedsWithValue(sizeof(buf))); + + if (!IsRunningOnGvisor()) { + // FIXME(gvisor.dev/issue/164): Succeeds on gVisor. + EXPECT_THAT(ReadWhileExited("oom_score", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); + } + + EXPECT_THAT(ReadWhileExited("oom_score_adj", buf, sizeof(buf)), + SyscallFailsWithErrno(ESRCH)); } PosixError DirContainsImpl(absl::string_view path, diff --git a/test/syscalls/linux/proc_pid_oomscore.cc b/test/syscalls/linux/proc_pid_oomscore.cc new file mode 100644 index 000000000..707821a3f --- /dev/null +++ b/test/syscalls/linux/proc_pid_oomscore.cc @@ -0,0 +1,72 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include + +#include "test/util/fs_util.h" +#include "test/util/test_util.h" + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr ReadProcNumber(std::string path) { + ASSIGN_OR_RETURN_ERRNO(std::string contents, GetContents(path)); + EXPECT_EQ(contents[contents.length() - 1], '\n'); + + int num; + if (!absl::SimpleAtoi(contents, &num)) { + return PosixError(EINVAL, "invalid value: " + contents); + } + + return num; +} + +TEST(ProcPidOomscoreTest, BasicRead) { + auto const oom_score = + ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score")); + EXPECT_LE(oom_score, 1000); + EXPECT_GE(oom_score, -1000); +} + +TEST(ProcPidOomscoreAdjTest, BasicRead) { + auto const oom_score = + ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score_adj")); + + // oom_score_adj defaults to 0. + EXPECT_EQ(oom_score, 0); +} + +TEST(ProcPidOomscoreAdjTest, BasicWrite) { + constexpr int test_value = 7; + FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/oom_score_adj", O_WRONLY)); + ASSERT_THAT( + RetryEINTR(write)(fd.get(), std::to_string(test_value).c_str(), 1), + SyscallSucceeds()); + + auto const oom_score = + ASSERT_NO_ERRNO_AND_VALUE(ReadProcNumber("/proc/self/oom_score_adj")); + EXPECT_EQ(oom_score, test_value); +} + +} // namespace + +} // namespace testing +} // namespace gvisor -- cgit v1.2.3 From 6fa5cee82c0f515b001dee5f3840e1f875b2f477 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 6 Mar 2020 12:30:37 -0800 Subject: Prevent memory leaks in ilist When list elements are removed from a list but not discarded, it becomes important to invalidate the references they hold to their former neighbors to prevent memory leaks. PiperOrigin-RevId: 299412421 --- pkg/ilist/list.go | 8 ++++++-- pkg/sentry/fs/dirent_cache.go | 2 -- pkg/sentry/fs/inotify.go | 5 ++++- pkg/sentry/kernel/epoll/epoll_state.go | 13 ++++++++----- pkg/tcpip/network/fragmentation/fragmentation.go | 8 +++++--- 5 files changed, 23 insertions(+), 13 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/ilist/list.go b/pkg/ilist/list.go index f3a609b57..8f93e4d6d 100644 --- a/pkg/ilist/list.go +++ b/pkg/ilist/list.go @@ -169,8 +169,9 @@ func (l *List) InsertBefore(a, e Element) { // Remove removes e from l. func (l *List) Remove(e Element) { - prev := ElementMapper{}.linkerFor(e).Prev() - next := ElementMapper{}.linkerFor(e).Next() + linker := ElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() if prev != nil { ElementMapper{}.linkerFor(prev).SetNext(next) @@ -183,6 +184,9 @@ func (l *List) Remove(e Element) { } else { l.tail = prev } + + linker.SetNext(nil) + linker.SetPrev(nil) } // Entry is a default implementation of Linker. Users can add anonymous fields diff --git a/pkg/sentry/fs/dirent_cache.go b/pkg/sentry/fs/dirent_cache.go index 25514ace4..33de32c69 100644 --- a/pkg/sentry/fs/dirent_cache.go +++ b/pkg/sentry/fs/dirent_cache.go @@ -101,8 +101,6 @@ func (c *DirentCache) remove(d *Dirent) { panic(fmt.Sprintf("trying to remove %v, which is not in the dirent cache", d)) } c.list.Remove(d) - d.SetPrev(nil) - d.SetNext(nil) d.DecRef() c.currentSize-- if c.limit != nil { diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index 928c90aa0..e3a715c1f 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -143,7 +143,10 @@ func (i *Inotify) Read(ctx context.Context, _ *File, dst usermem.IOSequence, _ i } var writeLen int64 - for event := i.events.Front(); event != nil; event = event.Next() { + for it := i.events.Front(); it != nil; { + event := it + it = it.Next() + // Does the buffer have enough remaining space to hold the event we're // about to write out? if dst.NumBytes() < int64(event.sizeOf()) { diff --git a/pkg/sentry/kernel/epoll/epoll_state.go b/pkg/sentry/kernel/epoll/epoll_state.go index a0d35d350..8e9f200d0 100644 --- a/pkg/sentry/kernel/epoll/epoll_state.go +++ b/pkg/sentry/kernel/epoll/epoll_state.go @@ -38,11 +38,14 @@ func (e *EventPoll) afterLoad() { } } - for it := e.waitingList.Front(); it != nil; it = it.Next() { - if it.id.File.Readiness(it.mask) != 0 { - e.waitingList.Remove(it) - e.readyList.PushBack(it) - it.curList = &e.readyList + for it := e.waitingList.Front(); it != nil; { + entry := it + it = it.Next() + + if entry.id.File.Readiness(entry.mask) != 0 { + e.waitingList.Remove(entry) + e.readyList.PushBack(entry) + entry.curList = &e.readyList e.Notify(waiter.EventIn) } } diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go index 92f2aa13a..f42abc4bb 100644 --- a/pkg/tcpip/network/fragmentation/fragmentation.go +++ b/pkg/tcpip/network/fragmentation/fragmentation.go @@ -115,10 +115,12 @@ func (f *Fragmentation) Process(id uint32, first, last uint16, more bool, vv buf // Evict reassemblers if we are consuming more memory than highLimit until // we reach lowLimit. if f.size > f.highLimit { - tail := f.rList.Back() - for f.size > f.lowLimit && tail != nil { + for f.size > f.lowLimit { + tail := f.rList.Back() + if tail == nil { + break + } f.release(tail) - tail = tail.Prev() } } f.mu.Unlock() -- cgit v1.2.3 From 960f6a975b7e44c0efe8fd38c66b02017c4fe137 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 6 Mar 2020 12:58:45 -0800 Subject: Add plumbing for importing fds in VFS2, along with non-socket, non-TTY impl. In VFS2, imported file descriptors are stored in a kernfs-based filesystem. Upon calling ImportFD, the host fd can be accessed in two ways: 1. a FileDescription that can be added to the FDTable, and 2. a Dentry in the host.filesystem mount, which we will want to access through magic symlinks in /proc/[pid]/fd/. An implementation of the kernfs.Inode interface stores a unique host fd. This inode can be inserted into file descriptions as well as dentries. This change also plumbs in three FileDescriptionImpls corresponding to fds for sockets, TTYs, and other files (only the latter is implemented here). These implementations will mostly make corresponding syscalls to the host. Where possible, the logic is ported over from pkg/sentry/fs/host. Updates #1672 PiperOrigin-RevId: 299417263 --- pkg/sentry/fs/host/util.go | 12 +- pkg/sentry/fsimpl/host/BUILD | 27 +++ pkg/sentry/fsimpl/host/default_file.go | 233 ++++++++++++++++++++ pkg/sentry/fsimpl/host/host.go | 286 +++++++++++++++++++++++++ pkg/sentry/fsimpl/host/util.go | 86 ++++++++ pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 2 +- pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 18 +- pkg/sentry/fsimpl/kernfs/filesystem.go | 6 +- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 54 ++--- pkg/sentry/fsimpl/kernfs/kernfs.go | 7 +- pkg/sentry/fsimpl/proc/subtasks.go | 13 +- pkg/sentry/fsimpl/proc/task.go | 23 +- pkg/sentry/fsimpl/proc/tasks.go | 19 +- pkg/sentry/vfs/BUILD | 4 + pkg/sentry/vfs/file_description_impl_util.go | 4 +- 15 files changed, 731 insertions(+), 63 deletions(-) create mode 100644 pkg/sentry/fsimpl/host/BUILD create mode 100644 pkg/sentry/fsimpl/host/default_file.go create mode 100644 pkg/sentry/fsimpl/host/host.go create mode 100644 pkg/sentry/fsimpl/host/util.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go index e37e687c6..7c60dc1db 100644 --- a/pkg/sentry/fs/host/util.go +++ b/pkg/sentry/fs/host/util.go @@ -24,7 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/syserror" ) @@ -152,9 +152,9 @@ func unstableAttr(mo *superOperations, s *syscall.Stat_t) fs.UnstableAttr { Usage: s.Blocks * 512, Perms: fs.FilePermsFromMode(linux.FileMode(s.Mode)), Owner: owner(mo, s), - AccessTime: ktime.FromUnix(s.Atim.Sec, s.Atim.Nsec), - ModificationTime: ktime.FromUnix(s.Mtim.Sec, s.Mtim.Nsec), - StatusChangeTime: ktime.FromUnix(s.Ctim.Sec, s.Ctim.Nsec), + AccessTime: time.FromUnix(s.Atim.Sec, s.Atim.Nsec), + ModificationTime: time.FromUnix(s.Mtim.Sec, s.Mtim.Nsec), + StatusChangeTime: time.FromUnix(s.Ctim.Sec, s.Ctim.Nsec), Links: uint64(s.Nlink), } } @@ -165,6 +165,8 @@ type dirInfo struct { bufp int // location of next record in buf. } +// LINT.IfChange + // isBlockError unwraps os errors and checks if they are caused by EAGAIN or // EWOULDBLOCK. This is so they can be transformed into syserror.ErrWouldBlock. func isBlockError(err error) bool { @@ -177,6 +179,8 @@ func isBlockError(err error) bool { return false } +// LINT.ThenChange(../../fsimpl/host/util.go) + func hostEffectiveKIDs() (uint32, []uint32, error) { gids, err := os.Getgroups() if err != nil { diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD new file mode 100644 index 000000000..731f192b3 --- /dev/null +++ b/pkg/sentry/fsimpl/host/BUILD @@ -0,0 +1,27 @@ +load("//tools:defs.bzl", "go_library") + +licenses(["notice"]) + +go_library( + name = "host", + srcs = [ + "default_file.go", + "host.go", + "util.go", + ], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/log", + "//pkg/refs", + "//pkg/safemem", + "//pkg/sentry/fsimpl/kernfs", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/memmap", + "//pkg/sentry/vfs", + "//pkg/sync", + "//pkg/syserror", + "//pkg/usermem", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/pkg/sentry/fsimpl/host/default_file.go b/pkg/sentry/fsimpl/host/default_file.go new file mode 100644 index 000000000..172cdb161 --- /dev/null +++ b/pkg/sentry/fsimpl/host/default_file.go @@ -0,0 +1,233 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "math" + "syscall" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// defaultFileFD implements FileDescriptionImpl for non-socket, non-TTY files. +type defaultFileFD struct { + fileDescription + + // canMap specifies whether we allow the file to be memory mapped. + canMap bool + + // mu protects the fields below. + mu sync.Mutex + + // offset specifies the current file offset. + offset int64 +} + +// TODO(gvisor.dev/issue/1672): Implement Waitable interface. + +// PRead implements FileDescriptionImpl. +func (f *defaultFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. + if f.inode.isStream { + return 0, syserror.ESPIPE + } + + return readFromHostFD(ctx, f.inode.hostFD, dst, offset, int(opts.Flags)) +} + +// Read implements FileDescriptionImpl. +func (f *defaultFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. + if f.inode.isStream { + // These files can't be memory mapped, assert this. + if f.canMap { + panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") + } + + f.mu.Lock() + n, err := readFromHostFD(ctx, f.inode.hostFD, dst, -1, int(opts.Flags)) + f.mu.Unlock() + if isBlockError(err) { + // If we got any data at all, return it as a "completed" partial read + // rather than retrying until complete. + if n != 0 { + err = nil + } else { + err = syserror.ErrWouldBlock + } + } + return n, err + } + // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so. + f.mu.Lock() + n, err := readFromHostFD(ctx, f.inode.hostFD, dst, f.offset, int(opts.Flags)) + f.offset += n + f.mu.Unlock() + return n, err +} + +func readFromHostFD(ctx context.Context, fd int, dst usermem.IOSequence, offset int64, flags int) (int64, error) { + if flags&^(linux.RWF_VALID) != 0 { + return 0, syserror.EOPNOTSUPP + } + + reader := safemem.FromVecReaderFunc{ + func(srcs [][]byte) (int64, error) { + n, err := unix.Preadv2(fd, srcs, offset, flags) + return int64(n), err + }, + } + n, err := dst.CopyOutFrom(ctx, reader) + return int64(n), err +} + +// PWrite implements FileDescriptionImpl. +func (f *defaultFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. + if f.inode.isStream { + return 0, syserror.ESPIPE + } + + return writeToHostFD(ctx, f.inode.hostFD, src, offset, int(opts.Flags)) +} + +// Write implements FileDescriptionImpl. +func (f *defaultFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. + if f.inode.isStream { + // These files can't be memory mapped, assert this. + if f.canMap { + panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") + } + + f.mu.Lock() + n, err := writeToHostFD(ctx, f.inode.hostFD, src, -1, int(opts.Flags)) + f.mu.Unlock() + if isBlockError(err) { + err = syserror.ErrWouldBlock + } + return n, err + } + // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so. + // TODO(gvisor.dev/issue/1672): Write to end of file and update offset if O_APPEND is set on this file. + f.mu.Lock() + n, err := writeToHostFD(ctx, f.inode.hostFD, src, f.offset, int(opts.Flags)) + f.offset += n + f.mu.Unlock() + return n, err +} + +func writeToHostFD(ctx context.Context, fd int, src usermem.IOSequence, offset int64, flags int) (int64, error) { + if flags&^(linux.RWF_VALID) != 0 { + return 0, syserror.EOPNOTSUPP + } + + writer := safemem.FromVecWriterFunc{ + func(srcs [][]byte) (int64, error) { + n, err := unix.Pwritev2(fd, srcs, offset, flags) + return int64(n), err + }, + } + n, err := src.CopyInTo(ctx, writer) + return int64(n), err +} + +// Seek implements FileDescriptionImpl. +// +// Note that we do not support seeking on directories, since we do not even +// allow directory fds to be imported at all. +func (f *defaultFileFD) Seek(_ context.Context, offset int64, whence int32) (int64, error) { + // TODO(b/34716638): Some char devices do support seeking, e.g. /dev/null. + if f.inode.isStream { + return 0, syserror.ESPIPE + } + + f.mu.Lock() + defer f.mu.Unlock() + + switch whence { + case linux.SEEK_SET: + if offset < 0 { + return f.offset, syserror.EINVAL + } + f.offset = offset + + case linux.SEEK_CUR: + // Check for overflow. Note that underflow cannot occur, since f.offset >= 0. + if offset > math.MaxInt64-f.offset { + return f.offset, syserror.EOVERFLOW + } + if f.offset+offset < 0 { + return f.offset, syserror.EINVAL + } + f.offset += offset + + case linux.SEEK_END: + var s syscall.Stat_t + if err := syscall.Fstat(f.inode.hostFD, &s); err != nil { + return f.offset, err + } + size := s.Size + + // Check for overflow. Note that underflow cannot occur, since size >= 0. + if offset > math.MaxInt64-size { + return f.offset, syserror.EOVERFLOW + } + if size+offset < 0 { + return f.offset, syserror.EINVAL + } + f.offset = size + offset + + case linux.SEEK_DATA, linux.SEEK_HOLE: + // Modifying the offset in the host file table should not matter, since + // this is the only place where we use it. + // + // For reading and writing, we always rely on our internal offset. + n, err := unix.Seek(f.inode.hostFD, offset, int(whence)) + if err != nil { + return f.offset, err + } + f.offset = n + + default: + // Invalid whence. + return f.offset, syserror.EINVAL + } + + return f.offset, nil +} + +// Sync implements FileDescriptionImpl. +func (f *defaultFileFD) Sync(context.Context) error { + // TODO(gvisor.dev/issue/1672): Currently we do not support the SyncData optimization, so we always sync everything. + return unix.Fsync(f.inode.hostFD) +} + +// ConfigureMMap implements FileDescriptionImpl. +func (f *defaultFileFD) ConfigureMMap(_ context.Context, opts *memmap.MMapOpts) error { + if !f.canMap { + return syserror.ENODEV + } + // TODO(gvisor.dev/issue/1672): Implement ConfigureMMap and Mappable interface. + return syserror.ENODEV +} diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go new file mode 100644 index 000000000..c205e6a0b --- /dev/null +++ b/pkg/sentry/fsimpl/host/host.go @@ -0,0 +1,286 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package host provides a filesystem implementation for host files imported as +// file descriptors. +package host + +import ( + "errors" + "fmt" + "syscall" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" +) + +// filesystem implements vfs.FilesystemImpl. +type filesystem struct { + kernfs.Filesystem +} + +// ImportFD sets up and returns a vfs.FileDescription from a donated fd. +func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID, isTTY bool) (*vfs.FileDescription, error) { + // Must be importing to a mount of host.filesystem. + fs, ok := mnt.Filesystem().Impl().(*filesystem) + if !ok { + return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl()) + } + + // Retrieve metadata. + var s syscall.Stat_t + if err := syscall.Fstat(hostFD, &s); err != nil { + return nil, err + } + + fileMode := linux.FileMode(s.Mode) + fileType := fileMode.FileType() + // Pipes, character devices, and sockets can return EWOULDBLOCK for + // operations that would block. + isStream := fileType == syscall.S_IFIFO || fileType == syscall.S_IFCHR || fileType == syscall.S_IFSOCK + + i := &inode{ + hostFD: hostFD, + isStream: isStream, + isTTY: isTTY, + ino: fs.NextIno(), + mode: fileMode, + uid: ownerUID, + gid: ownerGID, + } + + d := &kernfs.Dentry{} + d.Init(i) + // i.open will take a reference on d. + defer d.DecRef() + + return i.open(d.VFSDentry(), mnt) +} + +// inode implements kernfs.Inode. +type inode struct { + kernfs.InodeNotDirectory + kernfs.InodeNotSymlink + + // When the reference count reaches zero, the host fd is closed. + refs.AtomicRefCount + + // hostFD contains the host fd that this file was originally created from, + // which must be available at time of restore. + // + // This field is initialized at creation time and is immutable. + hostFD int + + // isStream is true if the host fd points to a file representing a stream, + // e.g. a socket or a pipe. Such files are not seekable and can return + // EWOULDBLOCK for I/O operations. + // + // This field is initialized at creation time and is immutable. + isStream bool + + // isTTY is true if this file represents a TTY. + // + // This field is initialized at creation time and is immutable. + isTTY bool + + // ino is an inode number unique within this filesystem. + ino uint64 + + // mu protects the inode metadata below. + mu sync.Mutex + + // mode is the file mode of this inode. Note that this value may become out + // of date if the mode is changed on the host, e.g. with chmod. + mode linux.FileMode + + // uid and gid of the file owner. Note that these refer to the owner of the + // file created on import, not the fd on the host. + uid auth.KUID + gid auth.KGID +} + +// Note that these flags may become out of date, since they can be modified +// on the host, e.g. with fcntl. +func fileFlagsFromHostFD(fd int) (int, error) { + flags, err := unix.FcntlInt(uintptr(fd), syscall.F_GETFL, 0) + if err != nil { + log.Warningf("Failed to get file flags for donated FD %d: %v", fd, err) + return 0, err + } + // TODO(gvisor.dev/issue/1672): implement behavior corresponding to these allowed flags. + flags &= syscall.O_ACCMODE | syscall.O_DIRECT | syscall.O_NONBLOCK | syscall.O_DSYNC | syscall.O_SYNC | syscall.O_APPEND + return flags, nil +} + +// CheckPermissions implements kernfs.Inode. +func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, atx vfs.AccessTypes) error { + return vfs.GenericCheckPermissions(creds, atx, false /* isDir */, uint16(i.mode), i.uid, i.gid) +} + +// Mode implements kernfs.Inode. +func (i *inode) Mode() linux.FileMode { + return i.mode +} + +// Stat implements kernfs.Inode. +func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + var s unix.Statx_t + if err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(opts.Mask), &s); err != nil { + return linux.Statx{}, err + } + ls := unixToLinuxStatx(s) + + // Use our own internal inode number and file owner. + // + // TODO(gvisor.dev/issue/1672): Use a kernfs-specific device number as well. + // If we use the device number from the host, it may collide with another + // sentry-internal device number. We handle device/inode numbers without + // relying on the host to prevent collisions. + ls.Ino = i.ino + ls.UID = uint32(i.uid) + ls.GID = uint32(i.gid) + + // Update file mode from the host. + i.mode = linux.FileMode(ls.Mode) + + return ls, nil +} + +// SetStat implements kernfs.Inode. +func (i *inode) SetStat(_ *vfs.Filesystem, opts vfs.SetStatOptions) error { + s := opts.Stat + + m := s.Mask + if m == 0 { + return nil + } + if m&(linux.STATX_UID|linux.STATX_GID) != 0 { + return syserror.EPERM + } + if m&linux.STATX_MODE != 0 { + if err := syscall.Fchmod(i.hostFD, uint32(s.Mode)); err != nil { + return err + } + i.mode = linux.FileMode(s.Mode) + } + if m&linux.STATX_SIZE != 0 { + if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil { + return err + } + } + if m&(linux.STATX_ATIME|linux.STATX_MTIME) != 0 { + timestamps := []unix.Timespec{ + toTimespec(s.Atime, m&linux.STATX_ATIME == 0), + toTimespec(s.Mtime, m&linux.STATX_MTIME == 0), + } + if err := unix.UtimesNanoAt(i.hostFD, "", timestamps, unix.AT_EMPTY_PATH); err != nil { + return err + } + } + return nil +} + +// DecRef implements kernfs.Inode. +func (i *inode) DecRef() { + i.AtomicRefCount.DecRefWithDestructor(i.Destroy) +} + +// Destroy implements kernfs.Inode. +func (i *inode) Destroy() { + if err := unix.Close(i.hostFD); err != nil { + log.Warningf("failed to close host fd %d: %v", i.hostFD, err) + } +} + +// Open implements kernfs.Inode. +func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + return i.open(vfsd, rp.Mount()) +} + +func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) { + + fileType := i.mode.FileType() + if fileType == syscall.S_IFSOCK { + if i.isTTY { + return nil, errors.New("cannot use host socket as TTY") + } + // TODO(gvisor.dev/issue/1672): support importing sockets. + return nil, errors.New("importing host sockets not supported") + } + + if i.isTTY { + // TODO(gvisor.dev/issue/1672): support importing host fd as TTY. + return nil, errors.New("importing host fd as TTY not supported") + } + + // For simplicity, set offset to 0. Technically, we should + // only set to 0 on files that are not seekable (sockets, pipes, etc.), + // and use the offset from the host fd otherwise. + fd := &defaultFileFD{ + fileDescription: fileDescription{ + inode: i, + }, + canMap: canMap(uint32(fileType)), + mu: sync.Mutex{}, + offset: 0, + } + + vfsfd := &fd.vfsfd + flags, err := fileFlagsFromHostFD(i.hostFD) + if err != nil { + return nil, err + } + + if err := vfsfd.Init(fd, uint32(flags), mnt, d, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } + return vfsfd, nil +} + +// fileDescription is embedded by host fd implementations of FileDescriptionImpl. +type fileDescription struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + + // inode is vfsfd.Dentry().Impl().(*kernfs.Dentry).Inode().(*inode), but + // cached to reduce indirections and casting. fileDescription does not hold + // a reference on the inode through the inode field (since one is already + // held via the Dentry). + // + // inode is immutable after fileDescription creation. + inode *inode +} + +// SetStat implements vfs.FileDescriptionImpl. +func (f *fileDescription) SetStat(_ context.Context, opts vfs.SetStatOptions) error { + return f.inode.SetStat(nil, opts) +} + +// Stat implements vfs.FileDescriptionImpl. +func (f *fileDescription) Stat(_ context.Context, opts vfs.StatOptions) (linux.Statx, error) { + return f.inode.Stat(nil, opts) +} + +// Release implements vfs.FileDescriptionImpl. +func (f *fileDescription) Release() { + // noop +} diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go new file mode 100644 index 000000000..e1ccacb4d --- /dev/null +++ b/pkg/sentry/fsimpl/host/util.go @@ -0,0 +1,86 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "syscall" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserror" +) + +func toTimespec(ts linux.StatxTimestamp, omit bool) unix.Timespec { + if omit { + return unix.Timespec{ + Sec: 0, + Nsec: unix.UTIME_OMIT, + } + } + return unix.Timespec{ + Sec: int64(ts.Sec), + Nsec: int64(ts.Nsec), + } +} + +func unixToLinuxStatx(s unix.Statx_t) linux.Statx { + return linux.Statx{ + Mask: s.Mask, + Blksize: s.Blksize, + Attributes: s.Attributes, + Nlink: s.Nlink, + UID: s.Uid, + GID: s.Gid, + Mode: s.Mode, + Ino: s.Ino, + Size: s.Size, + Blocks: s.Blocks, + AttributesMask: s.Attributes_mask, + Atime: unixToLinuxStatxTimestamp(s.Atime), + Btime: unixToLinuxStatxTimestamp(s.Btime), + Ctime: unixToLinuxStatxTimestamp(s.Ctime), + Mtime: unixToLinuxStatxTimestamp(s.Mtime), + RdevMajor: s.Rdev_major, + RdevMinor: s.Rdev_minor, + DevMajor: s.Dev_major, + DevMinor: s.Dev_minor, + } +} + +func unixToLinuxStatxTimestamp(ts unix.StatxTimestamp) linux.StatxTimestamp { + return linux.StatxTimestamp{Sec: ts.Sec, Nsec: ts.Nsec} +} + +// wouldBlock returns true for file types that can return EWOULDBLOCK +// for blocking operations, e.g. pipes, character devices, and sockets. +func wouldBlock(fileType uint32) bool { + return fileType == syscall.S_IFIFO || fileType == syscall.S_IFCHR || fileType == syscall.S_IFSOCK +} + +// canMap returns true if a file with fileType is allowed to be memory mapped. +// This is ported over from VFS1, but it's probably not the best way for us +// to check if a file can be memory mapped. +func canMap(fileType uint32) bool { + // TODO(gvisor.dev/issue/1672): Also allow "special files" to be mapped (see fs/host:canMap()). + // + // TODO(b/38213152): Some obscure character devices can be mapped. + return fileType == syscall.S_IFREG +} + +// isBlockError checks if an error is EAGAIN or EWOULDBLOCK. +// If so, they can be transformed into syserror.ErrWouldBlock. +func isBlockError(err error) bool { + return err == syserror.EAGAIN || err == syserror.EWOULDBLOCK +} diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index d092ccb2a..1c026f4d8 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -122,7 +122,7 @@ func (fd *DynamicBytesFD) Release() {} // Stat implements vfs.FileDescriptionImpl.Stat. func (fd *DynamicBytesFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { fs := fd.vfsfd.VirtualDentry().Mount().Filesystem() - return fd.inode.Stat(fs), nil + return fd.inode.Stat(fs, opts) } // SetStat implements vfs.FileDescriptionImpl.SetStat. diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 5650512e0..da821d524 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -107,9 +107,13 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent fs.mu.Lock() defer fs.mu.Unlock() + opts := vfs.StatOptions{Mask: linux.STATX_INO} // Handle ".". if fd.off == 0 { - stat := fd.inode().Stat(vfsFS) + stat, err := fd.inode().Stat(vfsFS, opts) + if err != nil { + return err + } dirent := vfs.Dirent{ Name: ".", Type: linux.DT_DIR, @@ -125,7 +129,10 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent // Handle "..". if fd.off == 1 { parentInode := vfsd.ParentOrSelf().Impl().(*Dentry).inode - stat := parentInode.Stat(vfsFS) + stat, err := parentInode.Stat(vfsFS, opts) + if err != nil { + return err + } dirent := vfs.Dirent{ Name: "..", Type: linux.FileMode(stat.Mode).DirentType(), @@ -146,7 +153,10 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent childIdx := fd.off - 2 for it := fd.children.nthLocked(childIdx); it != nil; it = it.Next() { inode := it.Dentry.Impl().(*Dentry).inode - stat := inode.Stat(vfsFS) + stat, err := inode.Stat(vfsFS, opts) + if err != nil { + return err + } dirent := vfs.Dirent{ Name: it.Name, Type: linux.FileMode(stat.Mode).DirentType(), @@ -190,7 +200,7 @@ func (fd *GenericDirectoryFD) Seek(ctx context.Context, offset int64, whence int func (fd *GenericDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { fs := fd.filesystem() inode := fd.inode() - return inode.Stat(fs), nil + return inode.Stat(fs, opts) } // SetStat implements vfs.FileDescriptionImpl.SetStat. diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 292f58afd..1d7e04ad4 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This file implements vfs.FilesystemImpl for kernfs. - package kernfs +// This file implements vfs.FilesystemImpl for kernfs. + import ( "fmt" @@ -634,7 +634,7 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if err != nil { return linux.Statx{}, err } - return inode.Stat(fs.VFSFilesystem()), nil + return inode.Stat(fs.VFSFilesystem(), opts) } // StatFSAt implements vfs.FilesystemImpl.StatFSAt. diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 099d70a16..d50018b18 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -36,20 +36,20 @@ type InodeNoopRefCount struct { } // IncRef implements Inode.IncRef. -func (n *InodeNoopRefCount) IncRef() { +func (InodeNoopRefCount) IncRef() { } // DecRef implements Inode.DecRef. -func (n *InodeNoopRefCount) DecRef() { +func (InodeNoopRefCount) DecRef() { } // TryIncRef implements Inode.TryIncRef. -func (n *InodeNoopRefCount) TryIncRef() bool { +func (InodeNoopRefCount) TryIncRef() bool { return true } // Destroy implements Inode.Destroy. -func (n *InodeNoopRefCount) Destroy() { +func (InodeNoopRefCount) Destroy() { } // InodeDirectoryNoNewChildren partially implements the Inode interface. @@ -58,27 +58,27 @@ func (n *InodeNoopRefCount) Destroy() { type InodeDirectoryNoNewChildren struct{} // NewFile implements Inode.NewFile. -func (*InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (*vfs.Dentry, error) { +func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (*vfs.Dentry, error) { return nil, syserror.EPERM } // NewDir implements Inode.NewDir. -func (*InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (*vfs.Dentry, error) { +func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (*vfs.Dentry, error) { return nil, syserror.EPERM } // NewLink implements Inode.NewLink. -func (*InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (*vfs.Dentry, error) { +func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (*vfs.Dentry, error) { return nil, syserror.EPERM } // NewSymlink implements Inode.NewSymlink. -func (*InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) { +func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) { return nil, syserror.EPERM } // NewNode implements Inode.NewNode. -func (*InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) { +func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) { return nil, syserror.EPERM } @@ -90,62 +90,62 @@ type InodeNotDirectory struct { } // HasChildren implements Inode.HasChildren. -func (*InodeNotDirectory) HasChildren() bool { +func (InodeNotDirectory) HasChildren() bool { return false } // NewFile implements Inode.NewFile. -func (*InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (*vfs.Dentry, error) { +func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (*vfs.Dentry, error) { panic("NewFile called on non-directory inode") } // NewDir implements Inode.NewDir. -func (*InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (*vfs.Dentry, error) { +func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (*vfs.Dentry, error) { panic("NewDir called on non-directory inode") } // NewLink implements Inode.NewLinkink. -func (*InodeNotDirectory) NewLink(context.Context, string, Inode) (*vfs.Dentry, error) { +func (InodeNotDirectory) NewLink(context.Context, string, Inode) (*vfs.Dentry, error) { panic("NewLink called on non-directory inode") } // NewSymlink implements Inode.NewSymlink. -func (*InodeNotDirectory) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) { +func (InodeNotDirectory) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) { panic("NewSymlink called on non-directory inode") } // NewNode implements Inode.NewNode. -func (*InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) { +func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) { panic("NewNode called on non-directory inode") } // Unlink implements Inode.Unlink. -func (*InodeNotDirectory) Unlink(context.Context, string, *vfs.Dentry) error { +func (InodeNotDirectory) Unlink(context.Context, string, *vfs.Dentry) error { panic("Unlink called on non-directory inode") } // RmDir implements Inode.RmDir. -func (*InodeNotDirectory) RmDir(context.Context, string, *vfs.Dentry) error { +func (InodeNotDirectory) RmDir(context.Context, string, *vfs.Dentry) error { panic("RmDir called on non-directory inode") } // Rename implements Inode.Rename. -func (*InodeNotDirectory) Rename(context.Context, string, string, *vfs.Dentry, *vfs.Dentry) (*vfs.Dentry, error) { +func (InodeNotDirectory) Rename(context.Context, string, string, *vfs.Dentry, *vfs.Dentry) (*vfs.Dentry, error) { panic("Rename called on non-directory inode") } // Lookup implements Inode.Lookup. -func (*InodeNotDirectory) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { +func (InodeNotDirectory) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { panic("Lookup called on non-directory inode") } // IterDirents implements Inode.IterDirents. -func (*InodeNotDirectory) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) { +func (InodeNotDirectory) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) { panic("IterDirents called on non-directory inode") } // Valid implements Inode.Valid. -func (*InodeNotDirectory) Valid(context.Context) bool { +func (InodeNotDirectory) Valid(context.Context) bool { return true } @@ -157,17 +157,17 @@ func (*InodeNotDirectory) Valid(context.Context) bool { type InodeNoDynamicLookup struct{} // Lookup implements Inode.Lookup. -func (*InodeNoDynamicLookup) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { +func (InodeNoDynamicLookup) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { return nil, syserror.ENOENT } // IterDirents implements Inode.IterDirents. -func (*InodeNoDynamicLookup) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { +func (InodeNoDynamicLookup) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { return offset, nil } // Valid implements Inode.Valid. -func (*InodeNoDynamicLookup) Valid(ctx context.Context) bool { +func (InodeNoDynamicLookup) Valid(ctx context.Context) bool { return true } @@ -177,7 +177,7 @@ func (*InodeNoDynamicLookup) Valid(ctx context.Context) bool { type InodeNotSymlink struct{} // Readlink implements Inode.Readlink. -func (*InodeNotSymlink) Readlink(context.Context) (string, error) { +func (InodeNotSymlink) Readlink(context.Context) (string, error) { return "", syserror.EINVAL } @@ -219,7 +219,7 @@ func (a *InodeAttrs) Mode() linux.FileMode { // Stat partially implements Inode.Stat. Note that this function doesn't provide // all the stat fields, and the embedder should consider extending the result // with filesystem-specific fields. -func (a *InodeAttrs) Stat(*vfs.Filesystem) linux.Statx { +func (a *InodeAttrs) Stat(*vfs.Filesystem, vfs.StatOptions) (linux.Statx, error) { var stat linux.Statx stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_NLINK stat.Ino = atomic.LoadUint64(&a.ino) @@ -230,7 +230,7 @@ func (a *InodeAttrs) Stat(*vfs.Filesystem) linux.Statx { // TODO: Implement other stat fields like timestamps. - return stat + return stat, nil } // SetStat implements Inode.SetStat. diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index c74fa999b..a8ab2a2ba 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -176,8 +176,6 @@ type Dentry struct { vfsd vfs.Dentry inode Inode - refs uint64 - // flags caches useful information about the dentry from the inode. See the // dflags* consts above. Must be accessed by atomic ops. flags uint32 @@ -302,7 +300,8 @@ type Inode interface { // this inode. The returned file description should hold a reference on the // inode for its lifetime. // - // Precondition: !rp.Done(). vfsd.Impl() must be a kernfs Dentry. + // Precondition: rp.Done(). vfsd.Impl() must be the kernfs Dentry containing + // the inode on which Open() is being called. Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) } @@ -328,7 +327,7 @@ type inodeMetadata interface { // Stat returns the metadata for this inode. This corresponds to // vfs.FilesystemImpl.StatAt. - Stat(fs *vfs.Filesystem) linux.Statx + Stat(fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) // SetStat updates the metadata for this inode. This corresponds to // vfs.FilesystemImpl.SetStatAt. diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index f3f4e49b4..611645f3f 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -121,8 +121,13 @@ func (i *subtasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.O } // Stat implements kernfs.Inode. -func (i *subtasksInode) Stat(vsfs *vfs.Filesystem) linux.Statx { - stat := i.InodeAttrs.Stat(vsfs) - stat.Nlink += uint32(i.task.ThreadGroup().Count()) - return stat +func (i *subtasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + stat, err := i.InodeAttrs.Stat(vsfs, opts) + if err != nil { + return linux.Statx{}, err + } + if opts.Mask&linux.STATX_NLINK != 0 { + stat.Nlink += uint32(i.task.ThreadGroup().Count()) + } + return stat, nil } diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 18e5cd6f6..c0d643f51 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -154,12 +154,21 @@ func newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, childre } // Stat implements kernfs.Inode. -func (i *taskOwnedInode) Stat(fs *vfs.Filesystem) linux.Statx { - stat := i.Inode.Stat(fs) - uid, gid := i.getOwner(linux.FileMode(stat.Mode)) - stat.UID = uint32(uid) - stat.GID = uint32(gid) - return stat +func (i *taskOwnedInode) Stat(fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + stat, err := i.Inode.Stat(fs, opts) + if err != nil { + return linux.Statx{}, err + } + if opts.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 { + uid, gid := i.getOwner(linux.FileMode(stat.Mode)) + if opts.Mask&linux.STATX_UID != 0 { + stat.UID = uint32(uid) + } + if opts.Mask&linux.STATX_GID != 0 { + stat.GID = uint32(gid) + } + } + return stat, nil } // CheckPermissions implements kernfs.Inode. @@ -236,7 +245,7 @@ func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentr // member, there is one entry containing three colon-separated fields: // hierarchy-ID:controller-list:cgroup-path" func newCgroupData(controllers map[string]string) dynamicInode { - buf := bytes.Buffer{} + var buf bytes.Buffer // The hierarchy ids must be positive integers (for cgroup v1), but the // exact number does not matter, so long as they are unique. We can diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index 10c08fa90..b1e39c82f 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -211,17 +211,22 @@ func (i *tasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.Open return fd.VFSFileDescription(), nil } -func (i *tasksInode) Stat(vsfs *vfs.Filesystem) linux.Statx { - stat := i.InodeAttrs.Stat(vsfs) +func (i *tasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + stat, err := i.InodeAttrs.Stat(vsfs, opts) + if err != nil { + return linux.Statx{}, err + } - // Add dynamic children to link count. - for _, tg := range i.pidns.ThreadGroups() { - if leader := tg.Leader(); leader != nil { - stat.Nlink++ + if opts.Mask&linux.STATX_NLINK != 0 { + // Add dynamic children to link count. + for _, tg := range i.pidns.ThreadGroups() { + if leader := tg.Leader(); leader != nil { + stat.Nlink++ + } } } - return stat + return stat, nil } func cpuInfoData(k *kernel.Kernel) string { diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 07c8383e6..cb4deb068 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -42,10 +42,13 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/fd", "//pkg/fspath", "//pkg/gohacks", "//pkg/log", + "//pkg/safemem", "//pkg/sentry/arch", + "//pkg/sentry/fs", "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", @@ -53,6 +56,7 @@ go_library( "//pkg/syserror", "//pkg/usermem", "//pkg/waiter", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index c2a52ec1b..45191d1c3 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -33,8 +33,8 @@ import ( // implementations to adapt: // - Have a local fileDescription struct (containing FileDescription) which // embeds FileDescriptionDefaultImpl and overrides the default methods -// which are common to all fd implementations for that for that filesystem -// like StatusFlags, SetStatusFlags, Stat, SetStat, StatFS, etc. +// which are common to all fd implementations for that filesystem like +// StatusFlags, SetStatusFlags, Stat, SetStat, StatFS, etc. // - This should be embedded in all file description implementations as the // first field by value. // - Directory FDs would also embed DirectoryFileDescriptionDefaultImpl. -- cgit v1.2.3 From b36de6e7be0542b410901d3cbcd1b3c0fc493cf5 Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Mon, 9 Mar 2020 19:57:35 -0700 Subject: Move /proc/net to /proc/PID/net, and make /proc/net -> /proc/self/net. Issue #1833 PiperOrigin-RevId: 299998105 --- pkg/sentry/fs/proc/net.go | 53 +-- pkg/sentry/fs/proc/proc.go | 2 +- pkg/sentry/fs/proc/task.go | 1 + pkg/sentry/fsimpl/proc/BUILD | 2 +- pkg/sentry/fsimpl/proc/task.go | 1 + pkg/sentry/fsimpl/proc/task_net.go | 790 +++++++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/proc/tasks.go | 2 +- pkg/sentry/fsimpl/proc/tasks_net.go | 787 ---------------------------------- pkg/sentry/fsimpl/proc/tasks_test.go | 3 +- test/syscalls/linux/proc_net.cc | 25 ++ 10 files changed, 849 insertions(+), 817 deletions(-) create mode 100644 pkg/sentry/fsimpl/proc/task_net.go delete mode 100644 pkg/sentry/fsimpl/proc/tasks_net.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index 95d5817ff..bd18177d4 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -40,47 +40,48 @@ import ( // LINT.IfChange -// newNet creates a new proc net entry. -func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSource) *fs.Inode { +// newNetDir creates a new proc net entry. +func newNetDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { + k := t.Kernel() + var contents map[string]*fs.Inode - // TODO(gvisor.dev/issue/1833): Support for using the network stack in the - // network namespace of the calling process. We should make this per-process, - // a.k.a. /proc/PID/net, and make /proc/net a symlink to /proc/self/net. - if s := p.k.RootNetworkNamespace().Stack(); s != nil { + if s := t.NetworkNamespace().Stack(); s != nil { + // TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task + // network namespace. contents = map[string]*fs.Inode{ - "dev": seqfile.NewSeqFileInode(ctx, &netDev{s: s}, msrc), - "snmp": seqfile.NewSeqFileInode(ctx, &netSnmp{s: s}, msrc), + "dev": seqfile.NewSeqFileInode(t, &netDev{s: s}, msrc), + "snmp": seqfile.NewSeqFileInode(t, &netSnmp{s: s}, msrc), // The following files are simple stubs until they are // implemented in netstack, if the file contains a // header the stub is just the header otherwise it is // an empty file. - "arp": newStaticProcInode(ctx, msrc, []byte("IP address HW type Flags HW address Mask Device\n")), + "arp": newStaticProcInode(t, msrc, []byte("IP address HW type Flags HW address Mask Device\n")), - "netlink": newStaticProcInode(ctx, msrc, []byte("sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n")), - "netstat": newStaticProcInode(ctx, msrc, []byte("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")), - "packet": newStaticProcInode(ctx, msrc, []byte("sk RefCnt Type Proto Iface R Rmem User Inode\n")), - "protocols": newStaticProcInode(ctx, msrc, []byte("protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n")), + "netlink": newStaticProcInode(t, msrc, []byte("sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n")), + "netstat": newStaticProcInode(t, msrc, []byte("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")), + "packet": newStaticProcInode(t, msrc, []byte("sk RefCnt Type Proto Iface R Rmem User Inode\n")), + "protocols": newStaticProcInode(t, msrc, []byte("protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n")), // Linux sets psched values to: nsec per usec, psched // tick in ns, 1000000, high res timer ticks per sec // (ClockGetres returns 1ns resolution). - "psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))), - "ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function\n")), - "route": seqfile.NewSeqFileInode(ctx, &netRoute{s: s}, msrc), - "tcp": seqfile.NewSeqFileInode(ctx, &netTCP{k: k}, msrc), - "udp": seqfile.NewSeqFileInode(ctx, &netUDP{k: k}, msrc), - "unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc), + "psched": newStaticProcInode(t, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))), + "ptype": newStaticProcInode(t, msrc, []byte("Type Device Function\n")), + "route": seqfile.NewSeqFileInode(t, &netRoute{s: s}, msrc), + "tcp": seqfile.NewSeqFileInode(t, &netTCP{k: k}, msrc), + "udp": seqfile.NewSeqFileInode(t, &netUDP{k: k}, msrc), + "unix": seqfile.NewSeqFileInode(t, &netUnix{k: k}, msrc), } if s.SupportsIPv6() { - contents["if_inet6"] = seqfile.NewSeqFileInode(ctx, &ifinet6{s: s}, msrc) - contents["ipv6_route"] = newStaticProcInode(ctx, msrc, []byte("")) - contents["tcp6"] = seqfile.NewSeqFileInode(ctx, &netTCP6{k: k}, msrc) - contents["udp6"] = newStaticProcInode(ctx, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n")) + contents["if_inet6"] = seqfile.NewSeqFileInode(t, &ifinet6{s: s}, msrc) + contents["ipv6_route"] = newStaticProcInode(t, msrc, []byte("")) + contents["tcp6"] = seqfile.NewSeqFileInode(t, &netTCP6{k: k}, msrc) + contents["udp6"] = newStaticProcInode(t, msrc, []byte(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n")) } } - d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) - return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) + d := ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) + return newProcInode(t, d, msrc, fs.SpecialDirectory, t) } // ifinet6 implements seqfile.SeqSource for /proc/net/if_inet6. @@ -837,4 +838,4 @@ func (n *netUDP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se return data, 0 } -// LINT.ThenChange(../../fsimpl/proc/tasks_net.go) +// LINT.ThenChange(../../fsimpl/proc/task_net.go) diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index c8abb5052..c659224a7 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -70,6 +70,7 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string "loadavg": seqfile.NewSeqFileInode(ctx, &loadavgData{}, msrc), "meminfo": seqfile.NewSeqFileInode(ctx, &meminfoData{k}, msrc), "mounts": newProcInode(ctx, ramfs.NewSymlink(ctx, fs.RootOwner, "self/mounts"), msrc, fs.Symlink, nil), + "net": newProcInode(ctx, ramfs.NewSymlink(ctx, fs.RootOwner, "self/net"), msrc, fs.Symlink, nil), "self": newSelf(ctx, pidns, msrc), "stat": seqfile.NewSeqFileInode(ctx, &statData{k}, msrc), "thread-self": newThreadSelf(ctx, pidns, msrc), @@ -86,7 +87,6 @@ func New(ctx context.Context, msrc *fs.MountSource, cgroupControllers map[string } // Add more contents that need proc to be initialized. - p.AddChild(ctx, "net", p.newNetDir(ctx, k, msrc)) p.AddChild(ctx, "sys", p.newSysDir(ctx, msrc)) return newProcInode(ctx, p, msrc, fs.SpecialDirectory, nil), nil diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 4e9b0fc00..03cc788c8 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -84,6 +84,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bo "maps": newMaps(t, msrc), "mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), "mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), + "net": newNetDir(t, msrc), "ns": newNamespaceDir(t, msrc), "oom_score": newOOMScore(t, msrc), "oom_score_adj": newOOMScoreAdj(t, msrc), diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index a83245866..bb609a305 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -9,9 +9,9 @@ go_library( "subtasks.go", "task.go", "task_files.go", + "task_net.go", "tasks.go", "tasks_files.go", - "tasks_net.go", "tasks_sys.go", ], visibility = ["//pkg/sentry:internal"], diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index c0d643f51..493acbd1b 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -57,6 +57,7 @@ func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNames "maps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mapsData{task: task}), //"mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), //"mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), + "net": newTaskNetDir(task, inoGen), "ns": newTaskOwnedDir(task, inoGen.NextIno(), 0511, map[string]*kernfs.Dentry{ "net": newNamespaceSymlink(task, inoGen.NextIno(), "net"), "pid": newNamespaceSymlink(task, inoGen.NextIno(), "pid"), diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go new file mode 100644 index 000000000..373a7b17d --- /dev/null +++ b/pkg/sentry/fsimpl/proc/task_net.go @@ -0,0 +1,790 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + "io" + "reflect" + "time" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/usermem" +) + +func newTaskNetDir(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry { + k := task.Kernel() + pidns := task.PIDNamespace() + root := auth.NewRootCredentials(pidns.UserNamespace()) + + var contents map[string]*kernfs.Dentry + if stack := task.NetworkNamespace().Stack(); stack != nil { + const ( + arp = "IP address HW type Flags HW address Mask Device\n" + netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n" + packet = "sk RefCnt Type Proto Iface R Rmem User Inode\n" + protocols = "protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n" + ptype = "Type Device Function\n" + upd6 = " sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n" + ) + psched := fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)) + + // TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task + // network namespace. + contents = map[string]*kernfs.Dentry{ + "dev": newDentry(root, inoGen.NextIno(), 0444, &netDevData{stack: stack}), + "snmp": newDentry(root, inoGen.NextIno(), 0444, &netSnmpData{stack: stack}), + + // The following files are simple stubs until they are implemented in + // netstack, if the file contains a header the stub is just the header + // otherwise it is an empty file. + "arp": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(arp)), + "netlink": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(netlink)), + "netstat": newDentry(root, inoGen.NextIno(), 0444, &netStatData{}), + "packet": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(packet)), + "protocols": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(protocols)), + + // Linux sets psched values to: nsec per usec, psched tick in ns, 1000000, + // high res timer ticks per sec (ClockGetres returns 1ns resolution). + "psched": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(psched)), + "ptype": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(ptype)), + "route": newDentry(root, inoGen.NextIno(), 0444, &netRouteData{stack: stack}), + "tcp": newDentry(root, inoGen.NextIno(), 0444, &netTCPData{kernel: k}), + "udp": newDentry(root, inoGen.NextIno(), 0444, &netUDPData{kernel: k}), + "unix": newDentry(root, inoGen.NextIno(), 0444, &netUnixData{kernel: k}), + } + + if stack.SupportsIPv6() { + contents["if_inet6"] = newDentry(root, inoGen.NextIno(), 0444, &ifinet6{stack: stack}) + contents["ipv6_route"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")) + contents["tcp6"] = newDentry(root, inoGen.NextIno(), 0444, &netTCP6Data{kernel: k}) + contents["udp6"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(upd6)) + } + } + + return newTaskOwnedDir(task, inoGen.NextIno(), 0555, contents) +} + +// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6. +// +// +stateify savable +type ifinet6 struct { + kernfs.DynamicBytesFile + + stack inet.Stack +} + +var _ dynamicInode = (*ifinet6)(nil) + +func (n *ifinet6) contents() []string { + var lines []string + nics := n.stack.Interfaces() + for id, naddrs := range n.stack.InterfaceAddrs() { + nic, ok := nics[id] + if !ok { + // NIC was added after NICNames was called. We'll just ignore it. + continue + } + + for _, a := range naddrs { + // IPv6 only. + if a.Family != linux.AF_INET6 { + continue + } + + // Fields: + // IPv6 address displayed in 32 hexadecimal chars without colons + // Netlink device number (interface index) in hexadecimal (use nic id) + // Prefix length in hexadecimal + // Scope value (use 0) + // Interface flags + // Device name + lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name)) + } + } + return lines +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error { + for _, l := range n.contents() { + buf.WriteString(l) + } + return nil +} + +// netDevData implements vfs.DynamicBytesSource for /proc/net/dev. +// +// +stateify savable +type netDevData struct { + kernfs.DynamicBytesFile + + stack inet.Stack +} + +var _ dynamicInode = (*netDevData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *netDevData) Generate(ctx context.Context, buf *bytes.Buffer) error { + interfaces := n.stack.Interfaces() + buf.WriteString("Inter-| Receive | Transmit\n") + buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n") + + for _, i := range interfaces { + // Implements the same format as + // net/core/net-procfs.c:dev_seq_printf_stats. + var stats inet.StatDev + if err := n.stack.Statistics(&stats, i.Name); err != nil { + log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err) + continue + } + fmt.Fprintf( + buf, + "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n", + i.Name, + // Received + stats[0], // bytes + stats[1], // packets + stats[2], // errors + stats[3], // dropped + stats[4], // fifo + stats[5], // frame + stats[6], // compressed + stats[7], // multicast + // Transmitted + stats[8], // bytes + stats[9], // packets + stats[10], // errors + stats[11], // dropped + stats[12], // fifo + stats[13], // frame + stats[14], // compressed + stats[15], // multicast + ) + } + + return nil +} + +// netUnixData implements vfs.DynamicBytesSource for /proc/net/unix. +// +// +stateify savable +type netUnixData struct { + kernfs.DynamicBytesFile + + kernel *kernel.Kernel +} + +var _ dynamicInode = (*netUnixData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error { + buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n") + for _, se := range n.kernel.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock) + continue + } + sfile := s.(*fs.File) + if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { + s.DecRef() + // Not a unix socket. + continue + } + sops := sfile.FileOperations.(*unix.SocketOperations) + + addr, err := sops.Endpoint().GetLocalAddress() + if err != nil { + log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err) + addr.Addr = "" + } + + sockFlags := 0 + if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok { + if ce.Listening() { + // For unix domain sockets, linux reports a single flag + // value if the socket is listening, of __SO_ACCEPTCON. + sockFlags = linux.SO_ACCEPTCON + } + } + + // In the socket entry below, the value for the 'Num' field requires + // some consideration. Linux prints the address to the struct + // unix_sock representing a socket in the kernel, but may redact the + // value for unprivileged users depending on the kptr_restrict + // sysctl. + // + // One use for this field is to allow a privileged user to + // introspect into the kernel memory to determine information about + // a socket not available through procfs, such as the socket's peer. + // + // In gvisor, returning a pointer to our internal structures would + // be pointless, as it wouldn't match the memory layout for struct + // unix_sock, making introspection difficult. We could populate a + // struct unix_sock with the appropriate data, but even that + // requires consideration for which kernel version to emulate, as + // the definition of this struct changes over time. + // + // For now, we always redact this pointer. + fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d", + (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. + sfile.ReadRefs()-1, // RefCount, don't count our own ref. + 0, // Protocol, always 0 for UDS. + sockFlags, // Flags. + sops.Endpoint().Type(), // Type. + sops.State(), // State. + sfile.InodeID(), // Inode. + ) + + // Path + if len(addr.Addr) != 0 { + if addr.Addr[0] == 0 { + // Abstract path. + fmt.Fprintf(buf, " @%s", string(addr.Addr[1:])) + } else { + fmt.Fprintf(buf, " %s", string(addr.Addr)) + } + } + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + return nil +} + +func networkToHost16(n uint16) uint16 { + // n is in network byte order, so is big-endian. The most-significant byte + // should be stored in the lower address. + // + // We manually inline binary.BigEndian.Uint16() because Go does not support + // non-primitive consts, so binary.BigEndian is a (mutable) var, so calls to + // binary.BigEndian.Uint16() require a read of binary.BigEndian and an + // interface method call, defeating inlining. + buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)} + return usermem.ByteOrder.Uint16(buf[:]) +} + +func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { + switch family { + case linux.AF_INET: + var a linux.SockAddrInet + if i != nil { + a = *i.(*linux.SockAddrInet) + } + + // linux.SockAddrInet.Port is stored in the network byte order and is + // printed like a number in host byte order. Note that all numbers in host + // byte order are printed with the most-significant byte first when + // formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux. + port := networkToHost16(a.Port) + + // linux.SockAddrInet.Addr is stored as a byte slice in big-endian order + // (i.e. most-significant byte in index 0). Linux represents this as a + // __be32 which is a typedef for an unsigned int, and is printed with + // %X. This means that for a little-endian machine, Linux prints the + // least-significant byte of the address first. To emulate this, we first + // invert the byte order for the address using usermem.ByteOrder.Uint32, + // which makes it have the equivalent encoding to a __be32 on a little + // endian machine. Note that this operation is a no-op on a big endian + // machine. Then similar to Linux, we format it with %X, which will print + // the most-significant byte of the __be32 address first, which is now + // actually the least-significant byte of the original address in + // linux.SockAddrInet.Addr on little endian machines, due to the conversion. + addr := usermem.ByteOrder.Uint32(a.Addr[:]) + + fmt.Fprintf(w, "%08X:%04X ", addr, port) + case linux.AF_INET6: + var a linux.SockAddrInet6 + if i != nil { + a = *i.(*linux.SockAddrInet6) + } + + port := networkToHost16(a.Port) + addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4]) + addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8]) + addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12]) + addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16]) + fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port) + } +} + +func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error { + // t may be nil here if our caller is not part of a task goroutine. This can + // happen for example if we're here for "sentryctl cat". When t is nil, + // degrade gracefully and retrieve what we can. + t := kernel.TaskFromContext(ctx) + + for _, se := range k.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(socket.Socket) + if !ok { + panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) + } + if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) { + s.DecRef() + // Not tcp4 sockets. + continue + } + + // Linux's documentation for the fields below can be found at + // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. + // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). + // Note that the header doesn't contain labels for all the fields. + + // Field: sl; entry number. + fmt.Fprintf(buf, "%4d: ", se.ID) + + // Field: local_adddress. + var localAddr linux.SockAddr + if t != nil { + if local, _, err := sops.GetSockName(t); err == nil { + localAddr = local + } + } + writeInetAddr(buf, family, localAddr) + + // Field: rem_address. + var remoteAddr linux.SockAddr + if t != nil { + if remote, _, err := sops.GetPeerName(t); err == nil { + remoteAddr = remote + } + } + writeInetAddr(buf, family, remoteAddr) + + // Field: state; socket state. + fmt.Fprintf(buf, "%02X ", sops.State()) + + // Field: tx_queue, rx_queue; number of packets in the transmit and + // receive queue. Unimplemented. + fmt.Fprintf(buf, "%08X:%08X ", 0, 0) + + // Field: tr, tm->when; timer active state and number of jiffies + // until timer expires. Unimplemented. + fmt.Fprintf(buf, "%02X:%08X ", 0, 0) + + // Field: retrnsmt; number of unrecovered RTO timeouts. + // Unimplemented. + fmt.Fprintf(buf, "%08X ", 0) + + // Field: uid. + uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) + if err != nil { + log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) + fmt.Fprintf(buf, "%5d ", 0) + } else { + creds := auth.CredentialsFromContext(ctx) + fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow())) + } + + // Field: timeout; number of unanswered 0-window probes. + // Unimplemented. + fmt.Fprintf(buf, "%8d ", 0) + + // Field: inode. + fmt.Fprintf(buf, "%8d ", sfile.InodeID()) + + // Field: refcount. Don't count the ref we obtain while deferencing + // the weakref to this socket. + fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) + + // Field: Socket struct address. Redacted due to the same reason as + // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. + fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) + + // Field: retransmit timeout. Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: predicted tick of soft clock (delayed ACK control data). + // Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: sending congestion window, Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. + // Unimplemented, report as large threshold. + fmt.Fprintf(buf, "%d", -1) + + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + + return nil +} + +// netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp. +// +// +stateify savable +type netTCPData struct { + kernfs.DynamicBytesFile + + kernel *kernel.Kernel +} + +var _ dynamicInode = (*netTCPData)(nil) + +func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error { + buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n") + return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET) +} + +// netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6. +// +// +stateify savable +type netTCP6Data struct { + kernfs.DynamicBytesFile + + kernel *kernel.Kernel +} + +var _ dynamicInode = (*netTCP6Data)(nil) + +func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error { + buf.WriteString(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n") + return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6) +} + +// netUDPData implements vfs.DynamicBytesSource for /proc/net/udp. +// +// +stateify savable +type netUDPData struct { + kernfs.DynamicBytesFile + + kernel *kernel.Kernel +} + +var _ dynamicInode = (*netUDPData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error { + // t may be nil here if our caller is not part of a task goroutine. This can + // happen for example if we're here for "sentryctl cat". When t is nil, + // degrade gracefully and retrieve what we can. + t := kernel.TaskFromContext(ctx) + + for _, se := range d.kernel.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(socket.Socket) + if !ok { + panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) + } + if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM { + s.DecRef() + // Not udp4 socket. + continue + } + + // For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock(). + + // Field: sl; entry number. + fmt.Fprintf(buf, "%5d: ", se.ID) + + // Field: local_adddress. + var localAddr linux.SockAddrInet + if t != nil { + if local, _, err := sops.GetSockName(t); err == nil { + localAddr = *local.(*linux.SockAddrInet) + } + } + writeInetAddr(buf, linux.AF_INET, &localAddr) + + // Field: rem_address. + var remoteAddr linux.SockAddrInet + if t != nil { + if remote, _, err := sops.GetPeerName(t); err == nil { + remoteAddr = *remote.(*linux.SockAddrInet) + } + } + writeInetAddr(buf, linux.AF_INET, &remoteAddr) + + // Field: state; socket state. + fmt.Fprintf(buf, "%02X ", sops.State()) + + // Field: tx_queue, rx_queue; number of packets in the transmit and + // receive queue. Unimplemented. + fmt.Fprintf(buf, "%08X:%08X ", 0, 0) + + // Field: tr, tm->when. Always 0 for UDP. + fmt.Fprintf(buf, "%02X:%08X ", 0, 0) + + // Field: retrnsmt. Always 0 for UDP. + fmt.Fprintf(buf, "%08X ", 0) + + // Field: uid. + uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) + if err != nil { + log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) + fmt.Fprintf(buf, "%5d ", 0) + } else { + creds := auth.CredentialsFromContext(ctx) + fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow())) + } + + // Field: timeout. Always 0 for UDP. + fmt.Fprintf(buf, "%8d ", 0) + + // Field: inode. + fmt.Fprintf(buf, "%8d ", sfile.InodeID()) + + // Field: ref; reference count on the socket inode. Don't count the ref + // we obtain while deferencing the weakref to this socket. + fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) + + // Field: Socket struct address. Redacted due to the same reason as + // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. + fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) + + // Field: drops; number of dropped packets. Unimplemented. + fmt.Fprintf(buf, "%d", 0) + + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + return nil +} + +// netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp. +// +// +stateify savable +type netSnmpData struct { + kernfs.DynamicBytesFile + + stack inet.Stack +} + +var _ dynamicInode = (*netSnmpData)(nil) + +type snmpLine struct { + prefix string + header string +} + +var snmp = []snmpLine{ + { + prefix: "Ip", + header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates", + }, + { + prefix: "Icmp", + header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps", + }, + { + prefix: "IcmpMsg", + }, + { + prefix: "Tcp", + header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors", + }, + { + prefix: "Udp", + header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", + }, + { + prefix: "UdpLite", + header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", + }, +} + +func toSlice(a interface{}) []uint64 { + v := reflect.Indirect(reflect.ValueOf(a)) + return v.Slice(0, v.Len()).Interface().([]uint64) +} + +func sprintSlice(s []uint64) string { + if len(s) == 0 { + return "" + } + r := fmt.Sprint(s) + return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice. +} + +// Generate implements vfs.DynamicBytesSource. +func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error { + types := []interface{}{ + &inet.StatSNMPIP{}, + &inet.StatSNMPICMP{}, + nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats. + &inet.StatSNMPTCP{}, + &inet.StatSNMPUDP{}, + &inet.StatSNMPUDPLite{}, + } + for i, stat := range types { + line := snmp[i] + if stat == nil { + fmt.Fprintf(buf, "%s:\n", line.prefix) + fmt.Fprintf(buf, "%s:\n", line.prefix) + continue + } + if err := d.stack.Statistics(stat, line.prefix); err != nil { + if err == syserror.EOPNOTSUPP { + log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) + } else { + log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) + } + } + + fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header) + + if line.prefix == "Tcp" { + tcp := stat.(*inet.StatSNMPTCP) + // "Tcp" needs special processing because MaxConn is signed. RFC 2012. + fmt.Sprintf("%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:])) + } else { + fmt.Sprintf("%s: %s\n", line.prefix, sprintSlice(toSlice(stat))) + } + } + return nil +} + +// netRouteData implements vfs.DynamicBytesSource for /proc/net/route. +// +// +stateify savable +type netRouteData struct { + kernfs.DynamicBytesFile + + stack inet.Stack +} + +var _ dynamicInode = (*netRouteData)(nil) + +// Generate implements vfs.DynamicBytesSource. +// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show. +func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error { + fmt.Fprintf(buf, "%-127s\n", "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT") + + interfaces := d.stack.Interfaces() + for _, rt := range d.stack.RouteTable() { + // /proc/net/route only includes ipv4 routes. + if rt.Family != linux.AF_INET { + continue + } + + // /proc/net/route does not include broadcast or multicast routes. + if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST { + continue + } + + iface, ok := interfaces[rt.OutputInterface] + if !ok || iface.Name == "lo" { + continue + } + + var ( + gw uint32 + prefix uint32 + flags = linux.RTF_UP + ) + if len(rt.GatewayAddr) == header.IPv4AddressSize { + flags |= linux.RTF_GATEWAY + gw = usermem.ByteOrder.Uint32(rt.GatewayAddr) + } + if len(rt.DstAddr) == header.IPv4AddressSize { + prefix = usermem.ByteOrder.Uint32(rt.DstAddr) + } + l := fmt.Sprintf( + "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d", + iface.Name, + prefix, + gw, + flags, + 0, // RefCnt. + 0, // Use. + 0, // Metric. + (uint32(1)<> 8 & 0xff), byte(n & 0xff)} - return usermem.ByteOrder.Uint16(buf[:]) -} - -func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { - switch family { - case linux.AF_INET: - var a linux.SockAddrInet - if i != nil { - a = *i.(*linux.SockAddrInet) - } - - // linux.SockAddrInet.Port is stored in the network byte order and is - // printed like a number in host byte order. Note that all numbers in host - // byte order are printed with the most-significant byte first when - // formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux. - port := networkToHost16(a.Port) - - // linux.SockAddrInet.Addr is stored as a byte slice in big-endian order - // (i.e. most-significant byte in index 0). Linux represents this as a - // __be32 which is a typedef for an unsigned int, and is printed with - // %X. This means that for a little-endian machine, Linux prints the - // least-significant byte of the address first. To emulate this, we first - // invert the byte order for the address using usermem.ByteOrder.Uint32, - // which makes it have the equivalent encoding to a __be32 on a little - // endian machine. Note that this operation is a no-op on a big endian - // machine. Then similar to Linux, we format it with %X, which will print - // the most-significant byte of the __be32 address first, which is now - // actually the least-significant byte of the original address in - // linux.SockAddrInet.Addr on little endian machines, due to the conversion. - addr := usermem.ByteOrder.Uint32(a.Addr[:]) - - fmt.Fprintf(w, "%08X:%04X ", addr, port) - case linux.AF_INET6: - var a linux.SockAddrInet6 - if i != nil { - a = *i.(*linux.SockAddrInet6) - } - - port := networkToHost16(a.Port) - addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4]) - addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8]) - addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12]) - addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16]) - fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port) - } -} - -func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error { - // t may be nil here if our caller is not part of a task goroutine. This can - // happen for example if we're here for "sentryctl cat". When t is nil, - // degrade gracefully and retrieve what we can. - t := kernel.TaskFromContext(ctx) - - for _, se := range k.ListSockets() { - s := se.Sock.Get() - if s == nil { - log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID) - continue - } - sfile := s.(*fs.File) - sops, ok := sfile.FileOperations.(socket.Socket) - if !ok { - panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) - } - if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) { - s.DecRef() - // Not tcp4 sockets. - continue - } - - // Linux's documentation for the fields below can be found at - // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. - // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). - // Note that the header doesn't contain labels for all the fields. - - // Field: sl; entry number. - fmt.Fprintf(buf, "%4d: ", se.ID) - - // Field: local_adddress. - var localAddr linux.SockAddr - if t != nil { - if local, _, err := sops.GetSockName(t); err == nil { - localAddr = local - } - } - writeInetAddr(buf, family, localAddr) - - // Field: rem_address. - var remoteAddr linux.SockAddr - if t != nil { - if remote, _, err := sops.GetPeerName(t); err == nil { - remoteAddr = remote - } - } - writeInetAddr(buf, family, remoteAddr) - - // Field: state; socket state. - fmt.Fprintf(buf, "%02X ", sops.State()) - - // Field: tx_queue, rx_queue; number of packets in the transmit and - // receive queue. Unimplemented. - fmt.Fprintf(buf, "%08X:%08X ", 0, 0) - - // Field: tr, tm->when; timer active state and number of jiffies - // until timer expires. Unimplemented. - fmt.Fprintf(buf, "%02X:%08X ", 0, 0) - - // Field: retrnsmt; number of unrecovered RTO timeouts. - // Unimplemented. - fmt.Fprintf(buf, "%08X ", 0) - - // Field: uid. - uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) - if err != nil { - log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) - fmt.Fprintf(buf, "%5d ", 0) - } else { - creds := auth.CredentialsFromContext(ctx) - fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow())) - } - - // Field: timeout; number of unanswered 0-window probes. - // Unimplemented. - fmt.Fprintf(buf, "%8d ", 0) - - // Field: inode. - fmt.Fprintf(buf, "%8d ", sfile.InodeID()) - - // Field: refcount. Don't count the ref we obtain while deferencing - // the weakref to this socket. - fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) - - // Field: Socket struct address. Redacted due to the same reason as - // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. - fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) - - // Field: retransmit timeout. Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: predicted tick of soft clock (delayed ACK control data). - // Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: sending congestion window, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. - // Unimplemented, report as large threshold. - fmt.Fprintf(buf, "%d", -1) - - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - - return nil -} - -// netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp. -// -// +stateify savable -type netTCPData struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netTCPData)(nil) - -func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n") - return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET) -} - -// netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6. -// -// +stateify savable -type netTCP6Data struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netTCP6Data)(nil) - -func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n") - return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6) -} - -// netUDPData implements vfs.DynamicBytesSource for /proc/net/udp. -// -// +stateify savable -type netUDPData struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netUDPData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error { - // t may be nil here if our caller is not part of a task goroutine. This can - // happen for example if we're here for "sentryctl cat". When t is nil, - // degrade gracefully and retrieve what we can. - t := kernel.TaskFromContext(ctx) - - for _, se := range d.kernel.ListSockets() { - s := se.Sock.Get() - if s == nil { - log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID) - continue - } - sfile := s.(*fs.File) - sops, ok := sfile.FileOperations.(socket.Socket) - if !ok { - panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) - } - if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM { - s.DecRef() - // Not udp4 socket. - continue - } - - // For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock(). - - // Field: sl; entry number. - fmt.Fprintf(buf, "%5d: ", se.ID) - - // Field: local_adddress. - var localAddr linux.SockAddrInet - if t != nil { - if local, _, err := sops.GetSockName(t); err == nil { - localAddr = *local.(*linux.SockAddrInet) - } - } - writeInetAddr(buf, linux.AF_INET, &localAddr) - - // Field: rem_address. - var remoteAddr linux.SockAddrInet - if t != nil { - if remote, _, err := sops.GetPeerName(t); err == nil { - remoteAddr = *remote.(*linux.SockAddrInet) - } - } - writeInetAddr(buf, linux.AF_INET, &remoteAddr) - - // Field: state; socket state. - fmt.Fprintf(buf, "%02X ", sops.State()) - - // Field: tx_queue, rx_queue; number of packets in the transmit and - // receive queue. Unimplemented. - fmt.Fprintf(buf, "%08X:%08X ", 0, 0) - - // Field: tr, tm->when. Always 0 for UDP. - fmt.Fprintf(buf, "%02X:%08X ", 0, 0) - - // Field: retrnsmt. Always 0 for UDP. - fmt.Fprintf(buf, "%08X ", 0) - - // Field: uid. - uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) - if err != nil { - log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) - fmt.Fprintf(buf, "%5d ", 0) - } else { - creds := auth.CredentialsFromContext(ctx) - fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow())) - } - - // Field: timeout. Always 0 for UDP. - fmt.Fprintf(buf, "%8d ", 0) - - // Field: inode. - fmt.Fprintf(buf, "%8d ", sfile.InodeID()) - - // Field: ref; reference count on the socket inode. Don't count the ref - // we obtain while deferencing the weakref to this socket. - fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) - - // Field: Socket struct address. Redacted due to the same reason as - // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. - fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) - - // Field: drops; number of dropped packets. Unimplemented. - fmt.Fprintf(buf, "%d", 0) - - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - return nil -} - -// netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp. -// -// +stateify savable -type netSnmpData struct { - kernfs.DynamicBytesFile - - stack inet.Stack -} - -var _ dynamicInode = (*netSnmpData)(nil) - -type snmpLine struct { - prefix string - header string -} - -var snmp = []snmpLine{ - { - prefix: "Ip", - header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates", - }, - { - prefix: "Icmp", - header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps", - }, - { - prefix: "IcmpMsg", - }, - { - prefix: "Tcp", - header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors", - }, - { - prefix: "Udp", - header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", - }, - { - prefix: "UdpLite", - header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", - }, -} - -func toSlice(a interface{}) []uint64 { - v := reflect.Indirect(reflect.ValueOf(a)) - return v.Slice(0, v.Len()).Interface().([]uint64) -} - -func sprintSlice(s []uint64) string { - if len(s) == 0 { - return "" - } - r := fmt.Sprint(s) - return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice. -} - -// Generate implements vfs.DynamicBytesSource. -func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error { - types := []interface{}{ - &inet.StatSNMPIP{}, - &inet.StatSNMPICMP{}, - nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats. - &inet.StatSNMPTCP{}, - &inet.StatSNMPUDP{}, - &inet.StatSNMPUDPLite{}, - } - for i, stat := range types { - line := snmp[i] - if stat == nil { - fmt.Fprintf(buf, "%s:\n", line.prefix) - fmt.Fprintf(buf, "%s:\n", line.prefix) - continue - } - if err := d.stack.Statistics(stat, line.prefix); err != nil { - if err == syserror.EOPNOTSUPP { - log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) - } else { - log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) - } - } - - fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header) - - if line.prefix == "Tcp" { - tcp := stat.(*inet.StatSNMPTCP) - // "Tcp" needs special processing because MaxConn is signed. RFC 2012. - fmt.Sprintf("%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:])) - } else { - fmt.Sprintf("%s: %s\n", line.prefix, sprintSlice(toSlice(stat))) - } - } - return nil -} - -// netRouteData implements vfs.DynamicBytesSource for /proc/net/route. -// -// +stateify savable -type netRouteData struct { - kernfs.DynamicBytesFile - - stack inet.Stack -} - -var _ dynamicInode = (*netRouteData)(nil) - -// Generate implements vfs.DynamicBytesSource. -// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show. -func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%-127s\n", "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT") - - interfaces := d.stack.Interfaces() - for _, rt := range d.stack.RouteTable() { - // /proc/net/route only includes ipv4 routes. - if rt.Family != linux.AF_INET { - continue - } - - // /proc/net/route does not include broadcast or multicast routes. - if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST { - continue - } - - iface, ok := interfaces[rt.OutputInterface] - if !ok || iface.Name == "lo" { - continue - } - - var ( - gw uint32 - prefix uint32 - flags = linux.RTF_UP - ) - if len(rt.GatewayAddr) == header.IPv4AddressSize { - flags |= linux.RTF_GATEWAY - gw = usermem.ByteOrder.Uint32(rt.GatewayAddr) - } - if len(rt.DstAddr) == header.IPv4AddressSize { - prefix = usermem.ByteOrder.Uint32(rt.DstAddr) - } - l := fmt.Sprintf( - "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d", - iface.Name, - prefix, - gw, - flags, - 0, // RefCnt. - 0, // Use. - 0, // Metric. - (uint32(1)< Date: Fri, 13 Mar 2020 13:17:59 -0700 Subject: Fix oom_score_adj. - Make oomScoreAdj a ThreadGroup field (Linux: signal_struct::oom_score_adj). - Avoid deadlock caused by Task.OOMScoreAdj()/SetOOMScoreAdj() locking Task.mu and TaskSet.mu in the wrong order (via Task.ExitState()). PiperOrigin-RevId: 300814698 --- pkg/sentry/fs/proc/task.go | 17 ++++++++++------- pkg/sentry/fsimpl/proc/task_files.go | 10 ++++++---- pkg/sentry/kernel/task.go | 29 ++++++----------------------- pkg/sentry/kernel/task_clone.go | 9 +++------ pkg/sentry/kernel/task_start.go | 4 ---- pkg/sentry/kernel/thread_group.go | 7 +++++++ 6 files changed, 32 insertions(+), 44 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 03cc788c8..d6c5dd2c1 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -853,15 +853,15 @@ func (o *oomScoreAdj) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.F // Read implements fs.FileOperations.Read. func (f *oomScoreAdjFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { - if offset != 0 { - return 0, io.EOF + if f.t.ExitState() == kernel.TaskExitDead { + return 0, syserror.ESRCH } - adj, err := f.t.OOMScoreAdj() - if err != nil { - return 0, err + var buf bytes.Buffer + fmt.Fprintf(&buf, "%d\n", f.t.OOMScoreAdj()) + if offset >= int64(buf.Len()) { + return 0, io.EOF } - adjBytes := []byte(strconv.FormatInt(int64(adj), 10) + "\n") - n, err := dst.CopyOut(ctx, adjBytes) + n, err := dst.CopyOut(ctx, buf.Bytes()[offset:]) return int64(n), err } @@ -880,6 +880,9 @@ func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOS return 0, err } + if f.t.ExitState() == kernel.TaskExitDead { + return 0, syserror.ESRCH + } if err := f.t.SetOOMScoreAdj(v); err != nil { return 0, err } diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 5a231ac86..4d3332771 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -539,11 +539,10 @@ var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil) // Generate implements vfs.DynamicBytesSource.Generate. func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error { - adj, err := o.task.OOMScoreAdj() - if err != nil { - return err + if o.task.ExitState() == kernel.TaskExitDead { + return syserror.ESRCH } - fmt.Fprintf(buf, "%d\n", adj) + fmt.Fprintf(buf, "%d\n", o.task.OOMScoreAdj()) return nil } @@ -562,6 +561,9 @@ func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset return 0, err } + if o.task.ExitState() == kernel.TaskExitDead { + return 0, syserror.ESRCH + } if err := o.task.SetOOMScoreAdj(v); err != nil { return 0, err } diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index c0dbbe890..8452ddf5b 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -555,13 +555,6 @@ type Task struct { // // startTime is protected by mu. startTime ktime.Time - - // oomScoreAdj is the task's OOM score adjustment. This is currently not - // used but is maintained for consistency. - // TODO(gvisor.dev/issue/1967) - // - // oomScoreAdj is protected by mu, and is owned by the task goroutine. - oomScoreAdj int32 } func (t *Task) savePtraceTracer() *Task { @@ -856,27 +849,17 @@ func (t *Task) ContainerID() string { return t.containerID } -// OOMScoreAdj gets the task's OOM score adjustment. -func (t *Task) OOMScoreAdj() (int32, error) { - t.mu.Lock() - defer t.mu.Unlock() - if t.ExitState() == TaskExitDead { - return 0, syserror.ESRCH - } - return t.oomScoreAdj, nil +// OOMScoreAdj gets the task's thread group's OOM score adjustment. +func (t *Task) OOMScoreAdj() int32 { + return atomic.LoadInt32(&t.tg.oomScoreAdj) } -// SetOOMScoreAdj sets the task's OOM score adjustment. The value should be -// between -1000 and 1000 inclusive. +// SetOOMScoreAdj sets the task's thread group's OOM score adjustment. The +// value should be between -1000 and 1000 inclusive. func (t *Task) SetOOMScoreAdj(adj int32) error { - t.mu.Lock() - defer t.mu.Unlock() - if t.ExitState() == TaskExitDead { - return syserror.ESRCH - } if adj > 1000 || adj < -1000 { return syserror.EINVAL } - t.oomScoreAdj = adj + atomic.StoreInt32(&t.tg.oomScoreAdj, adj) return nil } diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index dda502bb8..e1ecca99e 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -15,6 +15,8 @@ package kernel import ( + "sync/atomic" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/sentry/inet" @@ -260,15 +262,11 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { sh = sh.Fork() } tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, opts.TerminationSignal, tg.limits.GetCopy()) + tg.oomScoreAdj = atomic.LoadInt32(&t.tg.oomScoreAdj) rseqAddr = t.rseqAddr rseqSignature = t.rseqSignature } - adj, err := t.OOMScoreAdj() - if err != nil { - return 0, nil, err - } - cfg := &TaskConfig{ Kernel: t.k, ThreadGroup: tg, @@ -287,7 +285,6 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { RSeqAddr: rseqAddr, RSeqSignature: rseqSignature, ContainerID: t.ContainerID(), - OOMScoreAdj: adj, } if opts.NewThreadGroup { cfg.Parent = t diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index 2bbf48bb8..a5035bb7f 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -93,9 +93,6 @@ type TaskConfig struct { // ContainerID is the container the new task belongs to. ContainerID string - - // oomScoreAdj is the task's OOM score adjustment. - OOMScoreAdj int32 } // NewTask creates a new task defined by cfg. @@ -146,7 +143,6 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) { rseqSignature: cfg.RSeqSignature, futexWaiter: futex.NewWaiter(), containerID: cfg.ContainerID, - oomScoreAdj: cfg.OOMScoreAdj, } t.creds.Store(cfg.Credentials) t.endStopCond.L = &t.tg.signalHandlers.mu diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index 268f62e9d..52849f5b3 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -254,6 +254,13 @@ type ThreadGroup struct { // // tty is protected by the signal mutex. tty *TTY + + // oomScoreAdj is the thread group's OOM score adjustment. This is + // currently not used but is maintained for consistency. + // TODO(gvisor.dev/issue/1967) + // + // oomScoreAdj is accessed using atomic memory operations. + oomScoreAdj int32 } // NewThreadGroup returns a new, empty thread group in PID namespace pidns. The -- cgit v1.2.3 From 5e413cad10d2358a21dd08216953faee70e62a0b Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Sat, 14 Mar 2020 07:13:15 -0700 Subject: Plumb VFS2 imported fds into virtual filesystem. - When setting up the virtual filesystem, mount a host.filesystem to contain all files that need to be imported. - Make read/preadv syscalls to the host in cases where preadv2 may not be supported yet (likewise for writing). - Make save/restore functions in kernel/kernel.go return early if vfs2 is enabled. PiperOrigin-RevId: 300922353 --- pkg/abi/linux/file.go | 3 + pkg/sentry/fs/host/control.go | 2 + pkg/sentry/fsimpl/host/BUILD | 2 + pkg/sentry/fsimpl/host/default_file.go | 45 +++++++----- pkg/sentry/fsimpl/host/host.go | 124 ++++++++++++++++++++++++++++++--- pkg/sentry/fsimpl/host/util.go | 28 ++------ pkg/sentry/kernel/kernel.go | 40 +++++++---- pkg/sentry/syscalls/linux/sys_stat.go | 5 +- pkg/sentry/syscalls/linux/vfs2/stat.go | 6 +- runsc/boot/filter/config.go | 1 + test/syscalls/linux/stat.cc | 60 ++++++++++++++-- 11 files changed, 246 insertions(+), 70 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go index e229ac21c..dbe58acbe 100644 --- a/pkg/abi/linux/file.go +++ b/pkg/abi/linux/file.go @@ -266,6 +266,9 @@ type Statx struct { DevMinor uint32 } +// SizeOfStatx is the size of a Statx struct. +var SizeOfStatx = binary.Size(Statx{}) + // FileMode represents a mode_t. type FileMode uint16 diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go index 1658979fc..cd84e1337 100644 --- a/pkg/sentry/fs/host/control.go +++ b/pkg/sentry/fs/host/control.go @@ -32,6 +32,8 @@ func newSCMRights(fds []int) control.SCMRights { } // Files implements control.SCMRights.Files. +// +// TODO(gvisor.dev/issue/2017): Port to VFS2. func (c *scmRights) Files(ctx context.Context, max int) (control.RightsFiles, bool) { n := max var trunc bool diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 731f192b3..5d67f88e3 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -9,9 +9,11 @@ go_library( "host.go", "util.go", ], + visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/fd", "//pkg/log", "//pkg/refs", "//pkg/safemem", diff --git a/pkg/sentry/fsimpl/host/default_file.go b/pkg/sentry/fsimpl/host/default_file.go index 172cdb161..98682ba5e 100644 --- a/pkg/sentry/fsimpl/host/default_file.go +++ b/pkg/sentry/fsimpl/host/default_file.go @@ -21,6 +21,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -64,9 +65,7 @@ func (f *defaultFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts v panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") } - f.mu.Lock() n, err := readFromHostFD(ctx, f.inode.hostFD, dst, -1, int(opts.Flags)) - f.mu.Unlock() if isBlockError(err) { // If we got any data at all, return it as a "completed" partial read // rather than retrying until complete. @@ -86,16 +85,22 @@ func (f *defaultFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts v return n, err } -func readFromHostFD(ctx context.Context, fd int, dst usermem.IOSequence, offset int64, flags int) (int64, error) { - if flags&^(linux.RWF_VALID) != 0 { +func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags int) (int64, error) { + // TODO(gvisor.dev/issue/1672): Support select preadv2 flags. + if flags != 0 { return 0, syserror.EOPNOTSUPP } - reader := safemem.FromVecReaderFunc{ - func(srcs [][]byte) (int64, error) { - n, err := unix.Preadv2(fd, srcs, offset, flags) - return int64(n), err - }, + var reader safemem.Reader + if offset == -1 { + reader = safemem.FromIOReader{fd.NewReadWriter(hostFD)} + } else { + reader = safemem.FromVecReaderFunc{ + func(srcs [][]byte) (int64, error) { + n, err := unix.Preadv(hostFD, srcs, offset) + return int64(n), err + }, + } } n, err := dst.CopyOutFrom(ctx, reader) return int64(n), err @@ -120,9 +125,7 @@ func (f *defaultFileFD) Write(ctx context.Context, src usermem.IOSequence, opts panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") } - f.mu.Lock() n, err := writeToHostFD(ctx, f.inode.hostFD, src, -1, int(opts.Flags)) - f.mu.Unlock() if isBlockError(err) { err = syserror.ErrWouldBlock } @@ -137,16 +140,22 @@ func (f *defaultFileFD) Write(ctx context.Context, src usermem.IOSequence, opts return n, err } -func writeToHostFD(ctx context.Context, fd int, src usermem.IOSequence, offset int64, flags int) (int64, error) { - if flags&^(linux.RWF_VALID) != 0 { +func writeToHostFD(ctx context.Context, hostFD int, src usermem.IOSequence, offset int64, flags int) (int64, error) { + // TODO(gvisor.dev/issue/1672): Support select pwritev2 flags. + if flags != 0 { return 0, syserror.EOPNOTSUPP } - writer := safemem.FromVecWriterFunc{ - func(srcs [][]byte) (int64, error) { - n, err := unix.Pwritev2(fd, srcs, offset, flags) - return int64(n), err - }, + var writer safemem.Writer + if offset == -1 { + writer = safemem.FromIOWriter{fd.NewReadWriter(hostFD)} + } else { + writer = safemem.FromVecWriterFunc{ + func(srcs [][]byte) (int64, error) { + n, err := unix.Pwritev(hostFD, srcs, offset) + return int64(n), err + }, + } } n, err := src.CopyInTo(ctx, writer) return int64(n), err diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index c205e6a0b..0be812d13 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -38,10 +38,19 @@ type filesystem struct { kernfs.Filesystem } +// NewMount returns a new disconnected mount in vfsObj that may be passed to ImportFD. +func NewMount(vfsObj *vfs.VirtualFilesystem) (*vfs.Mount, error) { + fs := &filesystem{} + fs.Init(vfsObj) + vfsfs := fs.VFSFilesystem() + // NewDisconnectedMount will take an additional reference on vfsfs. + defer vfsfs.DecRef() + return vfsObj.NewDisconnectedMount(vfsfs, nil, &vfs.MountOptions{}) +} + // ImportFD sets up and returns a vfs.FileDescription from a donated fd. func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID, isTTY bool) (*vfs.FileDescription, error) { - // Must be importing to a mount of host.filesystem. - fs, ok := mnt.Filesystem().Impl().(*filesystem) + fs, ok := mnt.Filesystem().Impl().(*kernfs.Filesystem) if !ok { return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl()) } @@ -54,8 +63,7 @@ func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID fileMode := linux.FileMode(s.Mode) fileType := fileMode.FileType() - // Pipes, character devices, and sockets can return EWOULDBLOCK for - // operations that would block. + // Pipes, character devices, and sockets. isStream := fileType == syscall.S_IFIFO || fileType == syscall.S_IFCHR || fileType == syscall.S_IFSOCK i := &inode{ @@ -143,11 +151,109 @@ func (i *inode) Mode() linux.FileMode { // Stat implements kernfs.Inode. func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + if opts.Mask&linux.STATX__RESERVED != 0 { + return linux.Statx{}, syserror.EINVAL + } + if opts.Sync&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { + return linux.Statx{}, syserror.EINVAL + } + + // Limit our host call only to known flags. + mask := opts.Mask & linux.STATX_ALL var s unix.Statx_t - if err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(opts.Mask), &s); err != nil { + err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(mask), &s) + // Fallback to fstat(2), if statx(2) is not supported on the host. + // + // TODO(b/151263641): Remove fallback. + if err == syserror.ENOSYS { + return i.fstat(opts) + } else if err != nil { + return linux.Statx{}, err + } + + ls := linux.Statx{Mask: mask} + // Unconditionally fill blksize, attributes, and device numbers, as indicated + // by /include/uapi/linux/stat.h. + // + // RdevMajor/RdevMinor are left as zero, so as not to expose host device + // numbers. + // + // TODO(gvisor.dev/issue/1672): Use kernfs-specific, internally defined + // device numbers. If we use the device number from the host, it may collide + // with another sentry-internal device number. We handle device/inode + // numbers without relying on the host to prevent collisions. + ls.Blksize = s.Blksize + ls.Attributes = s.Attributes + ls.AttributesMask = s.Attributes_mask + + if mask|linux.STATX_TYPE != 0 { + ls.Mode |= s.Mode & linux.S_IFMT + } + if mask|linux.STATX_MODE != 0 { + ls.Mode |= s.Mode &^ linux.S_IFMT + } + if mask|linux.STATX_NLINK != 0 { + ls.Nlink = s.Nlink + } + if mask|linux.STATX_ATIME != 0 { + ls.Atime = unixToLinuxStatxTimestamp(s.Atime) + } + if mask|linux.STATX_BTIME != 0 { + ls.Btime = unixToLinuxStatxTimestamp(s.Btime) + } + if mask|linux.STATX_CTIME != 0 { + ls.Ctime = unixToLinuxStatxTimestamp(s.Ctime) + } + if mask|linux.STATX_MTIME != 0 { + ls.Mtime = unixToLinuxStatxTimestamp(s.Mtime) + } + if mask|linux.STATX_SIZE != 0 { + ls.Size = s.Size + } + if mask|linux.STATX_BLOCKS != 0 { + ls.Blocks = s.Blocks + } + + // Use our own internal inode number and file owner. + if mask|linux.STATX_INO != 0 { + ls.Ino = i.ino + } + if mask|linux.STATX_UID != 0 { + ls.UID = uint32(i.uid) + } + if mask|linux.STATX_GID != 0 { + ls.GID = uint32(i.gid) + } + + return ls, nil +} + +// fstat is a best-effort fallback for inode.Stat() if the host does not +// support statx(2). +// +// We ignore the mask and sync flags in opts and simply supply +// STATX_BASIC_STATS, as fstat(2) itself does not allow the specification +// of a mask or sync flags. fstat(2) does not provide any metadata +// equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so +// those fields remain empty. +func (i *inode) fstat(opts vfs.StatOptions) (linux.Statx, error) { + var s unix.Stat_t + if err := unix.Fstat(i.hostFD, &s); err != nil { return linux.Statx{}, err } - ls := unixToLinuxStatx(s) + + // Note that rdev numbers are left as 0; do not expose host device numbers. + ls := linux.Statx{ + Mask: linux.STATX_BASIC_STATS, + Blksize: uint32(s.Blksize), + Nlink: uint32(s.Nlink), + Mode: uint16(s.Mode), + Size: uint64(s.Size), + Blocks: uint64(s.Blocks), + Atime: timespecToStatxTimestamp(s.Atim), + Ctime: timespecToStatxTimestamp(s.Ctim), + Mtime: timespecToStatxTimestamp(s.Mtim), + } // Use our own internal inode number and file owner. // @@ -159,9 +265,6 @@ func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, erro ls.UID = uint32(i.uid) ls.GID = uint32(i.gid) - // Update file mode from the host. - i.mode = linux.FileMode(ls.Mode) - return ls, nil } @@ -217,7 +320,6 @@ func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptio } func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) { - fileType := i.mode.FileType() if fileType == syscall.S_IFSOCK { if i.isTTY { @@ -227,6 +329,8 @@ func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error return nil, errors.New("importing host sockets not supported") } + // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that + // we don't allow importing arbitrary file types without proper support. if i.isTTY { // TODO(gvisor.dev/issue/1672): support importing host fd as TTY. return nil, errors.New("importing host fd as TTY not supported") diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go index e1ccacb4d..d519feef5 100644 --- a/pkg/sentry/fsimpl/host/util.go +++ b/pkg/sentry/fsimpl/host/util.go @@ -35,34 +35,14 @@ func toTimespec(ts linux.StatxTimestamp, omit bool) unix.Timespec { } } -func unixToLinuxStatx(s unix.Statx_t) linux.Statx { - return linux.Statx{ - Mask: s.Mask, - Blksize: s.Blksize, - Attributes: s.Attributes, - Nlink: s.Nlink, - UID: s.Uid, - GID: s.Gid, - Mode: s.Mode, - Ino: s.Ino, - Size: s.Size, - Blocks: s.Blocks, - AttributesMask: s.Attributes_mask, - Atime: unixToLinuxStatxTimestamp(s.Atime), - Btime: unixToLinuxStatxTimestamp(s.Btime), - Ctime: unixToLinuxStatxTimestamp(s.Ctime), - Mtime: unixToLinuxStatxTimestamp(s.Mtime), - RdevMajor: s.Rdev_major, - RdevMinor: s.Rdev_minor, - DevMajor: s.Dev_major, - DevMinor: s.Dev_minor, - } -} - func unixToLinuxStatxTimestamp(ts unix.StatxTimestamp) linux.StatxTimestamp { return linux.StatxTimestamp{Sec: ts.Sec, Nsec: ts.Nsec} } +func timespecToStatxTimestamp(ts unix.Timespec) linux.StatxTimestamp { + return linux.StatxTimestamp{Sec: int64(ts.Sec), Nsec: uint32(ts.Nsec)} +} + // wouldBlock returns true for file types that can return EWOULDBLOCK // for blocking operations, e.g. pipes, character devices, and sockets. func wouldBlock(fileType uint32) bool { diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 1d627564f..6feda8fa1 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -467,6 +467,11 @@ func (k *Kernel) flushMountSourceRefs() error { // // Precondition: Must be called with the kernel paused. func (ts *TaskSet) forEachFDPaused(f func(*fs.File, *vfs.FileDescription) error) (err error) { + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. + if VFS2Enabled { + return nil + } + ts.mu.RLock() defer ts.mu.RUnlock() for t := range ts.Root.tids { @@ -484,7 +489,7 @@ func (ts *TaskSet) forEachFDPaused(f func(*fs.File, *vfs.FileDescription) error) } func (ts *TaskSet) flushWritesToFiles(ctx context.Context) error { - // TODO(gvisor.dev/issues/1663): Add save support for VFS2. + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. return ts.forEachFDPaused(func(file *fs.File, _ *vfs.FileDescription) error { if flags := file.Flags(); !flags.Write { return nil @@ -533,6 +538,11 @@ func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error { } func (ts *TaskSet) unregisterEpollWaiters() { + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. + if VFS2Enabled { + return + } + ts.mu.RLock() defer ts.mu.RUnlock() for t := range ts.Root.tids { @@ -1005,11 +1015,14 @@ func (k *Kernel) pauseTimeLocked() { // This means we'll iterate FDTables shared by multiple tasks repeatedly, // but ktime.Timer.Pause is idempotent so this is harmless. if t.fdTable != nil { - t.fdTable.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { - if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { - tfd.PauseTimer() - } - }) + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. + if !VFS2Enabled { + t.fdTable.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { + if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { + tfd.PauseTimer() + } + }) + } } } k.timekeeper.PauseUpdates() @@ -1034,12 +1047,15 @@ func (k *Kernel) resumeTimeLocked() { it.ResumeTimer() } } - if t.fdTable != nil { - t.fdTable.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { - if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { - tfd.ResumeTimer() - } - }) + // TODO(gvisor.dev/issue/1663): Add save support for VFS2. + if !VFS2Enabled { + if t.fdTable != nil { + t.fdTable.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { + if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok { + tfd.ResumeTimer() + } + }) + } } } } diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go index 9bd2df104..a11a87cd1 100644 --- a/pkg/sentry/syscalls/linux/sys_stat.go +++ b/pkg/sentry/syscalls/linux/sys_stat.go @@ -136,7 +136,10 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall mask := args[3].Uint() statxAddr := args[4].Pointer() - if mask&linux.STATX__RESERVED > 0 { + if mask&linux.STATX__RESERVED != 0 { + return 0, nil, syserror.EINVAL + } + if flags&^(linux.AT_SYMLINK_NOFOLLOW|linux.AT_EMPTY_PATH|linux.AT_STATX_SYNC_TYPE) != 0 { return 0, nil, syserror.EINVAL } if flags&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE { diff --git a/pkg/sentry/syscalls/linux/vfs2/stat.go b/pkg/sentry/syscalls/linux/vfs2/stat.go index a74ea6fd5..97eaedd66 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat.go @@ -150,7 +150,11 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall mask := args[3].Uint() statxAddr := args[4].Pointer() - if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { + if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW|linux.AT_STATX_SYNC_TYPE) != 0 { + return 0, nil, syserror.EINVAL + } + + if mask&linux.STATX__RESERVED != 0 { return 0, nil, syserror.EINVAL } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index a4627905e..f459d1973 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -284,6 +284,7 @@ var allowedSyscalls = seccomp.SyscallRules{ {seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)}, }, syscall.SYS_SIGALTSTACK: {}, + unix.SYS_STATX: {}, syscall.SYS_SYNC_FILE_RANGE: {}, syscall.SYS_TGKILL: []seccomp.Rule{ { diff --git a/test/syscalls/linux/stat.cc b/test/syscalls/linux/stat.cc index c951ac3b3..513b9cd1c 100644 --- a/test/syscalls/linux/stat.cc +++ b/test/syscalls/linux/stat.cc @@ -607,7 +607,7 @@ int statx(int dirfd, const char* pathname, int flags, unsigned int mask, } TEST_F(StatTest, StatxAbsPath) { - SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && errno == ENOSYS); struct kernel_statx stx; @@ -617,7 +617,7 @@ TEST_F(StatTest, StatxAbsPath) { } TEST_F(StatTest, StatxRelPathDirFD) { - SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && errno == ENOSYS); struct kernel_statx stx; @@ -631,7 +631,7 @@ TEST_F(StatTest, StatxRelPathDirFD) { } TEST_F(StatTest, StatxRelPathCwd) { - SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && errno == ENOSYS); ASSERT_THAT(chdir(GetAbsoluteTestTmpdir().c_str()), SyscallSucceeds()); @@ -643,7 +643,7 @@ TEST_F(StatTest, StatxRelPathCwd) { } TEST_F(StatTest, StatxEmptyPath) { - SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, 0) < 0 && + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && errno == ENOSYS); const auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDONLY)); @@ -653,6 +653,58 @@ TEST_F(StatTest, StatxEmptyPath) { EXPECT_TRUE(S_ISREG(stx.stx_mode)); } +TEST_F(StatTest, StatxDoesNotRejectExtraneousMaskBits) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + // Set all mask bits except for STATX__RESERVED. + uint mask = 0xffffffff & ~0x80000000; + EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, mask, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxRejectsReservedMaskBit) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + // Set STATX__RESERVED in the mask. + EXPECT_THAT(statx(-1, test_file_name_.c_str(), 0, 0x80000000, &stx), + SyscallFailsWithErrno(EINVAL)); +} + +TEST_F(StatTest, StatxSymlink) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + std::string parent_dir = "/tmp"; + TempPath link = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(parent_dir, test_file_name_)); + std::string p = link.path(); + + struct kernel_statx stx; + EXPECT_THAT(statx(AT_FDCWD, p.c_str(), AT_SYMLINK_NOFOLLOW, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISLNK(stx.stx_mode)); + EXPECT_THAT(statx(AT_FDCWD, p.c_str(), 0, STATX_ALL, &stx), + SyscallSucceeds()); + EXPECT_TRUE(S_ISREG(stx.stx_mode)); +} + +TEST_F(StatTest, StatxInvalidFlags) { + SKIP_IF(!IsRunningOnGvisor() && statx(-1, nullptr, 0, 0, nullptr) < 0 && + errno == ENOSYS); + + struct kernel_statx stx; + EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), 12345, 0, &stx), + SyscallFailsWithErrno(EINVAL)); + EXPECT_THAT(statx(AT_FDCWD, test_file_name_.c_str(), + 0x6000 /* AT_STATX_SYNC_TYPE */, 0, &stx), + SyscallFailsWithErrno(EINVAL)); +} + } // namespace } // namespace testing -- cgit v1.2.3 From 42d78ba61bf93f927ee89099abf5ef27b394b0d9 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Tue, 17 Mar 2020 10:29:12 -0700 Subject: Remove HostFS from Sentry. PiperOrigin-RevId: 301402181 --- pkg/sentry/fs/host/BUILD | 4 +- pkg/sentry/fs/host/descriptor.go | 37 +--- pkg/sentry/fs/host/descriptor_state.go | 2 +- pkg/sentry/fs/host/descriptor_test.go | 4 +- pkg/sentry/fs/host/file.go | 4 +- pkg/sentry/fs/host/fs.go | 339 ----------------------------- pkg/sentry/fs/host/fs_test.go | 380 --------------------------------- pkg/sentry/fs/host/host.go | 59 +++++ pkg/sentry/fs/host/inode.go | 141 ++---------- pkg/sentry/fs/host/inode_state.go | 32 +-- pkg/sentry/fs/host/inode_test.go | 66 ------ pkg/sentry/fs/host/util.go | 84 +------- pkg/sentry/fs/host/util_unsafe.go | 41 ---- 13 files changed, 94 insertions(+), 1099 deletions(-) delete mode 100644 pkg/sentry/fs/host/fs.go delete mode 100644 pkg/sentry/fs/host/fs_test.go create mode 100644 pkg/sentry/fs/host/host.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 21003ea45..011625c80 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -10,7 +10,7 @@ go_library( "descriptor_state.go", "device.go", "file.go", - "fs.go", + "host.go", "inode.go", "inode_state.go", "ioctl_unsafe.go", @@ -62,14 +62,12 @@ go_test( size = "small", srcs = [ "descriptor_test.go", - "fs_test.go", "inode_test.go", "socket_test.go", "wait_test.go", ], library = ":host", deps = [ - "//pkg/context", "//pkg/fd", "//pkg/fdnotifier", "//pkg/sentry/contexttest", diff --git a/pkg/sentry/fs/host/descriptor.go b/pkg/sentry/fs/host/descriptor.go index 2a4d1b291..cfdce6a74 100644 --- a/pkg/sentry/fs/host/descriptor.go +++ b/pkg/sentry/fs/host/descriptor.go @@ -16,7 +16,6 @@ package host import ( "fmt" - "path" "syscall" "gvisor.dev/gvisor/pkg/fdnotifier" @@ -28,12 +27,9 @@ import ( // // +stateify savable type descriptor struct { - // donated is true if the host fd was donated by another process. - donated bool - // If origFD >= 0, it is the host fd that this file was originally created // from, which must be available at time of restore. The FD can be closed - // after descriptor is created. Only set if donated is true. + // after descriptor is created. origFD int // wouldBlock is true if value (below) points to a file that can @@ -41,15 +37,13 @@ type descriptor struct { wouldBlock bool // value is the wrapped host fd. It is never saved or restored - // directly. How it is restored depends on whether it was - // donated and the fs.MountSource it was originally - // opened/created from. + // directly. value int `state:"nosave"` } // newDescriptor returns a wrapped host file descriptor. On success, // the descriptor is registered for event notifications with queue. -func newDescriptor(fd int, donated bool, saveable bool, wouldBlock bool, queue *waiter.Queue) (*descriptor, error) { +func newDescriptor(fd int, saveable bool, wouldBlock bool, queue *waiter.Queue) (*descriptor, error) { ownedFD := fd origFD := -1 if saveable { @@ -69,7 +63,6 @@ func newDescriptor(fd int, donated bool, saveable bool, wouldBlock bool, queue * } } return &descriptor{ - donated: donated, origFD: origFD, wouldBlock: wouldBlock, value: ownedFD, @@ -77,25 +70,11 @@ func newDescriptor(fd int, donated bool, saveable bool, wouldBlock bool, queue * } // initAfterLoad initializes the value of the descriptor after Load. -func (d *descriptor) initAfterLoad(mo *superOperations, id uint64, queue *waiter.Queue) error { - if d.donated { - var err error - d.value, err = syscall.Dup(d.origFD) - if err != nil { - return fmt.Errorf("failed to dup restored fd %d: %v", d.origFD, err) - } - } else { - name, ok := mo.inodeMappings[id] - if !ok { - return fmt.Errorf("failed to find path for inode number %d", id) - } - fullpath := path.Join(mo.root, name) - - var err error - d.value, err = open(nil, fullpath) - if err != nil { - return fmt.Errorf("failed to open %q: %v", fullpath, err) - } +func (d *descriptor) initAfterLoad(id uint64, queue *waiter.Queue) error { + var err error + d.value, err = syscall.Dup(d.origFD) + if err != nil { + return fmt.Errorf("failed to dup restored fd %d: %v", d.origFD, err) } if d.wouldBlock { if err := syscall.SetNonblock(d.value, true); err != nil { diff --git a/pkg/sentry/fs/host/descriptor_state.go b/pkg/sentry/fs/host/descriptor_state.go index 8167390a9..e880582ab 100644 --- a/pkg/sentry/fs/host/descriptor_state.go +++ b/pkg/sentry/fs/host/descriptor_state.go @@ -16,7 +16,7 @@ package host // beforeSave is invoked by stateify. func (d *descriptor) beforeSave() { - if d.donated && d.origFD < 0 { + if d.origFD < 0 { panic("donated file descriptor cannot be saved") } } diff --git a/pkg/sentry/fs/host/descriptor_test.go b/pkg/sentry/fs/host/descriptor_test.go index 4205981f5..d8e4605b6 100644 --- a/pkg/sentry/fs/host/descriptor_test.go +++ b/pkg/sentry/fs/host/descriptor_test.go @@ -47,10 +47,10 @@ func TestDescriptorRelease(t *testing.T) { // FD ownership is transferred to the descritor. queue := &waiter.Queue{} - d, err := newDescriptor(fd, false /* donated*/, tc.saveable, tc.wouldBlock, queue) + d, err := newDescriptor(fd, tc.saveable, tc.wouldBlock, queue) if err != nil { syscall.Close(fd) - t.Fatalf("newDescriptor(%d, %t, false, %t, queue) failed, err: %v", fd, tc.saveable, tc.wouldBlock, err) + t.Fatalf("newDescriptor(%d, %t, %t, queue) failed, err: %v", fd, tc.saveable, tc.wouldBlock, err) } if tc.saveable { if d.origFD < 0 { diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go index e08f56d04..034862694 100644 --- a/pkg/sentry/fs/host/file.go +++ b/pkg/sentry/fs/host/file.go @@ -101,8 +101,8 @@ func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner }) return s, nil default: - msrc := newMountSource(ctx, "/", mounter, &Filesystem{}, fs.MountSourceFlags{}, false /* dontTranslateOwnership */) - inode, err := newInode(ctx, msrc, donated, saveable, true /* donated */) + msrc := fs.NewNonCachingMountSource(ctx, &filesystem{}, fs.MountSourceFlags{}) + inode, err := newInode(ctx, msrc, donated, saveable) if err != nil { return nil, err } diff --git a/pkg/sentry/fs/host/fs.go b/pkg/sentry/fs/host/fs.go deleted file mode 100644 index d3e8e3a36..000000000 --- a/pkg/sentry/fs/host/fs.go +++ /dev/null @@ -1,339 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package host implements an fs.Filesystem for files backed by host -// file descriptors. -package host - -import ( - "fmt" - "path" - "path/filepath" - "strconv" - "strings" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// FilesystemName is the name under which Filesystem is registered. -const FilesystemName = "whitelistfs" - -const ( - // whitelistKey is the mount option containing a comma-separated list - // of host paths to whitelist. - whitelistKey = "whitelist" - - // rootPathKey is the mount option containing the root path of the - // mount. - rootPathKey = "root" - - // dontTranslateOwnershipKey is the key to superOperations.dontTranslateOwnership. - dontTranslateOwnershipKey = "dont_translate_ownership" -) - -// maxTraversals determines link traversals in building the whitelist. -const maxTraversals = 10 - -// Filesystem is a pseudo file system that is only available during the setup -// to lock down the configurations. This filesystem should only be mounted at root. -// -// Think twice before exposing this to applications. -// -// +stateify savable -type Filesystem struct { - // whitelist is a set of host paths to whitelist. - paths []string -} - -var _ fs.Filesystem = (*Filesystem)(nil) - -// Name is the identifier of this file system. -func (*Filesystem) Name() string { - return FilesystemName -} - -// AllowUserMount prohibits users from using mount(2) with this file system. -func (*Filesystem) AllowUserMount() bool { - return false -} - -// AllowUserList allows this filesystem to be listed in /proc/filesystems. -func (*Filesystem) AllowUserList() bool { - return true -} - -// Flags returns that there is nothing special about this file system. -func (*Filesystem) Flags() fs.FilesystemFlags { - return 0 -} - -// Mount returns an fs.Inode exposing the host file system. It is intended to be locked -// down in PreExec below. -func (f *Filesystem) Mount(ctx context.Context, _ string, flags fs.MountSourceFlags, data string, _ interface{}) (*fs.Inode, error) { - // Parse generic comma-separated key=value options. - options := fs.GenericMountSourceOptions(data) - - // Grab the whitelist if one was specified. - // TODO(edahlgren/mpratt/hzy): require another option "testonly" in order to allow - // no whitelist. - if wl, ok := options[whitelistKey]; ok { - f.paths = strings.Split(wl, "|") - delete(options, whitelistKey) - } - - // If the rootPath was set, use it. Othewise default to the root of the - // host fs. - rootPath := "/" - if rp, ok := options[rootPathKey]; ok { - rootPath = rp - delete(options, rootPathKey) - - // We must relativize the whitelisted paths to the new root. - for i, p := range f.paths { - rel, err := filepath.Rel(rootPath, p) - if err != nil { - return nil, fmt.Errorf("whitelist path %q must be a child of root path %q", p, rootPath) - } - f.paths[i] = path.Join("/", rel) - } - } - fd, err := open(nil, rootPath) - if err != nil { - return nil, fmt.Errorf("failed to find root: %v", err) - } - - var dontTranslateOwnership bool - if v, ok := options[dontTranslateOwnershipKey]; ok { - b, err := strconv.ParseBool(v) - if err != nil { - return nil, fmt.Errorf("invalid value for %q: %v", dontTranslateOwnershipKey, err) - } - dontTranslateOwnership = b - delete(options, dontTranslateOwnershipKey) - } - - // Fail if the caller passed us more options than we know about. - if len(options) > 0 { - return nil, fmt.Errorf("unsupported mount options: %v", options) - } - - // The mounting EUID/EGID will be cached by this file system. This will - // be used to assign ownership to files that we own. - owner := fs.FileOwnerFromContext(ctx) - - // Construct the host file system mount and inode. - msrc := newMountSource(ctx, rootPath, owner, f, flags, dontTranslateOwnership) - return newInode(ctx, msrc, fd, false /* saveable */, false /* donated */) -} - -// InstallWhitelist locks down the MountNamespace to only the currently installed -// Dirents and the given paths. -func (f *Filesystem) InstallWhitelist(ctx context.Context, m *fs.MountNamespace) error { - return installWhitelist(ctx, m, f.paths) -} - -func installWhitelist(ctx context.Context, m *fs.MountNamespace, paths []string) error { - if len(paths) == 0 || (len(paths) == 1 && paths[0] == "") { - // Warning will be logged during filter installation if the empty - // whitelist matters (allows for host file access). - return nil - } - - // Done tracks entries already added. - done := make(map[string]bool) - root := m.Root() - defer root.DecRef() - - for i := 0; i < len(paths); i++ { - // Make sure the path is absolute. This is a sanity check. - if !path.IsAbs(paths[i]) { - return fmt.Errorf("path %q is not absolute", paths[i]) - } - - // We need to add all the intermediate paths, in case one of - // them is a symlink that needs to be resolved. - for j := 1; j <= len(paths[i]); j++ { - if j < len(paths[i]) && paths[i][j] != '/' { - continue - } - current := paths[i][:j] - - // Lookup the given component in the tree. - remainingTraversals := uint(maxTraversals) - d, err := m.FindLink(ctx, root, nil, current, &remainingTraversals) - if err != nil { - log.Warningf("populate failed for %q: %v", current, err) - continue - } - - // It's critical that this DecRef happens after the - // freeze below. This ensures that the dentry is in - // place to be frozen. Otherwise, we freeze without - // these entries. - defer d.DecRef() - - // Expand the last component if necessary. - if current == paths[i] { - // Is it a directory or symlink? - sattr := d.Inode.StableAttr - if fs.IsDir(sattr) { - for name := range childDentAttrs(ctx, d) { - paths = append(paths, path.Join(current, name)) - } - } - if fs.IsSymlink(sattr) { - // Only expand symlinks once. The - // folder structure may contain - // recursive symlinks and we don't want - // to end up infinitely expanding this - // symlink. This is safe because this - // is the last component. If a later - // path wants to symlink something - // beneath this symlink that will still - // be handled by the FindLink above. - if done[current] { - continue - } - - s, err := d.Inode.Readlink(ctx) - if err != nil { - log.Warningf("readlink failed for %q: %v", current, err) - continue - } - if path.IsAbs(s) { - paths = append(paths, s) - } else { - target := path.Join(path.Dir(current), s) - paths = append(paths, target) - } - } - } - - // Only report this one once even though we may look - // it up more than once. If we whitelist /a/b,/a then - // /a will be "done" when it is looked up for /a/b, - // however we still need to expand all of its contents - // when whitelisting /a. - if !done[current] { - log.Debugf("whitelisted: %s", current) - } - done[current] = true - } - } - - // Freeze the mount tree in place. This prevents any new paths from - // being opened and any old ones from being removed. If we do provide - // tmpfs mounts, we'll want to freeze/thaw those separately. - m.Freeze() - return nil -} - -func childDentAttrs(ctx context.Context, d *fs.Dirent) map[string]fs.DentAttr { - dirname, _ := d.FullName(nil /* root */) - dir, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) - if err != nil { - log.Warningf("failed to open directory %q: %v", dirname, err) - return nil - } - dir.DecRef() - var stubSerializer fs.CollectEntriesSerializer - if err := dir.Readdir(ctx, &stubSerializer); err != nil { - log.Warningf("failed to iterate on host directory %q: %v", dirname, err) - return nil - } - delete(stubSerializer.Entries, ".") - delete(stubSerializer.Entries, "..") - return stubSerializer.Entries -} - -// newMountSource constructs a new host fs.MountSource -// relative to a root path. The root should match the mount point. -func newMountSource(ctx context.Context, root string, mounter fs.FileOwner, filesystem fs.Filesystem, flags fs.MountSourceFlags, dontTranslateOwnership bool) *fs.MountSource { - return fs.NewMountSource(ctx, &superOperations{ - root: root, - inodeMappings: make(map[uint64]string), - mounter: mounter, - dontTranslateOwnership: dontTranslateOwnership, - }, filesystem, flags) -} - -// superOperations implements fs.MountSourceOperations. -// -// +stateify savable -type superOperations struct { - fs.SimpleMountSourceOperations - - // root is the path of the mount point. All inode mappings - // are relative to this root. - root string - - // inodeMappings contains mappings of fs.Inodes associated - // with this MountSource to paths under root. - inodeMappings map[uint64]string - - // mounter is the cached EUID/EGID that mounted this file system. - mounter fs.FileOwner - - // dontTranslateOwnership indicates whether to not translate file - // ownership. - // - // By default, files/directories owned by the sandbox uses UID/GID - // of the mounter. For files/directories that are not owned by the - // sandbox, file UID/GID is translated to a UID/GID which cannot - // be mapped in the sandboxed application's user namespace. The - // UID/GID will look like the nobody UID/GID (65534) but is not - // strictly owned by the user "nobody". - // - // If whitelistfs is a lower filesystem in an overlay, set - // dont_translate_ownership=true in mount options. - dontTranslateOwnership bool -} - -var _ fs.MountSourceOperations = (*superOperations)(nil) - -// ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings. -func (m *superOperations) ResetInodeMappings() { - m.inodeMappings = make(map[uint64]string) -} - -// SaveInodeMapping implements fs.MountSourceOperations.SaveInodeMapping. -func (m *superOperations) SaveInodeMapping(inode *fs.Inode, path string) { - // This is very unintuitive. We *CANNOT* trust the inode's StableAttrs, - // because overlay copyUp may have changed them out from under us. - // So much for "immutable". - sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr - m.inodeMappings[sattr.InodeID] = path -} - -// Keep implements fs.MountSourceOperations.Keep. -// -// TODO(b/72455313,b/77596690): It is possible to change the permissions on a -// host file while it is in the dirent cache (say from RO to RW), but it is not -// possible to re-open the file with more relaxed permissions, since the host -// FD is already open and stored in the inode. -// -// Using the dirent LRU cache increases the odds that this bug is encountered. -// Since host file access is relatively fast anyways, we disable the LRU cache -// for host fs files. Once we can properly deal with permissions changes and -// re-opening host files, we should revisit whether or not to make use of the -// LRU cache. -func (*superOperations) Keep(*fs.Dirent) bool { - return false -} - -func init() { - fs.RegisterFilesystem(&Filesystem{}) -} diff --git a/pkg/sentry/fs/host/fs_test.go b/pkg/sentry/fs/host/fs_test.go deleted file mode 100644 index 3111d2df9..000000000 --- a/pkg/sentry/fs/host/fs_test.go +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "fmt" - "io/ioutil" - "os" - "path" - "reflect" - "sort" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// newTestMountNamespace creates a MountNamespace with a ramfs root. -// It returns the host folder created, which should be removed when done. -func newTestMountNamespace(t *testing.T) (*fs.MountNamespace, string, error) { - p, err := ioutil.TempDir("", "root") - if err != nil { - return nil, "", err - } - - fd, err := open(nil, p) - if err != nil { - os.RemoveAll(p) - return nil, "", err - } - ctx := contexttest.Context(t) - root, err := newInode(ctx, newMountSource(ctx, p, fs.RootOwner, &Filesystem{}, fs.MountSourceFlags{}, false), fd, false, false) - if err != nil { - os.RemoveAll(p) - return nil, "", err - } - mm, err := fs.NewMountNamespace(ctx, root) - if err != nil { - os.RemoveAll(p) - return nil, "", err - } - return mm, p, nil -} - -// createTestDirs populates the root with some test files and directories. -// /a/a1.txt -// /a/a2.txt -// /b/b1.txt -// /b/c/c1.txt -// /symlinks/normal.txt -// /symlinks/to_normal.txt -> /symlinks/normal.txt -// /symlinks/recursive -> /symlinks -func createTestDirs(ctx context.Context, t *testing.T, m *fs.MountNamespace) error { - r := m.Root() - defer r.DecRef() - - if err := r.CreateDirectory(ctx, r, "a", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - a, err := r.Walk(ctx, r, "a") - if err != nil { - return err - } - defer a.DecRef() - - a1, err := a.Create(ctx, r, "a1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - a1.DecRef() - - a2, err := a.Create(ctx, r, "a2.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - a2.DecRef() - - if err := r.CreateDirectory(ctx, r, "b", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - b, err := r.Walk(ctx, r, "b") - if err != nil { - return err - } - defer b.DecRef() - - b1, err := b.Create(ctx, r, "b1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - b1.DecRef() - - if err := b.CreateDirectory(ctx, r, "c", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - c, err := b.Walk(ctx, r, "c") - if err != nil { - return err - } - defer c.DecRef() - - c1, err := c.Create(ctx, r, "c1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - c1.DecRef() - - if err := r.CreateDirectory(ctx, r, "symlinks", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - symlinks, err := r.Walk(ctx, r, "symlinks") - if err != nil { - return err - } - defer symlinks.DecRef() - - normal, err := symlinks.Create(ctx, r, "normal.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - normal.DecRef() - - if err := symlinks.CreateLink(ctx, r, "/symlinks/normal.txt", "to_normal.txt"); err != nil { - return err - } - - return symlinks.CreateLink(ctx, r, "/symlinks", "recursive") -} - -// allPaths returns a slice of all paths of entries visible in the rootfs. -func allPaths(ctx context.Context, t *testing.T, m *fs.MountNamespace, base string) ([]string, error) { - var paths []string - root := m.Root() - defer root.DecRef() - - maxTraversals := uint(1) - d, err := m.FindLink(ctx, root, nil, base, &maxTraversals) - if err != nil { - t.Logf("FindLink failed for %q", base) - return paths, err - } - defer d.DecRef() - - if fs.IsDir(d.Inode.StableAttr) { - dir, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) - if err != nil { - return nil, fmt.Errorf("failed to open directory %q: %v", base, err) - } - iter, ok := dir.FileOperations.(fs.DirIterator) - if !ok { - return nil, fmt.Errorf("cannot directly iterate on host directory %q", base) - } - dirCtx := &fs.DirCtx{ - Serializer: noopDentrySerializer{}, - } - if _, err := fs.DirentReaddir(ctx, d, iter, root, dirCtx, 0); err != nil { - return nil, err - } - for name := range dirCtx.DentAttrs() { - if name == "." || name == ".." { - continue - } - - fullName := path.Join(base, name) - paths = append(paths, fullName) - - // Recurse. - subpaths, err := allPaths(ctx, t, m, fullName) - if err != nil { - return paths, err - } - paths = append(paths, subpaths...) - } - } - - return paths, nil -} - -type noopDentrySerializer struct{} - -func (noopDentrySerializer) CopyOut(string, fs.DentAttr) error { - return nil -} -func (noopDentrySerializer) Written() int { - return 4096 -} - -// pathsEqual returns true if the two string slices contain the same entries. -func pathsEqual(got, want []string) bool { - sort.Strings(got) - sort.Strings(want) - - if len(got) != len(want) { - return false - } - - for i := range got { - if got[i] != want[i] { - return false - } - } - - return true -} - -func TestWhitelist(t *testing.T) { - for _, test := range []struct { - // description of the test. - desc string - // paths are the paths to whitelist - paths []string - // want are all of the directory entries that should be - // visible (nothing beyond this set should be visible). - want []string - }{ - { - desc: "root", - paths: []string{"/"}, - want: []string{"/a", "/a/a1.txt", "/a/a2.txt", "/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt", "/symlinks", "/symlinks/normal.txt", "/symlinks/to_normal.txt", "/symlinks/recursive"}, - }, - { - desc: "top-level directories", - paths: []string{"/a", "/b"}, - want: []string{"/a", "/a/a1.txt", "/a/a2.txt", "/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "nested directories (1/2)", - paths: []string{"/b", "/b/c"}, - want: []string{"/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "nested directories (2/2)", - paths: []string{"/b/c", "/b"}, - want: []string{"/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "single file", - paths: []string{"/b/c/c1.txt"}, - want: []string{"/b", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "single file and directory", - paths: []string{"/a/a1.txt", "/b/c"}, - want: []string{"/a", "/a/a1.txt", "/b", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "symlink", - paths: []string{"/symlinks/to_normal.txt"}, - want: []string{"/symlinks", "/symlinks/normal.txt", "/symlinks/to_normal.txt"}, - }, - { - desc: "recursive symlink", - paths: []string{"/symlinks/recursive/normal.txt"}, - want: []string{"/symlinks", "/symlinks/normal.txt", "/symlinks/recursive"}, - }, - } { - t.Run(test.desc, func(t *testing.T) { - m, p, err := newTestMountNamespace(t) - if err != nil { - t.Errorf("Failed to create MountNamespace: %v", err) - } - defer os.RemoveAll(p) - - ctx := withRoot(contexttest.RootContext(t), m.Root()) - if err := createTestDirs(ctx, t, m); err != nil { - t.Errorf("Failed to create test dirs: %v", err) - } - - if err := installWhitelist(ctx, m, test.paths); err != nil { - t.Errorf("installWhitelist(%v) err got %v want nil", test.paths, err) - } - - got, err := allPaths(ctx, t, m, "/") - if err != nil { - t.Fatalf("Failed to lookup paths (whitelisted: %v): %v", test.paths, err) - } - - if !pathsEqual(got, test.want) { - t.Errorf("For paths %v got %v want %v", test.paths, got, test.want) - } - }) - } -} - -func TestRootPath(t *testing.T) { - // Create a temp dir, which will be the root of our mounted fs. - rootPath, err := ioutil.TempDir(os.TempDir(), "root") - if err != nil { - t.Fatalf("TempDir failed: %v", err) - } - defer os.RemoveAll(rootPath) - - // Create two files inside the new root, one which will be whitelisted - // and one not. - whitelisted, err := ioutil.TempFile(rootPath, "white") - if err != nil { - t.Fatalf("TempFile failed: %v", err) - } - if _, err := ioutil.TempFile(rootPath, "black"); err != nil { - t.Fatalf("TempFile failed: %v", err) - } - - // Create a mount with a root path and single whitelisted file. - hostFS := &Filesystem{} - ctx := contexttest.Context(t) - data := fmt.Sprintf("%s=%s,%s=%s", rootPathKey, rootPath, whitelistKey, whitelisted.Name()) - inode, err := hostFS.Mount(ctx, "", fs.MountSourceFlags{}, data, nil) - if err != nil { - t.Fatalf("Mount failed: %v", err) - } - mm, err := fs.NewMountNamespace(ctx, inode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - if err := hostFS.InstallWhitelist(ctx, mm); err != nil { - t.Fatalf("InstallWhitelist failed: %v", err) - } - - // Get the contents of the root directory. - rootDir := mm.Root() - rctx := withRoot(ctx, rootDir) - f, err := rootDir.Inode.GetFile(rctx, rootDir, fs.FileFlags{}) - if err != nil { - t.Fatalf("GetFile failed: %v", err) - } - c := &fs.CollectEntriesSerializer{} - if err := f.Readdir(rctx, c); err != nil { - t.Fatalf("Readdir failed: %v", err) - } - - // We should have only our whitelisted file, plus the dots. - want := []string{path.Base(whitelisted.Name()), ".", ".."} - got := c.Order - sort.Strings(want) - sort.Strings(got) - if !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got %v, wanted %v", got, want) - } -} - -type rootContext struct { - context.Context - root *fs.Dirent -} - -// withRoot returns a copy of ctx with the given root. -func withRoot(ctx context.Context, root *fs.Dirent) context.Context { - return &rootContext{ - Context: ctx, - root: root, - } -} - -// Value implements Context.Value. -func (rc rootContext) Value(key interface{}) interface{} { - switch key { - case fs.CtxRoot: - rc.root.IncRef() - return rc.root - default: - return rc.Context.Value(key) - } -} diff --git a/pkg/sentry/fs/host/host.go b/pkg/sentry/fs/host/host.go new file mode 100644 index 000000000..081ba1dd8 --- /dev/null +++ b/pkg/sentry/fs/host/host.go @@ -0,0 +1,59 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package host supports file descriptors imported directly. +package host + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// filesystem is a host filesystem. +// +// +stateify savable +type filesystem struct{} + +func init() { + fs.RegisterFilesystem(&filesystem{}) +} + +// FilesystemName is the name under which the filesystem is registered. +const FilesystemName = "host" + +// Name is the name of the filesystem. +func (*filesystem) Name() string { + return FilesystemName +} + +// Mount returns an error. Mounting hostfs is not allowed. +func (*filesystem) Mount(ctx context.Context, device string, flags fs.MountSourceFlags, data string, dataObj interface{}) (*fs.Inode, error) { + return nil, syserror.EPERM +} + +// AllowUserMount prohibits users from using mount(2) with this file system. +func (*filesystem) AllowUserMount() bool { + return false +} + +// AllowUserList prohibits this filesystem to be listed in /proc/filesystems. +func (*filesystem) AllowUserList() bool { + return false +} + +// Flags returns that there is nothing special about this file system. +func (*filesystem) Flags() fs.FilesystemFlags { + return 0 +} diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 6fa39caab..1da3c0a17 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -17,12 +17,10 @@ package host import ( "syscall" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/secio" - "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -69,9 +67,6 @@ type inodeOperations struct { // // +stateify savable type inodeFileState struct { - // Common file system state. - mops *superOperations `state:"wait"` - // descriptor is the backing host FD. descriptor *descriptor `state:"wait"` @@ -160,7 +155,7 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err if err := syscall.Fstat(i.FD(), &s); err != nil { return fs.UnstableAttr{}, err } - return unstableAttr(i.mops, &s), nil + return unstableAttr(&s), nil } // Allocate implements fsutil.CachedFileObject.Allocate. @@ -172,7 +167,7 @@ func (i *inodeFileState) Allocate(_ context.Context, offset, length int64) error var _ fs.InodeOperations = (*inodeOperations)(nil) // newInode returns a new fs.Inode backed by the host FD. -func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool, donated bool) (*fs.Inode, error) { +func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool) (*fs.Inode, error) { // Retrieve metadata. var s syscall.Stat_t err := syscall.Fstat(fd, &s) @@ -181,24 +176,17 @@ func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool, } fileState := &inodeFileState{ - mops: msrc.MountSourceOperations.(*superOperations), sattr: stableAttr(&s), } // Initialize the wrapped host file descriptor. - fileState.descriptor, err = newDescriptor( - fd, - donated, - saveable, - wouldBlock(&s), - &fileState.queue, - ) + fileState.descriptor, err = newDescriptor(fd, saveable, wouldBlock(&s), &fileState.queue) if err != nil { return nil, err } // Build the fs.InodeOperations. - uattr := unstableAttr(msrc.MountSourceOperations.(*superOperations), &s) + uattr := unstableAttr(&s) iops := &inodeOperations{ fileState: fileState, cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{ @@ -232,54 +220,23 @@ func (i *inodeOperations) Release(context.Context) { // Lookup implements fs.InodeOperations.Lookup. func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { - // Get a new FD relative to i at name. - fd, err := open(i, name) - if err != nil { - if err == syserror.ENOENT { - return nil, syserror.ENOENT - } - return nil, err - } - - inode, err := newInode(ctx, dir.MountSource, fd, false /* saveable */, false /* donated */) - if err != nil { - return nil, err - } - - // Return the fs.Dirent. - return fs.NewDirent(ctx, inode, name), nil + return nil, syserror.ENOENT } // Create implements fs.InodeOperations.Create. func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) { - // Create a file relative to i at name. - // - // N.B. We always open this file O_RDWR regardless of flags because a - // future GetFile might want more access. Open allows this regardless - // of perm. - fd, err := openAt(i, name, syscall.O_RDWR|syscall.O_CREAT|syscall.O_EXCL, perm.LinuxMode()) - if err != nil { - return nil, err - } - - inode, err := newInode(ctx, dir.MountSource, fd, false /* saveable */, false /* donated */) - if err != nil { - return nil, err - } + return nil, syserror.EPERM - d := fs.NewDirent(ctx, inode, name) - defer d.DecRef() - return inode.GetFile(ctx, d, flags) } // CreateDirectory implements fs.InodeOperations.CreateDirectory. func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { - return syscall.Mkdirat(i.fileState.FD(), name, uint32(perm.LinuxMode())) + return syserror.EPERM } // CreateLink implements fs.InodeOperations.CreateLink. func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error { - return createLink(i.fileState.FD(), oldname, newname) + return syserror.EPERM } // CreateHardLink implements fs.InodeOperations.CreateHardLink. @@ -294,25 +251,17 @@ func (*inodeOperations) CreateFifo(context.Context, *fs.Inode, string, fs.FilePe // Remove implements fs.InodeOperations.Remove. func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error { - return unlinkAt(i.fileState.FD(), name, false /* dir */) + return syserror.EPERM } // RemoveDirectory implements fs.InodeOperations.RemoveDirectory. func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error { - return unlinkAt(i.fileState.FD(), name, true /* dir */) + return syserror.EPERM } // Rename implements fs.InodeOperations.Rename. func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { - op, ok := oldParent.InodeOperations.(*inodeOperations) - if !ok { - return syscall.EXDEV - } - np, ok := newParent.InodeOperations.(*inodeOperations) - if !ok { - return syscall.EXDEV - } - return syscall.Renameat(op.fileState.FD(), oldName, np.fileState.FD(), newName) + return syserror.EPERM } // Bind implements fs.InodeOperations.Bind. @@ -461,69 +410,7 @@ func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {} // readdirAll returns all of the directory entries in i. func (i *inodeOperations) readdirAll(d *dirInfo) (map[string]fs.DentAttr, error) { - i.readdirMu.Lock() - defer i.readdirMu.Unlock() - - fd := i.fileState.FD() - - // syscall.ReadDirent will use getdents, which will seek the file past - // the last directory entry. To read the directory entries a second - // time, we need to seek back to the beginning. - if _, err := syscall.Seek(fd, 0, 0); err != nil { - if err == syscall.ESPIPE { - // All directories should be seekable. If this file - // isn't seekable, it is not a directory and we should - // return that more sane error. - err = syscall.ENOTDIR - } - return nil, err - } - - names := make([]string, 0, 100) - for { - // Refill the buffer if necessary - if d.bufp >= d.nbuf { - d.bufp = 0 - // ReadDirent will just do a sys_getdents64 to the kernel. - n, err := syscall.ReadDirent(fd, d.buf) - if err != nil { - return nil, err - } - if n == 0 { - break // EOF - } - d.nbuf = n - } - - var nb int - // Parse the dirent buffer we just get and return the directory names along - // with the number of bytes consumed in the buffer. - nb, _, names = syscall.ParseDirent(d.buf[d.bufp:d.nbuf], -1, names) - d.bufp += nb - } - - entries := make(map[string]fs.DentAttr) - for _, filename := range names { - // Lookup the type and host device and inode. - stat, lerr := fstatat(fd, filename, linux.AT_SYMLINK_NOFOLLOW) - if lerr == syscall.ENOENT { - // File disappeared between readdir and lstat. - // Just treat it as if it didn't exist. - continue - } - - // There was a serious problem, we should probably report it. - if lerr != nil { - return nil, lerr - } - - entries[filename] = fs.DentAttr{ - Type: nodeType(&stat), - InodeID: hostFileDevice.Map(device.MultiDeviceKey{ - Device: stat.Dev, - Inode: stat.Ino, - }), - } - } - return entries, nil + // We only support non-directory file descriptors that have been + // imported, so just claim that this isn't a directory, even if it is. + return nil, syscall.ENOTDIR } diff --git a/pkg/sentry/fs/host/inode_state.go b/pkg/sentry/fs/host/inode_state.go index 299e0e0b0..1adbd4562 100644 --- a/pkg/sentry/fs/host/inode_state.go +++ b/pkg/sentry/fs/host/inode_state.go @@ -18,29 +18,14 @@ import ( "fmt" "syscall" - "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" ) -// beforeSave is invoked by stateify. -func (i *inodeFileState) beforeSave() { - if !i.queue.IsEmpty() { - panic("event queue must be empty") - } - if !i.descriptor.donated && i.sattr.Type == fs.RegularFile { - uattr, err := i.unstableAttr(context.Background()) - if err != nil { - panic(fs.ErrSaveRejection{fmt.Errorf("failed to get unstable atttribute of %s: %v", i.mops.inodeMappings[i.sattr.InodeID], err)}) - } - i.savedUAttr = &uattr - } -} - // afterLoad is invoked by stateify. func (i *inodeFileState) afterLoad() { // Initialize the descriptor value. - if err := i.descriptor.initAfterLoad(i.mops, i.sattr.InodeID, &i.queue); err != nil { + if err := i.descriptor.initAfterLoad(i.sattr.InodeID, &i.queue); err != nil { panic(fmt.Sprintf("failed to load value of descriptor: %v", err)) } @@ -61,19 +46,4 @@ func (i *inodeFileState) afterLoad() { // change across save and restore, error out. panic(fs.ErrCorruption{fmt.Errorf("host %s conflict in host device mappings: %s", key, hostFileDevice)}) } - - if !i.descriptor.donated && i.sattr.Type == fs.RegularFile { - env, ok := fs.CurrentRestoreEnvironment() - if !ok { - panic("missing restore environment") - } - uattr := unstableAttr(i.mops, &s) - if env.ValidateFileSize && uattr.Size != i.savedUAttr.Size { - panic(fs.ErrCorruption{fmt.Errorf("file size has changed for %s: previously %d, now %d", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.Size, uattr.Size)}) - } - if env.ValidateFileTimestamp && uattr.ModificationTime != i.savedUAttr.ModificationTime { - panic(fs.ErrCorruption{fmt.Errorf("file modification time has changed for %s: previously %v, now %v", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.ModificationTime, uattr.ModificationTime)}) - } - i.savedUAttr = nil - } } diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go index 7221bc825..4c374681c 100644 --- a/pkg/sentry/fs/host/inode_test.go +++ b/pkg/sentry/fs/host/inode_test.go @@ -15,9 +15,6 @@ package host import ( - "io/ioutil" - "os" - "path" "syscall" "testing" @@ -25,69 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" ) -// TestMultipleReaddir verifies that multiple Readdir calls return the same -// thing if they use different dir contexts. -func TestMultipleReaddir(t *testing.T) { - p, err := ioutil.TempDir("", "readdir") - if err != nil { - t.Fatalf("Failed to create test dir: %v", err) - } - defer os.RemoveAll(p) - - f, err := os.Create(path.Join(p, "a.txt")) - if err != nil { - t.Fatalf("Failed to create a.txt: %v", err) - } - f.Close() - - f, err = os.Create(path.Join(p, "b.txt")) - if err != nil { - t.Fatalf("Failed to create b.txt: %v", err) - } - f.Close() - - fd, err := open(nil, p) - if err != nil { - t.Fatalf("Failed to open %q: %v", p, err) - } - ctx := contexttest.Context(t) - n, err := newInode(ctx, newMountSource(ctx, p, fs.RootOwner, &Filesystem{}, fs.MountSourceFlags{}, false), fd, false, false) - if err != nil { - t.Fatalf("Failed to create inode: %v", err) - } - - dirent := fs.NewDirent(ctx, n, "readdir") - openFile, err := n.GetFile(ctx, dirent, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("Failed to get file: %v", err) - } - defer openFile.DecRef() - - c1 := &fs.DirCtx{DirCursor: new(string)} - if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c1, 0); err != nil { - t.Fatalf("First Readdir failed: %v", err) - } - - c2 := &fs.DirCtx{DirCursor: new(string)} - if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c2, 0); err != nil { - t.Errorf("Second Readdir failed: %v", err) - } - - if _, ok := c1.DentAttrs()["a.txt"]; !ok { - t.Errorf("want a.txt in first Readdir, got %v", c1.DentAttrs()) - } - if _, ok := c1.DentAttrs()["b.txt"]; !ok { - t.Errorf("want b.txt in first Readdir, got %v", c1.DentAttrs()) - } - - if _, ok := c2.DentAttrs()["a.txt"]; !ok { - t.Errorf("want a.txt in second Readdir, got %v", c2.DentAttrs()) - } - if _, ok := c2.DentAttrs()["b.txt"]; !ok { - t.Errorf("want b.txt in second Readdir, got %v", c2.DentAttrs()) - } -} - // TestCloseFD verifies fds will be closed. func TestCloseFD(t *testing.T) { var p [2]int diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go index 7c60dc1db..388108fdf 100644 --- a/pkg/sentry/fs/host/util.go +++ b/pkg/sentry/fs/host/util.go @@ -16,7 +16,6 @@ package host import ( "os" - "path" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" @@ -28,45 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) -func open(parent *inodeOperations, name string) (int, error) { - if parent == nil && !path.IsAbs(name) { - return -1, syserror.EINVAL - } - name = path.Clean(name) - - // Don't follow through symlinks. - flags := syscall.O_NOFOLLOW - - if fd, err := openAt(parent, name, flags|syscall.O_RDWR, 0); err == nil { - return fd, nil - } - // Retry as read-only. - if fd, err := openAt(parent, name, flags|syscall.O_RDONLY, 0); err == nil { - return fd, nil - } - - // Retry as write-only. - if fd, err := openAt(parent, name, flags|syscall.O_WRONLY, 0); err == nil { - return fd, nil - } - - // Retry as a symlink, by including O_PATH as an option. - fd, err := openAt(parent, name, linux.O_PATH|flags, 0) - if err == nil { - return fd, nil - } - - // Everything failed. - return -1, err -} - -func openAt(parent *inodeOperations, name string, flags int, perm linux.FileMode) (int, error) { - if parent == nil { - return syscall.Open(name, flags, uint32(perm)) - } - return syscall.Openat(parent.fileState.FD(), name, flags, uint32(perm)) -} - func nodeType(s *syscall.Stat_t) fs.InodeType { switch x := (s.Mode & syscall.S_IFMT); x { case syscall.S_IFLNK: @@ -107,51 +67,19 @@ func stableAttr(s *syscall.Stat_t) fs.StableAttr { } } -func owner(mo *superOperations, s *syscall.Stat_t) fs.FileOwner { - // User requested no translation, just return actual owner. - if mo.dontTranslateOwnership { - return fs.FileOwner{auth.KUID(s.Uid), auth.KGID(s.Gid)} - } - - // Show only IDs relevant to the sandboxed task. I.e. if we not own the - // file, no sandboxed task can own the file. In that case, we - // use OverflowID for UID, implying that the IDs are not mapped in the - // "root" user namespace. - // - // E.g. - // sandbox's host EUID/EGID is 1/1. - // some_dir's host UID/GID is 2/1. - // Task that mounted this fs has virtualized EUID/EGID 5/5. - // - // If you executed `ls -n` in the sandboxed task, it would show: - // drwxwrxwrx [...] 65534 5 [...] some_dir - - // Files are owned by OverflowID by default. - owner := fs.FileOwner{auth.KUID(auth.OverflowUID), auth.KGID(auth.OverflowGID)} - - // If we own file on host, let mounting task's initial EUID own - // the file. - if s.Uid == hostUID { - owner.UID = mo.mounter.UID - } - - // If our group matches file's group, make file's group match - // the mounting task's initial EGID. - for _, gid := range hostGIDs { - if s.Gid == gid { - owner.GID = mo.mounter.GID - break - } +func owner(s *syscall.Stat_t) fs.FileOwner { + return fs.FileOwner{ + UID: auth.KUID(s.Uid), + GID: auth.KGID(s.Gid), } - return owner } -func unstableAttr(mo *superOperations, s *syscall.Stat_t) fs.UnstableAttr { +func unstableAttr(s *syscall.Stat_t) fs.UnstableAttr { return fs.UnstableAttr{ Size: s.Size, Usage: s.Blocks * 512, Perms: fs.FilePermsFromMode(linux.FileMode(s.Mode)), - Owner: owner(mo, s), + Owner: owner(s), AccessTime: time.FromUnix(s.Atim.Sec, s.Atim.Nsec), ModificationTime: time.FromUnix(s.Mtim.Sec, s.Mtim.Nsec), StatusChangeTime: time.FromUnix(s.Ctim.Sec, s.Ctim.Nsec), diff --git a/pkg/sentry/fs/host/util_unsafe.go b/pkg/sentry/fs/host/util_unsafe.go index 3ab36b088..23bd35d64 100644 --- a/pkg/sentry/fs/host/util_unsafe.go +++ b/pkg/sentry/fs/host/util_unsafe.go @@ -26,26 +26,6 @@ import ( // NulByte is a single NUL byte. It is passed to readlinkat as an empty string. var NulByte byte = '\x00' -func createLink(fd int, name string, linkName string) error { - namePtr, err := syscall.BytePtrFromString(name) - if err != nil { - return err - } - linkNamePtr, err := syscall.BytePtrFromString(linkName) - if err != nil { - return err - } - _, _, errno := syscall.Syscall( - syscall.SYS_SYMLINKAT, - uintptr(unsafe.Pointer(namePtr)), - uintptr(fd), - uintptr(unsafe.Pointer(linkNamePtr))) - if errno != 0 { - return errno - } - return nil -} - func readLink(fd int) (string, error) { // Buffer sizing copied from os.Readlink. for l := 128; ; l *= 2 { @@ -66,27 +46,6 @@ func readLink(fd int) (string, error) { } } -func unlinkAt(fd int, name string, dir bool) error { - namePtr, err := syscall.BytePtrFromString(name) - if err != nil { - return err - } - var flags uintptr - if dir { - flags = linux.AT_REMOVEDIR - } - _, _, errno := syscall.Syscall( - syscall.SYS_UNLINKAT, - uintptr(fd), - uintptr(unsafe.Pointer(namePtr)), - flags, - ) - if errno != 0 { - return errno - } - return nil -} - func timespecFromTimestamp(t ktime.Time, omit, setSysTime bool) syscall.Timespec { if omit { return syscall.Timespec{0, linux.UTIME_OMIT} -- cgit v1.2.3 From 3a42638a0b32ceede66d8d593609b424bbdba47e Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 18 Mar 2020 19:08:46 -0700 Subject: Port imported TTY fds to vfs2. Refactor fs/host.TTYFileOperations so that the relevant functionality can be shared with VFS2 (fsimpl/host.ttyFD). Incorporate host.defaultFileFD into the default host.fileDescription. This way, there is no need for a separate default_file.go. As in vfs1, the TTY file implementation can be built on top of this default and override operations as necessary (PRead/Read/PWrite/Write, Release, Ioctl). Note that these changes still need to be plumbed into runsc, which refers to imported TTYs in control/proc.go:ExecAsync. Updates #1672. PiperOrigin-RevId: 301718157 --- pkg/sentry/fs/host/ioctl_unsafe.go | 4 + pkg/sentry/fs/host/tty.go | 5 + pkg/sentry/fs/host/util.go | 8 +- pkg/sentry/fsimpl/host/BUILD | 6 +- pkg/sentry/fsimpl/host/default_file.go | 247 --------------------- pkg/sentry/fsimpl/host/host.go | 258 ++++++++++++++++++++-- pkg/sentry/fsimpl/host/ioctl_unsafe.go | 56 +++++ pkg/sentry/fsimpl/host/tty.go | 379 +++++++++++++++++++++++++++++++++ 8 files changed, 692 insertions(+), 271 deletions(-) delete mode 100644 pkg/sentry/fsimpl/host/default_file.go create mode 100644 pkg/sentry/fsimpl/host/ioctl_unsafe.go create mode 100644 pkg/sentry/fsimpl/host/tty.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/host/ioctl_unsafe.go b/pkg/sentry/fs/host/ioctl_unsafe.go index 271582e54..150ac8e19 100644 --- a/pkg/sentry/fs/host/ioctl_unsafe.go +++ b/pkg/sentry/fs/host/ioctl_unsafe.go @@ -21,6 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" ) +// LINT.IfChange + func ioctlGetTermios(fd int) (*linux.Termios, error) { var t linux.Termios _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TCGETS, uintptr(unsafe.Pointer(&t))) @@ -54,3 +56,5 @@ func ioctlSetWinsize(fd int, w *linux.Winsize) error { } return nil } + +// LINT.ThenChange(../../fsimpl/host/ioctl_unsafe.go) diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 3f218b4a7..cb91355ab 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -26,6 +26,8 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) +// LINT.IfChange + // TTYFileOperations implements fs.FileOperations for a host file descriptor // that wraps a TTY FD. // @@ -43,6 +45,7 @@ type TTYFileOperations struct { // connected to this TTY. fgProcessGroup *kernel.ProcessGroup + // termios contains the terminal attributes for this TTY. termios linux.KernelTermios } @@ -357,3 +360,5 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e _ = pg.SendSignal(kernel.SignalInfoPriv(sig)) return kernel.ERESTARTSYS } + +// LINT.ThenChange(../../fsimpl/host/tty.go) diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go index 388108fdf..1b0356930 100644 --- a/pkg/sentry/fs/host/util.go +++ b/pkg/sentry/fs/host/util.go @@ -23,7 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/time" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/syserror" ) @@ -80,9 +80,9 @@ func unstableAttr(s *syscall.Stat_t) fs.UnstableAttr { Usage: s.Blocks * 512, Perms: fs.FilePermsFromMode(linux.FileMode(s.Mode)), Owner: owner(s), - AccessTime: time.FromUnix(s.Atim.Sec, s.Atim.Nsec), - ModificationTime: time.FromUnix(s.Mtim.Sec, s.Mtim.Nsec), - StatusChangeTime: time.FromUnix(s.Ctim.Sec, s.Ctim.Nsec), + AccessTime: ktime.FromUnix(s.Atim.Sec, s.Atim.Nsec), + ModificationTime: ktime.FromUnix(s.Mtim.Sec, s.Mtim.Nsec), + StatusChangeTime: ktime.FromUnix(s.Ctim.Sec, s.Ctim.Nsec), Links: uint64(s.Nlink), } } diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 5d67f88e3..0bb4a5c3e 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -5,8 +5,9 @@ licenses(["notice"]) go_library( name = "host", srcs = [ - "default_file.go", "host.go", + "ioctl_unsafe.go", + "tty.go", "util.go", ], visibility = ["//pkg/sentry:internal"], @@ -17,9 +18,12 @@ go_library( "//pkg/log", "//pkg/refs", "//pkg/safemem", + "//pkg/sentry/arch", "//pkg/sentry/fsimpl/kernfs", + "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", + "//pkg/sentry/unimpl", "//pkg/sentry/vfs", "//pkg/sync", "//pkg/syserror", diff --git a/pkg/sentry/fsimpl/host/default_file.go b/pkg/sentry/fsimpl/host/default_file.go deleted file mode 100644 index 459238603..000000000 --- a/pkg/sentry/fsimpl/host/default_file.go +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "math" - "syscall" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fd" - "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// defaultFileFD implements FileDescriptionImpl for non-socket, non-TTY files. -type defaultFileFD struct { - fileDescription - - // canMap specifies whether we allow the file to be memory mapped. - canMap bool - - // mu protects the fields below. - mu sync.Mutex - - // offset specifies the current file offset. - offset int64 -} - -// TODO(gvisor.dev/issue/1672): Implement Waitable interface. - -// PRead implements FileDescriptionImpl. -func (f *defaultFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. - if f.inode.isStream { - return 0, syserror.ESPIPE - } - - return readFromHostFD(ctx, f.inode.hostFD, dst, offset, int(opts.Flags)) -} - -// Read implements FileDescriptionImpl. -func (f *defaultFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. - if f.inode.isStream { - // These files can't be memory mapped, assert this. - if f.canMap { - panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") - } - - n, err := readFromHostFD(ctx, f.inode.hostFD, dst, -1, int(opts.Flags)) - if isBlockError(err) { - // If we got any data at all, return it as a "completed" partial read - // rather than retrying until complete. - if n != 0 { - err = nil - } else { - err = syserror.ErrWouldBlock - } - } - return n, err - } - // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so. - f.mu.Lock() - n, err := readFromHostFD(ctx, f.inode.hostFD, dst, f.offset, int(opts.Flags)) - f.offset += n - f.mu.Unlock() - return n, err -} - -func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags int) (int64, error) { - // TODO(gvisor.dev/issue/1672): Support select preadv2 flags. - if flags != 0 { - return 0, syserror.EOPNOTSUPP - } - - var reader safemem.Reader - if offset == -1 { - reader = safemem.FromIOReader{fd.NewReadWriter(hostFD)} - } else { - reader = safemem.FromVecReaderFunc{ - func(srcs [][]byte) (int64, error) { - n, err := unix.Preadv(hostFD, srcs, offset) - return int64(n), err - }, - } - } - n, err := dst.CopyOutFrom(ctx, reader) - return int64(n), err -} - -// PWrite implements FileDescriptionImpl. -func (f *defaultFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. - if f.inode.isStream { - return 0, syserror.ESPIPE - } - return writeToHostFD(ctx, f.inode.hostFD, src, offset, int(opts.Flags)) -} - -// Write implements FileDescriptionImpl. -func (f *defaultFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. - if f.inode.isStream { - // These files can't be memory mapped, assert this. - if f.canMap { - panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") - } - - n, err := writeToHostFD(ctx, f.inode.hostFD, src, -1, int(opts.Flags)) - if isBlockError(err) { - err = syserror.ErrWouldBlock - } - return n, err - } - // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so. - // TODO(gvisor.dev/issue/1672): Write to end of file and update offset if O_APPEND is set on this file. - f.mu.Lock() - n, err := writeToHostFD(ctx, f.inode.hostFD, src, f.offset, int(opts.Flags)) - f.offset += n - f.mu.Unlock() - return n, err -} - -func writeToHostFD(ctx context.Context, hostFD int, src usermem.IOSequence, offset int64, flags int) (int64, error) { - // TODO(gvisor.dev/issue/1672): Support select pwritev2 flags. - if flags != 0 { - return 0, syserror.EOPNOTSUPP - } - - limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes()) - if err != nil { - return 0, err - } - src = src.TakeFirst64(limit) - - var writer safemem.Writer - if offset == -1 { - writer = safemem.FromIOWriter{fd.NewReadWriter(hostFD)} - } else { - writer = safemem.FromVecWriterFunc{ - func(srcs [][]byte) (int64, error) { - n, err := unix.Pwritev(hostFD, srcs, offset) - return int64(n), err - }, - } - } - n, err := src.CopyInTo(ctx, writer) - return int64(n), err -} - -// Seek implements FileDescriptionImpl. -// -// Note that we do not support seeking on directories, since we do not even -// allow directory fds to be imported at all. -func (f *defaultFileFD) Seek(_ context.Context, offset int64, whence int32) (int64, error) { - // TODO(b/34716638): Some char devices do support seeking, e.g. /dev/null. - if f.inode.isStream { - return 0, syserror.ESPIPE - } - - f.mu.Lock() - defer f.mu.Unlock() - - switch whence { - case linux.SEEK_SET: - if offset < 0 { - return f.offset, syserror.EINVAL - } - f.offset = offset - - case linux.SEEK_CUR: - // Check for overflow. Note that underflow cannot occur, since f.offset >= 0. - if offset > math.MaxInt64-f.offset { - return f.offset, syserror.EOVERFLOW - } - if f.offset+offset < 0 { - return f.offset, syserror.EINVAL - } - f.offset += offset - - case linux.SEEK_END: - var s syscall.Stat_t - if err := syscall.Fstat(f.inode.hostFD, &s); err != nil { - return f.offset, err - } - size := s.Size - - // Check for overflow. Note that underflow cannot occur, since size >= 0. - if offset > math.MaxInt64-size { - return f.offset, syserror.EOVERFLOW - } - if size+offset < 0 { - return f.offset, syserror.EINVAL - } - f.offset = size + offset - - case linux.SEEK_DATA, linux.SEEK_HOLE: - // Modifying the offset in the host file table should not matter, since - // this is the only place where we use it. - // - // For reading and writing, we always rely on our internal offset. - n, err := unix.Seek(f.inode.hostFD, offset, int(whence)) - if err != nil { - return f.offset, err - } - f.offset = n - - default: - // Invalid whence. - return f.offset, syserror.EINVAL - } - - return f.offset, nil -} - -// Sync implements FileDescriptionImpl. -func (f *defaultFileFD) Sync(context.Context) error { - // TODO(gvisor.dev/issue/1672): Currently we do not support the SyncData optimization, so we always sync everything. - return unix.Fsync(f.inode.hostFD) -} - -// ConfigureMMap implements FileDescriptionImpl. -func (f *defaultFileFD) ConfigureMMap(_ context.Context, opts *memmap.MMapOpts) error { - if !f.canMap { - return syserror.ENODEV - } - // TODO(gvisor.dev/issue/1672): Implement ConfigureMMap and Mappable interface. - return syserror.ENODEV -} diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 2eebcd60c..3afb41395 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -19,18 +19,23 @@ package host import ( "errors" "fmt" + "math" "syscall" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // filesystem implements vfs.FilesystemImpl. @@ -70,10 +75,20 @@ func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID hostFD: hostFD, isStream: isStream, isTTY: isTTY, + canMap: canMap(uint32(fileType)), ino: fs.NextIno(), mode: fileMode, uid: ownerUID, gid: ownerGID, + // For simplicity, set offset to 0. Technically, we should + // only set to 0 on files that are not seekable (sockets, pipes, etc.), + // and use the offset from the host fd otherwise. + offset: 0, + } + + // These files can't be memory mapped, assert this. + if i.isStream && i.canMap { + panic("files that can return EWOULDBLOCK (sockets, pipes, etc.) cannot be memory mapped") } d := &kernfs.Dentry{} @@ -110,12 +125,17 @@ type inode struct { // This field is initialized at creation time and is immutable. isTTY bool + // canMap specifies whether we allow the file to be memory mapped. + // + // This field is initialized at creation time and is immutable. + canMap bool + // ino is an inode number unique within this filesystem. + // + // This field is initialized at creation time and is immutable. ino uint64 - // mu protects the inode metadata below. - // TODO(gvisor.dev/issue/1672): actually protect fields below. - //mu sync.Mutex + // TODO(gvisor.dev/issue/1672): protect mode, uid, and gid with mutex. // mode is the file mode of this inode. Note that this value may become out // of date if the mode is changed on the host, e.g. with chmod. @@ -125,6 +145,12 @@ type inode struct { // file created on import, not the fd on the host. uid auth.KUID gid auth.KGID + + // offsetMu protects offset. + offsetMu sync.Mutex + + // offset specifies the current file offset. + offset int64 } // Note that these flags may become out of date, since they can be modified @@ -336,36 +362,40 @@ func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that // we don't allow importing arbitrary file types without proper support. + var ( + vfsfd *vfs.FileDescription + fdImpl vfs.FileDescriptionImpl + ) if i.isTTY { - // TODO(gvisor.dev/issue/1672): support importing host fd as TTY. - return nil, errors.New("importing host fd as TTY not supported") - } - - // For simplicity, set offset to 0. Technically, we should - // only set to 0 on files that are not seekable (sockets, pipes, etc.), - // and use the offset from the host fd otherwise. - fd := &defaultFileFD{ - fileDescription: fileDescription{ - inode: i, - }, - canMap: canMap(uint32(fileType)), - mu: sync.Mutex{}, - offset: 0, + fd := &ttyFD{ + fileDescription: fileDescription{inode: i}, + termios: linux.DefaultSlaveTermios, + } + vfsfd = &fd.vfsfd + fdImpl = fd + } else { + // For simplicity, set offset to 0. Technically, we should + // only set to 0 on files that are not seekable (sockets, pipes, etc.), + // and use the offset from the host fd otherwise. + fd := &fileDescription{inode: i} + vfsfd = &fd.vfsfd + fdImpl = fd } - vfsfd := &fd.vfsfd flags, err := fileFlagsFromHostFD(i.hostFD) if err != nil { return nil, err } - if err := vfsfd.Init(fd, uint32(flags), mnt, d, &vfs.FileDescriptionOptions{}); err != nil { + if err := vfsfd.Init(fdImpl, uint32(flags), mnt, d, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } return vfsfd, nil } // fileDescription is embedded by host fd implementations of FileDescriptionImpl. +// +// TODO(gvisor.dev/issue/1672): Implement Waitable interface. type fileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl @@ -394,3 +424,193 @@ func (f *fileDescription) Stat(_ context.Context, opts vfs.StatOptions) (linux.S func (f *fileDescription) Release() { // noop } + +// PRead implements FileDescriptionImpl. +func (f *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + i := f.inode + // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. + if i.isStream { + return 0, syserror.ESPIPE + } + + return readFromHostFD(ctx, i.hostFD, dst, offset, opts.Flags) +} + +// Read implements FileDescriptionImpl. +func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + i := f.inode + // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. + if i.isStream { + n, err := readFromHostFD(ctx, i.hostFD, dst, -1, opts.Flags) + if isBlockError(err) { + // If we got any data at all, return it as a "completed" partial read + // rather than retrying until complete. + if n != 0 { + err = nil + } else { + err = syserror.ErrWouldBlock + } + } + return n, err + } + // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so. + i.offsetMu.Lock() + n, err := readFromHostFD(ctx, i.hostFD, dst, i.offset, opts.Flags) + i.offset += n + i.offsetMu.Unlock() + return n, err +} + +func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags uint32) (int64, error) { + // TODO(gvisor.dev/issue/1672): Support select preadv2 flags. + if flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + var reader safemem.Reader + if offset == -1 { + reader = safemem.FromIOReader{fd.NewReadWriter(hostFD)} + } else { + reader = safemem.FromVecReaderFunc{ + func(srcs [][]byte) (int64, error) { + n, err := unix.Preadv(hostFD, srcs, offset) + return int64(n), err + }, + } + } + n, err := dst.CopyOutFrom(ctx, reader) + return int64(n), err +} + +// PWrite implements FileDescriptionImpl. +func (f *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + i := f.inode + // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. + if i.isStream { + return 0, syserror.ESPIPE + } + + return writeToHostFD(ctx, i.hostFD, src, offset, opts.Flags) +} + +// Write implements FileDescriptionImpl. +func (f *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + i := f.inode + // TODO(b/34716638): Some char devices do support offsets, e.g. /dev/null. + if i.isStream { + n, err := writeToHostFD(ctx, i.hostFD, src, -1, opts.Flags) + if isBlockError(err) { + err = syserror.ErrWouldBlock + } + return n, err + } + // TODO(gvisor.dev/issue/1672): Cache pages, when forced to do so. + // TODO(gvisor.dev/issue/1672): Write to end of file and update offset if O_APPEND is set on this file. + i.offsetMu.Lock() + n, err := writeToHostFD(ctx, i.hostFD, src, i.offset, opts.Flags) + i.offset += n + i.offsetMu.Unlock() + return n, err +} + +func writeToHostFD(ctx context.Context, hostFD int, src usermem.IOSequence, offset int64, flags uint32) (int64, error) { + // TODO(gvisor.dev/issue/1672): Support select pwritev2 flags. + if flags != 0 { + return 0, syserror.EOPNOTSUPP + } + + var writer safemem.Writer + if offset == -1 { + writer = safemem.FromIOWriter{fd.NewReadWriter(hostFD)} + } else { + writer = safemem.FromVecWriterFunc{ + func(srcs [][]byte) (int64, error) { + n, err := unix.Pwritev(hostFD, srcs, offset) + return int64(n), err + }, + } + } + n, err := src.CopyInTo(ctx, writer) + return int64(n), err +} + +// Seek implements FileDescriptionImpl. +// +// Note that we do not support seeking on directories, since we do not even +// allow directory fds to be imported at all. +func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (int64, error) { + i := f.inode + // TODO(b/34716638): Some char devices do support seeking, e.g. /dev/null. + if i.isStream { + return 0, syserror.ESPIPE + } + + i.offsetMu.Lock() + defer i.offsetMu.Unlock() + + switch whence { + case linux.SEEK_SET: + if offset < 0 { + return i.offset, syserror.EINVAL + } + i.offset = offset + + case linux.SEEK_CUR: + // Check for overflow. Note that underflow cannot occur, since i.offset >= 0. + if offset > math.MaxInt64-i.offset { + return i.offset, syserror.EOVERFLOW + } + if i.offset+offset < 0 { + return i.offset, syserror.EINVAL + } + i.offset += offset + + case linux.SEEK_END: + var s syscall.Stat_t + if err := syscall.Fstat(i.hostFD, &s); err != nil { + return i.offset, err + } + size := s.Size + + // Check for overflow. Note that underflow cannot occur, since size >= 0. + if offset > math.MaxInt64-size { + return i.offset, syserror.EOVERFLOW + } + if size+offset < 0 { + return i.offset, syserror.EINVAL + } + i.offset = size + offset + + case linux.SEEK_DATA, linux.SEEK_HOLE: + // Modifying the offset in the host file table should not matter, since + // this is the only place where we use it. + // + // For reading and writing, we always rely on our internal offset. + n, err := unix.Seek(i.hostFD, offset, int(whence)) + if err != nil { + return i.offset, err + } + i.offset = n + + default: + // Invalid whence. + return i.offset, syserror.EINVAL + } + + return i.offset, nil +} + +// Sync implements FileDescriptionImpl. +func (f *fileDescription) Sync(context.Context) error { + // TODO(gvisor.dev/issue/1672): Currently we do not support the SyncData optimization, so we always sync everything. + return unix.Fsync(f.inode.hostFD) +} + +// ConfigureMMap implements FileDescriptionImpl. +func (f *fileDescription) ConfigureMMap(_ context.Context, opts *memmap.MMapOpts) error { + if !f.inode.canMap { + return syserror.ENODEV + } + // TODO(gvisor.dev/issue/1672): Implement ConfigureMMap and Mappable interface. + return syserror.ENODEV +} diff --git a/pkg/sentry/fsimpl/host/ioctl_unsafe.go b/pkg/sentry/fsimpl/host/ioctl_unsafe.go new file mode 100644 index 000000000..0983bf7d8 --- /dev/null +++ b/pkg/sentry/fsimpl/host/ioctl_unsafe.go @@ -0,0 +1,56 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "syscall" + "unsafe" + + "gvisor.dev/gvisor/pkg/abi/linux" +) + +func ioctlGetTermios(fd int) (*linux.Termios, error) { + var t linux.Termios + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TCGETS, uintptr(unsafe.Pointer(&t))) + if errno != 0 { + return nil, errno + } + return &t, nil +} + +func ioctlSetTermios(fd int, req uint64, t *linux.Termios) error { + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(unsafe.Pointer(t))) + if errno != 0 { + return errno + } + return nil +} + +func ioctlGetWinsize(fd int) (*linux.Winsize, error) { + var w linux.Winsize + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TIOCGWINSZ, uintptr(unsafe.Pointer(&w))) + if errno != 0 { + return nil, errno + } + return &w, nil +} + +func ioctlSetWinsize(fd int, w *linux.Winsize) error { + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TIOCSWINSZ, uintptr(unsafe.Pointer(w))) + if errno != 0 { + return errno + } + return nil +} diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go new file mode 100644 index 000000000..8936afb06 --- /dev/null +++ b/pkg/sentry/fsimpl/host/tty.go @@ -0,0 +1,379 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/unimpl" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// ttyFD implements vfs.FileDescriptionImpl for a host file descriptor +// that wraps a TTY FD. +type ttyFD struct { + fileDescription + + // mu protects the fields below. + mu sync.Mutex `state:"nosave"` + + // session is the session attached to this ttyFD. + session *kernel.Session + + // fgProcessGroup is the foreground process group that is currently + // connected to this TTY. + fgProcessGroup *kernel.ProcessGroup + + // termios contains the terminal attributes for this TTY. + termios linux.KernelTermios +} + +// InitForegroundProcessGroup sets the foreground process group and session for +// the TTY. This should only be called once, after the foreground process group +// has been created, but before it has started running. +func (t *ttyFD) InitForegroundProcessGroup(pg *kernel.ProcessGroup) { + t.mu.Lock() + defer t.mu.Unlock() + if t.fgProcessGroup != nil { + panic("foreground process group is already set") + } + t.fgProcessGroup = pg + t.session = pg.Session() +} + +// ForegroundProcessGroup returns the foreground process for the TTY. +func (t *ttyFD) ForegroundProcessGroup() *kernel.ProcessGroup { + t.mu.Lock() + defer t.mu.Unlock() + return t.fgProcessGroup +} + +// Release implements fs.FileOperations.Release. +func (t *ttyFD) Release() { + t.mu.Lock() + t.fgProcessGroup = nil + t.mu.Unlock() + + t.fileDescription.Release() +} + +// PRead implements vfs.FileDescriptionImpl. +// +// Reading from a TTY is only allowed for foreground process groups. Background +// process groups will either get EIO or a SIGTTIN. +func (t *ttyFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + t.mu.Lock() + defer t.mu.Unlock() + + // Are we allowed to do the read? + // drivers/tty/n_tty.c:n_tty_read()=>job_control()=>tty_check_change(). + if err := t.checkChange(ctx, linux.SIGTTIN); err != nil { + return 0, err + } + + // Do the read. + return t.fileDescription.PRead(ctx, dst, offset, opts) +} + +// Read implements vfs.FileDescriptionImpl. +// +// Reading from a TTY is only allowed for foreground process groups. Background +// process groups will either get EIO or a SIGTTIN. +func (t *ttyFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + t.mu.Lock() + defer t.mu.Unlock() + + // Are we allowed to do the read? + // drivers/tty/n_tty.c:n_tty_read()=>job_control()=>tty_check_change(). + if err := t.checkChange(ctx, linux.SIGTTIN); err != nil { + return 0, err + } + + // Do the read. + return t.fileDescription.Read(ctx, dst, opts) +} + +// PWrite implements vfs.FileDescriptionImpl. +func (t *ttyFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + t.mu.Lock() + defer t.mu.Unlock() + + // Check whether TOSTOP is enabled. This corresponds to the check in + // drivers/tty/n_tty.c:n_tty_write(). + if t.termios.LEnabled(linux.TOSTOP) { + if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { + return 0, err + } + } + return t.fileDescription.PWrite(ctx, src, offset, opts) +} + +// Write implements vfs.FileDescriptionImpl. +func (t *ttyFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + t.mu.Lock() + defer t.mu.Unlock() + + // Check whether TOSTOP is enabled. This corresponds to the check in + // drivers/tty/n_tty.c:n_tty_write(). + if t.termios.LEnabled(linux.TOSTOP) { + if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { + return 0, err + } + } + return t.fileDescription.Write(ctx, src, opts) +} + +// Ioctl implements vfs.FileDescriptionImpl. +func (t *ttyFD) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + // Ignore arg[0]. This is the real FD: + fd := t.inode.hostFD + ioctl := args[1].Uint64() + switch ioctl { + case linux.TCGETS: + termios, err := ioctlGetTermios(fd) + if err != nil { + return 0, err + } + _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), termios, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + + case linux.TCSETS, linux.TCSETSW, linux.TCSETSF: + t.mu.Lock() + defer t.mu.Unlock() + + if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { + return 0, err + } + + var termios linux.Termios + if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &termios, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + err := ioctlSetTermios(fd, ioctl, &termios) + if err == nil { + t.termios.FromTermios(termios) + } + return 0, err + + case linux.TIOCGPGRP: + // Args: pid_t *argp + // When successful, equivalent to *argp = tcgetpgrp(fd). + // Get the process group ID of the foreground process group on this + // terminal. + + pidns := kernel.PIDNamespaceFromContext(ctx) + if pidns == nil { + return 0, syserror.ENOTTY + } + + t.mu.Lock() + defer t.mu.Unlock() + + // Map the ProcessGroup into a ProcessGroupID in the task's PID namespace. + pgID := pidns.IDOfProcessGroup(t.fgProcessGroup) + _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + + case linux.TIOCSPGRP: + // Args: const pid_t *argp + // Equivalent to tcsetpgrp(fd, *argp). + // Set the foreground process group ID of this terminal. + + task := kernel.TaskFromContext(ctx) + if task == nil { + return 0, syserror.ENOTTY + } + + t.mu.Lock() + defer t.mu.Unlock() + + // Check that we are allowed to set the process group. + if err := t.checkChange(ctx, linux.SIGTTOU); err != nil { + // drivers/tty/tty_io.c:tiocspgrp() converts -EIO from tty_check_change() + // to -ENOTTY. + if err == syserror.EIO { + return 0, syserror.ENOTTY + } + return 0, err + } + + // Check that calling task's process group is in the TTY session. + if task.ThreadGroup().Session() != t.session { + return 0, syserror.ENOTTY + } + + var pgID kernel.ProcessGroupID + if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgID, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + + // pgID must be non-negative. + if pgID < 0 { + return 0, syserror.EINVAL + } + + // Process group with pgID must exist in this PID namespace. + pidns := task.PIDNamespace() + pg := pidns.ProcessGroupWithID(pgID) + if pg == nil { + return 0, syserror.ESRCH + } + + // Check that new process group is in the TTY session. + if pg.Session() != t.session { + return 0, syserror.EPERM + } + + t.fgProcessGroup = pg + return 0, nil + + case linux.TIOCGWINSZ: + // Args: struct winsize *argp + // Get window size. + winsize, err := ioctlGetWinsize(fd) + if err != nil { + return 0, err + } + _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), winsize, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + + case linux.TIOCSWINSZ: + // Args: const struct winsize *argp + // Set window size. + + // Unlike setting the termios, any process group (even background ones) can + // set the winsize. + + var winsize linux.Winsize + if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &winsize, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + err := ioctlSetWinsize(fd, &winsize) + return 0, err + + // Unimplemented commands. + case linux.TIOCSETD, + linux.TIOCSBRK, + linux.TIOCCBRK, + linux.TCSBRK, + linux.TCSBRKP, + linux.TIOCSTI, + linux.TIOCCONS, + linux.FIONBIO, + linux.TIOCEXCL, + linux.TIOCNXCL, + linux.TIOCGEXCL, + linux.TIOCNOTTY, + linux.TIOCSCTTY, + linux.TIOCGSID, + linux.TIOCGETD, + linux.TIOCVHANGUP, + linux.TIOCGDEV, + linux.TIOCMGET, + linux.TIOCMSET, + linux.TIOCMBIC, + linux.TIOCMBIS, + linux.TIOCGICOUNT, + linux.TCFLSH, + linux.TIOCSSERIAL, + linux.TIOCGPTPEER: + + unimpl.EmitUnimplementedEvent(ctx) + fallthrough + default: + return 0, syserror.ENOTTY + } +} + +// checkChange checks that the process group is allowed to read, write, or +// change the state of the TTY. +// +// This corresponds to Linux drivers/tty/tty_io.c:tty_check_change(). The logic +// is a bit convoluted, but documented inline. +// +// Preconditions: t.mu must be held. +func (t *ttyFD) checkChange(ctx context.Context, sig linux.Signal) error { + task := kernel.TaskFromContext(ctx) + if task == nil { + // No task? Linux does not have an analog for this case, but + // tty_check_change is more of a blacklist of cases than a + // whitelist, and is surprisingly permissive. Allowing the + // change seems most appropriate. + return nil + } + + tg := task.ThreadGroup() + pg := tg.ProcessGroup() + + // If the session for the task is different than the session for the + // controlling TTY, then the change is allowed. Seems like a bad idea, + // but that's exactly what linux does. + if tg.Session() != t.fgProcessGroup.Session() { + return nil + } + + // If we are the foreground process group, then the change is allowed. + if pg == t.fgProcessGroup { + return nil + } + + // We are not the foreground process group. + + // Is the provided signal blocked or ignored? + if (task.SignalMask()&linux.SignalSetOf(sig) != 0) || tg.SignalHandlers().IsIgnored(sig) { + // If the signal is SIGTTIN, then we are attempting to read + // from the TTY. Don't send the signal and return EIO. + if sig == linux.SIGTTIN { + return syserror.EIO + } + + // Otherwise, we are writing or changing terminal state. This is allowed. + return nil + } + + // If the process group is an orphan, return EIO. + if pg.IsOrphan() { + return syserror.EIO + } + + // Otherwise, send the signal to the process group and return ERESTARTSYS. + // + // Note that Linux also unconditionally sets TIF_SIGPENDING on current, + // but this isn't necessary in gVisor because the rationale given in + // 040b6362d58f "tty: fix leakage of -ERESTARTSYS to userland" doesn't + // apply: the sentry will handle -ERESTARTSYS in + // kernel.runApp.execute() even if the kernel.Task isn't interrupted. + // + // Linux ignores the result of kill_pgrp(). + _ = pg.SendSignal(kernel.SignalInfoPriv(sig)) + return kernel.ERESTARTSYS +} -- cgit v1.2.3 From 57d9bd922b4eff922d1a5185529fe5446249d592 Mon Sep 17 00:00:00 2001 From: Zach Koopmans Date: Thu, 19 Mar 2020 15:28:52 -0700 Subject: Remove the "frozen" bit from dirents. Frozen was to lock down changes to the host filesystem for hostFS. Now that hostFS is gone, it can be removed. PiperOrigin-RevId: 301907923 --- pkg/sentry/fs/dirent.go | 133 +------------------------------------ pkg/sentry/fs/file_overlay_test.go | 84 ----------------------- pkg/sentry/fs/mounts.go | 13 ---- 3 files changed, 1 insertion(+), 229 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index e0b32e1c1..0266a5287 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -17,7 +17,6 @@ package fs import ( "fmt" "path" - "sort" "sync/atomic" "syscall" @@ -121,9 +120,6 @@ type Dirent struct { // deleted may be set atomically when removed. deleted int32 - // frozen indicates this entry can't walk to unknown nodes. - frozen bool - // mounted is true if Dirent is a mount point, similar to include/linux/dcache.h:DCACHE_MOUNTED. mounted bool @@ -253,8 +249,7 @@ func (d *Dirent) IsNegative() bool { return d.Inode == nil } -// hashChild will hash child into the children list of its new parent d, carrying over -// any "frozen" state from d. +// hashChild will hash child into the children list of its new parent d. // // Returns (*WeakRef, true) if hashing child caused a Dirent to be unhashed. The caller must // validate the returned unhashed weak reference. Common cases: @@ -282,9 +277,6 @@ func (d *Dirent) hashChild(child *Dirent) (*refs.WeakRef, bool) { d.IncRef() } - // Carry over parent's frozen state. - child.frozen = d.frozen - return d.hashChildParentSet(child) } @@ -400,38 +392,6 @@ func (d *Dirent) MountRoot() *Dirent { return mountRoot } -// Freeze prevents this dirent from walking to more nodes. Freeze is applied -// recursively to all children. -// -// If this particular Dirent represents a Virtual node, then Walks and Creates -// may proceed as before. -// -// Freeze can only be called before the application starts running, otherwise -// the root it might be out of sync with the application root if modified by -// sys_chroot. -func (d *Dirent) Freeze() { - d.mu.Lock() - defer d.mu.Unlock() - if d.frozen { - // Already frozen. - return - } - d.frozen = true - - // Take a reference when freezing. - for _, w := range d.children { - if child := w.Get(); child != nil { - // NOTE: We would normally drop the reference here. But - // instead we're hanging on to it. - ch := child.(*Dirent) - ch.Freeze() - } - } - - // Drop all expired weak references. - d.flush() -} - // descendantOf returns true if the receiver dirent is equal to, or a // descendant of, the argument dirent. // @@ -524,11 +484,6 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl w.Drop() } - // Are we allowed to do the lookup? - if d.frozen && !d.Inode.IsVirtual() { - return nil, syscall.ENOENT - } - // Slow path: load the InodeOperations into memory. Since this is a hot path and the lookup may be // expensive, if possible release the lock and re-acquire it. if walkMayUnlock { @@ -659,11 +614,6 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi return nil, syscall.EEXIST } - // Are we frozen? - if d.frozen && !d.Inode.IsVirtual() { - return nil, syscall.ENOENT - } - // Try the create. We need to trust the file system to return EEXIST (or something // that will translate to EEXIST) if name already exists. file, err := d.Inode.Create(ctx, d, name, flags, perms) @@ -727,11 +677,6 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c return syscall.EEXIST } - // Are we frozen? - if d.frozen && !d.Inode.IsVirtual() { - return syscall.ENOENT - } - // Remove any negative Dirent. We've already asserted above with d.exists // that the only thing remaining here can be a negative Dirent. if w, ok := d.children[name]; ok { @@ -862,49 +807,6 @@ func (d *Dirent) GetDotAttrs(root *Dirent) (DentAttr, DentAttr) { return dot, dot } -// readdirFrozen returns readdir results based solely on the frozen children. -func (d *Dirent) readdirFrozen(root *Dirent, offset int64, dirCtx *DirCtx) (int64, error) { - // Collect attrs for "." and "..". - attrs := make(map[string]DentAttr) - names := []string{".", ".."} - attrs["."], attrs[".."] = d.GetDotAttrs(root) - - // Get info from all children. - d.mu.Lock() - defer d.mu.Unlock() - for name, w := range d.children { - if child := w.Get(); child != nil { - defer child.DecRef() - - // Skip negative children. - if child.(*Dirent).IsNegative() { - continue - } - - sattr := child.(*Dirent).Inode.StableAttr - attrs[name] = DentAttr{ - Type: sattr.Type, - InodeID: sattr.InodeID, - } - names = append(names, name) - } - } - - sort.Strings(names) - - if int(offset) >= len(names) { - return offset, nil - } - names = names[int(offset):] - for _, name := range names { - if err := dirCtx.DirEmit(name, attrs[name]); err != nil { - return offset, err - } - offset++ - } - return offset, nil -} - // DirIterator is an open directory containing directory entries that can be read. type DirIterator interface { // IterateDir emits directory entries by calling dirCtx.EmitDir, beginning @@ -964,10 +866,6 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent, return offset, nil } - if d.frozen { - return d.readdirFrozen(root, offset, dirCtx) - } - // Collect attrs for "." and "..". dot, dotdot := d.GetDotAttrs(root) @@ -1068,11 +966,6 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err return nil, syserror.EINVAL } - // Are we frozen? - if d.parent.frozen && !d.parent.Inode.IsVirtual() { - return nil, syserror.ENOENT - } - // Dirent that'll replace d. // // Note that NewDirent returns with one reference taken; the reference @@ -1101,11 +994,6 @@ func (d *Dirent) unmount(ctx context.Context, replacement *Dirent) error { return syserror.ENOENT } - // Are we frozen? - if d.parent.frozen && !d.parent.Inode.IsVirtual() { - return syserror.ENOENT - } - // Remount our former child in its place. // // As replacement used to be our child, it must already have the right @@ -1135,11 +1023,6 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath unlock := d.lockDirectory() defer unlock() - // Are we frozen? - if d.frozen && !d.Inode.IsVirtual() { - return syscall.ENOENT - } - // Try to walk to the node. child, err := d.walk(ctx, root, name, false /* may unlock */) if err != nil { @@ -1201,11 +1084,6 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string) unlock := d.lockDirectory() defer unlock() - // Are we frozen? - if d.frozen && !d.Inode.IsVirtual() { - return syscall.ENOENT - } - // Check for dots. if name == "." { // Rejected as the last component by rmdir(2). @@ -1519,15 +1397,6 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string return err } - // Are we frozen? - // TODO(jamieliu): Is this the right errno? - if oldParent.frozen && !oldParent.Inode.IsVirtual() { - return syscall.ENOENT - } - if newParent.frozen && !newParent.Inode.IsVirtual() { - return syscall.ENOENT - } - // Do we have general permission to remove from oldParent and // create/replace in newParent? if err := oldParent.Inode.CheckPermission(ctx, PermMask{Write: true, Execute: true}); err != nil { diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go index a76d87e3a..1971cc680 100644 --- a/pkg/sentry/fs/file_overlay_test.go +++ b/pkg/sentry/fs/file_overlay_test.go @@ -175,90 +175,6 @@ func TestReaddirRevalidation(t *testing.T) { } } -// TestReaddirOverlayFrozen tests that calling Readdir on an overlay file with -// a frozen dirent tree does not make Readdir calls to the underlying files. -// This is a regression test for b/114808269. -func TestReaddirOverlayFrozen(t *testing.T) { - ctx := contexttest.Context(t) - - // Create an overlay with two directories, each with two files. - upper := newTestRamfsDir(ctx, []dirContent{{name: "upper-file1"}, {name: "upper-file2"}}, nil) - lower := newTestRamfsDir(ctx, []dirContent{{name: "lower-file1"}, {name: "lower-file2"}}, nil) - overlayInode := fs.NewTestOverlayDir(ctx, upper, lower, false) - - // Set that overlay as the root. - root := fs.NewDirent(ctx, overlayInode, "root") - ctx = &rootContext{ - Context: ctx, - root: root, - } - - // Check that calling Readdir on the root now returns all 4 files (2 - // from each layer in the overlay). - rootFile, err := root.Inode.GetFile(ctx, root, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("root.Inode.GetFile failed: %v", err) - } - defer rootFile.DecRef() - ser := &fs.CollectEntriesSerializer{} - if err := rootFile.Readdir(ctx, ser); err != nil { - t.Fatalf("rootFile.Readdir failed: %v", err) - } - if got, want := ser.Order, []string{".", "..", "lower-file1", "lower-file2", "upper-file1", "upper-file2"}; !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } - - // Readdir should have been called on upper and lower. - upperDir := upper.InodeOperations.(*dir) - lowerDir := lower.InodeOperations.(*dir) - if !upperDir.ReaddirCalled { - t.Errorf("upperDir.ReaddirCalled got %v, want true", upperDir.ReaddirCalled) - } - if !lowerDir.ReaddirCalled { - t.Errorf("lowerDir.ReaddirCalled got %v, want true", lowerDir.ReaddirCalled) - } - - // Reset. - upperDir.ReaddirCalled = false - lowerDir.ReaddirCalled = false - - // Take references on "upper-file1" and "lower-file1", pinning them in - // the dirent tree. - for _, name := range []string{"upper-file1", "lower-file1"} { - if _, err := root.Walk(ctx, root, name); err != nil { - t.Fatalf("root.Walk(%q) failed: %v", name, err) - } - // Don't drop a reference on the returned dirent so that it - // will stay in the tree. - } - - // Freeze the dirent tree. - root.Freeze() - - // Seek back to the beginning of the file. - if _, err := rootFile.Seek(ctx, fs.SeekSet, 0); err != nil { - t.Fatalf("error seeking to beginning of directory: %v", err) - } - - // Calling Readdir on the root now will return only the pinned - // children. - ser = &fs.CollectEntriesSerializer{} - if err := rootFile.Readdir(ctx, ser); err != nil { - t.Fatalf("rootFile.Readdir failed: %v", err) - } - if got, want := ser.Order, []string{".", "..", "lower-file1", "upper-file1"}; !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } - - // Readdir should NOT have been called on upper or lower. - if upperDir.ReaddirCalled { - t.Errorf("upperDir.ReaddirCalled got %v, want false", upperDir.ReaddirCalled) - } - if lowerDir.ReaddirCalled { - t.Errorf("lowerDir.ReaddirCalled got %v, want false", lowerDir.ReaddirCalled) - } -} - type rootContext struct { context.Context root *fs.Dirent diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index c7981f66e..b414ddaee 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -273,19 +273,6 @@ func (mns *MountNamespace) DecRef() { mns.DecRefWithDestructor(mns.destroy) } -// Freeze freezes the entire mount tree. -func (mns *MountNamespace) Freeze() { - mns.mu.Lock() - defer mns.mu.Unlock() - - // We only want to freeze Dirents with active references, not Dirents referenced - // by a mount's MountSource. - mns.flushMountSourceRefsLocked() - - // Freeze the entire shebang. - mns.root.Freeze() -} - // withMountLocked prevents further walks to `node`, because `node` is about to // be a mount point. func (mns *MountNamespace) withMountLocked(node *Dirent, fn func() error) error { -- cgit v1.2.3 From 137f3614009b0ef931c1d00a083b4ae8e6a39bc9 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 26 Mar 2020 16:46:15 -0700 Subject: Use host-defined file owner and mode, when possible, for imported fds. Using the host-defined file owner matches VFS1. It is more correct to use the host-defined mode, since the cached value may become out of date. However, kernfs.Inode.Mode() does not return an error--other filesystems on kernfs are in-memory so retrieving mode should not fail. Therefore, if the host syscall fails, we rely on a cached value instead. Updates #1672. PiperOrigin-RevId: 303220864 --- pkg/sentry/control/proc.go | 6 +-- pkg/sentry/fs/host/BUILD | 1 - pkg/sentry/fs/host/control.go | 2 +- pkg/sentry/fs/host/file.go | 10 ++-- pkg/sentry/fs/host/inode_test.go | 3 +- pkg/sentry/fs/host/wait_test.go | 3 +- pkg/sentry/fsimpl/host/host.go | 110 ++++++++++++++++++++++++++------------- runsc/boot/fds.go | 5 +- 8 files changed, 87 insertions(+), 53 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 5457ba5e7..b51fb3959 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -224,8 +224,6 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI } } - mounter := fs.FileOwnerFromContext(ctx) - // TODO(gvisor.dev/issue/1623): Use host FD when supported in VFS2. var ttyFile *fs.File for appFD, hostFile := range args.FilePayload.Files { @@ -235,7 +233,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI // Import the file as a host TTY file. if ttyFile == nil { var err error - appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), mounter, true /* isTTY */) + appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), true /* isTTY */) if err != nil { return nil, 0, nil, err } @@ -254,7 +252,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI } else { // Import the file as a regular host file. var err error - appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), mounter, false /* isTTY */) + appFile, err = host.ImportFile(ctx, int(hostFile.Fd()), false /* isTTY */) if err != nil { return nil, 0, nil, err } diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 011625c80..aabce6cc9 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -71,7 +71,6 @@ go_test( "//pkg/fd", "//pkg/fdnotifier", "//pkg/sentry/contexttest", - "//pkg/sentry/fs", "//pkg/sentry/kernel/time", "//pkg/sentry/socket", "//pkg/sentry/socket/unix/transport", diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go index cd84e1337..52c0504b6 100644 --- a/pkg/sentry/fs/host/control.go +++ b/pkg/sentry/fs/host/control.go @@ -78,7 +78,7 @@ func fdsToFiles(ctx context.Context, fds []int) []*fs.File { } // Create the file backed by hostFD. - file, err := NewFile(ctx, fd, fs.FileOwnerFromContext(ctx)) + file, err := NewFile(ctx, fd) if err != nil { ctx.Warningf("Error creating file from host FD: %v", err) break diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go index 034862694..3e48b8b2c 100644 --- a/pkg/sentry/fs/host/file.go +++ b/pkg/sentry/fs/host/file.go @@ -60,8 +60,8 @@ var _ fs.FileOperations = (*fileOperations)(nil) // The returned File cannot be saved, since there is no guarantee that the same // FD will exist or represent the same file at time of restore. If such a // guarantee does exist, use ImportFile instead. -func NewFile(ctx context.Context, fd int, mounter fs.FileOwner) (*fs.File, error) { - return newFileFromDonatedFD(ctx, fd, mounter, false, false) +func NewFile(ctx context.Context, fd int) (*fs.File, error) { + return newFileFromDonatedFD(ctx, fd, false, false) } // ImportFile creates a new File backed by the provided host file descriptor. @@ -71,13 +71,13 @@ func NewFile(ctx context.Context, fd int, mounter fs.FileOwner) (*fs.File, error // If the returned file is saved, it will be restored by re-importing the FD // originally passed to ImportFile. It is the restorer's responsibility to // ensure that the FD represents the same file. -func ImportFile(ctx context.Context, fd int, mounter fs.FileOwner, isTTY bool) (*fs.File, error) { - return newFileFromDonatedFD(ctx, fd, mounter, true, isTTY) +func ImportFile(ctx context.Context, fd int, isTTY bool) (*fs.File, error) { + return newFileFromDonatedFD(ctx, fd, true, isTTY) } // newFileFromDonatedFD returns an fs.File from a donated FD. If the FD is // saveable, then saveable is true. -func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner, saveable, isTTY bool) (*fs.File, error) { +func newFileFromDonatedFD(ctx context.Context, donated int, saveable, isTTY bool) (*fs.File, error) { var s syscall.Stat_t if err := syscall.Fstat(donated, &s); err != nil { return nil, err diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go index 4c374681c..c507f57eb 100644 --- a/pkg/sentry/fs/host/inode_test.go +++ b/pkg/sentry/fs/host/inode_test.go @@ -19,7 +19,6 @@ import ( "testing" "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" ) // TestCloseFD verifies fds will be closed. @@ -33,7 +32,7 @@ func TestCloseFD(t *testing.T) { // Use the write-end because we will detect if it's closed on the read end. ctx := contexttest.Context(t) - file, err := NewFile(ctx, p[1], fs.RootOwner) + file, err := NewFile(ctx, p[1]) if err != nil { t.Fatalf("Failed to create File: %v", err) } diff --git a/pkg/sentry/fs/host/wait_test.go b/pkg/sentry/fs/host/wait_test.go index d49c3a635..ce397a5e3 100644 --- a/pkg/sentry/fs/host/wait_test.go +++ b/pkg/sentry/fs/host/wait_test.go @@ -20,7 +20,6 @@ import ( "time" "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/waiter" ) @@ -34,7 +33,7 @@ func TestWait(t *testing.T) { defer syscall.Close(fds[1]) ctx := contexttest.Context(t) - file, err := NewFile(ctx, fds[0], fs.RootOwner) + file, err := NewFile(ctx, fds[0]) if err != nil { syscall.Close(fds[0]) t.Fatalf("NewFile failed: %v", err) diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index a54985ef5..17e3d6e9d 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -54,7 +54,7 @@ func NewMount(vfsObj *vfs.VirtualFilesystem) (*vfs.Mount, error) { } // ImportFD sets up and returns a vfs.FileDescription from a donated fd. -func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID, isTTY bool) (*vfs.FileDescription, error) { +func ImportFD(mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) { fs, ok := mnt.Filesystem().Impl().(*kernfs.Filesystem) if !ok { return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl()) @@ -78,8 +78,6 @@ func ImportFD(mnt *vfs.Mount, hostFD int, ownerUID auth.KUID, ownerGID auth.KGID canMap: canMap(uint32(fileType)), ino: fs.NextIno(), mode: fileMode, - uid: ownerUID, - gid: ownerGID, // For simplicity, set offset to 0. Technically, we should // only set to 0 on files that are not seekable (sockets, pipes, etc.), // and use the offset from the host fd otherwise. @@ -135,17 +133,20 @@ type inode struct { // This field is initialized at creation time and is immutable. ino uint64 - // TODO(gvisor.dev/issue/1672): protect mode, uid, and gid with mutex. + // modeMu protects mode. + modeMu sync.Mutex - // mode is the file mode of this inode. Note that this value may become out - // of date if the mode is changed on the host, e.g. with chmod. + // mode is a cached version of the file mode on the host. Note that it may + // become out of date if the mode is changed on the host, e.g. with chmod. + // + // Generally, it is better to retrieve the mode from the host through an + // fstat syscall. We only use this value in inode.Mode(), which cannot + // return an error, if the syscall to host fails. + // + // FIXME(b/152294168): Plumb error into Inode.Mode() return value so we + // can get rid of this. mode linux.FileMode - // uid and gid of the file owner. Note that these refer to the owner of the - // file created on import, not the fd on the host. - uid auth.KUID - gid auth.KGID - // offsetMu protects offset. offsetMu sync.Mutex @@ -168,12 +169,35 @@ func fileFlagsFromHostFD(fd int) (int, error) { // CheckPermissions implements kernfs.Inode. func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { - return vfs.GenericCheckPermissions(creds, ats, i.mode, i.uid, i.gid) + mode, uid, gid, err := i.getPermissions() + if err != nil { + return err + } + return vfs.GenericCheckPermissions(creds, ats, mode, uid, gid) } // Mode implements kernfs.Inode. func (i *inode) Mode() linux.FileMode { - return i.mode + mode, _, _, err := i.getPermissions() + if err != nil { + return i.mode + } + + return linux.FileMode(mode) +} + +func (i *inode) getPermissions() (linux.FileMode, auth.KUID, auth.KGID, error) { + // Retrieve metadata. + var s syscall.Stat_t + if err := syscall.Fstat(i.hostFD, &s); err != nil { + return 0, 0, 0, err + } + + // Update cached mode. + i.modeMu.Lock() + i.mode = linux.FileMode(s.Mode) + i.modeMu.Unlock() + return linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid), nil } // Stat implements kernfs.Inode. @@ -213,45 +237,51 @@ func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, erro ls.Attributes = s.Attributes ls.AttributesMask = s.Attributes_mask - if mask|linux.STATX_TYPE != 0 { + if mask&linux.STATX_TYPE != 0 { ls.Mode |= s.Mode & linux.S_IFMT } - if mask|linux.STATX_MODE != 0 { + if mask&linux.STATX_MODE != 0 { ls.Mode |= s.Mode &^ linux.S_IFMT } - if mask|linux.STATX_NLINK != 0 { + if mask&linux.STATX_NLINK != 0 { ls.Nlink = s.Nlink } - if mask|linux.STATX_ATIME != 0 { + if mask&linux.STATX_UID != 0 { + ls.UID = s.Uid + } + if mask&linux.STATX_GID != 0 { + ls.GID = s.Gid + } + if mask&linux.STATX_ATIME != 0 { ls.Atime = unixToLinuxStatxTimestamp(s.Atime) } - if mask|linux.STATX_BTIME != 0 { + if mask&linux.STATX_BTIME != 0 { ls.Btime = unixToLinuxStatxTimestamp(s.Btime) } - if mask|linux.STATX_CTIME != 0 { + if mask&linux.STATX_CTIME != 0 { ls.Ctime = unixToLinuxStatxTimestamp(s.Ctime) } - if mask|linux.STATX_MTIME != 0 { + if mask&linux.STATX_MTIME != 0 { ls.Mtime = unixToLinuxStatxTimestamp(s.Mtime) } - if mask|linux.STATX_SIZE != 0 { + if mask&linux.STATX_SIZE != 0 { ls.Size = s.Size } - if mask|linux.STATX_BLOCKS != 0 { + if mask&linux.STATX_BLOCKS != 0 { ls.Blocks = s.Blocks } - // Use our own internal inode number and file owner. - if mask|linux.STATX_INO != 0 { + // Use our own internal inode number. + if mask&linux.STATX_INO != 0 { ls.Ino = i.ino } - if mask|linux.STATX_UID != 0 { - ls.UID = uint32(i.uid) - } - if mask|linux.STATX_GID != 0 { - ls.GID = uint32(i.gid) - } + // Update cached mode. + if (mask&linux.STATX_TYPE != 0) && (mask&linux.STATX_MODE != 0) { + i.modeMu.Lock() + i.mode = linux.FileMode(s.Mode) + i.modeMu.Unlock() + } return ls, nil } @@ -274,6 +304,8 @@ func (i *inode) fstat(opts vfs.StatOptions) (linux.Statx, error) { Mask: linux.STATX_BASIC_STATS, Blksize: uint32(s.Blksize), Nlink: uint32(s.Nlink), + UID: s.Uid, + GID: s.Gid, Mode: uint16(s.Mode), Size: uint64(s.Size), Blocks: uint64(s.Blocks), @@ -282,15 +314,13 @@ func (i *inode) fstat(opts vfs.StatOptions) (linux.Statx, error) { Mtime: timespecToStatxTimestamp(s.Mtim), } - // Use our own internal inode number and file owner. + // Use our own internal inode number. // // TODO(gvisor.dev/issue/1672): Use a kernfs-specific device number as well. // If we use the device number from the host, it may collide with another // sentry-internal device number. We handle device/inode numbers without // relying on the host to prevent collisions. ls.Ino = i.ino - ls.UID = uint32(i.uid) - ls.GID = uint32(i.gid) return ls, nil } @@ -306,7 +336,11 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre if m&^(linux.STATX_MODE|linux.STATX_SIZE|linux.STATX_ATIME|linux.STATX_MTIME) != 0 { return syserror.EPERM } - if err := vfs.CheckSetStat(ctx, creds, &s, i.Mode(), i.uid, i.gid); err != nil { + mode, uid, gid, err := i.getPermissions() + if err != nil { + return err + } + if err := vfs.CheckSetStat(ctx, creds, &s, mode.Permissions(), uid, gid); err != nil { return err } @@ -314,7 +348,9 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre if err := syscall.Fchmod(i.hostFD, uint32(s.Mode)); err != nil { return err } + i.modeMu.Lock() i.mode = linux.FileMode(s.Mode) + i.modeMu.Unlock() } if m&linux.STATX_SIZE != 0 { if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil { @@ -351,7 +387,11 @@ func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptio } func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) { - fileType := i.mode.FileType() + mode, _, _, err := i.getPermissions() + if err != nil { + return nil, err + } + fileType := mode.FileType() if fileType == syscall.S_IFSOCK { if i.isTTY { return nil, errors.New("cannot use host socket as TTY") diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go index 417d2d5fb..5314b0f2a 100644 --- a/runsc/boot/fds.go +++ b/runsc/boot/fds.go @@ -34,7 +34,6 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F k := kernel.KernelFromContext(ctx) fdTable := k.NewFDTable() defer fdTable.DecRef() - mounter := fs.FileOwnerFromContext(ctx) var ttyFile *fs.File for appFD, hostFD := range stdioFDs { @@ -44,7 +43,7 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F // Import the file as a host TTY file. if ttyFile == nil { var err error - appFile, err = host.ImportFile(ctx, hostFD, mounter, true /* isTTY */) + appFile, err = host.ImportFile(ctx, hostFD, true /* isTTY */) if err != nil { return nil, err } @@ -63,7 +62,7 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F } else { // Import the file as a regular host file. var err error - appFile, err = host.ImportFile(ctx, hostFD, mounter, false /* isTTY */) + appFile, err = host.ImportFile(ctx, hostFD, false /* isTTY */) if err != nil { return nil, err } -- cgit v1.2.3 From b6639f77e59d885cb092c15a7a0c5a988e149b40 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Tue, 31 Mar 2020 15:00:30 -0700 Subject: Include original copyUp error in panic if cleanupUpper fails. When copyUp fails, we attempt to clean up the upper filesystem by removing any files that have already been copied-up. If the cleanup fails, we panic because the "overlay filesystem is in an inconsistent state". This CL adds the original copy-up error to the panic information, to hopefully make it easier to track down how the overlay filesystem got into the inconsistent state. PiperOrigin-RevId: 304053370 --- pkg/sentry/fs/copy_up.go | 28 +++++++++++++++------------- pkg/sentry/fs/inode_overlay.go | 3 ++- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index b060a12ff..ab1424c95 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -222,8 +222,8 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { } childUpper, err := parentUpper.Lookup(ctx, next.name) if err != nil { - log.Warningf("copy up failed to lookup directory: %v", err) - cleanupUpper(ctx, parentUpper, next.name) + werr := fmt.Errorf("copy up failed to lookup directory: %v", err) + cleanupUpper(ctx, parentUpper, next.name, werr) return syserror.EIO } defer childUpper.DecRef() @@ -242,8 +242,8 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { } childUpper, err := parentUpper.Lookup(ctx, next.name) if err != nil { - log.Warningf("copy up failed to lookup symlink: %v", err) - cleanupUpper(ctx, parentUpper, next.name) + werr := fmt.Errorf("copy up failed to lookup symlink: %v", err) + cleanupUpper(ctx, parentUpper, next.name, werr) return syserror.EIO } defer childUpper.DecRef() @@ -256,23 +256,23 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { // Bring file attributes up to date. This does not include size, which will be // brought up to date with copyContentsLocked. if err := copyAttributesLocked(ctx, childUpperInode, next.Inode.overlay.lower); err != nil { - log.Warningf("copy up failed to copy up attributes: %v", err) - cleanupUpper(ctx, parentUpper, next.name) + werr := fmt.Errorf("copy up failed to copy up attributes: %v", err) + cleanupUpper(ctx, parentUpper, next.name, werr) return syserror.EIO } // Copy the entire file. if err := copyContentsLocked(ctx, childUpperInode, next.Inode.overlay.lower, attrs.Size); err != nil { - log.Warningf("copy up failed to copy up contents: %v", err) - cleanupUpper(ctx, parentUpper, next.name) + werr := fmt.Errorf("copy up failed to copy up contents: %v", err) + cleanupUpper(ctx, parentUpper, next.name, werr) return syserror.EIO } lowerMappable := next.Inode.overlay.lower.Mappable() upperMappable := childUpperInode.Mappable() if lowerMappable != nil && upperMappable == nil { - log.Warningf("copy up failed: cannot ensure memory mapping coherence") - cleanupUpper(ctx, parentUpper, next.name) + werr := fmt.Errorf("copy up failed: cannot ensure memory mapping coherence") + cleanupUpper(ctx, parentUpper, next.name, werr) return syserror.EIO } @@ -324,12 +324,14 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { return nil } -// cleanupUpper removes name from parent, and panics if it is unsuccessful. -func cleanupUpper(ctx context.Context, parent *Inode, name string) { +// cleanupUpper is called when copy-up fails. It logs the copy-up error and +// attempts to remove name from parent. If that fails, then it panics. +func cleanupUpper(ctx context.Context, parent *Inode, name string, copyUpErr error) { + log.Warningf(copyUpErr.Error()) if err := parent.InodeOperations.Remove(ctx, parent, name); err != nil { // Unfortunately we don't have much choice. We shouldn't // willingly give the caller access to a nonsense filesystem. - panic(fmt.Sprintf("overlay filesystem is in an inconsistent state: failed to remove %q from upper filesystem: %v", name, err)) + panic(fmt.Sprintf("overlay filesystem is in an inconsistent state: copyUp got error: %v; then cleanup failed to remove %q from upper filesystem: %v.", copyUpErr, name, err)) } } diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 5ada33a32..537c8d257 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -231,7 +231,8 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st upperFile.Dirent.Inode.IncRef() entry, err := newOverlayEntry(ctx, upperFile.Dirent.Inode, nil, false) if err != nil { - cleanupUpper(ctx, o.upper, name) + werr := fmt.Errorf("newOverlayEntry failed: %v", err) + cleanupUpper(ctx, o.upper, name, werr) return nil, err } -- cgit v1.2.3 From 5b2396d244ed6283d928a72bdd4cc58d78ef3175 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 2 Apr 2020 17:06:19 -0700 Subject: Fix typo in TODO comments. PiperOrigin-RevId: 304508083 --- pkg/sentry/fs/proc/mounts.go | 3 ++- pkg/sentry/fsimpl/tmpfs/filesystem.go | 6 +++--- pkg/sentry/fsimpl/tmpfs/stat_test.go | 4 ++-- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 2 +- pkg/sentry/socket/netstack/netstack.go | 2 +- pkg/sentry/vfs/mount.go | 3 ++- test/syscalls/linux/socket_inet_loopback.cc | 2 +- 7 files changed, 12 insertions(+), 10 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go index 94deb553b..1fc9c703c 100644 --- a/pkg/sentry/fs/proc/mounts.go +++ b/pkg/sentry/fs/proc/mounts.go @@ -170,7 +170,8 @@ func superBlockOpts(mountPath string, msrc *fs.MountSource) string { // NOTE(b/147673608): If the mount is a cgroup, we also need to include // the cgroup name in the options. For now we just read that from the // path. - // TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, we + // + // TODO(gvisor.dev/issue/190): Once gVisor has full cgroup support, we // should get this value from the cgroup itself, and not rely on the // path. if msrc.FilesystemType == "cgroup" { diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index e678ecc37..4cf27bf13 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -57,7 +57,7 @@ afterSymlink: } next := nextVFSD.Impl().(*dentry) if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO(gvisor.dev/issues/1197): Symlink traversals updates + // TODO(gvisor.dev/issue/1197): Symlink traversals updates // access time. if err := rp.HandleSymlink(symlink.target); err != nil { return nil, err @@ -515,7 +515,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa oldParent.inode.decLinksLocked() newParent.inode.incLinksLocked() } - // TODO(gvisor.dev/issues/1197): Update timestamps and parent directory + // TODO(gvisor.dev/issue/1197): Update timestamps and parent directory // sizes. vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD) return nil @@ -600,7 +600,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu if err != nil { return linux.Statfs{}, err } - // TODO(gvisor.dev/issues/1197): Actually implement statfs. + // TODO(gvisor.dev/issue/1197): Actually implement statfs. return linux.Statfs{}, syserror.ENOSYS } diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go index ebe035dee..3e02e7190 100644 --- a/pkg/sentry/fsimpl/tmpfs/stat_test.go +++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go @@ -29,7 +29,7 @@ func TestStatAfterCreate(t *testing.T) { mode := linux.FileMode(0644) // Run with different file types. - // TODO(gvisor.dev/issues/1197): Also test symlinks and sockets. + // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets. for _, typ := range []string{"file", "dir", "pipe"} { t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { var ( @@ -169,7 +169,7 @@ func TestSetStat(t *testing.T) { mode := linux.FileMode(0644) // Run with different file types. - // TODO(gvisor.dev/issues/1197): Also test symlinks and sockets. + // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets. for _, typ := range []string{"file", "dir", "pipe"} { t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { var ( diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 8bc8818c0..54da15849 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -315,7 +315,7 @@ func (i *inode) statTo(stat *linux.Statx) { stat.Atime = linux.NsecToStatxTimestamp(i.atime) stat.Ctime = linux.NsecToStatxTimestamp(i.ctime) stat.Mtime = linux.NsecToStatxTimestamp(i.mtime) - // TODO(gvisor.dev/issues/1197): Device number. + // TODO(gvisor.dev/issue/1197): Device number. switch impl := i.impl.(type) { case *regularFile: stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index f14c336b9..06a5b53bc 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -663,7 +663,7 @@ func (s *SocketOperations) checkFamily(family uint16, exact bool) *syserr.Error // This is a hack to work around the fact that both IPv4 and IPv6 ANY are // represented by the empty string. // -// TODO(gvisor.dev/issues/1556): remove this function. +// TODO(gvisor.dev/issue/1556): remove this function. func (s *SocketOperations) mapFamily(addr tcpip.FullAddress, family uint16) tcpip.FullAddress { if len(addr.Addr) == 0 && s.family == linux.AF_INET6 && family == linux.AF_INET { addr.Addr = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00" diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 7792eb1a0..1b8ecc415 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -835,7 +835,8 @@ func superBlockOpts(mountPath string, mnt *Mount) string { // NOTE(b/147673608): If the mount is a cgroup, we also need to include // the cgroup name in the options. For now we just read that from the // path. - // TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, we + // + // TODO(gvisor.dev/issue/190): Once gVisor has full cgroup support, we // should get this value from the cgroup itself, and not rely on the // path. if mnt.fs.FilesystemType().Name() == "cgroup" { diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index 16888de2a..2ffc86382 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -234,7 +234,7 @@ TEST_P(DualStackSocketTest, AddressOperations) { } } -// TODO(gvisor.dev/issues/1556): uncomment V4MappedAny. +// TODO(gvisor.dev/issue/1556): uncomment V4MappedAny. INSTANTIATE_TEST_SUITE_P( All, DualStackSocketTest, ::testing::Combine( -- cgit v1.2.3 From 94b793262d3c54b4c32fed83d2bd121069680d15 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Wed, 25 Mar 2020 16:55:02 -0700 Subject: Fix all copy locks violations. This required minor restructuring of how system call tables were saved and restored, but it makes way more sense this way. Updates #2243 --- pkg/log/glog.go | 6 +++--- pkg/log/json.go | 2 +- pkg/log/json_k8s.go | 4 ++-- pkg/log/log.go | 2 +- pkg/log/log_test.go | 6 +++--- pkg/sentry/contexttest/contexttest.go | 4 ++-- pkg/sentry/fs/host/socket_test.go | 6 +++--- pkg/sentry/fs/proc/sys_net.go | 4 ++-- pkg/sentry/kernel/syscalls.go | 33 ++++++++++++++++---------------- pkg/sentry/kernel/syscalls_state.go | 36 ++++++++++++++++++++++++++--------- pkg/sentry/kernel/task_context.go | 2 +- pkg/sentry/kernel/time/time.go | 10 +++++----- pkg/state/state.go | 5 +---- runsc/boot/compat.go | 2 +- runsc/main.go | 6 +++--- tools/go_stateify/main.go | 2 +- tools/nogo.json | 13 ------------- 17 files changed, 72 insertions(+), 71 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/log/glog.go b/pkg/log/glog.go index b4f7bb5a4..f57c4427b 100644 --- a/pkg/log/glog.go +++ b/pkg/log/glog.go @@ -25,7 +25,7 @@ import ( // GoogleEmitter is a wrapper that emits logs in a format compatible with // package github.com/golang/glog. type GoogleEmitter struct { - Writer + *Writer } // pid is used for the threadid component of the header. @@ -46,7 +46,7 @@ var pid = os.Getpid() // line The line number // msg The user-supplied message // -func (g *GoogleEmitter) Emit(depth int, level Level, timestamp time.Time, format string, args ...interface{}) { +func (g GoogleEmitter) Emit(depth int, level Level, timestamp time.Time, format string, args ...interface{}) { // Log level. prefix := byte('?') switch level { @@ -81,5 +81,5 @@ func (g *GoogleEmitter) Emit(depth int, level Level, timestamp time.Time, format message := fmt.Sprintf(format, args...) // Emit the formatted result. - fmt.Fprintf(&g.Writer, "%c%02d%02d %02d:%02d:%02d.%06d % 7d %s:%d] %s\n", prefix, int(month), day, hour, minute, second, microsecond, pid, file, line, message) + fmt.Fprintf(g.Writer, "%c%02d%02d %02d:%02d:%02d.%06d % 7d %s:%d] %s\n", prefix, int(month), day, hour, minute, second, microsecond, pid, file, line, message) } diff --git a/pkg/log/json.go b/pkg/log/json.go index 0943db1cc..bdf9d691e 100644 --- a/pkg/log/json.go +++ b/pkg/log/json.go @@ -58,7 +58,7 @@ func (lv *Level) UnmarshalJSON(b []byte) error { // JSONEmitter logs messages in json format. type JSONEmitter struct { - Writer + *Writer } // Emit implements Emitter.Emit. diff --git a/pkg/log/json_k8s.go b/pkg/log/json_k8s.go index 6c6fc8b6f..5883e95e1 100644 --- a/pkg/log/json_k8s.go +++ b/pkg/log/json_k8s.go @@ -29,11 +29,11 @@ type k8sJSONLog struct { // K8sJSONEmitter logs messages in json format that is compatible with // Kubernetes fluent configuration. type K8sJSONEmitter struct { - Writer + *Writer } // Emit implements Emitter.Emit. -func (e *K8sJSONEmitter) Emit(_ int, level Level, timestamp time.Time, format string, v ...interface{}) { +func (e K8sJSONEmitter) Emit(_ int, level Level, timestamp time.Time, format string, v ...interface{}) { j := k8sJSONLog{ Log: fmt.Sprintf(format, v...), Level: level, diff --git a/pkg/log/log.go b/pkg/log/log.go index a794da1aa..37e0605ad 100644 --- a/pkg/log/log.go +++ b/pkg/log/log.go @@ -374,5 +374,5 @@ func CopyStandardLogTo(l Level) error { func init() { // Store the initial value for the log. - log.Store(&BasicLogger{Level: Info, Emitter: &GoogleEmitter{Writer{Next: os.Stderr}}}) + log.Store(&BasicLogger{Level: Info, Emitter: GoogleEmitter{&Writer{Next: os.Stderr}}}) } diff --git a/pkg/log/log_test.go b/pkg/log/log_test.go index 402cc29ae..9ff18559b 100644 --- a/pkg/log/log_test.go +++ b/pkg/log/log_test.go @@ -52,7 +52,7 @@ func TestDropMessages(t *testing.T) { t.Fatalf("Write should have failed") } - fmt.Printf("writer: %+v\n", w) + fmt.Printf("writer: %#v\n", &w) tw.fail = false if _, err := w.Write([]byte("line 2\n")); err != nil { @@ -76,7 +76,7 @@ func TestDropMessages(t *testing.T) { func TestCaller(t *testing.T) { tw := &testWriter{} - e := &GoogleEmitter{Writer: Writer{Next: tw}} + e := GoogleEmitter{Writer: &Writer{Next: tw}} bl := &BasicLogger{ Emitter: e, Level: Debug, @@ -94,7 +94,7 @@ func BenchmarkGoogleLogging(b *testing.B) { tw := &testWriter{ limit: 1, // Only record one message. } - e := &GoogleEmitter{Writer: Writer{Next: tw}} + e := GoogleEmitter{Writer: &Writer{Next: tw}} bl := &BasicLogger{ Emitter: e, Level: Debug, diff --git a/pkg/sentry/contexttest/contexttest.go b/pkg/sentry/contexttest/contexttest.go index 031fc64ec..8e5658c7a 100644 --- a/pkg/sentry/contexttest/contexttest.go +++ b/pkg/sentry/contexttest/contexttest.go @@ -97,7 +97,7 @@ type hostClock struct { } // Now implements ktime.Clock.Now. -func (hostClock) Now() ktime.Time { +func (*hostClock) Now() ktime.Time { return ktime.FromNanoseconds(time.Now().UnixNano()) } @@ -127,7 +127,7 @@ func (t *TestContext) Value(key interface{}) interface{} { case uniqueid.CtxInotifyCookie: return atomic.AddUint32(&lastInotifyCookie, 1) case ktime.CtxRealtimeClock: - return hostClock{} + return &hostClock{} default: if val, ok := t.otherValues[key]; ok { return val diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go index eb4afe520..affdbcacb 100644 --- a/pkg/sentry/fs/host/socket_test.go +++ b/pkg/sentry/fs/host/socket_test.go @@ -199,14 +199,14 @@ func TestListen(t *testing.T) { } func TestPasscred(t *testing.T) { - e := ConnectedEndpoint{} + e := &ConnectedEndpoint{} if got, want := e.Passcred(), false; got != want { t.Errorf("Got %#v.Passcred() = %t, want = %t", e, got, want) } } func TestGetLocalAddress(t *testing.T) { - e := ConnectedEndpoint{path: "foo"} + e := &ConnectedEndpoint{path: "foo"} want := tcpip.FullAddress{Addr: tcpip.Address("foo")} if got, err := e.GetLocalAddress(); err != nil || got != want { t.Errorf("Got %#v.GetLocalAddress() = %#v, %v, want = %#v, %v", e, got, err, want, nil) @@ -214,7 +214,7 @@ func TestGetLocalAddress(t *testing.T) { } func TestQueuedSize(t *testing.T) { - e := ConnectedEndpoint{} + e := &ConnectedEndpoint{} tests := []struct { name string f func() int64 diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index d4c4b533d..702fdd392 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -80,7 +80,7 @@ func newTCPMemInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack, dir } // Truncate implements fs.InodeOperations.Truncate. -func (tcpMemInode) Truncate(context.Context, *fs.Inode, int64) error { +func (*tcpMemInode) Truncate(context.Context, *fs.Inode, int64) error { return nil } @@ -196,7 +196,7 @@ func newTCPSackInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *f } // Truncate implements fs.InodeOperations.Truncate. -func (tcpSack) Truncate(context.Context, *fs.Inode, int64) error { +func (*tcpSack) Truncate(context.Context, *fs.Inode, int64) error { return nil } diff --git a/pkg/sentry/kernel/syscalls.go b/pkg/sentry/kernel/syscalls.go index 93c4fe969..c9a2321b8 100644 --- a/pkg/sentry/kernel/syscalls.go +++ b/pkg/sentry/kernel/syscalls.go @@ -218,56 +218,55 @@ type Stracer interface { SyscallExit(context interface{}, t *Task, sysno, rval uintptr, err error) } -// SyscallTable is a lookup table of system calls. Critically, a SyscallTable -// is *immutable*. In order to make supporting suspend and resume sane, they -// must be uniquely registered and may not change during operation. +// SyscallTable is a lookup table of system calls. // -// +stateify savable +// Note that a SyscallTable is not savable directly. Instead, they are saved as +// an OS/Arch pair and lookup happens again on restore. type SyscallTable struct { // OS is the operating system that this syscall table implements. - OS abi.OS `state:"wait"` + OS abi.OS // Arch is the architecture that this syscall table targets. - Arch arch.Arch `state:"wait"` + Arch arch.Arch // The OS version that this syscall table implements. - Version Version `state:"manual"` + Version Version // AuditNumber is a numeric constant that represents the syscall table. If // non-zero, auditNumber must be one of the AUDIT_ARCH_* values defined by // linux/audit.h. - AuditNumber uint32 `state:"manual"` + AuditNumber uint32 // Table is the collection of functions. - Table map[uintptr]Syscall `state:"manual"` + Table map[uintptr]Syscall // lookup is a fixed-size array that holds the syscalls (indexed by // their numbers). It is used for fast look ups. - lookup []SyscallFn `state:"manual"` + lookup []SyscallFn // Emulate is a collection of instruction addresses to emulate. The // keys are addresses, and the values are system call numbers. - Emulate map[usermem.Addr]uintptr `state:"manual"` + Emulate map[usermem.Addr]uintptr // The function to call in case of a missing system call. - Missing MissingFn `state:"manual"` + Missing MissingFn // Stracer traces this syscall table. - Stracer Stracer `state:"manual"` + Stracer Stracer // External is used to handle an external callback. - External func(*Kernel) `state:"manual"` + External func(*Kernel) // ExternalFilterBefore is called before External is called before the syscall is executed. // External is not called if it returns false. - ExternalFilterBefore func(*Task, uintptr, arch.SyscallArguments) bool `state:"manual"` + ExternalFilterBefore func(*Task, uintptr, arch.SyscallArguments) bool // ExternalFilterAfter is called before External is called after the syscall is executed. // External is not called if it returns false. - ExternalFilterAfter func(*Task, uintptr, arch.SyscallArguments) bool `state:"manual"` + ExternalFilterAfter func(*Task, uintptr, arch.SyscallArguments) bool // FeatureEnable stores the strace and one-shot enable bits. - FeatureEnable SyscallFlagsTable `state:"manual"` + FeatureEnable SyscallFlagsTable } // allSyscallTables contains all known tables. diff --git a/pkg/sentry/kernel/syscalls_state.go b/pkg/sentry/kernel/syscalls_state.go index 00358326b..90f890495 100644 --- a/pkg/sentry/kernel/syscalls_state.go +++ b/pkg/sentry/kernel/syscalls_state.go @@ -14,16 +14,34 @@ package kernel -import "fmt" +import ( + "fmt" -// afterLoad is invoked by stateify. -func (s *SyscallTable) afterLoad() { - otherTable, ok := LookupSyscallTable(s.OS, s.Arch) - if !ok { - // Couldn't find a reference? - panic(fmt.Sprintf("syscall table not found for OS %v Arch %v", s.OS, s.Arch)) + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/sentry/arch" +) + +// syscallTableInfo is used to reload the SyscallTable. +// +// +stateify savable +type syscallTableInfo struct { + OS abi.OS + Arch arch.Arch +} + +// saveSt saves the SyscallTable. +func (tc *TaskContext) saveSt() syscallTableInfo { + return syscallTableInfo{ + OS: tc.st.OS, + Arch: tc.st.Arch, } +} - // Copy the table. - *s = *otherTable +// loadSt loads the SyscallTable. +func (tc *TaskContext) loadSt(sti syscallTableInfo) { + st, ok := LookupSyscallTable(sti.OS, sti.Arch) + if !ok { + panic(fmt.Sprintf("syscall table not found for OS %v, Arch %v", sti.OS, sti.Arch)) + } + tc.st = st // Save the table reference. } diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go index 0158b1788..c115e8d1f 100644 --- a/pkg/sentry/kernel/task_context.go +++ b/pkg/sentry/kernel/task_context.go @@ -49,7 +49,7 @@ type TaskContext struct { fu *futex.Manager // st is the task's syscall table. - st *SyscallTable + st *SyscallTable `state:".(syscallTableInfo)"` } // release releases all resources held by the TaskContext. release is called by diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go index 706de83ef..e959700f2 100644 --- a/pkg/sentry/kernel/time/time.go +++ b/pkg/sentry/kernel/time/time.go @@ -245,7 +245,7 @@ type Clock interface { type WallRateClock struct{} // WallTimeUntil implements Clock.WallTimeUntil. -func (WallRateClock) WallTimeUntil(t, now Time) time.Duration { +func (*WallRateClock) WallTimeUntil(t, now Time) time.Duration { return t.Sub(now) } @@ -254,16 +254,16 @@ func (WallRateClock) WallTimeUntil(t, now Time) time.Duration { type NoClockEvents struct{} // Readiness implements waiter.Waitable.Readiness. -func (NoClockEvents) Readiness(mask waiter.EventMask) waiter.EventMask { +func (*NoClockEvents) Readiness(mask waiter.EventMask) waiter.EventMask { return 0 } // EventRegister implements waiter.Waitable.EventRegister. -func (NoClockEvents) EventRegister(e *waiter.Entry, mask waiter.EventMask) { +func (*NoClockEvents) EventRegister(e *waiter.Entry, mask waiter.EventMask) { } // EventUnregister implements waiter.Waitable.EventUnregister. -func (NoClockEvents) EventUnregister(e *waiter.Entry) { +func (*NoClockEvents) EventUnregister(e *waiter.Entry) { } // ClockEventsQueue implements waiter.Waitable by wrapping waiter.Queue and @@ -273,7 +273,7 @@ type ClockEventsQueue struct { } // Readiness implements waiter.Waitable.Readiness. -func (ClockEventsQueue) Readiness(mask waiter.EventMask) waiter.EventMask { +func (*ClockEventsQueue) Readiness(mask waiter.EventMask) waiter.EventMask { return 0 } diff --git a/pkg/state/state.go b/pkg/state/state.go index dbe507ab4..03ae2dbb0 100644 --- a/pkg/state/state.go +++ b/pkg/state/state.go @@ -241,10 +241,7 @@ func Register(name string, instance interface{}, fns Fns) { // // This function is used by the stateify tool. func IsZeroValue(val interface{}) bool { - if val == nil { - return true - } - return reflect.DeepEqual(val, reflect.Zero(reflect.TypeOf(val)).Interface()) + return val == nil || reflect.ValueOf(val).Elem().IsZero() } // step captures one encoding / decoding step. On each step, there is up to one diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index 8995d678e..b7cfb35bf 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -65,7 +65,7 @@ func newCompatEmitter(logFD int) (*compatEmitter, error) { if logFD > 0 { f := os.NewFile(uintptr(logFD), "user log file") - target := &log.MultiEmitter{c.sink, &log.K8sJSONEmitter{log.Writer{Next: f}}} + target := &log.MultiEmitter{c.sink, log.K8sJSONEmitter{&log.Writer{Next: f}}} c.sink = &log.BasicLogger{Level: log.Info, Emitter: target} } return c, nil diff --git a/runsc/main.go b/runsc/main.go index 62e184ec9..c1c78529c 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -342,11 +342,11 @@ func main() { func newEmitter(format string, logFile io.Writer) log.Emitter { switch format { case "text": - return &log.GoogleEmitter{log.Writer{Next: logFile}} + return log.GoogleEmitter{&log.Writer{Next: logFile}} case "json": - return &log.JSONEmitter{log.Writer{Next: logFile}} + return log.JSONEmitter{&log.Writer{Next: logFile}} case "json-k8s": - return &log.K8sJSONEmitter{log.Writer{Next: logFile}} + return log.K8sJSONEmitter{&log.Writer{Next: logFile}} } cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format) panic("unreachable") diff --git a/tools/go_stateify/main.go b/tools/go_stateify/main.go index 3437aa476..309ee9c21 100644 --- a/tools/go_stateify/main.go +++ b/tools/go_stateify/main.go @@ -206,7 +206,7 @@ func main() { initCalls = append(initCalls, fmt.Sprintf("%sRegister(\"%s.%s\", (*%s)(nil), state.Fns{Save: (*%s).save, Load: (*%s).load})", statePrefix, *fullPkg, name, name, name, name)) } emitZeroCheck := func(name string) { - fmt.Fprintf(outputFile, " if !%sIsZeroValue(x.%s) { m.Failf(\"%s is %%v, expected zero\", x.%s) }\n", statePrefix, name, name, name) + fmt.Fprintf(outputFile, " if !%sIsZeroValue(&x.%s) { m.Failf(\"%s is %%#v, expected zero\", &x.%s) }\n", statePrefix, name, name, name) } emitLoadValue := func(name, typName string) { fmt.Fprintf(outputFile, " m.LoadValue(\"%s\", new(%s), func(y interface{}) { x.load%s(y.(%s)) })\n", name, typName, camelCased(name), typName) diff --git a/tools/nogo.json b/tools/nogo.json index 83cb76b93..cc05ba027 100644 --- a/tools/nogo.json +++ b/tools/nogo.json @@ -9,19 +9,6 @@ "/external/": "allowed: not subject to unsafe naming rules" } }, - "copylocks": { - "exclude_files": { - ".*_state_autogen.go": "fix: m.Failf copies by value", - "/pkg/log/json.go": "fix: Emit passes lock by value: gvisor.dev/gvisor/pkg/log.JSONEmitter contains gvisor.dev/gvisor/pkg/log.Writer contains gvisor.dev/gvisor/pkg/sync.Mutex", - "/pkg/log/log_test.go": "fix: call of fmt.Printf copies lock value: gvisor.dev/gvisor/pkg/log.Writer contains gvisor.dev/gvisor/pkg/sync.Mutex", - "/pkg/sentry/fs/host/socket_test.go": "fix: call of t.Errorf copies lock value: gvisor.dev/gvisor/pkg/sentry/fs/host.ConnectedEndpoint contains gvisor.dev/gvisor/pkg/refs.AtomicRefCount contains gvisor.dev/gvisor/pkg/sync.Mutex", - "/pkg/sentry/fs/proc/sys_net.go": "fix: Truncate passes lock by value: gvisor.dev/gvisor/pkg/sentry/fs/proc.tcpMemInode contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.SimpleFileInode contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.InodeSimpleAttributes contains gvisor.dev/gvisor/pkg/sync.RWMutex", - "/pkg/sentry/fs/proc/sys_net.go": "fix: Truncate passes lock by value: gvisor.dev/gvisor/pkg/sentry/fs/proc.tcpSack contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.SimpleFileInode contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.InodeSimpleAttributes contains gvisor.dev/gvisor/pkg/sync.RWMutex", - "/pkg/sentry/fs/tty/slave.go": "fix: Truncate passes lock by value: gvisor.dev/gvisor/pkg/sentry/fs/tty.slaveInodeOperations contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.SimpleFileInode contains gvisor.dev/gvisor/pkg/sentry/fs/fsutil.InodeSimpleAttributes contains gvisor.dev/gvisor/pkg/sync.RWMutex", - "/pkg/sentry/kernel/time/time.go": "fix: Readiness passes lock by value: gvisor.dev/gvisor/pkg/sentry/kernel/time.ClockEventsQueue contains gvisor.dev/gvisor/pkg/waiter.Queue contains gvisor.dev/gvisor/pkg/sync.RWMutex", - "/pkg/sentry/kernel/syscalls_state.go": "fix: assignment copies lock value to *s: gvisor.dev/gvisor/pkg/sentry/kernel.SyscallTable contains gvisor.dev/gvisor/pkg/sentry/kernel.SyscallFlagsTable contains gvisor.dev/gvisor/pkg/sync.Mutex" - } - }, "lostcancel": { "exclude_files": { "/pkg/tcpip/network/arp/arp_test.go": "fix: the cancel function returned by context.WithTimeout should be called, not discarded, to avoid a context leak", -- cgit v1.2.3 From 928a7c60b8f02811e9c0fcbed0077efd55471cc4 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Wed, 25 Mar 2020 17:15:05 -0700 Subject: Fix all printf formatting errors. Updates #2243 --- pkg/eventchannel/event_test.go | 4 ++-- pkg/segment/test/segment_test.go | 2 +- pkg/sentry/fs/fdpipe/pipe_test.go | 4 ++-- pkg/sentry/fsimpl/tmpfs/stat_test.go | 8 ++++---- pkg/tcpip/header/eth_test.go | 2 +- pkg/tcpip/header/ndp_test.go | 2 +- pkg/tcpip/link/fdbased/endpoint.go | 2 +- pkg/tcpip/stack/ndp_test.go | 12 ++++++------ pkg/tcpip/stack/stack_test.go | 32 ++++++++++++++++---------------- pkg/tcpip/tcpip_test.go | 2 +- pkg/tcpip/transport/tcp/tcp_test.go | 4 ++-- pkg/tcpip/transport/udp/udp_test.go | 2 +- runsc/container/test_app/test_app.go | 2 +- test/root/cgroup_test.go | 4 ++-- test/runtimes/blacklist_test.go | 2 +- test/runtimes/runner.go | 2 +- tools/nogo.json | 31 ------------------------------- 17 files changed, 43 insertions(+), 74 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/eventchannel/event_test.go b/pkg/eventchannel/event_test.go index 7f41b4a27..43750360b 100644 --- a/pkg/eventchannel/event_test.go +++ b/pkg/eventchannel/event_test.go @@ -78,7 +78,7 @@ func TestMultiEmitter(t *testing.T) { for _, name := range names { m := testMessage{name: name} if _, err := me.Emit(m); err != nil { - t.Fatal("me.Emit(%v) failed: %v", m, err) + t.Fatalf("me.Emit(%v) failed: %v", m, err) } } @@ -96,7 +96,7 @@ func TestMultiEmitter(t *testing.T) { // Close multiEmitter. if err := me.Close(); err != nil { - t.Fatal("me.Close() failed: %v", err) + t.Fatalf("me.Close() failed: %v", err) } // All testEmitters should be closed. diff --git a/pkg/segment/test/segment_test.go b/pkg/segment/test/segment_test.go index f19a005f3..97b16c158 100644 --- a/pkg/segment/test/segment_test.go +++ b/pkg/segment/test/segment_test.go @@ -63,7 +63,7 @@ func checkSet(s *Set, expectedSegments int) error { return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) } if got, want := seg.Value(), seg.Start()+valueOffset; got != want { - return fmt.Errorf("segment %d has key %d, value %d (expected %d)", nrSegments, seg.Start, got, want) + return fmt.Errorf("segment %d has key %d, value %d (expected %d)", nrSegments, seg.Start(), got, want) } prev = next havePrev = true diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go index 5aff0cc95..a0082ecca 100644 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_test.go @@ -119,7 +119,7 @@ func TestNewPipe(t *testing.T) { continue } if flags := p.flags; test.flags != flags { - t.Errorf("%s: got file flags %s, want %s", test.desc, flags, test.flags) + t.Errorf("%s: got file flags %v, want %v", test.desc, flags, test.flags) continue } if len(test.readAheadBuffer) != len(p.readAheadBuffer) { @@ -136,7 +136,7 @@ func TestNewPipe(t *testing.T) { continue } if !fdnotifier.HasFD(int32(f.FD())) { - t.Errorf("%s: pipe fd %d is not registered for events", test.desc, f.FD) + t.Errorf("%s: pipe fd %d is not registered for events", test.desc, f.FD()) } } } diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go index 3e02e7190..d4f59ee5b 100644 --- a/pkg/sentry/fsimpl/tmpfs/stat_test.go +++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go @@ -140,7 +140,7 @@ func TestSetStatAtime(t *testing.T) { Mask: 0, Atime: linux.NsecToStatxTimestamp(100), }}); err != nil { - t.Errorf("SetStat atime without mask failed: %v") + t.Errorf("SetStat atime without mask failed: %v", err) } // Atime should be unchanged. if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { @@ -155,7 +155,7 @@ func TestSetStatAtime(t *testing.T) { Atime: linux.NsecToStatxTimestamp(100), } if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil { - t.Errorf("SetStat atime with mask failed: %v") + t.Errorf("SetStat atime with mask failed: %v", err) } if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { t.Errorf("Stat got error: %v", err) @@ -205,7 +205,7 @@ func TestSetStat(t *testing.T) { Mask: 0, Atime: linux.NsecToStatxTimestamp(100), }}); err != nil { - t.Errorf("SetStat atime without mask failed: %v") + t.Errorf("SetStat atime without mask failed: %v", err) } // Atime should be unchanged. if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { @@ -220,7 +220,7 @@ func TestSetStat(t *testing.T) { Atime: linux.NsecToStatxTimestamp(100), } if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil { - t.Errorf("SetStat atime with mask failed: %v") + t.Errorf("SetStat atime with mask failed: %v", err) } if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil { t.Errorf("Stat got error: %v", err) diff --git a/pkg/tcpip/header/eth_test.go b/pkg/tcpip/header/eth_test.go index 7a0014ad9..14413f2ce 100644 --- a/pkg/tcpip/header/eth_test.go +++ b/pkg/tcpip/header/eth_test.go @@ -88,7 +88,7 @@ func TestEthernetAddressFromMulticastIPv4Address(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { if got := EthernetAddressFromMulticastIPv4Address(test.addr); got != test.expectedLinkAddr { - t.Fatalf("got EthernetAddressFromMulticastIPv4Address(%s) = %s, want = %s", got, test.expectedLinkAddr) + t.Fatalf("got EthernetAddressFromMulticastIPv4Address(%s) = %s, want = %s", test.addr, got, test.expectedLinkAddr) } }) } diff --git a/pkg/tcpip/header/ndp_test.go b/pkg/tcpip/header/ndp_test.go index 1cb9f5dc8..969f8f1e8 100644 --- a/pkg/tcpip/header/ndp_test.go +++ b/pkg/tcpip/header/ndp_test.go @@ -115,7 +115,7 @@ func TestNDPNeighborAdvert(t *testing.T) { // Make sure flags got updated in the backing buffer. if got := b[ndpNAFlagsOffset]; got != 64 { - t.Errorf("got flags byte = %d, want = 64") + t.Errorf("got flags byte = %d, want = 64", got) } } diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 7198742b7..b857ce9d0 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -91,7 +91,7 @@ func (p PacketDispatchMode) String() string { case PacketMMap: return "PacketMMap" default: - return fmt.Sprintf("unknown packet dispatch mode %v", p) + return fmt.Sprintf("unknown packet dispatch mode '%d'", p) } } diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go index 27dc8baf9..bc822e3b1 100644 --- a/pkg/tcpip/stack/ndp_test.go +++ b/pkg/tcpip/stack/ndp_test.go @@ -1959,7 +1959,7 @@ func TestAutoGenAddrDeprecateFromPI(t *testing.T) { // addr2 is deprecated but if explicitly requested, it should be used. fullAddr2 := tcpip.FullAddress{Addr: addr2.Address, NIC: nicID} if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr2.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) } // Another PI w/ 0 preferred lifetime should not result in a deprecation @@ -1972,7 +1972,7 @@ func TestAutoGenAddrDeprecateFromPI(t *testing.T) { } expectPrimaryAddr(addr1) if got := addrForNewConnectionWithAddr(t, s, fullAddr2); got != addr2.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr2.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr2, got, addr2.Address) } // Refresh lifetimes of addr generated from prefix2. @@ -2084,7 +2084,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { // addr1 is deprecated but if explicitly requested, it should be used. fullAddr1 := tcpip.FullAddress{Addr: addr1.Address, NIC: nicID} if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr1.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) } // Refresh valid lifetime for addr of prefix1, w/ 0 preferred lifetime to make @@ -2097,7 +2097,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { } expectPrimaryAddr(addr2) if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr1.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) } // Refresh lifetimes for addr of prefix1. @@ -2121,7 +2121,7 @@ func TestAutoGenAddrTimerDeprecation(t *testing.T) { // addr2 should be the primary endpoint now since it is not deprecated. expectPrimaryAddr(addr2) if got := addrForNewConnectionWithAddr(t, s, fullAddr1); got != addr1.Address { - t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", got, addr1.Address) + t.Errorf("got addrForNewConnectionWithAddr(_, _, %+v) = %s, want = %s", fullAddr1, got, addr1.Address) } // Wait for addr of prefix1 to be invalidated. @@ -2564,7 +2564,7 @@ func TestAutoGenAddrAfterRemoval(t *testing.T) { AddressWithPrefix: addr2, } if err := s.AddProtocolAddressWithOptions(nicID, protoAddr2, stack.FirstPrimaryEndpoint); err != nil { - t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d, %s) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err) + t.Fatalf("AddProtocolAddressWithOptions(%d, %+v, %d) = %s", nicID, protoAddr2, stack.FirstPrimaryEndpoint, err) } // addr2 should be more preferred now since it is at the front of the primary // list. diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go index 3f8a2a095..c7634ceb1 100644 --- a/pkg/tcpip/stack/stack_test.go +++ b/pkg/tcpip/stack/stack_test.go @@ -1445,19 +1445,19 @@ func TestOutgoingBroadcastWithEmptyRouteTable(t *testing.T) { protoAddr := tcpip.ProtocolAddress{Protocol: fakeNetNumber, AddressWithPrefix: tcpip.AddressWithPrefix{header.IPv4Any, 0}} if err := s.AddProtocolAddress(1, protoAddr); err != nil { - t.Fatalf("AddProtocolAddress(1, %s) failed: %s", protoAddr, err) + t.Fatalf("AddProtocolAddress(1, %v) failed: %v", protoAddr, err) } r, err := s.FindRoute(1, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */) if err != nil { - t.Fatalf("FindRoute(1, %s, %s, %d) failed: %s", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) + t.Fatalf("FindRoute(1, %v, %v, %d) failed: %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) } if err := verifyRoute(r, stack.Route{LocalAddress: header.IPv4Any, RemoteAddress: header.IPv4Broadcast}); err != nil { - t.Errorf("FindRoute(1, %s, %s, %d) returned unexpected Route: %s)", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) + t.Errorf("FindRoute(1, %v, %v, %d) returned unexpected Route: %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err) } // If the NIC doesn't exist, it won't work. if _, err := s.FindRoute(2, header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */); err != tcpip.ErrNetworkUnreachable { - t.Fatalf("got FindRoute(2, %s, %s, %d) = %s want = %s", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, tcpip.ErrNetworkUnreachable) + t.Fatalf("got FindRoute(2, %v, %v, %d) = %v want = %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err, tcpip.ErrNetworkUnreachable) } } @@ -1483,12 +1483,12 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) { } nic1ProtoAddr := tcpip.ProtocolAddress{fakeNetNumber, nic1Addr} if err := s.AddProtocolAddress(1, nic1ProtoAddr); err != nil { - t.Fatalf("AddProtocolAddress(1, %s) failed: %s", nic1ProtoAddr, err) + t.Fatalf("AddProtocolAddress(1, %v) failed: %v", nic1ProtoAddr, err) } nic2ProtoAddr := tcpip.ProtocolAddress{fakeNetNumber, nic2Addr} if err := s.AddProtocolAddress(2, nic2ProtoAddr); err != nil { - t.Fatalf("AddAddress(2, %s) failed: %s", nic2ProtoAddr, err) + t.Fatalf("AddAddress(2, %v) failed: %v", nic2ProtoAddr, err) } // Set the initial route table. @@ -1503,10 +1503,10 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) { // When an interface is given, the route for a broadcast goes through it. r, err := s.FindRoute(1, nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, false /* multicastLoop */) if err != nil { - t.Fatalf("FindRoute(1, %s, %s, %d) failed: %s", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) + t.Fatalf("FindRoute(1, %v, %v, %d) failed: %v", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) } if err := verifyRoute(r, stack.Route{LocalAddress: nic1Addr.Address, RemoteAddress: header.IPv4Broadcast}); err != nil { - t.Errorf("FindRoute(1, %s, %s, %d) returned unexpected Route: %s)", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) + t.Errorf("FindRoute(1, %v, %v, %d) returned unexpected Route: %v", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err) } // When an interface is not given, it consults the route table. @@ -2399,7 +2399,7 @@ func TestNICContextPreservation(t *testing.T) { t.Fatalf("got nicinfos[%d] = _, %t, want _, true; nicinfos = %+v", id, ok, nicinfos) } if got, want := nicinfo.Context == test.want, true; got != want { - t.Fatal("got nicinfo.Context == ctx = %t, want %t; nicinfo.Context = %p, ctx = %p", got, want, nicinfo.Context, test.want) + t.Fatalf("got nicinfo.Context == ctx = %t, want %t; nicinfo.Context = %p, ctx = %p", got, want, nicinfo.Context, test.want) } }) } @@ -2768,7 +2768,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { { subnet, err := tcpip.NewSubnet("\x00", "\x00") if err != nil { - t.Fatalf("NewSubnet failed:", err) + t.Fatalf("NewSubnet failed: %v", err) } s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}}) } @@ -2782,11 +2782,11 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // permanentExpired kind. r, err := s.FindRoute(1, "\x01", "\x02", fakeNetNumber, false) if err != nil { - t.Fatal("FindRoute failed:", err) + t.Fatalf("FindRoute failed: %v", err) } defer r.Release() if err := s.RemoveAddress(1, "\x01"); err != nil { - t.Fatalf("RemoveAddress failed:", err) + t.Fatalf("RemoveAddress failed: %v", err) } // @@ -2798,7 +2798,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // Add some other address with peb set to // FirstPrimaryEndpoint. if err := s.AddAddressWithOptions(1, fakeNetNumber, "\x03", stack.FirstPrimaryEndpoint); err != nil { - t.Fatal("AddAddressWithOptions failed:", err) + t.Fatalf("AddAddressWithOptions failed: %v", err) } @@ -2806,7 +2806,7 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // make sure the new peb was respected. // (The address should just be promoted now). if err := s.AddAddressWithOptions(1, fakeNetNumber, "\x01", ps); err != nil { - t.Fatal("AddAddressWithOptions failed:", err) + t.Fatalf("AddAddressWithOptions failed: %v", err) } var primaryAddrs []tcpip.Address for _, pa := range s.NICInfo()[1].ProtocolAddresses { @@ -2839,11 +2839,11 @@ func TestNewPEBOnPromotionToPermanent(t *testing.T) { // GetMainNICAddress; else, our original address // should be returned. if err := s.RemoveAddress(1, "\x03"); err != nil { - t.Fatalf("RemoveAddress failed:", err) + t.Fatalf("RemoveAddress failed: %v", err) } addr, err = s.GetMainNICAddress(1, fakeNetNumber) if err != nil { - t.Fatal("s.GetMainNICAddress failed:", err) + t.Fatalf("s.GetMainNICAddress failed: %v", err) } if ps == stack.NeverPrimaryEndpoint { if want := (tcpip.AddressWithPrefix{}); addr != want { diff --git a/pkg/tcpip/tcpip_test.go b/pkg/tcpip/tcpip_test.go index 8c0aacffa..1c8e2bc34 100644 --- a/pkg/tcpip/tcpip_test.go +++ b/pkg/tcpip/tcpip_test.go @@ -218,7 +218,7 @@ func TestAddressWithPrefixSubnet(t *testing.T) { gotSubnet := ap.Subnet() wantSubnet, err := NewSubnet(tt.subnetAddr, tt.subnetMask) if err != nil { - t.Error("NewSubnet(%q, %q) failed: %s", tt.subnetAddr, tt.subnetMask, err) + t.Errorf("NewSubnet(%q, %q) failed: %s", tt.subnetAddr, tt.subnetMask, err) continue } if gotSubnet != wantSubnet { diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index ce3df7478..8a87fa0a8 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -284,7 +284,7 @@ func TestTCPResetSentForACKWhenNotUsingSynCookies(t *testing.T) { // are released instantly on Close. tcpTW := tcpip.TCPTimeWaitTimeoutOption(1 * time.Millisecond) if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpTW); err != nil { - t.Fatalf("e.stack.SetTransportProtocolOption(%d, %s) = %s", tcp.ProtocolNumber, tcpTW, err) + t.Fatalf("e.stack.SetTransportProtocolOption(%d, %v) = %v", tcp.ProtocolNumber, tcpTW, err) } c.EP.Close() @@ -5609,7 +5609,7 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) { return } if w := tcp.WindowSize(); w == 0 || w > uint16(wantRcvWnd) { - t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w, wantRcvWnd) + t.Errorf("expected a non-zero window: got %d, want <= wantRcvWnd", w) } }, )) diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go index 0905726c1..b3fe557ca 100644 --- a/pkg/tcpip/transport/udp/udp_test.go +++ b/pkg/tcpip/transport/udp/udp_test.go @@ -607,7 +607,7 @@ func testReadInternal(c *testContext, flow testFlow, packetShouldBeDropped, expe // Check the peer address. h := flow.header4Tuple(incoming) if addr.Addr != h.srcAddr.Addr { - c.t.Fatalf("unexpected remote address: got %s, want %s", addr.Addr, h.srcAddr) + c.t.Fatalf("unexpected remote address: got %s, want %v", addr.Addr, h.srcAddr) } // Check the payload. diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go index 01c47c79f..5f1c4b7d6 100644 --- a/runsc/container/test_app/test_app.go +++ b/runsc/container/test_app/test_app.go @@ -96,7 +96,7 @@ func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) listener, err := net.Listen("unix", c.socketPath) if err != nil { - log.Fatal("error listening on socket %q:", c.socketPath, err) + log.Fatalf("error listening on socket %q: %v", c.socketPath, err) } go server(listener, outputFile) diff --git a/test/root/cgroup_test.go b/test/root/cgroup_test.go index 4038661cb..679342def 100644 --- a/test/root/cgroup_test.go +++ b/test/root/cgroup_test.go @@ -53,7 +53,7 @@ func verifyPid(pid int, path string) error { if scanner.Err() != nil { return scanner.Err() } - return fmt.Errorf("got: %s, want: %d", gots, pid) + return fmt.Errorf("got: %v, want: %d", gots, pid) } // TestCgroup sets cgroup options and checks that cgroup was properly configured. @@ -106,7 +106,7 @@ func TestMemCGroup(t *testing.T) { time.Sleep(100 * time.Millisecond) } - t.Fatalf("%vMB is less than %vMB: %v", memUsage>>20, allocMemSize>>20) + t.Fatalf("%vMB is less than %vMB", memUsage>>20, allocMemSize>>20) } // TestCgroup sets cgroup options and checks that cgroup was properly configured. diff --git a/test/runtimes/blacklist_test.go b/test/runtimes/blacklist_test.go index 52f49b984..0ff69ab18 100644 --- a/test/runtimes/blacklist_test.go +++ b/test/runtimes/blacklist_test.go @@ -32,6 +32,6 @@ func TestBlacklists(t *testing.T) { t.Fatalf("error parsing blacklist: %v", err) } if *blacklistFile != "" && len(bl) == 0 { - t.Errorf("got empty blacklist for file %q", blacklistFile) + t.Errorf("got empty blacklist for file %q", *blacklistFile) } } diff --git a/test/runtimes/runner.go b/test/runtimes/runner.go index ddb890dbc..3c98f4570 100644 --- a/test/runtimes/runner.go +++ b/test/runtimes/runner.go @@ -114,7 +114,7 @@ func getTests(d dockerutil.Docker, blacklist map[string]struct{}) ([]testing.Int F: func(t *testing.T) { // Is the test blacklisted? if _, ok := blacklist[tc]; ok { - t.Skip("SKIP: blacklisted test %q", tc) + t.Skipf("SKIP: blacklisted test %q", tc) } var ( diff --git a/tools/nogo.json b/tools/nogo.json index cc05ba027..f69999e50 100644 --- a/tools/nogo.json +++ b/tools/nogo.json @@ -27,37 +27,6 @@ "/external/io_opencensus_go/tag/map_codec.go": "allowed: false positive" } }, - "printf": { - "exclude_files": { - ".*_abi_autogen_test.go": "fix: Sprintf format has insufficient args", - "/pkg/segment/test/segment_test.go": "fix: Errorf format %d arg seg.Start is a func value, not called", - "/pkg/tcpip/tcpip_test.go": "fix: Error call has possible formatting directive %q", - "/pkg/tcpip/header/eth_test.go": "fix: Fatalf format %s reads arg #3, but call has 2 args", - "/pkg/tcpip/header/ndp_test.go": "fix: Errorf format %d reads arg #1, but call has 0 args", - "/pkg/eventchannel/event_test.go": "fix: Fatal call has possible formatting directive %v", - "/pkg/tcpip/stack/ndp.go": "fix: Fatalf format %s has arg protocolAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/sentry/fs/fdpipe/pipe_test.go": "fix: Errorf format %s has arg flags of wrong type gvisor.dev/gvisor/pkg/sentry/fs.FileFlags", - "/pkg/sentry/fs/fdpipe/pipe_test.go": "fix: Errorf format %d arg f.FD is a func value, not called", - "/pkg/tcpip/link/fdbased/endpoint.go": "fix: Sprintf format %v with arg p causes recursive String method call", - "/pkg/tcpip/transport/udp/udp_test.go": "fix: Fatalf format %s has arg h.srcAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.FullAddress", - "/pkg/tcpip/transport/tcp/tcp_test.go": "fix: Fatalf format %s has arg tcpTW of wrong type gvisor.dev/gvisor/pkg/tcpip.TCPTimeWaitTimeoutOption", - "/pkg/tcpip/transport/tcp/tcp_test.go": "fix: Errorf call needs 1 arg but has 2 args", - "/pkg/tcpip/stack/ndp_test.go": "fix: Errorf format %s reads arg #3, but call has 2 args", - "/pkg/tcpip/stack/ndp_test.go": "fix: Fatalf format %s reads arg #5, but call has 4 args", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf format %s has arg protoAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf format %s has arg nic1ProtoAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf format %s has arg nic2ProtoAddr of wrong type gvisor.dev/gvisor/pkg/tcpip.ProtocolAddress", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatal call has possible formatting directive %t", - "/pkg/tcpip/stack/stack_test.go": "fix: Fatalf call has arguments but no formatting directives", - "/pkg/tcpip/link/fdbased/endpoint.go": "fix: Sprintf format %v with arg p causes recursive String method call", - "/pkg/sentry/fsimpl/tmpfs/stat_test.go": "fix: Errorf format %v reads arg #1, but call has 0 args", - "/runsc/container/test_app/test_app.go": "fix: Fatal call has possible formatting directive %q", - "/test/root/cgroup_test.go": "fix: Errorf format %s has arg gots of wrong type []int", - "/test/root/cgroup_test.go": "fix: Fatalf format %v reads arg #3, but call has 2 args", - "/test/runtimes/runner.go": "fix: Skip call has possible formatting directive %q", - "/test/runtimes/blacklist_test.go": "fix: Errorf format %q has arg blacklistFile of wrong type *string" - } - }, "structtag": { "exclude_files": { "/external/": "allowed: may use arbitrary tags" -- cgit v1.2.3 From b30130567d81157e39b692e0116f86015f0bcc71 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 8 Apr 2020 13:33:44 -0700 Subject: Enable SubprocessExited and SubprocessZombie for gVisor Updates #164 PiperOrigin-RevId: 305544029 --- pkg/sentry/fs/proc/task.go | 28 ++++++++++++++++-- pkg/sentry/fsimpl/proc/task.go | 16 ----------- pkg/sentry/fsimpl/proc/task_files.go | 56 ++++++++++++++++++++++++++++++++++-- test/syscalls/linux/proc.cc | 31 ++++++++------------ 4 files changed, 90 insertions(+), 41 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index d6c5dd2c1..4d42eac83 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -57,6 +57,16 @@ func getTaskMM(t *kernel.Task) (*mm.MemoryManager, error) { return m, nil } +func checkTaskState(t *kernel.Task) error { + switch t.ExitState() { + case kernel.TaskExitZombie: + return syserror.EACCES + case kernel.TaskExitDead: + return syserror.ESRCH + } + return nil +} + // taskDir represents a task-level directory. // // +stateify savable @@ -254,11 +264,12 @@ func newExe(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { } func (e *exe) executable() (file fsbridge.File, err error) { + if err := checkTaskState(e.t); err != nil { + return nil, err + } e.t.WithMuLocked(func(t *kernel.Task) { mm := t.MemoryManager() if mm == nil { - // TODO(b/34851096): Check shouldn't allow Readlink once the - // Task is zombied. err = syserror.EACCES return } @@ -268,7 +279,7 @@ func (e *exe) executable() (file fsbridge.File, err error) { // (with locks held). file = mm.Executable() if file == nil { - err = syserror.ENOENT + err = syserror.ESRCH } }) return @@ -313,11 +324,22 @@ func newNamespaceSymlink(t *kernel.Task, msrc *fs.MountSource, name string) *fs. return newProcInode(t, n, msrc, fs.Symlink, t) } +// Readlink reads the symlink value. +func (n *namespaceSymlink) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { + if err := checkTaskState(n.t); err != nil { + return "", err + } + return n.Symlink.Readlink(ctx, inode) +} + // Getlink implements fs.InodeOperations.Getlink. func (n *namespaceSymlink) Getlink(ctx context.Context, inode *fs.Inode) (*fs.Dirent, error) { if !kernel.ContextCanTrace(ctx, n.t, false) { return nil, syserror.EACCES } + if err := checkTaskState(n.t); err != nil { + return nil, err + } // Create a new regular file to fake the namespace file. iops := fsutil.NewNoReadWriteFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0777), linux.PROC_SUPER_MAGIC) diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index aee2a4392..888afc0fd 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -214,22 +214,6 @@ func newIO(t *kernel.Task, isThreadGroup bool) *ioData { return &ioData{ioUsage: t} } -func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry { - // Namespace symlinks should contain the namespace name and the inode number - // for the namespace instance, so for example user:[123456]. We currently fake - // the inode number by sticking the symlink inode in its place. - target := fmt.Sprintf("%s:[%d]", ns, ino) - - inode := &kernfs.StaticSymlink{} - // Note: credentials are overridden by taskOwnedInode. - inode.Init(task.Credentials(), ino, target) - - taskInode := &taskOwnedInode{Inode: inode, owner: task} - d := &kernfs.Dentry{} - d.Init(taskInode) - return d -} - // newCgroupData creates inode that shows cgroup information. // From man 7 cgroups: "For each cgroup hierarchy of which the process is a // member, there is one entry containing three colon-separated fields: diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 88ea6a6d8..2c6f8bdfc 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -64,6 +64,16 @@ func getMMIncRef(task *kernel.Task) (*mm.MemoryManager, error) { return m, nil } +func checkTaskState(t *kernel.Task) error { + switch t.ExitState() { + case kernel.TaskExitZombie: + return syserror.EACCES + case kernel.TaskExitDead: + return syserror.ESRCH + } + return nil +} + type bufferWriter struct { buf *bytes.Buffer } @@ -628,11 +638,13 @@ func (s *exeSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, er } func (s *exeSymlink) executable() (file fsbridge.File, err error) { + if err := checkTaskState(s.task); err != nil { + return nil, err + } + s.task.WithMuLocked(func(t *kernel.Task) { mm := t.MemoryManager() if mm == nil { - // TODO(b/34851096): Check shouldn't allow Readlink once the - // Task is zombied. err = syserror.EACCES return } @@ -642,7 +654,7 @@ func (s *exeSymlink) executable() (file fsbridge.File, err error) { // (with locks held). file = mm.Executable() if file == nil { - err = syserror.ENOENT + err = syserror.ESRCH } }) return @@ -709,3 +721,41 @@ func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error { i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf) return nil } + +type namespaceSymlink struct { + kernfs.StaticSymlink + + task *kernel.Task +} + +func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry { + // Namespace symlinks should contain the namespace name and the inode number + // for the namespace instance, so for example user:[123456]. We currently fake + // the inode number by sticking the symlink inode in its place. + target := fmt.Sprintf("%s:[%d]", ns, ino) + + inode := &namespaceSymlink{task: task} + // Note: credentials are overridden by taskOwnedInode. + inode.Init(task.Credentials(), ino, target) + + taskInode := &taskOwnedInode{Inode: inode, owner: task} + d := &kernfs.Dentry{} + d.Init(taskInode) + return d +} + +// Readlink implements Inode. +func (s *namespaceSymlink) Readlink(ctx context.Context) (string, error) { + if err := checkTaskState(s.task); err != nil { + return "", err + } + return s.StaticSymlink.Readlink(ctx) +} + +// Getlink implements Inode.Getlink. +func (s *namespaceSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) { + if err := checkTaskState(s.task); err != nil { + return vfs.VirtualDentry{}, "", err + } + return s.StaticSymlink.Getlink(ctx) +} diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc index 5a70f6c3b..da98e1f66 100644 --- a/test/syscalls/linux/proc.cc +++ b/test/syscalls/linux/proc.cc @@ -1326,8 +1326,6 @@ TEST(ProcPidSymlink, SubprocessRunning) { SyscallSucceedsWithValue(sizeof(buf))); } -// FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux -// on proc files. TEST(ProcPidSymlink, SubprocessZombied) { ASSERT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, false)); ASSERT_NO_ERRNO(SetCapability(CAP_DAC_READ_SEARCH, false)); @@ -1337,7 +1335,7 @@ TEST(ProcPidSymlink, SubprocessZombied) { int want = EACCES; if (!IsRunningOnGvisor()) { auto version = ASSERT_NO_ERRNO_AND_VALUE(GetKernelVersion()); - if (version.major == 4 && version.minor > 3) { + if (version.major > 4 || (version.major == 4 && version.minor > 3)) { want = ENOENT; } } @@ -1350,30 +1348,25 @@ TEST(ProcPidSymlink, SubprocessZombied) { SyscallFailsWithErrno(want)); } - // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux - // on proc files. + // FIXME(gvisor.dev/issue/164): Inconsistent behavior between linux on proc + // files. // // ~4.3: Syscall fails with EACCES. - // 4.17 & gVisor: Syscall succeeds and returns 1. + // 4.17: Syscall succeeds and returns 1. // - // EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), - // SyscallFailsWithErrno(EACCES)); + if (!IsRunningOnGvisor()) { + return; + } - // FIXME(gvisor.dev/issue/164): Inconsistent behavior between gVisor and linux - // on proc files. - // - // ~4.3: Syscall fails with EACCES. - // 4.17 & gVisor: Syscall succeeds and returns 1. - // - // EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), - // SyscallFailsWithErrno(EACCES)); + EXPECT_THAT(ReadlinkWhileZombied("ns/pid", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); + + EXPECT_THAT(ReadlinkWhileZombied("ns/user", buf, sizeof(buf)), + SyscallFailsWithErrno(want)); } // Test whether /proc/PID/ symlinks can be read for an exited process. TEST(ProcPidSymlink, SubprocessExited) { - // FIXME(gvisor.dev/issue/164): These all succeed on gVisor. - SKIP_IF(IsRunningOnGvisor()); - char buf[1]; EXPECT_THAT(ReadlinkWhileExited("exe", buf, sizeof(buf)), -- cgit v1.2.3 From 357f136e42de81b033b65b7f39a4a555275a17e3 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 8 Apr 2020 14:38:09 -0700 Subject: Handle utimes correctly for shared gofer filesystems. Determine system time from within the sentry rather than relying on the remote filesystem to prevent inconsistencies. Resolve related TODOs; the time discrepancies in question don't exist anymore. PiperOrigin-RevId: 305557099 --- pkg/sentry/fs/gofer/util.go | 16 ++++++++++++++-- test/syscalls/linux/utimes.cc | 18 +----------------- 2 files changed, 15 insertions(+), 19 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/util.go b/pkg/sentry/fs/gofer/util.go index 2d8d3a2ea..47a6c69bf 100644 --- a/pkg/sentry/fs/gofer/util.go +++ b/pkg/sentry/fs/gofer/util.go @@ -20,17 +20,29 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/fs" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" ) func utimes(ctx context.Context, file contextFile, ts fs.TimeSpec) error { if ts.ATimeOmit && ts.MTimeOmit { return nil } + + // Replace requests to use the "system time" with the current time to + // ensure that timestamps remain consistent with the remote + // filesystem. + now := ktime.NowFromContext(ctx) + if ts.ATimeSetSystemTime { + ts.ATime = now + } + if ts.MTimeSetSystemTime { + ts.MTime = now + } mask := p9.SetAttrMask{ ATime: !ts.ATimeOmit, - ATimeNotSystemTime: !ts.ATimeSetSystemTime, + ATimeNotSystemTime: true, MTime: !ts.MTimeOmit, - MTimeNotSystemTime: !ts.MTimeSetSystemTime, + MTimeNotSystemTime: true, } as, ans := ts.ATime.Unix() ms, mns := ts.MTime.Unix() diff --git a/test/syscalls/linux/utimes.cc b/test/syscalls/linux/utimes.cc index 3a927a430..22e6d1a85 100644 --- a/test/syscalls/linux/utimes.cc +++ b/test/syscalls/linux/utimes.cc @@ -34,17 +34,10 @@ namespace testing { namespace { -// TODO(b/36516566): utimes(nullptr) does not pick the "now" time in the -// application's time domain, so when asserting that times are within a window, -// we expand the window to allow for differences between the time domains. -constexpr absl::Duration kClockSlack = absl::Milliseconds(100); - // TimeBoxed runs fn, setting before and after to (coarse realtime) times // guaranteed* to come before and after fn started and completed, respectively. // // fn may be called more than once if the clock is adjusted. -// -// * See the comment on kClockSlack. gVisor breaks this guarantee. void TimeBoxed(absl::Time* before, absl::Time* after, std::function const& fn) { do { @@ -69,12 +62,6 @@ void TimeBoxed(absl::Time* before, absl::Time* after, // which could lead to test failures, but that is very unlikely to happen. continue; } - - if (IsRunningOnGvisor()) { - // See comment on kClockSlack. - *before -= kClockSlack; - *after += kClockSlack; - } } while (*after < *before); } @@ -235,10 +222,7 @@ void TestUtimensat(int dirFd, std::string const& path) { EXPECT_GE(mtime3, before); EXPECT_LE(mtime3, after); - if (!IsRunningOnGvisor()) { - // FIXME(b/36516566): Gofers set atime and mtime to different "now" times. - EXPECT_EQ(atime3, mtime3); - } + EXPECT_EQ(atime3, mtime3); } TEST(UtimensatTest, OnAbsPath) { -- cgit v1.2.3 From 981a587476e11e49cf49edb31705d8727b0db556 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 8 Apr 2020 17:32:57 -0700 Subject: Remove InodeOperations FIXMEs that will be obsoleted by VFS2. PiperOrigin-RevId: 305588941 --- pkg/sentry/fs/gofer/inode.go | 3 --- pkg/sentry/fs/host/inode.go | 3 --- pkg/sentry/fs/inode.go | 2 -- 3 files changed, 8 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 1c934981b..811e8ea30 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -710,13 +710,10 @@ func init() { } // AddLink implements InodeOperations.AddLink, but is currently a noop. -// FIXME(b/63117438): Remove this from InodeOperations altogether. func (*inodeOperations) AddLink() {} // DropLink implements InodeOperations.DropLink, but is currently a noop. -// FIXME(b/63117438): Remove this from InodeOperations altogether. func (*inodeOperations) DropLink() {} // NotifyStatusChange implements fs.InodeOperations.NotifyStatusChange. -// FIXME(b/63117438): Remove this from InodeOperations altogether. func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {} diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 1da3c0a17..62f1246aa 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -397,15 +397,12 @@ func (i *inodeOperations) StatFS(context.Context) (fs.Info, error) { } // AddLink implements fs.InodeOperations.AddLink. -// FIXME(b/63117438): Remove this from InodeOperations altogether. func (i *inodeOperations) AddLink() {} // DropLink implements fs.InodeOperations.DropLink. -// FIXME(b/63117438): Remove this from InodeOperations altogether. func (i *inodeOperations) DropLink() {} // NotifyStatusChange implements fs.InodeOperations.NotifyStatusChange. -// FIXME(b/63117438): Remove this from InodeOperations altogether. func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {} // readdirAll returns all of the directory entries in i. diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 55fb71c16..73f89abcc 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -397,8 +397,6 @@ func (i *Inode) Getlink(ctx context.Context) (*Dirent, error) { // AddLink calls i.InodeOperations.AddLink. func (i *Inode) AddLink() { if i.overlay != nil { - // FIXME(b/63117438): Remove this from InodeOperations altogether. - // // This interface is only used by ramfs to update metadata of // children. These filesystems should _never_ have overlay // Inodes cached as children. So explicitly disallow this -- cgit v1.2.3 From 6dd5a1f3fe55daa8510b1ee5e3a59219aad92af6 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Wed, 8 Apr 2020 17:56:55 -0700 Subject: Clean up TODOs PiperOrigin-RevId: 305592245 --- pkg/sentry/fs/tmpfs/fs.go | 3 --- pkg/sentry/fsimpl/kernfs/filesystem.go | 2 +- pkg/sentry/kernel/ptrace.go | 1 - pkg/sentry/vfs/filesystem.go | 2 +- pkg/sentry/vfs/mount.go | 12 ++++++------ pkg/sentry/vfs/mount_test.go | 2 +- runsc/cmd/gofer.go | 5 ++--- test/syscalls/linux/epoll.cc | 4 ---- test/syscalls/linux/file_base.h | 1 + test/syscalls/linux/pwrite64.cc | 9 +-------- test/syscalls/linux/tuntap.cc | 7 ++++--- test/syscalls/linux/write.cc | 10 ++-------- tools/go_generics/defs.bzl | 1 - 13 files changed, 19 insertions(+), 40 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/tmpfs/fs.go b/pkg/sentry/fs/tmpfs/fs.go index d5be56c3f..bc117ca6a 100644 --- a/pkg/sentry/fs/tmpfs/fs.go +++ b/pkg/sentry/fs/tmpfs/fs.go @@ -44,9 +44,6 @@ const ( // lookup. cacheRevalidate = "revalidate" - // TODO(edahlgren/mpratt): support a tmpfs size limit. - // size = "size" - // Permissions that exceed modeMask will be rejected. modeMask = 01777 diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 16a3c18ae..4433071aa 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -682,7 +682,7 @@ func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu if err != nil { return linux.Statfs{}, err } - // TODO: actually implement statfs + // TODO(gvisor.dev/issue/1193): actually implement statfs. return linux.Statfs{}, syserror.ENOSYS } diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index 35ad97d5d..e23e796ef 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -184,7 +184,6 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { if targetCreds.PermittedCaps&^callerCreds.PermittedCaps != 0 { return false } - // TODO: Yama LSM return true } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index cd34782ff..bef1bd312 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -497,7 +497,7 @@ type FilesystemImpl interface { // Preconditions: vd.Mount().Filesystem().Impl() == this FilesystemImpl. PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error - // TODO: inotify_add_watch() + // TODO(gvisor.dev/issue/1479): inotify_add_watch() } // PrependPathAtVFSRootError is returned by implementations of diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 1b8ecc415..f06946103 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -233,9 +233,9 @@ func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentia } vd.dentry.mu.Lock() } - // TODO: Linux requires that either both the mount point and the mount root - // are directories, or neither are, and returns ENOTDIR if this is not the - // case. + // TODO(gvisor.dev/issue/1035): Linux requires that either both the mount + // point and the mount root are directories, or neither are, and returns + // ENOTDIR if this is not the case. mntns := vd.mount.ns mnt := newMount(vfs, fs, root, mntns, opts) vfs.mounts.seq.BeginWrite() @@ -274,9 +274,9 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti } } - // TODO(jamieliu): Linux special-cases umount of the caller's root, which - // we don't implement yet (we'll just fail it since the caller holds a - // reference on it). + // TODO(gvisor.dev/issue/1035): Linux special-cases umount of the caller's + // root, which we don't implement yet (we'll just fail it since the caller + // holds a reference on it). vfs.mounts.seq.BeginWrite() if opts.Flags&linux.MNT_DETACH == 0 { diff --git a/pkg/sentry/vfs/mount_test.go b/pkg/sentry/vfs/mount_test.go index 3b933468d..3335e4057 100644 --- a/pkg/sentry/vfs/mount_test.go +++ b/pkg/sentry/vfs/mount_test.go @@ -55,7 +55,7 @@ func TestMountTableInsertLookup(t *testing.T) { } } -// TODO: concurrent lookup/insertion/removal +// TODO(gvisor.dev/issue/1035): concurrent lookup/insertion/removal. // must be powers of 2 var benchNumMounts = []int{1 << 2, 1 << 5, 1 << 8} diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 02e5af3d3..28f0d54b9 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -272,9 +272,8 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error { root := spec.Root.Path if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { - // FIXME: runsc can't be re-executed without - // /proc, so we create a tmpfs mount, mount ./proc and ./root - // there, then move this mount to the root and after + // runsc can't be re-executed without /proc, so we create a tmpfs mount, + // mount ./proc and ./root there, then move this mount to the root and after // setCapsAndCallSelf, runsc will chroot into /root. // // We need a directory to construct a new root and we know that diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc index a4f8f3cec..f57d38dc7 100644 --- a/test/syscalls/linux/epoll.cc +++ b/test/syscalls/linux/epoll.cc @@ -56,10 +56,6 @@ TEST(EpollTest, AllWritable) { struct epoll_event result[kFDsPerEpoll]; ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), result, kFDsPerEpoll, -1), SyscallSucceedsWithValue(kFDsPerEpoll)); - // TODO(edahlgren): Why do some tests check epoll_event::data, and others - // don't? Does Linux actually guarantee that, in any of these test cases, - // epoll_wait will necessarily write out the epoll_events in the order that - // they were registered? for (int i = 0; i < kFDsPerEpoll; i++) { ASSERT_EQ(result[i].events, EPOLLOUT); } diff --git a/test/syscalls/linux/file_base.h b/test/syscalls/linux/file_base.h index 25fdd7106..fb418e052 100644 --- a/test/syscalls/linux/file_base.h +++ b/test/syscalls/linux/file_base.h @@ -87,6 +87,7 @@ class FileTest : public ::testing::Test { ClosePipes(); } + protected: std::string test_file_name_; FileDescriptor test_file_fd_; diff --git a/test/syscalls/linux/pwrite64.cc b/test/syscalls/linux/pwrite64.cc index b48fe540d..c2f72e010 100644 --- a/test/syscalls/linux/pwrite64.cc +++ b/test/syscalls/linux/pwrite64.cc @@ -27,14 +27,7 @@ namespace testing { namespace { -// This test is currently very rudimentary. -// -// TODO(edahlgren): -// * bad buffer states (EFAULT). -// * bad fds (wrong permission, wrong type of file, EBADF). -// * check offset is not incremented. -// * check for EOF. -// * writing to pipes, symlinks, special files. +// TODO(gvisor.dev/issue/2370): This test is currently very rudimentary. class Pwrite64 : public ::testing::Test { void SetUp() override { name_ = NewTempAbsPath(); diff --git a/test/syscalls/linux/tuntap.cc b/test/syscalls/linux/tuntap.cc index 53ad2dda3..3a8ba37eb 100644 --- a/test/syscalls/linux/tuntap.cc +++ b/test/syscalls/linux/tuntap.cc @@ -242,7 +242,7 @@ TEST_F(TuntapTest, InvalidReadWrite) { TEST_F(TuntapTest, WriteToDownDevice) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); - // FIXME: gVisor always creates enabled/up'd interfaces. + // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces. SKIP_IF(IsRunningOnGvisor()); FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR)); @@ -280,10 +280,11 @@ PosixErrorOr OpenAndAttachTap( &addr, sizeof(addr))); if (!IsRunningOnGvisor()) { - // FIXME: gVisor doesn't support setting MAC address on interfaces yet. + // FIXME(b/110961832): gVisor doesn't support setting MAC address on + // interfaces yet. RETURN_IF_ERRNO(LinkSetMacAddr(link->index, kMacA, sizeof(kMacA))); - // FIXME: gVisor always creates enabled/up'd interfaces. + // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces. RETURN_IF_ERRNO(LinkChangeFlags(link->index, IFF_UP, IFF_UP)); } diff --git a/test/syscalls/linux/write.cc b/test/syscalls/linux/write.cc index 9b219cfd6..39b5b2f56 100644 --- a/test/syscalls/linux/write.cc +++ b/test/syscalls/linux/write.cc @@ -31,14 +31,8 @@ namespace gvisor { namespace testing { namespace { -// This test is currently very rudimentary. -// -// TODO(edahlgren): -// * bad buffer states (EFAULT). -// * bad fds (wrong permission, wrong type of file, EBADF). -// * check offset is incremented. -// * check for EOF. -// * writing to pipes, symlinks, special files. + +// TODO(gvisor.dev/issue/2370): This test is currently very rudimentary. class WriteTest : public ::testing::Test { public: ssize_t WriteBytes(int fd, int bytes) { diff --git a/tools/go_generics/defs.bzl b/tools/go_generics/defs.bzl index c5be52ecd..8c9995fd4 100644 --- a/tools/go_generics/defs.bzl +++ b/tools/go_generics/defs.bzl @@ -105,7 +105,6 @@ def _go_template_instance_impl(ctx): executable = ctx.executable._tool, ) - # TODO: How can we get the dependencies out? return struct( files = depset([output]), ) -- cgit v1.2.3 From 257225c34b81ff0d0b5ce8ae333f5905f9e86cce Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Thu, 9 Apr 2020 17:29:43 -0700 Subject: Downgrade VFS1-specific FIXME to a NOTE. PiperOrigin-RevId: 305794509 --- pkg/sentry/fs/dirent.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index 0266a5287..65be12175 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -312,9 +312,9 @@ func (d *Dirent) SyncAll(ctx context.Context) { // There is nothing to sync for a read-only filesystem. if !d.Inode.MountSource.Flags.ReadOnly { - // FIXME(b/34856369): This should be a mount traversal, not a - // Dirent traversal, because some Inodes that need to be synced - // may no longer be reachable by name (after sys_unlink). + // NOTE(b/34856369): This should be a mount traversal, not a Dirent + // traversal, because some Inodes that need to be synced may no longer + // be reachable by name (after sys_unlink). // // Write out metadata, dirty page cached pages, and sync disk/remote // caches. -- cgit v1.2.3 From 96f914295920404e7c5c97553771e09b31f6900a Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Fri, 10 Apr 2020 15:46:16 -0700 Subject: Use O_CLOEXEC when dup'ing FDs The sentry doesn't allow execve, but it's a good defense in-depth measure. PiperOrigin-RevId: 305958737 --- pkg/sentry/fs/gofer/inode.go | 2 +- pkg/sentry/fsimpl/gofer/gofer.go | 2 +- runsc/boot/filter/config.go | 2 +- runsc/main.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 811e8ea30..a016c896e 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -273,7 +273,7 @@ func (i *inodeFileState) recreateReadHandles(ctx context.Context, writer *handle // operations on the old will see the new data. Then, make the new handle take // ownereship of the old FD and mark the old readHandle to not close the FD // when done. - if err := syscall.Dup3(h.Host.FD(), i.readHandles.Host.FD(), 0); err != nil { + if err := syscall.Dup3(h.Host.FD(), i.readHandles.Host.FD(), syscall.O_CLOEXEC); err != nil { return err } diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 20edaf643..bdf11fa65 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -1089,7 +1089,7 @@ func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool // description, but this doesn't matter since they refer to the // same file (unless d.fs.opts.overlayfsStaleRead is true, // which we handle separately). - if err := syscall.Dup3(int(h.fd), int(d.handle.fd), 0); err != nil { + if err := syscall.Dup3(int(h.fd), int(d.handle.fd), syscall.O_CLOEXEC); err != nil { d.handleMu.Unlock() ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to dup fd %d to fd %d: %v", h.fd, d.handle.fd, err) h.close(ctx) diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 06b9f888a..1828d116a 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -44,7 +44,7 @@ var allowedSyscalls = seccomp.SyscallRules{ { seccomp.AllowAny{}, seccomp.AllowAny{}, - seccomp.AllowValue(0), + seccomp.AllowValue(syscall.O_CLOEXEC), }, }, syscall.SYS_EPOLL_CREATE1: {}, diff --git a/runsc/main.go b/runsc/main.go index c1c78529c..59f624842 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -291,7 +291,7 @@ func main() { // want with them. Since Docker and Containerd both eat boot's stderr, we // dup our stderr to the provided log FD so that panics will appear in the // logs, rather than just disappear. - if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil { + if err := syscall.Dup3(fd, int(os.Stderr.Fd()), syscall.O_CLOEXEC); err != nil { cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err) } } -- cgit v1.2.3 From 6a4d17a31dc209afbbca66e871a7c6dc299c167b Mon Sep 17 00:00:00 2001 From: Jon Budd Date: Mon, 13 Apr 2020 11:01:02 -0700 Subject: Remove obsolete TODOs for b/38173783 The comments in the ticket indicate that this behavior is fine and that the ticket should be closed, so we shouldn't need pointers to the ticket. PiperOrigin-RevId: 306266071 --- pkg/context/context.go | 4 ---- pkg/sentry/arch/stack.go | 3 --- pkg/sentry/fs/gofer/file_state.go | 1 - pkg/sentry/fs/gofer/handles.go | 1 - pkg/sentry/fs/gofer/inode_state.go | 1 - pkg/sentry/fs/gofer/session_state.go | 1 - pkg/sentry/fs/inode.go | 1 - pkg/sentry/kernel/shm/shm.go | 2 +- pkg/sentry/kernel/task_context.go | 1 - pkg/sentry/kernel/task_signals.go | 2 -- pkg/usermem/usermem.go | 3 --- 11 files changed, 1 insertion(+), 19 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/context/context.go b/pkg/context/context.go index 23e009ef3..5319b6d8d 100644 --- a/pkg/context/context.go +++ b/pkg/context/context.go @@ -127,10 +127,6 @@ func (logContext) Value(key interface{}) interface{} { var bgContext = &logContext{Logger: log.Log()} // Background returns an empty context using the default logger. -// -// Users should be wary of using a Background context. Please tag any use with -// FIXME(b/38173783) and a note to remove this use. -// // Generally, one should use the Task as their context when available, or avoid // having to use a context in places where a Task is unavailable. // diff --git a/pkg/sentry/arch/stack.go b/pkg/sentry/arch/stack.go index 09bceabc9..1108fa0bd 100644 --- a/pkg/sentry/arch/stack.go +++ b/pkg/sentry/arch/stack.go @@ -97,7 +97,6 @@ func (s *Stack) Push(vals ...interface{}) (usermem.Addr, error) { if c < 0 { return 0, fmt.Errorf("bad binary.Size for %T", v) } - // TODO(b/38173783): Use a real context.Context. n, err := usermem.CopyObjectOut(context.Background(), s.IO, s.Bottom-usermem.Addr(c), norm, usermem.IOOpts{}) if err != nil || c != n { return 0, err @@ -121,11 +120,9 @@ func (s *Stack) Pop(vals ...interface{}) (usermem.Addr, error) { var err error if isVaddr { value := s.Arch.Native(uintptr(0)) - // TODO(b/38173783): Use a real context.Context. n, err = usermem.CopyObjectIn(context.Background(), s.IO, s.Bottom, value, usermem.IOOpts{}) *vaddr = usermem.Addr(s.Arch.Value(value)) } else { - // TODO(b/38173783): Use a real context.Context. n, err = usermem.CopyObjectIn(context.Background(), s.IO, s.Bottom, v, usermem.IOOpts{}) } if err != nil { diff --git a/pkg/sentry/fs/gofer/file_state.go b/pkg/sentry/fs/gofer/file_state.go index ff96b28ba..edd6576aa 100644 --- a/pkg/sentry/fs/gofer/file_state.go +++ b/pkg/sentry/fs/gofer/file_state.go @@ -34,7 +34,6 @@ func (f *fileOperations) afterLoad() { flags := f.flags flags.Truncate = false - // TODO(b/38173783): Context is not plumbed to save/restore. f.handles, err = f.inodeOperations.fileState.getHandles(context.Background(), flags, f.inodeOperations.cachingInodeOps) if err != nil { return fmt.Errorf("failed to re-open handle: %v", err) diff --git a/pkg/sentry/fs/gofer/handles.go b/pkg/sentry/fs/gofer/handles.go index 9f7c3e89f..fc14249be 100644 --- a/pkg/sentry/fs/gofer/handles.go +++ b/pkg/sentry/fs/gofer/handles.go @@ -57,7 +57,6 @@ func (h *handles) DecRef() { } } } - // FIXME(b/38173783): Context is not plumbed here. if err := h.File.close(context.Background()); err != nil { log.Warningf("error closing p9 file: %v", err) } diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go index 238f7804c..a3402e343 100644 --- a/pkg/sentry/fs/gofer/inode_state.go +++ b/pkg/sentry/fs/gofer/inode_state.go @@ -123,7 +123,6 @@ func (i *inodeFileState) afterLoad() { // beforeSave. return fmt.Errorf("failed to find path for inode number %d. Device %s contains %s", i.sattr.InodeID, i.s.connID, fs.InodeMappings(i.s.inodeMappings)) } - // TODO(b/38173783): Context is not plumbed to save/restore. ctx := &dummyClockContext{context.Background()} _, i.file, err = i.s.attach.walk(ctx, splitAbsolutePath(name)) diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index 111da59f9..2d398b753 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -104,7 +104,6 @@ func (s *session) afterLoad() { // If private unix sockets are enabled, create and fill the session's endpoint // maps. if opts.privateunixsocket { - // TODO(b/38173783): Context is not plumbed to save/restore. ctx := &dummyClockContext{context.Background()} if err = s.restoreEndpointMaps(ctx); err != nil { diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index 73f89abcc..a34fbc946 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -102,7 +102,6 @@ func (i *Inode) DecRef() { // destroy releases the Inode and releases the msrc reference taken. func (i *Inode) destroy() { - // FIXME(b/38173783): Context is not plumbed here. ctx := context.Background() if err := i.WriteOut(ctx); err != nil { // FIXME(b/65209558): Mark as warning again once noatime is diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index 208569057..f66cfcc7f 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -461,7 +461,7 @@ func (s *Shm) AddMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.A func (s *Shm) RemoveMapping(ctx context.Context, _ memmap.MappingSpace, _ usermem.AddrRange, _ uint64, _ bool) { s.mu.Lock() defer s.mu.Unlock() - // TODO(b/38173783): RemoveMapping may be called during task exit, when ctx + // RemoveMapping may be called during task exit, when ctx // is context.Background. Gracefully handle missing clocks. Failing to // update the detach time in these cases is ok, since no one can observe the // omission. diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go index c115e8d1f..9fa528384 100644 --- a/pkg/sentry/kernel/task_context.go +++ b/pkg/sentry/kernel/task_context.go @@ -58,7 +58,6 @@ func (tc *TaskContext) release() { // Nil out pointers so that if the task is saved after release, it doesn't // follow the pointers to possibly now-invalid objects. if tc.MemoryManager != nil { - // TODO(b/38173783) tc.MemoryManager.DecUsers(context.Background()) tc.MemoryManager = nil } diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index 8802db142..6aa798346 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -513,8 +513,6 @@ func (t *Task) canReceiveSignalLocked(sig linux.Signal) bool { if t.stop != nil { return false } - // - TODO(b/38173783): No special case for when t is also the sending task, - // because the identity of the sender is unknown. // - Do not choose tasks that have already been interrupted, as they may be // busy handling another signal. if len(t.interruptChan) != 0 { diff --git a/pkg/usermem/usermem.go b/pkg/usermem/usermem.go index d2f4403b0..cd6a0ea6b 100644 --- a/pkg/usermem/usermem.go +++ b/pkg/usermem/usermem.go @@ -29,9 +29,6 @@ import ( ) // IO provides access to the contents of a virtual memory space. -// -// FIXME(b/38173783): Implementations of IO cannot expect ctx to contain any -// meaningful data. type IO interface { // CopyOut copies len(src) bytes from src to the memory mapped at addr. It // returns the number of bytes copied. If the number of bytes copied is < -- cgit v1.2.3 From 7c0f3bc8576addbec001095d754a756691d26df3 Mon Sep 17 00:00:00 2001 From: Dave Bailey Date: Tue, 21 Apr 2020 09:34:42 -0700 Subject: Sentry metrics updates. Sentry metrics with nanoseconds units are labeled as such, and non-cumulative sentry metrics are supported. PiperOrigin-RevId: 307621080 --- pkg/metric/metric.go | 41 +++++++++++++++++----------------- pkg/metric/metric.proto | 10 ++++++++- pkg/metric/metric_test.go | 22 +++++++++++------- pkg/sentry/fs/file.go | 2 +- pkg/sentry/fs/gofer/file.go | 4 ++-- pkg/sentry/fs/tmpfs/inode_file.go | 2 +- pkg/sentry/socket/netstack/netstack.go | 14 ++++++++---- 7 files changed, 58 insertions(+), 37 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/metric/metric.go b/pkg/metric/metric.go index 895253625..64aa365ce 100644 --- a/pkg/metric/metric.go +++ b/pkg/metric/metric.go @@ -39,16 +39,11 @@ var ( // Uint64Metric encapsulates a uint64 that represents some kind of metric to be // monitored. // -// All metrics must be cumulative, meaning that their values will only increase -// over time. -// // Metrics are not saved across save/restore and thus reset to zero on restore. // -// TODO(b/67298402): Support non-cumulative metrics. // TODO(b/67298427): Support metric fields. type Uint64Metric struct { - // value is the actual value of the metric. It must be accessed - // atomically. + // value is the actual value of the metric. It must be accessed atomically. value uint64 } @@ -110,13 +105,10 @@ type customUint64Metric struct { // Register must only be called at init and will return and error if called // after Initialized. // -// All metrics must be cumulative, meaning that the return values of value must -// only increase over time. -// // Preconditions: // * name must be globally unique. // * Initialize/Disable have not been called. -func RegisterCustomUint64Metric(name string, sync bool, description string, value func() uint64) error { +func RegisterCustomUint64Metric(name string, cumulative, sync bool, units pb.MetricMetadata_Units, description string, value func() uint64) error { if initialized { return ErrInitializationDone } @@ -129,9 +121,10 @@ func RegisterCustomUint64Metric(name string, sync bool, description string, valu metadata: &pb.MetricMetadata{ Name: name, Description: description, - Cumulative: true, + Cumulative: cumulative, Sync: sync, - Type: pb.MetricMetadata_UINT64, + Type: pb.MetricMetadata_TYPE_UINT64, + Units: units, }, value: value, } @@ -140,24 +133,32 @@ func RegisterCustomUint64Metric(name string, sync bool, description string, valu // MustRegisterCustomUint64Metric calls RegisterCustomUint64Metric and panics // if it returns an error. -func MustRegisterCustomUint64Metric(name string, sync bool, description string, value func() uint64) { - if err := RegisterCustomUint64Metric(name, sync, description, value); err != nil { +func MustRegisterCustomUint64Metric(name string, cumulative, sync bool, description string, value func() uint64) { + if err := RegisterCustomUint64Metric(name, cumulative, sync, pb.MetricMetadata_UNITS_NONE, description, value); err != nil { panic(fmt.Sprintf("Unable to register metric %q: %v", name, err)) } } -// NewUint64Metric creates and registers a new metric with the given name. +// NewUint64Metric creates and registers a new cumulative metric with the given name. // // Metrics must be statically defined (i.e., at init). -func NewUint64Metric(name string, sync bool, description string) (*Uint64Metric, error) { +func NewUint64Metric(name string, sync bool, units pb.MetricMetadata_Units, description string) (*Uint64Metric, error) { var m Uint64Metric - return &m, RegisterCustomUint64Metric(name, sync, description, m.Value) + return &m, RegisterCustomUint64Metric(name, true /* cumulative */, sync, units, description, m.Value) } -// MustCreateNewUint64Metric calls NewUint64Metric and panics if it returns an -// error. +// MustCreateNewUint64Metric calls NewUint64Metric and panics if it returns an error. func MustCreateNewUint64Metric(name string, sync bool, description string) *Uint64Metric { - m, err := NewUint64Metric(name, sync, description) + m, err := NewUint64Metric(name, sync, pb.MetricMetadata_UNITS_NONE, description) + if err != nil { + panic(fmt.Sprintf("Unable to create metric %q: %v", name, err)) + } + return m +} + +// MustCreateNewUint64NanosecondsMetric calls NewUint64Metric and panics if it returns an error. +func MustCreateNewUint64NanosecondsMetric(name string, sync bool, description string) *Uint64Metric { + m, err := NewUint64Metric(name, sync, pb.MetricMetadata_UNITS_NANOSECONDS, description) if err != nil { panic(fmt.Sprintf("Unable to create metric %q: %v", name, err)) } diff --git a/pkg/metric/metric.proto b/pkg/metric/metric.proto index a2c2bd1ba..3cc89047d 100644 --- a/pkg/metric/metric.proto +++ b/pkg/metric/metric.proto @@ -36,10 +36,18 @@ message MetricMetadata { // the monitoring system. bool sync = 4; - enum Type { UINT64 = 0; } + enum Type { TYPE_UINT64 = 0; } // type is the type of the metric value. Type type = 5; + + enum Units { + UNITS_NONE = 0; + UNITS_NANOSECONDS = 1; + } + + // units is the units of the metric value. + Units units = 6; } // MetricRegistration contains the metadata for all metrics that will be in diff --git a/pkg/metric/metric_test.go b/pkg/metric/metric_test.go index 34969385a..c425ea532 100644 --- a/pkg/metric/metric_test.go +++ b/pkg/metric/metric_test.go @@ -66,12 +66,12 @@ const ( func TestInitialize(t *testing.T) { defer reset() - _, err := NewUint64Metric("/foo", false, fooDescription) + _, err := NewUint64Metric("/foo", false, pb.MetricMetadata_UNITS_NONE, fooDescription) if err != nil { t.Fatalf("NewUint64Metric got err %v want nil", err) } - _, err = NewUint64Metric("/bar", true, barDescription) + _, err = NewUint64Metric("/bar", true, pb.MetricMetadata_UNITS_NANOSECONDS, barDescription) if err != nil { t.Fatalf("NewUint64Metric got err %v want nil", err) } @@ -94,8 +94,8 @@ func TestInitialize(t *testing.T) { foundFoo := false foundBar := false for _, m := range mr.Metrics { - if m.Type != pb.MetricMetadata_UINT64 { - t.Errorf("Metadata %+v Type got %v want %v", m, m.Type, pb.MetricMetadata_UINT64) + if m.Type != pb.MetricMetadata_TYPE_UINT64 { + t.Errorf("Metadata %+v Type got %v want %v", m, m.Type, pb.MetricMetadata_TYPE_UINT64) } if !m.Cumulative { t.Errorf("Metadata %+v Cumulative got false want true", m) @@ -110,6 +110,9 @@ func TestInitialize(t *testing.T) { if m.Sync { t.Errorf("/foo %+v Sync got true want false", m) } + if m.Units != pb.MetricMetadata_UNITS_NONE { + t.Errorf("/foo %+v Units got %v want %v", m, m.Units, pb.MetricMetadata_UNITS_NONE) + } case "/bar": foundBar = true if m.Description != barDescription { @@ -118,6 +121,9 @@ func TestInitialize(t *testing.T) { if !m.Sync { t.Errorf("/bar %+v Sync got true want false", m) } + if m.Units != pb.MetricMetadata_UNITS_NANOSECONDS { + t.Errorf("/bar %+v Units got %v want %v", m, m.Units, pb.MetricMetadata_UNITS_NANOSECONDS) + } } } @@ -132,12 +138,12 @@ func TestInitialize(t *testing.T) { func TestDisable(t *testing.T) { defer reset() - _, err := NewUint64Metric("/foo", false, fooDescription) + _, err := NewUint64Metric("/foo", false, pb.MetricMetadata_UNITS_NONE, fooDescription) if err != nil { t.Fatalf("NewUint64Metric got err %v want nil", err) } - _, err = NewUint64Metric("/bar", true, barDescription) + _, err = NewUint64Metric("/bar", true, pb.MetricMetadata_UNITS_NONE, barDescription) if err != nil { t.Fatalf("NewUint64Metric got err %v want nil", err) } @@ -161,12 +167,12 @@ func TestDisable(t *testing.T) { func TestEmitMetricUpdate(t *testing.T) { defer reset() - foo, err := NewUint64Metric("/foo", false, fooDescription) + foo, err := NewUint64Metric("/foo", false, pb.MetricMetadata_UNITS_NONE, fooDescription) if err != nil { t.Fatalf("NewUint64Metric got err %v want nil", err) } - _, err = NewUint64Metric("/bar", true, barDescription) + _, err = NewUint64Metric("/bar", true, pb.MetricMetadata_UNITS_NONE, barDescription) if err != nil { t.Fatalf("NewUint64Metric got err %v want nil", err) } diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index 78100e448..846252c89 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -44,7 +44,7 @@ var ( RecordWaitTime = false reads = metric.MustCreateNewUint64Metric("/fs/reads", false /* sync */, "Number of file reads.") - readWait = metric.MustCreateNewUint64Metric("/fs/read_wait", false /* sync */, "Time waiting on file reads, in nanoseconds.") + readWait = metric.MustCreateNewUint64NanosecondsMetric("/fs/read_wait", false /* sync */, "Time waiting on file reads, in nanoseconds.") ) // IncrementWait increments the given wait time metric, if enabled. diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go index 23296f246..b2fcab127 100644 --- a/pkg/sentry/fs/gofer/file.go +++ b/pkg/sentry/fs/gofer/file.go @@ -37,9 +37,9 @@ var ( opens9P = metric.MustCreateNewUint64Metric("/gofer/opens_9p", false /* sync */, "Number of times a 9P file was opened from a gofer.") opensHost = metric.MustCreateNewUint64Metric("/gofer/opens_host", false /* sync */, "Number of times a host file was opened from a gofer.") reads9P = metric.MustCreateNewUint64Metric("/gofer/reads_9p", false /* sync */, "Number of 9P file reads from a gofer.") - readWait9P = metric.MustCreateNewUint64Metric("/gofer/read_wait_9p", false /* sync */, "Time waiting on 9P file reads from a gofer, in nanoseconds.") + readWait9P = metric.MustCreateNewUint64NanosecondsMetric("/gofer/read_wait_9p", false /* sync */, "Time waiting on 9P file reads from a gofer, in nanoseconds.") readsHost = metric.MustCreateNewUint64Metric("/gofer/reads_host", false /* sync */, "Number of host file reads from a gofer.") - readWaitHost = metric.MustCreateNewUint64Metric("/gofer/read_wait_host", false /* sync */, "Time waiting on host file reads from a gofer, in nanoseconds.") + readWaitHost = metric.MustCreateNewUint64NanosecondsMetric("/gofer/read_wait_host", false /* sync */, "Time waiting on host file reads from a gofer, in nanoseconds.") ) // fileOperations implements fs.FileOperations for a remote file system. diff --git a/pkg/sentry/fs/tmpfs/inode_file.go b/pkg/sentry/fs/tmpfs/inode_file.go index 25abbc151..1dc75291d 100644 --- a/pkg/sentry/fs/tmpfs/inode_file.go +++ b/pkg/sentry/fs/tmpfs/inode_file.go @@ -39,7 +39,7 @@ var ( opensRO = metric.MustCreateNewUint64Metric("/in_memory_file/opens_ro", false /* sync */, "Number of times an in-memory file was opened in read-only mode.") opensW = metric.MustCreateNewUint64Metric("/in_memory_file/opens_w", false /* sync */, "Number of times an in-memory file was opened in write mode.") reads = metric.MustCreateNewUint64Metric("/in_memory_file/reads", false /* sync */, "Number of in-memory file reads.") - readWait = metric.MustCreateNewUint64Metric("/in_memory_file/read_wait", false /* sync */, "Time waiting on in-memory file reads, in nanoseconds.") + readWait = metric.MustCreateNewUint64NanosecondsMetric("/in_memory_file/read_wait", false /* sync */, "Time waiting on in-memory file reads, in nanoseconds.") ) // fileInodeOperations implements fs.InodeOperations for a regular tmpfs file. diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 7ac38764d..d5879c10f 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -63,7 +63,13 @@ import ( func mustCreateMetric(name, description string) *tcpip.StatCounter { var cm tcpip.StatCounter - metric.MustRegisterCustomUint64Metric(name, false /* sync */, description, cm.Value) + metric.MustRegisterCustomUint64Metric(name, true /* cumulative */, false /* sync */, description, cm.Value) + return &cm +} + +func mustCreateGauge(name, description string) *tcpip.StatCounter { + var cm tcpip.StatCounter + metric.MustRegisterCustomUint64Metric(name, false /* cumulative */, false /* sync */, description, cm.Value) return &cm } @@ -151,10 +157,10 @@ var Metrics = tcpip.Stats{ TCP: tcpip.TCPStats{ ActiveConnectionOpenings: mustCreateMetric("/netstack/tcp/active_connection_openings", "Number of connections opened successfully via Connect."), PassiveConnectionOpenings: mustCreateMetric("/netstack/tcp/passive_connection_openings", "Number of connections opened successfully via Listen."), - CurrentEstablished: mustCreateMetric("/netstack/tcp/current_established", "Number of connections in ESTABLISHED state now."), - CurrentConnected: mustCreateMetric("/netstack/tcp/current_open", "Number of connections that are in connected state."), + CurrentEstablished: mustCreateGauge("/netstack/tcp/current_established", "Number of connections in ESTABLISHED state now."), + CurrentConnected: mustCreateGauge("/netstack/tcp/current_open", "Number of connections that are in connected state."), EstablishedResets: mustCreateMetric("/netstack/tcp/established_resets", "Number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state"), - EstablishedClosed: mustCreateMetric("/netstack/tcp/established_closed", "number of times established TCP connections made a transition to CLOSED state."), + EstablishedClosed: mustCreateMetric("/netstack/tcp/established_closed", "Number of times established TCP connections made a transition to CLOSED state."), EstablishedTimedout: mustCreateMetric("/netstack/tcp/established_timedout", "Number of times an established connection was reset because of keep-alive time out."), ListenOverflowSynDrop: mustCreateMetric("/netstack/tcp/listen_overflow_syn_drop", "Number of times the listen queue overflowed and a SYN was dropped."), ListenOverflowAckDrop: mustCreateMetric("/netstack/tcp/listen_overflow_ack_drop", "Number of times the listen queue overflowed and the final ACK in the handshake was dropped."), -- cgit v1.2.3 From 37e01fd2ea6a0e67637975863317be9aae1b02f0 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 21 Apr 2020 16:30:26 -0700 Subject: Misc VFS2 fixes - Fix defer operation ordering in kernfs.Filesystem.AccessAt() - Add AT_NULL entry in proc/pid/auvx - Fix line padding in /proc/pid/maps - Fix linux_dirent serialization for getdents(2) - Remove file creation flags from vfs.FileDescription.statusFlags() Updates #1193, #1035 PiperOrigin-RevId: 307704159 --- pkg/sentry/fs/proc/task.go | 3 +-- pkg/sentry/fsimpl/kernfs/filesystem.go | 2 +- pkg/sentry/fsimpl/proc/task_files.go | 13 +++++++------ pkg/sentry/fsimpl/proc/tasks_sys.go | 2 +- pkg/sentry/mm/procfs.go | 4 ++-- pkg/sentry/syscalls/linux/vfs2/getdents.go | 6 +++--- pkg/sentry/vfs/file_description.go | 15 +++++++++------ 7 files changed, 24 insertions(+), 21 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 4d42eac83..4bbe90198 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -73,8 +73,7 @@ func checkTaskState(t *kernel.Task) error { type taskDir struct { ramfs.Dir - t *kernel.Task - pidns *kernel.PIDNamespace + t *kernel.Task } var _ fs.InodeOperations = (*taskDir)(nil) diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 01c23d192..3164d022c 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -246,8 +246,8 @@ func (fs *Filesystem) Sync(ctx context.Context) error { // AccessAt implements vfs.Filesystem.Impl.AccessAt. func (fs *Filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { fs.mu.RLock() - defer fs.mu.RUnlock() defer fs.processDeferredDecRefs() + defer fs.mu.RUnlock() _, inode, err := fs.walkExistingLocked(ctx, rp) if err != nil { diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 2c6f8bdfc..f3173e197 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -111,17 +111,18 @@ func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error { } defer m.DecUsers(ctx) - // Space for buffer with AT_NULL (0) terminator at the end. auxv := m.Auxv() + // Space for buffer with AT_NULL (0) terminator at the end. buf.Grow((len(auxv) + 1) * 16) for _, e := range auxv { - var tmp [8]byte - usermem.ByteOrder.PutUint64(tmp[:], e.Key) - buf.Write(tmp[:]) - - usermem.ByteOrder.PutUint64(tmp[:], uint64(e.Value)) + var tmp [16]byte + usermem.ByteOrder.PutUint64(tmp[:8], e.Key) + usermem.ByteOrder.PutUint64(tmp[8:], uint64(e.Value)) buf.Write(tmp[:]) } + var atNull [16]byte + buf.Write(atNull[:]) + return nil } diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index 3d5dc463c..f08668ca2 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -39,7 +39,7 @@ func newSysDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *k "shmmni": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMNI)), }), "vm": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ - "mmap_min_addr": newDentry(root, inoGen.NextIno(), 0444, &mmapMinAddrData{}), + "mmap_min_addr": newDentry(root, inoGen.NextIno(), 0444, &mmapMinAddrData{k: k}), "overcommit_memory": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0\n")), }), "net": newSysNetDir(root, inoGen, k), diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go index 1ab92f046..6efe5102b 100644 --- a/pkg/sentry/mm/procfs.go +++ b/pkg/sentry/mm/procfs.go @@ -148,7 +148,7 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI // Do not include the guard page: fs/proc/task_mmu.c:show_map_vma() => // stack_guard_page_start(). - fmt.Fprintf(b, "%08x-%08x %s%s %08x %02x:%02x %d ", + lineLen, _ := fmt.Fprintf(b, "%08x-%08x %s%s %08x %02x:%02x %d ", vseg.Start(), vseg.End(), vma.realPerms, private, vma.off, devMajor, devMinor, ino) // Figure out our filename or hint. @@ -165,7 +165,7 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI } if s != "" { // Per linux, we pad until the 74th character. - if pad := 73 - b.Len(); pad > 0 { + if pad := 73 - lineLen; pad > 0 { b.WriteString(strings.Repeat(" ", pad)) } b.WriteString(s) diff --git a/pkg/sentry/syscalls/linux/vfs2/getdents.go b/pkg/sentry/syscalls/linux/vfs2/getdents.go index 62e98817d..c7c7bf7ce 100644 --- a/pkg/sentry/syscalls/linux/vfs2/getdents.go +++ b/pkg/sentry/syscalls/linux/vfs2/getdents.go @@ -130,7 +130,7 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { if cb.t.Arch().Width() != 8 { panic(fmt.Sprintf("unsupported sizeof(unsigned long): %d", cb.t.Arch().Width())) } - size := 8 + 8 + 2 + 1 + 1 + 1 + len(dirent.Name) + size := 8 + 8 + 2 + 1 + 1 + len(dirent.Name) size = (size + 7) &^ 7 // round up to multiple of sizeof(long) if size > cb.remaining { return syserror.EINVAL @@ -143,11 +143,11 @@ func (cb *getdentsCallback) Handle(dirent vfs.Dirent) error { // Zero out all remaining bytes in buf, including the NUL terminator // after dirent.Name and the zero padding byte between the name and // dirent type. - bufTail := buf[18+len(dirent.Name):] + bufTail := buf[18+len(dirent.Name) : size-1] for i := range bufTail { bufTail[i] = 0 } - bufTail[2] = dirent.Type + buf[size-1] = dirent.Type } n, err := cb.t.CopyOutBytes(cb.addr, buf) if err != nil { diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 15cc091e2..418d69b96 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -111,10 +111,10 @@ type FileDescriptionOptions struct { } // Init must be called before first use of fd. If it succeeds, it takes -// references on mnt and d. statusFlags is the initial file description status -// flags, which is usually the full set of flags passed to open(2). -func (fd *FileDescription) Init(impl FileDescriptionImpl, statusFlags uint32, mnt *Mount, d *Dentry, opts *FileDescriptionOptions) error { - writable := MayWriteFileWithOpenFlags(statusFlags) +// references on mnt and d. flags is the initial file description flags, which +// is usually the full set of flags passed to open(2). +func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mount, d *Dentry, opts *FileDescriptionOptions) error { + writable := MayWriteFileWithOpenFlags(flags) if writable { if err := mnt.CheckBeginWrite(); err != nil { return err @@ -122,7 +122,10 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, statusFlags uint32, mn } fd.refs = 1 - fd.statusFlags = statusFlags + + // Remove "file creation flags" to mirror the behavior from file.f_flags in + // fs/open.c:do_dentry_open + fd.statusFlags = flags &^ (linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC) fd.vd = VirtualDentry{ mount: mnt, dentry: d, @@ -130,7 +133,7 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, statusFlags uint32, mn mnt.IncRef() d.IncRef() fd.opts = *opts - fd.readable = MayReadFileWithOpenFlags(statusFlags) + fd.readable = MayReadFileWithOpenFlags(flags) fd.writable = writable fd.impl = impl return nil -- cgit v1.2.3 From e69a871c7bd4e4859b0acd8b875171f3ebbaec29 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 22 Apr 2020 22:17:01 -0700 Subject: Move user home detection to its own library. PiperOrigin-RevId: 307977689 --- pkg/sentry/fs/user/BUILD | 34 ++++++ pkg/sentry/fs/user/user.go | 237 +++++++++++++++++++++++++++++++++++++ pkg/sentry/fs/user/user_test.go | 198 +++++++++++++++++++++++++++++++ runsc/boot/BUILD | 5 +- runsc/boot/loader.go | 7 +- runsc/boot/user.go | 234 ------------------------------------ runsc/boot/user_test.go | 254 ---------------------------------------- 7 files changed, 474 insertions(+), 495 deletions(-) create mode 100644 pkg/sentry/fs/user/BUILD create mode 100644 pkg/sentry/fs/user/user.go create mode 100644 pkg/sentry/fs/user/user_test.go delete mode 100644 runsc/boot/user.go delete mode 100644 runsc/boot/user_test.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD new file mode 100644 index 000000000..f37f979f1 --- /dev/null +++ b/pkg/sentry/fs/user/BUILD @@ -0,0 +1,34 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "user", + srcs = ["user.go"], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/fspath", + "//pkg/sentry/fs", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/vfs", + "//pkg/usermem", + ], +) + +go_test( + name = "user_test", + size = "small", + srcs = ["user_test.go"], + library = ":user", + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/sentry/fs", + "//pkg/sentry/fs/tmpfs", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/contexttest", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/fs/user/user.go b/pkg/sentry/fs/user/user.go new file mode 100644 index 000000000..fe7f67c00 --- /dev/null +++ b/pkg/sentry/fs/user/user.go @@ -0,0 +1,237 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package user + +import ( + "bufio" + "fmt" + "io" + "strconv" + "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/usermem" +) + +type fileReader struct { + // Ctx is the context for the file reader. + Ctx context.Context + + // File is the file to read from. + File *fs.File +} + +// Read implements io.Reader.Read. +func (r *fileReader) Read(buf []byte) (int, error) { + n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf)) + return int(n), err +} + +// getExecUserHome returns the home directory of the executing user read from +// /etc/passwd as read from the container filesystem. +func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) { + // The default user home directory to return if no user matching the user + // if found in the /etc/passwd found in the image. + const defaultHome = "/" + + // Open the /etc/passwd file from the dirent via the root mount namespace. + mnsRoot := rootMns.Root() + maxTraversals := uint(linux.MaxSymlinkTraversals) + dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals) + if err != nil { + // NOTE: Ignore errors opening the passwd file. If the passwd file + // doesn't exist we will return the default home directory. + return defaultHome, nil + } + defer dirent.DecRef() + + // Check read permissions on the file. + if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil { + // NOTE: Ignore permissions errors here and return default root dir. + return defaultHome, nil + } + + // Only open regular files. We don't open other files like named pipes as + // they may block and might present some attack surface to the container. + // Note that runc does not seem to do this kind of checking. + if !fs.IsRegular(dirent.Inode.StableAttr) { + return defaultHome, nil + } + + f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false}) + if err != nil { + return "", err + } + defer f.DecRef() + + r := &fileReader{ + Ctx: ctx, + File: f, + } + + return findHomeInPasswd(uint32(uid), r, defaultHome) +} + +type fileReaderVFS2 struct { + ctx context.Context + fd *vfs.FileDescription +} + +func (r *fileReaderVFS2) Read(buf []byte) (int, error) { + n, err := r.fd.Read(r.ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) + return int(n), err +} + +func getExecUserHomeVFS2(ctx context.Context, mns *vfs.MountNamespace, uid auth.KUID) (string, error) { + const defaultHome = "/" + + root := mns.Root() + defer root.DecRef() + + creds := auth.CredentialsFromContext(ctx) + + target := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse("/etc/passwd"), + } + + opts := &vfs.OpenOptions{ + Flags: linux.O_RDONLY, + } + + fd, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, target, opts) + if err != nil { + return defaultHome, nil + } + defer fd.DecRef() + + r := &fileReaderVFS2{ + ctx: ctx, + fd: fd, + } + + homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome) + if err != nil { + return "", err + } + + return homeDir, nil +} + +// MaybeAddExecUserHome returns a new slice with the HOME enviroment variable +// set if the slice does not already contain it, otherwise it returns the +// original slice unmodified. +func MaybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) { + // Check if the envv already contains HOME. + for _, env := range envv { + if strings.HasPrefix(env, "HOME=") { + // We have it. Return the original slice unmodified. + return envv, nil + } + } + + // Read /etc/passwd for the user's HOME directory and set the HOME + // environment variable as required by POSIX if it is not overridden by + // the user. + homeDir, err := getExecUserHome(ctx, mns, uid) + if err != nil { + return nil, fmt.Errorf("error reading exec user: %v", err) + } + + return append(envv, "HOME="+homeDir), nil +} + +// MaybeAddExecUserHomeVFS2 returns a new slice with the HOME enviroment +// variable set if the slice does not already contain it, otherwise it returns +// the original slice unmodified. +func MaybeAddExecUserHomeVFS2(ctx context.Context, vmns *vfs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) { + // Check if the envv already contains HOME. + for _, env := range envv { + if strings.HasPrefix(env, "HOME=") { + // We have it. Return the original slice unmodified. + return envv, nil + } + } + + // Read /etc/passwd for the user's HOME directory and set the HOME + // environment variable as required by POSIX if it is not overridden by + // the user. + homeDir, err := getExecUserHomeVFS2(ctx, vmns, uid) + if err != nil { + return nil, fmt.Errorf("error reading exec user: %v", err) + } + return append(envv, "HOME="+homeDir), nil +} + +// findHomeInPasswd parses a passwd file and returns the given user's home +// directory. This function does it's best to replicate the runc's behavior. +func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) { + s := bufio.NewScanner(passwd) + + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + + line := strings.TrimSpace(s.Text()) + if line == "" { + continue + } + + // Pull out part of passwd entry. Loosely parse the passwd entry as some + // passwd files could be poorly written and for compatibility with runc. + // + // Per 'man 5 passwd' + // /etc/passwd contains one line for each user account, with seven + // fields delimited by colons (“:”). These fields are: + // + // - login name + // - optional encrypted password + // - numerical user ID + // - numerical group ID + // - user name or comment field + // - user home directory + // - optional user command interpreter + parts := strings.Split(line, ":") + + found := false + homeDir := "" + for i, p := range parts { + switch i { + case 2: + parsedUID, err := strconv.ParseUint(p, 10, 32) + if err == nil && parsedUID == uint64(uid) { + found = true + } + case 5: + homeDir = p + } + } + if found { + // NOTE: If the uid is present but the home directory is not + // present in the /etc/passwd entry we return an empty string. This + // is, for better or worse, what runc does. + return homeDir, nil + } + } + + return defaultHome, nil +} diff --git a/pkg/sentry/fs/user/user_test.go b/pkg/sentry/fs/user/user_test.go new file mode 100644 index 000000000..7d8e9ac7c --- /dev/null +++ b/pkg/sentry/fs/user/user_test.go @@ -0,0 +1,198 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package user + +import ( + "fmt" + "strings" + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" + "gvisor.dev/gvisor/pkg/usermem" +) + +// createEtcPasswd creates /etc/passwd with the given contents and mode. If +// mode is empty, then no file will be created. If mode is not a regular file +// mode, then contents is ignored. +func createEtcPasswd(ctx context.Context, root *fs.Dirent, contents string, mode linux.FileMode) error { + if err := root.CreateDirectory(ctx, root, "etc", fs.FilePermsFromMode(0755)); err != nil { + return err + } + etc, err := root.Walk(ctx, root, "etc") + if err != nil { + return err + } + defer etc.DecRef() + switch mode.FileType() { + case 0: + // Don't create anything. + return nil + case linux.S_IFREG: + passwd, err := etc.Create(ctx, root, "passwd", fs.FileFlags{Write: true}, fs.FilePermsFromMode(mode)) + if err != nil { + return err + } + defer passwd.DecRef() + if _, err := passwd.Writev(ctx, usermem.BytesIOSequence([]byte(contents))); err != nil { + return err + } + return nil + case linux.S_IFDIR: + return etc.CreateDirectory(ctx, root, "passwd", fs.FilePermsFromMode(mode)) + case linux.S_IFIFO: + return etc.CreateFifo(ctx, root, "passwd", fs.FilePermsFromMode(mode)) + default: + return fmt.Errorf("unknown file type %x", mode.FileType()) + } +} + +// TestGetExecUserHome tests the getExecUserHome function. +func TestGetExecUserHome(t *testing.T) { + tests := map[string]struct { + uid auth.KUID + passwdContents string + passwdMode linux.FileMode + expected string + }{ + "success": { + uid: 1000, + passwdContents: "adin::1000:1111::/home/adin:/bin/sh", + passwdMode: linux.S_IFREG | 0666, + expected: "/home/adin", + }, + "no_perms": { + uid: 1000, + passwdContents: "adin::1000:1111::/home/adin:/bin/sh", + passwdMode: linux.S_IFREG, + expected: "/", + }, + "no_passwd": { + uid: 1000, + expected: "/", + }, + "directory": { + uid: 1000, + passwdMode: linux.S_IFDIR | 0666, + expected: "/", + }, + // Currently we don't allow named pipes. + "named_pipe": { + uid: 1000, + passwdMode: linux.S_IFIFO | 0666, + expected: "/", + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + ctx := contexttest.Context(t) + msrc := fs.NewPseudoMountSource(ctx) + rootInode := tmpfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0777), msrc) + + mns, err := fs.NewMountNamespace(ctx, rootInode) + if err != nil { + t.Fatalf("NewMountNamespace failed: %v", err) + } + defer mns.DecRef() + root := mns.Root() + defer root.DecRef() + ctx = fs.WithRoot(ctx, root) + + if err := createEtcPasswd(ctx, root, tc.passwdContents, tc.passwdMode); err != nil { + t.Fatalf("createEtcPasswd failed: %v", err) + } + + got, err := getExecUserHome(ctx, mns, tc.uid) + if err != nil { + t.Fatalf("failed to get user home: %v", err) + } + + if got != tc.expected { + t.Fatalf("expected %v, got: %v", tc.expected, got) + } + }) + } +} + +// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing. +func TestFindHomeInPasswd(t *testing.T) { + tests := map[string]struct { + uid uint32 + passwd string + expected string + def string + }{ + "empty": { + uid: 1000, + passwd: "", + expected: "/", + def: "/", + }, + "whitespace": { + uid: 1000, + passwd: " ", + expected: "/", + def: "/", + }, + "full": { + uid: 1000, + passwd: "adin::1000:1111::/home/adin:/bin/sh", + expected: "/home/adin", + def: "/", + }, + // For better or worse, this is how runc works. + "partial": { + uid: 1000, + passwd: "adin::1000:1111:", + expected: "", + def: "/", + }, + "multiple": { + uid: 1001, + passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh", + expected: "/home/ian", + def: "/", + }, + "duplicate": { + uid: 1000, + passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh", + expected: "/home/adin", + def: "/", + }, + "empty_lines": { + uid: 1001, + passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh", + expected: "/home/ian", + def: "/", + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def) + if err != nil { + t.Fatalf("error parsing passwd: %v", err) + } + if tc.expected != got { + t.Fatalf("expected %v, got: %v", tc.expected, got) + } + }) + } +} diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 5451f1eba..72c2fe381 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -20,7 +20,6 @@ go_library( "loader_arm64.go", "network.go", "strace.go", - "user.go", "vfs.go", ], visibility = [ @@ -52,6 +51,7 @@ go_library( "//pkg/sentry/fs/sys", "//pkg/sentry/fs/tmpfs", "//pkg/sentry/fs/tty", + "//pkg/sentry/fs/user", "//pkg/sentry/fsimpl/devtmpfs", "//pkg/sentry/fsimpl/gofer", "//pkg/sentry/fsimpl/host", @@ -97,7 +97,6 @@ go_library( "//pkg/tcpip/transport/tcp", "//pkg/tcpip/transport/udp", "//pkg/urpc", - "//pkg/usermem", "//runsc/boot/filter", "//runsc/boot/platforms", "//runsc/boot/pprof", @@ -115,7 +114,6 @@ go_test( "compat_test.go", "fs_test.go", "loader_test.go", - "user_test.go", ], library = ":boot", deps = [ @@ -125,7 +123,6 @@ go_test( "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", "//pkg/sync", "//pkg/unet", "//runsc/fsgofer", diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index cf1f47bc7..096b0e9f0 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -35,6 +35,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/fs/user" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -550,11 +551,11 @@ func (l *Loader) run() error { // Add the HOME enviroment variable if it is not already set. var envv []string if kernel.VFS2Enabled { - envv, err = maybeAddExecUserHomeVFS2(ctx, l.rootProcArgs.MountNamespaceVFS2, + envv, err = user.MaybeAddExecUserHomeVFS2(ctx, l.rootProcArgs.MountNamespaceVFS2, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv) } else { - envv, err = maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, + envv, err = user.MaybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv) } if err != nil { @@ -860,7 +861,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { root := args.MountNamespace.Root() defer root.DecRef() ctx := fs.WithRoot(l.k.SupervisorContext(), root) - envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) + envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) if err != nil { return 0, err } diff --git a/runsc/boot/user.go b/runsc/boot/user.go deleted file mode 100644 index 332e4fce5..000000000 --- a/runsc/boot/user.go +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -import ( - "bufio" - "fmt" - "io" - "strconv" - "strings" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/usermem" -) - -type fileReader struct { - // Ctx is the context for the file reader. - Ctx context.Context - - // File is the file to read from. - File *fs.File -} - -// Read implements io.Reader.Read. -func (r *fileReader) Read(buf []byte) (int, error) { - n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf)) - return int(n), err -} - -// getExecUserHome returns the home directory of the executing user read from -// /etc/passwd as read from the container filesystem. -func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) { - // The default user home directory to return if no user matching the user - // if found in the /etc/passwd found in the image. - const defaultHome = "/" - - // Open the /etc/passwd file from the dirent via the root mount namespace. - mnsRoot := rootMns.Root() - maxTraversals := uint(linux.MaxSymlinkTraversals) - dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals) - if err != nil { - // NOTE: Ignore errors opening the passwd file. If the passwd file - // doesn't exist we will return the default home directory. - return defaultHome, nil - } - defer dirent.DecRef() - - // Check read permissions on the file. - if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil { - // NOTE: Ignore permissions errors here and return default root dir. - return defaultHome, nil - } - - // Only open regular files. We don't open other files like named pipes as - // they may block and might present some attack surface to the container. - // Note that runc does not seem to do this kind of checking. - if !fs.IsRegular(dirent.Inode.StableAttr) { - return defaultHome, nil - } - - f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false}) - if err != nil { - return "", err - } - defer f.DecRef() - - r := &fileReader{ - Ctx: ctx, - File: f, - } - - return findHomeInPasswd(uint32(uid), r, defaultHome) -} - -type fileReaderVFS2 struct { - ctx context.Context - fd *vfs.FileDescription -} - -func (r *fileReaderVFS2) Read(buf []byte) (int, error) { - n, err := r.fd.Read(r.ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) - return int(n), err -} - -func getExecUserHomeVFS2(ctx context.Context, mns *vfs.MountNamespace, uid auth.KUID) (string, error) { - const defaultHome = "/" - - root := mns.Root() - defer root.DecRef() - - creds := auth.CredentialsFromContext(ctx) - - target := &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse("/etc/passwd"), - } - - opts := &vfs.OpenOptions{ - Flags: linux.O_RDONLY, - } - - fd, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, target, opts) - if err != nil { - return defaultHome, nil - } - defer fd.DecRef() - - r := &fileReaderVFS2{ - ctx: ctx, - fd: fd, - } - - homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome) - if err != nil { - return "", err - } - - return homeDir, nil -} - -// maybeAddExecUserHome returns a new slice with the HOME enviroment variable -// set if the slice does not already contain it, otherwise it returns the -// original slice unmodified. -func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) { - // Check if the envv already contains HOME. - for _, env := range envv { - if strings.HasPrefix(env, "HOME=") { - // We have it. Return the original slice unmodified. - return envv, nil - } - } - - // Read /etc/passwd for the user's HOME directory and set the HOME - // environment variable as required by POSIX if it is not overridden by - // the user. - homeDir, err := getExecUserHome(ctx, mns, uid) - if err != nil { - return nil, fmt.Errorf("error reading exec user: %v", err) - } - - return append(envv, "HOME="+homeDir), nil -} - -func maybeAddExecUserHomeVFS2(ctx context.Context, vmns *vfs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) { - // Check if the envv already contains HOME. - for _, env := range envv { - if strings.HasPrefix(env, "HOME=") { - // We have it. Return the original slice unmodified. - return envv, nil - } - } - - // Read /etc/passwd for the user's HOME directory and set the HOME - // environment variable as required by POSIX if it is not overridden by - // the user. - homeDir, err := getExecUserHomeVFS2(ctx, vmns, uid) - if err != nil { - return nil, fmt.Errorf("error reading exec user: %v", err) - } - return append(envv, "HOME="+homeDir), nil -} - -// findHomeInPasswd parses a passwd file and returns the given user's home -// directory. This function does it's best to replicate the runc's behavior. -func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) { - s := bufio.NewScanner(passwd) - - for s.Scan() { - if err := s.Err(); err != nil { - return "", err - } - - line := strings.TrimSpace(s.Text()) - if line == "" { - continue - } - - // Pull out part of passwd entry. Loosely parse the passwd entry as some - // passwd files could be poorly written and for compatibility with runc. - // - // Per 'man 5 passwd' - // /etc/passwd contains one line for each user account, with seven - // fields delimited by colons (“:”). These fields are: - // - // - login name - // - optional encrypted password - // - numerical user ID - // - numerical group ID - // - user name or comment field - // - user home directory - // - optional user command interpreter - parts := strings.Split(line, ":") - - found := false - homeDir := "" - for i, p := range parts { - switch i { - case 2: - parsedUID, err := strconv.ParseUint(p, 10, 32) - if err == nil && parsedUID == uint64(uid) { - found = true - } - case 5: - homeDir = p - } - } - if found { - // NOTE: If the uid is present but the home directory is not - // present in the /etc/passwd entry we return an empty string. This - // is, for better or worse, what runc does. - return homeDir, nil - } - } - - return defaultHome, nil -} diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go deleted file mode 100644 index fb4e13dfb..000000000 --- a/runsc/boot/user_test.go +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package boot - -import ( - "io/ioutil" - "os" - "path/filepath" - "strings" - "syscall" - "testing" - - specs "github.com/opencontainers/runtime-spec/specs-go" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" -) - -func setupTempDir() (string, error) { - tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test") - if err != nil { - return "", err - } - return tmpDir, nil -} - -func setupPasswd(contents string, perms os.FileMode) func() (string, error) { - return func() (string, error) { - tmpDir, err := setupTempDir() - if err != nil { - return "", err - } - - if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { - return "", err - } - - f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd")) - if err != nil { - return "", err - } - defer f.Close() - - _, err = f.WriteString(contents) - if err != nil { - return "", err - } - - err = f.Chmod(perms) - if err != nil { - return "", err - } - return tmpDir, nil - } -} - -// TestGetExecUserHome tests the getExecUserHome function. -func TestGetExecUserHome(t *testing.T) { - tests := map[string]struct { - uid auth.KUID - createRoot func() (string, error) - expected string - }{ - "success": { - uid: 1000, - createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666), - expected: "/home/adin", - }, - "no_passwd": { - uid: 1000, - createRoot: setupTempDir, - expected: "/", - }, - "no_perms": { - uid: 1000, - createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000), - expected: "/", - }, - "directory": { - uid: 1000, - createRoot: func() (string, error) { - tmpDir, err := setupTempDir() - if err != nil { - return "", err - } - - if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { - return "", err - } - - if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { - return "", err - } - - return tmpDir, nil - }, - expected: "/", - }, - // Currently we don't allow named pipes. - "named_pipe": { - uid: 1000, - createRoot: func() (string, error) { - tmpDir, err := setupTempDir() - if err != nil { - return "", err - } - - if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil { - return "", err - } - - if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil { - return "", err - } - - return tmpDir, nil - }, - expected: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - tmpDir, err := tc.createRoot() - if err != nil { - t.Fatalf("failed to create root dir: %v", err) - } - - sandEnd, cleanup, err := startGofer(tmpDir) - if err != nil { - t.Fatalf("failed to create gofer: %v", err) - } - defer cleanup() - - ctx := contexttest.Context(t) - conf := &Config{ - RootDir: "unused_root_dir", - Network: NetworkNone, - DisableSeccomp: true, - } - - spec := &specs.Spec{ - Root: &specs.Root{ - Path: tmpDir, - Readonly: true, - }, - // Add /proc mount as tmpfs to avoid needing a kernel. - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "tmpfs", - }, - }, - } - - mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{}) - mns, err := mntr.createMountNamespace(ctx, conf) - if err != nil { - t.Fatalf("failed to create mount namespace: %v", err) - } - ctx = fs.WithRoot(ctx, mns.Root()) - if err := mntr.mountSubmounts(ctx, conf, mns); err != nil { - t.Fatalf("failed to create mount namespace: %v", err) - } - - got, err := getExecUserHome(ctx, mns, tc.uid) - if err != nil { - t.Fatalf("failed to get user home: %v", err) - } - - if got != tc.expected { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} - -// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing. -func TestFindHomeInPasswd(t *testing.T) { - tests := map[string]struct { - uid uint32 - passwd string - expected string - def string - }{ - "empty": { - uid: 1000, - passwd: "", - expected: "/", - def: "/", - }, - "whitespace": { - uid: 1000, - passwd: " ", - expected: "/", - def: "/", - }, - "full": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh", - expected: "/home/adin", - def: "/", - }, - // For better or worse, this is how runc works. - "partial": { - uid: 1000, - passwd: "adin::1000:1111:", - expected: "", - def: "/", - }, - "multiple": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - "duplicate": { - uid: 1000, - passwd: "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh", - expected: "/home/adin", - def: "/", - }, - "empty_lines": { - uid: 1001, - passwd: "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh", - expected: "/home/ian", - def: "/", - }, - } - - for name, tc := range tests { - t.Run(name, func(t *testing.T) { - got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def) - if err != nil { - t.Fatalf("error parsing passwd: %v", err) - } - if tc.expected != got { - t.Fatalf("expected %v, got: %v", tc.expected, got) - } - }) - } -} -- cgit v1.2.3 From 696feaf10c9339a57d177a913e847ddb488ece69 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Thu, 23 Apr 2020 17:32:59 -0700 Subject: Port devpts to VFS2. PiperOrigin-RevId: 308164359 --- pkg/sentry/fs/tty/line_discipline.go | 4 + pkg/sentry/fs/tty/master.go | 4 + pkg/sentry/fs/tty/queue.go | 4 + pkg/sentry/fs/tty/slave.go | 4 + pkg/sentry/fs/tty/terminal.go | 4 + pkg/sentry/fsimpl/devpts/BUILD | 43 +++ pkg/sentry/fsimpl/devpts/devpts.go | 207 +++++++++++++ pkg/sentry/fsimpl/devpts/devpts_test.go | 56 ++++ pkg/sentry/fsimpl/devpts/line_discipline.go | 449 ++++++++++++++++++++++++++++ pkg/sentry/fsimpl/devpts/master.go | 226 ++++++++++++++ pkg/sentry/fsimpl/devpts/queue.go | 240 +++++++++++++++ pkg/sentry/fsimpl/devpts/slave.go | 186 ++++++++++++ pkg/sentry/fsimpl/devpts/terminal.go | 124 ++++++++ pkg/sentry/fsimpl/devtmpfs/devtmpfs.go | 13 +- pkg/sentry/fsimpl/kernfs/filesystem.go | 2 +- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 9 +- 16 files changed, 1570 insertions(+), 5 deletions(-) create mode 100644 pkg/sentry/fsimpl/devpts/BUILD create mode 100644 pkg/sentry/fsimpl/devpts/devpts.go create mode 100644 pkg/sentry/fsimpl/devpts/devpts_test.go create mode 100644 pkg/sentry/fsimpl/devpts/line_discipline.go create mode 100644 pkg/sentry/fsimpl/devpts/master.go create mode 100644 pkg/sentry/fsimpl/devpts/queue.go create mode 100644 pkg/sentry/fsimpl/devpts/slave.go create mode 100644 pkg/sentry/fsimpl/devpts/terminal.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/tty/line_discipline.go b/pkg/sentry/fs/tty/line_discipline.go index 12b1c6097..2e9dd2d55 100644 --- a/pkg/sentry/fs/tty/line_discipline.go +++ b/pkg/sentry/fs/tty/line_discipline.go @@ -27,6 +27,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + const ( // canonMaxBytes is the number of bytes that fit into a single line of // terminal input in canonical mode. This corresponds to N_TTY_BUF_SIZE @@ -443,3 +445,5 @@ func (l *lineDiscipline) peek(b []byte) int { } return size } + +// LINT.ThenChange(../../fsimpl/devpts/line_discipline.go) diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index f62da49bd..fe07fa929 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -26,6 +26,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + // masterInodeOperations are the fs.InodeOperations for the master end of the // Terminal (ptmx file). // @@ -232,3 +234,5 @@ func maybeEmitUnimplementedEvent(ctx context.Context, cmd uint32) { unimpl.EmitUnimplementedEvent(ctx) } } + +// LINT.ThenChange(../../fsimpl/devpts/master.go) diff --git a/pkg/sentry/fs/tty/queue.go b/pkg/sentry/fs/tty/queue.go index 1ca79c0b2..ceabb9b1e 100644 --- a/pkg/sentry/fs/tty/queue.go +++ b/pkg/sentry/fs/tty/queue.go @@ -25,6 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + // waitBufMaxBytes is the maximum size of a wait buffer. It is based on // TTYB_DEFAULT_MEM_LIMIT. const waitBufMaxBytes = 131072 @@ -234,3 +236,5 @@ func (q *queue) waitBufAppend(b []byte) { q.waitBuf = append(q.waitBuf, b) q.waitBufLen += uint64(len(b)) } + +// LINT.ThenChange(../../fsimpl/devpts/queue.go) diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index 6a2dbc576..9871f6fc6 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -25,6 +25,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + // slaveInodeOperations are the fs.InodeOperations for the slave end of the // Terminal (pts file). // @@ -172,3 +174,5 @@ func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem return 0, syserror.ENOTTY } } + +// LINT.ThenChange(../../fsimpl/devpts/slave.go) diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go index 5883f26db..ddcccf4da 100644 --- a/pkg/sentry/fs/tty/terminal.go +++ b/pkg/sentry/fs/tty/terminal.go @@ -23,6 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) +// LINT.IfChange + // Terminal is a pseudoterminal. // // +stateify savable @@ -126,3 +128,5 @@ func (tm *Terminal) tty(isMaster bool) *kernel.TTY { } return tm.slaveKTTY } + +// LINT.ThenChange(../../fsimpl/devpts/terminal.go) diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD new file mode 100644 index 000000000..585764223 --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -0,0 +1,43 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +licenses(["notice"]) + +go_library( + name = "devpts", + srcs = [ + "devpts.go", + "line_discipline.go", + "master.go", + "queue.go", + "slave.go", + "terminal.go", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi/linux", + "//pkg/context", + "//pkg/safemem", + "//pkg/sentry/arch", + "//pkg/sentry/fsimpl/kernfs", + "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/unimpl", + "//pkg/sentry/vfs", + "//pkg/sync", + "//pkg/syserror", + "//pkg/usermem", + "//pkg/waiter", + ], +) + +go_test( + name = "devpts_test", + size = "small", + srcs = ["devpts_test.go"], + library = ":devpts", + deps = [ + "//pkg/abi/linux", + "//pkg/sentry/contexttest", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go new file mode 100644 index 000000000..07a69b940 --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -0,0 +1,207 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package devpts provides a filesystem implementation that behaves like +// devpts. +package devpts + +import ( + "fmt" + "math" + "sort" + "strconv" + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Name is the filesystem name. +const Name = "devpts" + +// FilesystemType implements vfs.FilesystemType. +type FilesystemType struct{} + +// Name implements vfs.FilesystemType.Name. +func (FilesystemType) Name() string { + return Name +} + +var _ vfs.FilesystemType = (*FilesystemType)(nil) + +// GetFilesystem implements vfs.FilesystemType.GetFilesystem. +func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + // No data allowed. + if opts.Data != "" { + return nil, nil, syserror.EINVAL + } + + fs, root := fstype.newFilesystem(vfsObj, creds) + return fs.VFSFilesystem(), root.VFSDentry(), nil +} + +// newFilesystem creates a new devpts filesystem with root directory and ptmx +// master inode. It returns the filesystem and root Dentry. +func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) (*kernfs.Filesystem, *kernfs.Dentry) { + fs := &kernfs.Filesystem{} + fs.Init(vfsObj, fstype) + + // Construct the root directory. This is always inode id 1. + root := &rootInode{ + slaves: make(map[uint32]*slaveInode), + } + root.InodeAttrs.Init(creds, 1, linux.ModeDirectory|0555) + root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) + root.dentry.Init(root) + + // Construct the pts master inode and dentry. Linux always uses inode + // id 2 for ptmx. See fs/devpts/inode.c:mknod_ptmx. + master := &masterInode{ + root: root, + } + master.InodeAttrs.Init(creds, 2, linux.ModeCharacterDevice|0666) + master.dentry.Init(master) + + // Add the master as a child of the root. + links := root.OrderedChildren.Populate(&root.dentry, map[string]*kernfs.Dentry{ + "ptmx": &master.dentry, + }) + root.IncLinks(links) + + return fs, &root.dentry +} + +// rootInode is the root directory inode for the devpts mounts. +type rootInode struct { + kernfs.AlwaysValid + kernfs.InodeAttrs + kernfs.InodeDirectoryNoNewChildren + kernfs.InodeNotSymlink + kernfs.OrderedChildren + + // Keep a reference to this inode's dentry. + dentry kernfs.Dentry + + // master is the master pty inode. Immutable. + master *masterInode + + // root is the root directory inode for this filesystem. Immutable. + root *rootInode + + // mu protects the fields below. + mu sync.Mutex + + // slaves maps pty ids to slave inodes. + slaves map[uint32]*slaveInode + + // nextIdx is the next pty index to use. Must be accessed atomically. + // + // TODO(b/29356795): reuse indices when ptys are closed. + nextIdx uint32 +} + +var _ kernfs.Inode = (*rootInode)(nil) + +// allocateTerminal creates a new Terminal and installs a pts node for it. +func (i *rootInode) allocateTerminal(creds *auth.Credentials) (*Terminal, error) { + i.mu.Lock() + defer i.mu.Unlock() + if i.nextIdx == math.MaxUint32 { + return nil, syserror.ENOMEM + } + idx := i.nextIdx + i.nextIdx++ + + // Sanity check that slave with idx does not exist. + if _, ok := i.slaves[idx]; ok { + panic(fmt.Sprintf("pty index collision; index %d already exists", idx)) + } + + // Create the new terminal and slave. + t := newTerminal(idx) + slave := &slaveInode{ + root: i, + t: t, + } + // Linux always uses pty index + 3 as the inode id. See + // fs/devpts/inode.c:devpts_pty_new(). + slave.InodeAttrs.Init(creds, uint64(idx+3), linux.ModeCharacterDevice|0600) + slave.dentry.Init(slave) + i.slaves[idx] = slave + + return t, nil +} + +// masterClose is called when the master end of t is closed. +func (i *rootInode) masterClose(t *Terminal) { + i.mu.Lock() + defer i.mu.Unlock() + + // Sanity check that slave with idx exists. + if _, ok := i.slaves[t.n]; !ok { + panic(fmt.Sprintf("pty with index %d does not exist", t.n)) + } + delete(i.slaves, t.n) +} + +// Open implements kernfs.Inode.Open. +func (i *rootInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + fd := &kernfs.GenericDirectoryFD{} + fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + return fd.VFSFileDescription(), nil +} + +// Lookup implements kernfs.Inode.Lookup. +func (i *rootInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { + idx, err := strconv.ParseUint(name, 10, 32) + if err != nil { + return nil, syserror.ENOENT + } + i.mu.Lock() + defer i.mu.Unlock() + if si, ok := i.slaves[uint32(idx)]; ok { + si.dentry.IncRef() + return si.dentry.VFSDentry(), nil + + } + return nil, syserror.ENOENT +} + +// IterDirents implements kernfs.Inode.IterDirents. +func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { + i.mu.Lock() + defer i.mu.Unlock() + ids := make([]int, 0, len(i.slaves)) + for id := range i.slaves { + ids = append(ids, int(id)) + } + sort.Ints(ids) + for _, id := range ids[relOffset:] { + dirent := vfs.Dirent{ + Name: strconv.FormatUint(uint64(id), 10), + Type: linux.DT_CHR, + Ino: i.slaves[uint32(id)].InodeAttrs.Ino(), + NextOff: offset + 1, + } + if err := cb.Handle(dirent); err != nil { + return offset, err + } + offset++ + } + return offset, nil +} diff --git a/pkg/sentry/fsimpl/devpts/devpts_test.go b/pkg/sentry/fsimpl/devpts/devpts_test.go new file mode 100644 index 000000000..b7c149047 --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/devpts_test.go @@ -0,0 +1,56 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package devpts + +import ( + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/contexttest" + "gvisor.dev/gvisor/pkg/usermem" +) + +func TestSimpleMasterToSlave(t *testing.T) { + ld := newLineDiscipline(linux.DefaultSlaveTermios) + ctx := contexttest.Context(t) + inBytes := []byte("hello, tty\n") + src := usermem.BytesIOSequence(inBytes) + outBytes := make([]byte, 32) + dst := usermem.BytesIOSequence(outBytes) + + // Write to the input queue. + nw, err := ld.inputQueueWrite(ctx, src) + if err != nil { + t.Fatalf("error writing to input queue: %v", err) + } + if nw != int64(len(inBytes)) { + t.Fatalf("wrote wrong length: got %d, want %d", nw, len(inBytes)) + } + + // Read from the input queue. + nr, err := ld.inputQueueRead(ctx, dst) + if err != nil { + t.Fatalf("error reading from input queue: %v", err) + } + if nr != int64(len(inBytes)) { + t.Fatalf("read wrong length: got %d, want %d", nr, len(inBytes)) + } + + outStr := string(outBytes[:nr]) + inStr := string(inBytes) + if outStr != inStr { + t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr) + } +} diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go new file mode 100644 index 000000000..e201801d6 --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/line_discipline.go @@ -0,0 +1,449 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package devpts + +import ( + "bytes" + "unicode/utf8" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +// LINT.IfChange + +const ( + // canonMaxBytes is the number of bytes that fit into a single line of + // terminal input in canonical mode. This corresponds to N_TTY_BUF_SIZE + // in include/linux/tty.h. + canonMaxBytes = 4096 + + // nonCanonMaxBytes is the maximum number of bytes that can be read at + // a time in noncanonical mode. + nonCanonMaxBytes = canonMaxBytes - 1 + + spacesPerTab = 8 +) + +// lineDiscipline dictates how input and output are handled between the +// pseudoterminal (pty) master and slave. It can be configured to alter I/O, +// modify control characters (e.g. Ctrl-C for SIGINT), etc. The following man +// pages are good resources for how to affect the line discipline: +// +// * termios(3) +// * tty_ioctl(4) +// +// This file corresponds most closely to drivers/tty/n_tty.c. +// +// lineDiscipline has a simple structure but supports a multitude of options +// (see the above man pages). It consists of two queues of bytes: one from the +// terminal master to slave (the input queue) and one from slave to master (the +// output queue). When bytes are written to one end of the pty, the line +// discipline reads the bytes, modifies them or takes special action if +// required, and enqueues them to be read by the other end of the pty: +// +// input from terminal +-------------+ input to process (e.g. bash) +// +------------------------>| input queue |---------------------------+ +// | (inputQueueWrite) +-------------+ (inputQueueRead) | +// | | +// | v +// masterFD slaveFD +// ^ | +// | | +// | output to terminal +--------------+ output from process | +// +------------------------| output queue |<--------------------------+ +// (outputQueueRead) +--------------+ (outputQueueWrite) +// +// Lock order: +// termiosMu +// inQueue.mu +// outQueue.mu +// +// +stateify savable +type lineDiscipline struct { + // sizeMu protects size. + sizeMu sync.Mutex `state:"nosave"` + + // size is the terminal size (width and height). + size linux.WindowSize + + // inQueue is the input queue of the terminal. + inQueue queue + + // outQueue is the output queue of the terminal. + outQueue queue + + // termiosMu protects termios. + termiosMu sync.RWMutex `state:"nosave"` + + // termios is the terminal configuration used by the lineDiscipline. + termios linux.KernelTermios + + // column is the location in a row of the cursor. This is important for + // handling certain special characters like backspace. + column int + + // masterWaiter is used to wait on the master end of the TTY. + masterWaiter waiter.Queue `state:"zerovalue"` + + // slaveWaiter is used to wait on the slave end of the TTY. + slaveWaiter waiter.Queue `state:"zerovalue"` +} + +func newLineDiscipline(termios linux.KernelTermios) *lineDiscipline { + ld := lineDiscipline{termios: termios} + ld.inQueue.transformer = &inputQueueTransformer{} + ld.outQueue.transformer = &outputQueueTransformer{} + return &ld +} + +// getTermios gets the linux.Termios for the tty. +func (l *lineDiscipline) getTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + l.termiosMu.RLock() + defer l.termiosMu.RUnlock() + // We must copy a Termios struct, not KernelTermios. + t := l.termios.ToTermios() + _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), t, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err +} + +// setTermios sets a linux.Termios for the tty. +func (l *lineDiscipline) setTermios(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + l.termiosMu.Lock() + defer l.termiosMu.Unlock() + oldCanonEnabled := l.termios.LEnabled(linux.ICANON) + // We must copy a Termios struct, not KernelTermios. + var t linux.Termios + _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &t, usermem.IOOpts{ + AddressSpaceActive: true, + }) + l.termios.FromTermios(t) + + // If canonical mode is turned off, move bytes from inQueue's wait + // buffer to its read buffer. Anything already in the read buffer is + // now readable. + if oldCanonEnabled && !l.termios.LEnabled(linux.ICANON) { + l.inQueue.mu.Lock() + l.inQueue.pushWaitBufLocked(l) + l.inQueue.readable = true + l.inQueue.mu.Unlock() + l.slaveWaiter.Notify(waiter.EventIn) + } + + return 0, err +} + +func (l *lineDiscipline) windowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { + l.sizeMu.Lock() + defer l.sizeMu.Unlock() + _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), l.size, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return err +} + +func (l *lineDiscipline) setWindowSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { + l.sizeMu.Lock() + defer l.sizeMu.Unlock() + _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &l.size, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return err +} + +func (l *lineDiscipline) masterReadiness() waiter.EventMask { + // We don't have to lock a termios because the default master termios + // is immutable. + return l.inQueue.writeReadiness(&linux.MasterTermios) | l.outQueue.readReadiness(&linux.MasterTermios) +} + +func (l *lineDiscipline) slaveReadiness() waiter.EventMask { + l.termiosMu.RLock() + defer l.termiosMu.RUnlock() + return l.outQueue.writeReadiness(&l.termios) | l.inQueue.readReadiness(&l.termios) +} + +func (l *lineDiscipline) inputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { + return l.inQueue.readableSize(ctx, io, args) +} + +func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { + l.termiosMu.RLock() + defer l.termiosMu.RUnlock() + n, pushed, err := l.inQueue.read(ctx, dst, l) + if err != nil { + return 0, err + } + if n > 0 { + l.masterWaiter.Notify(waiter.EventOut) + if pushed { + l.slaveWaiter.Notify(waiter.EventIn) + } + return n, nil + } + return 0, syserror.ErrWouldBlock +} + +func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) { + l.termiosMu.RLock() + defer l.termiosMu.RUnlock() + n, err := l.inQueue.write(ctx, src, l) + if err != nil { + return 0, err + } + if n > 0 { + l.slaveWaiter.Notify(waiter.EventIn) + return n, nil + } + return 0, syserror.ErrWouldBlock +} + +func (l *lineDiscipline) outputQueueReadSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { + return l.outQueue.readableSize(ctx, io, args) +} + +func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) { + l.termiosMu.RLock() + defer l.termiosMu.RUnlock() + n, pushed, err := l.outQueue.read(ctx, dst, l) + if err != nil { + return 0, err + } + if n > 0 { + l.slaveWaiter.Notify(waiter.EventOut) + if pushed { + l.masterWaiter.Notify(waiter.EventIn) + } + return n, nil + } + return 0, syserror.ErrWouldBlock +} + +func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) { + l.termiosMu.RLock() + defer l.termiosMu.RUnlock() + n, err := l.outQueue.write(ctx, src, l) + if err != nil { + return 0, err + } + if n > 0 { + l.masterWaiter.Notify(waiter.EventIn) + return n, nil + } + return 0, syserror.ErrWouldBlock +} + +// transformer is a helper interface to make it easier to stateify queue. +type transformer interface { + // transform functions require queue's mutex to be held. + transform(*lineDiscipline, *queue, []byte) int +} + +// outputQueueTransformer implements transformer. It performs line discipline +// transformations on the output queue. +// +// +stateify savable +type outputQueueTransformer struct{} + +// transform does output processing for one end of the pty. See +// drivers/tty/n_tty.c:do_output_char for an analogous kernel function. +// +// Preconditions: +// * l.termiosMu must be held for reading. +// * q.mu must be held. +func (*outputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) int { + // transformOutput is effectively always in noncanonical mode, as the + // master termios never has ICANON set. + + if !l.termios.OEnabled(linux.OPOST) { + q.readBuf = append(q.readBuf, buf...) + if len(q.readBuf) > 0 { + q.readable = true + } + return len(buf) + } + + var ret int + for len(buf) > 0 { + size := l.peek(buf) + cBytes := append([]byte{}, buf[:size]...) + ret += size + buf = buf[size:] + // We're guaranteed that cBytes has at least one element. + switch cBytes[0] { + case '\n': + if l.termios.OEnabled(linux.ONLRET) { + l.column = 0 + } + if l.termios.OEnabled(linux.ONLCR) { + q.readBuf = append(q.readBuf, '\r', '\n') + continue + } + case '\r': + if l.termios.OEnabled(linux.ONOCR) && l.column == 0 { + continue + } + if l.termios.OEnabled(linux.OCRNL) { + cBytes[0] = '\n' + if l.termios.OEnabled(linux.ONLRET) { + l.column = 0 + } + break + } + l.column = 0 + case '\t': + spaces := spacesPerTab - l.column%spacesPerTab + if l.termios.OutputFlags&linux.TABDLY == linux.XTABS { + l.column += spaces + q.readBuf = append(q.readBuf, bytes.Repeat([]byte{' '}, spacesPerTab)...) + continue + } + l.column += spaces + case '\b': + if l.column > 0 { + l.column-- + } + default: + l.column++ + } + q.readBuf = append(q.readBuf, cBytes...) + } + if len(q.readBuf) > 0 { + q.readable = true + } + return ret +} + +// inputQueueTransformer implements transformer. It performs line discipline +// transformations on the input queue. +// +// +stateify savable +type inputQueueTransformer struct{} + +// transform does input processing for one end of the pty. Characters read are +// transformed according to flags set in the termios struct. See +// drivers/tty/n_tty.c:n_tty_receive_char_special for an analogous kernel +// function. +// +// Preconditions: +// * l.termiosMu must be held for reading. +// * q.mu must be held. +func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) int { + // If there's a line waiting to be read in canonical mode, don't write + // anything else to the read buffer. + if l.termios.LEnabled(linux.ICANON) && q.readable { + return 0 + } + + maxBytes := nonCanonMaxBytes + if l.termios.LEnabled(linux.ICANON) { + maxBytes = canonMaxBytes + } + + var ret int + for len(buf) > 0 && len(q.readBuf) < canonMaxBytes { + size := l.peek(buf) + cBytes := append([]byte{}, buf[:size]...) + // We're guaranteed that cBytes has at least one element. + switch cBytes[0] { + case '\r': + if l.termios.IEnabled(linux.IGNCR) { + buf = buf[size:] + ret += size + continue + } + if l.termios.IEnabled(linux.ICRNL) { + cBytes[0] = '\n' + } + case '\n': + if l.termios.IEnabled(linux.INLCR) { + cBytes[0] = '\r' + } + } + + // In canonical mode, we discard non-terminating characters + // after the first 4095. + if l.shouldDiscard(q, cBytes) { + buf = buf[size:] + ret += size + continue + } + + // Stop if the buffer would be overfilled. + if len(q.readBuf)+size > maxBytes { + break + } + buf = buf[size:] + ret += size + + // If we get EOF, make the buffer available for reading. + if l.termios.LEnabled(linux.ICANON) && l.termios.IsEOF(cBytes[0]) { + q.readable = true + break + } + + q.readBuf = append(q.readBuf, cBytes...) + + // Anything written to the readBuf will have to be echoed. + if l.termios.LEnabled(linux.ECHO) { + l.outQueue.writeBytes(cBytes, l) + l.masterWaiter.Notify(waiter.EventIn) + } + + // If we finish a line, make it available for reading. + if l.termios.LEnabled(linux.ICANON) && l.termios.IsTerminating(cBytes) { + q.readable = true + break + } + } + + // In noncanonical mode, everything is readable. + if !l.termios.LEnabled(linux.ICANON) && len(q.readBuf) > 0 { + q.readable = true + } + + return ret +} + +// shouldDiscard returns whether c should be discarded. In canonical mode, if +// too many bytes are enqueued, we keep reading input and discarding it until +// we find a terminating character. Signal/echo processing still occurs. +// +// Precondition: +// * l.termiosMu must be held for reading. +// * q.mu must be held. +func (l *lineDiscipline) shouldDiscard(q *queue, cBytes []byte) bool { + return l.termios.LEnabled(linux.ICANON) && len(q.readBuf)+len(cBytes) >= canonMaxBytes && !l.termios.IsTerminating(cBytes) +} + +// peek returns the size in bytes of the next character to process. As long as +// b isn't empty, peek returns a value of at least 1. +func (l *lineDiscipline) peek(b []byte) int { + size := 1 + // If UTF-8 support is enabled, runes might be multiple bytes. + if l.termios.IEnabled(linux.IUTF8) { + _, size = utf8.DecodeRune(b) + } + return size +} + +// LINT.ThenChange(../../fs/tty/line_discipline.go) diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go new file mode 100644 index 000000000..60340c28e --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -0,0 +1,226 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package devpts + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/unimpl" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +// LINT.IfChange + +// masterInode is the inode for the master end of the Terminal. +type masterInode struct { + kernfs.InodeAttrs + kernfs.InodeNoopRefCount + kernfs.InodeNotDirectory + kernfs.InodeNotSymlink + + // Keep a reference to this inode's dentry. + dentry kernfs.Dentry + + // root is the devpts root inode. + root *rootInode +} + +var _ kernfs.Inode = (*masterInode)(nil) + +// Open implements kernfs.Inode.Open. +func (mi *masterInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + t, err := mi.root.allocateTerminal(rp.Credentials()) + if err != nil { + return nil, err + } + + mi.IncRef() + fd := &masterFileDescription{ + inode: mi, + t: t, + } + if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { + mi.DecRef() + return nil, err + } + return &fd.vfsfd, nil +} + +// Stat implements kernfs.Inode.Stat. +func (mi *masterInode) Stat(vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + statx, err := mi.InodeAttrs.Stat(vfsfs, opts) + if err != nil { + return linux.Statx{}, err + } + statx.Blksize = 1024 + statx.RdevMajor = linux.TTYAUX_MAJOR + statx.RdevMinor = linux.PTMX_MINOR + return statx, nil +} + +// SetStat implements kernfs.Inode.SetStat +func (mi *masterInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { + if opts.Stat.Mask&linux.STATX_SIZE != 0 { + return syserror.EINVAL + } + return mi.InodeAttrs.SetStat(ctx, vfsfs, creds, opts) +} + +type masterFileDescription struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + + inode *masterInode + t *Terminal +} + +var _ vfs.FileDescriptionImpl = (*masterFileDescription)(nil) + +// Release implements vfs.FileDescriptionImpl.Release. +func (mfd *masterFileDescription) Release() { + mfd.inode.root.masterClose(mfd.t) + mfd.inode.DecRef() +} + +// EventRegister implements waiter.Waitable.EventRegister. +func (mfd *masterFileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + mfd.t.ld.masterWaiter.EventRegister(e, mask) +} + +// EventUnregister implements waiter.Waitable.EventUnregister. +func (mfd *masterFileDescription) EventUnregister(e *waiter.Entry) { + mfd.t.ld.masterWaiter.EventUnregister(e) +} + +// Readiness implements waiter.Waitable.Readiness. +func (mfd *masterFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { + return mfd.t.ld.masterReadiness() +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (mfd *masterFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) { + return mfd.t.ld.outputQueueRead(ctx, dst) +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (mfd *masterFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) { + return mfd.t.ld.inputQueueWrite(ctx, src) +} + +// Ioctl implements vfs.FileDescriptionImpl.Ioctl. +func (mfd *masterFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + switch cmd := args[1].Uint(); cmd { + case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ + // Get the number of bytes in the output queue read buffer. + return 0, mfd.t.ld.outputQueueReadSize(ctx, io, args) + case linux.TCGETS: + // N.B. TCGETS on the master actually returns the configuration + // of the slave end. + return mfd.t.ld.getTermios(ctx, io, args) + case linux.TCSETS: + // N.B. TCSETS on the master actually affects the configuration + // of the slave end. + return mfd.t.ld.setTermios(ctx, io, args) + case linux.TCSETSW: + // TODO(b/29356795): This should drain the output queue first. + return mfd.t.ld.setTermios(ctx, io, args) + case linux.TIOCGPTN: + _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(mfd.t.n), usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + case linux.TIOCSPTLCK: + // TODO(b/29356795): Implement pty locking. For now just pretend we do. + return 0, nil + case linux.TIOCGWINSZ: + return 0, mfd.t.ld.windowSize(ctx, io, args) + case linux.TIOCSWINSZ: + return 0, mfd.t.ld.setWindowSize(ctx, io, args) + case linux.TIOCSCTTY: + // Make the given terminal the controlling terminal of the + // calling process. + return 0, mfd.t.setControllingTTY(ctx, io, args, true /* isMaster */) + case linux.TIOCNOTTY: + // Release this process's controlling terminal. + return 0, mfd.t.releaseControllingTTY(ctx, io, args, true /* isMaster */) + case linux.TIOCGPGRP: + // Get the foreground process group. + return mfd.t.foregroundProcessGroup(ctx, io, args, true /* isMaster */) + case linux.TIOCSPGRP: + // Set the foreground process group. + return mfd.t.setForegroundProcessGroup(ctx, io, args, true /* isMaster */) + default: + maybeEmitUnimplementedEvent(ctx, cmd) + return 0, syserror.ENOTTY + } +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (mfd *masterFileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + creds := auth.CredentialsFromContext(ctx) + fs := mfd.vfsfd.VirtualDentry().Mount().Filesystem() + return mfd.inode.SetStat(ctx, fs, creds, opts) +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (mfd *masterFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + fs := mfd.vfsfd.VirtualDentry().Mount().Filesystem() + return mfd.inode.Stat(fs, opts) +} + +// maybeEmitUnimplementedEvent emits unimplemented event if cmd is valid. +func maybeEmitUnimplementedEvent(ctx context.Context, cmd uint32) { + switch cmd { + case linux.TCGETS, + linux.TCSETS, + linux.TCSETSW, + linux.TCSETSF, + linux.TIOCGWINSZ, + linux.TIOCSWINSZ, + linux.TIOCSETD, + linux.TIOCSBRK, + linux.TIOCCBRK, + linux.TCSBRK, + linux.TCSBRKP, + linux.TIOCSTI, + linux.TIOCCONS, + linux.FIONBIO, + linux.TIOCEXCL, + linux.TIOCNXCL, + linux.TIOCGEXCL, + linux.TIOCGSID, + linux.TIOCGETD, + linux.TIOCVHANGUP, + linux.TIOCGDEV, + linux.TIOCMGET, + linux.TIOCMSET, + linux.TIOCMBIC, + linux.TIOCMBIS, + linux.TIOCGICOUNT, + linux.TCFLSH, + linux.TIOCSSERIAL, + linux.TIOCGPTPEER: + + unimpl.EmitUnimplementedEvent(ctx) + } +} + +// LINT.ThenChange(../../fs/tty/master.go) diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go new file mode 100644 index 000000000..29a6be858 --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/queue.go @@ -0,0 +1,240 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package devpts + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +// LINT.IfChange + +// waitBufMaxBytes is the maximum size of a wait buffer. It is based on +// TTYB_DEFAULT_MEM_LIMIT. +const waitBufMaxBytes = 131072 + +// queue represents one of the input or output queues between a pty master and +// slave. Bytes written to a queue are added to the read buffer until it is +// full, at which point they are written to the wait buffer. Bytes are +// processed (i.e. undergo termios transformations) as they are added to the +// read buffer. The read buffer is readable when its length is nonzero and +// readable is true. +// +// +stateify savable +type queue struct { + // mu protects everything in queue. + mu sync.Mutex `state:"nosave"` + + // readBuf is buffer of data ready to be read when readable is true. + // This data has been processed. + readBuf []byte + + // waitBuf contains data that can't fit into readBuf. It is put here + // until it can be loaded into the read buffer. waitBuf contains data + // that hasn't been processed. + waitBuf [][]byte + waitBufLen uint64 + + // readable indicates whether the read buffer can be read from. In + // canonical mode, there can be an unterminated line in the read buffer, + // so readable must be checked. + readable bool + + // transform is the the queue's function for transforming bytes + // entering the queue. For example, transform might convert all '\r's + // entering the queue to '\n's. + transformer +} + +// readReadiness returns whether q is ready to be read from. +func (q *queue) readReadiness(t *linux.KernelTermios) waiter.EventMask { + q.mu.Lock() + defer q.mu.Unlock() + if len(q.readBuf) > 0 && q.readable { + return waiter.EventIn + } + return waiter.EventMask(0) +} + +// writeReadiness returns whether q is ready to be written to. +func (q *queue) writeReadiness(t *linux.KernelTermios) waiter.EventMask { + q.mu.Lock() + defer q.mu.Unlock() + if q.waitBufLen < waitBufMaxBytes { + return waiter.EventOut + } + return waiter.EventMask(0) +} + +// readableSize writes the number of readable bytes to userspace. +func (q *queue) readableSize(ctx context.Context, io usermem.IO, args arch.SyscallArguments) error { + q.mu.Lock() + defer q.mu.Unlock() + var size int32 + if q.readable { + size = int32(len(q.readBuf)) + } + + _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), size, usermem.IOOpts{ + AddressSpaceActive: true, + }) + return err + +} + +// read reads from q to userspace. It returns the number of bytes read as well +// as whether the read caused more readable data to become available (whether +// data was pushed from the wait buffer to the read buffer). +// +// Preconditions: +// * l.termiosMu must be held for reading. +func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipline) (int64, bool, error) { + q.mu.Lock() + defer q.mu.Unlock() + + if !q.readable { + return 0, false, syserror.ErrWouldBlock + } + + if dst.NumBytes() > canonMaxBytes { + dst = dst.TakeFirst(canonMaxBytes) + } + + n, err := dst.CopyOutFrom(ctx, safemem.ReaderFunc(func(dst safemem.BlockSeq) (uint64, error) { + src := safemem.BlockSeqOf(safemem.BlockFromSafeSlice(q.readBuf)) + n, err := safemem.CopySeq(dst, src) + if err != nil { + return 0, err + } + q.readBuf = q.readBuf[n:] + + // If we read everything, this queue is no longer readable. + if len(q.readBuf) == 0 { + q.readable = false + } + + return n, nil + })) + if err != nil { + return 0, false, err + } + + // Move data from the queue's wait buffer to its read buffer. + nPushed := q.pushWaitBufLocked(l) + + return int64(n), nPushed > 0, nil +} + +// write writes to q from userspace. +// +// Preconditions: +// * l.termiosMu must be held for reading. +func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscipline) (int64, error) { + q.mu.Lock() + defer q.mu.Unlock() + + // Copy data into the wait buffer. + n, err := src.CopyInTo(ctx, safemem.WriterFunc(func(src safemem.BlockSeq) (uint64, error) { + copyLen := src.NumBytes() + room := waitBufMaxBytes - q.waitBufLen + // If out of room, return EAGAIN. + if room == 0 && copyLen > 0 { + return 0, syserror.ErrWouldBlock + } + // Cap the size of the wait buffer. + if copyLen > room { + copyLen = room + src = src.TakeFirst64(room) + } + buf := make([]byte, copyLen) + + // Copy the data into the wait buffer. + dst := safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)) + n, err := safemem.CopySeq(dst, src) + if err != nil { + return 0, err + } + q.waitBufAppend(buf) + + return n, nil + })) + if err != nil { + return 0, err + } + + // Push data from the wait to the read buffer. + q.pushWaitBufLocked(l) + + return n, nil +} + +// writeBytes writes to q from b. +// +// Preconditions: +// * l.termiosMu must be held for reading. +func (q *queue) writeBytes(b []byte, l *lineDiscipline) { + q.mu.Lock() + defer q.mu.Unlock() + + // Write to the wait buffer. + q.waitBufAppend(b) + q.pushWaitBufLocked(l) +} + +// pushWaitBufLocked fills the queue's read buffer with data from the wait +// buffer. +// +// Preconditions: +// * l.termiosMu must be held for reading. +// * q.mu must be locked. +func (q *queue) pushWaitBufLocked(l *lineDiscipline) int { + if q.waitBufLen == 0 { + return 0 + } + + // Move data from the wait to the read buffer. + var total int + var i int + for i = 0; i < len(q.waitBuf); i++ { + n := q.transform(l, q, q.waitBuf[i]) + total += n + if n != len(q.waitBuf[i]) { + // The read buffer filled up without consuming the + // entire buffer. + q.waitBuf[i] = q.waitBuf[i][n:] + break + } + } + + // Update wait buffer based on consumed data. + q.waitBuf = q.waitBuf[i:] + q.waitBufLen -= uint64(total) + + return total +} + +// Precondition: q.mu must be locked. +func (q *queue) waitBufAppend(b []byte) { + q.waitBuf = append(q.waitBuf, b) + q.waitBufLen += uint64(len(b)) +} + +// LINT.ThenChange(../../fs/tty/queue.go) diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go new file mode 100644 index 000000000..e7e50d51e --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/slave.go @@ -0,0 +1,186 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package devpts + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +// LINT.IfChange + +// slaveInode is the inode for the slave end of the Terminal. +type slaveInode struct { + kernfs.InodeAttrs + kernfs.InodeNoopRefCount + kernfs.InodeNotDirectory + kernfs.InodeNotSymlink + + // Keep a reference to this inode's dentry. + dentry kernfs.Dentry + + // root is the devpts root inode. + root *rootInode + + // t is the connected Terminal. + t *Terminal +} + +var _ kernfs.Inode = (*slaveInode)(nil) + +// Open implements kernfs.Inode.Open. +func (si *slaveInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + si.IncRef() + fd := &slaveFileDescription{ + inode: si, + } + if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { + si.DecRef() + return nil, err + } + return &fd.vfsfd, nil + +} + +// Valid implements kernfs.Inode.Valid. +func (si *slaveInode) Valid(context.Context) bool { + // Return valid if the slave still exists. + si.root.mu.Lock() + defer si.root.mu.Unlock() + _, ok := si.root.slaves[si.t.n] + return ok +} + +// Stat implements kernfs.Inode.Stat. +func (si *slaveInode) Stat(vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + statx, err := si.InodeAttrs.Stat(vfsfs, opts) + if err != nil { + return linux.Statx{}, err + } + statx.Blksize = 1024 + statx.RdevMajor = linux.UNIX98_PTY_SLAVE_MAJOR + statx.RdevMinor = si.t.n + return statx, nil +} + +// SetStat implements kernfs.Inode.SetStat +func (si *slaveInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { + if opts.Stat.Mask&linux.STATX_SIZE != 0 { + return syserror.EINVAL + } + return si.InodeAttrs.SetStat(ctx, vfsfs, creds, opts) +} + +type slaveFileDescription struct { + vfsfd vfs.FileDescription + vfs.FileDescriptionDefaultImpl + + inode *slaveInode +} + +var _ vfs.FileDescriptionImpl = (*slaveFileDescription)(nil) + +// Release implements fs.FileOperations.Release. +func (sfd *slaveFileDescription) Release() { + sfd.inode.DecRef() +} + +// EventRegister implements waiter.Waitable.EventRegister. +func (sfd *slaveFileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + sfd.inode.t.ld.slaveWaiter.EventRegister(e, mask) +} + +// EventUnregister implements waiter.Waitable.EventUnregister. +func (sfd *slaveFileDescription) EventUnregister(e *waiter.Entry) { + sfd.inode.t.ld.slaveWaiter.EventUnregister(e) +} + +// Readiness implements waiter.Waitable.Readiness. +func (sfd *slaveFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { + return sfd.inode.t.ld.slaveReadiness() +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (sfd *slaveFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) { + return sfd.inode.t.ld.inputQueueRead(ctx, dst) +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (sfd *slaveFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) { + return sfd.inode.t.ld.outputQueueWrite(ctx, src) +} + +// Ioctl implements vfs.FileDescripionImpl.Ioctl. +func (sfd *slaveFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + switch cmd := args[1].Uint(); cmd { + case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ + // Get the number of bytes in the input queue read buffer. + return 0, sfd.inode.t.ld.inputQueueReadSize(ctx, io, args) + case linux.TCGETS: + return sfd.inode.t.ld.getTermios(ctx, io, args) + case linux.TCSETS: + return sfd.inode.t.ld.setTermios(ctx, io, args) + case linux.TCSETSW: + // TODO(b/29356795): This should drain the output queue first. + return sfd.inode.t.ld.setTermios(ctx, io, args) + case linux.TIOCGPTN: + _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), uint32(sfd.inode.t.n), usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err + case linux.TIOCGWINSZ: + return 0, sfd.inode.t.ld.windowSize(ctx, io, args) + case linux.TIOCSWINSZ: + return 0, sfd.inode.t.ld.setWindowSize(ctx, io, args) + case linux.TIOCSCTTY: + // Make the given terminal the controlling terminal of the + // calling process. + return 0, sfd.inode.t.setControllingTTY(ctx, io, args, false /* isMaster */) + case linux.TIOCNOTTY: + // Release this process's controlling terminal. + return 0, sfd.inode.t.releaseControllingTTY(ctx, io, args, false /* isMaster */) + case linux.TIOCGPGRP: + // Get the foreground process group. + return sfd.inode.t.foregroundProcessGroup(ctx, io, args, false /* isMaster */) + case linux.TIOCSPGRP: + // Set the foreground process group. + return sfd.inode.t.setForegroundProcessGroup(ctx, io, args, false /* isMaster */) + default: + maybeEmitUnimplementedEvent(ctx, cmd) + return 0, syserror.ENOTTY + } +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (sfd *slaveFileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + creds := auth.CredentialsFromContext(ctx) + fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem() + return sfd.inode.SetStat(ctx, fs, creds, opts) +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (sfd *slaveFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem() + return sfd.inode.Stat(fs, opts) +} + +// LINT.ThenChange(../../fs/tty/slave.go) diff --git a/pkg/sentry/fsimpl/devpts/terminal.go b/pkg/sentry/fsimpl/devpts/terminal.go new file mode 100644 index 000000000..b44e673d8 --- /dev/null +++ b/pkg/sentry/fsimpl/devpts/terminal.go @@ -0,0 +1,124 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package devpts + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/usermem" +) + +// LINT.IfChanges + +// Terminal is a pseudoterminal. +// +// +stateify savable +type Terminal struct { + // n is the terminal index. It is immutable. + n uint32 + + // ld is the line discipline of the terminal. It is immutable. + ld *lineDiscipline + + // masterKTTY contains the controlling process of the master end of + // this terminal. This field is immutable. + masterKTTY *kernel.TTY + + // slaveKTTY contains the controlling process of the slave end of this + // terminal. This field is immutable. + slaveKTTY *kernel.TTY +} + +func newTerminal(n uint32) *Terminal { + termios := linux.DefaultSlaveTermios + t := Terminal{ + n: n, + ld: newLineDiscipline(termios), + masterKTTY: &kernel.TTY{Index: n}, + slaveKTTY: &kernel.TTY{Index: n}, + } + return &t +} + +// setControllingTTY makes tm the controlling terminal of the calling thread +// group. +func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error { + task := kernel.TaskFromContext(ctx) + if task == nil { + panic("setControllingTTY must be called from a task context") + } + + return task.ThreadGroup().SetControllingTTY(tm.tty(isMaster), args[2].Int()) +} + +// releaseControllingTTY removes tm as the controlling terminal of the calling +// thread group. +func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error { + task := kernel.TaskFromContext(ctx) + if task == nil { + panic("releaseControllingTTY must be called from a task context") + } + + return task.ThreadGroup().ReleaseControllingTTY(tm.tty(isMaster)) +} + +// foregroundProcessGroup gets the process group ID of tm's foreground process. +func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) { + task := kernel.TaskFromContext(ctx) + if task == nil { + panic("foregroundProcessGroup must be called from a task context") + } + + ret, err := task.ThreadGroup().ForegroundProcessGroup(tm.tty(isMaster)) + if err != nil { + return 0, err + } + + // Write it out to *arg. + _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(ret), usermem.IOOpts{ + AddressSpaceActive: true, + }) + return 0, err +} + +// foregroundProcessGroup sets tm's foreground process. +func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) { + task := kernel.TaskFromContext(ctx) + if task == nil { + panic("setForegroundProcessGroup must be called from a task context") + } + + // Read in the process group ID. + var pgid int32 + if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgid, usermem.IOOpts{ + AddressSpaceActive: true, + }); err != nil { + return 0, err + } + + ret, err := task.ThreadGroup().SetForegroundProcessGroup(tm.tty(isMaster), kernel.ProcessGroupID(pgid)) + return uintptr(ret), err +} + +func (tm *Terminal) tty(isMaster bool) *kernel.TTY { + if isMaster { + return tm.masterKTTY + } + return tm.slaveKTTY +} + +// LINT.ThenChange(../../fs/tty/terminal.go) diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go index 64f1b142c..142ee53b0 100644 --- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go +++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go @@ -163,16 +163,25 @@ func (a *Accessor) CreateDeviceFile(ctx context.Context, pathname string, kind v func (a *Accessor) UserspaceInit(ctx context.Context) error { actx := a.wrapContext(ctx) - // systemd: src/shared/dev-setup.c:dev_setup() + // Initialize symlinks. for _, symlink := range []struct { source string target string }{ - // /proc/kcore is not implemented. + // systemd: src/shared/dev-setup.c:dev_setup() {source: "fd", target: "/proc/self/fd"}, {source: "stdin", target: "/proc/self/fd/0"}, {source: "stdout", target: "/proc/self/fd/1"}, {source: "stderr", target: "/proc/self/fd/2"}, + // /proc/kcore is not implemented. + + // Linux implements /dev/ptmx as a device node, but advises + // container implementations to create /dev/ptmx as a symlink + // to pts/ptmx (Documentation/filesystems/devpts.txt). Systemd + // follows this advice (src/nspawn/nspawn.c:setup_pts()), while + // LXC tries to create a bind mount and falls back to a symlink + // (src/lxc/conf.c:lxc_setup_devpts()). + {source: "ptmx", target: "pts/ptmx"}, } { if err := a.vfsObj.SymlinkAt(actx, a.creds, a.pathOperationAt(symlink.source), symlink.target); err != nil { return fmt.Errorf("failed to create symlink %q => %q: %v", symlink.source, symlink.target, err) diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 3164d022c..1d46dba25 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -391,7 +391,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // O_NOFOLLOW have no effect here (they're handled by VFS by setting // appropriate bits in rp), but are returned by // FileDescriptionImpl.StatusFlags(). - opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC | linux.O_DIRECTORY | linux.O_NOFOLLOW + opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC | linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK ats := vfs.AccessTypesForOpenFlags(&opts) // Do not create new file. diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 9f526359e..a946645f6 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -216,6 +216,11 @@ func (a *InodeAttrs) Init(creds *auth.Credentials, ino uint64, mode linux.FileMo atomic.StoreUint32(&a.nlink, nlink) } +// Ino returns the inode id. +func (a *InodeAttrs) Ino() uint64 { + return atomic.LoadUint64(&a.ino) +} + // Mode implements Inode.Mode. func (a *InodeAttrs) Mode() linux.FileMode { return linux.FileMode(atomic.LoadUint32(&a.mode)) @@ -359,8 +364,8 @@ func (o *OrderedChildren) Destroy() { // cache. Populate returns the number of directories inserted, which the caller // may use to update the link count for the parent directory. // -// Precondition: d.Impl() must be a kernfs Dentry. d must represent a directory -// inode. children must not contain any conflicting entries already in o. +// Precondition: d must represent a directory inode. children must not contain +// any conflicting entries already in o. func (o *OrderedChildren) Populate(d *Dentry, children map[string]*Dentry) uint32 { var links uint32 for name, child := range children { -- cgit v1.2.3 From f13f26d17da56d585fd9857a81175bbd0be8ce60 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Fri, 24 Apr 2020 13:45:31 -0700 Subject: Port SCM Rights to VFS2. Fixes #1477. PiperOrigin-RevId: 308317511 --- pkg/sentry/fs/host/control.go | 6 +- pkg/sentry/fsimpl/host/BUILD | 3 + pkg/sentry/fsimpl/host/control.go | 96 ++++++++++++++++++++++ pkg/sentry/socket/control/BUILD | 6 +- pkg/sentry/socket/control/control.go | 24 +++++- pkg/sentry/socket/control/control_vfs2.go | 131 ++++++++++++++++++++++++++++++ pkg/sentry/syscalls/linux/vfs2/socket.go | 2 +- 7 files changed, 260 insertions(+), 8 deletions(-) create mode 100644 pkg/sentry/fsimpl/host/control.go create mode 100644 pkg/sentry/socket/control/control_vfs2.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go index 52c0504b6..39299b7e4 100644 --- a/pkg/sentry/fs/host/control.go +++ b/pkg/sentry/fs/host/control.go @@ -23,6 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" ) +// LINT.IfChange + type scmRights struct { fds []int } @@ -32,8 +34,6 @@ func newSCMRights(fds []int) control.SCMRights { } // Files implements control.SCMRights.Files. -// -// TODO(gvisor.dev/issue/2017): Port to VFS2. func (c *scmRights) Files(ctx context.Context, max int) (control.RightsFiles, bool) { n := max var trunc bool @@ -93,3 +93,5 @@ func fdsToFiles(ctx context.Context, fds []int) []*fs.File { } return files } + +// LINT.ThenChange(../../fsimpl/host/control.go) diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 44dd9f672..2dcb03a73 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -5,6 +5,7 @@ licenses(["notice"]) go_library( name = "host", srcs = [ + "control.go", "host.go", "ioctl_unsafe.go", "tty.go", @@ -23,6 +24,8 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", + "//pkg/sentry/socket/control", + "//pkg/sentry/socket/unix/transport", "//pkg/sentry/unimpl", "//pkg/sentry/vfs", "//pkg/sync", diff --git a/pkg/sentry/fsimpl/host/control.go b/pkg/sentry/fsimpl/host/control.go new file mode 100644 index 000000000..b9082a20f --- /dev/null +++ b/pkg/sentry/fsimpl/host/control.go @@ -0,0 +1,96 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/socket/control" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +type scmRights struct { + fds []int +} + +func newSCMRights(fds []int) control.SCMRightsVFS2 { + return &scmRights{fds} +} + +// Files implements control.SCMRights.Files. +func (c *scmRights) Files(ctx context.Context, max int) (control.RightsFilesVFS2, bool) { + n := max + var trunc bool + if l := len(c.fds); n > l { + n = l + } else if n < l { + trunc = true + } + + rf := control.RightsFilesVFS2(fdsToFiles(ctx, c.fds[:n])) + + // Only consume converted FDs (fdsToFiles may convert fewer than n FDs). + c.fds = c.fds[len(rf):] + return rf, trunc +} + +// Clone implements transport.RightsControlMessage.Clone. +func (c *scmRights) Clone() transport.RightsControlMessage { + // Host rights never need to be cloned. + return nil +} + +// Release implements transport.RightsControlMessage.Release. +func (c *scmRights) Release() { + for _, fd := range c.fds { + syscall.Close(fd) + } + c.fds = nil +} + +// If an error is encountered, only files created before the error will be +// returned. This is what Linux does. +func fdsToFiles(ctx context.Context, fds []int) []*vfs.FileDescription { + files := make([]*vfs.FileDescription, 0, len(fds)) + for _, fd := range fds { + // Get flags. We do it here because they may be modified + // by subsequent functions. + fileFlags, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_GETFL, 0) + if errno != 0 { + ctx.Warningf("Error retrieving host FD flags: %v", error(errno)) + break + } + + // Create the file backed by hostFD. + file, err := ImportFD(ctx, kernel.KernelFromContext(ctx).HostMount(), fd, false /* isTTY */) + if err != nil { + ctx.Warningf("Error creating file from host FD: %v", err) + break + } + + if err := file.SetStatusFlags(ctx, auth.CredentialsFromContext(ctx), uint32(fileFlags&linux.O_NONBLOCK)); err != nil { + ctx.Warningf("Error setting flags on host FD file: %v", err) + break + } + + files = append(files, file) + } + return files +} diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD index 4d42d29cb..ca16d0381 100644 --- a/pkg/sentry/socket/control/BUILD +++ b/pkg/sentry/socket/control/BUILD @@ -4,7 +4,10 @@ package(licenses = ["notice"]) go_library( name = "control", - srcs = ["control.go"], + srcs = [ + "control.go", + "control_vfs2.go", + ], imports = [ "gvisor.dev/gvisor/pkg/sentry/fs", ], @@ -18,6 +21,7 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/socket", "//pkg/sentry/socket/unix/transport", + "//pkg/sentry/vfs", "//pkg/syserror", "//pkg/tcpip", "//pkg/usermem", diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 8834a1e1a..8b439a078 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -41,6 +41,8 @@ type SCMCredentials interface { Credentials(t *kernel.Task) (kernel.ThreadID, auth.UID, auth.GID) } +// LINT.IfChange + // SCMRights represents a SCM_RIGHTS socket control message. type SCMRights interface { transport.RightsControlMessage @@ -142,6 +144,8 @@ func PackRights(t *kernel.Task, rights SCMRights, cloexec bool, buf []byte, flag return putCmsg(buf, flags, linux.SCM_RIGHTS, align, fds) } +// LINT.ThenChange(./control_vfs2.go) + // scmCredentials represents an SCM_CREDENTIALS socket control message. // // +stateify savable @@ -537,11 +541,19 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con } if len(fds) > 0 { - rights, err := NewSCMRights(t, fds) - if err != nil { - return socket.ControlMessages{}, err + if kernel.VFS2Enabled { + rights, err := NewSCMRightsVFS2(t, fds) + if err != nil { + return socket.ControlMessages{}, err + } + cmsgs.Unix.Rights = rights + } else { + rights, err := NewSCMRights(t, fds) + if err != nil { + return socket.ControlMessages{}, err + } + cmsgs.Unix.Rights = rights } - cmsgs.Unix.Rights = rights } return cmsgs, nil @@ -566,6 +578,8 @@ func MakeCreds(t *kernel.Task) SCMCredentials { return &scmCredentials{t, tcred.EffectiveKUID, tcred.EffectiveKGID} } +// LINT.IfChange + // New creates default control messages if needed. func New(t *kernel.Task, socketOrEndpoint interface{}, rights SCMRights) transport.ControlMessages { return transport.ControlMessages{ @@ -573,3 +587,5 @@ func New(t *kernel.Task, socketOrEndpoint interface{}, rights SCMRights) transpo Rights: rights, } } + +// LINT.ThenChange(./control_vfs2.go) diff --git a/pkg/sentry/socket/control/control_vfs2.go b/pkg/sentry/socket/control/control_vfs2.go new file mode 100644 index 000000000..fd08179be --- /dev/null +++ b/pkg/sentry/socket/control/control_vfs2.go @@ -0,0 +1,131 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// SCMRightsVFS2 represents a SCM_RIGHTS socket control message. +type SCMRightsVFS2 interface { + transport.RightsControlMessage + + // Files returns up to max RightsFiles. + // + // Returned files are consumed and ownership is transferred to the caller. + // Subsequent calls to Files will return the next files. + Files(ctx context.Context, max int) (rf RightsFilesVFS2, truncated bool) +} + +// RightsFiles represents a SCM_RIGHTS socket control message. A reference is +// maintained for each vfs.FileDescription and is release either when an FD is created or +// when the Release method is called. +type RightsFilesVFS2 []*vfs.FileDescription + +// NewSCMRightsVFS2 creates a new SCM_RIGHTS socket control message +// representation using local sentry FDs. +func NewSCMRightsVFS2(t *kernel.Task, fds []int32) (SCMRightsVFS2, error) { + files := make(RightsFilesVFS2, 0, len(fds)) + for _, fd := range fds { + file := t.GetFileVFS2(fd) + if file == nil { + files.Release() + return nil, syserror.EBADF + } + files = append(files, file) + } + return &files, nil +} + +// Files implements SCMRights.Files. +func (fs *RightsFilesVFS2) Files(ctx context.Context, max int) (RightsFilesVFS2, bool) { + n := max + var trunc bool + if l := len(*fs); n > l { + n = l + } else if n < l { + trunc = true + } + rf := (*fs)[:n] + *fs = (*fs)[n:] + return rf, trunc +} + +// Clone implements transport.RightsControlMessage.Clone. +func (fs *RightsFilesVFS2) Clone() transport.RightsControlMessage { + nfs := append(RightsFilesVFS2(nil), *fs...) + for _, nf := range nfs { + nf.IncRef() + } + return &nfs +} + +// Release implements transport.RightsControlMessage.Release. +func (fs *RightsFilesVFS2) Release() { + for _, f := range *fs { + f.DecRef() + } + *fs = nil +} + +// rightsFDsVFS2 gets up to the specified maximum number of FDs. +func rightsFDsVFS2(t *kernel.Task, rights SCMRightsVFS2, cloexec bool, max int) ([]int32, bool) { + files, trunc := rights.Files(t, max) + fds := make([]int32, 0, len(files)) + for i := 0; i < max && len(files) > 0; i++ { + fd, err := t.NewFDFromVFS2(0, files[0], kernel.FDFlags{ + CloseOnExec: cloexec, + }) + files[0].DecRef() + files = files[1:] + if err != nil { + t.Warningf("Error inserting FD: %v", err) + // This is what Linux does. + break + } + + fds = append(fds, int32(fd)) + } + return fds, trunc +} + +// PackRightsVFS2 packs as many FDs as will fit into the unused capacity of buf. +func PackRightsVFS2(t *kernel.Task, rights SCMRightsVFS2, cloexec bool, buf []byte, flags int) ([]byte, int) { + maxFDs := (cap(buf) - len(buf) - linux.SizeOfControlMessageHeader) / 4 + // Linux does not return any FDs if none fit. + if maxFDs <= 0 { + flags |= linux.MSG_CTRUNC + return buf, flags + } + fds, trunc := rightsFDsVFS2(t, rights, cloexec, maxFDs) + if trunc { + flags |= linux.MSG_CTRUNC + } + align := t.Arch().Width() + return putCmsg(buf, flags, linux.SCM_RIGHTS, align, fds) +} + +// NewVFS2 creates default control messages if needed. +func NewVFS2(t *kernel.Task, socketOrEndpoint interface{}, rights SCMRightsVFS2) transport.ControlMessages { + return transport.ControlMessages{ + Credentials: makeCreds(t, socketOrEndpoint), + Rights: rights, + } +} diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go index b1ede32f0..10b668477 100644 --- a/pkg/sentry/syscalls/linux/vfs2/socket.go +++ b/pkg/sentry/syscalls/linux/vfs2/socket.go @@ -804,7 +804,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla } if cms.Unix.Rights != nil { - controlData, mflags = control.PackRights(t, cms.Unix.Rights.(control.SCMRights), flags&linux.MSG_CMSG_CLOEXEC != 0, controlData, mflags) + controlData, mflags = control.PackRightsVFS2(t, cms.Unix.Rights.(control.SCMRightsVFS2), flags&linux.MSG_CMSG_CLOEXEC != 0, controlData, mflags) } // Copy the address to the caller. -- cgit v1.2.3 From 1c2ecbb1a03ffaa3bcdb2ee69c879da5e7076fa5 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Mon, 27 Apr 2020 16:00:39 -0700 Subject: Import host sockets. The FileDescription implementation for hostfs sockets uses the standard Unix socket implementation (unix.SocketVFS2), but is also tied to a hostfs dentry. Updates #1672, #1476 PiperOrigin-RevId: 308716426 --- pkg/sentry/fs/host/socket.go | 4 + pkg/sentry/fs/host/socket_iovec.go | 4 + pkg/sentry/fs/host/socket_unsafe.go | 4 + pkg/sentry/fsimpl/host/BUILD | 10 + pkg/sentry/fsimpl/host/host.go | 57 +++-- pkg/sentry/fsimpl/host/socket.go | 397 ++++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/host/socket_iovec.go | 113 +++++++++ pkg/sentry/fsimpl/host/socket_unsafe.go | 101 ++++++++ pkg/sentry/fsimpl/sockfs/sockfs.go | 20 +- pkg/sentry/socket/unix/BUILD | 1 + pkg/sentry/socket/unix/unix_vfs2.go | 42 ++-- 11 files changed, 698 insertions(+), 55 deletions(-) create mode 100644 pkg/sentry/fsimpl/host/socket.go create mode 100644 pkg/sentry/fsimpl/host/socket_iovec.go create mode 100644 pkg/sentry/fsimpl/host/socket_unsafe.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index 06fc2d80a..b6e94583e 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -37,6 +37,8 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) +// LINT.IfChange + // maxSendBufferSize is the maximum host send buffer size allowed for endpoint. // // N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max). @@ -388,3 +390,5 @@ func (c *ConnectedEndpoint) Release() { // CloseUnread implements transport.ConnectedEndpoint.CloseUnread. func (c *ConnectedEndpoint) CloseUnread() {} + +// LINT.ThenChange(../../fsimpl/host/socket.go) diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go index af6955675..5c18dbd5e 100644 --- a/pkg/sentry/fs/host/socket_iovec.go +++ b/pkg/sentry/fs/host/socket_iovec.go @@ -21,6 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) +// LINT.IfChange + // maxIovs is the maximum number of iovecs to pass to the host. var maxIovs = linux.UIO_MAXIOV @@ -111,3 +113,5 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec return total, iovecs, nil, err } + +// LINT.ThenChange(../../fsimpl/host/socket_iovec.go) diff --git a/pkg/sentry/fs/host/socket_unsafe.go b/pkg/sentry/fs/host/socket_unsafe.go index f3bbed7ea..5d4f312cf 100644 --- a/pkg/sentry/fs/host/socket_unsafe.go +++ b/pkg/sentry/fs/host/socket_unsafe.go @@ -19,6 +19,8 @@ import ( "unsafe" ) +// LINT.IfChange + // fdReadVec receives from fd to bufs. // // If the total length of bufs is > maxlen, fdReadVec will do a partial read @@ -99,3 +101,5 @@ func fdWriteVec(fd int, bufs [][]byte, maxlen int64, truncate bool) (int64, int6 return int64(n), length, err } + +// LINT.ThenChange(../../fsimpl/host/socket_unsafe.go) diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 2dcb03a73..e1c56d89b 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -8,6 +8,9 @@ go_library( "control.go", "host.go", "ioctl_unsafe.go", + "socket.go", + "socket_iovec.go", + "socket_unsafe.go", "tty.go", "util.go", "util_unsafe.go", @@ -16,6 +19,7 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/context", + "//pkg/fdnotifier", "//pkg/log", "//pkg/refs", "//pkg/sentry/arch", @@ -25,12 +29,18 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", "//pkg/sentry/socket/control", + "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/unimpl", + "//pkg/sentry/uniqueid", "//pkg/sentry/vfs", "//pkg/sync", + "//pkg/syserr", "//pkg/syserror", + "//pkg/tcpip", + "//pkg/unet", "//pkg/usermem", + "//pkg/waiter", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 1e53b5c1b..05538ea42 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -17,7 +17,6 @@ package host import ( - "errors" "fmt" "math" "syscall" @@ -31,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/hostfd" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" + unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -156,7 +156,7 @@ type inode struct { // Note that these flags may become out of date, since they can be modified // on the host, e.g. with fcntl. -func fileFlagsFromHostFD(fd int) (int, error) { +func fileFlagsFromHostFD(fd int) (uint32, error) { flags, err := unix.FcntlInt(uintptr(fd), syscall.F_GETFL, 0) if err != nil { log.Warningf("Failed to get file flags for donated FD %d: %v", fd, err) @@ -164,7 +164,7 @@ func fileFlagsFromHostFD(fd int) (int, error) { } // TODO(gvisor.dev/issue/1672): implement behavior corresponding to these allowed flags. flags &= syscall.O_ACCMODE | syscall.O_DIRECT | syscall.O_NONBLOCK | syscall.O_DSYNC | syscall.O_SYNC | syscall.O_APPEND - return flags, nil + return uint32(flags), nil } // CheckPermissions implements kernfs.Inode. @@ -361,6 +361,10 @@ func (i *inode) Destroy() { // Open implements kernfs.Inode. func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + // Once created, we cannot re-open a socket fd through /proc/[pid]/fd/. + if i.Mode().FileType() == linux.S_IFSOCK { + return nil, syserror.ENXIO + } return i.open(ctx, vfsd, rp.Mount()) } @@ -370,42 +374,45 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount) (*vfs.F return nil, err } fileType := s.Mode & linux.FileTypeMask + flags, err := fileFlagsFromHostFD(i.hostFD) + if err != nil { + return nil, err + } + if fileType == syscall.S_IFSOCK { if i.isTTY { - return nil, errors.New("cannot use host socket as TTY") + log.Warningf("cannot use host socket fd %d as TTY", i.hostFD) + return nil, syserror.ENOTTY } - // TODO(gvisor.dev/issue/1672): support importing sockets. - return nil, errors.New("importing host sockets not supported") + + ep, err := newEndpoint(ctx, i.hostFD) + if err != nil { + return nil, err + } + // Currently, we only allow Unix sockets to be imported. + return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d) } // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that // we don't allow importing arbitrary file types without proper support. - var ( - vfsfd *vfs.FileDescription - fdImpl vfs.FileDescriptionImpl - ) if i.isTTY { fd := &ttyFD{ fileDescription: fileDescription{inode: i}, termios: linux.DefaultSlaveTermios, } - vfsfd = &fd.vfsfd - fdImpl = fd - } else { - // For simplicity, set offset to 0. Technically, we should - // only set to 0 on files that are not seekable (sockets, pipes, etc.), - // and use the offset from the host fd otherwise. - fd := &fileDescription{inode: i} - vfsfd = &fd.vfsfd - fdImpl = fd - } - - flags, err := fileFlagsFromHostFD(i.hostFD) - if err != nil { - return nil, err + vfsfd := &fd.vfsfd + if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } + return vfsfd, nil } - if err := vfsfd.Init(fdImpl, uint32(flags), mnt, d, &vfs.FileDescriptionOptions{}); err != nil { + // For simplicity, set offset to 0. Technically, we should + // only set to 0 on files that are not seekable (sockets, pipes, etc.), + // and use the offset from the host fd otherwise. + fd := &fileDescription{inode: i} + vfsfd := &fd.vfsfd + if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } return vfsfd, nil diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go new file mode 100644 index 000000000..39dd624a0 --- /dev/null +++ b/pkg/sentry/fsimpl/host/socket.go @@ -0,0 +1,397 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "fmt" + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fdnotifier" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/socket/control" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/unet" + "gvisor.dev/gvisor/pkg/waiter" +) + +// Create a new host-backed endpoint from the given fd. +func newEndpoint(ctx context.Context, hostFD int) (transport.Endpoint, error) { + // Set up an external transport.Endpoint using the host fd. + addr := fmt.Sprintf("hostfd:[%d]", hostFD) + var q waiter.Queue + e, err := NewConnectedEndpoint(ctx, hostFD, &q, addr, true /* saveable */) + if err != nil { + return nil, err.ToError() + } + e.Init() + ep := transport.NewExternal(ctx, e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e) + return ep, nil +} + +// maxSendBufferSize is the maximum host send buffer size allowed for endpoint. +// +// N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max). +const maxSendBufferSize = 8 << 20 + +// ConnectedEndpoint is an implementation of transport.ConnectedEndpoint and +// transport.Receiver. It is backed by a host fd that was imported at sentry +// startup. This fd is shared with a hostfs inode, which retains ownership of +// it. +// +// ConnectedEndpoint is saveable, since we expect that the host will provide +// the same fd upon restore. +// +// As of this writing, we only allow Unix sockets to be imported. +// +// +stateify savable +type ConnectedEndpoint struct { + // ref keeps track of references to a ConnectedEndpoint. + ref refs.AtomicRefCount + + // mu protects fd below. + mu sync.RWMutex `state:"nosave"` + + // fd is the host fd backing this endpoint. + fd int + + // addr is the address at which this endpoint is bound. + addr string + + queue *waiter.Queue + + // sndbuf is the size of the send buffer. + // + // N.B. When this is smaller than the host size, we present it via + // GetSockOpt and message splitting/rejection in SendMsg, but do not + // prevent lots of small messages from filling the real send buffer + // size on the host. + sndbuf int64 `state:"nosave"` + + // stype is the type of Unix socket. + stype linux.SockType +} + +// init performs initialization required for creating new ConnectedEndpoints and +// for restoring them. +func (c *ConnectedEndpoint) init() *syserr.Error { + family, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_DOMAIN) + if err != nil { + return syserr.FromError(err) + } + + if family != syscall.AF_UNIX { + // We only allow Unix sockets. + return syserr.ErrInvalidEndpointState + } + + stype, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_TYPE) + if err != nil { + return syserr.FromError(err) + } + + if err := syscall.SetNonblock(c.fd, true); err != nil { + return syserr.FromError(err) + } + + sndbuf, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF) + if err != nil { + return syserr.FromError(err) + } + if sndbuf > maxSendBufferSize { + log.Warningf("Socket send buffer too large: %d", sndbuf) + return syserr.ErrInvalidEndpointState + } + + c.stype = linux.SockType(stype) + c.sndbuf = int64(sndbuf) + + return nil +} + +// NewConnectedEndpoint creates a new ConnectedEndpoint backed by a host fd +// imported at sentry startup, +// +// The caller is responsible for calling Init(). Additionaly, Release needs to +// be called twice because ConnectedEndpoint is both a transport.Receiver and +// transport.ConnectedEndpoint. +func NewConnectedEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue, addr string, saveable bool) (*ConnectedEndpoint, *syserr.Error) { + e := ConnectedEndpoint{ + fd: hostFD, + addr: addr, + queue: queue, + } + + if err := e.init(); err != nil { + return nil, err + } + + // AtomicRefCounters start off with a single reference. We need two. + e.ref.IncRef() + e.ref.EnableLeakCheck("host.ConnectedEndpoint") + return &e, nil +} + +// Init will do the initialization required without holding other locks. +func (c *ConnectedEndpoint) Init() { + if err := fdnotifier.AddFD(int32(c.fd), c.queue); err != nil { + panic(err) + } +} + +// Send implements transport.ConnectedEndpoint.Send. +func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) { + c.mu.RLock() + defer c.mu.RUnlock() + + if !controlMessages.Empty() { + return 0, false, syserr.ErrInvalidEndpointState + } + + // Since stream sockets don't preserve message boundaries, we can write + // only as much of the message as fits in the send buffer. + truncate := c.stype == linux.SOCK_STREAM + + n, totalLen, err := fdWriteVec(c.fd, data, c.sndbuf, truncate) + if n < totalLen && err == nil { + // The host only returns a short write if it would otherwise + // block (and only for stream sockets). + err = syserror.EAGAIN + } + if n > 0 && err != syserror.EAGAIN { + // The caller may need to block to send more data, but + // otherwise there isn't anything that can be done about an + // error with a partial write. + err = nil + } + + // There is no need for the callee to call SendNotify because fdWriteVec + // uses the host's sendmsg(2) and the host kernel's queue. + return n, false, syserr.FromError(err) +} + +// SendNotify implements transport.ConnectedEndpoint.SendNotify. +func (c *ConnectedEndpoint) SendNotify() {} + +// CloseSend implements transport.ConnectedEndpoint.CloseSend. +func (c *ConnectedEndpoint) CloseSend() { + c.mu.Lock() + defer c.mu.Unlock() + + if err := syscall.Shutdown(c.fd, syscall.SHUT_WR); err != nil { + // A well-formed UDS shutdown can't fail. See + // net/unix/af_unix.c:unix_shutdown. + panic(fmt.Sprintf("failed write shutdown on host socket %+v: %v", c, err)) + } +} + +// CloseNotify implements transport.ConnectedEndpoint.CloseNotify. +func (c *ConnectedEndpoint) CloseNotify() {} + +// Writable implements transport.ConnectedEndpoint.Writable. +func (c *ConnectedEndpoint) Writable() bool { + c.mu.RLock() + defer c.mu.RUnlock() + + return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.EventOut)&waiter.EventOut != 0 +} + +// Passcred implements transport.ConnectedEndpoint.Passcred. +func (c *ConnectedEndpoint) Passcred() bool { + // We don't support credential passing for host sockets. + return false +} + +// GetLocalAddress implements transport.ConnectedEndpoint.GetLocalAddress. +func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { + return tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, nil +} + +// EventUpdate implements transport.ConnectedEndpoint.EventUpdate. +func (c *ConnectedEndpoint) EventUpdate() { + c.mu.RLock() + defer c.mu.RUnlock() + if c.fd != -1 { + fdnotifier.UpdateFD(int32(c.fd)) + } +} + +// Recv implements transport.Receiver.Recv. +func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { + c.mu.RLock() + defer c.mu.RUnlock() + + var cm unet.ControlMessage + if numRights > 0 { + cm.EnableFDs(int(numRights)) + } + + // N.B. Unix sockets don't have a receive buffer, the send buffer + // serves both purposes. + rl, ml, cl, cTrunc, err := fdReadVec(c.fd, data, []byte(cm), peek, c.sndbuf) + if rl > 0 && err != nil { + // We got some data, so all we need to do on error is return + // the data that we got. Short reads are fine, no need to + // block. + err = nil + } + if err != nil { + return 0, 0, transport.ControlMessages{}, false, tcpip.FullAddress{}, false, syserr.FromError(err) + } + + // There is no need for the callee to call RecvNotify because fdReadVec uses + // the host's recvmsg(2) and the host kernel's queue. + + // Trim the control data if we received less than the full amount. + if cl < uint64(len(cm)) { + cm = cm[:cl] + } + + // Avoid extra allocations in the case where there isn't any control data. + if len(cm) == 0 { + return rl, ml, transport.ControlMessages{}, cTrunc, tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, false, nil + } + + fds, err := cm.ExtractFDs() + if err != nil { + return 0, 0, transport.ControlMessages{}, false, tcpip.FullAddress{}, false, syserr.FromError(err) + } + + if len(fds) == 0 { + return rl, ml, transport.ControlMessages{}, cTrunc, tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, false, nil + } + return rl, ml, control.NewVFS2(nil, nil, newSCMRights(fds)), cTrunc, tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, false, nil +} + +// RecvNotify implements transport.Receiver.RecvNotify. +func (c *ConnectedEndpoint) RecvNotify() {} + +// CloseRecv implements transport.Receiver.CloseRecv. +func (c *ConnectedEndpoint) CloseRecv() { + c.mu.Lock() + defer c.mu.Unlock() + + if err := syscall.Shutdown(c.fd, syscall.SHUT_RD); err != nil { + // A well-formed UDS shutdown can't fail. See + // net/unix/af_unix.c:unix_shutdown. + panic(fmt.Sprintf("failed read shutdown on host socket %+v: %v", c, err)) + } +} + +// Readable implements transport.Receiver.Readable. +func (c *ConnectedEndpoint) Readable() bool { + c.mu.RLock() + defer c.mu.RUnlock() + + return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.EventIn)&waiter.EventIn != 0 +} + +// SendQueuedSize implements transport.Receiver.SendQueuedSize. +func (c *ConnectedEndpoint) SendQueuedSize() int64 { + // TODO(gvisor.dev/issue/273): SendQueuedSize isn't supported for host + // sockets because we don't allow the sentry to call ioctl(2). + return -1 +} + +// RecvQueuedSize implements transport.Receiver.RecvQueuedSize. +func (c *ConnectedEndpoint) RecvQueuedSize() int64 { + // TODO(gvisor.dev/issue/273): RecvQueuedSize isn't supported for host + // sockets because we don't allow the sentry to call ioctl(2). + return -1 +} + +// SendMaxQueueSize implements transport.Receiver.SendMaxQueueSize. +func (c *ConnectedEndpoint) SendMaxQueueSize() int64 { + return int64(c.sndbuf) +} + +// RecvMaxQueueSize implements transport.Receiver.RecvMaxQueueSize. +func (c *ConnectedEndpoint) RecvMaxQueueSize() int64 { + // N.B. Unix sockets don't use the receive buffer. We'll claim it is + // the same size as the send buffer. + return int64(c.sndbuf) +} + +func (c *ConnectedEndpoint) destroyLocked() { + fdnotifier.RemoveFD(int32(c.fd)) + c.fd = -1 +} + +// Release implements transport.ConnectedEndpoint.Release and +// transport.Receiver.Release. +func (c *ConnectedEndpoint) Release() { + c.ref.DecRefWithDestructor(func() { + c.mu.Lock() + c.destroyLocked() + c.mu.Unlock() + }) +} + +// CloseUnread implements transport.ConnectedEndpoint.CloseUnread. +func (c *ConnectedEndpoint) CloseUnread() {} + +// SCMConnectedEndpoint represents an endpoint backed by a host fd that was +// passed through a gofer Unix socket. It is almost the same as +// ConnectedEndpoint, with the following differences: +// - SCMConnectedEndpoint is not saveable, because the host cannot guarantee +// the same descriptor number across S/R. +// - SCMConnectedEndpoint holds ownership of its fd and is responsible for +// closing it. +type SCMConnectedEndpoint struct { + ConnectedEndpoint +} + +// Release implements transport.ConnectedEndpoint.Release and +// transport.Receiver.Release. +func (e *SCMConnectedEndpoint) Release() { + e.ref.DecRefWithDestructor(func() { + e.mu.Lock() + if err := syscall.Close(e.fd); err != nil { + log.Warningf("Failed to close host fd %d: %v", err) + } + e.destroyLocked() + e.mu.Unlock() + }) +} + +// NewSCMEndpoint creates a new SCMConnectedEndpoint backed by a host fd that +// was passed through a Unix socket. +// +// The caller is responsible for calling Init(). Additionaly, Release needs to +// be called twice because ConnectedEndpoint is both a transport.Receiver and +// transport.ConnectedEndpoint. +func NewSCMEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue, addr string) (*SCMConnectedEndpoint, *syserr.Error) { + e := SCMConnectedEndpoint{ConnectedEndpoint{ + fd: hostFD, + addr: addr, + queue: queue, + }} + + if err := e.init(); err != nil { + return nil, err + } + + // AtomicRefCounters start off with a single reference. We need two. + e.ref.IncRef() + e.ref.EnableLeakCheck("host.SCMConnectedEndpoint") + return &e, nil +} diff --git a/pkg/sentry/fsimpl/host/socket_iovec.go b/pkg/sentry/fsimpl/host/socket_iovec.go new file mode 100644 index 000000000..584c247d2 --- /dev/null +++ b/pkg/sentry/fsimpl/host/socket_iovec.go @@ -0,0 +1,113 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserror" +) + +// maxIovs is the maximum number of iovecs to pass to the host. +var maxIovs = linux.UIO_MAXIOV + +// copyToMulti copies as many bytes from src to dst as possible. +func copyToMulti(dst [][]byte, src []byte) { + for _, d := range dst { + done := copy(d, src) + src = src[done:] + if len(src) == 0 { + break + } + } +} + +// copyFromMulti copies as many bytes from src to dst as possible. +func copyFromMulti(dst []byte, src [][]byte) { + for _, s := range src { + done := copy(dst, s) + dst = dst[done:] + if len(dst) == 0 { + break + } + } +} + +// buildIovec builds an iovec slice from the given []byte slice. +// +// If truncate, truncate bufs > maxlen. Otherwise, immediately return an error. +// +// If length < the total length of bufs, err indicates why, even when returning +// a truncated iovec. +// +// If intermediate != nil, iovecs references intermediate rather than bufs and +// the caller must copy to/from bufs as necessary. +func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovecs []syscall.Iovec, intermediate []byte, err error) { + var iovsRequired int + for _, b := range bufs { + length += int64(len(b)) + if len(b) > 0 { + iovsRequired++ + } + } + + stopLen := length + if length > maxlen { + if truncate { + stopLen = maxlen + err = syserror.EAGAIN + } else { + return 0, nil, nil, syserror.EMSGSIZE + } + } + + if iovsRequired > maxIovs { + // The kernel will reject our call if we pass this many iovs. + // Use a single intermediate buffer instead. + b := make([]byte, stopLen) + + return stopLen, []syscall.Iovec{{ + Base: &b[0], + Len: uint64(stopLen), + }}, b, err + } + + var total int64 + iovecs = make([]syscall.Iovec, 0, iovsRequired) + for i := range bufs { + l := len(bufs[i]) + if l == 0 { + continue + } + + stop := int64(l) + if total+stop > stopLen { + stop = stopLen - total + } + + iovecs = append(iovecs, syscall.Iovec{ + Base: &bufs[i][0], + Len: uint64(stop), + }) + + total += stop + if total >= stopLen { + break + } + } + + return total, iovecs, nil, err +} diff --git a/pkg/sentry/fsimpl/host/socket_unsafe.go b/pkg/sentry/fsimpl/host/socket_unsafe.go new file mode 100644 index 000000000..35ded24bc --- /dev/null +++ b/pkg/sentry/fsimpl/host/socket_unsafe.go @@ -0,0 +1,101 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "syscall" + "unsafe" +) + +// fdReadVec receives from fd to bufs. +// +// If the total length of bufs is > maxlen, fdReadVec will do a partial read +// and err will indicate why the message was truncated. +func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int64) (readLen int64, msgLen int64, controlLen uint64, controlTrunc bool, err error) { + flags := uintptr(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC) + if peek { + flags |= syscall.MSG_PEEK + } + + // Always truncate the receive buffer. All socket types will truncate + // received messages. + length, iovecs, intermediate, err := buildIovec(bufs, maxlen, true) + if err != nil && len(iovecs) == 0 { + // No partial write to do, return error immediately. + return 0, 0, 0, false, err + } + + var msg syscall.Msghdr + if len(control) != 0 { + msg.Control = &control[0] + msg.Controllen = uint64(len(control)) + } + + if len(iovecs) != 0 { + msg.Iov = &iovecs[0] + msg.Iovlen = uint64(len(iovecs)) + } + + rawN, _, e := syscall.RawSyscall(syscall.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), flags) + if e != 0 { + // N.B. prioritize the syscall error over the buildIovec error. + return 0, 0, 0, false, e + } + n := int64(rawN) + + // Copy data back to bufs. + if intermediate != nil { + copyToMulti(bufs, intermediate) + } + + controlTrunc = msg.Flags&syscall.MSG_CTRUNC == syscall.MSG_CTRUNC + + if n > length { + return length, n, msg.Controllen, controlTrunc, err + } + + return n, n, msg.Controllen, controlTrunc, err +} + +// fdWriteVec sends from bufs to fd. +// +// If the total length of bufs is > maxlen && truncate, fdWriteVec will do a +// partial write and err will indicate why the message was truncated. +func fdWriteVec(fd int, bufs [][]byte, maxlen int64, truncate bool) (int64, int64, error) { + length, iovecs, intermediate, err := buildIovec(bufs, maxlen, truncate) + if err != nil && len(iovecs) == 0 { + // No partial write to do, return error immediately. + return 0, length, err + } + + // Copy data to intermediate buf. + if intermediate != nil { + copyFromMulti(intermediate, bufs) + } + + var msg syscall.Msghdr + if len(iovecs) > 0 { + msg.Iov = &iovecs[0] + msg.Iovlen = uint64(len(iovecs)) + } + + n, _, e := syscall.RawSyscall(syscall.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), syscall.MSG_DONTWAIT|syscall.MSG_NOSIGNAL) + if e != 0 { + // N.B. prioritize the syscall error over the buildIovec error. + return 0, length, e + } + + return int64(n), length, err +} diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go index 5ce50625b..271134af8 100644 --- a/pkg/sentry/fsimpl/sockfs/sockfs.go +++ b/pkg/sentry/fsimpl/sockfs/sockfs.go @@ -73,26 +73,14 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr return nil, syserror.ENXIO } -// InitSocket initializes a socket FileDescription, with a corresponding -// Dentry in mnt. -// -// fd should be the FileDescription associated with socketImpl, i.e. its first -// field. mnt should be the global socket mount, Kernel.socketMount. -func InitSocket(socketImpl vfs.FileDescriptionImpl, fd *vfs.FileDescription, mnt *vfs.Mount, creds *auth.Credentials) error { - fsimpl := mnt.Filesystem().Impl() - fs := fsimpl.(*kernfs.Filesystem) - +// NewDentry constructs and returns a sockfs dentry. +func NewDentry(creds *auth.Credentials, ino uint64) *vfs.Dentry { // File mode matches net/socket.c:sock_alloc. filemode := linux.FileMode(linux.S_IFSOCK | 0600) i := &inode{} - i.Init(creds, fs.NextIno(), filemode) + i.Init(creds, ino, filemode) d := &kernfs.Dentry{} d.Init(i) - - opts := &vfs.FileDescriptionOptions{UseDentryMetadata: true} - if err := fd.Init(socketImpl, linux.O_RDWR, mnt, d.VFSDentry(), opts); err != nil { - return err - } - return nil + return d.VFSDentry() } diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index de2cc4bdf..941a91097 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -21,6 +21,7 @@ go_library( "//pkg/sentry/device", "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/time", diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 3e54d49c4..315dc274f 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -19,6 +19,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket/control" @@ -42,30 +43,44 @@ type SocketVFS2 struct { socketOpsCommon } -// NewVFS2File creates and returns a new vfs.FileDescription for a unix socket. -func NewVFS2File(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) { - sock := NewFDImpl(ep, stype) - vfsfd := &sock.vfsfd - if err := sockfs.InitSocket(sock, vfsfd, t.Kernel().SocketMount(), t.Credentials()); err != nil { +// NewSockfsFile creates a new socket file in the global sockfs mount and +// returns a corresponding file description. +func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) { + mnt := t.Kernel().SocketMount() + fs := mnt.Filesystem().Impl().(*kernfs.Filesystem) + d := sockfs.NewDentry(t.Credentials(), fs.NextIno()) + + fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d) + if err != nil { return nil, syserr.FromError(err) } - return vfsfd, nil + return fd, nil } -// NewFDImpl creates and returns a new SocketVFS2. -func NewFDImpl(ep transport.Endpoint, stype linux.SockType) *SocketVFS2 { +// NewFileDescription creates and returns a socket file description +// corresponding to the given mount and dentry. +func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry) (*vfs.FileDescription, error) { // You can create AF_UNIX, SOCK_RAW sockets. They're the same as // SOCK_DGRAM and don't require CAP_NET_RAW. if stype == linux.SOCK_RAW { stype = linux.SOCK_DGRAM } - return &SocketVFS2{ + sock := &SocketVFS2{ socketOpsCommon: socketOpsCommon{ ep: ep, stype: stype, }, } + vfsfd := &sock.vfsfd + if err := vfsfd.Init(sock, flags, mnt, d, &vfs.FileDescriptionOptions{ + DenyPRead: true, + DenyPWrite: true, + UseDentryMetadata: true, + }); err != nil { + return nil, err + } + return vfsfd, nil } // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by @@ -112,8 +127,7 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block } } - // We expect this to be a FileDescription here. - ns, err := NewVFS2File(t, ep, s.stype) + ns, err := NewSockfsFile(t, ep, s.stype) if err != nil { return 0, nil, 0, err } @@ -307,7 +321,7 @@ func (*providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int) return nil, syserr.ErrInvalidArgument } - f, err := NewVFS2File(t, ep, stype) + f, err := NewSockfsFile(t, ep, stype) if err != nil { ep.Close() return nil, err @@ -331,13 +345,13 @@ func (*providerVFS2) Pair(t *kernel.Task, stype linux.SockType, protocol int) (* // Create the endpoints and sockets. ep1, ep2 := transport.NewPair(t, stype, t.Kernel()) - s1, err := NewVFS2File(t, ep1, stype) + s1, err := NewSockfsFile(t, ep1, stype) if err != nil { ep1.Close() ep2.Close() return nil, nil, err } - s2, err := NewVFS2File(t, ep2, stype) + s2, err := NewSockfsFile(t, ep2, stype) if err != nil { s1.DecRef() ep2.Close() -- cgit v1.2.3 From f3ca5ca82abe35d13210f8a8035926170bb87436 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 28 Apr 2020 08:32:20 -0700 Subject: Support pipes and sockets in VFS2 gofer fs. Named pipes and sockets can be represented in two ways in gofer fs: 1. As a file on the remote filesystem. In this case, all file operations are passed through 9p. 2. As a synthetic file that is internal to the sandbox. In this case, the dentry stores an endpoint or VFSPipe for sockets and pipes respectively, which replaces interactions with the remote fs through the gofer. In gofer.filesystem.MknodAt, we attempt to call mknod(2) through 9p, and if it fails, fall back to the synthetic version. Updates #1200. PiperOrigin-RevId: 308828161 --- pkg/sentry/fs/gofer/socket.go | 4 + pkg/sentry/fsimpl/ext/filesystem.go | 2 +- pkg/sentry/fsimpl/gofer/BUILD | 8 ++ pkg/sentry/fsimpl/gofer/directory.go | 39 +++++++-- pkg/sentry/fsimpl/gofer/filesystem.go | 128 ++++++++++++++++++++++-------- pkg/sentry/fsimpl/gofer/gofer.go | 32 ++++++-- pkg/sentry/fsimpl/gofer/socket.go | 141 +++++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/kernfs/filesystem.go | 2 +- pkg/sentry/fsimpl/tmpfs/filesystem.go | 2 +- pkg/sentry/socket/unix/unix.go | 2 +- pkg/sentry/vfs/anonfs.go | 2 +- pkg/sentry/vfs/filesystem.go | 2 +- pkg/sentry/vfs/options.go | 18 +++++ pkg/sentry/vfs/vfs.go | 54 ++++++------- 14 files changed, 359 insertions(+), 77 deletions(-) create mode 100644 pkg/sentry/fsimpl/gofer/socket.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 10ba2f5f0..40f2c1cad 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -47,6 +47,8 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport. return &endpoint{inode, i.fileState.file.file, path} } +// LINT.IfChange + // endpoint is a Gofer-backed transport.BoundEndpoint. // // An endpoint's lifetime is the time between when InodeOperations.BoundEndpoint() @@ -146,3 +148,5 @@ func (e *endpoint) Release() { func (e *endpoint) Passcred() bool { return false } + +// LINT.ThenChange(../../fsimpl/gofer/socket.go) diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index 2c22a04af..6c882d0b6 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -485,7 +485,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error } // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt. -func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath) (transport.BoundEndpoint, error) { +func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { _, _, err := fs.walk(rp, false) if err != nil { return nil, err diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index b9c4beee4..5ce82b793 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -38,6 +38,7 @@ go_library( "p9file.go", "pagemath.go", "regular_file.go", + "socket.go", "special_file.go", "symlink.go", "time.go", @@ -52,18 +53,25 @@ go_library( "//pkg/p9", "//pkg/safemem", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fsimpl/host", "//pkg/sentry/hostfd", + "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", + "//pkg/sentry/socket/control", + "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/vfs", + "//pkg/syserr", "//pkg/syserror", "//pkg/unet", "//pkg/usermem", + "//pkg/waiter", ], ) diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go index 55f9ed911..b98218753 100644 --- a/pkg/sentry/fsimpl/gofer/directory.go +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -15,6 +15,7 @@ package gofer import ( + "fmt" "sync" "sync/atomic" @@ -22,6 +23,8 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -59,28 +62,52 @@ func (d *dentry) cacheNegativeLookupLocked(name string) { d.children[name] = nil } -// createSyntheticDirectory creates a synthetic directory with the given name +type createSyntheticOpts struct { + name string + mode linux.FileMode + kuid auth.KUID + kgid auth.KGID + + // The endpoint for a synthetic socket. endpoint should be nil if the file + // being created is not a socket. + endpoint transport.BoundEndpoint + + // pipe should be nil if the file being created is not a pipe. + pipe *pipe.VFSPipe +} + +// createSyntheticChildLocked creates a synthetic file with the given name // in d. // // Preconditions: d.dirMu must be locked. d.isDir(). d does not already contain // a child with the given name. -func (d *dentry) createSyntheticDirectoryLocked(name string, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) { +func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) { d2 := &dentry{ refs: 1, // held by d fs: d.fs, - mode: uint32(mode) | linux.S_IFDIR, - uid: uint32(kuid), - gid: uint32(kgid), + mode: uint32(opts.mode), + uid: uint32(opts.kuid), + gid: uint32(opts.kgid), blockSize: usermem.PageSize, // arbitrary handle: handle{ fd: -1, }, nlink: uint32(2), } + switch opts.mode.FileType() { + case linux.S_IFDIR: + // Nothing else needs to be done. + case linux.S_IFSOCK: + d2.endpoint = opts.endpoint + case linux.S_IFIFO: + d2.pipe = opts.pipe + default: + panic(fmt.Sprintf("failed to create synthetic file of unrecognized type: %v", opts.mode.FileType())) + } d2.pf.dentry = d2 d2.vfsd.Init(d2) - d.cacheNewChildLocked(d2, name) + d.cacheNewChildLocked(d2, opts.name) d.syntheticChildren++ } diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 98ccb42fd..4a8411371 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -22,9 +22,11 @@ import ( "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Sync implements vfs.FilesystemImpl.Sync. @@ -652,7 +654,12 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return err } ctx.Infof("Failed to create remote directory %q: %v; falling back to synthetic directory", name, err) - parent.createSyntheticDirectoryLocked(name, opts.Mode, creds.EffectiveKUID, creds.EffectiveKGID) + parent.createSyntheticChildLocked(&createSyntheticOpts{ + name: name, + mode: linux.S_IFDIR | opts.Mode, + kuid: creds.EffectiveKUID, + kgid: creds.EffectiveKGID, + }) } if fs.opts.interop != InteropModeShared { parent.incLinks() @@ -663,7 +670,12 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v // Can't create non-synthetic files in synthetic directories. return syserror.EPERM } - parent.createSyntheticDirectoryLocked(name, opts.Mode, creds.EffectiveKUID, creds.EffectiveKGID) + parent.createSyntheticChildLocked(&createSyntheticOpts{ + name: name, + mode: linux.S_IFDIR | opts.Mode, + kuid: creds.EffectiveKUID, + kgid: creds.EffectiveKGID, + }) parent.incLinks() return nil }) @@ -674,6 +686,28 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error { creds := rp.Credentials() _, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) + if err == syserror.EPERM { + switch opts.Mode.FileType() { + case linux.S_IFSOCK: + parent.createSyntheticChildLocked(&createSyntheticOpts{ + name: name, + mode: opts.Mode, + kuid: creds.EffectiveKUID, + kgid: creds.EffectiveKGID, + endpoint: opts.Endpoint, + }) + return nil + case linux.S_IFIFO: + parent.createSyntheticChildLocked(&createSyntheticOpts{ + name: name, + mode: opts.Mode, + kuid: creds.EffectiveKUID, + kgid: creds.EffectiveKGID, + pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize), + }) + return nil + } + } return err }, nil) } @@ -756,20 +790,21 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf return nil, err } mnt := rp.Mount() - filetype := d.fileType() - switch { - case filetype == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD: - if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0); err != nil { - return nil, err - } - fd := ®ularFileFD{} - if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ - AllowDirectIO: true, - }); err != nil { - return nil, err + switch d.fileType() { + case linux.S_IFREG: + if !d.fs.opts.regularFilesUseSpecialFileFD { + if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0); err != nil { + return nil, err + } + fd := ®ularFileFD{} + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ + AllowDirectIO: true, + }); err != nil { + return nil, err + } + return &fd.vfsfd, nil } - return &fd.vfsfd, nil - case filetype == linux.S_IFDIR: + case linux.S_IFDIR: // Can't open directories with O_CREAT. if opts.Flags&linux.O_CREAT != 0 { return nil, syserror.EISDIR @@ -791,26 +826,40 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf return nil, err } return &fd.vfsfd, nil - case filetype == linux.S_IFLNK: + case linux.S_IFLNK: // Can't open symlinks without O_PATH (which is unimplemented). return nil, syserror.ELOOP - default: - if opts.Flags&linux.O_DIRECT != 0 { - return nil, syserror.EINVAL - } - h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0) - if err != nil { - return nil, err - } - fd := &specialFileFD{ - handle: h, + case linux.S_IFSOCK: + if d.isSynthetic() { + return nil, syserror.ENXIO } - if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { - h.close(ctx) - return nil, err + case linux.S_IFIFO: + if d.isSynthetic() { + return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags) } - return &fd.vfsfd, nil } + return d.openSpecialFileLocked(ctx, mnt, opts) +} + +func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { + ats := vfs.AccessTypesForOpenFlags(opts) + // Treat as a special file. This is done for non-synthetic pipes as well as + // regular files when d.fs.opts.regularFilesUseSpecialFileFD is true. + if opts.Flags&linux.O_DIRECT != 0 { + return nil, syserror.EINVAL + } + h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0) + if err != nil { + return nil, err + } + fd := &specialFileFD{ + handle: h, + } + if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + h.close(ctx) + return nil, err + } + return &fd.vfsfd, nil } // Preconditions: d.fs.renameMu must be locked. d.dirMu must be locked. @@ -1196,15 +1245,28 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error } // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt. -func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath) (transport.BoundEndpoint, error) { +func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { var ds *[]*dentry fs.renameMu.RLock() defer fs.renameMuRUnlockAndCheckCaching(&ds) - _, err := fs.resolveLocked(ctx, rp, &ds) + d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return nil, err } - // TODO(gvisor.dev/issue/1476): Implement BoundEndpointAt. + if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { + return nil, err + } + if d.isSocket() { + if !d.isSynthetic() { + d.IncRef() + return &endpoint{ + dentry: d, + file: d.file.file, + path: opts.Addr, + }, nil + } + return d.endpoint, nil + } return nil, syserror.ECONNREFUSED } diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 8b4e91d17..3235816c2 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -46,9 +46,11 @@ import ( "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/unet" @@ -585,6 +587,14 @@ type dentry struct { // and target are protected by dataMu. haveTarget bool target string + + // If this dentry represents a synthetic socket file, endpoint is the + // transport endpoint bound to this file. + endpoint transport.BoundEndpoint + + // If this dentry represents a synthetic named pipe, pipe is the pipe + // endpoint bound to this file. + pipe *pipe.VFSPipe } // dentryAttrMask returns a p9.AttrMask enabling all attributes used by the @@ -791,10 +801,21 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin setLocalAtime = stat.Mask&linux.STATX_ATIME != 0 setLocalMtime = stat.Mask&linux.STATX_MTIME != 0 stat.Mask &^= linux.STATX_ATIME | linux.STATX_MTIME - if !setLocalMtime && (stat.Mask&linux.STATX_SIZE != 0) { - // Truncate updates mtime. - setLocalMtime = true - stat.Mtime.Nsec = linux.UTIME_NOW + + // Prepare for truncate. + if stat.Mask&linux.STATX_SIZE != 0 { + switch d.mode & linux.S_IFMT { + case linux.S_IFREG: + if !setLocalMtime { + // Truncate updates mtime. + setLocalMtime = true + stat.Mtime.Nsec = linux.UTIME_NOW + } + case linux.S_IFDIR: + return syserror.EISDIR + default: + return syserror.EINVAL + } } } d.metadataMu.Lock() @@ -1049,6 +1070,7 @@ func (d *dentry) destroyLocked() { d.handle.close(ctx) } d.handleMu.Unlock() + if !d.file.isNil() { d.file.close(ctx) d.file = p9file{} @@ -1134,7 +1156,7 @@ func (d *dentry) removexattr(ctx context.Context, creds *auth.Credentials, name return d.file.removeXattr(ctx, name) } -// Preconditions: !d.file.isNil(). d.isRegularFile() || d.isDirectory(). +// Preconditions: !d.isSynthetic(). d.isRegularFile() || d.isDirectory(). func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool) error { // O_TRUNC unconditionally requires us to obtain a new handle (opened with // O_TRUNC). diff --git a/pkg/sentry/fsimpl/gofer/socket.go b/pkg/sentry/fsimpl/gofer/socket.go new file mode 100644 index 000000000..73835df91 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/socket.go @@ -0,0 +1,141 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/waiter" +) + +func (d *dentry) isSocket() bool { + return d.fileType() == linux.S_IFSOCK +} + +// endpoint is a Gofer-backed transport.BoundEndpoint. +// +// An endpoint's lifetime is the time between when filesystem.BoundEndpointAt() +// is called and either BoundEndpoint.BidirectionalConnect or +// BoundEndpoint.UnidirectionalConnect is called. +type endpoint struct { + // dentry is the filesystem dentry which produced this endpoint. + dentry *dentry + + // file is the p9 file that contains a single unopened fid. + file p9.File + + // path is the sentry path where this endpoint is bound. + path string +} + +func sockTypeToP9(t linux.SockType) (p9.ConnectFlags, bool) { + switch t { + case linux.SOCK_STREAM: + return p9.StreamSocket, true + case linux.SOCK_SEQPACKET: + return p9.SeqpacketSocket, true + case linux.SOCK_DGRAM: + return p9.DgramSocket, true + } + return 0, false +} + +// BidirectionalConnect implements ConnectableEndpoint.BidirectionalConnect. +func (e *endpoint) BidirectionalConnect(ctx context.Context, ce transport.ConnectingEndpoint, returnConnect func(transport.Receiver, transport.ConnectedEndpoint)) *syserr.Error { + cf, ok := sockTypeToP9(ce.Type()) + if !ok { + return syserr.ErrConnectionRefused + } + + // No lock ordering required as only the ConnectingEndpoint has a mutex. + ce.Lock() + + // Check connecting state. + if ce.Connected() { + ce.Unlock() + return syserr.ErrAlreadyConnected + } + if ce.Listening() { + ce.Unlock() + return syserr.ErrInvalidEndpointState + } + + c, err := e.newConnectedEndpoint(ctx, cf, ce.WaiterQueue()) + if err != nil { + ce.Unlock() + return err + } + + returnConnect(c, c) + ce.Unlock() + c.Init() + + return nil +} + +// UnidirectionalConnect implements +// transport.BoundEndpoint.UnidirectionalConnect. +func (e *endpoint) UnidirectionalConnect(ctx context.Context) (transport.ConnectedEndpoint, *syserr.Error) { + c, err := e.newConnectedEndpoint(ctx, p9.DgramSocket, &waiter.Queue{}) + if err != nil { + return nil, err + } + c.Init() + + // We don't need the receiver. + c.CloseRecv() + c.Release() + + return c, nil +} + +func (e *endpoint) newConnectedEndpoint(ctx context.Context, flags p9.ConnectFlags, queue *waiter.Queue) (*host.SCMConnectedEndpoint, *syserr.Error) { + hostFile, err := e.file.Connect(flags) + if err != nil { + return nil, syserr.ErrConnectionRefused + } + // Dup the fd so that the new endpoint can manage its lifetime. + hostFD, err := syscall.Dup(hostFile.FD()) + if err != nil { + log.Warningf("Could not dup host socket fd %d: %v", hostFile.FD(), err) + return nil, syserr.FromError(err) + } + // After duplicating, we no longer need hostFile. + hostFile.Close() + + c, serr := host.NewSCMEndpoint(ctx, hostFD, queue, e.path) + if serr != nil { + log.Warningf("Gofer returned invalid host socket for BidirectionalConnect; file %+v flags %+v: %v", e.file, flags, serr) + return nil, serr + } + return c, nil +} + +// Release implements transport.BoundEndpoint.Release. +func (e *endpoint) Release() { + e.dentry.DecRef() +} + +// Passcred implements transport.BoundEndpoint.Passcred. +func (e *endpoint) Passcred() bool { + return false +} diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 9e8d80414..40c1b2104 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -766,7 +766,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error } // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt. -func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath) (transport.BoundEndpoint, error) { +func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 5b62f9ebb..388b98bef 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -697,7 +697,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error } // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt. -func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath) (transport.BoundEndpoint, error) { +func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { fs.mu.RLock() defer fs.mu.RUnlock() d, err := resolveLocked(rp) diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 7c64f30fa..9aad41c9e 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -373,7 +373,7 @@ func extractEndpoint(t *kernel.Task, sockaddr []byte) (transport.BoundEndpoint, Path: p, FollowFinalSymlink: true, } - ep, e := t.Kernel().VFS().BoundEndpointAt(t, t.Credentials(), &pop) + ep, e := t.Kernel().VFS().BoundEndpointAt(t, t.Credentials(), &pop, &vfs.BoundEndpointOptions{path}) root.DecRef() if relPath { start.DecRef() diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index a64d86122..b1a998590 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -237,7 +237,7 @@ func (fs *anonFilesystem) UnlinkAt(ctx context.Context, rp *ResolvingPath) error } // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt. -func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath) (transport.BoundEndpoint, error) { +func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath, opts BoundEndpointOptions) (transport.BoundEndpoint, error) { if !rp.Final() { return nil, syserror.ENOTDIR } diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index 20e5bb072..70385a21f 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -495,7 +495,7 @@ type FilesystemImpl interface { // BoundEndpointAt returns the Unix socket endpoint bound at the path rp. // // - If a non-socket file exists at rp, then BoundEndpointAt returns ECONNREFUSED. - BoundEndpointAt(ctx context.Context, rp *ResolvingPath) (transport.BoundEndpoint, error) + BoundEndpointAt(ctx context.Context, rp *ResolvingPath, opts BoundEndpointOptions) (transport.BoundEndpoint, error) // PrependPath prepends a path from vd to vd.Mount().Root() to b. // diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go index 022bac127..53d364c5c 100644 --- a/pkg/sentry/vfs/options.go +++ b/pkg/sentry/vfs/options.go @@ -151,6 +151,24 @@ type SetStatOptions struct { Stat linux.Statx } +// BoundEndpointOptions contains options to VirtualFilesystem.BoundEndpointAt() +// and FilesystemImpl.BoundEndpointAt(). +type BoundEndpointOptions struct { + // Addr is the path of the file whose socket endpoint is being retrieved. + // It is generally irrelevant: most endpoints are stored at a dentry that + // was created through a bind syscall, so the path can be stored on creation. + // However, if the endpoint was created in FilesystemImpl.BoundEndpointAt(), + // then we may not know what the original bind address was. + // + // For example, if connect(2) is called with address "foo" which corresponds + // a remote named socket in goferfs, we need to generate an endpoint wrapping + // that file. In this case, we can use Addr to set the endpoint address to + // "foo". Note that Addr is only a best-effort attempt--we still do not know + // the exact address that was used on the remote fs to bind the socket (it + // may have been "foo", "./foo", etc.). + Addr string +} + // GetxattrOptions contains options to VirtualFilesystem.GetxattrAt(), // FilesystemImpl.GetxattrAt(), FileDescription.Getxattr(), and // FileDescriptionImpl.Getxattr(). diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 9015f2cc1..8d7f8f8af 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -351,33 +351,6 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia } } -// BoundEndpointAt gets the bound endpoint at the given path, if one exists. -func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (transport.BoundEndpoint, error) { - if !pop.Path.Begin.Ok() { - if pop.Path.Absolute { - return nil, syserror.ECONNREFUSED - } - return nil, syserror.ENOENT - } - rp := vfs.getResolvingPath(creds, pop) - for { - bep, err := rp.mount.fs.impl.BoundEndpointAt(ctx, rp) - if err == nil { - vfs.putResolvingPath(rp) - return bep, nil - } - if checkInvariants { - if rp.canHandleError(err) && rp.Done() { - panic(fmt.Sprintf("%T.BoundEndpointAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) - } - } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) - return nil, err - } - } -} - // OpenAt returns a FileDescription providing access to the file at the given // path. A reference is taken on the returned FileDescription. func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *OpenOptions) (*FileDescription, error) { @@ -675,6 +648,33 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti } } +// BoundEndpointAt gets the bound endpoint at the given path, if one exists. +func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *BoundEndpointOptions) (transport.BoundEndpoint, error) { + if !pop.Path.Begin.Ok() { + if pop.Path.Absolute { + return nil, syserror.ECONNREFUSED + } + return nil, syserror.ENOENT + } + rp := vfs.getResolvingPath(creds, pop) + for { + bep, err := rp.mount.fs.impl.BoundEndpointAt(ctx, rp, *opts) + if err == nil { + vfs.putResolvingPath(rp) + return bep, nil + } + if checkInvariants { + if rp.canHandleError(err) && rp.Done() { + panic(fmt.Sprintf("%T.BoundEndpointAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) + } + } + if !rp.handleError(err) { + vfs.putResolvingPath(rp) + return nil, err + } + } +} + // ListxattrAt returns all extended attribute names for the file at the given // path. func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) { -- cgit v1.2.3 From 35951c3671f3d429399eb581ad9da3b56e2a5f5a Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Mon, 4 May 2020 18:00:56 -0700 Subject: Translate p9.NoUID/GID to OverflowUID/GID. p9.NoUID/GID (== uint32(-1) == auth.NoID) is not a valid auth.KUID/KGID; in particular, using it for file ownership causes capabilities to be ineffective since file capabilities require that the file's KUID and KGID are mapped into the capability holder's user namespace [1], and auth.NoID is not mapped into any user namespace. Map p9.NoUID/GID to a different, valid KUID/KGID; in the unlikely case that an application actually using the overflow KUID/KGID attempts an operation that is consequently permitted by client permission checks, the remote operation will still fail with EPERM. Since this changes the VFS2 gofer client to no longer ignore the invalid IDs entirely, this CL both permits and requires that we change synthetic mount point creation to use root credentials. [1] See fs.Inode.CheckCapability or vfs.GenericCheckPermissions. PiperOrigin-RevId: 309856455 --- pkg/sentry/fs/gofer/attr.go | 12 ++++++++++-- pkg/sentry/fsimpl/gofer/gofer.go | 26 ++++++++++++++++++++------ 2 files changed, 30 insertions(+), 8 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go index 6db4b762d..d481baf77 100644 --- a/pkg/sentry/fs/gofer/attr.go +++ b/pkg/sentry/fs/gofer/attr.go @@ -75,10 +75,18 @@ func owner(mounter fs.FileOwner, valid p9.AttrMask, pattr p9.Attr) fs.FileOwner // task's EUID/EGID. owner := mounter if valid.UID { - owner.UID = auth.KUID(pattr.UID) + if pattr.UID.Ok() { + owner.UID = auth.KUID(pattr.UID) + } else { + owner.UID = auth.KUID(auth.OverflowUID) + } } if valid.GID { - owner.GID = auth.KGID(pattr.GID) + if pattr.GID.Ok() { + owner.GID = auth.KGID(pattr.GID) + } else { + owner.GID = auth.KGID(auth.OverflowGID) + } } return owner } diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 2881c7bdd..1d9caf127 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -663,11 +663,11 @@ func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, ma }, } d.pf.dentry = d - if mask.UID && attr.UID != auth.NoID { - d.uid = uint32(attr.UID) + if mask.UID { + d.uid = dentryUIDFromP9UID(attr.UID) } - if mask.GID && attr.GID != auth.NoID { - d.gid = uint32(attr.GID) + if mask.GID { + d.gid = dentryGIDFromP9GID(attr.GID) } if mask.Size { d.size = attr.Size @@ -718,10 +718,10 @@ func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) { atomic.StoreUint32(&d.mode, uint32(attr.Mode)) } if mask.UID { - atomic.StoreUint32(&d.uid, uint32(attr.UID)) + atomic.StoreUint32(&d.uid, dentryUIDFromP9UID(attr.UID)) } if mask.GID { - atomic.StoreUint32(&d.gid, uint32(attr.GID)) + atomic.StoreUint32(&d.gid, dentryGIDFromP9GID(attr.GID)) } // There is no P9_GETATTR_* bit for I/O block size. if attr.BlockSize != 0 { @@ -939,6 +939,20 @@ func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))) } +func dentryUIDFromP9UID(uid p9.UID) uint32 { + if !uid.Ok() { + return uint32(auth.OverflowUID) + } + return uint32(uid) +} + +func dentryGIDFromP9GID(gid p9.GID) uint32 { + if !gid.Ok() { + return uint32(auth.OverflowGID) + } + return uint32(gid) +} + // IncRef implements vfs.DentryImpl.IncRef. func (d *dentry) IncRef() { // d.refs may be 0 if d.fs.renameMu is locked, which serializes against -- cgit v1.2.3 From 591ff0e424e3b30eb143bce06618cb8656784b90 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 6 May 2020 10:28:50 -0700 Subject: Add maximum memory limit. PiperOrigin-RevId: 310179277 --- pkg/sentry/fs/proc/meminfo.go | 10 +++++++--- pkg/sentry/fsimpl/proc/tasks_files.go | 10 +++++++--- pkg/sentry/syscalls/linux/sys_sysinfo.go | 7 ++++++- pkg/sentry/usage/memory.go | 24 ++++++++++++++++-------- 4 files changed, 36 insertions(+), 15 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/meminfo.go b/pkg/sentry/fs/proc/meminfo.go index 465b47da9..91617267d 100644 --- a/pkg/sentry/fs/proc/meminfo.go +++ b/pkg/sentry/fs/proc/meminfo.go @@ -58,12 +58,16 @@ func (d *meminfoData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) var buf bytes.Buffer fmt.Fprintf(&buf, "MemTotal: %8d kB\n", totalSize/1024) - memFree := (totalSize - totalUsage) / 1024 + memFree := totalSize - totalUsage + if memFree > totalSize { + // Underflow. + memFree = 0 + } // We use MemFree as MemAvailable because we don't swap. // TODO(rahat): When reclaim is implemented the value of MemAvailable // should change. - fmt.Fprintf(&buf, "MemFree: %8d kB\n", memFree) - fmt.Fprintf(&buf, "MemAvailable: %8d kB\n", memFree) + fmt.Fprintf(&buf, "MemFree: %8d kB\n", memFree/1024) + fmt.Fprintf(&buf, "MemAvailable: %8d kB\n", memFree/1024) fmt.Fprintf(&buf, "Buffers: 0 kB\n") // memory usage by block devices fmt.Fprintf(&buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) // Emulate a system with no swap, which disables inactivation of anon pages. diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index 92007df81..e5f13b69e 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -272,12 +272,16 @@ func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { inactiveFile := file - activeFile fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024) - memFree := (totalSize - totalUsage) / 1024 + memFree := totalSize - totalUsage + if memFree > totalSize { + // Underflow. + memFree = 0 + } // We use MemFree as MemAvailable because we don't swap. // TODO(rahat): When reclaim is implemented the value of MemAvailable // should change. - fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree) - fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree) + fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree/1024) + fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree/1024) fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) // Emulate a system with no swap, which disables inactivation of anon pages. diff --git a/pkg/sentry/syscalls/linux/sys_sysinfo.go b/pkg/sentry/syscalls/linux/sys_sysinfo.go index a65b560c8..297de052a 100644 --- a/pkg/sentry/syscalls/linux/sys_sysinfo.go +++ b/pkg/sentry/syscalls/linux/sys_sysinfo.go @@ -29,13 +29,18 @@ func Sysinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca mf.UpdateUsage() _, totalUsage := usage.MemoryAccounting.Copy() totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage) + memFree := totalSize - totalUsage + if memFree > totalSize { + // Underflow. + memFree = 0 + } // Only a subset of the fields in sysinfo_t make sense to return. si := linux.Sysinfo{ Procs: uint16(len(t.PIDNamespace().Tasks())), Uptime: t.Kernel().MonotonicClock().Now().Seconds(), TotalRAM: totalSize, - FreeRAM: totalSize - totalUsage, + FreeRAM: memFree, Unit: 1, } _, err := t.CopyOut(addr, si) diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go index 4320ad17f..ab1d140d2 100644 --- a/pkg/sentry/usage/memory.go +++ b/pkg/sentry/usage/memory.go @@ -252,18 +252,23 @@ func (m *MemoryLocked) Copy() (MemoryStats, uint64) { return ms, m.totalLocked() } -// MinimumTotalMemoryBytes is the minimum reported total system memory. -// -// This can be configured through options provided to the Sentry at start. -// This number is purely synthetic. This is only set before the application -// starts executing, and must not be modified. -var MinimumTotalMemoryBytes uint64 = 2 << 30 // 2 GB +// These options control how much total memory the is reported to the application. +// They may only be set before the application starts executing, and must not +// be modified. +var ( + // MinimumTotalMemoryBytes is the minimum reported total system memory. + MinimumTotalMemoryBytes uint64 = 2 << 30 // 2 GB + + // MaximumTotalMemoryBytes is the maximum reported total system memory. + // The 0 value indicates no maximum. + MaximumTotalMemoryBytes uint64 +) // TotalMemory returns the "total usable memory" available. // // This number doesn't really have a true value so it's based on the following -// inputs and further bounded to be above some minimum guaranteed value (2GB), -// additionally ensuring that total memory reported is always less than used. +// inputs and further bounded to be above the MinumumTotalMemoryBytes and below +// MaximumTotalMemoryBytes. // // memSize should be the platform.Memory size reported by platform.Memory.TotalSize() // used is the total memory reported by MemoryLocked.Total() @@ -279,5 +284,8 @@ func TotalMemory(memSize, used uint64) uint64 { memSize = uint64(1) << (uint(msb) + 1) } } + if MaximumTotalMemoryBytes > 0 && memSize > MaximumTotalMemoryBytes { + memSize = MaximumTotalMemoryBytes + } return memSize } -- cgit v1.2.3 From 16da7e790f570d9743fb47e50c304e6a24834772 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Thu, 7 May 2020 10:19:11 -0700 Subject: Update privateunixsocket TODOs. Synthetic sockets do not have the race condition issue in VFS2, and we will get rid of privateunixsocket as well. Fixes #1200. PiperOrigin-RevId: 310386474 --- pkg/sentry/fs/gofer/session.go | 6 +++--- runsc/boot/vfs.go | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index f6b3ef178..b5efc86f2 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -190,9 +190,9 @@ type session struct { // be socket/pipe files. This allows unix domain sockets and named pipes to // be used with paths that belong to a gofer. // - // TODO(gvisor.dev/issue/1200): there are few possible races with someone - // stat'ing the file and another deleting it concurrently, where the file - // will not be reported as socket file. + // There are a few possible races with someone stat'ing the file and another + // deleting it concurrently, where the file will not be reported as socket + // file. overrides *overrideMaps `state:"wait"` } diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index d1397ed2c..b94211866 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -301,8 +301,8 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun } // p9MountOptions creates a slice of options for a p9 mount. -// TODO(gvisor.dev/issue/1200): Remove this version in favor of the one in -// fs.go when privateunixsocket lands. +// TODO(gvisor.dev/issue/1624): Remove this version once privateunixsocket is +// deleted, along with the rest of VFS1. func p9MountOptionsVFS2(fd int, fa FileAccessType) []string { opts := []string{ "trans=fd", -- cgit v1.2.3 From 7b691ab73c7a3534e3351a5ca55a82e73ec63f75 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Tue, 12 May 2020 13:41:47 -0700 Subject: Don't allow rename across different gofer or tmpfs mounts. Fixes #2651. PiperOrigin-RevId: 311193661 --- pkg/sentry/fs/gofer/path.go | 24 +++++++++++------------- pkg/sentry/fs/tmpfs/tmpfs.go | 11 +++++------ 2 files changed, 16 insertions(+), 19 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index a35c3a23d..cf9800100 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -16,7 +16,6 @@ package gofer import ( "fmt" - "syscall" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" @@ -68,7 +67,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string // Get a p9.File for name. qids, newFile, mask, p9attr, err := i.fileState.file.walkGetAttr(ctx, []string{name}) if err != nil { - if err == syscall.ENOENT { + if err == syserror.ENOENT { if cp.cacheNegativeDirents() { // Return a negative Dirent. It will stay cached until something // is created over it. @@ -207,7 +206,7 @@ func (i *inodeOperations) CreateHardLink(ctx context.Context, inode *fs.Inode, t targetOpts, ok := target.InodeOperations.(*inodeOperations) if !ok { - return syscall.EXDEV + return syserror.EXDEV } if err := i.fileState.file.link(ctx, &targetOpts.fileState.file, newName); err != nil { @@ -251,7 +250,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, } if i.session().overrides == nil { - return nil, syscall.EOPNOTSUPP + return nil, syserror.EOPNOTSUPP } // Stabilize the override map while creation is in progress. @@ -280,7 +279,7 @@ func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name st // N.B. FIFOs use major/minor numbers 0. if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { - if i.session().overrides == nil || err != syscall.EPERM { + if i.session().overrides == nil || err != syserror.EPERM { return err } // If gofer doesn't support mknod, check if we can create an internal fifo. @@ -427,17 +426,16 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent return syserror.ENAMETOOLONG } - // Unwrap the new parent to a *inodeOperations. - newParentInodeOperations, ok := newParent.InodeOperations.(*inodeOperations) - if !ok { - return syscall.EXDEV + // Don't allow renames across different mounts. + if newParent.MountSource != oldParent.MountSource { + return syserror.EXDEV } + // Unwrap the new parent to a *inodeOperations. + newParentInodeOperations := newParent.InodeOperations.(*inodeOperations) + // Unwrap the old parent to a *inodeOperations. - oldParentInodeOperations, ok := oldParent.InodeOperations.(*inodeOperations) - if !ok { - return syscall.EXDEV - } + oldParentInodeOperations := oldParent.InodeOperations.(*inodeOperations) // Do the rename. if err := i.fileState.file.rename(ctx, newParentInodeOperations.fileState.file, newName); err != nil { diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go index 3c2b583ae..b095312fe 100644 --- a/pkg/sentry/fs/tmpfs/tmpfs.go +++ b/pkg/sentry/fs/tmpfs/tmpfs.go @@ -39,14 +39,13 @@ var fsInfo = fs.Info{ // rename implements fs.InodeOperations.Rename for tmpfs nodes. func rename(ctx context.Context, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { - op, ok := oldParent.InodeOperations.(*Dir) - if !ok { - return syserror.EXDEV - } - np, ok := newParent.InodeOperations.(*Dir) - if !ok { + // Don't allow renames across different mounts. + if newParent.MountSource != oldParent.MountSource { return syserror.EXDEV } + + op := oldParent.InodeOperations.(*Dir) + np := newParent.InodeOperations.(*Dir) return ramfs.Rename(ctx, op.ramfsDir, oldName, np.ramfsDir, newName, replacement) } -- cgit v1.2.3 From d84607762889d999f93b89e64e09d2a4dda63bf7 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Wed, 13 May 2020 10:51:50 -0700 Subject: Enable overlayfs_stale_read by default for runsc. Linux 4.18 and later make reads and writes coherent between pre-copy-up and post-copy-up FDs representing the same file on an overlay filesystem. However, memory mappings remain incoherent: - Documentation/filesystems/overlayfs.rst, "Non-standard behavior": "If a file residing on a lower layer is opened for read-only and then memory mapped with MAP_SHARED, then subsequent changes to the file are not reflected in the memory mapping." - fs/overlay/file.c:ovl_mmap() passes through to the underlying FD without any management of coherence in the overlay. - Experimentally on Linux 5.2: ``` $ cat mmap_cat_page.c #include #include #include #include #include #include int main(int argc, char **argv) { if (argc < 2) { errx(1, "syntax: %s [FILE]", argv[0]); } const int fd = open(argv[1], O_RDONLY); if (fd < 0) { err(1, "open(%s)", argv[1]); } const size_t page_size = sysconf(_SC_PAGE_SIZE); void* page = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); if (page == MAP_FAILED) { err(1, "mmap"); } for (;;) { write(1, page, strnlen(page, page_size)); if (getc(stdin) == EOF) { break; } } return 0; } $ gcc -O2 -o mmap_cat_page mmap_cat_page.c $ mkdir lowerdir upperdir workdir overlaydir $ echo old > lowerdir/file $ sudo mount -t overlay -o "lowerdir=lowerdir,upperdir=upperdir,workdir=workdir" none overlaydir $ ./mmap_cat_page overlaydir/file old ^Z [1]+ Stopped ./mmap_cat_page overlaydir/file $ echo new > overlaydir/file $ cat overlaydir/file new $ fg ./mmap_cat_page overlaydir/file old ``` Therefore, while the VFS1 gofer client's behavior of reopening read FDs is only necessary pre-4.18, replacing existing memory mappings (in both sentry and application address spaces) with mappings of the new FD is required regardless of kernel version, and this latter behavior is common to both VFS1 and VFS2. Re-document accordingly, and change the runsc flag to enabled by default. New test: - Before this CL: https://source.cloud.google.com/results/invocations/5b222d2c-e918-4bae-afc4-407f5bac509b - After this CL: https://source.cloud.google.com/results/invocations/f28c747e-d89c-4d8c-a461-602b33e71aab PiperOrigin-RevId: 311361267 --- images/hostoverlaytest/Dockerfile | 7 +++ images/hostoverlaytest/test.c | 88 +++++++++++++++++++++++++++++++++++++ images/hostoverlaytest/testfile.txt | 1 + pkg/sentry/fs/gofer/fs.go | 3 +- pkg/sentry/fsimpl/gofer/gofer.go | 9 ++-- runsc/boot/config.go | 6 ++- runsc/container/container_test.go | 37 ---------------- runsc/main.go | 2 +- test/e2e/integration_test.go | 15 +++++++ 9 files changed, 123 insertions(+), 45 deletions(-) create mode 100644 images/hostoverlaytest/Dockerfile create mode 100644 images/hostoverlaytest/test.c create mode 100644 images/hostoverlaytest/testfile.txt (limited to 'pkg/sentry/fs') diff --git a/images/hostoverlaytest/Dockerfile b/images/hostoverlaytest/Dockerfile new file mode 100644 index 000000000..d83439e9c --- /dev/null +++ b/images/hostoverlaytest/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:bionic + +WORKDIR /root +COPY . . + +RUN apt-get update && apt-get install -y gcc +RUN gcc -O2 -o test test.c diff --git a/images/hostoverlaytest/test.c b/images/hostoverlaytest/test.c new file mode 100644 index 000000000..088f90746 --- /dev/null +++ b/images/hostoverlaytest/test.c @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + const char kTestFilePath[] = "testfile.txt"; + const char kOldFileData[] = "old data\n"; + const char kNewFileData[] = "new data\n"; + const size_t kPageSize = sysconf(_SC_PAGE_SIZE); + + // Open a file that already exists in a host overlayfs lower layer. + const int fd_rdonly = open(kTestFilePath, O_RDONLY); + if (fd_rdonly < 0) { + err(1, "open(%s, O_RDONLY)", kTestFilePath); + } + + // Check that the file's initial contents are what we expect when read via + // syscall. + char oldbuf[sizeof(kOldFileData)] = {}; + ssize_t n = pread(fd_rdonly, oldbuf, sizeof(oldbuf), 0); + if (n < 0) { + err(1, "initial pread"); + } + if (n != strlen(kOldFileData)) { + errx(1, "short initial pread (%ld/%lu bytes)", n, strlen(kOldFileData)); + } + if (strcmp(oldbuf, kOldFileData) != 0) { + errx(1, "initial pread returned wrong data: %s", oldbuf); + } + + // Check that the file's initial contents are what we expect when read via + // memory mapping. + void* page = mmap(NULL, kPageSize, PROT_READ, MAP_SHARED, fd_rdonly, 0); + if (page == MAP_FAILED) { + err(1, "mmap"); + } + if (strcmp(page, kOldFileData) != 0) { + errx(1, "mapping contains wrong initial data: %s", (const char*)page); + } + + // Open the same file writably, causing host overlayfs to copy it up, and + // replace its contents. + const int fd_rdwr = open(kTestFilePath, O_RDWR); + if (fd_rdwr < 0) { + err(1, "open(%s, O_RDWR)", kTestFilePath); + } + n = write(fd_rdwr, kNewFileData, strlen(kNewFileData)); + if (n < 0) { + err(1, "write"); + } + if (n != strlen(kNewFileData)) { + errx(1, "short write (%ld/%lu bytes)", n, strlen(kNewFileData)); + } + if (ftruncate(fd_rdwr, strlen(kNewFileData)) < 0) { + err(1, "truncate"); + } + + int failed = 0; + + // Check that syscalls on the old FD return updated contents. (Before Linux + // 4.18, this requires that runsc use a post-copy-up FD to service the read.) + char newbuf[sizeof(kNewFileData)] = {}; + n = pread(fd_rdonly, newbuf, sizeof(newbuf), 0); + if (n < 0) { + err(1, "final pread"); + } + if (n != strlen(kNewFileData)) { + warnx("short final pread (%ld/%lu bytes)", n, strlen(kNewFileData)); + failed = 1; + } else if (strcmp(newbuf, kNewFileData) != 0) { + warnx("final pread returned wrong data: %s", newbuf); + failed = 1; + } + + // Check that the memory mapping of the old FD has been updated. (Linux + // overlayfs does not do this, so regardless of kernel version this requires + // that runsc replace existing memory mappings with mappings of a + // post-copy-up FD.) + if (strcmp(page, kNewFileData) != 0) { + warnx("mapping contains wrong final data: %s", (const char*)page); + failed = 1; + } + + return failed; +} diff --git a/images/hostoverlaytest/testfile.txt b/images/hostoverlaytest/testfile.txt new file mode 100644 index 000000000..e4188c841 --- /dev/null +++ b/images/hostoverlaytest/testfile.txt @@ -0,0 +1 @@ +old data diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go index 9d41fcbdb..8ae2d78d7 100644 --- a/pkg/sentry/fs/gofer/fs.go +++ b/pkg/sentry/fs/gofer/fs.go @@ -60,8 +60,7 @@ const ( limitHostFDTranslationKey = "limit_host_fd_translation" // overlayfsStaleRead if present closes cached readonly file after the first - // write. This is done to workaround a limitation of overlayfs in kernels - // before 4.19 where open FDs are not updated after the file is copied up. + // write. This is done to workaround a limitation of Linux overlayfs. overlayfsStaleRead = "overlayfs_stale_read" ) diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 1da8d5d82..353e2cf5b 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -143,9 +143,12 @@ type filesystemOptions struct { // If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote // filesystem may not be coherent with writable host FDs opened later, so - // mappings of the former must be replaced by mappings of the latter. This - // is usually only the case when the remote filesystem is an overlayfs - // mount on Linux < 4.19. + // all uses of the former must be replaced by uses of the latter. This is + // usually only the case when the remote filesystem is a Linux overlayfs + // mount. (Prior to Linux 4.18, patch series centered on commit + // d1d04ef8572b "ovl: stack file ops", both I/O and memory mappings were + // incoherent between pre-copy-up and post-copy-up FDs; after that patch + // series, only memory mappings are incoherent.) overlayfsStaleRead bool // If regularFilesUseSpecialFileFD is true, application FDs representing diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 6d6a705f8..bcec7e4db 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -241,8 +241,10 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeakMode refs.LeakMode - // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for - // write to workaround overlayfs limitation on kernels before 4.19. + // OverlayfsStaleRead instructs the sandbox to assume that the root mount + // is on a Linux overlayfs mount, which does not necessarily preserve + // coherence between read-only and subsequent writable file descriptors + // representing the "same" file. OverlayfsStaleRead bool // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index acf988aa0..7ba301331 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -2126,43 +2126,6 @@ func TestNetRaw(t *testing.T) { } } -// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works. -func TestOverlayfsStaleRead(t *testing.T) { - conf := testutil.TestConfig(t) - conf.OverlayfsStaleRead = true - - in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer in.Close() - if _, err := in.WriteString("stale data"); err != nil { - t.Fatalf("in.Write() failed: %v", err) - } - - out, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.out") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer out.Close() - - const want = "foobar" - cmd := fmt.Sprintf("cat %q >&2 && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name()) - spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd) - if err := run(spec, conf); err != nil { - t.Fatalf("Error running container: %v", err) - } - - gotBytes, err := ioutil.ReadAll(out) - if err != nil { - t.Fatalf("out.Read() failed: %v", err) - } - got := strings.TrimSpace(string(gotBytes)) - if want != got { - t.Errorf("Wrong content in out file, got: %q. want: %q", got, want) - } -} - // TestTTYField checks TTY field returned by container.Processes(). func TestTTYField(t *testing.T) { stop := testutil.StartReaper() diff --git a/runsc/main.go b/runsc/main.go index 0625a06e0..920ed84a5 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -76,7 +76,7 @@ var ( fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") - overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.") + overlayfsStaleRead = flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 404e37689..ff856883a 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -361,6 +361,21 @@ func TestTmpFile(t *testing.T) { } } +// TestHostOverlayfsCopyUp tests that the --overlayfs-stale-read option causes +// runsc to hide the incoherence of FDs opened before and after overlayfs +// copy-up on the host. +func TestHostOverlayfsCopyUp(t *testing.T) { + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + if _, err := d.Run(dockerutil.RunOpts{ + Image: "hostoverlaytest", + WorkDir: "/root", + }, "./test"); err != nil { + t.Fatalf("docker run failed: %v", err) + } +} + func TestMain(m *testing.M) { dockerutil.EnsureSupportedDockerVersion() flag.Parse() -- cgit v1.2.3 From 05c89af6edde6158844d4debfe68bc598fec4418 Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Tue, 19 May 2020 13:45:23 -0700 Subject: Implement mmap for host fs in vfs2. In VFS1, both fs/host and fs/gofer used the same utils for host file mappings. Refactor parts of fsimpl/gofer to create similar utils to share with fsimpl/host (memory accounting code moved to fsutil, page rounding arithmetic moved to usermem). Updates #1476. PiperOrigin-RevId: 312345090 --- pkg/sentry/fs/fsutil/frame_ref_set.go | 40 ++++++++++ pkg/sentry/fsimpl/gofer/BUILD | 1 - pkg/sentry/fsimpl/gofer/gofer.go | 4 +- pkg/sentry/fsimpl/gofer/pagemath.go | 31 -------- pkg/sentry/fsimpl/gofer/regular_file.go | 47 +++--------- pkg/sentry/fsimpl/host/BUILD | 4 + pkg/sentry/fsimpl/host/host.go | 70 ++++++++++++----- pkg/sentry/fsimpl/host/mmap.go | 132 ++++++++++++++++++++++++++++++++ pkg/usermem/addr.go | 17 ++++ 9 files changed, 254 insertions(+), 92 deletions(-) delete mode 100644 pkg/sentry/fsimpl/gofer/pagemath.go create mode 100644 pkg/sentry/fsimpl/host/mmap.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go index 6564fd0c6..dd6f5aba6 100644 --- a/pkg/sentry/fs/fsutil/frame_ref_set.go +++ b/pkg/sentry/fs/fsutil/frame_ref_set.go @@ -18,6 +18,7 @@ import ( "math" "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/usage" ) // FrameRefSetFunctions implements segment.Functions for FrameRefSet. @@ -49,3 +50,42 @@ func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform. func (FrameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) { return val, val } + +// IncRefAndAccount adds a reference on the range fr. All newly inserted segments +// are accounted as host page cache memory mappings. +func (refs *FrameRefSet) IncRefAndAccount(fr platform.FileRange) { + seg, gap := refs.Find(fr.Start) + for { + switch { + case seg.Ok() && seg.Start() < fr.End: + seg = refs.Isolate(seg, fr) + seg.SetValue(seg.Value() + 1) + seg, gap = seg.NextNonEmpty() + case gap.Ok() && gap.Start() < fr.End: + newRange := gap.Range().Intersect(fr) + usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped) + seg, gap = refs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty() + default: + refs.MergeAdjacent(fr) + return + } + } +} + +// DecRefAndAccount removes a reference on the range fr and untracks segments +// that are removed from memory accounting. +func (refs *FrameRefSet) DecRefAndAccount(fr platform.FileRange) { + seg := refs.FindSegment(fr.Start) + + for seg.Ok() && seg.Start() < fr.End { + seg = refs.Isolate(seg, fr) + if old := seg.Value(); old == 1 { + usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped) + seg = refs.Remove(seg).NextSegment() + } else { + seg.SetValue(old - 1) + seg = seg.NextSegment() + } + } + refs.MergeAdjacent(fr) +} diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index 5ce82b793..67e916525 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -36,7 +36,6 @@ go_library( "gofer.go", "handle.go", "p9file.go", - "pagemath.go", "regular_file.go", "socket.go", "special_file.go", diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index ebf063a58..6295f6b54 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -928,8 +928,8 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin // so we can't race with Write or another truncate.) d.dataMu.Unlock() if d.size < oldSize { - oldpgend := pageRoundUp(oldSize) - newpgend := pageRoundUp(d.size) + oldpgend, _ := usermem.PageRoundUp(oldSize) + newpgend, _ := usermem.PageRoundUp(d.size) if oldpgend != newpgend { d.mapsMu.Lock() d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ diff --git a/pkg/sentry/fsimpl/gofer/pagemath.go b/pkg/sentry/fsimpl/gofer/pagemath.go deleted file mode 100644 index 847cb0784..000000000 --- a/pkg/sentry/fsimpl/gofer/pagemath.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gofer - -import ( - "gvisor.dev/gvisor/pkg/usermem" -) - -// This are equivalent to usermem.Addr.RoundDown/Up, but without the -// potentially truncating conversion to usermem.Addr. This is necessary because -// there is no way to define generic "PageRoundDown/Up" functions in Go. - -func pageRoundDown(x uint64) uint64 { - return x &^ (usermem.PageSize - 1) -} - -func pageRoundUp(x uint64) uint64 { - return pageRoundDown(x + usermem.PageSize - 1) -} diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 857f7c74e..0d10cf7ac 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -148,9 +148,9 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off return 0, err } // Remove touched pages from the cache. - pgstart := pageRoundDown(uint64(offset)) - pgend := pageRoundUp(uint64(offset + src.NumBytes())) - if pgend < pgstart { + pgstart := usermem.PageRoundDown(uint64(offset)) + pgend, ok := usermem.PageRoundUp(uint64(offset + src.NumBytes())) + if !ok { return 0, syserror.EINVAL } mr := memmap.MappableRange{pgstart, pgend} @@ -306,9 +306,10 @@ func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) if fillCache { // Read into the cache, then re-enter the loop to read from the // cache. + gapEnd, _ := usermem.PageRoundUp(gapMR.End) reqMR := memmap.MappableRange{ - Start: pageRoundDown(gapMR.Start), - End: pageRoundUp(gapMR.End), + Start: usermem.PageRoundDown(gapMR.Start), + End: gapEnd, } optMR := gap.Range() err := rw.d.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mf, usage.PageCache, rw.d.handle.readToBlocksAt) @@ -671,7 +672,7 @@ func (d *dentry) Translate(ctx context.Context, required, optional memmap.Mappab // Constrain translations to d.size (rounded up) to prevent translation to // pages that may be concurrently truncated. - pgend := pageRoundUp(d.size) + pgend, _ := usermem.PageRoundUp(d.size) var beyondEOF bool if required.End > pgend { if required.Start >= pgend { @@ -818,43 +819,15 @@ type dentryPlatformFile struct { // IncRef implements platform.File.IncRef. func (d *dentryPlatformFile) IncRef(fr platform.FileRange) { d.dataMu.Lock() - seg, gap := d.fdRefs.Find(fr.Start) - for { - switch { - case seg.Ok() && seg.Start() < fr.End: - seg = d.fdRefs.Isolate(seg, fr) - seg.SetValue(seg.Value() + 1) - seg, gap = seg.NextNonEmpty() - case gap.Ok() && gap.Start() < fr.End: - newRange := gap.Range().Intersect(fr) - usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped) - seg, gap = d.fdRefs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty() - default: - d.fdRefs.MergeAdjacent(fr) - d.dataMu.Unlock() - return - } - } + d.fdRefs.IncRefAndAccount(fr) + d.dataMu.Unlock() } // DecRef implements platform.File.DecRef. func (d *dentryPlatformFile) DecRef(fr platform.FileRange) { d.dataMu.Lock() - seg := d.fdRefs.FindSegment(fr.Start) - - for seg.Ok() && seg.Start() < fr.End { - seg = d.fdRefs.Isolate(seg, fr) - if old := seg.Value(); old == 1 { - usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped) - seg = d.fdRefs.Remove(seg).NextSegment() - } else { - seg.SetValue(old - 1) - seg = seg.NextSegment() - } - } - d.fdRefs.MergeAdjacent(fr) + d.fdRefs.DecRefAndAccount(fr) d.dataMu.Unlock() - } // MapInternal implements platform.File.MapInternal. diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 39509f703..ca0fe6d2b 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -8,6 +8,7 @@ go_library( "control.go", "host.go", "ioctl_unsafe.go", + "mmap.go", "socket.go", "socket_iovec.go", "socket_unsafe.go", @@ -23,12 +24,15 @@ go_library( "//pkg/fspath", "//pkg/log", "//pkg/refs", + "//pkg/safemem", "//pkg/sentry/arch", + "//pkg/sentry/fs/fsutil", "//pkg/sentry/fsimpl/kernfs", "//pkg/sentry/hostfd", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", + "//pkg/sentry/platform", "//pkg/sentry/socket/control", "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 8caf55a1b..65981197d 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -86,15 +86,16 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) i := &inode{ hostFD: hostFD, - seekable: seekable, + ino: fs.NextIno(), isTTY: opts.IsTTY, - canMap: canMap(uint32(fileType)), wouldBlock: wouldBlock(uint32(fileType)), - ino: fs.NextIno(), + seekable: seekable, // For simplicity, set offset to 0. Technically, we should use the existing // offset on the host if the file is seekable. offset: 0, + canMap: canMap(uint32(fileType)), } + i.pf.inode = i // Non-seekable files can't be memory mapped, assert this. if !i.seekable && i.canMap { @@ -189,11 +190,15 @@ type inode struct { // This field is initialized at creation time and is immutable. hostFD int - // wouldBlock is true if the host FD would return EWOULDBLOCK for - // operations that would block. + // ino is an inode number unique within this filesystem. // // This field is initialized at creation time and is immutable. - wouldBlock bool + ino uint64 + + // isTTY is true if this file represents a TTY. + // + // This field is initialized at creation time and is immutable. + isTTY bool // seekable is false if the host fd points to a file representing a stream, // e.g. a socket or a pipe. Such files are not seekable and can return @@ -202,29 +207,36 @@ type inode struct { // This field is initialized at creation time and is immutable. seekable bool - // isTTY is true if this file represents a TTY. + // offsetMu protects offset. + offsetMu sync.Mutex + + // offset specifies the current file offset. It is only meaningful when + // seekable is true. + offset int64 + + // wouldBlock is true if the host FD would return EWOULDBLOCK for + // operations that would block. // // This field is initialized at creation time and is immutable. - isTTY bool + wouldBlock bool + + // Event queue for blocking operations. + queue waiter.Queue // canMap specifies whether we allow the file to be memory mapped. // // This field is initialized at creation time and is immutable. canMap bool - // ino is an inode number unique within this filesystem. - // - // This field is initialized at creation time and is immutable. - ino uint64 - - // offsetMu protects offset. - offsetMu sync.Mutex + // mapsMu protects mappings. + mapsMu sync.Mutex - // offset specifies the current file offset. - offset int64 + // If canMap is true, mappings tracks mappings of hostFD into + // memmap.MappingSpaces. + mappings memmap.MappingSet - // Event queue for blocking operations. - queue waiter.Queue + // pf implements platform.File for mappings of hostFD. + pf inodePlatformFile } // CheckPermissions implements kernfs.Inode. @@ -388,6 +400,21 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil { return err } + oldSize := uint64(hostStat.Size) + if s.Size < oldSize { + oldpgend, _ := usermem.PageRoundUp(oldSize) + newpgend, _ := usermem.PageRoundUp(s.Size) + if oldpgend != newpgend { + i.mapsMu.Lock() + i.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ + // Compare Linux's mm/truncate.c:truncate_setsize() => + // truncate_pagecache() => + // mm/memory.c:unmap_mapping_range(evencows=1). + InvalidatePrivate: true, + }) + i.mapsMu.Unlock() + } + } } if m&(linux.STATX_ATIME|linux.STATX_MTIME) != 0 { ts := [2]syscall.Timespec{ @@ -666,8 +693,9 @@ func (f *fileDescription) ConfigureMMap(_ context.Context, opts *memmap.MMapOpts if !f.inode.canMap { return syserror.ENODEV } - // TODO(gvisor.dev/issue/1672): Implement ConfigureMMap and Mappable interface. - return syserror.ENODEV + i := f.inode + i.pf.fileMapperInitOnce.Do(i.pf.fileMapper.Init) + return vfs.GenericConfigureMMap(&f.vfsfd, i, opts) } // EventRegister implements waiter.Waitable.EventRegister. diff --git a/pkg/sentry/fsimpl/host/mmap.go b/pkg/sentry/fsimpl/host/mmap.go new file mode 100644 index 000000000..8545a82f0 --- /dev/null +++ b/pkg/sentry/fsimpl/host/mmap.go @@ -0,0 +1,132 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/usermem" +) + +// inodePlatformFile implements platform.File. It exists solely because inode +// cannot implement both kernfs.Inode.IncRef and platform.File.IncRef. +// +// inodePlatformFile should only be used if inode.canMap is true. +type inodePlatformFile struct { + *inode + + // fdRefsMu protects fdRefs. + fdRefsMu sync.Mutex + + // fdRefs counts references on platform.File offsets. It is used solely for + // memory accounting. + fdRefs fsutil.FrameRefSet + + // fileMapper caches mappings of the host file represented by this inode. + fileMapper fsutil.HostFileMapper + + // fileMapperInitOnce is used to lazily initialize fileMapper. + fileMapperInitOnce sync.Once +} + +// IncRef implements platform.File.IncRef. +// +// Precondition: i.inode.canMap must be true. +func (i *inodePlatformFile) IncRef(fr platform.FileRange) { + i.fdRefsMu.Lock() + i.fdRefs.IncRefAndAccount(fr) + i.fdRefsMu.Unlock() +} + +// DecRef implements platform.File.DecRef. +// +// Precondition: i.inode.canMap must be true. +func (i *inodePlatformFile) DecRef(fr platform.FileRange) { + i.fdRefsMu.Lock() + i.fdRefs.DecRefAndAccount(fr) + i.fdRefsMu.Unlock() +} + +// MapInternal implements platform.File.MapInternal. +// +// Precondition: i.inode.canMap must be true. +func (i *inodePlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { + return i.fileMapper.MapInternal(fr, i.hostFD, at.Write) +} + +// FD implements platform.File.FD. +func (i *inodePlatformFile) FD() int { + return i.hostFD +} + +// AddMapping implements memmap.Mappable.AddMapping. +// +// Precondition: i.inode.canMap must be true. +func (i *inode) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { + i.mapsMu.Lock() + mapped := i.mappings.AddMapping(ms, ar, offset, writable) + for _, r := range mapped { + i.pf.fileMapper.IncRefOn(r) + } + i.mapsMu.Unlock() + return nil +} + +// RemoveMapping implements memmap.Mappable.RemoveMapping. +// +// Precondition: i.inode.canMap must be true. +func (i *inode) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { + i.mapsMu.Lock() + unmapped := i.mappings.RemoveMapping(ms, ar, offset, writable) + for _, r := range unmapped { + i.pf.fileMapper.DecRefOn(r) + } + i.mapsMu.Unlock() +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +// +// Precondition: i.inode.canMap must be true. +func (i *inode) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { + return i.AddMapping(ctx, ms, dstAR, offset, writable) +} + +// Translate implements memmap.Mappable.Translate. +// +// Precondition: i.inode.canMap must be true. +func (i *inode) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { + mr := optional + return []memmap.Translation{ + { + Source: mr, + File: &i.pf, + Offset: mr.Start, + Perms: usermem.AnyAccess, + }, + }, nil +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +// +// Precondition: i.inode.canMap must be true. +func (i *inode) InvalidateUnsavable(ctx context.Context) error { + // We expect the same host fd across save/restore, so all translations + // should be valid. + return nil +} diff --git a/pkg/usermem/addr.go b/pkg/usermem/addr.go index e79210804..c4100481e 100644 --- a/pkg/usermem/addr.go +++ b/pkg/usermem/addr.go @@ -106,3 +106,20 @@ func (ar AddrRange) IsPageAligned() bool { func (ar AddrRange) String() string { return fmt.Sprintf("[%#x, %#x)", ar.Start, ar.End) } + +// PageRoundDown/Up are equivalent to Addr.RoundDown/Up, but without the +// potentially truncating conversion from uint64 to Addr. This is necessary +// because there is no way to define generic "PageRoundDown/Up" functions in Go. + +// PageRoundDown returns x rounded down to the nearest page boundary. +func PageRoundDown(x uint64) uint64 { + return x &^ (PageSize - 1) +} + +// PageRoundUp returns x rounded up to the nearest page boundary. +// ok is true iff rounding up did not wrap around. +func PageRoundUp(x uint64) (addr uint64, ok bool) { + addr = PageRoundDown(x + PageSize - 1) + ok = addr >= x + return +} -- cgit v1.2.3 From 7938d8734870404c083379d2a73f7cd37fd2eda8 Mon Sep 17 00:00:00 2001 From: Rahat Mahmood Date: Wed, 13 May 2020 19:57:27 -0400 Subject: Write initial design doc for FUSE. --- pkg/sentry/fs/g3doc/.gitignore | 1 + pkg/sentry/fs/g3doc/fuse.md | 258 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 259 insertions(+) create mode 100644 pkg/sentry/fs/g3doc/.gitignore create mode 100644 pkg/sentry/fs/g3doc/fuse.md (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/g3doc/.gitignore b/pkg/sentry/fs/g3doc/.gitignore new file mode 100644 index 000000000..2d19fc766 --- /dev/null +++ b/pkg/sentry/fs/g3doc/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/pkg/sentry/fs/g3doc/fuse.md b/pkg/sentry/fs/g3doc/fuse.md new file mode 100644 index 000000000..c3988aa43 --- /dev/null +++ b/pkg/sentry/fs/g3doc/fuse.md @@ -0,0 +1,258 @@ +# Foreword + +This document describes an on-going project to support FUSE filesystems within +the sentry. This is intended to become the final documentation for this +subsystem, and is therefore written in the past tense. However FUSE support is +currently incomplete and the document will be updated as things progress. + +# FUSE: Filesystem in Userspace + +The sentry supports dispatching filesystem operations to a FUSE server, +allowing FUSE filesystem to be used with a sandbox. + +## Overview + +FUSE has two main components: + +1. A client kernel driver (canonically `fuse.ko` in Linux), which forwards + filesystem operations (usually initiated by syscalls) to the server. + +2. A server, which is a userspace daemon that implements the actual filesystem. + +The sentry implements the client component, which allows a server daemon +running within the sandbox to implement a filesystem within the sandbox. + +A FUSE filesystem is initialized with `mount(2)`, typically with the help of a +utility like `fusermount(1)`. Various mount options exist for establishing +ownership and access permissions on the filesystem, but the most important mount +option is a file descriptor used to establish communication between the client +and server. + +The FUSE device FD is obtained by opening `/dev/fuse`. During regular operation, +the client and server use the FUSE protocol described in `fuse(4)` to service +filesystem operations. See the "Protocol" section below for more +information about this protocol. The core of the sentry support for FUSE is the +client-side implementation of this protocol. + +## FUSE in the Sentry + +The sentry's FUSE client targets VFS2 and has the following components: + +- An implementation of `/dev/fuse`. + +- A VFS2 filesystem for mapping syscalls to FUSE ops. Since we're targeting + VFS2, one point of contention may be the lack of inodes in VFS2. We can + tentatively implement a kernfs-based filesystem to bridge the gap in APIs. The + kernfs base functionality can serve the role of the Linux inode cache and, the + filesystem can map VFS2 syscalls to kernfs inode operations; see the + `kernfs.Inode` interface. + +The FUSE protocol lends itself well to marshaling with `go_marshal`. The +various request and response packets can be defined in the ABI package and +converted to and from the wire format using `go_marshal`. + +### Design Goals + +- While filesystem performance is always important, the sentry's FUSE support is + primarily concerned with compatibility, with performance as a secondary + concern. + +- Avoiding deadlocks from a hung server daemon. + +- Consider the potential for denial of service from a malicious server + daemon. Protecting itself from userspace is already a design goal for the + sentry, but needs additional consideration for FUSE. Normally, an operating + system doesn't rely on userspace to make progress with filesystem + operations. Since this changes with FUSE, it opens up the possibility of + creating a chain of dependencies controlled by userspace, which could affect + an entire sandbox. For example: a FUSE op can block a syscall, which could be + holding a subsystem lock, which can then block another task goroutine. + +### Milestones + +Below are some broad goals to aim for while implementing FUSE in the sentry. +Many FUSE ops can be grouped into broad categories of functionality, and most +ops can be implemented in parallel. + +#### Minimal client that can mount a trivial FUSE filesystem. + +- Implement `/dev/fuse`. + +- Implement basic FUSE ops like `FUSE_INIT`, `FUSE_DESTROY`. + +#### Read-only mount with basic file operations + +- Implement the majority of file, directory and file descriptor FUSE ops. For + this milestone, we can skip uncommon or complex operations like mmap, mknod, + file locking, poll, and extended attributes. We can stub these out along with + any ops that modify the filesystem. The exact list of required ops are to be + determined, but the goal is to mount a real filesystem as read-only, and be + able to read contents from the filesystem in the sentry. + +#### Full read-write support + +- Implement the remaining FUSE ops and decide if we can omit rarely used + operations like ioctl. + +# Appendix + +## FUSE Protocol + +The FUSE protocol is a request-response protocol. All requests are initiated by +the client. The wire-format for the protocol is raw c structs serialized to +memory. + +All FUSE requests begin with the following request header: + +```c +struct fuse_in_header { + uint32_t len; // Length of the request, including this header. + uint32_t opcode; // Requested operation. + uint64_t unique; // A unique identifier for this request. + uint64_t nodeid; // ID of the filesystem object being operated on. + uint32_t uid; // UID of the requesting process. + uint32_t gid; // GID of the requesting process. + uint32_t pid; // PID of the requesting process. + uint32_t padding; +}; +``` + +The request is then followed by a payload specific to the `opcode`. + +All responses begin with this response header: + +```c +struct fuse_out_header { + uint32_t len; // Length of the response, including this header. + int32_t error; // Status of the request, 0 if success. + uint64_t unique; // The unique identifier from the corresponding request. +}; +``` + +The response payload also depends on the request `opcode`. If `error != 0`, the +response payload must be empty. + +### Operations + +The following is a list of all FUSE operations used in `fuse_in_header.opcode` +as of Linux v4.4, and a brief description of their purpose. These are defined in +`uapi/linux/fuse.h`. Many of these have a corresponding request and response +payload struct; `fuse(4)` has details for some of these. We also note how these +operations map to the sentry virtual filesystem. + +#### FUSE meta-operations + +These operations are specific to FUSE and don't have a corresponding action in a +generic filesystem. + +- `FUSE_INIT`: This operation initializes a new FUSE filesystem, and is the + first message sent by the client after mount. This is used for version and + feature negotiation. This is related to `mount(2)`. +- `FUSE_DESTROY`: Teardown a FUSE filesystem, related to `unmount(2)`. +- `FUSE_INTERRUPT`: Interrupts an in-flight operation, specified by the + `fuse_in_header.unique` value provided in the corresponding request + header. The client can send at most one of these per request, and will enter + an uninterruptible wait for a reply. The server is expected to reply promptly. +- `FUSE_FORGET`: A hint to the server that server should evict the indicate node + from any caches. This is wired up to `(struct super_operations).evict_inode` + in Linux, which is in turned hooked as the inode cache shrinker which is + typically triggered by system memory pressure. +- `FUSE_BATCH_FORGET`: Batch version of `FUSE_FORGET`. + +#### Filesystem Syscalls + +These FUSE ops map directly to an equivalent filesystem syscall, or family of +syscalls. The relevant syscalls have a similar name to the operation, unless +otherwise noted. + +Node creation: + +- `FUSE_MKNOD` +- `FUSE_MKDIR` +- `FUSE_CREATE`: This is equivalent to `open(2)` and `creat(2)`, which + atomically creates and opens a node. + +Node attributes and extended attributes: + +- `FUSE_GETATTR` +- `FUSE_SETATTR` +- `FUSE_SETXATTR` +- `FUSE_GETXATTR` +- `FUSE_LISTXATTR` +- `FUSE_REMOVEXATTR` + +Node link manipulation: + +- `FUSE_READLINK` +- `FUSE_LINK` +- `FUSE_SYMLINK` +- `FUSE_UNLINK` + +Directory operations: + +- `FUSE_RMDIR` +- `FUSE_RENAME` +- `FUSE_RENAME2` +- `FUSE_OPENDIR`: `open(2)` for directories. +- `FUSE_RELEASEDIR`: `close(2)` for directories. +- `FUSE_READDIR` +- `FUSE_READDIRPLUS` +- `FUSE_FSYNCDIR`: `fsync(2)` for directories. +- `FUSE_LOOKUP`: Establishes a unique identifier for a FS node. This is + reminiscent of `VirtualFilesystem.GetDentryAt` in that it resolves a path + component to a node. However the returned identifier is opaque to the + client. The server must remember this mapping, as this is how the client will + reference the node in the future. + +File operations: + +- `FUSE_OPEN`: `open(2)` for files. +- `FUSE_RELEASE`: `close(2)` for files. +- `FUSE_FSYNC` +- `FUSE_FALLOCATE` +- `FUSE_SETUPMAPPING`: Creates a memory map on a file for `mmap(2)`. +- `FUSE_REMOVEMAPPING`: Removes a memory map for `munmap(2)`. + +File locking: + +- `FUSE_GETLK` +- `FUSE_SETLK` +- `FUSE_SETLKW` +- `FUSE_COPY_FILE_RANGE` + +File descriptor operations: + +- `FUSE_IOCTL` +- `FUSE_POLL` +- `FUSE_LSEEK` + +Filesystem operations: + +- `FUSE_STATFS` + +#### Permissions + +- `FUSE_ACCESS` is used to check if a node is accessible, as part of many + syscall implementations. Maps to `vfs.FilesystemImpl.AccessAt` + in the sentry. + +#### I/O Operations + +These ops are used to read and write file pages. They're used to implement both +I/O syscalls like `read(2)`, `write(2)` and `mmap(2)`. + +- `FUSE_READ` +- `FUSE_WRITE` + +#### Miscellaneous + +- `FUSE_FLUSH`: Used by the client to indicate when a file descriptor is + closed. Distinct from `FUSE_FSYNC`, which corresponds to an `fsync(2)` syscall + from the user. Maps to `vfs.FileDescriptorImpl.Release` in the sentry. +- `FUSE_BMAP`: Old address space API for block defrag. Probably not needed. +- `FUSE_NOTIFY_REPLY`: [TODO: what does this do?] + +# References + +- `fuse(4)` manpage. +- Linux kernel FUSE documentation: https://www.kernel.org/doc/html/latest/filesystems/fuse.html -- cgit v1.2.3 From 17d500f907c7bfc4d8365cceb4b305e9e3e6733c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 27 May 2020 10:16:09 -0700 Subject: g3doc/fuse: add more references --- pkg/sentry/fs/g3doc/fuse.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/g3doc/fuse.md b/pkg/sentry/fs/g3doc/fuse.md index 635cc009b..cc08c14f5 100644 --- a/pkg/sentry/fs/g3doc/fuse.md +++ b/pkg/sentry/fs/g3doc/fuse.md @@ -99,7 +99,7 @@ ops can be implemented in parallel. ## FUSE Protocol The FUSE protocol is a request-response protocol. All requests are initiated by -the client. The wire-format for the protocol is raw c structs serialized to +the client. The wire-format for the protocol is raw C structs serialized to memory. All FUSE requests begin with the following request header: @@ -255,6 +255,7 @@ I/O syscalls like `read(2)`, `write(2)` and `mmap(2)`. # References -- `fuse(4)` manpage. -- Linux kernel FUSE documentation: - https://www.kernel.org/doc/html/latest/filesystems/fuse.html +- [fuse(4) Linux manual page](https://www.man7.org/linux/man-pages/man4/fuse.4.html) +- [Linux kernel FUSE documentation](https://www.kernel.org/doc/html/latest/filesystems/fuse.html) +- [The reference implementation of the Linux FUSE (Filesystem in Userspace) interface](https://github.com/libfuse/libfuse) +- [The kernel interface of FUSE](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fuse.h) -- cgit v1.2.3 From 93edb36cbb653f8415d5c813e40ef21fed123417 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Fri, 29 May 2020 16:33:50 -0700 Subject: Refactor the ResolveExecutablePath logic. PiperOrigin-RevId: 313871804 --- pkg/sentry/control/BUILD | 1 + pkg/sentry/control/proc.go | 68 ++++-------------- pkg/sentry/fs/mounts.go | 72 ------------------- pkg/sentry/fs/user/BUILD | 7 +- pkg/sentry/fs/user/path.go | 169 +++++++++++++++++++++++++++++++++++++++++++++ pkg/sentry/fs/user/user.go | 2 + runsc/boot/fs.go | 14 ++-- runsc/boot/vfs.go | 76 +++----------------- 8 files changed, 205 insertions(+), 204 deletions(-) create mode 100644 pkg/sentry/fs/user/path.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index e74275d2d..0c9a62f0d 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -23,6 +23,7 @@ go_library( "//pkg/sentry/fdimport", "//pkg/sentry/fs", "//pkg/sentry/fs/host", + "//pkg/sentry/fs/user", "//pkg/sentry/fsbridge", "//pkg/sentry/fsimpl/host", "//pkg/sentry/kernel", diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 2ed17ee09..8767430b7 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -18,7 +18,6 @@ import ( "bytes" "encoding/json" "fmt" - "path" "sort" "strings" "text/tabwriter" @@ -28,10 +27,10 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fdimport" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/fs/user" "gvisor.dev/gvisor/pkg/sentry/fsbridge" hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -190,17 +189,12 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI // transferred to the new process. initArgs.MountNamespaceVFS2 = proc.Kernel.GlobalInit().Leader().MountNamespaceVFS2() } - - paths := fs.GetPath(initArgs.Envv) - vfsObj := proc.Kernel.VFS() - file, err := ResolveExecutablePath(ctx, vfsObj, initArgs.WorkingDirectory, initArgs.Argv[0], paths) + file, err := getExecutableFD(ctx, creds, proc.Kernel.VFS(), initArgs.MountNamespaceVFS2, initArgs.Envv, initArgs.WorkingDirectory, initArgs.Argv[0]) if err != nil { - return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) + return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in environment %v: %v", initArgs.Argv[0], initArgs.Envv, err) } initArgs.File = fsbridge.NewVFSFile(file) } else { - // Get the full path to the filename from the PATH env variable. - paths := fs.GetPath(initArgs.Envv) if initArgs.MountNamespace == nil { // Set initArgs so that 'ctx' returns the namespace. initArgs.MountNamespace = proc.Kernel.GlobalInit().Leader().MountNamespace() @@ -209,9 +203,9 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI // be donated to the new process in CreateProcess. initArgs.MountNamespace.IncRef() } - f, err := initArgs.MountNamespace.ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths) + f, err := user.ResolveExecutablePath(ctx, creds, initArgs.MountNamespace, initArgs.Envv, initArgs.WorkingDirectory, initArgs.Argv[0]) if err != nil { - return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], paths, err) + return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], initArgs.Envv, err) } initArgs.Filename = f } @@ -429,53 +423,17 @@ func ttyName(tty *kernel.TTY) string { return fmt.Sprintf("pts/%d", tty.Index) } -// ResolveExecutablePath resolves the given executable name given a set of -// paths that might contain it. -func ResolveExecutablePath(ctx context.Context, vfsObj *vfs.VirtualFilesystem, wd, name string, paths []string) (*vfs.FileDescription, error) { - root := vfs.RootFromContext(ctx) - defer root.DecRef() - creds := auth.CredentialsFromContext(ctx) - - // Absolute paths can be used directly. - if path.IsAbs(name) { - return openExecutable(ctx, vfsObj, creds, root, name) - } - - // Paths with '/' in them should be joined to the working directory, or - // to the root if working directory is not set. - if strings.IndexByte(name, '/') > 0 { - if len(wd) == 0 { - wd = "/" - } - if !path.IsAbs(wd) { - return nil, fmt.Errorf("working directory %q must be absolute", wd) - } - return openExecutable(ctx, vfsObj, creds, root, path.Join(wd, name)) +// getExecutableFD resolves the given executable name and returns a +// vfs.FileDescription for the executable file. +func getExecutableFD(ctx context.Context, creds *auth.Credentials, vfsObj *vfs.VirtualFilesystem, mns *vfs.MountNamespace, envv []string, wd, name string) (*vfs.FileDescription, error) { + path, err := user.ResolveExecutablePathVFS2(ctx, creds, mns, envv, wd, name) + if err != nil { + return nil, err } - // Otherwise, we must lookup the name in the paths, starting from the - // calling context's root directory. - for _, p := range paths { - if !path.IsAbs(p) { - // Relative paths aren't safe, no one should be using them. - log.Warningf("Skipping relative path %q in $PATH", p) - continue - } - - binPath := path.Join(p, name) - f, err := openExecutable(ctx, vfsObj, creds, root, binPath) - if err != nil { - return nil, err - } - if f == nil { - continue // Not found/no access. - } - return f, nil - } - return nil, syserror.ENOENT -} + root := vfs.RootFromContext(ctx) + defer root.DecRef() -func openExecutable(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, root vfs.VirtualDentry, path string) (*vfs.FileDescription, error) { pop := vfs.PathOperation{ Root: root, Start: root, // binPath is absolute, Start can be anything. diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index b414ddaee..3f2bd0e87 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -17,13 +17,9 @@ package fs import ( "fmt" "math" - "path" - "strings" "syscall" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" @@ -625,71 +621,3 @@ func (mns *MountNamespace) SyncAll(ctx context.Context) { defer mns.mu.Unlock() mns.root.SyncAll(ctx) } - -// ResolveExecutablePath resolves the given executable name given a set of -// paths that might contain it. -func (mns *MountNamespace) ResolveExecutablePath(ctx context.Context, wd, name string, paths []string) (string, error) { - // Absolute paths can be used directly. - if path.IsAbs(name) { - return name, nil - } - - // Paths with '/' in them should be joined to the working directory, or - // to the root if working directory is not set. - if strings.IndexByte(name, '/') > 0 { - if wd == "" { - wd = "/" - } - if !path.IsAbs(wd) { - return "", fmt.Errorf("working directory %q must be absolute", wd) - } - return path.Join(wd, name), nil - } - - // Otherwise, We must lookup the name in the paths, starting from the - // calling context's root directory. - root := RootFromContext(ctx) - if root == nil { - // Caller has no root. Don't bother traversing anything. - return "", syserror.ENOENT - } - defer root.DecRef() - for _, p := range paths { - binPath := path.Join(p, name) - traversals := uint(linux.MaxSymlinkTraversals) - d, err := mns.FindInode(ctx, root, nil, binPath, &traversals) - if err == syserror.ENOENT || err == syserror.EACCES { - // Didn't find it here. - continue - } - if err != nil { - return "", err - } - defer d.DecRef() - - // Check that it is a regular file. - if !IsRegular(d.Inode.StableAttr) { - continue - } - - // Check whether we can read and execute the found file. - if err := d.Inode.CheckPermission(ctx, PermMask{Read: true, Execute: true}); err != nil { - log.Infof("Found executable at %q, but user cannot execute it: %v", binPath, err) - continue - } - return path.Join("/", p, name), nil - } - return "", syserror.ENOENT -} - -// GetPath returns the PATH as a slice of strings given the environment -// variables. -func GetPath(env []string) []string { - const prefix = "PATH=" - for _, e := range env { - if strings.HasPrefix(e, prefix) { - return strings.Split(strings.TrimPrefix(e, prefix), ":") - } - } - return nil -} diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD index f37f979f1..bd5dac373 100644 --- a/pkg/sentry/fs/user/BUILD +++ b/pkg/sentry/fs/user/BUILD @@ -4,15 +4,20 @@ package(licenses = ["notice"]) go_library( name = "user", - srcs = ["user.go"], + srcs = [ + "path.go", + "user.go", + ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", "//pkg/fspath", + "//pkg/log", "//pkg/sentry/fs", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", + "//pkg/syserror", "//pkg/usermem", ], ) diff --git a/pkg/sentry/fs/user/path.go b/pkg/sentry/fs/user/path.go new file mode 100644 index 000000000..fbd4547a7 --- /dev/null +++ b/pkg/sentry/fs/user/path.go @@ -0,0 +1,169 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package user + +import ( + "fmt" + "path" + "strings" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// ResolveExecutablePath resolves the given executable name given the working +// dir and environment. +func ResolveExecutablePath(ctx context.Context, creds *auth.Credentials, mns *fs.MountNamespace, envv []string, wd, name string) (string, error) { + // Absolute paths can be used directly. + if path.IsAbs(name) { + return name, nil + } + + // Paths with '/' in them should be joined to the working directory, or + // to the root if working directory is not set. + if strings.IndexByte(name, '/') > 0 { + if wd == "" { + wd = "/" + } + if !path.IsAbs(wd) { + return "", fmt.Errorf("working directory %q must be absolute", wd) + } + return path.Join(wd, name), nil + } + + // Otherwise, We must lookup the name in the paths, starting from the + // calling context's root directory. + paths := getPath(envv) + + root := fs.RootFromContext(ctx) + if root == nil { + // Caller has no root. Don't bother traversing anything. + return "", syserror.ENOENT + } + defer root.DecRef() + for _, p := range paths { + if !path.IsAbs(p) { + // Relative paths aren't safe, no one should be using them. + log.Warningf("Skipping relative path %q in $PATH", p) + continue + } + + binPath := path.Join(p, name) + traversals := uint(linux.MaxSymlinkTraversals) + d, err := mns.FindInode(ctx, root, nil, binPath, &traversals) + if err == syserror.ENOENT || err == syserror.EACCES { + // Didn't find it here. + continue + } + if err != nil { + return "", err + } + defer d.DecRef() + + // Check that it is a regular file. + if !fs.IsRegular(d.Inode.StableAttr) { + continue + } + + // Check whether we can read and execute the found file. + if err := d.Inode.CheckPermission(ctx, fs.PermMask{Read: true, Execute: true}); err != nil { + log.Infof("Found executable at %q, but user cannot execute it: %v", binPath, err) + continue + } + return path.Join("/", p, name), nil + } + + // Couldn't find it. + return "", syserror.ENOENT +} + +// ResolveExecutablePathVFS2 resolves the given executable name given the +// working dir and environment. +func ResolveExecutablePathVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, envv []string, wd, name string) (string, error) { + // Absolute paths can be used directly. + if path.IsAbs(name) { + return name, nil + } + + // Paths with '/' in them should be joined to the working directory, or + // to the root if working directory is not set. + if strings.IndexByte(name, '/') > 0 { + if wd == "" { + wd = "/" + } + if !path.IsAbs(wd) { + return "", fmt.Errorf("working directory %q must be absolute", wd) + } + return path.Join(wd, name), nil + } + + // Otherwise, We must lookup the name in the paths, starting from the + // calling context's root directory. + paths := getPath(envv) + + root := mns.Root() + defer root.DecRef() + for _, p := range paths { + if !path.IsAbs(p) { + // Relative paths aren't safe, no one should be using them. + log.Warningf("Skipping relative path %q in $PATH", p) + continue + } + + binPath := path.Join(p, name) + pop := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(binPath), + FollowFinalSymlink: true, + } + opts := &vfs.OpenOptions{ + FileExec: true, + Flags: linux.O_RDONLY, + } + dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts) + if err == syserror.ENOENT || err == syserror.EACCES { + // Didn't find it here. + continue + } + if err != nil { + return "", err + } + dentry.DecRef() + + return binPath, nil + } + + // Couldn't find it. + return "", syserror.ENOENT +} + +// getPath returns the PATH as a slice of strings given the environment +// variables. +func getPath(env []string) []string { + const prefix = "PATH=" + for _, e := range env { + if strings.HasPrefix(e, prefix) { + return strings.Split(strings.TrimPrefix(e, prefix), ":") + } + } + return nil +} diff --git a/pkg/sentry/fs/user/user.go b/pkg/sentry/fs/user/user.go index fe7f67c00..f4d525523 100644 --- a/pkg/sentry/fs/user/user.go +++ b/pkg/sentry/fs/user/user.go @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +// Package user contains methods for resolving filesystem paths based on the +// user and their environment. package user import ( diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 9cd4f97da..52f8344ca 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -37,6 +37,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/gofer" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/fs/user" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" @@ -291,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, // Set namespace here so that it can be found in ctx. procArgs.MountNamespace = mns - return setExecutablePath(ctx, procArgs) -} - -// setExecutablePath sets the procArgs.Filename by searching the PATH for an -// executable matching the procArgs.Argv[0]. -func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - paths := fs.GetPath(procArgs.Envv) - exe := procArgs.Argv[0] - f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths) + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) if err != nil { - return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err) + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } procArgs.Filename = f return nil diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index fefeeff58..f48e6b0f1 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -22,22 +22,21 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/devices/memdev" - "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/user" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" ) func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error { @@ -95,69 +94,14 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte return fmt.Errorf("failed to setupFS: %w", err) } procArgs.MountNamespaceVFS2 = mns - return setExecutablePathVFS2(ctx, procArgs) -} - -func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - exe := procArgs.Argv[0] - // Absolute paths can be used directly. - if path.IsAbs(exe) { - procArgs.Filename = exe - return nil - } - - // Paths with '/' in them should be joined to the working directory, or - // to the root if working directory is not set. - if strings.IndexByte(exe, '/') > 0 { - if !path.IsAbs(procArgs.WorkingDirectory) { - return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory) - } - procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe) - return nil - } - - // Paths with a '/' are relative to the CWD. - if strings.IndexByte(exe, '/') > 0 { - procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe) - return nil - } - - // Otherwise, We must lookup the name in the paths, starting from the - // root directory. - root := procArgs.MountNamespaceVFS2.Root() - defer root.DecRef() - - paths := fs.GetPath(procArgs.Envv) - creds := procArgs.Credentials - - for _, p := range paths { - binPath := path.Join(p, exe) - pop := &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(binPath), - FollowFinalSymlink: true, - } - opts := &vfs.OpenOptions{ - FileExec: true, - Flags: linux.O_RDONLY, - } - dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts) - if err == syserror.ENOENT || err == syserror.EACCES { - // Didn't find it here. - continue - } - if err != nil { - return err - } - dentry.DecRef() - - procArgs.Filename = binPath - return nil + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) + if err != nil { + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } - - return fmt.Errorf("executable %q not found in $PATH=%q", exe, strings.Join(paths, ":")) + procArgs.Filename = f + return nil } func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) { -- cgit v1.2.3 From 5173c960212a2d78d69e94f028629cbcac8c04f9 Mon Sep 17 00:00:00 2001 From: Ridwan Sharif Date: Tue, 2 Jun 2020 22:08:23 +0000 Subject: Add some detail to milestone #1 This change adds more information about what needs to be done to implement `/dev/fuse` --- pkg/sentry/fs/g3doc/fuse.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/g3doc/fuse.md b/pkg/sentry/fs/g3doc/fuse.md index b43c082a7..2ca84dd74 100644 --- a/pkg/sentry/fs/g3doc/fuse.md +++ b/pkg/sentry/fs/g3doc/fuse.md @@ -76,7 +76,8 @@ ops can be implemented in parallel. #### Minimal client that can mount a trivial FUSE filesystem. -- Implement `/dev/fuse`. +- Implement `/dev/fuse` - a character device used to establish an FD for + communication between the sentry and the server daemon. - Implement basic FUSE ops like `FUSE_INIT`, `FUSE_DESTROY`. -- cgit v1.2.3 From 7da69fe9719b26f5d15e80b2764459c4cac80b0e Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 3 Jun 2020 12:33:51 -0700 Subject: Fix data race on f.offset. We must hold f.mu to write f.offset. PiperOrigin-RevId: 314582968 --- pkg/sentry/fs/file.go | 9 ++++----- pkg/sentry/fs/fs.go | 3 ++- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index 846252c89..2a278fbe3 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -310,7 +310,6 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error if !f.mu.Lock(ctx) { return 0, syserror.ErrInterrupted } - unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) // Handle append mode. if f.Flags().Append { @@ -355,7 +354,6 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64 // offset." unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) defer unlockAppendMu() - if f.Flags().Append { if err := f.offsetForAppend(ctx, &offset); err != nil { return 0, err @@ -374,9 +372,10 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64 return f.FileOperations.Write(ctx, f, src, offset) } -// offsetForAppend sets the given offset to the end of the file. +// offsetForAppend atomically sets the given offset to the end of the file. // -// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing. +// Precondition: the file.Dirent.Inode.appendMu mutex should be held for +// writing. func (f *File) offsetForAppend(ctx context.Context, offset *int64) error { uattr, err := f.Dirent.Inode.UnstableAttr(ctx) if err != nil { @@ -386,7 +385,7 @@ func (f *File) offsetForAppend(ctx context.Context, offset *int64) error { } // Update the offset. - *offset = uattr.Size + atomic.StoreInt64(offset, uattr.Size) return nil } diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go index bdba6efe5..d2dbff268 100644 --- a/pkg/sentry/fs/fs.go +++ b/pkg/sentry/fs/fs.go @@ -42,9 +42,10 @@ // Dirent.dirMu // Dirent.mu // DirentCache.mu -// Locks in InodeOperations implementations or overlayEntry // Inode.Watches.mu (see `Inotify` for other lock ordering) // MountSource.mu +// Inode.appendMu +// Locks in InodeOperations implementations or overlayEntry // // If multiple Dirent or MountSource locks must be taken, locks in the parent must be // taken before locks in their children. -- cgit v1.2.3 From 4e96b94915633cc06bf04bd680f4eeba6a764dc9 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Mon, 8 Jun 2020 23:06:50 -0700 Subject: Combine executable lookup code Run vs. exec, VFS1 vs. VFS2 were executable lookup were slightly different from each other. Combine them all into the same logic. PiperOrigin-RevId: 315426443 --- pkg/sentry/control/BUILD | 4 - pkg/sentry/control/proc.go | 84 +++------- pkg/sentry/fs/user/BUILD | 1 + pkg/sentry/fs/user/path.go | 57 +++---- runsc/boot/fs.go | 6 +- runsc/boot/vfs.go | 6 +- runsc/container/container_test.go | 272 +++++++++++++++++++++++--------- runsc/container/multi_container_test.go | 45 +++--- 8 files changed, 279 insertions(+), 196 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD index 0c9a62f0d..2c5d14be5 100644 --- a/pkg/sentry/control/BUILD +++ b/pkg/sentry/control/BUILD @@ -16,15 +16,12 @@ go_library( ], deps = [ "//pkg/abi/linux", - "//pkg/context", "//pkg/fd", - "//pkg/fspath", "//pkg/log", "//pkg/sentry/fdimport", "//pkg/sentry/fs", "//pkg/sentry/fs/host", "//pkg/sentry/fs/user", - "//pkg/sentry/fsbridge", "//pkg/sentry/fsimpl/host", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", @@ -36,7 +33,6 @@ go_library( "//pkg/sentry/vfs", "//pkg/sentry/watchdog", "//pkg/sync", - "//pkg/syserror", "//pkg/tcpip/link/sniffer", "//pkg/urpc", "@org_golang_x_sys//unix:go_default_library", diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 8767430b7..1bae7cfaf 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -25,13 +25,10 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fdimport" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" "gvisor.dev/gvisor/pkg/sentry/fs/user" - "gvisor.dev/gvisor/pkg/sentry/fsbridge" hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -39,7 +36,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/urpc" ) @@ -107,6 +103,9 @@ type ExecArgs struct { // String prints the arguments as a string. func (args ExecArgs) String() string { + if len(args.Argv) == 0 { + return args.Filename + } a := make([]string, len(args.Argv)) copy(a, args.Argv) if args.Filename != "" { @@ -179,37 +178,30 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI } ctx := initArgs.NewContext(proc.Kernel) - if initArgs.Filename == "" { - if kernel.VFS2Enabled { - // Get the full path to the filename from the PATH env variable. - if initArgs.MountNamespaceVFS2 == nil { - // Set initArgs so that 'ctx' returns the namespace. - // - // MountNamespaceVFS2 adds a reference to the namespace, which is - // transferred to the new process. - initArgs.MountNamespaceVFS2 = proc.Kernel.GlobalInit().Leader().MountNamespaceVFS2() - } - file, err := getExecutableFD(ctx, creds, proc.Kernel.VFS(), initArgs.MountNamespaceVFS2, initArgs.Envv, initArgs.WorkingDirectory, initArgs.Argv[0]) - if err != nil { - return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in environment %v: %v", initArgs.Argv[0], initArgs.Envv, err) - } - initArgs.File = fsbridge.NewVFSFile(file) - } else { - if initArgs.MountNamespace == nil { - // Set initArgs so that 'ctx' returns the namespace. - initArgs.MountNamespace = proc.Kernel.GlobalInit().Leader().MountNamespace() + if kernel.VFS2Enabled { + // Get the full path to the filename from the PATH env variable. + if initArgs.MountNamespaceVFS2 == nil { + // Set initArgs so that 'ctx' returns the namespace. + // + // MountNamespaceVFS2 adds a reference to the namespace, which is + // transferred to the new process. + initArgs.MountNamespaceVFS2 = proc.Kernel.GlobalInit().Leader().MountNamespaceVFS2() + } + } else { + if initArgs.MountNamespace == nil { + // Set initArgs so that 'ctx' returns the namespace. + initArgs.MountNamespace = proc.Kernel.GlobalInit().Leader().MountNamespace() - // initArgs must hold a reference on MountNamespace, which will - // be donated to the new process in CreateProcess. - initArgs.MountNamespace.IncRef() - } - f, err := user.ResolveExecutablePath(ctx, creds, initArgs.MountNamespace, initArgs.Envv, initArgs.WorkingDirectory, initArgs.Argv[0]) - if err != nil { - return nil, 0, nil, nil, fmt.Errorf("error finding executable %q in PATH %v: %v", initArgs.Argv[0], initArgs.Envv, err) - } - initArgs.Filename = f + // initArgs must hold a reference on MountNamespace, which will + // be donated to the new process in CreateProcess. + initArgs.MountNamespace.IncRef() } } + resolved, err := user.ResolveExecutablePath(ctx, &initArgs) + if err != nil { + return nil, 0, nil, nil, err + } + initArgs.Filename = resolved fds := make([]int, len(args.FilePayload.Files)) for i, file := range args.FilePayload.Files { @@ -422,31 +414,3 @@ func ttyName(tty *kernel.TTY) string { } return fmt.Sprintf("pts/%d", tty.Index) } - -// getExecutableFD resolves the given executable name and returns a -// vfs.FileDescription for the executable file. -func getExecutableFD(ctx context.Context, creds *auth.Credentials, vfsObj *vfs.VirtualFilesystem, mns *vfs.MountNamespace, envv []string, wd, name string) (*vfs.FileDescription, error) { - path, err := user.ResolveExecutablePathVFS2(ctx, creds, mns, envv, wd, name) - if err != nil { - return nil, err - } - - root := vfs.RootFromContext(ctx) - defer root.DecRef() - - pop := vfs.PathOperation{ - Root: root, - Start: root, // binPath is absolute, Start can be anything. - Path: fspath.Parse(path), - FollowFinalSymlink: true, - } - opts := &vfs.OpenOptions{ - Flags: linux.O_RDONLY, - FileExec: true, - } - f, err := vfsObj.OpenAt(ctx, creds, &pop, opts) - if err == syserror.ENOENT || err == syserror.EACCES { - return nil, nil - } - return f, err -} diff --git a/pkg/sentry/fs/user/BUILD b/pkg/sentry/fs/user/BUILD index bd5dac373..66e949c95 100644 --- a/pkg/sentry/fs/user/BUILD +++ b/pkg/sentry/fs/user/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/fspath", "//pkg/log", "//pkg/sentry/fs", + "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", "//pkg/syserror", diff --git a/pkg/sentry/fs/user/path.go b/pkg/sentry/fs/user/path.go index fbd4547a7..397e96045 100644 --- a/pkg/sentry/fs/user/path.go +++ b/pkg/sentry/fs/user/path.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" @@ -31,7 +32,15 @@ import ( // ResolveExecutablePath resolves the given executable name given the working // dir and environment. -func ResolveExecutablePath(ctx context.Context, creds *auth.Credentials, mns *fs.MountNamespace, envv []string, wd, name string) (string, error) { +func ResolveExecutablePath(ctx context.Context, args *kernel.CreateProcessArgs) (string, error) { + name := args.Filename + if len(name) == 0 { + if len(args.Argv) == 0 { + return "", fmt.Errorf("no filename or command provided") + } + name = args.Argv[0] + } + // Absolute paths can be used directly. if path.IsAbs(name) { return name, nil @@ -40,6 +49,7 @@ func ResolveExecutablePath(ctx context.Context, creds *auth.Credentials, mns *fs // Paths with '/' in them should be joined to the working directory, or // to the root if working directory is not set. if strings.IndexByte(name, '/') > 0 { + wd := args.WorkingDirectory if wd == "" { wd = "/" } @@ -49,10 +59,24 @@ func ResolveExecutablePath(ctx context.Context, creds *auth.Credentials, mns *fs return path.Join(wd, name), nil } - // Otherwise, We must lookup the name in the paths, starting from the - // calling context's root directory. - paths := getPath(envv) + // Otherwise, We must lookup the name in the paths. + paths := getPath(args.Envv) + if kernel.VFS2Enabled { + f, err := resolveVFS2(ctx, args.Credentials, args.MountNamespaceVFS2, paths, name) + if err != nil { + return "", fmt.Errorf("error finding executable %q in PATH %v: %v", name, paths, err) + } + return f, nil + } + + f, err := resolve(ctx, args.MountNamespace, paths, name) + if err != nil { + return "", fmt.Errorf("error finding executable %q in PATH %v: %v", name, paths, err) + } + return f, nil +} +func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name string) (string, error) { root := fs.RootFromContext(ctx) if root == nil { // Caller has no root. Don't bother traversing anything. @@ -95,30 +119,7 @@ func ResolveExecutablePath(ctx context.Context, creds *auth.Credentials, mns *fs return "", syserror.ENOENT } -// ResolveExecutablePathVFS2 resolves the given executable name given the -// working dir and environment. -func ResolveExecutablePathVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, envv []string, wd, name string) (string, error) { - // Absolute paths can be used directly. - if path.IsAbs(name) { - return name, nil - } - - // Paths with '/' in them should be joined to the working directory, or - // to the root if working directory is not set. - if strings.IndexByte(name, '/') > 0 { - if wd == "" { - wd = "/" - } - if !path.IsAbs(wd) { - return "", fmt.Errorf("working directory %q must be absolute", wd) - } - return path.Join(wd, name), nil - } - - // Otherwise, We must lookup the name in the paths, starting from the - // calling context's root directory. - paths := getPath(envv) - +func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, paths []string, name string) (string, error) { root := mns.Root() defer root.DecRef() for _, p := range paths { diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index b98a1eb50..e83584b82 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -293,11 +293,11 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs.MountNamespace = mns // Resolve the executable path from working dir and environment. - f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) + resolved, err := user.ResolveExecutablePath(ctx, procArgs) if err != nil { - return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) + return err } - procArgs.Filename = f + procArgs.Filename = resolved return nil } diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 7ed6801b4..8eeb43e79 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -96,11 +96,11 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte procArgs.MountNamespaceVFS2 = mns // Resolve the executable path from working dir and environment. - f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) + resolved, err := user.ResolveExecutablePath(ctx, procArgs) if err != nil { - return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) + return err } - procArgs.Filename = f + procArgs.Filename = resolved return nil } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index e7715b6f7..cd76645bd 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "io/ioutil" + "math" "os" "path" "path/filepath" @@ -53,9 +54,8 @@ func waitForProcessList(cont *Container, want []*control.Process) error { err = fmt.Errorf("error getting process data from container: %v", err) return &backoff.PermanentError{Err: err} } - if r, err := procListsEqual(got, want); !r { - return fmt.Errorf("container got process list: %s, want: %s: error: %v", - procListToString(got), procListToString(want), err) + if !procListsEqual(got, want) { + return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want)) } return nil } @@ -92,36 +92,72 @@ func blockUntilWaitable(pid int) error { return err } -// procListsEqual is used to check whether 2 Process lists are equal for all -// implemented fields. -func procListsEqual(got, want []*control.Process) (bool, error) { - if len(got) != len(want) { - return false, nil - } - for i := range got { - pd1 := got[i] - pd2 := want[i] - // Zero out timing dependant fields. - pd1.Time = "" - pd1.STime = "" - pd1.C = 0 - // Ignore TTY field too, since it's not relevant in the cases - // where we use this method. Tests that care about the TTY - // field should check for it themselves. - pd1.TTY = "" - pd1Json, err := control.ProcessListToJSON([]*control.Process{pd1}) - if err != nil { - return false, err +// procListsEqual is used to check whether 2 Process lists are equal. Fields +// set to -1 in wants are ignored. Timestamp and threads fields are always +// ignored. +func procListsEqual(gots, wants []*control.Process) bool { + if len(gots) != len(wants) { + return false + } + for i := range gots { + got := gots[i] + want := wants[i] + + if want.UID != math.MaxUint32 && want.UID != got.UID { + return false } - pd2Json, err := control.ProcessListToJSON([]*control.Process{pd2}) - if err != nil { - return false, err + if want.PID != -1 && want.PID != got.PID { + return false } - if pd1Json != pd2Json { - return false, nil + if want.PPID != -1 && want.PPID != got.PPID { + return false + } + if len(want.TTY) != 0 && want.TTY != got.TTY { + return false + } + if len(want.Cmd) != 0 && want.Cmd != got.Cmd { + return false } } - return true, nil + return true +} + +type processBuilder struct { + process control.Process +} + +func newProcessBuilder() *processBuilder { + return &processBuilder{ + process: control.Process{ + UID: math.MaxUint32, + PID: -1, + PPID: -1, + }, + } +} + +func (p *processBuilder) Cmd(cmd string) *processBuilder { + p.process.Cmd = cmd + return p +} + +func (p *processBuilder) PID(pid kernel.ThreadID) *processBuilder { + p.process.PID = pid + return p +} + +func (p *processBuilder) PPID(ppid kernel.ThreadID) *processBuilder { + p.process.PPID = ppid + return p +} + +func (p *processBuilder) UID(uid auth.KUID) *processBuilder { + p.process.UID = uid + return p +} + +func (p *processBuilder) Process() *control.Process { + return &p.process } func procListToString(pl []*control.Process) string { @@ -323,14 +359,7 @@ func TestLifecycle(t *testing.T) { // expectedPL lists the expected process state of the container. expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, + newProcessBuilder().Cmd("sleep").Process(), } // Create the container. args := Args{ @@ -608,10 +637,14 @@ func doAppExitStatus(t *testing.T, vfs2 bool) { // TestExec verifies that a container can exec a new program. func TestExec(t *testing.T) { - for name, conf := range configsWithVFS2(t, overlay) { + for name, conf := range configsWithVFS2(t, all...) { t.Run(name, func(t *testing.T) { - const uid = 343 - spec := testutil.NewSpecWithArgs("sleep", "100") + dir, err := ioutil.TempDir(testutil.TmpDir(), "exec-test") + if err != nil { + t.Fatalf("error creating temporary directory: %v", err) + } + cmd := fmt.Sprintf("ln -s /bin/true %q/symlink && sleep 100", dir) + spec := testutil.NewSpecWithArgs("sh", "-c", cmd) _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) if err != nil { @@ -634,29 +667,127 @@ func TestExec(t *testing.T) { t.Fatalf("error starting container: %v", err) } - // expectedPL lists the expected process state of the container. + // Wait until sleep is running to ensure the symlink was created. expectedPL := []*control.Process{ + newProcessBuilder().Cmd("sh").Process(), + newProcessBuilder().Cmd("sleep").Process(), + } + if err := waitForProcessList(cont, expectedPL); err != nil { + t.Fatalf("waitForProcessList: %v", err) + } + + for _, tc := range []struct { + name string + args control.ExecArgs + }{ + { + name: "complete", + args: control.ExecArgs{ + Filename: "/bin/true", + Argv: []string{"/bin/true"}, + }, + }, + { + name: "filename", + args: control.ExecArgs{ + Filename: "/bin/true", + }, + }, + { + name: "argv", + args: control.ExecArgs{ + Argv: []string{"/bin/true"}, + }, + }, + { + name: "filename resolution", + args: control.ExecArgs{ + Filename: "true", + Envv: []string{"PATH=/bin"}, + }, + }, + { + name: "argv resolution", + args: control.ExecArgs{ + Argv: []string{"true"}, + Envv: []string{"PATH=/bin"}, + }, + }, { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, + name: "argv symlink", + args: control.ExecArgs{ + Argv: []string{filepath.Join(dir, "symlink")}, + }, }, { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{2}, + name: "working dir", + args: control.ExecArgs{ + Argv: []string{"/bin/sh", "-c", `if [[ "${PWD}" != "/tmp" ]]; then exit 1; fi`}, + WorkingDirectory: "/tmp", + }, }, + { + name: "user", + args: control.ExecArgs{ + Argv: []string{"/bin/sh", "-c", `if [[ "$(id -u)" != "343" ]]; then exit 1; fi`}, + KUID: 343, + }, + }, + { + name: "group", + args: control.ExecArgs{ + Argv: []string{"/bin/sh", "-c", `if [[ "$(id -g)" != "343" ]]; then exit 1; fi`}, + KGID: 343, + }, + }, + { + name: "env", + args: control.ExecArgs{ + Argv: []string{"/bin/sh", "-c", `if [[ "${FOO}" != "123" ]]; then exit 1; fi`}, + Envv: []string{"FOO=123"}, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + // t.Parallel() + if ws, err := cont.executeSync(&tc.args); err != nil { + t.Fatalf("executeAsync(%+v): %v", tc.args, err) + } else if ws != 0 { + t.Fatalf("executeAsync(%+v) failed with exit: %v", tc.args, ws) + } + }) } + }) + } +} - // Verify that "sleep 100" is running. - if err := waitForProcessList(cont, expectedPL[:1]); err != nil { - t.Error(err) +// TestExecProcList verifies that a container can exec a new program and it +// shows correcly in the process list. +func TestExecProcList(t *testing.T) { + for name, conf := range configsWithVFS2(t, all...) { + t.Run(name, func(t *testing.T) { + const uid = 343 + spec := testutil.NewSpecWithArgs("sleep", "100") + + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() + + // Create and start the container. + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + } + cont, err := New(conf, args) + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) } execArgs := &control.ExecArgs{ @@ -666,9 +797,8 @@ func TestExec(t *testing.T) { KUID: uid, } - // Verify that "sleep 100" and "sleep 5" are running - // after exec. First, start running exec (whick - // blocks). + // Verify that "sleep 100" and "sleep 5" are running after exec. First, + // start running exec (which blocks). ch := make(chan error) go func() { exitStatus, err := cont.executeSync(execArgs) @@ -681,6 +811,11 @@ func TestExec(t *testing.T) { } }() + // expectedPL lists the expected process state of the container. + expectedPL := []*control.Process{ + newProcessBuilder().PID(1).PPID(0).Cmd("sleep").UID(0).Process(), + newProcessBuilder().PID(2).PPID(0).Cmd("sleep").UID(uid).Process(), + } if err := waitForProcessList(cont, expectedPL); err != nil { t.Fatalf("error waiting for processes: %v", err) } @@ -1242,24 +1377,9 @@ func TestCapabilities(t *testing.T) { // expectedPL lists the expected process state of the container. expectedPL := []*control.Process{ - { - UID: 0, - PID: 1, - PPID: 0, - C: 0, - Cmd: "sleep", - Threads: []kernel.ThreadID{1}, - }, - { - UID: uid, - PID: 2, - PPID: 0, - C: 0, - Cmd: "exe", - Threads: []kernel.ThreadID{2}, - }, + newProcessBuilder().Cmd("sleep").Process(), } - if err := waitForProcessList(cont, expectedPL[:1]); err != nil { + if err := waitForProcessList(cont, expectedPL); err != nil { t.Fatalf("Failed to wait for sleep to start, err: %v", err) } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 207206dd2..c2b54696c 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -149,13 +149,13 @@ func TestMultiContainerSanity(t *testing.T) { // Check via ps that multiple processes are running. expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + newProcessBuilder().PID(1).PPID(0).Cmd("sleep").Process(), } if err := waitForProcessList(containers[0], expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) } expectedPL = []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + newProcessBuilder().PID(2).PPID(0).Cmd("sleep").Process(), } if err := waitForProcessList(containers[1], expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) @@ -195,13 +195,13 @@ func TestMultiPIDNS(t *testing.T) { // Check via ps that multiple processes are running. expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + newProcessBuilder().PID(1).Cmd("sleep").Process(), } if err := waitForProcessList(containers[0], expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) } expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + newProcessBuilder().PID(1).Cmd("sleep").Process(), } if err := waitForProcessList(containers[1], expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) @@ -257,7 +257,7 @@ func TestMultiPIDNSPath(t *testing.T) { // Check via ps that multiple processes are running. expectedPL := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + newProcessBuilder().PID(1).PPID(0).Cmd("sleep").Process(), } if err := waitForProcessList(containers[0], expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) @@ -267,7 +267,7 @@ func TestMultiPIDNSPath(t *testing.T) { } expectedPL = []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + newProcessBuilder().PID(2).PPID(0).Cmd("sleep").Process(), } if err := waitForProcessList(containers[1], expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) @@ -300,7 +300,7 @@ func TestMultiContainerWait(t *testing.T) { // Check via ps that multiple processes are running. expectedPL := []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + newProcessBuilder().PID(2).PPID(0).Cmd("sleep").Process(), } if err := waitForProcessList(containers[1], expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) @@ -345,7 +345,7 @@ func TestMultiContainerWait(t *testing.T) { // After Wait returns, ensure that the root container is running and // the child has finished. expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + newProcessBuilder().Cmd("sleep").Process(), } if err := waitForProcessList(containers[0], expectedPL); err != nil { t.Errorf("failed to wait for %q to start: %v", strings.Join(containers[0].Spec.Process.Args, " "), err) @@ -377,7 +377,7 @@ func TestExecWait(t *testing.T) { // Check via ps that process is running. expectedPL := []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + newProcessBuilder().Cmd("sleep").Process(), } if err := waitForProcessList(containers[1], expectedPL); err != nil { t.Fatalf("failed to wait for sleep to start: %v", err) @@ -412,7 +412,7 @@ func TestExecWait(t *testing.T) { // Wait for the exec'd process to exit. expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + newProcessBuilder().PID(1).Cmd("sleep").Process(), } if err := waitForProcessList(containers[0], expectedPL); err != nil { t.Fatalf("failed to wait for second container to stop: %v", err) @@ -498,9 +498,8 @@ func TestMultiContainerSignal(t *testing.T) { // Check via ps that container 1 process is running. expectedPL := []*control.Process{ - {PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}}, + newProcessBuilder().Cmd("sleep").Process(), } - if err := waitForProcessList(containers[1], expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) } @@ -512,7 +511,7 @@ func TestMultiContainerSignal(t *testing.T) { // Make sure process 1 is still running. expectedPL = []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + newProcessBuilder().PID(1).Cmd("sleep").Process(), } if err := waitForProcessList(containers[0], expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) @@ -626,8 +625,10 @@ func TestMultiContainerDestroy(t *testing.T) { if err != nil { t.Fatalf("error getting process data from sandbox: %v", err) } - expectedPL := []*control.Process{{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}} - if r, err := procListsEqual(pss, expectedPL); !r { + expectedPL := []*control.Process{ + newProcessBuilder().PID(1).Cmd("sleep").Process(), + } + if !procListsEqual(pss, expectedPL) { t.Errorf("container got process list: %s, want: %s: error: %v", procListToString(pss), procListToString(expectedPL), err) } @@ -664,7 +665,7 @@ func TestMultiContainerProcesses(t *testing.T) { // Check root's container process list doesn't include other containers. expectedPL0 := []*control.Process{ - {PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}, + newProcessBuilder().PID(1).Cmd("sleep").Process(), } if err := waitForProcessList(containers[0], expectedPL0); err != nil { t.Errorf("failed to wait for process to start: %v", err) @@ -672,8 +673,8 @@ func TestMultiContainerProcesses(t *testing.T) { // Same for the other container. expectedPL1 := []*control.Process{ - {PID: 2, Cmd: "sh", Threads: []kernel.ThreadID{2}}, - {PID: 3, PPID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{3}}, + newProcessBuilder().PID(2).Cmd("sh").Process(), + newProcessBuilder().PID(3).PPID(2).Cmd("sleep").Process(), } if err := waitForProcessList(containers[1], expectedPL1); err != nil { t.Errorf("failed to wait for process to start: %v", err) @@ -687,7 +688,7 @@ func TestMultiContainerProcesses(t *testing.T) { if _, err := containers[1].Execute(args); err != nil { t.Fatalf("error exec'ing: %v", err) } - expectedPL1 = append(expectedPL1, &control.Process{PID: 4, Cmd: "sleep", Threads: []kernel.ThreadID{4}}) + expectedPL1 = append(expectedPL1, newProcessBuilder().PID(4).Cmd("sleep").Process()) if err := waitForProcessList(containers[1], expectedPL1); err != nil { t.Errorf("failed to wait for process to start: %v", err) } @@ -1505,7 +1506,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { // Ensure container is running c := containers[2] expectedPL := []*control.Process{ - {PID: 3, Cmd: "sleep", Threads: []kernel.ThreadID{3}}, + newProcessBuilder().PID(3).Cmd("sleep").Process(), } if err := waitForProcessList(c, expectedPL); err != nil { t.Errorf("failed to wait for sleep to start: %v", err) @@ -1533,7 +1534,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { continue // container[2] has been killed. } pl := []*control.Process{ - {PID: kernel.ThreadID(i + 1), Cmd: "sleep", Threads: []kernel.ThreadID{kernel.ThreadID(i + 1)}}, + newProcessBuilder().PID(kernel.ThreadID(i + 1)).Cmd("sleep").Process(), } if err := waitForProcessList(c, pl); err != nil { t.Errorf("Container %q was affected by another container: %v", c.ID, err) @@ -1553,7 +1554,7 @@ func TestMultiContainerGoferKilled(t *testing.T) { // Wait until sandbox stops. waitForProcessList will loop until sandbox exits // and RPC errors out. impossiblePL := []*control.Process{ - {PID: 100, Cmd: "non-existent-process", Threads: []kernel.ThreadID{100}}, + newProcessBuilder().Cmd("non-existent-process").Process(), } if err := waitForProcessList(c, impossiblePL); err == nil { t.Fatalf("Sandbox was not killed after gofer death") -- cgit v1.2.3 From 6722b1e56fa63f3409f222a63241705aa3f3ace4 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 9 Jun 2020 16:34:42 -0700 Subject: Don't WriteOut to readonly mounts When the file closes, it attempts to write dirty cached attributes to the file. This should not be done when the mount is readonly. PiperOrigin-RevId: 315585058 --- pkg/sentry/fs/gofer/inode.go | 2 +- pkg/sentry/fs/host/inode.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index a016c896e..51d7368a1 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -640,7 +640,7 @@ func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset, // WriteOut implements fs.InodeOperations.WriteOut. func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error { - if !i.session().cachePolicy.cacheUAttrs(inode) { + if inode.MountSource.Flags.ReadOnly || !i.session().cachePolicy.cacheUAttrs(inode) { return nil } diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 62f1246aa..fbfba1b58 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -368,6 +368,9 @@ func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset, // WriteOut implements fs.InodeOperations.WriteOut. func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error { + if inode.MountSource.Flags.ReadOnly { + return nil + } // Have we been using host kernel metadata caches? if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) { // Then the metadata is already up to date on the host. -- cgit v1.2.3 From 67565078bbcdd8f797206d996605df8f6658d00a Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 9 Jun 2020 18:44:57 -0700 Subject: Implement flock(2) in VFS2 LockFD is the generic implementation that can be embedded in FileDescriptionImpl implementations. Unique lock ID is maintained in vfs.FileDescription and is created on demand. Updates #1480 PiperOrigin-RevId: 315604825 --- pkg/sentry/devices/memdev/full.go | 1 + pkg/sentry/devices/memdev/null.go | 1 + pkg/sentry/devices/memdev/random.go | 1 + pkg/sentry/devices/memdev/zero.go | 1 + pkg/sentry/fs/file.go | 2 +- pkg/sentry/fs/lock/lock.go | 41 +++----- pkg/sentry/fs/lock/lock_set_functions.go | 8 +- pkg/sentry/fs/lock/lock_test.go | 111 +++++++++++----------- pkg/sentry/fsimpl/devpts/BUILD | 1 + pkg/sentry/fsimpl/devpts/devpts.go | 5 +- pkg/sentry/fsimpl/devpts/master.go | 5 + pkg/sentry/fsimpl/devpts/slave.go | 5 + pkg/sentry/fsimpl/eventfd/eventfd.go | 1 + pkg/sentry/fsimpl/ext/BUILD | 1 + pkg/sentry/fsimpl/ext/file_description.go | 1 + pkg/sentry/fsimpl/ext/inode.go | 6 ++ pkg/sentry/fsimpl/ext/regular_file.go | 1 + pkg/sentry/fsimpl/gofer/BUILD | 2 + pkg/sentry/fsimpl/gofer/filesystem.go | 9 +- pkg/sentry/fsimpl/gofer/gofer.go | 23 +++++ pkg/sentry/fsimpl/gofer/special_file.go | 4 +- pkg/sentry/fsimpl/host/BUILD | 1 + pkg/sentry/fsimpl/host/host.go | 8 +- pkg/sentry/fsimpl/kernfs/BUILD | 2 + pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 10 +- pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 9 +- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 5 +- pkg/sentry/fsimpl/kernfs/kernfs_test.go | 13 ++- pkg/sentry/fsimpl/pipefs/BUILD | 1 + pkg/sentry/fsimpl/pipefs/pipefs.go | 6 +- pkg/sentry/fsimpl/proc/BUILD | 1 + pkg/sentry/fsimpl/proc/subtasks.go | 5 +- pkg/sentry/fsimpl/proc/task.go | 5 +- pkg/sentry/fsimpl/proc/task_fds.go | 7 +- pkg/sentry/fsimpl/proc/task_files.go | 10 +- pkg/sentry/fsimpl/proc/tasks.go | 5 +- pkg/sentry/fsimpl/signalfd/signalfd.go | 1 + pkg/sentry/fsimpl/sys/BUILD | 1 + pkg/sentry/fsimpl/sys/sys.go | 7 +- pkg/sentry/fsimpl/timerfd/timerfd.go | 1 + pkg/sentry/fsimpl/tmpfs/filesystem.go | 6 +- pkg/sentry/fsimpl/tmpfs/regular_file.go | 23 ----- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 41 +------- pkg/sentry/kernel/fd_table.go | 15 +-- pkg/sentry/kernel/kernel.go | 5 - pkg/sentry/kernel/pipe/BUILD | 1 + pkg/sentry/kernel/pipe/vfs.go | 13 ++- pkg/sentry/socket/hostinet/BUILD | 1 + pkg/sentry/socket/hostinet/socket_vfs2.go | 3 + pkg/sentry/socket/netlink/BUILD | 1 + pkg/sentry/socket/netlink/socket_vfs2.go | 8 +- pkg/sentry/socket/netstack/BUILD | 1 + pkg/sentry/socket/netstack/netstack_vfs2.go | 3 + pkg/sentry/socket/unix/BUILD | 1 + pkg/sentry/socket/unix/unix_vfs2.go | 7 +- pkg/sentry/syscalls/linux/sys_file.go | 39 ++------ pkg/sentry/syscalls/linux/vfs2/BUILD | 2 + pkg/sentry/syscalls/linux/vfs2/lock.go | 64 +++++++++++++ pkg/sentry/syscalls/linux/vfs2/vfs2.go | 2 +- pkg/sentry/vfs/BUILD | 1 + pkg/sentry/vfs/epoll.go | 1 + pkg/sentry/vfs/file_description.go | 25 ++++- pkg/sentry/vfs/file_description_impl_util.go | 80 ++++++++++++---- pkg/sentry/vfs/file_description_impl_util_test.go | 1 + pkg/sentry/vfs/inotify.go | 1 + test/syscalls/linux/BUILD | 3 + test/syscalls/linux/flock.cc | 75 ++++++++++++--- 67 files changed, 470 insertions(+), 281 deletions(-) create mode 100644 pkg/sentry/syscalls/linux/vfs2/lock.go (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/devices/memdev/full.go b/pkg/sentry/devices/memdev/full.go index c7e197691..af66fe4dc 100644 --- a/pkg/sentry/devices/memdev/full.go +++ b/pkg/sentry/devices/memdev/full.go @@ -42,6 +42,7 @@ type fullFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD } // Release implements vfs.FileDescriptionImpl.Release. diff --git a/pkg/sentry/devices/memdev/null.go b/pkg/sentry/devices/memdev/null.go index 33d060d02..92d3d71be 100644 --- a/pkg/sentry/devices/memdev/null.go +++ b/pkg/sentry/devices/memdev/null.go @@ -43,6 +43,7 @@ type nullFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD } // Release implements vfs.FileDescriptionImpl.Release. diff --git a/pkg/sentry/devices/memdev/random.go b/pkg/sentry/devices/memdev/random.go index acfa23149..6b81da5ef 100644 --- a/pkg/sentry/devices/memdev/random.go +++ b/pkg/sentry/devices/memdev/random.go @@ -48,6 +48,7 @@ type randomFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD // off is the "file offset". off is accessed using atomic memory // operations. diff --git a/pkg/sentry/devices/memdev/zero.go b/pkg/sentry/devices/memdev/zero.go index 3b1372b9e..c6f15054d 100644 --- a/pkg/sentry/devices/memdev/zero.go +++ b/pkg/sentry/devices/memdev/zero.go @@ -44,6 +44,7 @@ type zeroFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD } // Release implements vfs.FileDescriptionImpl.Release. diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index 2a278fbe3..ca41520b4 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -146,7 +146,7 @@ func (f *File) DecRef() { f.DecRefWithDestructor(func() { // Drop BSD style locks. lockRng := lock.LockRange{Start: 0, End: lock.LockEOF} - f.Dirent.Inode.LockCtx.BSD.UnlockRegion(lock.UniqueID(f.UniqueID), lockRng) + f.Dirent.Inode.LockCtx.BSD.UnlockRegion(f, lockRng) // Release resources held by the FileOperations. f.FileOperations.Release() diff --git a/pkg/sentry/fs/lock/lock.go b/pkg/sentry/fs/lock/lock.go index 926538d90..8a5d9c7eb 100644 --- a/pkg/sentry/fs/lock/lock.go +++ b/pkg/sentry/fs/lock/lock.go @@ -62,7 +62,7 @@ import ( type LockType int // UniqueID is a unique identifier of the holder of a regional file lock. -type UniqueID uint64 +type UniqueID interface{} const ( // ReadLock describes a POSIX regional file lock to be taken @@ -98,12 +98,7 @@ type Lock struct { // If len(Readers) > 0 then HasWriter must be false. Readers map[UniqueID]bool - // HasWriter indicates that this is a write lock held by a single - // UniqueID. - HasWriter bool - - // Writer is only valid if HasWriter is true. It identifies a - // single write lock holder. + // Writer holds the writer unique ID. It's nil if there are no writers. Writer UniqueID } @@ -186,7 +181,6 @@ func makeLock(uid UniqueID, t LockType) Lock { case ReadLock: value.Readers[uid] = true case WriteLock: - value.HasWriter = true value.Writer = uid default: panic(fmt.Sprintf("makeLock: invalid lock type %d", t)) @@ -196,10 +190,7 @@ func makeLock(uid UniqueID, t LockType) Lock { // isHeld returns true if uid is a holder of Lock. func (l Lock) isHeld(uid UniqueID) bool { - if l.HasWriter && l.Writer == uid { - return true - } - return l.Readers[uid] + return l.Writer == uid || l.Readers[uid] } // lock sets uid as a holder of a typed lock on Lock. @@ -214,20 +205,20 @@ func (l *Lock) lock(uid UniqueID, t LockType) { } // We cannot downgrade a write lock to a read lock unless the // uid is the same. - if l.HasWriter { + if l.Writer != nil { if l.Writer != uid { panic(fmt.Sprintf("lock: cannot downgrade write lock to read lock for uid %d, writer is %d", uid, l.Writer)) } // Ensure that there is only one reader if upgrading. l.Readers = make(map[UniqueID]bool) // Ensure that there is no longer a writer. - l.HasWriter = false + l.Writer = nil } l.Readers[uid] = true return case WriteLock: // If we are already the writer, then this is a no-op. - if l.HasWriter && l.Writer == uid { + if l.Writer == uid { return } // We can only upgrade a read lock to a write lock if there @@ -243,7 +234,6 @@ func (l *Lock) lock(uid UniqueID, t LockType) { } // Ensure that there is only a writer. l.Readers = make(map[UniqueID]bool) - l.HasWriter = true l.Writer = uid default: panic(fmt.Sprintf("lock: invalid lock type %d", t)) @@ -277,9 +267,8 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool { switch t { case ReadLock: return l.lockable(r, func(value Lock) bool { - // If there is no writer, there's no problem adding - // another reader. - if !value.HasWriter { + // If there is no writer, there's no problem adding another reader. + if value.Writer == nil { return true } // If there is a writer, then it must be the same uid @@ -289,10 +278,9 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool { case WriteLock: return l.lockable(r, func(value Lock) bool { // If there are only readers. - if !value.HasWriter { - // Then this uid can only take a write lock if - // this is a private upgrade, meaning that the - // only reader is uid. + if value.Writer == nil { + // Then this uid can only take a write lock if this is a private + // upgrade, meaning that the only reader is uid. return len(value.Readers) == 1 && value.Readers[uid] } // If the uid is already a writer on this region, then @@ -304,7 +292,8 @@ func (l LockSet) canLock(uid UniqueID, t LockType, r LockRange) bool { } } -// lock returns true if uid took a lock of type t on the entire range of LockRange. +// lock returns true if uid took a lock of type t on the entire range of +// LockRange. // // Preconditions: r.Start <= r.End (will panic otherwise). func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool { @@ -339,7 +328,7 @@ func (l *LockSet) lock(uid UniqueID, t LockType, r LockRange) bool { seg, _ = l.SplitUnchecked(seg, r.End) } - // Set the lock on the segment. This is guaranteed to + // Set the lock on the segment. This is guaranteed to // always be safe, given canLock above. value := seg.ValuePtr() value.lock(uid, t) @@ -386,7 +375,7 @@ func (l *LockSet) unlock(uid UniqueID, r LockRange) { value := seg.Value() var remove bool - if value.HasWriter && value.Writer == uid { + if value.Writer == uid { // If we are unlocking a writer, then since there can // only ever be one writer and no readers, then this // lock should always be removed from the set. diff --git a/pkg/sentry/fs/lock/lock_set_functions.go b/pkg/sentry/fs/lock/lock_set_functions.go index 8a3ace0c1..50a16e662 100644 --- a/pkg/sentry/fs/lock/lock_set_functions.go +++ b/pkg/sentry/fs/lock/lock_set_functions.go @@ -44,14 +44,9 @@ func (lockSetFunctions) Merge(r1 LockRange, val1 Lock, r2 LockRange, val2 Lock) return Lock{}, false } } - if val1.HasWriter != val2.HasWriter { + if val1.Writer != val2.Writer { return Lock{}, false } - if val1.HasWriter { - if val1.Writer != val2.Writer { - return Lock{}, false - } - } return val1, true } @@ -62,7 +57,6 @@ func (lockSetFunctions) Split(r LockRange, val Lock, split uint64) (Lock, Lock) for k, v := range val.Readers { val0.Readers[k] = v } - val0.HasWriter = val.HasWriter val0.Writer = val.Writer return val, val0 diff --git a/pkg/sentry/fs/lock/lock_test.go b/pkg/sentry/fs/lock/lock_test.go index ba002aeb7..fad90984b 100644 --- a/pkg/sentry/fs/lock/lock_test.go +++ b/pkg/sentry/fs/lock/lock_test.go @@ -42,9 +42,6 @@ func equals(e0, e1 []entry) bool { if !reflect.DeepEqual(e0[i].LockRange, e1[i].LockRange) { return false } - if e0[i].Lock.HasWriter != e1[i].Lock.HasWriter { - return false - } if e0[i].Lock.Writer != e1[i].Lock.Writer { return false } @@ -105,7 +102,7 @@ func TestCanLock(t *testing.T) { LockRange: LockRange{2048, 3072}, }, { - Lock: Lock{HasWriter: true, Writer: 1}, + Lock: Lock{Writer: 1}, LockRange: LockRange{3072, 4096}, }, }) @@ -241,7 +238,7 @@ func TestSetLock(t *testing.T) { // 0 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -254,7 +251,7 @@ func TestSetLock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -273,7 +270,7 @@ func TestSetLock(t *testing.T) { LockRange: LockRange{0, 4096}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{4096, LockEOF}, }, }, @@ -301,7 +298,7 @@ func TestSetLock(t *testing.T) { // 0 4096 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 4096}, }, { @@ -318,7 +315,7 @@ func TestSetLock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -550,7 +547,7 @@ func TestSetLock(t *testing.T) { LockRange: LockRange{0, 1024}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{1024, 4096}, }, { @@ -594,7 +591,7 @@ func TestSetLock(t *testing.T) { LockRange: LockRange{0, 1024}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{1024, 3072}, }, { @@ -633,7 +630,7 @@ func TestSetLock(t *testing.T) { // 0 1024 2048 4096 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { @@ -663,11 +660,11 @@ func TestSetLock(t *testing.T) { // 0 1024 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{1024, LockEOF}, }, }, @@ -675,28 +672,30 @@ func TestSetLock(t *testing.T) { } for _, test := range tests { - l := fill(test.before) + t.Run(test.name, func(t *testing.T) { + l := fill(test.before) - r := LockRange{Start: test.start, End: test.end} - success := l.lock(test.uid, test.lockType, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } + r := LockRange{Start: test.start, End: test.end} + success := l.lock(test.uid, test.lockType, r) + var got []entry + for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + got = append(got, entry{ + Lock: seg.Value(), + LockRange: seg.Range(), + }) + } - if success != test.success { - t.Errorf("%s: setlock(%v, %+v, %d, %d) got success %v, want %v", test.name, test.before, r, test.uid, test.lockType, success, test.success) - continue - } + if success != test.success { + t.Errorf("setlock(%v, %+v, %d, %d) got success %v, want %v", test.before, r, test.uid, test.lockType, success, test.success) + return + } - if success { - if !equals(got, test.after) { - t.Errorf("%s: got set %+v, want %+v", test.name, got, test.after) + if success { + if !equals(got, test.after) { + t.Errorf("got set %+v, want %+v", got, test.after) + } } - } + }) } } @@ -782,7 +781,7 @@ func TestUnlock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -824,7 +823,7 @@ func TestUnlock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -837,7 +836,7 @@ func TestUnlock(t *testing.T) { // 0 4096 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{4096, LockEOF}, }, }, @@ -876,7 +875,7 @@ func TestUnlock(t *testing.T) { // 0 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, LockEOF}, }, }, @@ -889,7 +888,7 @@ func TestUnlock(t *testing.T) { // 0 4096 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 4096}, }, }, @@ -906,7 +905,7 @@ func TestUnlock(t *testing.T) { LockRange: LockRange{0, 1024}, }, { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{1024, 4096}, }, { @@ -974,7 +973,7 @@ func TestUnlock(t *testing.T) { // 0 1024 4096 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { @@ -991,7 +990,7 @@ func TestUnlock(t *testing.T) { // 0 8 4096 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 8}, }, { @@ -1008,7 +1007,7 @@ func TestUnlock(t *testing.T) { // 0 1024 4096 max uint64 before: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { @@ -1025,7 +1024,7 @@ func TestUnlock(t *testing.T) { // 0 1024 4096 8192 max uint64 after: []entry{ { - Lock: Lock{HasWriter: true, Writer: 0}, + Lock: Lock{Writer: 0}, LockRange: LockRange{0, 1024}, }, { @@ -1041,19 +1040,21 @@ func TestUnlock(t *testing.T) { } for _, test := range tests { - l := fill(test.before) + t.Run(test.name, func(t *testing.T) { + l := fill(test.before) - r := LockRange{Start: test.start, End: test.end} - l.unlock(test.uid, r) - var got []entry - for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { - got = append(got, entry{ - Lock: seg.Value(), - LockRange: seg.Range(), - }) - } - if !equals(got, test.after) { - t.Errorf("%s: got set %+v, want %+v", test.name, got, test.after) - } + r := LockRange{Start: test.start, End: test.end} + l.unlock(test.uid, r) + var got []entry + for seg := l.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + got = append(got, entry{ + Lock: seg.Value(), + LockRange: seg.Range(), + }) + } + if !equals(got, test.after) { + t.Errorf("got set %+v, want %+v", got, test.after) + } + }) } } diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD index 585764223..cf440dce8 100644 --- a/pkg/sentry/fsimpl/devpts/BUILD +++ b/pkg/sentry/fsimpl/devpts/BUILD @@ -23,6 +23,7 @@ go_library( "//pkg/sentry/kernel/auth", "//pkg/sentry/unimpl", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index c03c65445..9b0e0cca2 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -28,6 +28,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -116,6 +117,8 @@ type rootInode struct { kernfs.InodeNotSymlink kernfs.OrderedChildren + locks lock.FileLocks + // Keep a reference to this inode's dentry. dentry kernfs.Dentry @@ -183,7 +186,7 @@ func (i *rootInode) masterClose(t *Terminal) { // Open implements kernfs.Inode.Open. func (i *rootInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 7a7ce5d81..1d22adbe3 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/unimpl" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -34,6 +35,8 @@ type masterInode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink + locks lock.FileLocks + // Keep a reference to this inode's dentry. dentry kernfs.Dentry @@ -55,6 +58,7 @@ func (mi *masterInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vf inode: mi, t: t, } + fd.LockFD.Init(&mi.locks) if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { mi.DecRef() return nil, err @@ -85,6 +89,7 @@ func (mi *masterInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds type masterFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD inode *masterInode t *Terminal diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go index 526cd406c..7fe475080 100644 --- a/pkg/sentry/fsimpl/devpts/slave.go +++ b/pkg/sentry/fsimpl/devpts/slave.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -33,6 +34,8 @@ type slaveInode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink + locks lock.FileLocks + // Keep a reference to this inode's dentry. dentry kernfs.Dentry @@ -51,6 +54,7 @@ func (si *slaveInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs fd := &slaveFileDescription{ inode: si, } + fd.LockFD.Init(&si.locks) if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { si.DecRef() return nil, err @@ -91,6 +95,7 @@ func (si *slaveInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds type slaveFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD inode *slaveInode } diff --git a/pkg/sentry/fsimpl/eventfd/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go index c573d7935..d12d78b84 100644 --- a/pkg/sentry/fsimpl/eventfd/eventfd.go +++ b/pkg/sentry/fsimpl/eventfd/eventfd.go @@ -37,6 +37,7 @@ type EventFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD // queue is used to notify interested parties when the event object // becomes readable or writable. diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index ff861d0fe..973fa0def 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -60,6 +60,7 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/syscalls/linux", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go index 92f7da40d..90b086468 100644 --- a/pkg/sentry/fsimpl/ext/file_description.go +++ b/pkg/sentry/fsimpl/ext/file_description.go @@ -26,6 +26,7 @@ import ( type fileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD } func (fd *fileDescription) filesystem() *filesystem { diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go index 485f86f4b..e4b434b13 100644 --- a/pkg/sentry/fsimpl/ext/inode.go +++ b/pkg/sentry/fsimpl/ext/inode.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -54,6 +55,8 @@ type inode struct { // diskInode gives us access to the inode struct on disk. Immutable. diskInode disklayout.Inode + locks lock.FileLocks + // This is immutable. The first field of the implementations must have inode // as the first field to ensure temporality. impl interface{} @@ -157,6 +160,7 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOpt switch in.impl.(type) { case *regularFile: var fd regularFileFD + fd.LockFD.Init(&in.locks) if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -168,6 +172,7 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOpt return nil, syserror.EISDIR } var fd directoryFD + fd.LockFD.Init(&in.locks) if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -178,6 +183,7 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOpt return nil, syserror.ELOOP } var fd symlinkFD + fd.LockFD.Init(&in.locks) fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) return &fd.vfsfd, nil default: diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go index 30135ddb0..f7015c44f 100644 --- a/pkg/sentry/fsimpl/ext/regular_file.go +++ b/pkg/sentry/fsimpl/ext/regular_file.go @@ -77,6 +77,7 @@ func (in *inode) isRegular() bool { // vfs.FileDescriptionImpl. type regularFileFD struct { fileDescription + vfs.LockFD // off is the file offset. off is accessed using atomic memory operations. off int64 diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index f5f35a3bc..5cdeeaeb5 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -54,6 +54,7 @@ go_library( "//pkg/p9", "//pkg/safemem", "//pkg/sentry/fs/fsutil", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsimpl/host", "//pkg/sentry/hostfd", "//pkg/sentry/kernel", @@ -68,6 +69,7 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserr", "//pkg/syserror", "//pkg/unet", diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 36e0e1856..40933b74b 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -801,6 +801,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf return nil, err } fd := ®ularFileFD{} + fd.LockFD.Init(&d.locks) if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ AllowDirectIO: true, }); err != nil { @@ -826,6 +827,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf } } fd := &directoryFD{} + fd.LockFD.Init(&d.locks) if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -842,7 +844,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf } case linux.S_IFIFO: if d.isSynthetic() { - return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags) + return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags, &d.locks) } } return d.openSpecialFileLocked(ctx, mnt, opts) @@ -902,7 +904,7 @@ retry: return nil, err } } - fd, err := newSpecialFileFD(h, mnt, d, opts.Flags) + fd, err := newSpecialFileFD(h, mnt, d, &d.locks, opts.Flags) if err != nil { h.close(ctx) return nil, err @@ -989,6 +991,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving var childVFSFD *vfs.FileDescription if useRegularFileFD { fd := ®ularFileFD{} + fd.LockFD.Init(&child.locks) if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{ AllowDirectIO: true, }); err != nil { @@ -1003,7 +1006,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving if fdobj != nil { h.fd = int32(fdobj.Release()) } - fd, err := newSpecialFileFD(h, mnt, child, opts.Flags) + fd, err := newSpecialFileFD(h, mnt, child, &d.locks, opts.Flags) if err != nil { h.close(ctx) return nil, err diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 3f3bd56f0..0d88a328e 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -45,6 +45,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -52,6 +53,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/usermem" @@ -662,6 +664,8 @@ type dentry struct { // If this dentry represents a synthetic named pipe, pipe is the pipe // endpoint bound to this file. pipe *pipe.VFSPipe + + locks lock.FileLocks } // dentryAttrMask returns a p9.AttrMask enabling all attributes used by the @@ -1366,6 +1370,9 @@ func (d *dentry) decLinks() { type fileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD + + lockLogging sync.Once } func (fd *fileDescription) filesystem() *filesystem { @@ -1416,3 +1423,19 @@ func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOption func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { return fd.dentry().removexattr(ctx, auth.CredentialsFromContext(ctx), name) } + +// LockBSD implements vfs.FileDescriptionImpl.LockBSD. +func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + fd.lockLogging.Do(func() { + log.Infof("File lock using gofer file handled internally.") + }) + return fd.LockFD.LockBSD(ctx, uid, t, block) +} + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { + fd.lockLogging.Do(func() { + log.Infof("Range lock using gofer file handled internally.") + }) + return fd.LockFD.LockPOSIX(ctx, uid, t, rng, block) +} diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index ff6126b87..289efdd25 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -51,7 +52,7 @@ type specialFileFD struct { off int64 } -func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) { +func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *lock.FileLocks, flags uint32) (*specialFileFD, error) { ftype := d.fileType() seekable := ftype == linux.S_IFREG mayBlock := ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK @@ -60,6 +61,7 @@ func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*speci seekable: seekable, mayBlock: mayBlock, } + fd.LockFD.Init(locks) if mayBlock && h.fd >= 0 { if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil { return nil, err diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index ca0fe6d2b..54f16ad63 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -39,6 +39,7 @@ go_library( "//pkg/sentry/unimpl", "//pkg/sentry/uniqueid", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 18b127521..5ec5100b8 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -34,6 +34,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/memmap" unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -182,6 +183,8 @@ type inode struct { kernfs.InodeNotDirectory kernfs.InodeNotSymlink + locks lock.FileLocks + // When the reference count reaches zero, the host fd is closed. refs.AtomicRefCount @@ -468,7 +471,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u return nil, err } // Currently, we only allow Unix sockets to be imported. - return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d) + return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d, &i.locks) } // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that @@ -478,6 +481,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u fileDescription: fileDescription{inode: i}, termios: linux.DefaultSlaveTermios, } + fd.LockFD.Init(&i.locks) vfsfd := &fd.vfsfd if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil { return nil, err @@ -486,6 +490,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u } fd := &fileDescription{inode: i} + fd.LockFD.Init(&i.locks) vfsfd := &fd.vfsfd if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil { return nil, err @@ -497,6 +502,7 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u type fileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD // inode is vfsfd.Dentry().Impl().(*kernfs.Dentry).Inode().(*inode), but // cached to reduce indirections and casting. fileDescription does not hold diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index ef34cb28a..0299dbde9 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -49,6 +49,7 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", @@ -67,6 +68,7 @@ go_test( "//pkg/sentry/fsimpl/testutil", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserror", "//pkg/usermem", "@com_github_google_go-cmp//cmp:go_default_library", diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index 1568a9d49..6418de0a3 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -38,7 +39,8 @@ type DynamicBytesFile struct { InodeNotDirectory InodeNotSymlink - data vfs.DynamicBytesSource + locks lock.FileLocks + data vfs.DynamicBytesSource } var _ Inode = (*DynamicBytesFile)(nil) @@ -55,7 +57,7 @@ func (f *DynamicBytesFile) Init(creds *auth.Credentials, devMajor, devMinor uint // Open implements Inode.Open. func (f *DynamicBytesFile) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { fd := &DynamicBytesFD{} - if err := fd.Init(rp.Mount(), vfsd, f.data, opts.Flags); err != nil { + if err := fd.Init(rp.Mount(), vfsd, f.data, &f.locks, opts.Flags); err != nil { return nil, err } return &fd.vfsfd, nil @@ -77,13 +79,15 @@ func (*DynamicBytesFile) SetStat(context.Context, *vfs.Filesystem, *auth.Credent type DynamicBytesFD struct { vfs.FileDescriptionDefaultImpl vfs.DynamicBytesFileDescriptionImpl + vfs.LockFD vfsfd vfs.FileDescription inode Inode } // Init initializes a DynamicBytesFD. -func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, flags uint32) error { +func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, locks *lock.FileLocks, flags uint32) error { + fd.LockFD.Init(locks) if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil { return err } diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 8284e76a7..33a5968ca 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -42,6 +43,7 @@ import ( type GenericDirectoryFD struct { vfs.FileDescriptionDefaultImpl vfs.DirectoryFileDescriptionDefaultImpl + vfs.LockFD vfsfd vfs.FileDescription children *OrderedChildren @@ -55,9 +57,9 @@ type GenericDirectoryFD struct { // NewGenericDirectoryFD creates a new GenericDirectoryFD and returns its // dentry. -func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, opts *vfs.OpenOptions) (*GenericDirectoryFD, error) { +func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, locks *lock.FileLocks, opts *vfs.OpenOptions) (*GenericDirectoryFD, error) { fd := &GenericDirectoryFD{} - if err := fd.Init(children, opts); err != nil { + if err := fd.Init(children, locks, opts); err != nil { return nil, err } if err := fd.vfsfd.Init(fd, opts.Flags, m, d, &vfs.FileDescriptionOptions{}); err != nil { @@ -69,11 +71,12 @@ func NewGenericDirectoryFD(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildre // Init initializes a GenericDirectoryFD. Use it when overriding // GenericDirectoryFD. Caller must call fd.VFSFileDescription.Init() with the // correct implementation. -func (fd *GenericDirectoryFD) Init(children *OrderedChildren, opts *vfs.OpenOptions) error { +func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *lock.FileLocks, opts *vfs.OpenOptions) error { if vfs.AccessTypesForOpenFlags(opts)&vfs.MayWrite != 0 { // Can't open directories for writing. return syserror.EISDIR } + fd.LockFD.Init(locks) fd.children = children return nil } diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 982daa2e6..0e4927215 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) @@ -555,6 +556,8 @@ type StaticDirectory struct { InodeAttrs InodeNoDynamicLookup OrderedChildren + + locks lock.FileLocks } var _ Inode = (*StaticDirectory)(nil) @@ -584,7 +587,7 @@ func (s *StaticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint3 // Open implements kernfs.Inode. func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := NewGenericDirectoryFD(rp.Mount(), vfsd, &s.OrderedChildren, &opts) + fd, err := NewGenericDirectoryFD(rp.Mount(), vfsd, &s.OrderedChildren, &s.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index 412cf6ac9..6749facf7 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -100,8 +101,10 @@ type readonlyDir struct { kernfs.InodeNotSymlink kernfs.InodeNoDynamicLookup kernfs.InodeDirectoryNoNewChildren - kernfs.OrderedChildren + + locks lock.FileLocks + dentry kernfs.Dentry } @@ -117,7 +120,7 @@ func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMod } func (d *readonlyDir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts) if err != nil { return nil, err } @@ -128,10 +131,12 @@ type dir struct { attrs kernfs.InodeNotSymlink kernfs.InodeNoDynamicLookup + kernfs.OrderedChildren + + locks lock.FileLocks fs *filesystem dentry kernfs.Dentry - kernfs.OrderedChildren } func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry { @@ -147,7 +152,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte } func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD index 5950a2d59..c618dbe6c 100644 --- a/pkg/sentry/fsimpl/pipefs/BUILD +++ b/pkg/sentry/fsimpl/pipefs/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserror", "//pkg/usermem", ], diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index cab771211..e4dabaa33 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -81,7 +82,8 @@ type inode struct { kernfs.InodeNotSymlink kernfs.InodeNoopRefCount - pipe *pipe.VFSPipe + locks lock.FileLocks + pipe *pipe.VFSPipe ino uint64 uid auth.KUID @@ -147,7 +149,7 @@ func (i *inode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth. // Open implements kernfs.Inode.Open. func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - return i.pipe.Open(ctx, rp.Mount(), vfsd, opts.Flags) + return i.pipe.Open(ctx, rp.Mount(), vfsd, opts.Flags, &i.locks) } // NewConnectedPipeFDs returns a pair of FileDescriptions representing the read diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 17c1342b5..351ba4ee9 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -35,6 +35,7 @@ go_library( "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserror", "//pkg/tcpip/header", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index 36a911db4..e2cdb7ee9 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -37,6 +38,8 @@ type subtasksInode struct { kernfs.OrderedChildren kernfs.AlwaysValid + locks lock.FileLocks + fs *filesystem task *kernel.Task pidns *kernel.PIDNamespace @@ -153,7 +156,7 @@ func (fd *subtasksFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) erro // Open implements kernfs.Inode. func (i *subtasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { fd := &subtasksFD{task: i.task} - if err := fd.Init(&i.OrderedChildren, &opts); err != nil { + if err := fd.Init(&i.OrderedChildren, &i.locks, &opts); err != nil { return nil, err } if err := fd.VFSFileDescription().Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 482055db1..44078a765 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -38,6 +39,8 @@ type taskInode struct { kernfs.InodeAttrs kernfs.OrderedChildren + locks lock.FileLocks + task *kernel.Task } @@ -103,7 +106,7 @@ func (i *taskInode) Valid(ctx context.Context) bool { // Open implements kernfs.Inode. func (i *taskInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go index 44ccc9e4a..ef6c1d04f 100644 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ b/pkg/sentry/fsimpl/proc/task_fds.go @@ -27,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -53,6 +54,8 @@ func taskFDExists(t *kernel.Task, fd int32) bool { } type fdDir struct { + locks lock.FileLocks + fs *filesystem task *kernel.Task @@ -143,7 +146,7 @@ func (i *fdDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro // Open implements kernfs.Inode. func (i *fdDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } @@ -270,7 +273,7 @@ func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, // Open implements kernfs.Inode. func (i *fdInfoDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 2f297e48a..e5eaa91cd 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -30,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/mm" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) @@ -775,6 +776,8 @@ type namespaceInode struct { kernfs.InodeNoopRefCount kernfs.InodeNotDirectory kernfs.InodeNotSymlink + + locks lock.FileLocks } var _ kernfs.Inode = (*namespaceInode)(nil) @@ -791,6 +794,7 @@ func (i *namespaceInode) Init(creds *auth.Credentials, devMajor, devMinor uint32 func (i *namespaceInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { fd := &namespaceFD{inode: i} i.IncRef() + fd.LockFD.Init(&i.locks) if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -801,6 +805,7 @@ func (i *namespaceInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd * // /proc/[pid]/ns/*. type namespaceFD struct { vfs.FileDescriptionDefaultImpl + vfs.LockFD vfsfd vfs.FileDescription inode *namespaceInode @@ -825,8 +830,3 @@ func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) err func (fd *namespaceFD) Release() { fd.inode.DecRef() } - -// OnClose implements FileDescriptionImpl. -func (*namespaceFD) OnClose(context.Context) error { - return nil -} diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index b51d43954..58c8b9d05 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -43,6 +44,8 @@ type tasksInode struct { kernfs.OrderedChildren kernfs.AlwaysValid + locks lock.FileLocks + fs *filesystem pidns *kernel.PIDNamespace @@ -197,7 +200,7 @@ func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback // Open implements kernfs.Inode. func (i *tasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/signalfd/signalfd.go b/pkg/sentry/fsimpl/signalfd/signalfd.go index d29ef3f83..242ba9b5d 100644 --- a/pkg/sentry/fsimpl/signalfd/signalfd.go +++ b/pkg/sentry/fsimpl/signalfd/signalfd.go @@ -31,6 +31,7 @@ type SignalFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD // target is the original signal target task. // diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD index a741e2bb6..237f17def 100644 --- a/pkg/sentry/fsimpl/sys/BUILD +++ b/pkg/sentry/fsimpl/sys/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserror", ], ) diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index 0af373604..b84463d3a 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserror" ) @@ -98,8 +99,10 @@ type dir struct { kernfs.InodeNoDynamicLookup kernfs.InodeNotSymlink kernfs.InodeDirectoryNoNewChildren - kernfs.OrderedChildren + + locks lock.FileLocks + dentry kernfs.Dentry } @@ -121,7 +124,7 @@ func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.Set // Open implements kernfs.Inode.Open. func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &opts) + fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &d.locks, &opts) if err != nil { return nil, err } diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go index 60c92d626..2dc90d484 100644 --- a/pkg/sentry/fsimpl/timerfd/timerfd.go +++ b/pkg/sentry/fsimpl/timerfd/timerfd.go @@ -32,6 +32,7 @@ type TimerFileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.NoLockFD events waiter.Queue timer *ktime.Timer diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index e801680e8..72399b321 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -399,6 +399,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open switch impl := d.inode.impl.(type) { case *regularFile: var fd regularFileFD + fd.LockFD.Init(&d.inode.locks) if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } @@ -414,15 +415,16 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open return nil, syserror.EISDIR } var fd directoryFD + fd.LockFD.Init(&d.inode.locks) if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { return nil, err } return &fd.vfsfd, nil case *symlink: - // Can't open symlinks without O_PATH (which is unimplemented). + // TODO(gvisor.dev/issue/2782): Can't open symlinks without O_PATH. return nil, syserror.ELOOP case *namedPipe: - return impl.pipe.Open(ctx, rp.Mount(), &d.vfsd, opts.Flags) + return impl.pipe.Open(ctx, rp.Mount(), &d.vfsd, opts.Flags, &d.inode.locks) case *deviceFile: return rp.VirtualFilesystem().OpenDeviceSpecialFile(ctx, rp.Mount(), &d.vfsd, impl.kind, impl.major, impl.minor, opts) case *socketFile: diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 4f2ae04d2..77447b32c 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -25,7 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -366,28 +365,6 @@ func (fd *regularFileFD) Sync(ctx context.Context) error { return nil } -// LockBSD implements vfs.FileDescriptionImpl.LockBSD. -func (fd *regularFileFD) LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error { - return fd.inode().lockBSD(uid, t, block) -} - -// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. -func (fd *regularFileFD) UnlockBSD(ctx context.Context, uid lock.UniqueID) error { - fd.inode().unlockBSD(uid) - return nil -} - -// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (fd *regularFileFD) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error { - return fd.inode().lockPOSIX(uid, t, rng, block) -} - -// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. -func (fd *regularFileFD) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error { - fd.inode().unlockPOSIX(uid, rng) - return nil -} - // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { file := fd.inode().impl.(*regularFile) diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 7ce1b86c7..71a7522af 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -36,7 +36,6 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" - fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/pgalloc" @@ -311,7 +310,6 @@ type inode struct { ctime int64 // nanoseconds mtime int64 // nanoseconds - // Advisory file locks, which lock at the inode level. locks lock.FileLocks // Inotify watches for this inode. @@ -539,44 +537,6 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linu return nil } -// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. -func (i *inode) lockBSD(uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { - switch i.impl.(type) { - case *regularFile: - return i.locks.LockBSD(uid, t, block) - } - return syserror.EBADF -} - -// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. -func (i *inode) unlockBSD(uid fslock.UniqueID) error { - switch i.impl.(type) { - case *regularFile: - i.locks.UnlockBSD(uid) - return nil - } - return syserror.EBADF -} - -// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. -func (i *inode) lockPOSIX(uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { - switch i.impl.(type) { - case *regularFile: - return i.locks.LockPOSIX(uid, t, rng, block) - } - return syserror.EBADF -} - -// TODO(gvisor.dev/issue/1480): support file locking for file types other than regular. -func (i *inode) unlockPOSIX(uid fslock.UniqueID, rng fslock.LockRange) error { - switch i.impl.(type) { - case *regularFile: - i.locks.UnlockPOSIX(uid, rng) - return nil - } - return syserror.EBADF -} - // allocatedBlocksForSize returns the number of 512B blocks needed to // accommodate the given size in bytes, as appropriate for struct // stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block @@ -708,6 +668,7 @@ func (i *inode) userXattrSupported() bool { type fileDescription struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD } func (fd *fileDescription) filesystem() *filesystem { diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index dbfcef0fa..b35afafe3 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -80,9 +80,6 @@ type FDTable struct { refs.AtomicRefCount k *Kernel - // uid is a unique identifier. - uid uint64 - // mu protects below. mu sync.Mutex `state:"nosave"` @@ -130,7 +127,7 @@ func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) { // drop drops the table reference. func (f *FDTable) drop(file *fs.File) { // Release locks. - file.Dirent.Inode.LockCtx.Posix.UnlockRegion(lock.UniqueID(f.uid), lock.LockRange{0, lock.LockEOF}) + file.Dirent.Inode.LockCtx.Posix.UnlockRegion(f, lock.LockRange{0, lock.LockEOF}) // Send inotify events. d := file.Dirent @@ -164,17 +161,9 @@ func (f *FDTable) dropVFS2(file *vfs.FileDescription) { file.DecRef() } -// ID returns a unique identifier for this FDTable. -func (f *FDTable) ID() uint64 { - return f.uid -} - // NewFDTable allocates a new FDTable that may be used by tasks in k. func (k *Kernel) NewFDTable() *FDTable { - f := &FDTable{ - k: k, - uid: atomic.AddUint64(&k.fdMapUids, 1), - } + f := &FDTable{k: k} f.init() return f } diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 5efeb3767..bcbeb6a39 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -194,11 +194,6 @@ type Kernel struct { // cpuClockTickerSetting is protected by runningTasksMu. cpuClockTickerSetting ktime.Setting - // fdMapUids is an ever-increasing counter for generating FDTable uids. - // - // fdMapUids is mutable, and is accessed using atomic memory operations. - fdMapUids uint64 - // uniqueID is used to generate unique identifiers. // // uniqueID is mutable, and is accessed using atomic memory operations. diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD index 7bfa9075a..0db546b98 100644 --- a/pkg/sentry/kernel/pipe/BUILD +++ b/pkg/sentry/kernel/pipe/BUILD @@ -27,6 +27,7 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 2602bed72..c0e9ee1f4 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -61,11 +62,13 @@ func NewVFSPipe(isNamed bool, sizeBytes, atomicIOBytes int64) *VFSPipe { // // Preconditions: statusFlags should not contain an open access mode. func (vp *VFSPipe) ReaderWriterPair(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription) { - return vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags), vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags) + // Connected pipes share the same locks. + locks := &lock.FileLocks{} + return vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags, locks), vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags, locks) } // Open opens the pipe represented by vp. -func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, error) { +func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *lock.FileLocks) (*vfs.FileDescription, error) { vp.mu.Lock() defer vp.mu.Unlock() @@ -75,7 +78,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s return nil, syserror.EINVAL } - fd := vp.newFD(mnt, vfsd, statusFlags) + fd := vp.newFD(mnt, vfsd, statusFlags, locks) // Named pipes have special blocking semantics during open: // @@ -127,10 +130,11 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s } // Preconditions: vp.mu must be held. -func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) *vfs.FileDescription { +func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *lock.FileLocks) *vfs.FileDescription { fd := &VFSPipeFD{ pipe: &vp.pipe, } + fd.LockFD.Init(locks) fd.vfsfd.Init(fd, statusFlags, mnt, vfsd, &vfs.FileDescriptionOptions{ DenyPRead: true, DenyPWrite: true, @@ -159,6 +163,7 @@ type VFSPipeFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.LockFD pipe *Pipe } diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD index e82d6cd1e..60c9896fc 100644 --- a/pkg/sentry/socket/hostinet/BUILD +++ b/pkg/sentry/socket/hostinet/BUILD @@ -34,6 +34,7 @@ go_library( "//pkg/sentry/socket", "//pkg/sentry/socket/control", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip/stack", diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go index 677743113..027add1fd 100644 --- a/pkg/sentry/socket/hostinet/socket_vfs2.go +++ b/pkg/sentry/socket/hostinet/socket_vfs2.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -35,6 +36,7 @@ import ( type socketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl + vfs.LockFD // We store metadata for hostinet sockets internally. Technically, we should // access metadata (e.g. through stat, chmod) on the host for correctness, @@ -59,6 +61,7 @@ func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol in fd: fd, }, } + s.LockFD.Init(&lock.FileLocks{}) if err := fdnotifier.AddFD(int32(fd), &s.queue); err != nil { return nil, syserr.FromError(err) } diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD index 7212d8644..420e573c9 100644 --- a/pkg/sentry/socket/netlink/BUILD +++ b/pkg/sentry/socket/netlink/BUILD @@ -29,6 +29,7 @@ go_library( "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go index b854bf990..8bfee5193 100644 --- a/pkg/sentry/socket/netlink/socket_vfs2.go +++ b/pkg/sentry/socket/netlink/socket_vfs2.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -40,6 +41,7 @@ type SocketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.LockFD socketOpsCommon } @@ -66,7 +68,7 @@ func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketV return nil, err } - return &SocketVFS2{ + fd := &SocketVFS2{ socketOpsCommon: socketOpsCommon{ ports: t.Kernel().NetlinkPorts(), protocol: protocol, @@ -75,7 +77,9 @@ func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketV connection: connection, sendBufferSize: defaultSendBufferSize, }, - }, nil + } + fd.LockFD.Init(&lock.FileLocks{}) + return fd, nil } // Readiness implements waiter.Waitable.Readiness. diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 8f0f5466e..0f592ecc3 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -37,6 +37,7 @@ go_library( "//pkg/sentry/socket/netfilter", "//pkg/sentry/unimpl", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserr", "//pkg/syserror", diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index fcd8013c0..1412a4810 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -25,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket" "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -38,6 +39,7 @@ type SocketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.LockFD socketOpsCommon } @@ -64,6 +66,7 @@ func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, qu protocol: protocol, }, } + s.LockFD.Init(&lock.FileLocks{}) vfsfd := &s.vfsfd if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{ DenyPRead: true, diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD index de2cc4bdf..7d4cc80fe 100644 --- a/pkg/sentry/socket/unix/BUILD +++ b/pkg/sentry/socket/unix/BUILD @@ -29,6 +29,7 @@ go_library( "//pkg/sentry/socket/netstack", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/vfs", + "//pkg/sentry/vfs/lock", "//pkg/syserr", "//pkg/syserror", "//pkg/tcpip", diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 45e109361..8c32371a2 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -26,6 +26,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/syserr" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip" @@ -39,6 +40,7 @@ type SocketVFS2 struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl vfs.DentryMetadataFileDescriptionImpl + vfs.LockFD socketOpsCommon } @@ -51,7 +53,7 @@ func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) mnt := t.Kernel().SocketMount() d := sockfs.NewDentry(t.Credentials(), mnt) - fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d) + fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &lock.FileLocks{}) if err != nil { return nil, syserr.FromError(err) } @@ -60,7 +62,7 @@ func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) // NewFileDescription creates and returns a socket file description // corresponding to the given mount and dentry. -func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry) (*vfs.FileDescription, error) { +func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry, locks *lock.FileLocks) (*vfs.FileDescription, error) { // You can create AF_UNIX, SOCK_RAW sockets. They're the same as // SOCK_DGRAM and don't require CAP_NET_RAW. if stype == linux.SOCK_RAW { @@ -73,6 +75,7 @@ func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint3 stype: stype, }, } + sock.LockFD.Init(locks) vfsfd := &sock.vfsfd if err := vfsfd.Init(sock, flags, mnt, d, &vfs.FileDescriptionOptions{ DenyPRead: true, diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 35a98212a..8347617bd 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -998,9 +998,6 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, err } - // The lock uid is that of the Task's FDTable. - lockUniqueID := lock.UniqueID(t.FDTable().ID()) - // These locks don't block; execute the non-blocking operation using the inode's lock // context directly. switch flock.Type { @@ -1010,12 +1007,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.ReadLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, nil) { return 0, nil, syserror.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.ReadLock, rng, t) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, t) { return 0, nil, syserror.EINTR } } @@ -1026,18 +1023,18 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.WriteLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, nil) { return 0, nil, syserror.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(lockUniqueID, lock.WriteLock, rng, t) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, t) { return 0, nil, syserror.EINTR } } return 0, nil, nil case linux.F_UNLCK: - file.Dirent.Inode.LockCtx.Posix.UnlockRegion(lockUniqueID, rng) + file.Dirent.Inode.LockCtx.Posix.UnlockRegion(t.FDTable(), rng) return 0, nil, nil default: return 0, nil, syserror.EINVAL @@ -2157,22 +2154,6 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall nonblocking := operation&linux.LOCK_NB != 0 operation &^= linux.LOCK_NB - // flock(2): - // Locks created by flock() are associated with an open file table entry. This means that - // duplicate file descriptors (created by, for example, fork(2) or dup(2)) refer to the - // same lock, and this lock may be modified or released using any of these descriptors. Furthermore, - // the lock is released either by an explicit LOCK_UN operation on any of these duplicate - // descriptors, or when all such descriptors have been closed. - // - // If a process uses open(2) (or similar) to obtain more than one descriptor for the same file, - // these descriptors are treated independently by flock(). An attempt to lock the file using - // one of these file descriptors may be denied by a lock that the calling process has already placed via - // another descriptor. - // - // We use the File UniqueID as the lock UniqueID because it needs to reference the same lock across dup(2) - // and fork(2). - lockUniqueID := lock.UniqueID(file.UniqueID) - // A BSD style lock spans the entire file. rng := lock.LockRange{ Start: 0, @@ -2183,29 +2164,29 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.LOCK_EX: if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(lockUniqueID, lock.WriteLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, nil) { return 0, nil, syserror.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(lockUniqueID, lock.WriteLock, rng, t) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, t) { return 0, nil, syserror.EINTR } } case linux.LOCK_SH: if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(lockUniqueID, lock.ReadLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, nil) { return 0, nil, syserror.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(lockUniqueID, lock.ReadLock, rng, t) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, t) { return 0, nil, syserror.EINTR } } case linux.LOCK_UN: - file.Dirent.Inode.LockCtx.BSD.UnlockRegion(lockUniqueID, rng) + file.Dirent.Inode.LockCtx.BSD.UnlockRegion(file, rng) default: // flock(2): EINVAL operation is invalid. return 0, nil, syserror.EINVAL diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index c0d005247..9f93f4354 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -14,6 +14,7 @@ go_library( "getdents.go", "inotify.go", "ioctl.go", + "lock.go", "memfd.go", "mmap.go", "mount.go", @@ -42,6 +43,7 @@ go_library( "//pkg/fspath", "//pkg/gohacks", "//pkg/sentry/arch", + "//pkg/sentry/fs/lock", "//pkg/sentry/fsbridge", "//pkg/sentry/fsimpl/eventfd", "//pkg/sentry/fsimpl/pipefs", diff --git a/pkg/sentry/syscalls/linux/vfs2/lock.go b/pkg/sentry/syscalls/linux/vfs2/lock.go new file mode 100644 index 000000000..bf19028c4 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/lock.go @@ -0,0 +1,64 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fs/lock" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Flock implements linux syscall flock(2). +func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + operation := args[1].Int() + + file := t.GetFileVFS2(fd) + if file == nil { + // flock(2): EBADF fd is not an open file descriptor. + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + nonblocking := operation&linux.LOCK_NB != 0 + operation &^= linux.LOCK_NB + + var blocker lock.Blocker + if !nonblocking { + blocker = t + } + + switch operation { + case linux.LOCK_EX: + if err := file.LockBSD(t, lock.WriteLock, blocker); err != nil { + return 0, nil, err + } + case linux.LOCK_SH: + if err := file.LockBSD(t, lock.ReadLock, blocker); err != nil { + return 0, nil, err + } + case linux.LOCK_UN: + if err := file.UnlockBSD(t); err != nil { + return 0, nil, err + } + default: + // flock(2): EINVAL operation is invalid. + return 0, nil, syserror.EINVAL + } + + return 0, nil, nil +} diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index 7b6e7571a..954c82f97 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -62,7 +62,7 @@ func Override() { s.Table[55] = syscalls.Supported("getsockopt", GetSockOpt) s.Table[59] = syscalls.Supported("execve", Execve) s.Table[72] = syscalls.Supported("fcntl", Fcntl) - delete(s.Table, 73) // flock + s.Table[73] = syscalls.Supported("fcntl", Flock) s.Table[74] = syscalls.Supported("fsync", Fsync) s.Table[75] = syscalls.Supported("fdatasync", Fdatasync) s.Table[76] = syscalls.Supported("truncate", Truncate) diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD index 774cc66cc..16d9f3a28 100644 --- a/pkg/sentry/vfs/BUILD +++ b/pkg/sentry/vfs/BUILD @@ -72,6 +72,7 @@ go_library( "//pkg/sentry/memmap", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/uniqueid", + "//pkg/sentry/vfs/lock", "//pkg/sync", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go index 8297f964b..599c3131c 100644 --- a/pkg/sentry/vfs/epoll.go +++ b/pkg/sentry/vfs/epoll.go @@ -31,6 +31,7 @@ type EpollInstance struct { vfsfd FileDescription FileDescriptionDefaultImpl DentryMetadataFileDescriptionImpl + NoLockFD // q holds waiters on this EpollInstance. q waiter.Queue diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index bb294563d..97b9b18d7 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -73,6 +73,8 @@ type FileDescription struct { // writable is analogous to Linux's FMODE_WRITE. writable bool + usedLockBSD uint32 + // impl is the FileDescriptionImpl associated with this Filesystem. impl is // immutable. This should be the last field in FileDescription. impl FileDescriptionImpl @@ -175,6 +177,12 @@ func (fd *FileDescription) DecRef() { } ep.interestMu.Unlock() } + + // If BSD locks were used, release any lock that it may have acquired. + if atomic.LoadUint32(&fd.usedLockBSD) != 0 { + fd.impl.UnlockBSD(context.Background(), fd) + } + // Release implementation resources. fd.impl.Release() if fd.writable { @@ -420,13 +428,9 @@ type FileDescriptionImpl interface { Removexattr(ctx context.Context, name string) error // LockBSD tries to acquire a BSD-style advisory file lock. - // - // TODO(gvisor.dev/issue/1480): BSD-style file locking LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error - // LockBSD releases a BSD-style advisory file lock. - // - // TODO(gvisor.dev/issue/1480): BSD-style file locking + // UnlockBSD releases a BSD-style advisory file lock. UnlockBSD(ctx context.Context, uid lock.UniqueID) error // LockPOSIX tries to acquire a POSIX-style advisory file lock. @@ -736,3 +740,14 @@ func (fd *FileDescription) InodeID() uint64 { func (fd *FileDescription) Msync(ctx context.Context, mr memmap.MappableRange) error { return fd.Sync(ctx) } + +// LockBSD tries to acquire a BSD-style advisory file lock. +func (fd *FileDescription) LockBSD(ctx context.Context, lockType lock.LockType, blocker lock.Blocker) error { + atomic.StoreUint32(&fd.usedLockBSD, 1) + return fd.impl.LockBSD(ctx, fd, lockType, blocker) +} + +// UnlockBSD releases a BSD-style advisory file lock. +func (fd *FileDescription) UnlockBSD(ctx context.Context) error { + return fd.impl.UnlockBSD(ctx, fd) +} diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index f4c111926..af7213dfd 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -21,8 +21,9 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" - "gvisor.dev/gvisor/pkg/sentry/fs/lock" + fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs/lock" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -153,26 +154,6 @@ func (FileDescriptionDefaultImpl) Removexattr(ctx context.Context, name string) return syserror.ENOTSUP } -// LockBSD implements FileDescriptionImpl.LockBSD. -func (FileDescriptionDefaultImpl) LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error { - return syserror.EBADF -} - -// UnlockBSD implements FileDescriptionImpl.UnlockBSD. -func (FileDescriptionDefaultImpl) UnlockBSD(ctx context.Context, uid lock.UniqueID) error { - return syserror.EBADF -} - -// LockPOSIX implements FileDescriptionImpl.LockPOSIX. -func (FileDescriptionDefaultImpl) LockPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, rng lock.LockRange, block lock.Blocker) error { - return syserror.EBADF -} - -// UnlockPOSIX implements FileDescriptionImpl.UnlockPOSIX. -func (FileDescriptionDefaultImpl) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, rng lock.LockRange) error { - return syserror.EBADF -} - // DirectoryFileDescriptionDefaultImpl may be embedded by implementations of // FileDescriptionImpl that always represent directories to obtain // implementations of non-directory I/O methods that return EISDIR. @@ -384,3 +365,60 @@ func GenericConfigureMMap(fd *FileDescription, m memmap.Mappable, opts *memmap.M fd.IncRef() return nil } + +// LockFD may be used by most implementations of FileDescriptionImpl.Lock* +// functions. Caller must call Init(). +type LockFD struct { + locks *lock.FileLocks +} + +// Init initializes fd with FileLocks to use. +func (fd *LockFD) Init(locks *lock.FileLocks) { + fd.locks = locks +} + +// LockBSD implements vfs.FileDescriptionImpl.LockBSD. +func (fd *LockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + return fd.locks.LockBSD(uid, t, block) +} + +// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. +func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { + fd.locks.UnlockBSD(uid) + return nil +} + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (fd *LockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { + return fd.locks.LockPOSIX(uid, t, rng, block) +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (fd *LockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, rng fslock.LockRange) error { + fd.locks.UnlockPOSIX(uid, rng) + return nil +} + +// NoLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface +// returning ENOLCK. +type NoLockFD struct{} + +// LockBSD implements vfs.FileDescriptionImpl.LockBSD. +func (NoLockFD) LockBSD(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, block fslock.Blocker) error { + return syserror.ENOLCK +} + +// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. +func (NoLockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { + return syserror.ENOLCK +} + +// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. +func (NoLockFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, rng fslock.LockRange, block fslock.Blocker) error { + return syserror.ENOLCK +} + +// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. +func (NoLockFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, rng fslock.LockRange) error { + return syserror.ENOLCK +} diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go index 3a75d4d62..5061f6ac9 100644 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ b/pkg/sentry/vfs/file_description_impl_util_test.go @@ -33,6 +33,7 @@ import ( type fileDescription struct { vfsfd FileDescription FileDescriptionDefaultImpl + NoLockFD } // genCount contains the number of times its DynamicBytesSource.Generate() diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 05a3051a4..7fa7d2d0c 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -57,6 +57,7 @@ type Inotify struct { vfsfd FileDescription FileDescriptionDefaultImpl DentryMetadataFileDescriptionImpl + NoLockFD // Unique identifier for this inotify instance. We don't just reuse the // inotify fd because fds can be duped. These should not be exposed to the diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index ae2aa44dc..4a1486e14 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -802,10 +802,13 @@ cc_binary( ], linkstatic = 1, deps = [ + ":socket_test_util", "//test/util:file_descriptor", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", gtest, + "//test/util:epoll_util", + "//test/util:eventfd_util", "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", diff --git a/test/syscalls/linux/flock.cc b/test/syscalls/linux/flock.cc index 3ecb8db8e..638a93979 100644 --- a/test/syscalls/linux/flock.cc +++ b/test/syscalls/linux/flock.cc @@ -21,6 +21,7 @@ #include "absl/time/clock.h" #include "absl/time/time.h" #include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/socket_test_util.h" #include "test/util/file_descriptor.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -34,11 +35,6 @@ namespace { class FlockTest : public FileTest {}; -TEST_F(FlockTest, BadFD) { - // EBADF: fd is not an open file descriptor. - ASSERT_THAT(flock(-1, 0), SyscallFailsWithErrno(EBADF)); -} - TEST_F(FlockTest, InvalidOpCombinations) { // The operation cannot be both exclusive and shared. EXPECT_THAT(flock(test_file_fd_.get(), LOCK_EX | LOCK_SH | LOCK_NB), @@ -57,15 +53,6 @@ TEST_F(FlockTest, NoOperationSpecified) { SyscallFailsWithErrno(EINVAL)); } -TEST(FlockTestNoFixture, FlockSupportsPipes) { - int fds[2]; - ASSERT_THAT(pipe(fds), SyscallSucceeds()); - - EXPECT_THAT(flock(fds[0], LOCK_EX | LOCK_NB), SyscallSucceeds()); - EXPECT_THAT(close(fds[0]), SyscallSucceeds()); - EXPECT_THAT(close(fds[1]), SyscallSucceeds()); -} - TEST_F(FlockTest, TestSimpleExLock) { // Test that we can obtain an exclusive lock (no other holders) // and that we can unlock it. @@ -583,6 +570,66 @@ TEST_F(FlockTest, BlockingLockFirstExclusiveSecondExclusive_NoRandomSave) { EXPECT_THAT(flock(test_file_fd_.get(), LOCK_UN), SyscallSucceeds()); } +TEST(FlockTestNoFixture, BadFD) { + // EBADF: fd is not an open file descriptor. + ASSERT_THAT(flock(-1, 0), SyscallFailsWithErrno(EBADF)); +} + +TEST(FlockTestNoFixture, FlockDir) { + auto dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + auto fd = ASSERT_NO_ERRNO_AND_VALUE(Open(dir.path(), O_RDONLY, 0000)); + EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, FlockSymlink) { + // TODO(gvisor.dev/issue/2782): Replace with IsRunningWithVFS1() when O_PATH + // is supported. + SKIP_IF(IsRunningOnGvisor()); + + auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + auto symlink = ASSERT_NO_ERRNO_AND_VALUE( + TempPath::CreateSymlinkTo(GetAbsoluteTestTmpdir(), file.path())); + + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(symlink.path(), O_RDONLY | O_PATH, 0000)); + EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallFailsWithErrno(EBADF)); +} + +TEST(FlockTestNoFixture, FlockProc) { + auto fd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/status", O_RDONLY, 0000)); + EXPECT_THAT(flock(fd.get(), LOCK_EX | LOCK_NB), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, FlockPipe) { + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + + EXPECT_THAT(flock(fds[0], LOCK_EX | LOCK_NB), SyscallSucceeds()); + // Check that the pipe was locked above. + EXPECT_THAT(flock(fds[1], LOCK_EX | LOCK_NB), SyscallFailsWithErrno(EAGAIN)); + + EXPECT_THAT(flock(fds[0], LOCK_UN), SyscallSucceeds()); + EXPECT_THAT(flock(fds[1], LOCK_EX | LOCK_NB), SyscallSucceeds()); + + EXPECT_THAT(close(fds[0]), SyscallSucceeds()); + EXPECT_THAT(close(fds[1]), SyscallSucceeds()); +} + +TEST(FlockTestNoFixture, FlockSocket) { + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_THAT(sock, SyscallSucceeds()); + + struct sockaddr_un addr = + ASSERT_NO_ERRNO_AND_VALUE(UniqueUnixAddr(true /* abstract */, AF_UNIX)); + ASSERT_THAT( + bind(sock, reinterpret_cast(&addr), sizeof(addr)), + SyscallSucceeds()); + + EXPECT_THAT(flock(sock, LOCK_EX | LOCK_NB), SyscallSucceeds()); + EXPECT_THAT(close(sock), SyscallSucceeds()); +} + } // namespace } // namespace testing -- cgit v1.2.3 From cbfb537eef7c531b596c3aecd947ee6920bea2d7 Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Tue, 2 Jun 2020 22:55:31 -0400 Subject: Deleting the maxSendBufferSize from fs/host When I do high-performance networking, the value of wmem_max is often set very high, specially for 10/25/50 Gigabit NIC. I think maybe this restriction is not suitable. Signed-off-by: Bin Lu --- pkg/sentry/fs/host/socket.go | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index b6e94583e..cfb089e43 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -22,7 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/fdnotifier" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/socket/control" @@ -39,11 +38,6 @@ import ( // LINT.IfChange -// maxSendBufferSize is the maximum host send buffer size allowed for endpoint. -// -// N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max). -const maxSendBufferSize = 8 << 20 - // ConnectedEndpoint is a host FD backed implementation of // transport.ConnectedEndpoint and transport.Receiver. // @@ -103,10 +97,6 @@ func (c *ConnectedEndpoint) init() *syserr.Error { if err != nil { return syserr.FromError(err) } - if sndbuf > maxSendBufferSize { - log.Warningf("Socket send buffer too large: %d", sndbuf) - return syserr.ErrInvalidEndpointState - } c.stype = linux.SockType(stype) c.sndbuf = int64(sndbuf) -- cgit v1.2.3 From 6d806ee7198422973a2e4efa9b539de7792b933f Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 17 Jun 2020 12:32:59 -0700 Subject: Remove various uses of 'blacklist' Updates #2972 PiperOrigin-RevId: 316942245 --- pkg/sentry/fs/host/tty.go | 6 +++--- pkg/sentry/fsimpl/host/tty.go | 6 +++--- test/runner/runner.go | 6 +++--- tools/nogo/matchers.go | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index cb91355ab..82a02fcb2 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -308,9 +308,9 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e task := kernel.TaskFromContext(ctx) if task == nil { // No task? Linux does not have an analog for this case, but - // tty_check_change is more of a blacklist of cases than a - // whitelist, and is surprisingly permissive. Allowing the - // change seems most appropriate. + // tty_check_change only blocks specific cases and is + // surprisingly permissive. Allowing the change seems + // appropriate. return nil } diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go index 0fbc543b1..4ee9270cc 100644 --- a/pkg/sentry/fsimpl/host/tty.go +++ b/pkg/sentry/fsimpl/host/tty.go @@ -326,9 +326,9 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal) task := kernel.TaskFromContext(ctx) if task == nil { // No task? Linux does not have an analog for this case, but - // tty_check_change is more of a blacklist of cases than a - // whitelist, and is surprisingly permissive. Allowing the - // change seems most appropriate. + // tty_check_change only blocks specific cases and is + // surprisingly permissive. Allowing the change seems + // appropriate. return nil } diff --git a/test/runner/runner.go b/test/runner/runner.go index f8baf61b0..5456e46a6 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -391,12 +391,12 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { } } -// filterEnv returns an environment with the blacklisted variables removed. -func filterEnv(env, blacklist []string) []string { +// filterEnv returns an environment with the excluded variables removed. +func filterEnv(env, exclude []string) []string { var out []string for _, kv := range env { ok := true - for _, k := range blacklist { + for _, k := range exclude { if strings.HasPrefix(kv, k+"=") { ok = false break diff --git a/tools/nogo/matchers.go b/tools/nogo/matchers.go index bc5772303..32f099925 100644 --- a/tools/nogo/matchers.go +++ b/tools/nogo/matchers.go @@ -89,7 +89,7 @@ func (r resultExcluded) ShouldReport(d analysis.Diagnostic, _ *token.FileSet) bo return false } } - return true // Not blacklisted. + return true // Not excluded. } // andMatcher is a composite matcher. -- cgit v1.2.3 From 3970c127434817304f67a2ad192cbe8094ad3353 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 18 Jun 2020 09:02:14 -0700 Subject: Remove various uses of 'whitelist' Updates #2972 PiperOrigin-RevId: 317113059 --- pkg/bpf/interpreter_test.go | 2 +- pkg/cpuid/cpuid_arm64.go | 5 +++-- pkg/cpuid/cpuid_x86.go | 7 +++---- pkg/seccomp/seccomp_rules.go | 4 ++-- pkg/sentry/fs/filesystems.go | 14 -------------- pkg/sentry/fsimpl/host/host.go | 5 +++-- pkg/sentry/socket/hostinet/socket.go | 8 ++++---- pkg/sentry/vfs/README.md | 2 -- test/syscalls/linux/xattr.cc | 5 +++-- tools/nogo/matchers.go | 27 ++++++++++++++++----------- 10 files changed, 35 insertions(+), 44 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/bpf/interpreter_test.go b/pkg/bpf/interpreter_test.go index 547921d0a..c85d786b9 100644 --- a/pkg/bpf/interpreter_test.go +++ b/pkg/bpf/interpreter_test.go @@ -767,7 +767,7 @@ func TestSimpleFilter(t *testing.T) { expectedRet: 0, }, { - desc: "Whitelisted syscall is allowed", + desc: "Allowed syscall is indeed allowed", seccompData: seccompData{nr: 231 /* __NR_exit_group */, arch: 0xc000003e}, expectedRet: 0x7fff0000, }, diff --git a/pkg/cpuid/cpuid_arm64.go b/pkg/cpuid/cpuid_arm64.go index 08381c1c0..ac7bb6774 100644 --- a/pkg/cpuid/cpuid_arm64.go +++ b/pkg/cpuid/cpuid_arm64.go @@ -312,8 +312,9 @@ func HostFeatureSet() *FeatureSet { } } -// Reads bogomips from host /proc/cpuinfo. Must run before whitelisting. -// This value is used to create the fake /proc/cpuinfo from a FeatureSet. +// Reads bogomips from host /proc/cpuinfo. Must run before syscall filter +// installation. This value is used to create the fake /proc/cpuinfo from a +// FeatureSet. func initCPUInfo() { cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo") if err != nil { diff --git a/pkg/cpuid/cpuid_x86.go b/pkg/cpuid/cpuid_x86.go index 562f8f405..17a89c00d 100644 --- a/pkg/cpuid/cpuid_x86.go +++ b/pkg/cpuid/cpuid_x86.go @@ -1057,9 +1057,9 @@ func HostFeatureSet() *FeatureSet { } } -// Reads max cpu frequency from host /proc/cpuinfo. Must run before -// whitelisting. This value is used to create the fake /proc/cpuinfo from a -// FeatureSet. +// Reads max cpu frequency from host /proc/cpuinfo. Must run before syscall +// filter installation. This value is used to create the fake /proc/cpuinfo +// from a FeatureSet. func initCPUFreq() { cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo") if err != nil { @@ -1106,7 +1106,6 @@ func initFeaturesFromString() { } func init() { - // initCpuFreq must be run before whitelists are enabled. initCPUFreq() initFeaturesFromString() } diff --git a/pkg/seccomp/seccomp_rules.go b/pkg/seccomp/seccomp_rules.go index 06308cd29..a52dc1b4e 100644 --- a/pkg/seccomp/seccomp_rules.go +++ b/pkg/seccomp/seccomp_rules.go @@ -56,7 +56,7 @@ func (a AllowValue) String() (s string) { return fmt.Sprintf("%#x ", uintptr(a)) } -// Rule stores the whitelist of syscall arguments. +// Rule stores the allowed syscall arguments. // // For example: // rule := Rule { @@ -82,7 +82,7 @@ func (r Rule) String() (s string) { return } -// SyscallRules stores a map of OR'ed whitelist rules indexed by the syscall number. +// SyscallRules stores a map of OR'ed argument rules indexed by the syscall number. // If the 'Rules' is empty, we treat it as any argument is allowed. // // For example: diff --git a/pkg/sentry/fs/filesystems.go b/pkg/sentry/fs/filesystems.go index 084da2a8d..d41f30bbb 100644 --- a/pkg/sentry/fs/filesystems.go +++ b/pkg/sentry/fs/filesystems.go @@ -87,20 +87,6 @@ func RegisterFilesystem(f Filesystem) { filesystems.registered[f.Name()] = f } -// UnregisterFilesystem removes a file system from the global set. To keep the -// file system set compatible with save/restore, UnregisterFilesystem must be -// called before save/restore methods. -// -// For instance, packages may unregister their file system after it is mounted. -// This makes sense for pseudo file systems that should not be visible or -// mountable. See whitelistfs in fs/host/fs.go for one example. -func UnregisterFilesystem(name string) { - filesystems.mu.Lock() - defer filesystems.mu.Unlock() - - delete(filesystems.registered, name) -} - // FindFilesystem returns a Filesystem registered at name or (nil, false) if name // is not a file system type that can be found in /proc/filesystems. func FindFilesystem(name string) (Filesystem, bool) { diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index a3a312edb..43a173bc9 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -476,8 +476,9 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags u return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d, &i.locks) } - // TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that - // we don't allow importing arbitrary file types without proper support. + // TODO(gvisor.dev/issue/1672): Allow only specific file types here, so + // that we don't allow importing arbitrary file types without proper + // support. if i.isTTY { fd := &TTYFileDescription{ fileDescription: fileDescription{inode: i}, diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index c11e82c10..a92aed2c9 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -324,7 +324,7 @@ func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr return nil, syserr.ErrInvalidArgument } - // Whitelist options and constrain option length. + // Only allow known and safe options. optlen := getSockOptLen(t, level, name) switch level { case linux.SOL_IP: @@ -369,7 +369,7 @@ func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr // SetSockOpt implements socket.Socket.SetSockOpt. func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error { - // Whitelist options and constrain option length. + // Only allow known and safe options. optlen := setSockOptLen(t, level, name) switch level { case linux.SOL_IP: @@ -415,7 +415,7 @@ func (s *socketOpsCommon) SetSockOpt(t *kernel.Task, level int, name int, opt [] // RecvMsg implements socket.Socket.RecvMsg. func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlLen uint64) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) { - // Whitelist flags. + // Only allow known and safe flags. // // FIXME(jamieliu): We can't support MSG_ERRQUEUE because it uses ancillary // messages that gvisor/pkg/tcpip/transport/unix doesn't understand. Kill the @@ -537,7 +537,7 @@ func (s *socketOpsCommon) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags // SendMsg implements socket.Socket.SendMsg. func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) { - // Whitelist flags. + // Only allow known and safe flags. if flags&^(syscall.MSG_DONTWAIT|syscall.MSG_EOR|syscall.MSG_FASTOPEN|syscall.MSG_MORE|syscall.MSG_NOSIGNAL) != 0 { return 0, syserr.ErrInvalidArgument } diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md index 66f3105bd..4b9faf2ea 100644 --- a/pkg/sentry/vfs/README.md +++ b/pkg/sentry/vfs/README.md @@ -169,8 +169,6 @@ This construction, which is essentially a type-safe analogue to Linux's - binder, which is similarly far too incomplete to use. - - whitelistfs, which we are already actively attempting to remove. - - Save/restore. For instance, it is unclear if the current implementation of the `state` package supports the inheritance pattern described above. diff --git a/test/syscalls/linux/xattr.cc b/test/syscalls/linux/xattr.cc index 3231732ec..cbcf08451 100644 --- a/test/syscalls/linux/xattr.cc +++ b/test/syscalls/linux/xattr.cc @@ -73,9 +73,10 @@ TEST_F(XattrTest, XattrLargeName) { std::string name = "user."; name += std::string(XATTR_NAME_MAX - name.length(), 'a'); - // An xattr should be whitelisted before it can be accessed--do not allow - // arbitrary xattrs to be read/written in gVisor. if (!IsRunningOnGvisor()) { + // In gVisor, access to xattrs is controlled with an explicit list of + // allowed names. This name isn't going to be configured to allow access, so + // don't test it. EXPECT_THAT(setxattr(path, name.c_str(), nullptr, 0, /*flags=*/0), SyscallSucceeds()); EXPECT_THAT(getxattr(path, name.c_str(), nullptr, 0), diff --git a/tools/nogo/matchers.go b/tools/nogo/matchers.go index 32f099925..57a250501 100644 --- a/tools/nogo/matchers.go +++ b/tools/nogo/matchers.go @@ -27,10 +27,15 @@ type matcher interface { ShouldReport(d analysis.Diagnostic, fs *token.FileSet) bool } -// pathRegexps excludes explicit paths. +// pathRegexps filters explicit paths. type pathRegexps struct { - expr []*regexp.Regexp - whitelist bool + expr []*regexp.Regexp + + // include, if true, indicates that paths matching any regexp in expr + // match. + // + // If false, paths matching no regexps in expr match. + include bool } // buildRegexps builds a list of regular expressions. @@ -49,33 +54,33 @@ func (p *pathRegexps) ShouldReport(d analysis.Diagnostic, fs *token.FileSet) boo fullPos := fs.Position(d.Pos).String() for _, path := range p.expr { if path.MatchString(fullPos) { - return p.whitelist + return p.include } } - return !p.whitelist + return !p.include } // internalExcluded excludes specific internal paths. func internalExcluded(paths ...string) *pathRegexps { return &pathRegexps{ - expr: buildRegexps(internalPrefix, paths...), - whitelist: false, + expr: buildRegexps(internalPrefix, paths...), + include: false, } } // excludedExcluded excludes specific external paths. func externalExcluded(paths ...string) *pathRegexps { return &pathRegexps{ - expr: buildRegexps(externalPrefix, paths...), - whitelist: false, + expr: buildRegexps(externalPrefix, paths...), + include: false, } } // internalMatches returns a path matcher for internal packages. func internalMatches() *pathRegexps { return &pathRegexps{ - expr: buildRegexps(internalPrefix, ".*"), - whitelist: true, + expr: buildRegexps(internalPrefix, ".*"), + include: true, } } -- cgit v1.2.3 From db653bb34baeba24b8b658eb0985e4c5185344cb Mon Sep 17 00:00:00 2001 From: Ting-Yu Wang Date: Wed, 15 Jul 2020 15:04:10 -0700 Subject: fdbased: Vectorized write for packet; relax writev syscall filter. Now it calls pkt.Data.ToView() when writing the packet. This may require copying when the packet is large, which puts the worse case in an even worse situation. This sent out in a separate preparation change as it requires syscall filter changes. This change will be followed by the change for the adoption of the new PacketHeader API. PiperOrigin-RevId: 321447003 --- pkg/iovec/BUILD | 18 +++++ pkg/iovec/iovec.go | 75 +++++++++++++++++++ pkg/iovec/iovec_test.go | 121 +++++++++++++++++++++++++++++++ pkg/sentry/fs/host/BUILD | 1 + pkg/sentry/fs/host/socket_iovec.go | 7 +- pkg/sentry/fsimpl/host/BUILD | 1 + pkg/sentry/fsimpl/host/socket_iovec.go | 7 +- pkg/tcpip/link/fdbased/BUILD | 1 + pkg/tcpip/link/fdbased/endpoint.go | 69 ++++++------------ pkg/tcpip/link/rawfile/rawfile_unsafe.go | 32 +------- runsc/boot/filter/config.go | 11 +-- 11 files changed, 248 insertions(+), 95 deletions(-) create mode 100644 pkg/iovec/BUILD create mode 100644 pkg/iovec/iovec.go create mode 100644 pkg/iovec/iovec_test.go (limited to 'pkg/sentry/fs') diff --git a/pkg/iovec/BUILD b/pkg/iovec/BUILD new file mode 100644 index 000000000..eda82cfc1 --- /dev/null +++ b/pkg/iovec/BUILD @@ -0,0 +1,18 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "iovec", + srcs = ["iovec.go"], + visibility = ["//:sandbox"], + deps = ["//pkg/abi/linux"], +) + +go_test( + name = "iovec_test", + size = "small", + srcs = ["iovec_test.go"], + library = ":iovec", + deps = ["@org_golang_x_sys//unix:go_default_library"], +) diff --git a/pkg/iovec/iovec.go b/pkg/iovec/iovec.go new file mode 100644 index 000000000..dd70fe80f --- /dev/null +++ b/pkg/iovec/iovec.go @@ -0,0 +1,75 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +// Package iovec provides helpers to interact with vectorized I/O on host +// system. +package iovec + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/abi/linux" +) + +// MaxIovs is the maximum number of iovecs host platform can accept. +var MaxIovs = linux.UIO_MAXIOV + +// Builder is a builder for slice of syscall.Iovec. +type Builder struct { + iovec []syscall.Iovec + storage [8]syscall.Iovec + + // overflow tracks the last buffer when iovec length is at MaxIovs. + overflow []byte +} + +// Add adds buf to b preparing to be written. Zero-length buf won't be added. +func (b *Builder) Add(buf []byte) { + if len(buf) == 0 { + return + } + if b.iovec == nil { + b.iovec = b.storage[:0] + } + if len(b.iovec) >= MaxIovs { + b.addByAppend(buf) + return + } + b.iovec = append(b.iovec, syscall.Iovec{ + Base: &buf[0], + Len: uint64(len(buf)), + }) + // Keep the last buf if iovec is at max capacity. We will need to append to it + // for later bufs. + if len(b.iovec) == MaxIovs { + n := len(buf) + b.overflow = buf[:n:n] + } +} + +func (b *Builder) addByAppend(buf []byte) { + b.overflow = append(b.overflow, buf...) + b.iovec[len(b.iovec)-1] = syscall.Iovec{ + Base: &b.overflow[0], + Len: uint64(len(b.overflow)), + } +} + +// Build returns the final Iovec slice. The length of returned iovec will not +// excceed MaxIovs. +func (b *Builder) Build() []syscall.Iovec { + return b.iovec +} diff --git a/pkg/iovec/iovec_test.go b/pkg/iovec/iovec_test.go new file mode 100644 index 000000000..a3900c299 --- /dev/null +++ b/pkg/iovec/iovec_test.go @@ -0,0 +1,121 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package iovec + +import ( + "bytes" + "fmt" + "syscall" + "testing" + "unsafe" + + "golang.org/x/sys/unix" +) + +func TestBuilderEmpty(t *testing.T) { + var builder Builder + iovecs := builder.Build() + if got, want := len(iovecs), 0; got != want { + t.Errorf("len(iovecs) = %d, want %d", got, want) + } +} + +func TestBuilderBuild(t *testing.T) { + a := []byte{1, 2} + b := []byte{3, 4, 5} + + var builder Builder + builder.Add(a) + builder.Add(b) + builder.Add(nil) // Nil slice won't be added. + builder.Add([]byte{}) // Empty slice won't be added. + iovecs := builder.Build() + + if got, want := len(iovecs), 2; got != want { + t.Fatalf("len(iovecs) = %d, want %d", got, want) + } + for i, data := range [][]byte{a, b} { + if got, want := *iovecs[i].Base, data[0]; got != want { + t.Fatalf("*iovecs[%d].Base = %d, want %d", i, got, want) + } + if got, want := iovecs[i].Len, uint64(len(data)); got != want { + t.Fatalf("iovecs[%d].Len = %d, want %d", i, got, want) + } + } +} + +func TestBuilderBuildMaxIov(t *testing.T) { + for _, test := range []struct { + numIov int + }{ + { + numIov: MaxIovs - 1, + }, + { + numIov: MaxIovs, + }, + { + numIov: MaxIovs + 1, + }, + { + numIov: MaxIovs + 10, + }, + } { + name := fmt.Sprintf("numIov=%v", test.numIov) + t.Run(name, func(t *testing.T) { + var data []byte + var builder Builder + for i := 0; i < test.numIov; i++ { + buf := []byte{byte(i)} + builder.Add(buf) + data = append(data, buf...) + } + iovec := builder.Build() + + // Check the expected length of iovec. + wantNum := test.numIov + if wantNum > MaxIovs { + wantNum = MaxIovs + } + if got, want := len(iovec), wantNum; got != want { + t.Errorf("len(iovec) = %d, want %d", got, want) + } + + // Test a real read-write. + var fds [2]int + if err := unix.Pipe(fds[:]); err != nil { + t.Fatalf("Pipe: %v", err) + } + defer syscall.Close(fds[0]) + defer syscall.Close(fds[1]) + + wrote, _, e := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fds[1]), uintptr(unsafe.Pointer(&iovec[0])), uintptr(len(iovec))) + if int(wrote) != len(data) || e != 0 { + t.Fatalf("writev: %v, %v; want %v, 0", wrote, e, len(data)) + } + + got := make([]byte, len(data)) + if n, err := syscall.Read(fds[0], got); n != len(got) || err != nil { + t.Fatalf("read: %v, %v; want %v, nil", n, err, len(got)) + } + + if !bytes.Equal(got, data) { + t.Errorf("read: got data %v, want %v", got, data) + } + }) + } +} diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index aabce6cc9..d41d23a43 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -30,6 +30,7 @@ go_library( "//pkg/context", "//pkg/fd", "//pkg/fdnotifier", + "//pkg/iovec", "//pkg/log", "//pkg/refs", "//pkg/safemem", diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go index 5c18dbd5e..905afb50d 100644 --- a/pkg/sentry/fs/host/socket_iovec.go +++ b/pkg/sentry/fs/host/socket_iovec.go @@ -17,15 +17,12 @@ package host import ( "syscall" - "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/iovec" "gvisor.dev/gvisor/pkg/syserror" ) // LINT.IfChange -// maxIovs is the maximum number of iovecs to pass to the host. -var maxIovs = linux.UIO_MAXIOV - // copyToMulti copies as many bytes from src to dst as possible. func copyToMulti(dst [][]byte, src []byte) { for _, d := range dst { @@ -76,7 +73,7 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec } } - if iovsRequired > maxIovs { + if iovsRequired > iovec.MaxIovs { // The kernel will reject our call if we pass this many iovs. // Use a single intermediate buffer instead. b := make([]byte, stopLen) diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index 44a09d87a..e86fbe2d5 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -22,6 +22,7 @@ go_library( "//pkg/context", "//pkg/fdnotifier", "//pkg/fspath", + "//pkg/iovec", "//pkg/log", "//pkg/refs", "//pkg/safemem", diff --git a/pkg/sentry/fsimpl/host/socket_iovec.go b/pkg/sentry/fsimpl/host/socket_iovec.go index 584c247d2..fc0d5fd38 100644 --- a/pkg/sentry/fsimpl/host/socket_iovec.go +++ b/pkg/sentry/fsimpl/host/socket_iovec.go @@ -17,13 +17,10 @@ package host import ( "syscall" - "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/iovec" "gvisor.dev/gvisor/pkg/syserror" ) -// maxIovs is the maximum number of iovecs to pass to the host. -var maxIovs = linux.UIO_MAXIOV - // copyToMulti copies as many bytes from src to dst as possible. func copyToMulti(dst [][]byte, src []byte) { for _, d := range dst { @@ -74,7 +71,7 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec } } - if iovsRequired > maxIovs { + if iovsRequired > iovec.MaxIovs { // The kernel will reject our call if we pass this many iovs. // Use a single intermediate buffer instead. b := make([]byte, stopLen) diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD index aa6db9aea..507b44abc 100644 --- a/pkg/tcpip/link/fdbased/BUILD +++ b/pkg/tcpip/link/fdbased/BUILD @@ -15,6 +15,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/binary", + "//pkg/iovec", "//pkg/sync", "//pkg/tcpip", "//pkg/tcpip/buffer", diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 32abe2a13..6aa1badc7 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -45,6 +45,7 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/iovec" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" @@ -406,6 +407,8 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne eth.Encode(ethHdr) } + var builder iovec.Builder + fd := e.fds[pkt.Hash%uint32(len(e.fds))] if e.Capabilities()&stack.CapabilityHardwareGSO != 0 { vnetHdr := virtioNetHdr{} @@ -430,29 +433,25 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne } vnetHdrBuf := binary.Marshal(make([]byte, 0, virtioNetHdrSize), binary.LittleEndian, vnetHdr) - return rawfile.NonBlockingWrite3(fd, vnetHdrBuf, pkt.Header.View(), pkt.Data.ToView()) + builder.Add(vnetHdrBuf) } - if pkt.Data.Size() == 0 { - return rawfile.NonBlockingWrite(fd, pkt.Header.View()) - } - if pkt.Header.UsedLength() == 0 { - return rawfile.NonBlockingWrite(fd, pkt.Data.ToView()) + builder.Add(pkt.Header.View()) + for _, v := range pkt.Data.Views() { + builder.Add(v) } - return rawfile.NonBlockingWrite3(fd, pkt.Header.View(), pkt.Data.ToView(), nil) + return rawfile.NonBlockingWriteIovec(fd, builder.Build()) } func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tcpip.Error) { // Send a batch of packets through batchFD. mmsgHdrs := make([]rawfile.MMsgHdr, 0, len(batch)) for _, pkt := range batch { - var ethHdrBuf []byte - iovLen := 0 + var eth header.Ethernet if e.hdrSize > 0 { // Add ethernet header if needed. - ethHdrBuf = make([]byte, header.EthernetMinimumSize) - eth := header.Ethernet(ethHdrBuf) + eth = make(header.Ethernet, header.EthernetMinimumSize) ethHdr := &header.EthernetFields{ DstAddr: pkt.EgressRoute.RemoteLinkAddress, Type: pkt.NetworkProtocolNumber, @@ -465,12 +464,11 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc ethHdr.SrcAddr = e.addr } eth.Encode(ethHdr) - iovLen++ } - vnetHdr := virtioNetHdr{} var vnetHdrBuf []byte if e.Capabilities()&stack.CapabilityHardwareGSO != 0 { + vnetHdr := virtioNetHdr{} if pkt.GSOOptions != nil { vnetHdr.hdrLen = uint16(pkt.Header.UsedLength()) if pkt.GSOOptions.NeedsCsum { @@ -491,45 +489,20 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc } } vnetHdrBuf = binary.Marshal(make([]byte, 0, virtioNetHdrSize), binary.LittleEndian, vnetHdr) - iovLen++ } - iovecs := make([]syscall.Iovec, iovLen+1+len(pkt.Data.Views())) + var builder iovec.Builder + builder.Add(vnetHdrBuf) + builder.Add(eth) + builder.Add(pkt.Header.View()) + for _, v := range pkt.Data.Views() { + builder.Add(v) + } + iovecs := builder.Build() + var mmsgHdr rawfile.MMsgHdr mmsgHdr.Msg.Iov = &iovecs[0] - iovecIdx := 0 - if vnetHdrBuf != nil { - v := &iovecs[iovecIdx] - v.Base = &vnetHdrBuf[0] - v.Len = uint64(len(vnetHdrBuf)) - iovecIdx++ - } - if ethHdrBuf != nil { - v := &iovecs[iovecIdx] - v.Base = ðHdrBuf[0] - v.Len = uint64(len(ethHdrBuf)) - iovecIdx++ - } - pktSize := uint64(0) - // Encode L3 Header - v := &iovecs[iovecIdx] - hdr := &pkt.Header - hdrView := hdr.View() - v.Base = &hdrView[0] - v.Len = uint64(len(hdrView)) - pktSize += v.Len - iovecIdx++ - - // Now encode the Transport Payload. - pktViews := pkt.Data.Views() - for i := range pktViews { - vec := &iovecs[iovecIdx] - iovecIdx++ - vec.Base = &pktViews[i][0] - vec.Len = uint64(len(pktViews[i])) - pktSize += vec.Len - } - mmsgHdr.Msg.Iovlen = uint64(iovecIdx) + mmsgHdr.Msg.Iovlen = uint64(len(iovecs)) mmsgHdrs = append(mmsgHdrs, mmsgHdr) } diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go index 69de6eb3e..f4c32c2da 100644 --- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go +++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go @@ -66,38 +66,14 @@ func NonBlockingWrite(fd int, buf []byte) *tcpip.Error { return nil } -// NonBlockingWrite3 writes up to three byte slices to a file descriptor in a -// single syscall. It fails if partial data is written. -func NonBlockingWrite3(fd int, b1, b2, b3 []byte) *tcpip.Error { - // If there is no second and third buffer, issue a regular write. - if len(b2) == 0 && len(b3) == 0 { - return NonBlockingWrite(fd, b1) - } - - // Build the iovec that represents them and issue a writev syscall. - iovec := [3]syscall.Iovec{ - { - Base: &b1[0], - Len: uint64(len(b1)), - }, - { - Base: &b2[0], - Len: uint64(len(b2)), - }, - } - iovecLen := uintptr(2) - - if len(b3) > 0 { - iovecLen++ - iovec[2].Base = &b3[0] - iovec[2].Len = uint64(len(b3)) - } - +// NonBlockingWriteIovec writes iovec to a file descriptor in a single syscall. +// It fails if partial data is written. +func NonBlockingWriteIovec(fd int, iovec []syscall.Iovec) *tcpip.Error { + iovecLen := uintptr(len(iovec)) _, _, e := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovec[0])), iovecLen) if e != 0 { return TranslateErrno(e) } - return nil } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 60e33425f..149eb0b1b 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -310,19 +310,12 @@ var allowedSyscalls = seccomp.SyscallRules{ }, }, syscall.SYS_WRITE: {}, - // The only user in rawfile.NonBlockingWrite3 always passes iovcnt with - // values 2 or 3. Three iovec-s are passed, when the PACKET_VNET_HDR - // option is enabled for a packet socket. + // For rawfile.NonBlockingWriteIovec. syscall.SYS_WRITEV: []seccomp.Rule{ { seccomp.AllowAny{}, seccomp.AllowAny{}, - seccomp.AllowValue(2), - }, - { - seccomp.AllowAny{}, - seccomp.AllowAny{}, - seccomp.AllowValue(3), + seccomp.GreaterThan(0), }, }, } -- cgit v1.2.3 From e2c70ee9814f0f76ab5c30478748e4c697e91f33 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Fri, 24 Jul 2020 01:24:16 -0700 Subject: Enable automated marshalling for netstack. PiperOrigin-RevId: 322954792 --- pkg/abi/linux/netdevice.go | 4 +++ pkg/sentry/fs/file_operations.go | 1 + pkg/sentry/socket/netfilter/netfilter.go | 4 +-- pkg/sentry/socket/netstack/netstack.go | 54 ++++++++++++++------------------ 4 files changed, 31 insertions(+), 32 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/abi/linux/netdevice.go b/pkg/abi/linux/netdevice.go index 7866352b4..0faf015c7 100644 --- a/pkg/abi/linux/netdevice.go +++ b/pkg/abi/linux/netdevice.go @@ -22,6 +22,8 @@ const ( ) // IFReq is an interface request. +// +// +marshal type IFReq struct { // IFName is an encoded name, normally null-terminated. This should be // accessed via the Name and SetName functions. @@ -79,6 +81,8 @@ type IFMap struct { // IFConf is used to return a list of interfaces and their addresses. See // netdevice(7) and struct ifconf for more detail on its use. +// +// +marshal type IFConf struct { Len int32 _ [4]byte // Pad to sizeof(struct ifconf). diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go index beba0f771..f5537411e 100644 --- a/pkg/sentry/fs/file_operations.go +++ b/pkg/sentry/fs/file_operations.go @@ -160,6 +160,7 @@ type FileOperations interface { // refer. // // Preconditions: The AddressSpace (if any) that io refers to is activated. + // Must only be called from a task goroutine. Ioctl(ctx context.Context, file *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) } diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go index d9394055d..a9f0604ae 100644 --- a/pkg/sentry/socket/netfilter/netfilter.go +++ b/pkg/sentry/socket/netfilter/netfilter.go @@ -66,7 +66,7 @@ func nflog(format string, args ...interface{}) { func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPTGetinfo, *syserr.Error) { // Read in the struct and table name. var info linux.IPTGetinfo - if _, err := t.CopyIn(outPtr, &info); err != nil { + if _, err := info.CopyIn(t, outPtr); err != nil { return linux.IPTGetinfo{}, syserr.FromError(err) } @@ -84,7 +84,7 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT func GetEntries(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) { // Read in the struct and table name. var userEntries linux.IPTGetEntries - if _, err := t.CopyIn(outPtr, &userEntries); err != nil { + if _, err := userEntries.CopyIn(t, outPtr); err != nil { nflog("couldn't copy in entries %q", userEntries.Name) return linux.KernelIPTGetEntries{}, syserr.FromError(err) } diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 9856ab8c5..44b3fff46 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -2835,6 +2835,11 @@ func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, } func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + panic("ioctl(2) may only be called from a task goroutine") + } + // SIOCGSTAMP is implemented by netstack rather than all commonEndpoint // sockets. // TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP. @@ -2847,9 +2852,7 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy } tv := linux.NsecToTimeval(s.timestampNS) - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &tv, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := tv.CopyOut(t, args[2].Pointer()) return 0, err case linux.TIOCINQ: @@ -2868,9 +2871,8 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy } // Copy result to userspace. - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{ - AddressSpaceActive: true, - }) + vP := primitive.Int32(v) + _, err := vP.CopyOut(t, args[2].Pointer()) return 0, err } @@ -2879,6 +2881,11 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy // Ioctl performs a socket ioctl. func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.SyscallArguments) (uintptr, error) { + t := kernel.TaskFromContext(ctx) + if t == nil { + panic("ioctl(2) may only be called from a task goroutine") + } + switch arg := int(args[1].Int()); arg { case linux.SIOCGIFFLAGS, linux.SIOCGIFADDR, @@ -2895,37 +2902,28 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc linux.SIOCETHTOOL: var ifr linux.IFReq - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &ifr, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := ifr.CopyIn(t, args[2].Pointer()); err != nil { return 0, err } if err := interfaceIoctl(ctx, io, arg, &ifr); err != nil { return 0, err.ToError() } - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &ifr, usermem.IOOpts{ - AddressSpaceActive: true, - }) + _, err := ifr.CopyOut(t, args[2].Pointer()) return 0, err case linux.SIOCGIFCONF: // Return a list of interface addresses or the buffer size // necessary to hold the list. var ifc linux.IFConf - if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &ifc, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := ifc.CopyIn(t, args[2].Pointer()); err != nil { return 0, err } - if err := ifconfIoctl(ctx, io, &ifc); err != nil { + if err := ifconfIoctl(ctx, t, io, &ifc); err != nil { return 0, err } - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), ifc, usermem.IOOpts{ - AddressSpaceActive: true, - }) - + _, err := ifc.CopyOut(t, args[2].Pointer()) return 0, err case linux.TIOCINQ: @@ -2938,9 +2936,8 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc v = math.MaxInt32 } // Copy result to userspace. - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{ - AddressSpaceActive: true, - }) + vP := primitive.Int32(v) + _, err := vP.CopyOut(t, args[2].Pointer()) return 0, err case linux.TIOCOUTQ: @@ -2954,9 +2951,8 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc } // Copy result to userspace. - _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{ - AddressSpaceActive: true, - }) + vP := primitive.Int32(v) + _, err := vP.CopyOut(t, args[2].Pointer()) return 0, err case linux.SIOCGIFMEM, linux.SIOCGIFPFLAGS, linux.SIOCGMIIPHY, linux.SIOCGMIIREG: @@ -3105,7 +3101,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe } // ifconfIoctl populates a struct ifconf for the SIOCGIFCONF ioctl. -func ifconfIoctl(ctx context.Context, io usermem.IO, ifc *linux.IFConf) error { +func ifconfIoctl(ctx context.Context, t *kernel.Task, io usermem.IO, ifc *linux.IFConf) error { // If Ptr is NULL, return the necessary buffer size via Len. // Otherwise, write up to Len bytes starting at Ptr containing ifreq // structs. @@ -3142,9 +3138,7 @@ func ifconfIoctl(ctx context.Context, io usermem.IO, ifc *linux.IFConf) error { // Copy the ifr to userspace. dst := uintptr(ifc.Ptr) + uintptr(ifc.Len) ifc.Len += int32(linux.SizeOfIFReq) - if _, err := usermem.CopyObjectOut(ctx, io, usermem.Addr(dst), ifr, usermem.IOOpts{ - AddressSpaceActive: true, - }); err != nil { + if _, err := ifr.CopyOut(t, usermem.Addr(dst)); err != nil { return err } } -- cgit v1.2.3 From f347a578b79c96c13ed492b2cf9aec1cb3e60f3f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 27 Jul 2020 11:57:11 -0700 Subject: Move platform.File in memmap The subsequent systrap changes will need to import memmap from the platform package. PiperOrigin-RevId: 323409486 --- pkg/sentry/fs/fsutil/BUILD | 7 +--- pkg/sentry/fs/fsutil/dirty_set.go | 7 ++-- pkg/sentry/fs/fsutil/file_range_set.go | 15 ++++---- pkg/sentry/fs/fsutil/frame_ref_set.go | 10 ++--- pkg/sentry/fs/fsutil/host_file_mapper.go | 5 +-- pkg/sentry/fs/fsutil/host_mappable.go | 19 +++++---- pkg/sentry/fs/fsutil/inode_cached.go | 25 ++++++------ pkg/sentry/fsimpl/gofer/regular_file.go | 27 +++++++------ pkg/sentry/fsimpl/host/BUILD | 1 - pkg/sentry/fsimpl/host/mmap.go | 21 +++++----- pkg/sentry/kernel/shm/BUILD | 1 - pkg/sentry/kernel/shm/shm.go | 3 +- pkg/sentry/kernel/timekeeper.go | 4 +- pkg/sentry/kernel/vdso.go | 6 +-- pkg/sentry/memmap/BUILD | 14 ++++++- pkg/sentry/memmap/memmap.go | 60 +++++++++++++++++++++++++---- pkg/sentry/mm/BUILD | 4 +- pkg/sentry/mm/aio_context.go | 3 +- pkg/sentry/mm/mm.go | 10 ++--- pkg/sentry/mm/pma.go | 25 ++++++------ pkg/sentry/mm/special_mappable.go | 7 ++-- pkg/sentry/pgalloc/BUILD | 10 ++--- pkg/sentry/pgalloc/pgalloc.go | 66 ++++++++++++++++---------------- pkg/sentry/platform/BUILD | 20 +--------- pkg/sentry/platform/kvm/BUILD | 1 + pkg/sentry/platform/kvm/address_space.go | 3 +- pkg/sentry/platform/platform.go | 50 +----------------------- pkg/sentry/platform/ptrace/BUILD | 1 + pkg/sentry/platform/ptrace/subprocess.go | 3 +- 29 files changed, 205 insertions(+), 223 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD index 789369220..5fb419bcd 100644 --- a/pkg/sentry/fs/fsutil/BUILD +++ b/pkg/sentry/fs/fsutil/BUILD @@ -8,7 +8,6 @@ go_template_instance( out = "dirty_set_impl.go", imports = { "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", }, package = "fsutil", prefix = "Dirty", @@ -25,14 +24,14 @@ go_template_instance( name = "frame_ref_set_impl", out = "frame_ref_set_impl.go", imports = { - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", + "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", }, package = "fsutil", prefix = "FrameRef", template = "//pkg/segment:generic_set", types = { "Key": "uint64", - "Range": "platform.FileRange", + "Range": "memmap.FileRange", "Value": "uint64", "Functions": "FrameRefSetFunctions", }, @@ -43,7 +42,6 @@ go_template_instance( out = "file_range_set_impl.go", imports = { "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", }, package = "fsutil", prefix = "FileRange", @@ -86,7 +84,6 @@ go_library( "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", "//pkg/sentry/socket/unix/transport", "//pkg/sentry/usage", "//pkg/state", diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go index c6cd45087..2c9446c1d 100644 --- a/pkg/sentry/fs/fsutil/dirty_set.go +++ b/pkg/sentry/fs/fsutil/dirty_set.go @@ -20,7 +20,6 @@ import ( "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/usermem" ) @@ -159,7 +158,7 @@ func (ds *DirtySet) AllowClean(mr memmap.MappableRange) { // repeatedly until all bytes have been written. max is the true size of the // cached object; offsets beyond max will not be passed to writeAt, even if // they are marked dirty. -func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error { +func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, dirty *DirtySet, max uint64, mem memmap.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error { var changedDirty bool defer func() { if changedDirty { @@ -194,7 +193,7 @@ func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet // successful partial write, SyncDirtyAll will call it repeatedly until all // bytes have been written. max is the true size of the cached object; offsets // beyond max will not be passed to writeAt, even if they are marked dirty. -func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error { +func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max uint64, mem memmap.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error { dseg := dirty.FirstSegment() for dseg.Ok() { if err := syncDirtyRange(ctx, dseg.Range(), cache, max, mem, writeAt); err != nil { @@ -210,7 +209,7 @@ func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max } // Preconditions: mr must be page-aligned. -func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error { +func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, max uint64, mem memmap.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error { for cseg := cache.LowerBoundSegment(mr.Start); cseg.Ok() && cseg.Start() < mr.End; cseg = cseg.NextSegment() { wbr := cseg.Range().Intersect(mr) if max < wbr.Start { diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go index 5643cdac9..bbafebf03 100644 --- a/pkg/sentry/fs/fsutil/file_range_set.go +++ b/pkg/sentry/fs/fsutil/file_range_set.go @@ -23,13 +23,12 @@ import ( "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/usermem" ) // FileRangeSet maps offsets into a memmap.Mappable to offsets into a -// platform.File. It is used to implement Mappables that store data in +// memmap.File. It is used to implement Mappables that store data in // sparsely-allocated memory. // // type FileRangeSet @@ -65,20 +64,20 @@ func (FileRangeSetFunctions) Split(mr memmap.MappableRange, frstart uint64, spli } // FileRange returns the FileRange mapped by seg. -func (seg FileRangeIterator) FileRange() platform.FileRange { +func (seg FileRangeIterator) FileRange() memmap.FileRange { return seg.FileRangeOf(seg.Range()) } // FileRangeOf returns the FileRange mapped by mr. // // Preconditions: seg.Range().IsSupersetOf(mr). mr.Length() != 0. -func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) platform.FileRange { +func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) memmap.FileRange { frstart := seg.Value() + (mr.Start - seg.Start()) - return platform.FileRange{frstart, frstart + mr.Length()} + return memmap.FileRange{frstart, frstart + mr.Length()} } // Fill attempts to ensure that all memmap.Mappable offsets in required are -// mapped to a platform.File offset, by allocating from mf with the given +// mapped to a memmap.File offset, by allocating from mf with the given // memory usage kind and invoking readAt to store data into memory. (If readAt // returns a successful partial read, Fill will call it repeatedly until all // bytes have been read.) EOF is handled consistently with the requirements of @@ -141,7 +140,7 @@ func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.Map } // Drop removes segments for memmap.Mappable offsets in mr, freeing the -// corresponding platform.FileRanges. +// corresponding memmap.FileRanges. // // Preconditions: mr must be page-aligned. func (frs *FileRangeSet) Drop(mr memmap.MappableRange, mf *pgalloc.MemoryFile) { @@ -154,7 +153,7 @@ func (frs *FileRangeSet) Drop(mr memmap.MappableRange, mf *pgalloc.MemoryFile) { } // DropAll removes all segments in mr, freeing the corresponding -// platform.FileRanges. +// memmap.FileRanges. func (frs *FileRangeSet) DropAll(mf *pgalloc.MemoryFile) { for seg := frs.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { mf.DecRef(seg.FileRange()) diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go index dd6f5aba6..a808894df 100644 --- a/pkg/sentry/fs/fsutil/frame_ref_set.go +++ b/pkg/sentry/fs/fsutil/frame_ref_set.go @@ -17,7 +17,7 @@ package fsutil import ( "math" - "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usage" ) @@ -39,7 +39,7 @@ func (FrameRefSetFunctions) ClearValue(val *uint64) { } // Merge implements segment.Functions.Merge. -func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) { +func (FrameRefSetFunctions) Merge(_ memmap.FileRange, val1 uint64, _ memmap.FileRange, val2 uint64) (uint64, bool) { if val1 != val2 { return 0, false } @@ -47,13 +47,13 @@ func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform. } // Split implements segment.Functions.Split. -func (FrameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) { +func (FrameRefSetFunctions) Split(_ memmap.FileRange, val uint64, _ uint64) (uint64, uint64) { return val, val } // IncRefAndAccount adds a reference on the range fr. All newly inserted segments // are accounted as host page cache memory mappings. -func (refs *FrameRefSet) IncRefAndAccount(fr platform.FileRange) { +func (refs *FrameRefSet) IncRefAndAccount(fr memmap.FileRange) { seg, gap := refs.Find(fr.Start) for { switch { @@ -74,7 +74,7 @@ func (refs *FrameRefSet) IncRefAndAccount(fr platform.FileRange) { // DecRefAndAccount removes a reference on the range fr and untracks segments // that are removed from memory accounting. -func (refs *FrameRefSet) DecRefAndAccount(fr platform.FileRange) { +func (refs *FrameRefSet) DecRefAndAccount(fr memmap.FileRange) { seg := refs.FindSegment(fr.Start) for seg.Ok() && seg.Start() < fr.End { diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go index e82afd112..ef0113b52 100644 --- a/pkg/sentry/fs/fsutil/host_file_mapper.go +++ b/pkg/sentry/fs/fsutil/host_file_mapper.go @@ -21,7 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" ) @@ -126,7 +125,7 @@ func (f *HostFileMapper) DecRefOn(mr memmap.MappableRange) { // offsets in fr or until the next call to UnmapAll. // // Preconditions: The caller must hold a reference on all offsets in fr. -func (f *HostFileMapper) MapInternal(fr platform.FileRange, fd int, write bool) (safemem.BlockSeq, error) { +func (f *HostFileMapper) MapInternal(fr memmap.FileRange, fd int, write bool) (safemem.BlockSeq, error) { chunks := ((fr.End + chunkMask) >> chunkShift) - (fr.Start >> chunkShift) f.mapsMu.Lock() defer f.mapsMu.Unlock() @@ -146,7 +145,7 @@ func (f *HostFileMapper) MapInternal(fr platform.FileRange, fd int, write bool) } // Preconditions: f.mapsMu must be locked. -func (f *HostFileMapper) forEachMappingBlockLocked(fr platform.FileRange, fd int, write bool, fn func(safemem.Block)) error { +func (f *HostFileMapper) forEachMappingBlockLocked(fr memmap.FileRange, fd int, write bool, fn func(safemem.Block)) error { prot := syscall.PROT_READ if write { prot |= syscall.PROT_WRITE diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go index 78fec553e..c15d8a946 100644 --- a/pkg/sentry/fs/fsutil/host_mappable.go +++ b/pkg/sentry/fs/fsutil/host_mappable.go @@ -21,18 +21,17 @@ import ( "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" ) -// HostMappable implements memmap.Mappable and platform.File over a +// HostMappable implements memmap.Mappable and memmap.File over a // CachedFileObject. // // Lock order (compare the lock order model in mm/mm.go): // truncateMu ("fs locks") // mu ("memmap.Mappable locks not taken by Translate") -// ("platform.File locks") +// ("memmap.File locks") // backingFile ("CachedFileObject locks") // // +stateify savable @@ -124,24 +123,24 @@ func (h *HostMappable) NotifyChangeFD() error { return nil } -// MapInternal implements platform.File.MapInternal. -func (h *HostMappable) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +// MapInternal implements memmap.File.MapInternal. +func (h *HostMappable) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { return h.hostFileMapper.MapInternal(fr, h.backingFile.FD(), at.Write) } -// FD implements platform.File.FD. +// FD implements memmap.File.FD. func (h *HostMappable) FD() int { return h.backingFile.FD() } -// IncRef implements platform.File.IncRef. -func (h *HostMappable) IncRef(fr platform.FileRange) { +// IncRef implements memmap.File.IncRef. +func (h *HostMappable) IncRef(fr memmap.FileRange) { mr := memmap.MappableRange{Start: fr.Start, End: fr.End} h.hostFileMapper.IncRefOn(mr) } -// DecRef implements platform.File.DecRef. -func (h *HostMappable) DecRef(fr platform.FileRange) { +// DecRef implements memmap.File.DecRef. +func (h *HostMappable) DecRef(fr memmap.FileRange) { mr := memmap.MappableRange{Start: fr.Start, End: fr.End} h.hostFileMapper.DecRefOn(mr) } diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 800c8b4e1..fe8b0b6ac 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -26,7 +26,6 @@ import ( ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" @@ -934,7 +933,7 @@ func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. func (c *CachingInodeOperations) InvalidateUnsavable(ctx context.Context) error { - // Whether we have a host fd (and consequently what platform.File is + // Whether we have a host fd (and consequently what memmap.File is // mapped) can change across save/restore, so invalidate all translations // unconditionally. c.mapsMu.Lock() @@ -999,10 +998,10 @@ func (c *CachingInodeOperations) Evict(ctx context.Context, er pgalloc.Evictable } } -// IncRef implements platform.File.IncRef. This is used when we directly map an -// underlying host fd and CachingInodeOperations is used as the platform.File +// IncRef implements memmap.File.IncRef. This is used when we directly map an +// underlying host fd and CachingInodeOperations is used as the memmap.File // during translation. -func (c *CachingInodeOperations) IncRef(fr platform.FileRange) { +func (c *CachingInodeOperations) IncRef(fr memmap.FileRange) { // Hot path. Avoid defers. c.dataMu.Lock() seg, gap := c.refs.Find(fr.Start) @@ -1024,10 +1023,10 @@ func (c *CachingInodeOperations) IncRef(fr platform.FileRange) { } } -// DecRef implements platform.File.DecRef. This is used when we directly map an -// underlying host fd and CachingInodeOperations is used as the platform.File +// DecRef implements memmap.File.DecRef. This is used when we directly map an +// underlying host fd and CachingInodeOperations is used as the memmap.File // during translation. -func (c *CachingInodeOperations) DecRef(fr platform.FileRange) { +func (c *CachingInodeOperations) DecRef(fr memmap.FileRange) { // Hot path. Avoid defers. c.dataMu.Lock() seg := c.refs.FindSegment(fr.Start) @@ -1046,15 +1045,15 @@ func (c *CachingInodeOperations) DecRef(fr platform.FileRange) { c.dataMu.Unlock() } -// MapInternal implements platform.File.MapInternal. This is used when we +// MapInternal implements memmap.File.MapInternal. This is used when we // directly map an underlying host fd and CachingInodeOperations is used as the -// platform.File during translation. -func (c *CachingInodeOperations) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +// memmap.File during translation. +func (c *CachingInodeOperations) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { return c.hostFileMapper.MapInternal(fr, c.backingFile.FD(), at.Write) } -// FD implements platform.File.FD. This is used when we directly map an -// underlying host fd and CachingInodeOperations is used as the platform.File +// FD implements memmap.File.FD. This is used when we directly map an +// underlying host fd and CachingInodeOperations is used as the memmap.File // during translation. func (c *CachingInodeOperations) FD() int { return c.backingFile.FD() diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 02317a133..09f142cfc 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -29,7 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" @@ -221,12 +220,12 @@ func (fd *regularFileFD) pwriteLocked(ctx context.Context, src usermem.IOSequenc return 0, syserror.EINVAL } mr := memmap.MappableRange{pgstart, pgend} - var freed []platform.FileRange + var freed []memmap.FileRange d.dataMu.Lock() cseg := d.cache.LowerBoundSegment(mr.Start) for cseg.Ok() && cseg.Start() < mr.End { cseg = d.cache.Isolate(cseg, mr) - freed = append(freed, platform.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()}) + freed = append(freed, memmap.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()}) cseg = d.cache.Remove(cseg).NextSegment() } d.dataMu.Unlock() @@ -821,7 +820,7 @@ func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. func (d *dentry) InvalidateUnsavable(ctx context.Context) error { - // Whether we have a host fd (and consequently what platform.File is + // Whether we have a host fd (and consequently what memmap.File is // mapped) can change across save/restore, so invalidate all translations // unconditionally. d.mapsMu.Lock() @@ -869,8 +868,8 @@ func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) { } } -// dentryPlatformFile implements platform.File. It exists solely because dentry -// cannot implement both vfs.DentryImpl.IncRef and platform.File.IncRef. +// dentryPlatformFile implements memmap.File. It exists solely because dentry +// cannot implement both vfs.DentryImpl.IncRef and memmap.File.IncRef. // // dentryPlatformFile is only used when a host FD representing the remote file // is available (i.e. dentry.handle.fd >= 0), and that FD is used for @@ -878,7 +877,7 @@ func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) { type dentryPlatformFile struct { *dentry - // fdRefs counts references on platform.File offsets. fdRefs is protected + // fdRefs counts references on memmap.File offsets. fdRefs is protected // by dentry.dataMu. fdRefs fsutil.FrameRefSet @@ -890,29 +889,29 @@ type dentryPlatformFile struct { hostFileMapperInitOnce sync.Once } -// IncRef implements platform.File.IncRef. -func (d *dentryPlatformFile) IncRef(fr platform.FileRange) { +// IncRef implements memmap.File.IncRef. +func (d *dentryPlatformFile) IncRef(fr memmap.FileRange) { d.dataMu.Lock() d.fdRefs.IncRefAndAccount(fr) d.dataMu.Unlock() } -// DecRef implements platform.File.DecRef. -func (d *dentryPlatformFile) DecRef(fr platform.FileRange) { +// DecRef implements memmap.File.DecRef. +func (d *dentryPlatformFile) DecRef(fr memmap.FileRange) { d.dataMu.Lock() d.fdRefs.DecRefAndAccount(fr) d.dataMu.Unlock() } -// MapInternal implements platform.File.MapInternal. -func (d *dentryPlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +// MapInternal implements memmap.File.MapInternal. +func (d *dentryPlatformFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { d.handleMu.RLock() bs, err := d.hostFileMapper.MapInternal(fr, int(d.handle.fd), at.Write) d.handleMu.RUnlock() return bs, err } -// FD implements platform.File.FD. +// FD implements memmap.File.FD. func (d *dentryPlatformFile) FD() int { d.handleMu.RLock() fd := d.handle.fd diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD index e86fbe2d5..bd701bbc7 100644 --- a/pkg/sentry/fsimpl/host/BUILD +++ b/pkg/sentry/fsimpl/host/BUILD @@ -34,7 +34,6 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", - "//pkg/sentry/platform", "//pkg/sentry/socket/control", "//pkg/sentry/socket/unix", "//pkg/sentry/socket/unix/transport", diff --git a/pkg/sentry/fsimpl/host/mmap.go b/pkg/sentry/fsimpl/host/mmap.go index 8545a82f0..65d3af38c 100644 --- a/pkg/sentry/fsimpl/host/mmap.go +++ b/pkg/sentry/fsimpl/host/mmap.go @@ -19,13 +19,12 @@ import ( "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" ) -// inodePlatformFile implements platform.File. It exists solely because inode -// cannot implement both kernfs.Inode.IncRef and platform.File.IncRef. +// inodePlatformFile implements memmap.File. It exists solely because inode +// cannot implement both kernfs.Inode.IncRef and memmap.File.IncRef. // // inodePlatformFile should only be used if inode.canMap is true. type inodePlatformFile struct { @@ -34,7 +33,7 @@ type inodePlatformFile struct { // fdRefsMu protects fdRefs. fdRefsMu sync.Mutex - // fdRefs counts references on platform.File offsets. It is used solely for + // fdRefs counts references on memmap.File offsets. It is used solely for // memory accounting. fdRefs fsutil.FrameRefSet @@ -45,32 +44,32 @@ type inodePlatformFile struct { fileMapperInitOnce sync.Once } -// IncRef implements platform.File.IncRef. +// IncRef implements memmap.File.IncRef. // // Precondition: i.inode.canMap must be true. -func (i *inodePlatformFile) IncRef(fr platform.FileRange) { +func (i *inodePlatformFile) IncRef(fr memmap.FileRange) { i.fdRefsMu.Lock() i.fdRefs.IncRefAndAccount(fr) i.fdRefsMu.Unlock() } -// DecRef implements platform.File.DecRef. +// DecRef implements memmap.File.DecRef. // // Precondition: i.inode.canMap must be true. -func (i *inodePlatformFile) DecRef(fr platform.FileRange) { +func (i *inodePlatformFile) DecRef(fr memmap.FileRange) { i.fdRefsMu.Lock() i.fdRefs.DecRefAndAccount(fr) i.fdRefsMu.Unlock() } -// MapInternal implements platform.File.MapInternal. +// MapInternal implements memmap.File.MapInternal. // // Precondition: i.inode.canMap must be true. -func (i *inodePlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +func (i *inodePlatformFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { return i.fileMapper.MapInternal(fr, i.hostFD, at.Write) } -// FD implements platform.File.FD. +// FD implements memmap.File.FD. func (i *inodePlatformFile) FD() int { return i.hostFD } diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD index bfd779837..c211fc8d0 100644 --- a/pkg/sentry/kernel/shm/BUILD +++ b/pkg/sentry/kernel/shm/BUILD @@ -20,7 +20,6 @@ go_library( "//pkg/sentry/kernel/time", "//pkg/sentry/memmap", "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", "//pkg/sentry/usage", "//pkg/sync", "//pkg/syserror", diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index f66cfcc7f..55b4c2cdb 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -45,7 +45,6 @@ import ( ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -370,7 +369,7 @@ type Shm struct { // fr is the offset into mfp.MemoryFile() that backs this contents of this // segment. Immutable. - fr platform.FileRange + fr memmap.FileRange // mu protects all fields below. mu sync.Mutex `state:"nosave"` diff --git a/pkg/sentry/kernel/timekeeper.go b/pkg/sentry/kernel/timekeeper.go index 5f3908d8b..7c4fefb16 100644 --- a/pkg/sentry/kernel/timekeeper.go +++ b/pkg/sentry/kernel/timekeeper.go @@ -21,8 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/log" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" sentrytime "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sync" ) @@ -90,7 +90,7 @@ type Timekeeper struct { // NewTimekeeper does not take ownership of paramPage. // // SetClocks must be called on the returned Timekeeper before it is usable. -func NewTimekeeper(mfp pgalloc.MemoryFileProvider, paramPage platform.FileRange) (*Timekeeper, error) { +func NewTimekeeper(mfp pgalloc.MemoryFileProvider, paramPage memmap.FileRange) (*Timekeeper, error) { return &Timekeeper{ params: NewVDSOParamPage(mfp, paramPage), }, nil diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go index f1b3c212c..290c32466 100644 --- a/pkg/sentry/kernel/vdso.go +++ b/pkg/sentry/kernel/vdso.go @@ -19,8 +19,8 @@ import ( "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/usermem" ) @@ -58,7 +58,7 @@ type vdsoParams struct { type VDSOParamPage struct { // The parameter page is fr, allocated from mfp.MemoryFile(). mfp pgalloc.MemoryFileProvider - fr platform.FileRange + fr memmap.FileRange // seq is the current sequence count written to the page. // @@ -81,7 +81,7 @@ type VDSOParamPage struct { // * VDSOParamPage must be the only writer to fr. // // * mfp.MemoryFile().MapInternal(fr) must return a single safemem.Block. -func NewVDSOParamPage(mfp pgalloc.MemoryFileProvider, fr platform.FileRange) *VDSOParamPage { +func NewVDSOParamPage(mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *VDSOParamPage { return &VDSOParamPage{mfp: mfp, fr: fr} } diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD index a98b66de1..2c95669cd 100644 --- a/pkg/sentry/memmap/BUILD +++ b/pkg/sentry/memmap/BUILD @@ -28,9 +28,21 @@ go_template_instance( }, ) +go_template_instance( + name = "file_range", + out = "file_range.go", + package = "memmap", + prefix = "File", + template = "//pkg/segment:generic_range", + types = { + "T": "uint64", + }, +) + go_library( name = "memmap", srcs = [ + "file_range.go", "mappable_range.go", "mapping_set.go", "mapping_set_impl.go", @@ -40,7 +52,7 @@ go_library( deps = [ "//pkg/context", "//pkg/log", - "//pkg/sentry/platform", + "//pkg/safemem", "//pkg/syserror", "//pkg/usermem", ], diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go index c6db9fc8f..c188f6c29 100644 --- a/pkg/sentry/memmap/memmap.go +++ b/pkg/sentry/memmap/memmap.go @@ -19,12 +19,12 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/usermem" ) // Mappable represents a memory-mappable object, a mutable mapping from uint64 -// offsets to (platform.File, uint64 File offset) pairs. +// offsets to (File, uint64 File offset) pairs. // // See mm/mm.go for Mappable's place in the lock order. // @@ -74,7 +74,7 @@ type Mappable interface { // Translations are valid until invalidated by a callback to // MappingSpace.Invalidate or until the caller removes its mapping of the // translated range. Mappable implementations must ensure that at least one - // reference is held on all pages in a platform.File that may be the result + // reference is held on all pages in a File that may be the result // of a valid Translation. // // Preconditions: required.Length() > 0. optional.IsSupersetOf(required). @@ -100,7 +100,7 @@ type Translation struct { Source MappableRange // File is the mapped file. - File platform.File + File File // Offset is the offset into File at which this Translation begins. Offset uint64 @@ -110,9 +110,9 @@ type Translation struct { Perms usermem.AccessType } -// FileRange returns the platform.FileRange represented by t. -func (t Translation) FileRange() platform.FileRange { - return platform.FileRange{t.Offset, t.Offset + t.Source.Length()} +// FileRange returns the FileRange represented by t. +func (t Translation) FileRange() FileRange { + return FileRange{t.Offset, t.Offset + t.Source.Length()} } // CheckTranslateResult returns an error if (ts, terr) does not satisfy all @@ -361,3 +361,49 @@ type MMapOpts struct { // TODO(jamieliu): Replace entirely with MappingIdentity? Hint string } + +// File represents a host file that may be mapped into an platform.AddressSpace. +type File interface { + // All pages in a File are reference-counted. + + // IncRef increments the reference count on all pages in fr. + // + // Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() > + // 0. At least one reference must be held on all pages in fr. (The File + // interface does not provide a way to acquire an initial reference; + // implementors may define mechanisms for doing so.) + IncRef(fr FileRange) + + // DecRef decrements the reference count on all pages in fr. + // + // Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() > + // 0. At least one reference must be held on all pages in fr. + DecRef(fr FileRange) + + // MapInternal returns a mapping of the given file offsets in the invoking + // process' address space for reading and writing. + // + // Note that fr.Start and fr.End need not be page-aligned. + // + // Preconditions: fr.Length() > 0. At least one reference must be held on + // all pages in fr. + // + // Postconditions: The returned mapping is valid as long as at least one + // reference is held on the mapped pages. + MapInternal(fr FileRange, at usermem.AccessType) (safemem.BlockSeq, error) + + // FD returns the file descriptor represented by the File. + // + // The only permitted operation on the returned file descriptor is to map + // pages from it consistent with the requirements of AddressSpace.MapFile. + FD() int +} + +// FileRange represents a range of uint64 offsets into a File. +// +// type FileRange + +// String implements fmt.Stringer.String. +func (fr FileRange) String() string { + return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End) +} diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD index a036ce53c..f9d0837a1 100644 --- a/pkg/sentry/mm/BUILD +++ b/pkg/sentry/mm/BUILD @@ -7,14 +7,14 @@ go_template_instance( name = "file_refcount_set", out = "file_refcount_set.go", imports = { - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", + "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", }, package = "mm", prefix = "fileRefcount", template = "//pkg/segment:generic_set", types = { "Key": "uint64", - "Range": "platform.FileRange", + "Range": "memmap.FileRange", "Value": "int32", "Functions": "fileRefcountSetFunctions", }, diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 379148903..1999ec706 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -20,7 +20,6 @@ import ( "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -243,7 +242,7 @@ type aioMappable struct { refs.AtomicRefCount mfp pgalloc.MemoryFileProvider - fr platform.FileRange + fr memmap.FileRange } var aioRingBufferSize = uint64(usermem.Addr(linux.AIORingSize).MustRoundUp()) diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index 6db7c3d40..3e85964e4 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -25,7 +25,7 @@ // Locks taken by memmap.Mappable.Translate // mm.privateRefs.mu // platform.AddressSpace locks -// platform.File locks +// memmap.File locks // mm.aioManager.mu // mm.AIOContext.mu // @@ -396,7 +396,7 @@ type pma struct { // file is the file mapped by this pma. Only pmas for which file == // MemoryManager.mfp.MemoryFile() may be saved. pmas hold a reference to // the corresponding file range while they exist. - file platform.File `state:"nosave"` + file memmap.File `state:"nosave"` // off is the offset into file at which this pma begins. // @@ -436,7 +436,7 @@ type pma struct { private bool // If internalMappings is not empty, it is the cached return value of - // file.MapInternal for the platform.FileRange mapped by this pma. + // file.MapInternal for the memmap.FileRange mapped by this pma. internalMappings safemem.BlockSeq `state:"nosave"` } @@ -469,10 +469,10 @@ func (fileRefcountSetFunctions) MaxKey() uint64 { func (fileRefcountSetFunctions) ClearValue(_ *int32) { } -func (fileRefcountSetFunctions) Merge(_ platform.FileRange, rc1 int32, _ platform.FileRange, rc2 int32) (int32, bool) { +func (fileRefcountSetFunctions) Merge(_ memmap.FileRange, rc1 int32, _ memmap.FileRange, rc2 int32) (int32, bool) { return rc1, rc1 == rc2 } -func (fileRefcountSetFunctions) Split(_ platform.FileRange, rc int32, _ uint64) (int32, int32) { +func (fileRefcountSetFunctions) Split(_ memmap.FileRange, rc int32, _ uint64) (int32, int32) { return rc, rc } diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go index 62e4c20af..930ec895f 100644 --- a/pkg/sentry/mm/pma.go +++ b/pkg/sentry/mm/pma.go @@ -21,7 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/safecopy" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -604,7 +603,7 @@ func (mm *MemoryManager) invalidateLocked(ar usermem.AddrRange, invalidatePrivat } } -// Pin returns the platform.File ranges currently mapped by addresses in ar in +// Pin returns the memmap.File ranges currently mapped by addresses in ar in // mm, acquiring a reference on the returned ranges which the caller must // release by calling Unpin. If not all addresses are mapped, Pin returns a // non-nil error. Note that Pin may return both a non-empty slice of @@ -674,15 +673,15 @@ type PinnedRange struct { Source usermem.AddrRange // File is the mapped file. - File platform.File + File memmap.File // Offset is the offset into File at which this PinnedRange begins. Offset uint64 } -// FileRange returns the platform.File offsets mapped by pr. -func (pr PinnedRange) FileRange() platform.FileRange { - return platform.FileRange{pr.Offset, pr.Offset + uint64(pr.Source.Length())} +// FileRange returns the memmap.File offsets mapped by pr. +func (pr PinnedRange) FileRange() memmap.FileRange { + return memmap.FileRange{pr.Offset, pr.Offset + uint64(pr.Source.Length())} } // Unpin releases the reference held by prs. @@ -857,7 +856,7 @@ func (mm *MemoryManager) vecInternalMappingsLocked(ars usermem.AddrRangeSeq) saf } // incPrivateRef acquires a reference on private pages in fr. -func (mm *MemoryManager) incPrivateRef(fr platform.FileRange) { +func (mm *MemoryManager) incPrivateRef(fr memmap.FileRange) { mm.privateRefs.mu.Lock() defer mm.privateRefs.mu.Unlock() refSet := &mm.privateRefs.refs @@ -878,8 +877,8 @@ func (mm *MemoryManager) incPrivateRef(fr platform.FileRange) { } // decPrivateRef releases a reference on private pages in fr. -func (mm *MemoryManager) decPrivateRef(fr platform.FileRange) { - var freed []platform.FileRange +func (mm *MemoryManager) decPrivateRef(fr memmap.FileRange) { + var freed []memmap.FileRange mm.privateRefs.mu.Lock() refSet := &mm.privateRefs.refs @@ -951,7 +950,7 @@ func (pmaSetFunctions) Merge(ar1 usermem.AddrRange, pma1 pma, ar2 usermem.AddrRa // Discard internal mappings instead of trying to merge them, since merging // them requires an allocation and getting them again from the - // platform.File might not. + // memmap.File might not. pma1.internalMappings = safemem.BlockSeq{} return pma1, true } @@ -1012,12 +1011,12 @@ func (pseg pmaIterator) getInternalMappingsLocked() error { return nil } -func (pseg pmaIterator) fileRange() platform.FileRange { +func (pseg pmaIterator) fileRange() memmap.FileRange { return pseg.fileRangeOf(pseg.Range()) } // Preconditions: pseg.Range().IsSupersetOf(ar). ar.Length != 0. -func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) platform.FileRange { +func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) memmap.FileRange { if checkInvariants { if !pseg.Ok() { panic("terminal pma iterator") @@ -1032,5 +1031,5 @@ func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) platform.FileRange { pma := pseg.ValuePtr() pstart := pseg.Start() - return platform.FileRange{pma.off + uint64(ar.Start-pstart), pma.off + uint64(ar.End-pstart)} + return memmap.FileRange{pma.off + uint64(ar.Start-pstart), pma.off + uint64(ar.End-pstart)} } diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index 9ad52082d..0e142fb11 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -19,7 +19,6 @@ import ( "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -35,7 +34,7 @@ type SpecialMappable struct { refs.AtomicRefCount mfp pgalloc.MemoryFileProvider - fr platform.FileRange + fr memmap.FileRange name string } @@ -44,7 +43,7 @@ type SpecialMappable struct { // SpecialMappable will use the given name in /proc/[pid]/maps. // // Preconditions: fr.Length() != 0. -func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr platform.FileRange) *SpecialMappable { +func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *SpecialMappable { m := SpecialMappable{mfp: mfp, fr: fr, name: name} m.EnableLeakCheck("mm.SpecialMappable") return &m @@ -126,7 +125,7 @@ func (m *SpecialMappable) MemoryFileProvider() pgalloc.MemoryFileProvider { // FileRange returns the offsets into MemoryFileProvider().MemoryFile() that // store the SpecialMappable's contents. -func (m *SpecialMappable) FileRange() platform.FileRange { +func (m *SpecialMappable) FileRange() memmap.FileRange { return m.fr } diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD index e1fcb175f..7a3311a70 100644 --- a/pkg/sentry/pgalloc/BUILD +++ b/pkg/sentry/pgalloc/BUILD @@ -36,14 +36,14 @@ go_template_instance( "trackGaps": "1", }, imports = { - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", + "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", }, package = "pgalloc", prefix = "usage", template = "//pkg/segment:generic_set", types = { "Key": "uint64", - "Range": "platform.FileRange", + "Range": "memmap.FileRange", "Value": "usageInfo", "Functions": "usageSetFunctions", }, @@ -56,14 +56,14 @@ go_template_instance( "minDegree": "10", }, imports = { - "platform": "gvisor.dev/gvisor/pkg/sentry/platform", + "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap", }, package = "pgalloc", prefix = "reclaim", template = "//pkg/segment:generic_set", types = { "Key": "uint64", - "Range": "platform.FileRange", + "Range": "memmap.FileRange", "Value": "reclaimSetValue", "Functions": "reclaimSetFunctions", }, @@ -89,7 +89,7 @@ go_library( "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/hostmm", - "//pkg/sentry/platform", + "//pkg/sentry/memmap", "//pkg/sentry/usage", "//pkg/state", "//pkg/state/wire", diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index afab97c0a..3243d7214 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -33,14 +33,14 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/hostmm" - "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/usage" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) -// MemoryFile is a platform.File whose pages may be allocated to arbitrary +// MemoryFile is a memmap.File whose pages may be allocated to arbitrary // users. type MemoryFile struct { // opts holds options passed to NewMemoryFile. opts is immutable. @@ -372,7 +372,7 @@ func (f *MemoryFile) Destroy() { // to Allocate. // // Preconditions: length must be page-aligned and non-zero. -func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.FileRange, error) { +func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (memmap.FileRange, error) { if length == 0 || length%usermem.PageSize != 0 { panic(fmt.Sprintf("invalid allocation length: %#x", length)) } @@ -390,7 +390,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi // Find a range in the underlying file. fr, ok := findAvailableRange(&f.usage, f.fileSize, length, alignment) if !ok { - return platform.FileRange{}, syserror.ENOMEM + return memmap.FileRange{}, syserror.ENOMEM } // Expand the file if needed. @@ -398,7 +398,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi // Round the new file size up to be chunk-aligned. newFileSize := (int64(fr.End) + chunkMask) &^ chunkMask if err := f.file.Truncate(newFileSize); err != nil { - return platform.FileRange{}, err + return memmap.FileRange{}, err } f.fileSize = newFileSize f.mappingsMu.Lock() @@ -416,7 +416,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi bs[i] = 0 } }); err != nil { - return platform.FileRange{}, err + return memmap.FileRange{}, err } } if !f.usage.Add(fr, usageInfo{ @@ -439,7 +439,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi // space for mappings to be allocated downwards. // // Precondition: alignment must be a power of 2. -func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint64) (platform.FileRange, bool) { +func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint64) (memmap.FileRange, bool) { alignmentMask := alignment - 1 // Search for space in existing gaps, starting at the current end of the @@ -461,7 +461,7 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6 break } if start := unalignedStart &^ alignmentMask; start >= gap.Start() { - return platform.FileRange{start, start + length}, true + return memmap.FileRange{start, start + length}, true } gap = gap.PrevLargeEnoughGap(length) @@ -475,7 +475,7 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6 min = (min + alignmentMask) &^ alignmentMask if min+length < min { // Overflow: allocation would exceed the range of uint64. - return platform.FileRange{}, false + return memmap.FileRange{}, false } // Determine the minimum file size required to fit this allocation at its end. @@ -484,7 +484,7 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6 if newFileSize <= fileSize { if fileSize != 0 { // Overflow: allocation would exceed the range of int64. - return platform.FileRange{}, false + return memmap.FileRange{}, false } newFileSize = chunkSize } @@ -496,7 +496,7 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6 continue } if start := unalignedStart &^ alignmentMask; start >= min { - return platform.FileRange{start, start + length}, true + return memmap.FileRange{start, start + length}, true } } } @@ -508,22 +508,22 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6 // by r.ReadToBlocks(), it returns that error. // // Preconditions: length > 0. length must be page-aligned. -func (f *MemoryFile) AllocateAndFill(length uint64, kind usage.MemoryKind, r safemem.Reader) (platform.FileRange, error) { +func (f *MemoryFile) AllocateAndFill(length uint64, kind usage.MemoryKind, r safemem.Reader) (memmap.FileRange, error) { fr, err := f.Allocate(length, kind) if err != nil { - return platform.FileRange{}, err + return memmap.FileRange{}, err } dsts, err := f.MapInternal(fr, usermem.Write) if err != nil { f.DecRef(fr) - return platform.FileRange{}, err + return memmap.FileRange{}, err } n, err := safemem.ReadFullToBlocks(r, dsts) un := uint64(usermem.Addr(n).RoundDown()) if un < length { // Free unused memory and update fr to contain only the memory that is // still allocated. - f.DecRef(platform.FileRange{fr.Start + un, fr.End}) + f.DecRef(memmap.FileRange{fr.Start + un, fr.End}) fr.End = fr.Start + un } return fr, err @@ -540,7 +540,7 @@ const ( // will read zeroes. // // Preconditions: fr.Length() > 0. -func (f *MemoryFile) Decommit(fr platform.FileRange) error { +func (f *MemoryFile) Decommit(fr memmap.FileRange) error { if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 { panic(fmt.Sprintf("invalid range: %v", fr)) } @@ -560,7 +560,7 @@ func (f *MemoryFile) Decommit(fr platform.FileRange) error { return nil } -func (f *MemoryFile) markDecommitted(fr platform.FileRange) { +func (f *MemoryFile) markDecommitted(fr memmap.FileRange) { f.mu.Lock() defer f.mu.Unlock() // Since we're changing the knownCommitted attribute, we need to merge @@ -581,8 +581,8 @@ func (f *MemoryFile) markDecommitted(fr platform.FileRange) { f.usage.MergeRange(fr) } -// IncRef implements platform.File.IncRef. -func (f *MemoryFile) IncRef(fr platform.FileRange) { +// IncRef implements memmap.File.IncRef. +func (f *MemoryFile) IncRef(fr memmap.FileRange) { if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 { panic(fmt.Sprintf("invalid range: %v", fr)) } @@ -600,8 +600,8 @@ func (f *MemoryFile) IncRef(fr platform.FileRange) { f.usage.MergeAdjacent(fr) } -// DecRef implements platform.File.DecRef. -func (f *MemoryFile) DecRef(fr platform.FileRange) { +// DecRef implements memmap.File.DecRef. +func (f *MemoryFile) DecRef(fr memmap.FileRange) { if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 { panic(fmt.Sprintf("invalid range: %v", fr)) } @@ -637,8 +637,8 @@ func (f *MemoryFile) DecRef(fr platform.FileRange) { } } -// MapInternal implements platform.File.MapInternal. -func (f *MemoryFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { +// MapInternal implements memmap.File.MapInternal. +func (f *MemoryFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) { if !fr.WellFormed() || fr.Length() == 0 { panic(fmt.Sprintf("invalid range: %v", fr)) } @@ -664,7 +664,7 @@ func (f *MemoryFile) MapInternal(fr platform.FileRange, at usermem.AccessType) ( // forEachMappingSlice invokes fn on a sequence of byte slices that // collectively map all bytes in fr. -func (f *MemoryFile) forEachMappingSlice(fr platform.FileRange, fn func([]byte)) error { +func (f *MemoryFile) forEachMappingSlice(fr memmap.FileRange, fn func([]byte)) error { mappings := f.mappings.Load().([]uintptr) for chunkStart := fr.Start &^ chunkMask; chunkStart < fr.End; chunkStart += chunkSize { chunk := int(chunkStart >> chunkShift) @@ -944,7 +944,7 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func( continue case !populated && populatedRun: // Finish the run by changing this segment. - runRange := platform.FileRange{ + runRange := memmap.FileRange{ Start: r.Start + uint64(populatedRunStart*usermem.PageSize), End: r.Start + uint64(i*usermem.PageSize), } @@ -1009,7 +1009,7 @@ func (f *MemoryFile) File() *os.File { return f.file } -// FD implements platform.File.FD. +// FD implements memmap.File.FD. func (f *MemoryFile) FD() int { return int(f.file.Fd()) } @@ -1090,13 +1090,13 @@ func (f *MemoryFile) runReclaim() { // // Note that there returned range will be removed from tracking. It // must be reclaimed (removed from f.usage) at this point. -func (f *MemoryFile) findReclaimable() (platform.FileRange, bool) { +func (f *MemoryFile) findReclaimable() (memmap.FileRange, bool) { f.mu.Lock() defer f.mu.Unlock() for { for { if f.destroyed { - return platform.FileRange{}, false + return memmap.FileRange{}, false } if f.reclaimable { break @@ -1120,7 +1120,7 @@ func (f *MemoryFile) findReclaimable() (platform.FileRange, bool) { } } -func (f *MemoryFile) markReclaimed(fr platform.FileRange) { +func (f *MemoryFile) markReclaimed(fr memmap.FileRange) { f.mu.Lock() defer f.mu.Unlock() seg := f.usage.FindSegment(fr.Start) @@ -1222,11 +1222,11 @@ func (usageSetFunctions) MaxKey() uint64 { func (usageSetFunctions) ClearValue(val *usageInfo) { } -func (usageSetFunctions) Merge(_ platform.FileRange, val1 usageInfo, _ platform.FileRange, val2 usageInfo) (usageInfo, bool) { +func (usageSetFunctions) Merge(_ memmap.FileRange, val1 usageInfo, _ memmap.FileRange, val2 usageInfo) (usageInfo, bool) { return val1, val1 == val2 } -func (usageSetFunctions) Split(_ platform.FileRange, val usageInfo, _ uint64) (usageInfo, usageInfo) { +func (usageSetFunctions) Split(_ memmap.FileRange, val usageInfo, _ uint64) (usageInfo, usageInfo) { return val, val } @@ -1270,10 +1270,10 @@ func (reclaimSetFunctions) MaxKey() uint64 { func (reclaimSetFunctions) ClearValue(val *reclaimSetValue) { } -func (reclaimSetFunctions) Merge(_ platform.FileRange, _ reclaimSetValue, _ platform.FileRange, _ reclaimSetValue) (reclaimSetValue, bool) { +func (reclaimSetFunctions) Merge(_ memmap.FileRange, _ reclaimSetValue, _ memmap.FileRange, _ reclaimSetValue) (reclaimSetValue, bool) { return reclaimSetValue{}, true } -func (reclaimSetFunctions) Split(_ platform.FileRange, _ reclaimSetValue, _ uint64) (reclaimSetValue, reclaimSetValue) { +func (reclaimSetFunctions) Split(_ memmap.FileRange, _ reclaimSetValue, _ uint64) (reclaimSetValue, reclaimSetValue) { return reclaimSetValue{}, reclaimSetValue{} } diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD index 453241eca..209b28053 100644 --- a/pkg/sentry/platform/BUILD +++ b/pkg/sentry/platform/BUILD @@ -1,39 +1,21 @@ load("//tools:defs.bzl", "go_library") -load("//tools/go_generics:defs.bzl", "go_template_instance") package(licenses = ["notice"]) -go_template_instance( - name = "file_range", - out = "file_range.go", - package = "platform", - prefix = "File", - template = "//pkg/segment:generic_range", - types = { - "T": "uint64", - }, -) - go_library( name = "platform", srcs = [ "context.go", - "file_range.go", "mmap_min_addr.go", "platform.go", ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", - "//pkg/atomicbitops", "//pkg/context", - "//pkg/log", - "//pkg/safecopy", - "//pkg/safemem", "//pkg/seccomp", "//pkg/sentry/arch", - "//pkg/sentry/usage", - "//pkg/syserror", + "//pkg/sentry/memmap", "//pkg/usermem", ], ) diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 10a10bfe2..b5d27a72a 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -47,6 +47,7 @@ go_library( "//pkg/safecopy", "//pkg/seccomp", "//pkg/sentry/arch", + "//pkg/sentry/memmap", "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", "//pkg/sentry/platform/ring0", diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index faf1d5e1c..98a3e539d 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -18,6 +18,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.dev/gvisor/pkg/sync" @@ -150,7 +151,7 @@ func (as *addressSpace) mapLocked(addr usermem.Addr, m hostMapEntry, at usermem. } // MapFile implements platform.AddressSpace.MapFile. -func (as *addressSpace) MapFile(addr usermem.Addr, f platform.File, fr platform.FileRange, at usermem.AccessType, precommit bool) error { +func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error { as.mu.Lock() defer as.mu.Unlock() diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go index 171513f3f..4b13eec30 100644 --- a/pkg/sentry/platform/platform.go +++ b/pkg/sentry/platform/platform.go @@ -22,9 +22,9 @@ import ( "os" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/seccomp" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/usermem" ) @@ -207,7 +207,7 @@ type AddressSpace interface { // Preconditions: addr and fr must be page-aligned. fr.Length() > 0. // at.Any() == true. At least one reference must be held on all pages in // fr, and must continue to be held as long as pages are mapped. - MapFile(addr usermem.Addr, f File, fr FileRange, at usermem.AccessType, precommit bool) error + MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error // Unmap unmaps the given range. // @@ -310,52 +310,6 @@ func (f SegmentationFault) Error() string { return fmt.Sprintf("segmentation fault at %#x", f.Addr) } -// File represents a host file that may be mapped into an AddressSpace. -type File interface { - // All pages in a File are reference-counted. - - // IncRef increments the reference count on all pages in fr. - // - // Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() > - // 0. At least one reference must be held on all pages in fr. (The File - // interface does not provide a way to acquire an initial reference; - // implementors may define mechanisms for doing so.) - IncRef(fr FileRange) - - // DecRef decrements the reference count on all pages in fr. - // - // Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() > - // 0. At least one reference must be held on all pages in fr. - DecRef(fr FileRange) - - // MapInternal returns a mapping of the given file offsets in the invoking - // process' address space for reading and writing. - // - // Note that fr.Start and fr.End need not be page-aligned. - // - // Preconditions: fr.Length() > 0. At least one reference must be held on - // all pages in fr. - // - // Postconditions: The returned mapping is valid as long as at least one - // reference is held on the mapped pages. - MapInternal(fr FileRange, at usermem.AccessType) (safemem.BlockSeq, error) - - // FD returns the file descriptor represented by the File. - // - // The only permitted operation on the returned file descriptor is to map - // pages from it consistent with the requirements of AddressSpace.MapFile. - FD() int -} - -// FileRange represents a range of uint64 offsets into a File. -// -// type FileRange - -// String implements fmt.Stringer.String. -func (fr FileRange) String() string { - return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End) -} - // Requirements is used to specify platform specific requirements. type Requirements struct { // RequiresCurrentPIDNS indicates that the sandbox has to be started in the diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD index 30402c2df..29fd23cc3 100644 --- a/pkg/sentry/platform/ptrace/BUILD +++ b/pkg/sentry/platform/ptrace/BUILD @@ -30,6 +30,7 @@ go_library( "//pkg/seccomp", "//pkg/sentry/arch", "//pkg/sentry/hostcpu", + "//pkg/sentry/memmap", "//pkg/sentry/platform", "//pkg/sentry/platform/interrupt", "//pkg/sync", diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go index 2389423b0..c990f3454 100644 --- a/pkg/sentry/platform/ptrace/subprocess.go +++ b/pkg/sentry/platform/ptrace/subprocess.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/procid" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" @@ -616,7 +617,7 @@ func (s *subprocess) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintp } // MapFile implements platform.AddressSpace.MapFile. -func (s *subprocess) MapFile(addr usermem.Addr, f platform.File, fr platform.FileRange, at usermem.AccessType, precommit bool) error { +func (s *subprocess) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error { var flags int if precommit { flags |= syscall.MAP_POPULATE -- cgit v1.2.3 From b2ae7ea1bb207eddadd7962080e7bd0b8634db96 Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Mon, 3 Aug 2020 13:33:47 -0700 Subject: Plumbing context.Context to DecRef() and Release(). context is passed to DecRef() and Release() which is needed for SO_LINGER implementation. PiperOrigin-RevId: 324672584 --- pkg/refs/BUILD | 6 +- pkg/refs/refcounter.go | 19 +-- pkg/refs/refcounter_test.go | 38 ++--- pkg/sentry/control/proc.go | 2 +- pkg/sentry/devices/memdev/full.go | 2 +- pkg/sentry/devices/memdev/null.go | 2 +- pkg/sentry/devices/memdev/random.go | 2 +- pkg/sentry/devices/memdev/zero.go | 2 +- pkg/sentry/devices/ttydev/ttydev.go | 2 +- pkg/sentry/devices/tundev/tundev.go | 4 +- pkg/sentry/fdimport/fdimport.go | 8 +- pkg/sentry/fs/copy_up.go | 12 +- pkg/sentry/fs/copy_up_test.go | 4 +- pkg/sentry/fs/dev/net_tun.go | 4 +- pkg/sentry/fs/dirent.go | 110 +++++++------- pkg/sentry/fs/dirent_cache.go | 3 +- pkg/sentry/fs/dirent_refs_test.go | 16 +-- pkg/sentry/fs/dirent_state.go | 3 +- pkg/sentry/fs/fdpipe/pipe.go | 2 +- pkg/sentry/fs/fdpipe/pipe_opener_test.go | 16 +-- pkg/sentry/fs/fdpipe/pipe_test.go | 18 +-- pkg/sentry/fs/file.go | 10 +- pkg/sentry/fs/file_operations.go | 2 +- pkg/sentry/fs/file_overlay.go | 22 +-- pkg/sentry/fs/fsutil/file.go | 4 +- pkg/sentry/fs/gofer/file.go | 4 +- pkg/sentry/fs/gofer/gofer_test.go | 8 +- pkg/sentry/fs/gofer/handles.go | 5 +- pkg/sentry/fs/gofer/inode.go | 5 +- pkg/sentry/fs/gofer/path.go | 6 +- pkg/sentry/fs/gofer/session.go | 16 +-- pkg/sentry/fs/gofer/session_state.go | 3 +- pkg/sentry/fs/gofer/socket.go | 6 +- pkg/sentry/fs/host/control.go | 2 +- pkg/sentry/fs/host/file.go | 4 +- pkg/sentry/fs/host/inode_test.go | 2 +- pkg/sentry/fs/host/socket.go | 10 +- pkg/sentry/fs/host/socket_test.go | 38 ++--- pkg/sentry/fs/host/tty.go | 4 +- pkg/sentry/fs/host/wait_test.go | 2 +- pkg/sentry/fs/inode.go | 11 +- pkg/sentry/fs/inode_inotify.go | 5 +- pkg/sentry/fs/inode_overlay.go | 30 ++-- pkg/sentry/fs/inode_overlay_test.go | 8 +- pkg/sentry/fs/inotify.go | 8 +- pkg/sentry/fs/inotify_watch.go | 9 +- pkg/sentry/fs/mount.go | 12 +- pkg/sentry/fs/mount_overlay.go | 6 +- pkg/sentry/fs/mount_test.go | 29 ++-- pkg/sentry/fs/mounts.go | 30 ++-- pkg/sentry/fs/mounts_test.go | 2 +- pkg/sentry/fs/overlay.go | 10 +- pkg/sentry/fs/proc/fds.go | 18 +-- pkg/sentry/fs/proc/mounts.go | 8 +- pkg/sentry/fs/proc/net.go | 12 +- pkg/sentry/fs/proc/proc.go | 2 +- pkg/sentry/fs/proc/task.go | 4 +- pkg/sentry/fs/ramfs/dir.go | 18 +-- pkg/sentry/fs/ramfs/tree_test.go | 2 +- pkg/sentry/fs/timerfd/timerfd.go | 4 +- pkg/sentry/fs/tmpfs/file_test.go | 2 +- pkg/sentry/fs/tty/dir.go | 8 +- pkg/sentry/fs/tty/fs.go | 2 +- pkg/sentry/fs/tty/master.go | 8 +- pkg/sentry/fs/tty/slave.go | 4 +- pkg/sentry/fs/user/path.go | 8 +- pkg/sentry/fs/user/user.go | 8 +- pkg/sentry/fs/user/user_test.go | 8 +- pkg/sentry/fsbridge/bridge.go | 2 +- pkg/sentry/fsbridge/fs.go | 8 +- pkg/sentry/fsbridge/vfs.go | 6 +- pkg/sentry/fsimpl/devpts/devpts.go | 4 +- pkg/sentry/fsimpl/devpts/master.go | 6 +- pkg/sentry/fsimpl/devpts/slave.go | 6 +- pkg/sentry/fsimpl/devtmpfs/devtmpfs.go | 6 +- pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go | 8 +- pkg/sentry/fsimpl/eventfd/eventfd.go | 6 +- pkg/sentry/fsimpl/eventfd/eventfd_test.go | 12 +- pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go | 6 +- pkg/sentry/fsimpl/ext/dentry.go | 7 +- pkg/sentry/fsimpl/ext/directory.go | 2 +- pkg/sentry/fsimpl/ext/ext.go | 10 +- pkg/sentry/fsimpl/ext/ext_test.go | 4 +- pkg/sentry/fsimpl/ext/filesystem.go | 68 ++++----- pkg/sentry/fsimpl/ext/regular_file.go | 2 +- pkg/sentry/fsimpl/ext/symlink.go | 2 +- pkg/sentry/fsimpl/fuse/dev.go | 2 +- pkg/sentry/fsimpl/fuse/dev_test.go | 4 +- pkg/sentry/fsimpl/fuse/fusefs.go | 4 +- pkg/sentry/fsimpl/gofer/directory.go | 4 +- pkg/sentry/fsimpl/gofer/filesystem.go | 90 ++++++------ pkg/sentry/fsimpl/gofer/gofer.go | 40 +++--- pkg/sentry/fsimpl/gofer/gofer_test.go | 6 +- pkg/sentry/fsimpl/gofer/regular_file.go | 2 +- pkg/sentry/fsimpl/gofer/socket.go | 6 +- pkg/sentry/fsimpl/gofer/special_file.go | 4 +- pkg/sentry/fsimpl/host/control.go | 2 +- pkg/sentry/fsimpl/host/host.go | 14 +- pkg/sentry/fsimpl/host/socket.go | 12 +- pkg/sentry/fsimpl/host/tty.go | 4 +- pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 2 +- pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 2 +- pkg/sentry/fsimpl/kernfs/filesystem.go | 68 ++++----- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 10 +- pkg/sentry/fsimpl/kernfs/kernfs.go | 26 ++-- pkg/sentry/fsimpl/kernfs/kernfs_test.go | 18 +-- pkg/sentry/fsimpl/overlay/copy_up.go | 8 +- pkg/sentry/fsimpl/overlay/directory.go | 8 +- pkg/sentry/fsimpl/overlay/filesystem.go | 64 ++++----- pkg/sentry/fsimpl/overlay/non_directory.go | 18 +-- pkg/sentry/fsimpl/overlay/overlay.go | 48 +++---- pkg/sentry/fsimpl/pipefs/pipefs.go | 6 +- pkg/sentry/fsimpl/proc/filesystem.go | 4 +- pkg/sentry/fsimpl/proc/task_fds.go | 18 +-- pkg/sentry/fsimpl/proc/task_files.go | 14 +- pkg/sentry/fsimpl/proc/task_net.go | 12 +- pkg/sentry/fsimpl/proc/tasks_test.go | 10 +- pkg/sentry/fsimpl/signalfd/signalfd.go | 4 +- pkg/sentry/fsimpl/sockfs/sockfs.go | 4 +- pkg/sentry/fsimpl/sys/sys.go | 4 +- pkg/sentry/fsimpl/sys/sys_test.go | 2 +- pkg/sentry/fsimpl/testutil/kernel.go | 2 +- pkg/sentry/fsimpl/testutil/testutil.go | 6 +- pkg/sentry/fsimpl/timerfd/timerfd.go | 6 +- pkg/sentry/fsimpl/tmpfs/benchmark_test.go | 50 +++---- pkg/sentry/fsimpl/tmpfs/directory.go | 4 +- pkg/sentry/fsimpl/tmpfs/filesystem.go | 106 +++++++------- pkg/sentry/fsimpl/tmpfs/pipe_test.go | 20 +-- pkg/sentry/fsimpl/tmpfs/regular_file.go | 2 +- pkg/sentry/fsimpl/tmpfs/tmpfs.go | 34 ++--- pkg/sentry/fsimpl/tmpfs/tmpfs_test.go | 6 +- pkg/sentry/kernel/abstract_socket_namespace.go | 13 +- pkg/sentry/kernel/epoll/epoll.go | 14 +- pkg/sentry/kernel/epoll/epoll_test.go | 5 +- pkg/sentry/kernel/eventfd/eventfd.go | 4 +- pkg/sentry/kernel/fd_table.go | 55 +++---- pkg/sentry/kernel/fd_table_test.go | 6 +- pkg/sentry/kernel/fs_context.go | 31 ++-- pkg/sentry/kernel/futex/BUILD | 1 + pkg/sentry/kernel/futex/futex.go | 35 ++--- pkg/sentry/kernel/futex/futex_test.go | 66 +++++---- pkg/sentry/kernel/kernel.go | 57 ++++---- pkg/sentry/kernel/pipe/node.go | 6 +- pkg/sentry/kernel/pipe/node_test.go | 2 +- pkg/sentry/kernel/pipe/pipe.go | 2 +- pkg/sentry/kernel/pipe/pipe_test.go | 16 +-- pkg/sentry/kernel/pipe/pipe_util.go | 2 +- pkg/sentry/kernel/pipe/reader.go | 3 +- pkg/sentry/kernel/pipe/vfs.go | 8 +- pkg/sentry/kernel/pipe/writer.go | 3 +- pkg/sentry/kernel/sessions.go | 5 +- pkg/sentry/kernel/shm/shm.go | 10 +- pkg/sentry/kernel/signalfd/signalfd.go | 2 +- pkg/sentry/kernel/task.go | 8 +- pkg/sentry/kernel/task_clone.go | 10 +- pkg/sentry/kernel/task_exec.go | 6 +- pkg/sentry/kernel/task_exit.go | 8 +- pkg/sentry/kernel/task_log.go | 2 +- pkg/sentry/kernel/task_start.go | 6 +- pkg/sentry/kernel/thread_group.go | 4 +- pkg/sentry/loader/elf.go | 4 +- pkg/sentry/loader/loader.go | 6 +- pkg/sentry/memmap/memmap.go | 2 +- pkg/sentry/mm/aio_context.go | 6 +- pkg/sentry/mm/lifecycle.go | 2 +- pkg/sentry/mm/metadata.go | 5 +- pkg/sentry/mm/special_mappable.go | 4 +- pkg/sentry/mm/syscalls.go | 4 +- pkg/sentry/mm/vma.go | 4 +- pkg/sentry/socket/control/control.go | 8 +- pkg/sentry/socket/control/control_vfs2.go | 8 +- pkg/sentry/socket/hostinet/socket.go | 8 +- pkg/sentry/socket/netlink/provider.go | 2 +- pkg/sentry/socket/netlink/socket.go | 14 +- pkg/sentry/socket/netlink/socket_vfs2.go | 4 +- pkg/sentry/socket/netstack/netstack.go | 6 +- pkg/sentry/socket/netstack/netstack_vfs2.go | 2 +- pkg/sentry/socket/socket.go | 4 +- pkg/sentry/socket/unix/transport/connectioned.go | 10 +- pkg/sentry/socket/unix/transport/connectionless.go | 12 +- pkg/sentry/socket/unix/transport/queue.go | 13 +- pkg/sentry/socket/unix/transport/unix.go | 48 +++---- pkg/sentry/socket/unix/unix.go | 38 ++--- pkg/sentry/socket/unix/unix_vfs2.go | 18 +-- pkg/sentry/strace/strace.go | 12 +- pkg/sentry/syscalls/epoll.go | 18 +-- pkg/sentry/syscalls/linux/sys_aio.go | 8 +- pkg/sentry/syscalls/linux/sys_eventfd.go | 2 +- pkg/sentry/syscalls/linux/sys_file.go | 78 +++++----- pkg/sentry/syscalls/linux/sys_futex.go | 6 +- pkg/sentry/syscalls/linux/sys_getdents.go | 2 +- pkg/sentry/syscalls/linux/sys_inotify.go | 10 +- pkg/sentry/syscalls/linux/sys_lseek.go | 2 +- pkg/sentry/syscalls/linux/sys_mmap.go | 4 +- pkg/sentry/syscalls/linux/sys_mount.go | 2 +- pkg/sentry/syscalls/linux/sys_pipe.go | 6 +- pkg/sentry/syscalls/linux/sys_poll.go | 10 +- pkg/sentry/syscalls/linux/sys_prctl.go | 4 +- pkg/sentry/syscalls/linux/sys_read.go | 12 +- pkg/sentry/syscalls/linux/sys_shm.go | 10 +- pkg/sentry/syscalls/linux/sys_signal.go | 4 +- pkg/sentry/syscalls/linux/sys_socket.go | 46 +++--- pkg/sentry/syscalls/linux/sys_splice.go | 12 +- pkg/sentry/syscalls/linux/sys_stat.go | 8 +- pkg/sentry/syscalls/linux/sys_sync.go | 8 +- pkg/sentry/syscalls/linux/sys_thread.go | 6 +- pkg/sentry/syscalls/linux/sys_timerfd.go | 6 +- pkg/sentry/syscalls/linux/sys_write.go | 10 +- pkg/sentry/syscalls/linux/sys_xattr.go | 8 +- pkg/sentry/syscalls/linux/vfs2/aio.go | 8 +- pkg/sentry/syscalls/linux/vfs2/epoll.go | 14 +- pkg/sentry/syscalls/linux/vfs2/eventfd.go | 4 +- pkg/sentry/syscalls/linux/vfs2/execve.go | 12 +- pkg/sentry/syscalls/linux/vfs2/fd.go | 12 +- pkg/sentry/syscalls/linux/vfs2/filesystem.go | 22 +-- pkg/sentry/syscalls/linux/vfs2/fscontext.go | 22 +-- pkg/sentry/syscalls/linux/vfs2/getdents.go | 2 +- pkg/sentry/syscalls/linux/vfs2/inotify.go | 14 +- pkg/sentry/syscalls/linux/vfs2/ioctl.go | 2 +- pkg/sentry/syscalls/linux/vfs2/lock.go | 2 +- pkg/sentry/syscalls/linux/vfs2/memfd.go | 2 +- pkg/sentry/syscalls/linux/vfs2/mmap.go | 4 +- pkg/sentry/syscalls/linux/vfs2/mount.go | 4 +- pkg/sentry/syscalls/linux/vfs2/path.go | 12 +- pkg/sentry/syscalls/linux/vfs2/pipe.go | 6 +- pkg/sentry/syscalls/linux/vfs2/poll.go | 10 +- pkg/sentry/syscalls/linux/vfs2/read_write.go | 48 +++---- pkg/sentry/syscalls/linux/vfs2/setstat.go | 20 +-- pkg/sentry/syscalls/linux/vfs2/signal.go | 4 +- pkg/sentry/syscalls/linux/vfs2/socket.go | 46 +++--- pkg/sentry/syscalls/linux/vfs2/splice.go | 20 +-- pkg/sentry/syscalls/linux/vfs2/stat.go | 30 ++-- pkg/sentry/syscalls/linux/vfs2/sync.go | 6 +- pkg/sentry/syscalls/linux/vfs2/timerfd.go | 8 +- pkg/sentry/syscalls/linux/vfs2/xattr.go | 16 +-- pkg/sentry/vfs/anonfs.go | 8 +- pkg/sentry/vfs/dentry.go | 37 ++--- pkg/sentry/vfs/epoll.go | 6 +- pkg/sentry/vfs/file_description.go | 24 ++-- pkg/sentry/vfs/file_description_impl_util_test.go | 18 +-- pkg/sentry/vfs/filesystem.go | 6 +- pkg/sentry/vfs/inotify.go | 34 ++--- pkg/sentry/vfs/mount.go | 54 +++---- pkg/sentry/vfs/pathname.go | 18 +-- pkg/sentry/vfs/resolving_path.go | 43 +++--- pkg/sentry/vfs/vfs.go | 160 ++++++++++----------- pkg/tcpip/link/tun/BUILD | 1 + pkg/tcpip/link/tun/device.go | 9 +- runsc/boot/fs.go | 12 +- runsc/boot/loader.go | 16 +-- runsc/boot/loader_test.go | 8 +- runsc/boot/vfs.go | 10 +- 252 files changed, 1711 insertions(+), 1668 deletions(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/refs/BUILD b/pkg/refs/BUILD index 74affc887..9888cce9c 100644 --- a/pkg/refs/BUILD +++ b/pkg/refs/BUILD @@ -24,6 +24,7 @@ go_library( ], visibility = ["//:sandbox"], deps = [ + "//pkg/context", "//pkg/log", "//pkg/sync", ], @@ -34,5 +35,8 @@ go_test( size = "small", srcs = ["refcounter_test.go"], library = ":refs", - deps = ["//pkg/sync"], + deps = [ + "//pkg/context", + "//pkg/sync", + ], ) diff --git a/pkg/refs/refcounter.go b/pkg/refs/refcounter.go index c45ba8200..61790221b 100644 --- a/pkg/refs/refcounter.go +++ b/pkg/refs/refcounter.go @@ -23,6 +23,7 @@ import ( "runtime" "sync/atomic" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sync" ) @@ -38,7 +39,7 @@ type RefCounter interface { // Note that AtomicRefCounter.DecRef() does not support destructors. // If a type has a destructor, it must implement its own DecRef() // method and call AtomicRefCounter.DecRefWithDestructor(destructor). - DecRef() + DecRef(ctx context.Context) // TryIncRef attempts to increase the reference counter on the object, // but may fail if all references have already been dropped. This @@ -57,7 +58,7 @@ type RefCounter interface { // A WeakRefUser is notified when the last non-weak reference is dropped. type WeakRefUser interface { // WeakRefGone is called when the last non-weak reference is dropped. - WeakRefGone() + WeakRefGone(ctx context.Context) } // WeakRef is a weak reference. @@ -123,7 +124,7 @@ func (w *WeakRef) Get() RefCounter { // Drop drops this weak reference. You should always call drop when you are // finished with the weak reference. You may not use this object after calling // drop. -func (w *WeakRef) Drop() { +func (w *WeakRef) Drop(ctx context.Context) { rc, ok := w.get() if !ok { // We've been zapped already. When the refcounter has called @@ -145,7 +146,7 @@ func (w *WeakRef) Drop() { // And now aren't on the object's list of weak references. So it won't // zap us if this causes the reference count to drop to zero. - rc.DecRef() + rc.DecRef(ctx) // Return to the pool. weakRefPool.Put(w) @@ -427,7 +428,7 @@ func (r *AtomicRefCount) dropWeakRef(w *WeakRef) { // A: TryIncRef [transform speculative to real] // //go:nosplit -func (r *AtomicRefCount) DecRefWithDestructor(destroy func()) { +func (r *AtomicRefCount) DecRefWithDestructor(ctx context.Context, destroy func(context.Context)) { switch v := atomic.AddInt64(&r.refCount, -1); { case v < -1: panic("Decrementing non-positive ref count") @@ -448,7 +449,7 @@ func (r *AtomicRefCount) DecRefWithDestructor(destroy func()) { if user != nil { r.mu.Unlock() - user.WeakRefGone() + user.WeakRefGone(ctx) r.mu.Lock() } } @@ -456,7 +457,7 @@ func (r *AtomicRefCount) DecRefWithDestructor(destroy func()) { // Call the destructor. if destroy != nil { - destroy() + destroy(ctx) } } } @@ -464,6 +465,6 @@ func (r *AtomicRefCount) DecRefWithDestructor(destroy func()) { // DecRef decrements this object's reference count. // //go:nosplit -func (r *AtomicRefCount) DecRef() { - r.DecRefWithDestructor(nil) +func (r *AtomicRefCount) DecRef(ctx context.Context) { + r.DecRefWithDestructor(ctx, nil) } diff --git a/pkg/refs/refcounter_test.go b/pkg/refs/refcounter_test.go index 1ab4a4440..6d0dd1018 100644 --- a/pkg/refs/refcounter_test.go +++ b/pkg/refs/refcounter_test.go @@ -18,6 +18,7 @@ import ( "reflect" "testing" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" ) @@ -31,11 +32,11 @@ type testCounter struct { destroyed bool } -func (t *testCounter) DecRef() { - t.AtomicRefCount.DecRefWithDestructor(t.destroy) +func (t *testCounter) DecRef(ctx context.Context) { + t.AtomicRefCount.DecRefWithDestructor(ctx, t.destroy) } -func (t *testCounter) destroy() { +func (t *testCounter) destroy(context.Context) { t.mu.Lock() defer t.mu.Unlock() t.destroyed = true @@ -53,7 +54,7 @@ func newTestCounter() *testCounter { func TestOneRef(t *testing.T) { tc := newTestCounter() - tc.DecRef() + tc.DecRef(context.Background()) if !tc.IsDestroyed() { t.Errorf("object should have been destroyed") @@ -63,8 +64,9 @@ func TestOneRef(t *testing.T) { func TestTwoRefs(t *testing.T) { tc := newTestCounter() tc.IncRef() - tc.DecRef() - tc.DecRef() + ctx := context.Background() + tc.DecRef(ctx) + tc.DecRef(ctx) if !tc.IsDestroyed() { t.Errorf("object should have been destroyed") @@ -74,12 +76,13 @@ func TestTwoRefs(t *testing.T) { func TestMultiRefs(t *testing.T) { tc := newTestCounter() tc.IncRef() - tc.DecRef() + ctx := context.Background() + tc.DecRef(ctx) tc.IncRef() - tc.DecRef() + tc.DecRef(ctx) - tc.DecRef() + tc.DecRef(ctx) if !tc.IsDestroyed() { t.Errorf("object should have been destroyed") @@ -89,19 +92,20 @@ func TestMultiRefs(t *testing.T) { func TestWeakRef(t *testing.T) { tc := newTestCounter() w := NewWeakRef(tc, nil) + ctx := context.Background() // Try resolving. if x := w.Get(); x == nil { t.Errorf("weak reference didn't resolve: expected %v, got nil", tc) } else { - x.DecRef() + x.DecRef(ctx) } // Try resolving again. if x := w.Get(); x == nil { t.Errorf("weak reference didn't resolve: expected %v, got nil", tc) } else { - x.DecRef() + x.DecRef(ctx) } // Shouldn't be destroyed yet. (Can't continue if this fails.) @@ -110,7 +114,7 @@ func TestWeakRef(t *testing.T) { } // Drop the original reference. - tc.DecRef() + tc.DecRef(ctx) // Assert destroyed. if !tc.IsDestroyed() { @@ -126,7 +130,8 @@ func TestWeakRef(t *testing.T) { func TestWeakRefDrop(t *testing.T) { tc := newTestCounter() w := NewWeakRef(tc, nil) - w.Drop() + ctx := context.Background() + w.Drop(ctx) // Just assert the list is empty. if !tc.weakRefs.Empty() { @@ -134,14 +139,14 @@ func TestWeakRefDrop(t *testing.T) { } // Drop the original reference. - tc.DecRef() + tc.DecRef(ctx) } type testWeakRefUser struct { weakRefGone func() } -func (u *testWeakRefUser) WeakRefGone() { +func (u *testWeakRefUser) WeakRefGone(ctx context.Context) { u.weakRefGone() } @@ -165,7 +170,8 @@ func TestCallback(t *testing.T) { }}) // Drop the original reference, this must trigger the callback. - tc.DecRef() + ctx := context.Background() + tc.DecRef(ctx) if !called { t.Fatalf("Callback not called") diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 1bae7cfaf..dfa936563 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -139,7 +139,6 @@ func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { // Import file descriptors. fdTable := proc.Kernel.NewFDTable() - defer fdTable.DecRef() creds := auth.NewUserCredentials( args.KUID, @@ -177,6 +176,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI initArgs.MountNamespaceVFS2.IncRef() } ctx := initArgs.NewContext(proc.Kernel) + defer fdTable.DecRef(ctx) if kernel.VFS2Enabled { // Get the full path to the filename from the PATH env variable. diff --git a/pkg/sentry/devices/memdev/full.go b/pkg/sentry/devices/memdev/full.go index af66fe4dc..511179e31 100644 --- a/pkg/sentry/devices/memdev/full.go +++ b/pkg/sentry/devices/memdev/full.go @@ -46,7 +46,7 @@ type fullFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *fullFD) Release() { +func (fd *fullFD) Release(context.Context) { // noop } diff --git a/pkg/sentry/devices/memdev/null.go b/pkg/sentry/devices/memdev/null.go index 92d3d71be..4918dbeeb 100644 --- a/pkg/sentry/devices/memdev/null.go +++ b/pkg/sentry/devices/memdev/null.go @@ -47,7 +47,7 @@ type nullFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *nullFD) Release() { +func (fd *nullFD) Release(context.Context) { // noop } diff --git a/pkg/sentry/devices/memdev/random.go b/pkg/sentry/devices/memdev/random.go index 6b81da5ef..5e7fe0280 100644 --- a/pkg/sentry/devices/memdev/random.go +++ b/pkg/sentry/devices/memdev/random.go @@ -56,7 +56,7 @@ type randomFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *randomFD) Release() { +func (fd *randomFD) Release(context.Context) { // noop } diff --git a/pkg/sentry/devices/memdev/zero.go b/pkg/sentry/devices/memdev/zero.go index c6f15054d..2e631a252 100644 --- a/pkg/sentry/devices/memdev/zero.go +++ b/pkg/sentry/devices/memdev/zero.go @@ -48,7 +48,7 @@ type zeroFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *zeroFD) Release() { +func (fd *zeroFD) Release(context.Context) { // noop } diff --git a/pkg/sentry/devices/ttydev/ttydev.go b/pkg/sentry/devices/ttydev/ttydev.go index fbb7fd92c..fd4b79c46 100644 --- a/pkg/sentry/devices/ttydev/ttydev.go +++ b/pkg/sentry/devices/ttydev/ttydev.go @@ -55,7 +55,7 @@ type ttyFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *ttyFD) Release() {} +func (fd *ttyFD) Release(context.Context) {} // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *ttyFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { diff --git a/pkg/sentry/devices/tundev/tundev.go b/pkg/sentry/devices/tundev/tundev.go index dfbd069af..852ec3c5c 100644 --- a/pkg/sentry/devices/tundev/tundev.go +++ b/pkg/sentry/devices/tundev/tundev.go @@ -108,8 +108,8 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *tunFD) Release() { - fd.device.Release() +func (fd *tunFD) Release(ctx context.Context) { + fd.device.Release(ctx) } // PRead implements vfs.FileDescriptionImpl.PRead. diff --git a/pkg/sentry/fdimport/fdimport.go b/pkg/sentry/fdimport/fdimport.go index b8686adb4..1b7cb94c0 100644 --- a/pkg/sentry/fdimport/fdimport.go +++ b/pkg/sentry/fdimport/fdimport.go @@ -50,7 +50,7 @@ func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds [] if err != nil { return nil, err } - defer appFile.DecRef() + defer appFile.DecRef(ctx) // Remember this in the TTY file, as we will // use it for the other stdio FDs. @@ -69,7 +69,7 @@ func importFS(ctx context.Context, fdTable *kernel.FDTable, console bool, fds [] if err != nil { return nil, err } - defer appFile.DecRef() + defer appFile.DecRef(ctx) } // Add the file to the FD map. @@ -102,7 +102,7 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi if err != nil { return nil, err } - defer appFile.DecRef() + defer appFile.DecRef(ctx) // Remember this in the TTY file, as we will use it for the other stdio // FDs. @@ -119,7 +119,7 @@ func importVFS2(ctx context.Context, fdTable *kernel.FDTable, console bool, stdi if err != nil { return nil, err } - defer appFile.DecRef() + defer appFile.DecRef(ctx) } if err := fdTable.NewFDAtVFS2(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go index ab1424c95..735452b07 100644 --- a/pkg/sentry/fs/copy_up.go +++ b/pkg/sentry/fs/copy_up.go @@ -201,7 +201,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { parentUpper := parent.Inode.overlay.upper root := RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } // Create the file in the upper filesystem and get an Inode for it. @@ -212,7 +212,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { log.Warningf("copy up failed to create file: %v", err) return syserror.EIO } - defer childFile.DecRef() + defer childFile.DecRef(ctx) childUpperInode = childFile.Dirent.Inode case Directory: @@ -226,7 +226,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { cleanupUpper(ctx, parentUpper, next.name, werr) return syserror.EIO } - defer childUpper.DecRef() + defer childUpper.DecRef(ctx) childUpperInode = childUpper.Inode case Symlink: @@ -246,7 +246,7 @@ func copyUpLocked(ctx context.Context, parent *Dirent, next *Dirent) error { cleanupUpper(ctx, parentUpper, next.name, werr) return syserror.EIO } - defer childUpper.DecRef() + defer childUpper.DecRef(ctx) childUpperInode = childUpper.Inode default: @@ -352,14 +352,14 @@ func copyContentsLocked(ctx context.Context, upper *Inode, lower *Inode, size in if err != nil { return err } - defer upperFile.DecRef() + defer upperFile.DecRef(ctx) // Get a handle to the lower filesystem, which we will read from. lowerFile, err := overlayFile(ctx, lower, FileFlags{Read: true}) if err != nil { return err } - defer lowerFile.DecRef() + defer lowerFile.DecRef(ctx) // Use a buffer pool to minimize allocations. buf := copyUpBuffers.Get().([]byte) diff --git a/pkg/sentry/fs/copy_up_test.go b/pkg/sentry/fs/copy_up_test.go index 91792d9fe..c7a11eec1 100644 --- a/pkg/sentry/fs/copy_up_test.go +++ b/pkg/sentry/fs/copy_up_test.go @@ -126,7 +126,7 @@ func makeOverlayTestFiles(t *testing.T) []*overlayTestFile { if err != nil { t.Fatalf("failed to create file %q: %v", name, err) } - defer f.DecRef() + defer f.DecRef(ctx) relname, _ := f.Dirent.FullName(lowerRoot) @@ -171,7 +171,7 @@ func makeOverlayTestFiles(t *testing.T) []*overlayTestFile { if err != nil { t.Fatalf("failed to find %q: %v", f.name, err) } - defer d.DecRef() + defer d.DecRef(ctx) f.File, err = d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) if err != nil { diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go index dc7ad075a..ec474e554 100644 --- a/pkg/sentry/fs/dev/net_tun.go +++ b/pkg/sentry/fs/dev/net_tun.go @@ -80,8 +80,8 @@ type netTunFileOperations struct { var _ fs.FileOperations = (*netTunFileOperations)(nil) // Release implements fs.FileOperations.Release. -func (fops *netTunFileOperations) Release() { - fops.device.Release() +func (fops *netTunFileOperations) Release(ctx context.Context) { + fops.device.Release(ctx) } // Ioctl implements fs.FileOperations.Ioctl. diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index 65be12175..a2f751068 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -325,7 +325,7 @@ func (d *Dirent) SyncAll(ctx context.Context) { for _, w := range d.children { if child := w.Get(); child != nil { child.(*Dirent).SyncAll(ctx) - child.DecRef() + child.DecRef(ctx) } } } @@ -451,7 +451,7 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl // which don't hold a hard reference on their parent (their parent holds a // hard reference on them, and they contain virtually no state). But this is // good house-keeping. - child.DecRef() + child.DecRef(ctx) return nil, syscall.ENOENT } @@ -468,20 +468,20 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl // their pins on the child. Inotify doesn't properly support filesystems that // revalidate dirents (since watches are lost on revalidation), but if we fail // to unpin the watches child will never be GCed. - cd.Inode.Watches.Unpin(cd) + cd.Inode.Watches.Unpin(ctx, cd) // This child needs to be revalidated, fallthrough to unhash it. Make sure // to not leak a reference from Get(). // // Note that previous lookups may still have a reference to this stale child; // this can't be helped, but we can ensure that *new* lookups are up-to-date. - child.DecRef() + child.DecRef(ctx) } // Either our weak reference expired or we need to revalidate it. Unhash child first, we're // about to replace it. delete(d.children, name) - w.Drop() + w.Drop(ctx) } // Slow path: load the InodeOperations into memory. Since this is a hot path and the lookup may be @@ -512,12 +512,12 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl // There are active references to the existing child, prefer it to the one we // retrieved from Lookup. Likely the Lookup happened very close to the insertion // of child, so considering one stale over the other is fairly arbitrary. - c.DecRef() + c.DecRef(ctx) // The child that was installed could be negative. if cd.IsNegative() { // If so, don't leak a reference and short circuit. - child.DecRef() + child.DecRef(ctx) return nil, syscall.ENOENT } @@ -531,7 +531,7 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl // we did the Inode.Lookup. Fully drop the weak reference and fallback to using the child // we looked up. delete(d.children, name) - w.Drop() + w.Drop(ctx) } // Give the looked up child a parent. We cannot kick out entries, since we just checked above @@ -587,7 +587,7 @@ func (d *Dirent) exists(ctx context.Context, root *Dirent, name string) bool { return false } // Child exists. - child.DecRef() + child.DecRef(ctx) return true } @@ -622,7 +622,7 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi } child := file.Dirent - d.finishCreate(child, name) + d.finishCreate(ctx, child, name) // Return the reference and the new file. When the last reference to // the file is dropped, file.Dirent may no longer be cached. @@ -631,7 +631,7 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi // finishCreate validates the created file, adds it as a child of this dirent, // and notifies any watchers. -func (d *Dirent) finishCreate(child *Dirent, name string) { +func (d *Dirent) finishCreate(ctx context.Context, child *Dirent, name string) { // Sanity check c, its name must be consistent. if child.name != name { panic(fmt.Sprintf("create from %q to %q returned unexpected name %q", d.name, name, child.name)) @@ -650,14 +650,14 @@ func (d *Dirent) finishCreate(child *Dirent, name string) { panic(fmt.Sprintf("hashed child %q over a positive child", child.name)) } // Don't leak a reference. - old.DecRef() + old.DecRef(ctx) // Drop d's reference. - old.DecRef() + old.DecRef(ctx) } // Finally drop the useless weak reference on the floor. - w.Drop() + w.Drop(ctx) } d.Inode.Watches.Notify(name, linux.IN_CREATE, 0) @@ -686,17 +686,17 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c panic(fmt.Sprintf("hashed over a positive child %q", old.(*Dirent).name)) } // Don't leak a reference. - old.DecRef() + old.DecRef(ctx) // Drop d's reference. - old.DecRef() + old.DecRef(ctx) } // Unhash the negative Dirent, name needs to exist now. delete(d.children, name) // Finally drop the useless weak reference on the floor. - w.Drop() + w.Drop(ctx) } // Execute the create operation. @@ -756,7 +756,7 @@ func (d *Dirent) Bind(ctx context.Context, root *Dirent, name string, data trans if e != nil { return e } - d.finishCreate(childDir, name) + d.finishCreate(ctx, childDir, name) return nil }) if err == syscall.EEXIST { @@ -901,7 +901,7 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent, // references to children. // // Preconditions: d.mu must be held. -func (d *Dirent) flush() { +func (d *Dirent) flush(ctx context.Context) { expired := make(map[string]*refs.WeakRef) for n, w := range d.children { // Call flush recursively on each child before removing our @@ -912,7 +912,7 @@ func (d *Dirent) flush() { if !cd.IsNegative() { // Flush the child. cd.mu.Lock() - cd.flush() + cd.flush(ctx) cd.mu.Unlock() // Allow the file system to drop extra references on child. @@ -920,13 +920,13 @@ func (d *Dirent) flush() { } // Don't leak a reference. - child.DecRef() + child.DecRef(ctx) } // Check if the child dirent is closed, and mark it as expired if it is. // We must call w.Get() again here, since the child could have been closed // by the calls to flush() and cache.Remove() in the above if-block. if child := w.Get(); child != nil { - child.DecRef() + child.DecRef(ctx) } else { expired[n] = w } @@ -935,7 +935,7 @@ func (d *Dirent) flush() { // Remove expired entries. for n, w := range expired { delete(d.children, n) - w.Drop() + w.Drop(ctx) } } @@ -977,7 +977,7 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err if !ok { panic("mount must mount over an existing dirent") } - weakRef.Drop() + weakRef.Drop(ctx) // Note that even though `d` is now hidden, it still holds a reference // to its parent. @@ -1002,13 +1002,13 @@ func (d *Dirent) unmount(ctx context.Context, replacement *Dirent) error { if !ok { panic("mount must mount over an existing dirent") } - weakRef.Drop() + weakRef.Drop(ctx) // d is not reachable anymore, and hence not mounted anymore. d.mounted = false // Drop mount reference. - d.DecRef() + d.DecRef(ctx) return nil } @@ -1029,7 +1029,7 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath // Child does not exist. return err } - defer child.DecRef() + defer child.DecRef(ctx) // Remove cannot remove directories. if IsDir(child.Inode.StableAttr) { @@ -1055,7 +1055,7 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath atomic.StoreInt32(&child.deleted, 1) if w, ok := d.children[name]; ok { delete(d.children, name) - w.Drop() + w.Drop(ctx) } // Allow the file system to drop extra references on child. @@ -1067,7 +1067,7 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath // inode may have other links. If this was the last link, the events for the // watch removal will be queued by the inode destructor. child.Inode.Watches.MarkUnlinked() - child.Inode.Watches.Unpin(child) + child.Inode.Watches.Unpin(ctx, child) d.Inode.Watches.Notify(name, linux.IN_DELETE, 0) return nil @@ -1100,7 +1100,7 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string) // Child does not exist. return err } - defer child.DecRef() + defer child.DecRef(ctx) // RemoveDirectory can only remove directories. if !IsDir(child.Inode.StableAttr) { @@ -1121,7 +1121,7 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string) atomic.StoreInt32(&child.deleted, 1) if w, ok := d.children[name]; ok { delete(d.children, name) - w.Drop() + w.Drop(ctx) } // Allow the file system to drop extra references on child. @@ -1130,14 +1130,14 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string) // Finally, let inotify know the child is being unlinked. Drop any extra // refs from inotify to this child dirent. child.Inode.Watches.MarkUnlinked() - child.Inode.Watches.Unpin(child) + child.Inode.Watches.Unpin(ctx, child) d.Inode.Watches.Notify(name, linux.IN_ISDIR|linux.IN_DELETE, 0) return nil } // destroy closes this node and all children. -func (d *Dirent) destroy() { +func (d *Dirent) destroy(ctx context.Context) { if d.IsNegative() { // Nothing to tear-down and no parent references to drop, since a negative // Dirent does not take a references on its parent, has no Inode and no children. @@ -1153,19 +1153,19 @@ func (d *Dirent) destroy() { if c.(*Dirent).IsNegative() { // The parent holds both weak and strong refs in the case of // negative dirents. - c.DecRef() + c.DecRef(ctx) } // Drop the reference we just acquired in WeakRef.Get. - c.DecRef() + c.DecRef(ctx) } - w.Drop() + w.Drop(ctx) } d.children = nil allDirents.remove(d) // Drop our reference to the Inode. - d.Inode.DecRef() + d.Inode.DecRef(ctx) // Allow the Dirent to be GC'ed after this point, since the Inode may still // be referenced after the Dirent is destroyed (for instance by filesystem @@ -1175,7 +1175,7 @@ func (d *Dirent) destroy() { // Drop the reference we have on our parent if we took one. renameMu doesn't need to be // held because d can't be reparented without any references to it left. if d.parent != nil { - d.parent.DecRef() + d.parent.DecRef(ctx) } } @@ -1201,14 +1201,14 @@ func (d *Dirent) TryIncRef() bool { // DecRef decreases the Dirent's refcount and drops its reference on its mount. // // DecRef implements RefCounter.DecRef with destructor d.destroy. -func (d *Dirent) DecRef() { +func (d *Dirent) DecRef(ctx context.Context) { if d.Inode != nil { // Keep mount around, since DecRef may destroy d.Inode. msrc := d.Inode.MountSource - d.DecRefWithDestructor(d.destroy) + d.DecRefWithDestructor(ctx, d.destroy) msrc.DecDirentRefs() } else { - d.DecRefWithDestructor(d.destroy) + d.DecRefWithDestructor(ctx, d.destroy) } } @@ -1359,7 +1359,7 @@ func (d *Dirent) MayDelete(ctx context.Context, root *Dirent, name string) error if err != nil { return err } - defer victim.DecRef() + defer victim.DecRef(ctx) return d.mayDelete(ctx, victim) } @@ -1411,7 +1411,7 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string if err != nil { return err } - defer renamed.DecRef() + defer renamed.DecRef(ctx) // Check that the renamed dirent is deletable. if err := oldParent.mayDelete(ctx, renamed); err != nil { @@ -1453,13 +1453,13 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string // Check that we can delete replaced. if err := newParent.mayDelete(ctx, replaced); err != nil { - replaced.DecRef() + replaced.DecRef(ctx) return err } // Target should not be an ancestor of source. if oldParent.descendantOf(replaced) { - replaced.DecRef() + replaced.DecRef(ctx) // Note that Linux returns EINVAL if the source is an // ancestor of target, but ENOTEMPTY if the target is @@ -1470,7 +1470,7 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string // Check that replaced is not a mount point. if replaced.isMountPointLocked() { - replaced.DecRef() + replaced.DecRef(ctx) return syscall.EBUSY } @@ -1478,11 +1478,11 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string oldIsDir := IsDir(renamed.Inode.StableAttr) newIsDir := IsDir(replaced.Inode.StableAttr) if !newIsDir && oldIsDir { - replaced.DecRef() + replaced.DecRef(ctx) return syscall.ENOTDIR } if !oldIsDir && newIsDir { - replaced.DecRef() + replaced.DecRef(ctx) return syscall.EISDIR } @@ -1493,13 +1493,13 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string // open across renames is currently broken for multiple // reasons, so we flush all references on the replaced node and // its children. - replaced.Inode.Watches.Unpin(replaced) + replaced.Inode.Watches.Unpin(ctx, replaced) replaced.mu.Lock() - replaced.flush() + replaced.flush(ctx) replaced.mu.Unlock() // Done with replaced. - replaced.DecRef() + replaced.DecRef(ctx) } if err := renamed.Inode.Rename(ctx, oldParent, renamed, newParent, newName, replaced != nil); err != nil { @@ -1513,14 +1513,14 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string // can't destroy oldParent (and try to retake its lock) because // Rename's caller must be holding a reference. newParent.IncRef() - oldParent.DecRef() + oldParent.DecRef(ctx) } if w, ok := newParent.children[newName]; ok { - w.Drop() + w.Drop(ctx) delete(newParent.children, newName) } if w, ok := oldParent.children[oldName]; ok { - w.Drop() + w.Drop(ctx) delete(oldParent.children, oldName) } @@ -1551,7 +1551,7 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string // Same as replaced.flush above. renamed.mu.Lock() - renamed.flush() + renamed.flush(ctx) renamed.mu.Unlock() return nil diff --git a/pkg/sentry/fs/dirent_cache.go b/pkg/sentry/fs/dirent_cache.go index 33de32c69..7d9dd717e 100644 --- a/pkg/sentry/fs/dirent_cache.go +++ b/pkg/sentry/fs/dirent_cache.go @@ -17,6 +17,7 @@ package fs import ( "fmt" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" ) @@ -101,7 +102,7 @@ func (c *DirentCache) remove(d *Dirent) { panic(fmt.Sprintf("trying to remove %v, which is not in the dirent cache", d)) } c.list.Remove(d) - d.DecRef() + d.DecRef(context.Background()) c.currentSize-- if c.limit != nil { c.limit.dec() diff --git a/pkg/sentry/fs/dirent_refs_test.go b/pkg/sentry/fs/dirent_refs_test.go index 98d69c6f2..176b894ba 100644 --- a/pkg/sentry/fs/dirent_refs_test.go +++ b/pkg/sentry/fs/dirent_refs_test.go @@ -51,7 +51,7 @@ func TestWalkPositive(t *testing.T) { t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 1) } - d.DecRef() + d.DecRef(ctx) if got := root.ReadRefs(); got != 1 { t.Fatalf("root has a ref count of %d, want %d", got, 1) @@ -61,7 +61,7 @@ func TestWalkPositive(t *testing.T) { t.Fatalf("child name = %q has a ref count of %d, want %d", d.name, got, 0) } - root.flush() + root.flush(ctx) if got := len(root.children); got != 0 { t.Fatalf("root has %d children, want %d", got, 0) @@ -114,7 +114,7 @@ func TestWalkNegative(t *testing.T) { t.Fatalf("child has a ref count of %d, want %d", got, 2) } - child.DecRef() + child.DecRef(ctx) if got := child.(*Dirent).ReadRefs(); got != 1 { t.Fatalf("child has a ref count of %d, want %d", got, 1) @@ -124,7 +124,7 @@ func TestWalkNegative(t *testing.T) { t.Fatalf("root has %d children, want %d", got, 1) } - root.DecRef() + root.DecRef(ctx) if got := root.ReadRefs(); got != 0 { t.Fatalf("root has a ref count of %d, want %d", got, 0) @@ -351,9 +351,9 @@ func TestRemoveExtraRefs(t *testing.T) { t.Fatalf("dirent has a ref count of %d, want %d", got, 1) } - d.DecRef() + d.DecRef(ctx) - test.root.flush() + test.root.flush(ctx) if got := len(test.root.children); got != 0 { t.Errorf("root has %d children, want %d", got, 0) @@ -403,8 +403,8 @@ func TestRenameExtraRefs(t *testing.T) { t.Fatalf("Rename got error %v, want nil", err) } - oldParent.flush() - newParent.flush() + oldParent.flush(ctx) + newParent.flush(ctx) // Expect to have only active references. if got := renamed.ReadRefs(); got != 1 { diff --git a/pkg/sentry/fs/dirent_state.go b/pkg/sentry/fs/dirent_state.go index f623d6c0e..67a35f0b2 100644 --- a/pkg/sentry/fs/dirent_state.go +++ b/pkg/sentry/fs/dirent_state.go @@ -18,6 +18,7 @@ import ( "fmt" "sync/atomic" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" ) @@ -48,7 +49,7 @@ func (d *Dirent) saveChildren() map[string]*Dirent { for name, w := range d.children { if rc := w.Get(); rc != nil { // Drop the reference count obtain in w.Get() - rc.DecRef() + rc.DecRef(context.Background()) cd := rc.(*Dirent) if cd.IsNegative() { diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go index 9fce177ad..b99199798 100644 --- a/pkg/sentry/fs/fdpipe/pipe.go +++ b/pkg/sentry/fs/fdpipe/pipe.go @@ -115,7 +115,7 @@ func (p *pipeOperations) Readiness(mask waiter.EventMask) (eventMask waiter.Even } // Release implements fs.FileOperations.Release. -func (p *pipeOperations) Release() { +func (p *pipeOperations) Release(context.Context) { fdnotifier.RemoveFD(int32(p.file.FD())) p.file.Close() p.file = nil diff --git a/pkg/sentry/fs/fdpipe/pipe_opener_test.go b/pkg/sentry/fs/fdpipe/pipe_opener_test.go index e556da48a..b9cec4b13 100644 --- a/pkg/sentry/fs/fdpipe/pipe_opener_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_opener_test.go @@ -182,7 +182,7 @@ func TestTryOpen(t *testing.T) { // Cleanup the state of the pipe, and remove the fd from the // fdnotifier. Sadly this needed to maintain the correctness // of other tests because the fdnotifier is global. - pipeOps.Release() + pipeOps.Release(ctx) } continue } @@ -191,7 +191,7 @@ func TestTryOpen(t *testing.T) { } if pipeOps != nil { // Same as above. - pipeOps.Release() + pipeOps.Release(ctx) } } } @@ -279,7 +279,7 @@ func TestPipeOpenUnblocksEventually(t *testing.T) { pipeOps, err := Open(ctx, opener, flags) if pipeOps != nil { // Same as TestTryOpen. - pipeOps.Release() + pipeOps.Release(ctx) } // Check that the partner opened the file successfully. @@ -325,7 +325,7 @@ func TestCopiedReadAheadBuffer(t *testing.T) { ctx := contexttest.Context(t) pipeOps, err := pipeOpenState.TryOpen(ctx, opener, fs.FileFlags{Read: true}) if pipeOps != nil { - pipeOps.Release() + pipeOps.Release(ctx) t.Fatalf("open(%s, %o) got file, want nil", name, syscall.O_RDONLY) } if err != syserror.ErrWouldBlock { @@ -351,7 +351,7 @@ func TestCopiedReadAheadBuffer(t *testing.T) { if pipeOps == nil { t.Fatalf("open(%s, %o) got nil file, want not nil", name, syscall.O_RDONLY) } - defer pipeOps.Release() + defer pipeOps.Release(ctx) if err != nil { t.Fatalf("open(%s, %o) got error %v, want nil", name, syscall.O_RDONLY, err) @@ -471,14 +471,14 @@ func TestPipeHangup(t *testing.T) { f := <-fdchan if f < 0 { t.Errorf("%s: partner routine got fd %d, want > 0", test.desc, f) - pipeOps.Release() + pipeOps.Release(ctx) continue } if test.hangupSelf { // Hangup self and assert that our partner got the expected hangup // error. - pipeOps.Release() + pipeOps.Release(ctx) if test.flags.Read { // Partner is writer. @@ -490,7 +490,7 @@ func TestPipeHangup(t *testing.T) { } else { // Hangup our partner and expect us to get the hangup error. syscall.Close(f) - defer pipeOps.Release() + defer pipeOps.Release(ctx) if test.flags.Read { assertReaderHungup(t, test.desc, pipeOps.(*pipeOperations).file) diff --git a/pkg/sentry/fs/fdpipe/pipe_test.go b/pkg/sentry/fs/fdpipe/pipe_test.go index a0082ecca..1c9e82562 100644 --- a/pkg/sentry/fs/fdpipe/pipe_test.go +++ b/pkg/sentry/fs/fdpipe/pipe_test.go @@ -98,10 +98,11 @@ func TestNewPipe(t *testing.T) { } f := fd.New(gfd) - p, err := newPipeOperations(contexttest.Context(t), nil, test.flags, f, test.readAheadBuffer) + ctx := contexttest.Context(t) + p, err := newPipeOperations(ctx, nil, test.flags, f, test.readAheadBuffer) if p != nil { // This is necessary to remove the fd from the global fd notifier. - defer p.Release() + defer p.Release(ctx) } else { // If there is no p to DecRef on, because newPipeOperations failed, then the // file still needs to be closed. @@ -153,13 +154,14 @@ func TestPipeDestruction(t *testing.T) { syscall.Close(fds[1]) // Test the read end, but it doesn't really matter which. - p, err := newPipeOperations(contexttest.Context(t), nil, fs.FileFlags{Read: true}, f, nil) + ctx := contexttest.Context(t) + p, err := newPipeOperations(ctx, nil, fs.FileFlags{Read: true}, f, nil) if err != nil { f.Close() t.Fatalf("newPipeOperations got error %v, want nil", err) } // Drop our only reference, which should trigger the destructor. - p.Release() + p.Release(ctx) if fdnotifier.HasFD(int32(fds[0])) { t.Fatalf("after DecRef fdnotifier has fd %d, want no longer registered", fds[0]) @@ -282,7 +284,7 @@ func TestPipeRequest(t *testing.T) { if err != nil { t.Fatalf("%s: newPipeOperations got error %v, want nil", test.desc, err) } - defer p.Release() + defer p.Release(ctx) inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{Type: fs.Pipe}) file := fs.NewFile(ctx, fs.NewDirent(ctx, inode, "pipe"), fs.FileFlags{Read: true}, p) @@ -334,7 +336,7 @@ func TestPipeReadAheadBuffer(t *testing.T) { rfile.Close() t.Fatalf("newPipeOperations got error %v, want nil", err) } - defer p.Release() + defer p.Release(ctx) inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ Type: fs.Pipe, @@ -380,7 +382,7 @@ func TestPipeReadsAccumulate(t *testing.T) { } // Don't forget to remove the fd from the fd notifier. Otherwise other tests will // likely be borked, because it's global :( - defer p.Release() + defer p.Release(ctx) inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ Type: fs.Pipe, @@ -448,7 +450,7 @@ func TestPipeWritesAccumulate(t *testing.T) { } // Don't forget to remove the fd from the fd notifier. Otherwise other tests // will likely be borked, because it's global :( - defer p.Release() + defer p.Release(ctx) inode := fs.NewMockInode(ctx, fs.NewMockMountSource(nil), fs.StableAttr{ Type: fs.Pipe, diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index ca41520b4..72ea70fcf 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -142,17 +142,17 @@ func NewFile(ctx context.Context, dirent *Dirent, flags FileFlags, fops FileOper } // DecRef destroys the File when it is no longer referenced. -func (f *File) DecRef() { - f.DecRefWithDestructor(func() { +func (f *File) DecRef(ctx context.Context) { + f.DecRefWithDestructor(ctx, func(context.Context) { // Drop BSD style locks. lockRng := lock.LockRange{Start: 0, End: lock.LockEOF} f.Dirent.Inode.LockCtx.BSD.UnlockRegion(f, lockRng) // Release resources held by the FileOperations. - f.FileOperations.Release() + f.FileOperations.Release(ctx) // Release a reference on the Dirent. - f.Dirent.DecRef() + f.Dirent.DecRef(ctx) // Only unregister if we are currently registered. There is nothing // to register if f.async is nil (this happens when async mode is @@ -460,7 +460,7 @@ func (f *File) UnstableAttr(ctx context.Context) (UnstableAttr, error) { func (f *File) MappedName(ctx context.Context) string { root := RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } name, _ := f.Dirent.FullName(root) return name diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go index f5537411e..305c0f840 100644 --- a/pkg/sentry/fs/file_operations.go +++ b/pkg/sentry/fs/file_operations.go @@ -67,7 +67,7 @@ type SpliceOpts struct { // - File.Flags(): This value may change during the operation. type FileOperations interface { // Release release resources held by FileOperations. - Release() + Release(ctx context.Context) // Waitable defines how this File can be waited on for read and // write readiness. diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go index dcc1df38f..9dc58d5ff 100644 --- a/pkg/sentry/fs/file_overlay.go +++ b/pkg/sentry/fs/file_overlay.go @@ -54,7 +54,7 @@ func overlayFile(ctx context.Context, inode *Inode, flags FileFlags) (*File, err // Drop the extra reference on the Dirent. Now there's only one reference // on the dirent, either owned by f (if non-nil), or the Dirent is about // to be destroyed (if GetFile failed). - dirent.DecRef() + dirent.DecRef(ctx) return f, err } @@ -89,12 +89,12 @@ type overlayFileOperations struct { } // Release implements FileOperations.Release. -func (f *overlayFileOperations) Release() { +func (f *overlayFileOperations) Release(ctx context.Context) { if f.upper != nil { - f.upper.DecRef() + f.upper.DecRef(ctx) } if f.lower != nil { - f.lower.DecRef() + f.lower.DecRef(ctx) } } @@ -164,7 +164,7 @@ func (f *overlayFileOperations) Seek(ctx context.Context, file *File, whence See func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, serializer DentrySerializer) (int64, error) { root := RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } dirCtx := &DirCtx{ @@ -497,7 +497,7 @@ func readdirOne(ctx context.Context, d *Dirent) (map[string]DentAttr, error) { if err != nil { return nil, err } - defer dir.DecRef() + defer dir.DecRef(ctx) // Use a stub serializer to read the entries into memory. stubSerializer := &CollectEntriesSerializer{} @@ -521,10 +521,10 @@ type overlayMappingIdentity struct { } // DecRef implements AtomicRefCount.DecRef. -func (omi *overlayMappingIdentity) DecRef() { - omi.AtomicRefCount.DecRefWithDestructor(func() { - omi.overlayFile.DecRef() - omi.id.DecRef() +func (omi *overlayMappingIdentity) DecRef(ctx context.Context) { + omi.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) { + omi.overlayFile.DecRef(ctx) + omi.id.DecRef(ctx) }) } @@ -544,7 +544,7 @@ func (omi *overlayMappingIdentity) InodeID() uint64 { func (omi *overlayMappingIdentity) MappedName(ctx context.Context) string { root := RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } name, _ := omi.overlayFile.Dirent.FullName(root) return name diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go index 08695391c..dc9efa5df 100644 --- a/pkg/sentry/fs/fsutil/file.go +++ b/pkg/sentry/fs/fsutil/file.go @@ -31,7 +31,7 @@ import ( type FileNoopRelease struct{} // Release is a no-op. -func (FileNoopRelease) Release() {} +func (FileNoopRelease) Release(context.Context) {} // SeekWithDirCursor is used to implement fs.FileOperations.Seek. If dirCursor // is not nil and the seek was on a directory, the cursor will be updated. @@ -296,7 +296,7 @@ func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, d *fs.Diren func (sdfo *StaticDirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) { root := fs.RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } dirCtx := &fs.DirCtx{ Serializer: serializer, diff --git a/pkg/sentry/fs/gofer/file.go b/pkg/sentry/fs/gofer/file.go index b2fcab127..c0bc63a32 100644 --- a/pkg/sentry/fs/gofer/file.go +++ b/pkg/sentry/fs/gofer/file.go @@ -114,7 +114,7 @@ func NewFile(ctx context.Context, dirent *fs.Dirent, name string, flags fs.FileF } // Release implements fs.FileOpeations.Release. -func (f *fileOperations) Release() { +func (f *fileOperations) Release(context.Context) { f.handles.DecRef() } @@ -122,7 +122,7 @@ func (f *fileOperations) Release() { func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) { root := fs.RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } dirCtx := &fs.DirCtx{ diff --git a/pkg/sentry/fs/gofer/gofer_test.go b/pkg/sentry/fs/gofer/gofer_test.go index 2df2fe889..326fed954 100644 --- a/pkg/sentry/fs/gofer/gofer_test.go +++ b/pkg/sentry/fs/gofer/gofer_test.go @@ -232,7 +232,7 @@ func TestRevalidation(t *testing.T) { // We must release the dirent, of the test will fail // with a reference leak. This is tracked by p9test. - defer dirent.DecRef() + defer dirent.DecRef(ctx) // Walk again. Depending on the cache policy, we may // get a new dirent. @@ -246,7 +246,7 @@ func TestRevalidation(t *testing.T) { if !test.preModificationWantReload && dirent != newDirent { t.Errorf("Lookup with cachePolicy=%s got new dirent %+v, wanted old dirent %+v", test.cachePolicy, newDirent, dirent) } - newDirent.DecRef() // See above. + newDirent.DecRef(ctx) // See above. // Modify the underlying mocked file's modification // time for the next walk that occurs. @@ -287,7 +287,7 @@ func TestRevalidation(t *testing.T) { if test.postModificationWantUpdatedAttrs && gotModTimeSeconds != nowSeconds { t.Fatalf("Lookup with cachePolicy=%s got new modification time %v, wanted %v", test.cachePolicy, gotModTimeSeconds, nowSeconds) } - newDirent.DecRef() // See above. + newDirent.DecRef(ctx) // See above. // Remove the file from the remote fs, subsequent walks // should now fail to find anything. @@ -303,7 +303,7 @@ func TestRevalidation(t *testing.T) { t.Errorf("Lookup with cachePolicy=%s got new dirent and error %v, wanted old dirent and nil error", test.cachePolicy, err) } if err == nil { - newDirent.DecRef() // See above. + newDirent.DecRef(ctx) // See above. } }) } diff --git a/pkg/sentry/fs/gofer/handles.go b/pkg/sentry/fs/gofer/handles.go index fc14249be..f324dbf26 100644 --- a/pkg/sentry/fs/gofer/handles.go +++ b/pkg/sentry/fs/gofer/handles.go @@ -47,7 +47,8 @@ type handles struct { // DecRef drops a reference on handles. func (h *handles) DecRef() { - h.DecRefWithDestructor(func() { + ctx := context.Background() + h.DecRefWithDestructor(ctx, func(context.Context) { if h.Host != nil { if h.isHostBorrowed { h.Host.Release() @@ -57,7 +58,7 @@ func (h *handles) DecRef() { } } } - if err := h.File.close(context.Background()); err != nil { + if err := h.File.close(ctx); err != nil { log.Warningf("error closing p9 file: %v", err) } }) diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go index 51d7368a1..3a225fd39 100644 --- a/pkg/sentry/fs/gofer/inode.go +++ b/pkg/sentry/fs/gofer/inode.go @@ -441,8 +441,9 @@ func (i *inodeOperations) Release(ctx context.Context) { // asynchronously. // // We use AsyncWithContext to avoid needing to allocate an extra - // anonymous function on the heap. - fs.AsyncWithContext(ctx, i.fileState.Release) + // anonymous function on the heap. We must use background context + // because the async work cannot happen on the task context. + fs.AsyncWithContext(context.Background(), i.fileState.Release) } // Mappable implements fs.InodeOperations.Mappable. diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index cf9800100..3c66dc3c2 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -168,7 +168,7 @@ func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string // Construct the positive Dirent. d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) - defer d.DecRef() + defer d.DecRef(ctx) // Construct the new file, caching the handles if allowed. h := handles{ @@ -371,7 +371,7 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string // Find out if file being deleted is a socket or pipe that needs to be // removed from endpoint map. if d, err := i.Lookup(ctx, dir, name); err == nil { - defer d.DecRef() + defer d.DecRef(ctx) if fs.IsSocket(d.Inode.StableAttr) || fs.IsPipe(d.Inode.StableAttr) { switch iops := d.Inode.InodeOperations.(type) { @@ -392,7 +392,7 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string return err } if key != nil { - i.session().overrides.remove(*key) + i.session().overrides.remove(ctx, *key) } i.touchModificationAndStatusChangeTime(ctx, dir) diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index b5efc86f2..7cf3522ff 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -89,10 +89,10 @@ func (e *overrideMaps) addPipe(key device.MultiDeviceKey, d *fs.Dirent, inode *f // remove deletes the key from the maps. // // Precondition: maps must have been locked with 'lock'. -func (e *overrideMaps) remove(key device.MultiDeviceKey) { +func (e *overrideMaps) remove(ctx context.Context, key device.MultiDeviceKey) { endpoint := e.keyMap[key] delete(e.keyMap, key) - endpoint.dirent.DecRef() + endpoint.dirent.DecRef(ctx) } // lock blocks other addition and removal operations from happening while @@ -197,7 +197,7 @@ type session struct { } // Destroy tears down the session. -func (s *session) Destroy() { +func (s *session) Destroy(ctx context.Context) { s.client.Close() } @@ -329,7 +329,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF s.client, err = p9.NewClient(conn, s.msize, s.version) if err != nil { // Drop our reference on the session, it needs to be torn down. - s.DecRef() + s.DecRef(ctx) return nil, err } @@ -340,7 +340,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF ctx.UninterruptibleSleepFinish(false) if err != nil { // Same as above. - s.DecRef() + s.DecRef(ctx) return nil, err } @@ -348,7 +348,7 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF if err != nil { s.attach.close(ctx) // Same as above, but after we execute the Close request. - s.DecRef() + s.DecRef(ctx) return nil, err } @@ -393,13 +393,13 @@ func (s *session) fillKeyMap(ctx context.Context) error { // fillPathMap populates paths for overrides from dirents in direntMap // before save. -func (s *session) fillPathMap() error { +func (s *session) fillPathMap(ctx context.Context) error { unlock := s.overrides.lock() defer unlock() for _, endpoint := range s.overrides.keyMap { mountRoot := endpoint.dirent.MountRoot() - defer mountRoot.DecRef() + defer mountRoot.DecRef(ctx) dirPath, _ := endpoint.dirent.FullName(mountRoot) if dirPath == "" { return fmt.Errorf("error getting path from dirent") diff --git a/pkg/sentry/fs/gofer/session_state.go b/pkg/sentry/fs/gofer/session_state.go index 2d398b753..48b423dd8 100644 --- a/pkg/sentry/fs/gofer/session_state.go +++ b/pkg/sentry/fs/gofer/session_state.go @@ -26,7 +26,8 @@ import ( // beforeSave is invoked by stateify. func (s *session) beforeSave() { if s.overrides != nil { - if err := s.fillPathMap(); err != nil { + ctx := &dummyClockContext{context.Background()} + if err := s.fillPathMap(ctx); err != nil { panic("failed to save paths to override map before saving" + err.Error()) } } diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 40f2c1cad..8a1c69ac2 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -134,14 +134,14 @@ func (e *endpoint) UnidirectionalConnect(ctx context.Context) (transport.Connect // We don't need the receiver. c.CloseRecv() - c.Release() + c.Release(ctx) return c, nil } // Release implements transport.BoundEndpoint.Release. -func (e *endpoint) Release() { - e.inode.DecRef() +func (e *endpoint) Release(ctx context.Context) { + e.inode.DecRef(ctx) } // Passcred implements transport.BoundEndpoint.Passcred. diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go index 39299b7e4..0d8d36afa 100644 --- a/pkg/sentry/fs/host/control.go +++ b/pkg/sentry/fs/host/control.go @@ -57,7 +57,7 @@ func (c *scmRights) Clone() transport.RightsControlMessage { } // Release implements transport.RightsControlMessage.Release. -func (c *scmRights) Release() { +func (c *scmRights) Release(ctx context.Context) { for _, fd := range c.fds { syscall.Close(fd) } diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go index 3e48b8b2c..86d1a87f0 100644 --- a/pkg/sentry/fs/host/file.go +++ b/pkg/sentry/fs/host/file.go @@ -110,7 +110,7 @@ func newFileFromDonatedFD(ctx context.Context, donated int, saveable, isTTY bool name := fmt.Sprintf("host:[%d]", inode.StableAttr.InodeID) dirent := fs.NewDirent(ctx, inode, name) - defer dirent.DecRef() + defer dirent.DecRef(ctx) if isTTY { return newTTYFile(ctx, dirent, flags, iops), nil @@ -169,7 +169,7 @@ func (f *fileOperations) Readiness(mask waiter.EventMask) waiter.EventMask { func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) { root := fs.RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } dirCtx := &fs.DirCtx{ Serializer: serializer, diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go index c507f57eb..41a23b5da 100644 --- a/pkg/sentry/fs/host/inode_test.go +++ b/pkg/sentry/fs/host/inode_test.go @@ -36,7 +36,7 @@ func TestCloseFD(t *testing.T) { if err != nil { t.Fatalf("Failed to create File: %v", err) } - file.DecRef() + file.DecRef(ctx) s := make([]byte, 10) if c, err := syscall.Read(p[0], s); c != 0 || err != nil { diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go index cfb089e43..a2f3d5918 100644 --- a/pkg/sentry/fs/host/socket.go +++ b/pkg/sentry/fs/host/socket.go @@ -194,7 +194,7 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error) } // Send implements transport.ConnectedEndpoint.Send. -func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) { +func (c *ConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) { c.mu.RLock() defer c.mu.RUnlock() @@ -271,7 +271,7 @@ func (c *ConnectedEndpoint) EventUpdate() { } // Recv implements transport.Receiver.Recv. -func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { +func (c *ConnectedEndpoint) Recv(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { c.mu.RLock() defer c.mu.RUnlock() @@ -318,7 +318,7 @@ func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek } // close releases all resources related to the endpoint. -func (c *ConnectedEndpoint) close() { +func (c *ConnectedEndpoint) close(context.Context) { fdnotifier.RemoveFD(int32(c.file.FD())) c.file.Close() c.file = nil @@ -374,8 +374,8 @@ func (c *ConnectedEndpoint) RecvMaxQueueSize() int64 { } // Release implements transport.ConnectedEndpoint.Release and transport.Receiver.Release. -func (c *ConnectedEndpoint) Release() { - c.ref.DecRefWithDestructor(c.close) +func (c *ConnectedEndpoint) Release(ctx context.Context) { + c.ref.DecRefWithDestructor(ctx, c.close) } // CloseUnread implements transport.ConnectedEndpoint.CloseUnread. diff --git a/pkg/sentry/fs/host/socket_test.go b/pkg/sentry/fs/host/socket_test.go index affdbcacb..9d58ea448 100644 --- a/pkg/sentry/fs/host/socket_test.go +++ b/pkg/sentry/fs/host/socket_test.go @@ -67,11 +67,12 @@ func TestSocketIsBlocking(t *testing.T) { if fl&syscall.O_NONBLOCK == syscall.O_NONBLOCK { t.Fatalf("Expected socket %v to be blocking", pair[1]) } - sock, err := newSocket(contexttest.Context(t), pair[0], false) + ctx := contexttest.Context(t) + sock, err := newSocket(ctx, pair[0], false) if err != nil { t.Fatalf("newSocket(%v) failed => %v", pair[0], err) } - defer sock.DecRef() + defer sock.DecRef(ctx) // Test that the socket now is non-blocking. if fl, err = getFl(pair[0]); err != nil { t.Fatalf("getFl: fcntl(%v, GETFL) => %v", pair[0], err) @@ -93,11 +94,12 @@ func TestSocketWritev(t *testing.T) { if err != nil { t.Fatalf("host socket creation failed: %v", err) } - socket, err := newSocket(contexttest.Context(t), pair[0], false) + ctx := contexttest.Context(t) + socket, err := newSocket(ctx, pair[0], false) if err != nil { t.Fatalf("newSocket(%v) => %v", pair[0], err) } - defer socket.DecRef() + defer socket.DecRef(ctx) buf := []byte("hello world\n") n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(buf)) if err != nil { @@ -115,11 +117,12 @@ func TestSocketWritevLen0(t *testing.T) { if err != nil { t.Fatalf("host socket creation failed: %v", err) } - socket, err := newSocket(contexttest.Context(t), pair[0], false) + ctx := contexttest.Context(t) + socket, err := newSocket(ctx, pair[0], false) if err != nil { t.Fatalf("newSocket(%v) => %v", pair[0], err) } - defer socket.DecRef() + defer socket.DecRef(ctx) n, err := socket.Writev(contexttest.Context(t), usermem.BytesIOSequence(nil)) if err != nil { t.Fatalf("socket writev failed: %v", err) @@ -136,11 +139,12 @@ func TestSocketSendMsgLen0(t *testing.T) { if err != nil { t.Fatalf("host socket creation failed: %v", err) } - sfile, err := newSocket(contexttest.Context(t), pair[0], false) + ctx := contexttest.Context(t) + sfile, err := newSocket(ctx, pair[0], false) if err != nil { t.Fatalf("newSocket(%v) => %v", pair[0], err) } - defer sfile.DecRef() + defer sfile.DecRef(ctx) s := sfile.FileOperations.(socket.Socket) n, terr := s.SendMsg(nil, usermem.BytesIOSequence(nil), []byte{}, 0, false, ktime.Time{}, socket.ControlMessages{}) @@ -158,18 +162,19 @@ func TestListen(t *testing.T) { if err != nil { t.Fatalf("syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) => %v", err) } - sfile1, err := newSocket(contexttest.Context(t), pair[0], false) + ctx := contexttest.Context(t) + sfile1, err := newSocket(ctx, pair[0], false) if err != nil { t.Fatalf("newSocket(%v) => %v", pair[0], err) } - defer sfile1.DecRef() + defer sfile1.DecRef(ctx) socket1 := sfile1.FileOperations.(socket.Socket) - sfile2, err := newSocket(contexttest.Context(t), pair[1], false) + sfile2, err := newSocket(ctx, pair[1], false) if err != nil { t.Fatalf("newSocket(%v) => %v", pair[1], err) } - defer sfile2.DecRef() + defer sfile2.DecRef(ctx) socket2 := sfile2.FileOperations.(socket.Socket) // Socketpairs can not be listened to. @@ -185,11 +190,11 @@ func TestListen(t *testing.T) { if err != nil { t.Fatalf("syscall.Socket(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) => %v", err) } - sfile3, err := newSocket(contexttest.Context(t), sock, false) + sfile3, err := newSocket(ctx, sock, false) if err != nil { t.Fatalf("newSocket(%v) => %v", sock, err) } - defer sfile3.DecRef() + defer sfile3.DecRef(ctx) socket3 := sfile3.FileOperations.(socket.Socket) // This socket is not bound so we can't listen on it. @@ -237,9 +242,10 @@ func TestRelease(t *testing.T) { } c := &ConnectedEndpoint{queue: &waiter.Queue{}, file: fd.New(f)} want := &ConnectedEndpoint{queue: c.queue} - want.ref.DecRef() + ctx := contexttest.Context(t) + want.ref.DecRef(ctx) fdnotifier.AddFD(int32(c.file.FD()), nil) - c.Release() + c.Release(ctx) if !reflect.DeepEqual(c, want) { t.Errorf("got = %#v, want = %#v", c, want) } diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 82a02fcb2..b5229098c 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -113,12 +113,12 @@ func (t *TTYFileOperations) Write(ctx context.Context, file *fs.File, src userme } // Release implements fs.FileOperations.Release. -func (t *TTYFileOperations) Release() { +func (t *TTYFileOperations) Release(ctx context.Context) { t.mu.Lock() t.fgProcessGroup = nil t.mu.Unlock() - t.fileOperations.Release() + t.fileOperations.Release(ctx) } // Ioctl implements fs.FileOperations.Ioctl. diff --git a/pkg/sentry/fs/host/wait_test.go b/pkg/sentry/fs/host/wait_test.go index ce397a5e3..c143f4ce2 100644 --- a/pkg/sentry/fs/host/wait_test.go +++ b/pkg/sentry/fs/host/wait_test.go @@ -39,7 +39,7 @@ func TestWait(t *testing.T) { t.Fatalf("NewFile failed: %v", err) } - defer file.DecRef() + defer file.DecRef(ctx) r := file.Readiness(waiter.EventIn) if r != 0 { diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index a34fbc946..b79cd9877 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -96,13 +96,12 @@ func NewInode(ctx context.Context, iops InodeOperations, msrc *MountSource, satt } // DecRef drops a reference on the Inode. -func (i *Inode) DecRef() { - i.DecRefWithDestructor(i.destroy) +func (i *Inode) DecRef(ctx context.Context) { + i.DecRefWithDestructor(ctx, i.destroy) } // destroy releases the Inode and releases the msrc reference taken. -func (i *Inode) destroy() { - ctx := context.Background() +func (i *Inode) destroy(ctx context.Context) { if err := i.WriteOut(ctx); err != nil { // FIXME(b/65209558): Mark as warning again once noatime is // properly supported. @@ -122,12 +121,12 @@ func (i *Inode) destroy() { i.Watches.targetDestroyed() if i.overlay != nil { - i.overlay.release() + i.overlay.release(ctx) } else { i.InodeOperations.Release(ctx) } - i.MountSource.DecRef() + i.MountSource.DecRef(ctx) } // Mappable calls i.InodeOperations.Mappable. diff --git a/pkg/sentry/fs/inode_inotify.go b/pkg/sentry/fs/inode_inotify.go index efd3c962b..9911a00c2 100644 --- a/pkg/sentry/fs/inode_inotify.go +++ b/pkg/sentry/fs/inode_inotify.go @@ -17,6 +17,7 @@ package fs import ( "fmt" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" ) @@ -136,11 +137,11 @@ func (w *Watches) Notify(name string, events, cookie uint32) { } // Unpin unpins dirent from all watches in this set. -func (w *Watches) Unpin(d *Dirent) { +func (w *Watches) Unpin(ctx context.Context, d *Dirent) { w.mu.RLock() defer w.mu.RUnlock() for _, watch := range w.ws { - watch.Unpin(d) + watch.Unpin(ctx, d) } } diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go index 537c8d257..dc2e353d9 100644 --- a/pkg/sentry/fs/inode_overlay.go +++ b/pkg/sentry/fs/inode_overlay.go @@ -85,7 +85,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name upperInode = child.Inode upperInode.IncRef() } - child.DecRef() + child.DecRef(ctx) } // Are we done? @@ -108,7 +108,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name entry, err := newOverlayEntry(ctx, upperInode, nil, false) if err != nil { // Don't leak resources. - upperInode.DecRef() + upperInode.DecRef(ctx) parent.copyMu.RUnlock() return nil, false, err } @@ -129,7 +129,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name if err != nil && err != syserror.ENOENT { // Don't leak resources. if upperInode != nil { - upperInode.DecRef() + upperInode.DecRef(ctx) } parent.copyMu.RUnlock() return nil, false, err @@ -152,7 +152,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name } } } - child.DecRef() + child.DecRef(ctx) } } @@ -183,7 +183,7 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name // unnecessary because we don't need to copy-up and we will always // operate (e.g. read/write) on the upper Inode. if !IsDir(upperInode.StableAttr) { - lowerInode.DecRef() + lowerInode.DecRef(ctx) lowerInode = nil } } @@ -194,10 +194,10 @@ func overlayLookup(ctx context.Context, parent *overlayEntry, inode *Inode, name // Well, not quite, we failed at the last moment, how depressing. // Be sure not to leak resources. if upperInode != nil { - upperInode.DecRef() + upperInode.DecRef(ctx) } if lowerInode != nil { - lowerInode.DecRef() + lowerInode.DecRef(ctx) } parent.copyMu.RUnlock() return nil, false, err @@ -248,7 +248,7 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st // user) will clobber the real path for the underlying Inode. upperFile.Dirent.Inode.IncRef() upperDirent := NewTransientDirent(upperFile.Dirent.Inode) - upperFile.Dirent.DecRef() + upperFile.Dirent.DecRef(ctx) upperFile.Dirent = upperDirent // Create the overlay inode and dirent. We need this to construct the @@ -259,7 +259,7 @@ func overlayCreate(ctx context.Context, o *overlayEntry, parent *Dirent, name st // The overlay file created below with NewFile will take a reference on // the overlayDirent, and it should be the only thing holding a // reference at the time of creation, so we must drop this reference. - defer overlayDirent.DecRef() + defer overlayDirent.DecRef(ctx) // Create a new overlay file that wraps the upper file. flags.Pread = upperFile.Flags().Pread @@ -399,7 +399,7 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena if !replaced.IsNegative() && IsDir(replaced.Inode.StableAttr) { children, err := readdirOne(ctx, replaced) if err != nil { - replaced.DecRef() + replaced.DecRef(ctx) return err } @@ -407,12 +407,12 @@ func overlayRename(ctx context.Context, o *overlayEntry, oldParent *Dirent, rena // included among the returned children, so we don't // need to bother checking for them. if len(children) > 0 { - replaced.DecRef() + replaced.DecRef(ctx) return syserror.ENOTEMPTY } } - replaced.DecRef() + replaced.DecRef(ctx) } } @@ -455,12 +455,12 @@ func overlayBind(ctx context.Context, o *overlayEntry, parent *Dirent, name stri // Grab the inode and drop the dirent, we don't need it. inode := d.Inode inode.IncRef() - d.DecRef() + d.DecRef(ctx) // Create a new overlay entry and dirent for the socket. entry, err := newOverlayEntry(ctx, inode, nil, false) if err != nil { - inode.DecRef() + inode.DecRef(ctx) return nil, err } // Use the parent's MountSource, since that corresponds to the overlay, @@ -672,7 +672,7 @@ func overlayGetlink(ctx context.Context, o *overlayEntry) (*Dirent, error) { // ground and claim that jumping around the filesystem like this // is not supported. name, _ := dirent.FullName(nil) - dirent.DecRef() + dirent.DecRef(ctx) // Claim that the path is not accessible. err = syserror.EACCES diff --git a/pkg/sentry/fs/inode_overlay_test.go b/pkg/sentry/fs/inode_overlay_test.go index 389c219d6..aa9851b26 100644 --- a/pkg/sentry/fs/inode_overlay_test.go +++ b/pkg/sentry/fs/inode_overlay_test.go @@ -316,7 +316,7 @@ func TestCacheFlush(t *testing.T) { t.Fatalf("NewMountNamespace failed: %v", err) } root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) ctx = &rootContext{ Context: ctx, @@ -345,7 +345,7 @@ func TestCacheFlush(t *testing.T) { } // Drop the file reference. - file.DecRef() + file.DecRef(ctx) // Dirent should have 2 refs left. if got, want := dirent.ReadRefs(), 2; int(got) != want { @@ -361,7 +361,7 @@ func TestCacheFlush(t *testing.T) { } // Drop our ref. - dirent.DecRef() + dirent.DecRef(ctx) // We should be back to zero refs. if got, want := dirent.ReadRefs(), 0; int(got) != want { @@ -398,7 +398,7 @@ func (d *dir) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags if err != nil { return nil, err } - defer file.DecRef() + defer file.DecRef(ctx) // Wrap the file's FileOperations in a dirFile. fops := &dirFile{ FileOperations: file.FileOperations, diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index e3a715c1f..c5c07d564 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -80,7 +80,7 @@ func NewInotify(ctx context.Context) *Inotify { // Release implements FileOperations.Release. Release removes all watches and // frees all resources for an inotify instance. -func (i *Inotify) Release() { +func (i *Inotify) Release(ctx context.Context) { // We need to hold i.mu to avoid a race with concurrent calls to // Inotify.targetDestroyed from Watches. There's no risk of Watches // accessing this Inotify after the destructor ends, because we remove all @@ -93,7 +93,7 @@ func (i *Inotify) Release() { // the owner's destructor. w.target.Watches.Remove(w.ID()) // Don't leak any references to the target, held by pins in the watch. - w.destroy() + w.destroy(ctx) } } @@ -321,7 +321,7 @@ func (i *Inotify) AddWatch(target *Dirent, mask uint32) int32 { // // RmWatch looks up an inotify watch for the given 'wd' and configures the // target dirent to stop sending events to this inotify instance. -func (i *Inotify) RmWatch(wd int32) error { +func (i *Inotify) RmWatch(ctx context.Context, wd int32) error { i.mu.Lock() // Find the watch we were asked to removed. @@ -346,7 +346,7 @@ func (i *Inotify) RmWatch(wd int32) error { i.queueEvent(newEvent(watch.wd, "", linux.IN_IGNORED, 0)) // Remove all pins. - watch.destroy() + watch.destroy(ctx) return nil } diff --git a/pkg/sentry/fs/inotify_watch.go b/pkg/sentry/fs/inotify_watch.go index 900cba3ca..605423d22 100644 --- a/pkg/sentry/fs/inotify_watch.go +++ b/pkg/sentry/fs/inotify_watch.go @@ -18,6 +18,7 @@ import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" ) @@ -105,12 +106,12 @@ func (w *Watch) Pin(d *Dirent) { // Unpin drops any extra refs held on dirent due to a previous Pin // call. Calling Unpin multiple times for the same dirent, or on a dirent // without a corresponding Pin call is a no-op. -func (w *Watch) Unpin(d *Dirent) { +func (w *Watch) Unpin(ctx context.Context, d *Dirent) { w.mu.Lock() defer w.mu.Unlock() if w.pins[d] { delete(w.pins, d) - d.DecRef() + d.DecRef(ctx) } } @@ -125,11 +126,11 @@ func (w *Watch) TargetDestroyed() { // this watch. Destroy does not cause any new events to be generated. The caller // is responsible for ensuring there are no outstanding references to this // watch. -func (w *Watch) destroy() { +func (w *Watch) destroy(ctx context.Context) { w.mu.Lock() defer w.mu.Unlock() for d := range w.pins { - d.DecRef() + d.DecRef(ctx) } w.pins = nil } diff --git a/pkg/sentry/fs/mount.go b/pkg/sentry/fs/mount.go index 37bae6810..ee69b10e8 100644 --- a/pkg/sentry/fs/mount.go +++ b/pkg/sentry/fs/mount.go @@ -51,7 +51,7 @@ type MountSourceOperations interface { DirentOperations // Destroy destroys the MountSource. - Destroy() + Destroy(ctx context.Context) // Below are MountSourceOperations that do not conform to Linux. @@ -165,16 +165,16 @@ func (msrc *MountSource) DecDirentRefs() { } } -func (msrc *MountSource) destroy() { +func (msrc *MountSource) destroy(ctx context.Context) { if c := msrc.DirentRefs(); c != 0 { panic(fmt.Sprintf("MountSource with non-zero direntRefs is being destroyed: %d", c)) } - msrc.MountSourceOperations.Destroy() + msrc.MountSourceOperations.Destroy(ctx) } // DecRef drops a reference on the MountSource. -func (msrc *MountSource) DecRef() { - msrc.DecRefWithDestructor(msrc.destroy) +func (msrc *MountSource) DecRef(ctx context.Context) { + msrc.DecRefWithDestructor(ctx, msrc.destroy) } // FlushDirentRefs drops all references held by the MountSource on Dirents. @@ -264,7 +264,7 @@ func (*SimpleMountSourceOperations) ResetInodeMappings() {} func (*SimpleMountSourceOperations) SaveInodeMapping(*Inode, string) {} // Destroy implements MountSourceOperations.Destroy. -func (*SimpleMountSourceOperations) Destroy() {} +func (*SimpleMountSourceOperations) Destroy(context.Context) {} // Info defines attributes of a filesystem. type Info struct { diff --git a/pkg/sentry/fs/mount_overlay.go b/pkg/sentry/fs/mount_overlay.go index 78e35b1e6..7badc75d6 100644 --- a/pkg/sentry/fs/mount_overlay.go +++ b/pkg/sentry/fs/mount_overlay.go @@ -115,9 +115,9 @@ func (o *overlayMountSourceOperations) SaveInodeMapping(inode *Inode, path strin } // Destroy drops references on the upper and lower MountSource. -func (o *overlayMountSourceOperations) Destroy() { - o.upper.DecRef() - o.lower.DecRef() +func (o *overlayMountSourceOperations) Destroy(ctx context.Context) { + o.upper.DecRef(ctx) + o.lower.DecRef(ctx) } // type overlayFilesystem is the filesystem for overlay mounts. diff --git a/pkg/sentry/fs/mount_test.go b/pkg/sentry/fs/mount_test.go index a3d10770b..6c296f5d0 100644 --- a/pkg/sentry/fs/mount_test.go +++ b/pkg/sentry/fs/mount_test.go @@ -18,6 +18,7 @@ import ( "fmt" "testing" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/contexttest" ) @@ -32,13 +33,13 @@ func cacheReallyContains(cache *DirentCache, d *Dirent) bool { return false } -func mountPathsAre(root *Dirent, got []*Mount, want ...string) error { +func mountPathsAre(ctx context.Context, root *Dirent, got []*Mount, want ...string) error { gotPaths := make(map[string]struct{}, len(got)) gotStr := make([]string, len(got)) for i, g := range got { if groot := g.Root(); groot != nil { name, _ := groot.FullName(root) - groot.DecRef() + groot.DecRef(ctx) gotStr[i] = name gotPaths[name] = struct{}{} } @@ -69,7 +70,7 @@ func TestMountSourceOnlyCachedOnce(t *testing.T) { t.Fatalf("NewMountNamespace failed: %v", err) } rootDirent := mm.Root() - defer rootDirent.DecRef() + defer rootDirent.DecRef(ctx) // Get a child of the root which we will mount over. Note that the // MockInodeOperations causes Walk to always succeed. @@ -125,7 +126,7 @@ func TestAllMountsUnder(t *testing.T) { t.Fatalf("NewMountNamespace failed: %v", err) } rootDirent := mm.Root() - defer rootDirent.DecRef() + defer rootDirent.DecRef(ctx) // Add mounts at the following paths: paths := []string{ @@ -150,14 +151,14 @@ func TestAllMountsUnder(t *testing.T) { if err := mm.Mount(ctx, d, submountInode); err != nil { t.Fatalf("could not mount at %q: %v", p, err) } - d.DecRef() + d.DecRef(ctx) } // mm root should contain all submounts (and does not include the root mount). rootMnt := mm.FindMount(rootDirent) submounts := mm.AllMountsUnder(rootMnt) allPaths := append(paths, "/") - if err := mountPathsAre(rootDirent, submounts, allPaths...); err != nil { + if err := mountPathsAre(ctx, rootDirent, submounts, allPaths...); err != nil { t.Error(err) } @@ -181,9 +182,9 @@ func TestAllMountsUnder(t *testing.T) { if err != nil { t.Fatalf("could not find path %q in mount manager: %v", "/foo", err) } - defer d.DecRef() + defer d.DecRef(ctx) submounts = mm.AllMountsUnder(mm.FindMount(d)) - if err := mountPathsAre(rootDirent, submounts, "/foo", "/foo/bar", "/foo/qux", "/foo/bar/baz"); err != nil { + if err := mountPathsAre(ctx, rootDirent, submounts, "/foo", "/foo/bar", "/foo/qux", "/foo/bar/baz"); err != nil { t.Error(err) } @@ -193,9 +194,9 @@ func TestAllMountsUnder(t *testing.T) { if err != nil { t.Fatalf("could not find path %q in mount manager: %v", "/waldo", err) } - defer waldo.DecRef() + defer waldo.DecRef(ctx) submounts = mm.AllMountsUnder(mm.FindMount(waldo)) - if err := mountPathsAre(rootDirent, submounts, "/waldo"); err != nil { + if err := mountPathsAre(ctx, rootDirent, submounts, "/waldo"); err != nil { t.Error(err) } } @@ -212,7 +213,7 @@ func TestUnmount(t *testing.T) { t.Fatalf("NewMountNamespace failed: %v", err) } rootDirent := mm.Root() - defer rootDirent.DecRef() + defer rootDirent.DecRef(ctx) // Add mounts at the following paths: paths := []string{ @@ -240,7 +241,7 @@ func TestUnmount(t *testing.T) { if err := mm.Mount(ctx, d, submountInode); err != nil { t.Fatalf("could not mount at %q: %v", p, err) } - d.DecRef() + d.DecRef(ctx) } allPaths := make([]string, len(paths)+1) @@ -259,13 +260,13 @@ func TestUnmount(t *testing.T) { if err := mm.Unmount(ctx, d, false); err != nil { t.Fatalf("could not unmount at %q: %v", p, err) } - d.DecRef() + d.DecRef(ctx) // Remove the path that has been unmounted and the check that the remaining // mounts are still there. allPaths = allPaths[:len(allPaths)-1] submounts := mm.AllMountsUnder(rootMnt) - if err := mountPathsAre(rootDirent, submounts, allPaths...); err != nil { + if err := mountPathsAre(ctx, rootDirent, submounts, allPaths...); err != nil { t.Error(err) } } diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index 3f2bd0e87..d741c4339 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -234,7 +234,7 @@ func (mns *MountNamespace) flushMountSourceRefsLocked() { // After destroy is called, the MountNamespace may continue to be referenced (for // example via /proc/mounts), but should free all resources and shouldn't have // Find* methods called. -func (mns *MountNamespace) destroy() { +func (mns *MountNamespace) destroy(ctx context.Context) { mns.mu.Lock() defer mns.mu.Unlock() @@ -247,13 +247,13 @@ func (mns *MountNamespace) destroy() { for _, mp := range mns.mounts { // Drop the mount reference on all mounted dirents. for ; mp != nil; mp = mp.previous { - mp.root.DecRef() + mp.root.DecRef(ctx) } } mns.mounts = nil // Drop reference on the root. - mns.root.DecRef() + mns.root.DecRef(ctx) // Ensure that root cannot be accessed via this MountNamespace any // more. @@ -265,8 +265,8 @@ func (mns *MountNamespace) destroy() { } // DecRef implements RefCounter.DecRef with destructor mns.destroy. -func (mns *MountNamespace) DecRef() { - mns.DecRefWithDestructor(mns.destroy) +func (mns *MountNamespace) DecRef(ctx context.Context) { + mns.DecRefWithDestructor(ctx, mns.destroy) } // withMountLocked prevents further walks to `node`, because `node` is about to @@ -312,7 +312,7 @@ func (mns *MountNamespace) Mount(ctx context.Context, mountPoint *Dirent, inode if err != nil { return err } - defer replacement.DecRef() + defer replacement.DecRef(ctx) // Set the mount's root dirent and id. parentMnt := mns.findMountLocked(mountPoint) @@ -394,7 +394,7 @@ func (mns *MountNamespace) Unmount(ctx context.Context, node *Dirent, detachOnly panic(fmt.Sprintf("Last mount in the chain must be a undo mount: %+v", prev)) } // Drop mount reference taken at the end of MountNamespace.Mount. - prev.root.DecRef() + prev.root.DecRef(ctx) } else { mns.mounts[prev.root] = prev } @@ -496,11 +496,11 @@ func (mns *MountNamespace) FindLink(ctx context.Context, root, wd *Dirent, path // non-directory root is hopeless. if current != root { if !IsDir(current.Inode.StableAttr) { - current.DecRef() // Drop reference from above. + current.DecRef(ctx) // Drop reference from above. return nil, syserror.ENOTDIR } if err := current.Inode.CheckPermission(ctx, PermMask{Execute: true}); err != nil { - current.DecRef() // Drop reference from above. + current.DecRef(ctx) // Drop reference from above. return nil, err } } @@ -511,12 +511,12 @@ func (mns *MountNamespace) FindLink(ctx context.Context, root, wd *Dirent, path // Allow failed walks to cache the dirent, because no // children will acquire a reference at the end. current.maybeExtendReference() - current.DecRef() + current.DecRef(ctx) return nil, err } // Drop old reference. - current.DecRef() + current.DecRef(ctx) if remainder != "" { // Ensure it's resolved, unless it's the last level. @@ -570,11 +570,11 @@ func (mns *MountNamespace) resolve(ctx context.Context, root, node *Dirent, rema case nil: // Make sure we didn't exhaust the traversal budget. if *remainingTraversals == 0 { - target.DecRef() + target.DecRef(ctx) return nil, syscall.ELOOP } - node.DecRef() // Drop the original reference. + node.DecRef(ctx) // Drop the original reference. return target, nil case syscall.ENOLINK: @@ -582,7 +582,7 @@ func (mns *MountNamespace) resolve(ctx context.Context, root, node *Dirent, rema return node, nil case ErrResolveViaReadlink: - defer node.DecRef() // See above. + defer node.DecRef(ctx) // See above. // First, check if we should traverse. if *remainingTraversals == 0 { @@ -608,7 +608,7 @@ func (mns *MountNamespace) resolve(ctx context.Context, root, node *Dirent, rema return d, err default: - node.DecRef() // Drop for err; see above. + node.DecRef(ctx) // Drop for err; see above. // Propagate the error. return nil, err diff --git a/pkg/sentry/fs/mounts_test.go b/pkg/sentry/fs/mounts_test.go index a69b41468..975d6cbc9 100644 --- a/pkg/sentry/fs/mounts_test.go +++ b/pkg/sentry/fs/mounts_test.go @@ -51,7 +51,7 @@ func TestFindLink(t *testing.T) { } root := mm.Root() - defer root.DecRef() + defer root.DecRef(ctx) foo, err := root.Walk(ctx, root, "foo") if err != nil { t.Fatalf("Error walking to foo: %v", err) diff --git a/pkg/sentry/fs/overlay.go b/pkg/sentry/fs/overlay.go index a8ae7d81d..35013a21b 100644 --- a/pkg/sentry/fs/overlay.go +++ b/pkg/sentry/fs/overlay.go @@ -107,7 +107,7 @@ func NewOverlayRoot(ctx context.Context, upper *Inode, lower *Inode, flags Mount msrc := newOverlayMountSource(ctx, upper.MountSource, lower.MountSource, flags) overlay, err := newOverlayEntry(ctx, upper, lower, true) if err != nil { - msrc.DecRef() + msrc.DecRef(ctx) return nil, err } @@ -130,7 +130,7 @@ func NewOverlayRootFile(ctx context.Context, upperMS *MountSource, lower *Inode, msrc := newOverlayMountSource(ctx, upperMS, lower.MountSource, flags) overlay, err := newOverlayEntry(ctx, nil, lower, true) if err != nil { - msrc.DecRef() + msrc.DecRef(ctx) return nil, err } return newOverlayInode(ctx, overlay, msrc), nil @@ -230,16 +230,16 @@ func newOverlayEntry(ctx context.Context, upper *Inode, lower *Inode, lowerExist }, nil } -func (o *overlayEntry) release() { +func (o *overlayEntry) release(ctx context.Context) { // We drop a reference on upper and lower file system Inodes // rather than releasing them, because in-memory filesystems // may hold an extra reference to these Inodes so that they // stay in memory. if o.upper != nil { - o.upper.DecRef() + o.upper.DecRef(ctx) } if o.lower != nil { - o.lower.DecRef() + o.lower.DecRef(ctx) } } diff --git a/pkg/sentry/fs/proc/fds.go b/pkg/sentry/fs/proc/fds.go index 35972e23c..45523adf8 100644 --- a/pkg/sentry/fs/proc/fds.go +++ b/pkg/sentry/fs/proc/fds.go @@ -56,11 +56,11 @@ func walkDescriptors(t *kernel.Task, p string, toInode func(*fs.File, kernel.FDF // readDescriptors reads fds in the task starting at offset, and calls the // toDentAttr callback for each to get a DentAttr, which it then emits. This is // a helper for implementing fs.InodeOperations.Readdir. -func readDescriptors(t *kernel.Task, c *fs.DirCtx, offset int64, toDentAttr func(int) fs.DentAttr) (int64, error) { +func readDescriptors(ctx context.Context, t *kernel.Task, c *fs.DirCtx, offset int64, toDentAttr func(int) fs.DentAttr) (int64, error) { var fds []int32 t.WithMuLocked(func(t *kernel.Task) { if fdTable := t.FDTable(); fdTable != nil { - fds = fdTable.GetFDs() + fds = fdTable.GetFDs(ctx) } }) @@ -116,7 +116,7 @@ func (f *fd) GetFile(context.Context, *fs.Dirent, fs.FileFlags) (*fs.File, error func (f *fd) Readlink(ctx context.Context, _ *fs.Inode) (string, error) { root := fs.RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } n, _ := f.file.Dirent.FullName(root) return n, nil @@ -135,13 +135,7 @@ func (f *fd) Truncate(context.Context, *fs.Inode, int64) error { func (f *fd) Release(ctx context.Context) { f.Symlink.Release(ctx) - f.file.DecRef() -} - -// Close releases the reference on the file. -func (f *fd) Close() error { - f.file.DecRef() - return nil + f.file.DecRef(ctx) } // fdDir is an InodeOperations for /proc/TID/fd. @@ -227,7 +221,7 @@ func (f *fdDirFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySer if f.isInfoFile { typ = fs.Symlink } - return readDescriptors(f.t, dirCtx, file.Offset(), func(fd int) fs.DentAttr { + return readDescriptors(ctx, f.t, dirCtx, file.Offset(), func(fd int) fs.DentAttr { return fs.GenericDentAttr(typ, device.ProcDevice) }) } @@ -261,7 +255,7 @@ func (fdid *fdInfoDir) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs // locks, and other data. For now we only have flags. // See https://www.kernel.org/doc/Documentation/filesystems/proc.txt flags := file.Flags().ToLinux() | fdFlags.ToLinuxFileFlags() - file.DecRef() + file.DecRef(ctx) contents := []byte(fmt.Sprintf("flags:\t0%o\n", flags)) return newStaticProcInode(ctx, dir.MountSource, contents) }) diff --git a/pkg/sentry/fs/proc/mounts.go b/pkg/sentry/fs/proc/mounts.go index 1fc9c703c..6a63c47b3 100644 --- a/pkg/sentry/fs/proc/mounts.go +++ b/pkg/sentry/fs/proc/mounts.go @@ -47,7 +47,7 @@ func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) { // The task has been destroyed. Nothing to show here. return } - defer rootDir.DecRef() + defer rootDir.DecRef(t) mnt := t.MountNamespace().FindMount(rootDir) if mnt == nil { @@ -64,7 +64,7 @@ func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) { continue // No longer valid. } mountPath, desc := mroot.FullName(rootDir) - mroot.DecRef() + mroot.DecRef(t) if !desc { // MountSources that are not descendants of the chroot jail are ignored. continue @@ -97,7 +97,7 @@ func (mif *mountInfoFile) ReadSeqFileData(ctx context.Context, handle seqfile.Se if mroot == nil { return // No longer valid. } - defer mroot.DecRef() + defer mroot.DecRef(ctx) // Format: // 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue @@ -216,7 +216,7 @@ func (mf *mountsFile) ReadSeqFileData(ctx context.Context, handle seqfile.SeqHan if root == nil { return // No longer valid. } - defer root.DecRef() + defer root.DecRef(ctx) flags := root.Inode.MountSource.Flags opts := "rw" diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go index bd18177d4..83a43aa26 100644 --- a/pkg/sentry/fs/proc/net.go +++ b/pkg/sentry/fs/proc/net.go @@ -419,7 +419,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s } sfile := s.(*fs.File) if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { - s.DecRef() + s.DecRef(ctx) // Not a unix socket. continue } @@ -479,7 +479,7 @@ func (n *netUnix) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]s } fmt.Fprintf(&buf, "\n") - s.DecRef() + s.DecRef(ctx) } data := []seqfile.SeqData{ @@ -574,7 +574,7 @@ func commonReadSeqFileDataTCP(ctx context.Context, n seqfile.SeqHandle, k *kerne panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) } if family, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) { - s.DecRef() + s.DecRef(ctx) // Not tcp4 sockets. continue } @@ -664,7 +664,7 @@ func commonReadSeqFileDataTCP(ctx context.Context, n seqfile.SeqHandle, k *kerne fmt.Fprintf(&buf, "\n") - s.DecRef() + s.DecRef(ctx) } data := []seqfile.SeqData{ @@ -752,7 +752,7 @@ func (n *netUDP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) } if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM { - s.DecRef() + s.DecRef(ctx) // Not udp4 socket. continue } @@ -822,7 +822,7 @@ func (n *netUDP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se fmt.Fprintf(&buf, "\n") - s.DecRef() + s.DecRef(ctx) } data := []seqfile.SeqData{ diff --git a/pkg/sentry/fs/proc/proc.go b/pkg/sentry/fs/proc/proc.go index c659224a7..77e0e1d26 100644 --- a/pkg/sentry/fs/proc/proc.go +++ b/pkg/sentry/fs/proc/proc.go @@ -213,7 +213,7 @@ func (rpf *rootProcFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dent // Add dot and dotdot. root := fs.RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } dot, dotdot := file.Dirent.GetDotAttrs(root) names = append(names, ".", "..") diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 4bbe90198..9cf7f2a62 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -185,7 +185,7 @@ func (f *subtasksFile) Readdir(ctx context.Context, file *fs.File, ser fs.Dentry // Serialize "." and "..". root := fs.RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } dot, dotdot := file.Dirent.GetDotAttrs(root) if err := dirCtx.DirEmit(".", dot); err != nil { @@ -295,7 +295,7 @@ func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { if err != nil { return "", err } - defer exec.DecRef() + defer exec.DecRef(ctx) return exec.PathnameWithDeleted(ctx), nil } diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go index bfa304552..f4fcddecb 100644 --- a/pkg/sentry/fs/ramfs/dir.go +++ b/pkg/sentry/fs/ramfs/dir.go @@ -219,7 +219,7 @@ func (d *Dir) Remove(ctx context.Context, _ *fs.Inode, name string) error { } // Remove our reference on the inode. - inode.DecRef() + inode.DecRef(ctx) return nil } @@ -250,7 +250,7 @@ func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) err } // Remove our reference on the inode. - inode.DecRef() + inode.DecRef(ctx) return nil } @@ -326,7 +326,7 @@ func (d *Dir) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.F // Create the Dirent and corresponding file. created := fs.NewDirent(ctx, inode, name) - defer created.DecRef() + defer created.DecRef(ctx) return created.Inode.GetFile(ctx, created, flags) } @@ -412,11 +412,11 @@ func (*Dir) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, ol } // Release implements fs.InodeOperation.Release. -func (d *Dir) Release(_ context.Context) { +func (d *Dir) Release(ctx context.Context) { // Drop references on all children. d.mu.Lock() for _, i := range d.children { - i.DecRef() + i.DecRef(ctx) } d.mu.Unlock() } @@ -456,7 +456,7 @@ func (dfo *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirC func (dfo *dirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) { root := fs.RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } dirCtx := &fs.DirCtx{ Serializer: serializer, @@ -473,13 +473,13 @@ func hasChildren(ctx context.Context, inode *fs.Inode) (bool, error) { // dropped when that dirent is destroyed. inode.IncRef() d := fs.NewTransientDirent(inode) - defer d.DecRef() + defer d.DecRef(ctx) file, err := inode.GetFile(ctx, d, fs.FileFlags{Read: true}) if err != nil { return false, err } - defer file.DecRef() + defer file.DecRef(ctx) ser := &fs.CollectEntriesSerializer{} if err := file.Readdir(ctx, ser); err != nil { @@ -530,7 +530,7 @@ func Rename(ctx context.Context, oldParent fs.InodeOperations, oldName string, n if err != nil { return err } - inode.DecRef() + inode.DecRef(ctx) } // Be careful, we may have already grabbed this mutex above. diff --git a/pkg/sentry/fs/ramfs/tree_test.go b/pkg/sentry/fs/ramfs/tree_test.go index a6ed8b2c5..3e0d1e07e 100644 --- a/pkg/sentry/fs/ramfs/tree_test.go +++ b/pkg/sentry/fs/ramfs/tree_test.go @@ -67,7 +67,7 @@ func TestMakeDirectoryTree(t *testing.T) { continue } root := mm.Root() - defer mm.DecRef() + defer mm.DecRef(ctx) for _, p := range test.subdirs { maxTraversals := uint(0) diff --git a/pkg/sentry/fs/timerfd/timerfd.go b/pkg/sentry/fs/timerfd/timerfd.go index 88c344089..f362ca9b6 100644 --- a/pkg/sentry/fs/timerfd/timerfd.go +++ b/pkg/sentry/fs/timerfd/timerfd.go @@ -55,7 +55,7 @@ type TimerOperations struct { func NewFile(ctx context.Context, c ktime.Clock) *fs.File { dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[timerfd]") // Release the initial dirent reference after NewFile takes a reference. - defer dirent.DecRef() + defer dirent.DecRef(ctx) tops := &TimerOperations{} tops.timer = ktime.NewTimer(c, tops) // Timerfds reject writes, but the Write flag must be set in order to @@ -65,7 +65,7 @@ func NewFile(ctx context.Context, c ktime.Clock) *fs.File { } // Release implements fs.FileOperations.Release. -func (t *TimerOperations) Release() { +func (t *TimerOperations) Release(context.Context) { t.timer.Destroy() } diff --git a/pkg/sentry/fs/tmpfs/file_test.go b/pkg/sentry/fs/tmpfs/file_test.go index aaba35502..d4d613ea9 100644 --- a/pkg/sentry/fs/tmpfs/file_test.go +++ b/pkg/sentry/fs/tmpfs/file_test.go @@ -46,7 +46,7 @@ func newFile(ctx context.Context) *fs.File { func TestGrow(t *testing.T) { ctx := contexttest.Context(t) f := newFile(ctx) - defer f.DecRef() + defer f.DecRef(ctx) abuf := bytes.Repeat([]byte{'a'}, 68) n, err := f.Pwritev(ctx, usermem.BytesIOSequence(abuf), 0) diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go index 108654827..463f6189e 100644 --- a/pkg/sentry/fs/tty/dir.go +++ b/pkg/sentry/fs/tty/dir.go @@ -132,7 +132,7 @@ func (d *dirInodeOperations) Release(ctx context.Context) { d.mu.Lock() defer d.mu.Unlock() - d.master.DecRef() + d.master.DecRef(ctx) if len(d.slaves) != 0 { panic(fmt.Sprintf("devpts directory still contains active terminals: %+v", d)) } @@ -263,7 +263,7 @@ func (d *dirInodeOperations) allocateTerminal(ctx context.Context) (*Terminal, e } // masterClose is called when the master end of t is closed. -func (d *dirInodeOperations) masterClose(t *Terminal) { +func (d *dirInodeOperations) masterClose(ctx context.Context, t *Terminal) { d.mu.Lock() defer d.mu.Unlock() @@ -277,7 +277,7 @@ func (d *dirInodeOperations) masterClose(t *Terminal) { panic(fmt.Sprintf("Terminal %+v doesn't exist in %+v?", t, d)) } - s.DecRef() + s.DecRef(ctx) delete(d.slaves, t.n) d.dentryMap.Remove(strconv.FormatUint(uint64(t.n), 10)) } @@ -322,7 +322,7 @@ func (df *dirFileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCt func (df *dirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) { root := fs.RootFromContext(ctx) if root != nil { - defer root.DecRef() + defer root.DecRef(ctx) } dirCtx := &fs.DirCtx{ Serializer: serializer, diff --git a/pkg/sentry/fs/tty/fs.go b/pkg/sentry/fs/tty/fs.go index 8fe05ebe5..2d4d44bf3 100644 --- a/pkg/sentry/fs/tty/fs.go +++ b/pkg/sentry/fs/tty/fs.go @@ -108,4 +108,4 @@ func (superOperations) ResetInodeMappings() {} func (superOperations) SaveInodeMapping(*fs.Inode, string) {} // Destroy implements MountSourceOperations.Destroy. -func (superOperations) Destroy() {} +func (superOperations) Destroy(context.Context) {} diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go index fe07fa929..e00746017 100644 --- a/pkg/sentry/fs/tty/master.go +++ b/pkg/sentry/fs/tty/master.go @@ -75,7 +75,7 @@ func newMasterInode(ctx context.Context, d *dirInodeOperations, owner fs.FileOwn } // Release implements fs.InodeOperations.Release. -func (mi *masterInodeOperations) Release(ctx context.Context) { +func (mi *masterInodeOperations) Release(context.Context) { } // Truncate implements fs.InodeOperations.Truncate. @@ -120,9 +120,9 @@ type masterFileOperations struct { var _ fs.FileOperations = (*masterFileOperations)(nil) // Release implements fs.FileOperations.Release. -func (mf *masterFileOperations) Release() { - mf.d.masterClose(mf.t) - mf.t.DecRef() +func (mf *masterFileOperations) Release(ctx context.Context) { + mf.d.masterClose(ctx, mf.t) + mf.t.DecRef(ctx) } // EventRegister implements waiter.Waitable.EventRegister. diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go index 9871f6fc6..7c7292687 100644 --- a/pkg/sentry/fs/tty/slave.go +++ b/pkg/sentry/fs/tty/slave.go @@ -71,7 +71,7 @@ func newSlaveInode(ctx context.Context, d *dirInodeOperations, t *Terminal, owne // Release implements fs.InodeOperations.Release. func (si *slaveInodeOperations) Release(ctx context.Context) { - si.t.DecRef() + si.t.DecRef(ctx) } // Truncate implements fs.InodeOperations.Truncate. @@ -106,7 +106,7 @@ type slaveFileOperations struct { var _ fs.FileOperations = (*slaveFileOperations)(nil) // Release implements fs.FileOperations.Release. -func (sf *slaveFileOperations) Release() { +func (sf *slaveFileOperations) Release(context.Context) { } // EventRegister implements waiter.Waitable.EventRegister. diff --git a/pkg/sentry/fs/user/path.go b/pkg/sentry/fs/user/path.go index 397e96045..2f5a43b84 100644 --- a/pkg/sentry/fs/user/path.go +++ b/pkg/sentry/fs/user/path.go @@ -82,7 +82,7 @@ func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name s // Caller has no root. Don't bother traversing anything. return "", syserror.ENOENT } - defer root.DecRef() + defer root.DecRef(ctx) for _, p := range paths { if !path.IsAbs(p) { // Relative paths aren't safe, no one should be using them. @@ -100,7 +100,7 @@ func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name s if err != nil { return "", err } - defer d.DecRef() + defer d.DecRef(ctx) // Check that it is a regular file. if !fs.IsRegular(d.Inode.StableAttr) { @@ -121,7 +121,7 @@ func resolve(ctx context.Context, mns *fs.MountNamespace, paths []string, name s func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, paths []string, name string) (string, error) { root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) for _, p := range paths { if !path.IsAbs(p) { // Relative paths aren't safe, no one should be using them. @@ -148,7 +148,7 @@ func resolveVFS2(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNam if err != nil { return "", err } - dentry.DecRef() + dentry.DecRef(ctx) return binPath, nil } diff --git a/pkg/sentry/fs/user/user.go b/pkg/sentry/fs/user/user.go index f4d525523..936fd3932 100644 --- a/pkg/sentry/fs/user/user.go +++ b/pkg/sentry/fs/user/user.go @@ -62,7 +62,7 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.K // doesn't exist we will return the default home directory. return defaultHome, nil } - defer dirent.DecRef() + defer dirent.DecRef(ctx) // Check read permissions on the file. if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil { @@ -81,7 +81,7 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.K if err != nil { return "", err } - defer f.DecRef() + defer f.DecRef(ctx) r := &fileReader{ Ctx: ctx, @@ -105,7 +105,7 @@ func getExecUserHomeVFS2(ctx context.Context, mns *vfs.MountNamespace, uid auth. const defaultHome = "/" root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) creds := auth.CredentialsFromContext(ctx) @@ -123,7 +123,7 @@ func getExecUserHomeVFS2(ctx context.Context, mns *vfs.MountNamespace, uid auth. if err != nil { return defaultHome, nil } - defer fd.DecRef() + defer fd.DecRef(ctx) r := &fileReaderVFS2{ ctx: ctx, diff --git a/pkg/sentry/fs/user/user_test.go b/pkg/sentry/fs/user/user_test.go index 7d8e9ac7c..12b786224 100644 --- a/pkg/sentry/fs/user/user_test.go +++ b/pkg/sentry/fs/user/user_test.go @@ -39,7 +39,7 @@ func createEtcPasswd(ctx context.Context, root *fs.Dirent, contents string, mode if err != nil { return err } - defer etc.DecRef() + defer etc.DecRef(ctx) switch mode.FileType() { case 0: // Don't create anything. @@ -49,7 +49,7 @@ func createEtcPasswd(ctx context.Context, root *fs.Dirent, contents string, mode if err != nil { return err } - defer passwd.DecRef() + defer passwd.DecRef(ctx) if _, err := passwd.Writev(ctx, usermem.BytesIOSequence([]byte(contents))); err != nil { return err } @@ -110,9 +110,9 @@ func TestGetExecUserHome(t *testing.T) { if err != nil { t.Fatalf("NewMountNamespace failed: %v", err) } - defer mns.DecRef() + defer mns.DecRef(ctx) root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) ctx = fs.WithRoot(ctx, root) if err := createEtcPasswd(ctx, root, tc.passwdContents, tc.passwdMode); err != nil { diff --git a/pkg/sentry/fsbridge/bridge.go b/pkg/sentry/fsbridge/bridge.go index 8e7590721..7e61209ee 100644 --- a/pkg/sentry/fsbridge/bridge.go +++ b/pkg/sentry/fsbridge/bridge.go @@ -44,7 +44,7 @@ type File interface { IncRef() // DecRef decrements reference. - DecRef() + DecRef(ctx context.Context) } // Lookup provides a common interface to open files. diff --git a/pkg/sentry/fsbridge/fs.go b/pkg/sentry/fsbridge/fs.go index 093ce1fb3..9785fd62a 100644 --- a/pkg/sentry/fsbridge/fs.go +++ b/pkg/sentry/fsbridge/fs.go @@ -49,7 +49,7 @@ func (f *fsFile) PathnameWithDeleted(ctx context.Context) string { // global there. return "" } - defer root.DecRef() + defer root.DecRef(ctx) name, _ := f.file.Dirent.FullName(root) return name @@ -87,8 +87,8 @@ func (f *fsFile) IncRef() { } // DecRef implements File. -func (f *fsFile) DecRef() { - f.file.DecRef() +func (f *fsFile) DecRef(ctx context.Context) { + f.file.DecRef(ctx) } // fsLookup implements Lookup interface using fs.File. @@ -124,7 +124,7 @@ func (l *fsLookup) OpenPath(ctx context.Context, path string, opts vfs.OpenOptio if err != nil { return nil, err } - defer d.DecRef() + defer d.DecRef(ctx) if !resolveFinal && fs.IsSymlink(d.Inode.StableAttr) { return nil, syserror.ELOOP diff --git a/pkg/sentry/fsbridge/vfs.go b/pkg/sentry/fsbridge/vfs.go index 89168220a..323506d33 100644 --- a/pkg/sentry/fsbridge/vfs.go +++ b/pkg/sentry/fsbridge/vfs.go @@ -43,7 +43,7 @@ func NewVFSFile(file *vfs.FileDescription) File { // PathnameWithDeleted implements File. func (f *VFSFile) PathnameWithDeleted(ctx context.Context) string { root := vfs.RootFromContext(ctx) - defer root.DecRef() + defer root.DecRef(ctx) vfsObj := f.file.VirtualDentry().Mount().Filesystem().VirtualFilesystem() name, _ := vfsObj.PathnameWithDeleted(ctx, root, f.file.VirtualDentry()) @@ -86,8 +86,8 @@ func (f *VFSFile) IncRef() { } // DecRef implements File. -func (f *VFSFile) DecRef() { - f.file.DecRef() +func (f *VFSFile) DecRef(ctx context.Context) { + f.file.DecRef(ctx) } // FileDescription returns the FileDescription represented by f. It does not diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index e6fda2b4f..7169e91af 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -103,9 +103,9 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) - fs.Filesystem.Release() + fs.Filesystem.Release(ctx) } // rootInode is the root directory inode for the devpts mounts. diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 1081fff52..3bb397f71 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -60,7 +60,7 @@ func (mi *masterInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vf } fd.LockFD.Init(&mi.locks) if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { - mi.DecRef() + mi.DecRef(ctx) return nil, err } return &fd.vfsfd, nil @@ -98,9 +98,9 @@ type masterFileDescription struct { var _ vfs.FileDescriptionImpl = (*masterFileDescription)(nil) // Release implements vfs.FileDescriptionImpl.Release. -func (mfd *masterFileDescription) Release() { +func (mfd *masterFileDescription) Release(ctx context.Context) { mfd.inode.root.masterClose(mfd.t) - mfd.inode.DecRef() + mfd.inode.DecRef(ctx) } // EventRegister implements waiter.Waitable.EventRegister. diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go index a91cae3ef..32e4e1908 100644 --- a/pkg/sentry/fsimpl/devpts/slave.go +++ b/pkg/sentry/fsimpl/devpts/slave.go @@ -56,7 +56,7 @@ func (si *slaveInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs } fd.LockFD.Init(&si.locks) if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil { - si.DecRef() + si.DecRef(ctx) return nil, err } return &fd.vfsfd, nil @@ -103,8 +103,8 @@ type slaveFileDescription struct { var _ vfs.FileDescriptionImpl = (*slaveFileDescription)(nil) // Release implements fs.FileOperations.Release. -func (sfd *slaveFileDescription) Release() { - sfd.inode.DecRef() +func (sfd *slaveFileDescription) Release(ctx context.Context) { + sfd.inode.DecRef(ctx) } // EventRegister implements waiter.Waitable.EventRegister. diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go index d0e06cdc0..2ed5fa8a9 100644 --- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go +++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go @@ -92,9 +92,9 @@ func NewAccessor(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth } // Release must be called when a is no longer in use. -func (a *Accessor) Release() { - a.root.DecRef() - a.mntns.DecRef() +func (a *Accessor) Release(ctx context.Context) { + a.root.DecRef(ctx) + a.mntns.DecRef(ctx) } // accessorContext implements context.Context by extending an existing diff --git a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go index b6d52c015..747867cca 100644 --- a/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go +++ b/pkg/sentry/fsimpl/devtmpfs/devtmpfs_test.go @@ -30,7 +30,7 @@ func TestDevtmpfs(t *testing.T) { creds := auth.CredentialsFromContext(ctx) vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { t.Fatalf("VFS init: %v", err) } // Register tmpfs just so that we can have a root filesystem that isn't @@ -48,9 +48,9 @@ func TestDevtmpfs(t *testing.T) { if err != nil { t.Fatalf("failed to create tmpfs root mount: %v", err) } - defer mntns.DecRef() + defer mntns.DecRef(ctx) root := mntns.Root() - defer root.DecRef() + defer root.DecRef(ctx) devpop := vfs.PathOperation{ Root: root, Start: root, @@ -69,7 +69,7 @@ func TestDevtmpfs(t *testing.T) { if err != nil { t.Fatalf("failed to create devtmpfs.Accessor: %v", err) } - defer a.Release() + defer a.Release(ctx) // Create "userspace-initialized" files using a devtmpfs.Accessor. if err := a.UserspaceInit(ctx); err != nil { diff --git a/pkg/sentry/fsimpl/eventfd/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go index d12d78b84..812171fa3 100644 --- a/pkg/sentry/fsimpl/eventfd/eventfd.go +++ b/pkg/sentry/fsimpl/eventfd/eventfd.go @@ -59,9 +59,9 @@ type EventFileDescription struct { var _ vfs.FileDescriptionImpl = (*EventFileDescription)(nil) // New creates a new event fd. -func New(vfsObj *vfs.VirtualFilesystem, initVal uint64, semMode bool, flags uint32) (*vfs.FileDescription, error) { +func New(ctx context.Context, vfsObj *vfs.VirtualFilesystem, initVal uint64, semMode bool, flags uint32) (*vfs.FileDescription, error) { vd := vfsObj.NewAnonVirtualDentry("[eventfd]") - defer vd.DecRef() + defer vd.DecRef(ctx) efd := &EventFileDescription{ val: initVal, semMode: semMode, @@ -107,7 +107,7 @@ func (efd *EventFileDescription) HostFD() (int, error) { } // Release implements FileDescriptionImpl.Release() -func (efd *EventFileDescription) Release() { +func (efd *EventFileDescription) Release(context.Context) { efd.mu.Lock() defer efd.mu.Unlock() if efd.hostfd >= 0 { diff --git a/pkg/sentry/fsimpl/eventfd/eventfd_test.go b/pkg/sentry/fsimpl/eventfd/eventfd_test.go index 20e3adffc..49916fa81 100644 --- a/pkg/sentry/fsimpl/eventfd/eventfd_test.go +++ b/pkg/sentry/fsimpl/eventfd/eventfd_test.go @@ -36,16 +36,16 @@ func TestEventFD(t *testing.T) { for _, initVal := range initVals { ctx := contexttest.Context(t) vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { t.Fatalf("VFS init: %v", err) } // Make a new eventfd that is writable. - eventfd, err := New(vfsObj, initVal, false, linux.O_RDWR) + eventfd, err := New(ctx, vfsObj, initVal, false, linux.O_RDWR) if err != nil { t.Fatalf("New() failed: %v", err) } - defer eventfd.DecRef() + defer eventfd.DecRef(ctx) // Register a callback for a write event. w, ch := waiter.NewChannelEntry(nil) @@ -74,16 +74,16 @@ func TestEventFD(t *testing.T) { func TestEventFDStat(t *testing.T) { ctx := contexttest.Context(t) vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { t.Fatalf("VFS init: %v", err) } // Make a new eventfd that is writable. - eventfd, err := New(vfsObj, 0, false, linux.O_RDWR) + eventfd, err := New(ctx, vfsObj, 0, false, linux.O_RDWR) if err != nil { t.Fatalf("New() failed: %v", err) } - defer eventfd.DecRef() + defer eventfd.DecRef(ctx) statx, err := eventfd.Stat(ctx, vfs.StatOptions{ Mask: linux.STATX_BASIC_STATS, diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go index 89caee3df..8f7d5a9bb 100644 --- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go +++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go @@ -53,7 +53,7 @@ func setUp(b *testing.B, imagePath string) (context.Context, *vfs.VirtualFilesys // Create VFS. vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { return nil, nil, nil, nil, err } vfsObj.MustRegisterFilesystemType("extfs", ext.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ @@ -68,7 +68,7 @@ func setUp(b *testing.B, imagePath string) (context.Context, *vfs.VirtualFilesys root := mntns.Root() tearDown := func() { - root.DecRef() + root.DecRef(ctx) if err := f.Close(); err != nil { b.Fatalf("tearDown failed: %v", err) @@ -169,7 +169,7 @@ func BenchmarkVFS2ExtfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to walk to mount point: %v", err) } - defer mountPoint.DecRef() + defer mountPoint.DecRef(ctx) // Create extfs submount. mountTearDown := mount(b, fmt.Sprintf("/tmp/image-%d.ext4", depth), vfsfs, &pop) diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go index 55902322a..7a1b4219f 100644 --- a/pkg/sentry/fsimpl/ext/dentry.go +++ b/pkg/sentry/fsimpl/ext/dentry.go @@ -15,6 +15,7 @@ package ext import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -55,7 +56,7 @@ func (d *dentry) TryIncRef() bool { } // DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef() { +func (d *dentry) DecRef(ctx context.Context) { // FIXME(b/134676337): filesystem.mu may not be locked as required by // inode.decRef(). d.inode.decRef() @@ -64,7 +65,7 @@ func (d *dentry) DecRef() { // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. // // TODO(b/134676337): Implement inotify. -func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) {} +func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) {} // Watches implements vfs.DentryImpl.Watches. // @@ -76,4 +77,4 @@ func (d *dentry) Watches() *vfs.Watches { // OnZeroWatches implements vfs.Dentry.OnZeroWatches. // // TODO(b/134676337): Implement inotify. -func (d *dentry) OnZeroWatches() {} +func (d *dentry) OnZeroWatches(context.Context) {} diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go index 357512c7e..0fc01668d 100644 --- a/pkg/sentry/fsimpl/ext/directory.go +++ b/pkg/sentry/fsimpl/ext/directory.go @@ -142,7 +142,7 @@ type directoryFD struct { var _ vfs.FileDescriptionImpl = (*directoryFD)(nil) // Release implements vfs.FileDescriptionImpl.Release. -func (fd *directoryFD) Release() { +func (fd *directoryFD) Release(ctx context.Context) { if fd.iter == nil { return } diff --git a/pkg/sentry/fsimpl/ext/ext.go b/pkg/sentry/fsimpl/ext/ext.go index dac6effbf..08ffc2834 100644 --- a/pkg/sentry/fsimpl/ext/ext.go +++ b/pkg/sentry/fsimpl/ext/ext.go @@ -123,32 +123,32 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt fs.vfsfs.Init(vfsObj, &fsType, &fs) fs.sb, err = readSuperBlock(dev) if err != nil { - fs.vfsfs.DecRef() + fs.vfsfs.DecRef(ctx) return nil, nil, err } if fs.sb.Magic() != linux.EXT_SUPER_MAGIC { // mount(2) specifies that EINVAL should be returned if the superblock is // invalid. - fs.vfsfs.DecRef() + fs.vfsfs.DecRef(ctx) return nil, nil, syserror.EINVAL } // Refuse to mount if the filesystem is incompatible. if !isCompatible(fs.sb) { - fs.vfsfs.DecRef() + fs.vfsfs.DecRef(ctx) return nil, nil, syserror.EINVAL } fs.bgs, err = readBlockGroups(dev, fs.sb) if err != nil { - fs.vfsfs.DecRef() + fs.vfsfs.DecRef(ctx) return nil, nil, err } rootInode, err := fs.getOrCreateInodeLocked(disklayout.RootDirInode) if err != nil { - fs.vfsfs.DecRef() + fs.vfsfs.DecRef(ctx) return nil, nil, err } rootInode.incRef() diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go index 64e9a579f..2dbaee287 100644 --- a/pkg/sentry/fsimpl/ext/ext_test.go +++ b/pkg/sentry/fsimpl/ext/ext_test.go @@ -65,7 +65,7 @@ func setUp(t *testing.T, imagePath string) (context.Context, *vfs.VirtualFilesys // Create VFS. vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { t.Fatalf("VFS init: %v", err) } vfsObj.MustRegisterFilesystemType("extfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ @@ -80,7 +80,7 @@ func setUp(t *testing.T, imagePath string) (context.Context, *vfs.VirtualFilesys root := mntns.Root() tearDown := func() { - root.DecRef() + root.DecRef(ctx) if err := f.Close(); err != nil { t.Fatalf("tearDown failed: %v", err) diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index 557963e03..c714ddf73 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -84,7 +84,7 @@ var _ vfs.FilesystemImpl = (*filesystem)(nil) // - filesystem.mu must be locked (for writing if write param is true). // - !rp.Done(). // - inode == vfsd.Impl().(*Dentry).inode. -func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write bool) (*vfs.Dentry, *inode, error) { +func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write bool) (*vfs.Dentry, *inode, error) { if !inode.isDir() { return nil, nil, syserror.ENOTDIR } @@ -100,7 +100,7 @@ func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write boo } d := vfsd.Impl().(*dentry) if name == ".." { - isRoot, err := rp.CheckRoot(vfsd) + isRoot, err := rp.CheckRoot(ctx, vfsd) if err != nil { return nil, nil, err } @@ -108,7 +108,7 @@ func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write boo rp.Advance() return vfsd, inode, nil } - if err := rp.CheckMount(&d.parent.vfsd); err != nil { + if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil { return nil, nil, err } rp.Advance() @@ -143,7 +143,7 @@ func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write boo child.name = name dir.childCache[name] = child } - if err := rp.CheckMount(&child.vfsd); err != nil { + if err := rp.CheckMount(ctx, &child.vfsd); err != nil { return nil, nil, err } if child.inode.isSymlink() && rp.ShouldFollowSymlink() { @@ -167,12 +167,12 @@ func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write boo // // Preconditions: // - filesystem.mu must be locked (for writing if write param is true). -func walkLocked(rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { +func walkLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { vfsd := rp.Start() inode := vfsd.Impl().(*dentry).inode for !rp.Done() { var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode, write) + vfsd, inode, err = stepLocked(ctx, rp, vfsd, inode, write) if err != nil { return nil, nil, err } @@ -196,12 +196,12 @@ func walkLocked(rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) // Preconditions: // - filesystem.mu must be locked (for writing if write param is true). // - !rp.Done(). -func walkParentLocked(rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { +func walkParentLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { vfsd := rp.Start() inode := vfsd.Impl().(*dentry).inode for !rp.Final() { var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode, write) + vfsd, inode, err = stepLocked(ctx, rp, vfsd, inode, write) if err != nil { return nil, nil, err } @@ -216,7 +216,7 @@ func walkParentLocked(rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, e // the rp till the parent of the last component which should be an existing // directory. If parent is false then resolves rp entirely. Attemps to resolve // the path as far as it can with a read lock and upgrades the lock if needed. -func (fs *filesystem) walk(rp *vfs.ResolvingPath, parent bool) (*vfs.Dentry, *inode, error) { +func (fs *filesystem) walk(ctx context.Context, rp *vfs.ResolvingPath, parent bool) (*vfs.Dentry, *inode, error) { var ( vfsd *vfs.Dentry inode *inode @@ -227,9 +227,9 @@ func (fs *filesystem) walk(rp *vfs.ResolvingPath, parent bool) (*vfs.Dentry, *in // of disk. This reduces congestion (allows concurrent walks). fs.mu.RLock() if parent { - vfsd, inode, err = walkParentLocked(rp, false) + vfsd, inode, err = walkParentLocked(ctx, rp, false) } else { - vfsd, inode, err = walkLocked(rp, false) + vfsd, inode, err = walkLocked(ctx, rp, false) } fs.mu.RUnlock() @@ -238,9 +238,9 @@ func (fs *filesystem) walk(rp *vfs.ResolvingPath, parent bool) (*vfs.Dentry, *in // walk is fine as this is a read only filesystem. fs.mu.Lock() if parent { - vfsd, inode, err = walkParentLocked(rp, true) + vfsd, inode, err = walkParentLocked(ctx, rp, true) } else { - vfsd, inode, err = walkLocked(rp, true) + vfsd, inode, err = walkLocked(ctx, rp, true) } fs.mu.Unlock() } @@ -283,7 +283,7 @@ func (fs *filesystem) statTo(stat *linux.Statfs) { // AccessAt implements vfs.Filesystem.Impl.AccessAt. func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { - _, inode, err := fs.walk(rp, false) + _, inode, err := fs.walk(ctx, rp, false) if err != nil { return err } @@ -292,7 +292,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { - vfsd, inode, err := fs.walk(rp, false) + vfsd, inode, err := fs.walk(ctx, rp, false) if err != nil { return nil, err } @@ -312,7 +312,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { - vfsd, inode, err := fs.walk(rp, true) + vfsd, inode, err := fs.walk(ctx, rp, true) if err != nil { return nil, err } @@ -322,7 +322,7 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa // OpenAt implements vfs.FilesystemImpl.OpenAt. func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - vfsd, inode, err := fs.walk(rp, false) + vfsd, inode, err := fs.walk(ctx, rp, false) if err != nil { return nil, err } @@ -336,7 +336,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { - _, inode, err := fs.walk(rp, false) + _, inode, err := fs.walk(ctx, rp, false) if err != nil { return "", err } @@ -349,7 +349,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st // StatAt implements vfs.FilesystemImpl.StatAt. func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { - _, inode, err := fs.walk(rp, false) + _, inode, err := fs.walk(ctx, rp, false) if err != nil { return linux.Statx{}, err } @@ -360,7 +360,7 @@ func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // StatFSAt implements vfs.FilesystemImpl.StatFSAt. func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { - if _, _, err := fs.walk(rp, false); err != nil { + if _, _, err := fs.walk(ctx, rp, false); err != nil { return linux.Statfs{}, err } @@ -370,7 +370,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) } @@ -390,7 +390,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.EEXIST } - if _, _, err := fs.walk(rp, true); err != nil { + if _, _, err := fs.walk(ctx, rp, true); err != nil { return err } @@ -403,7 +403,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return syserror.EEXIST } - if _, _, err := fs.walk(rp, true); err != nil { + if _, _, err := fs.walk(ctx, rp, true); err != nil { return err } @@ -416,7 +416,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return syserror.EEXIST } - _, _, err := fs.walk(rp, true) + _, _, err := fs.walk(ctx, rp, true) if err != nil { return err } @@ -430,7 +430,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return syserror.ENOENT } - _, _, err := fs.walk(rp, false) + _, _, err := fs.walk(ctx, rp, false) if err != nil { return err } @@ -440,7 +440,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa // RmdirAt implements vfs.FilesystemImpl.RmdirAt. func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { - _, inode, err := fs.walk(rp, false) + _, inode, err := fs.walk(ctx, rp, false) if err != nil { return err } @@ -454,7 +454,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error // SetStatAt implements vfs.FilesystemImpl.SetStatAt. func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { - _, _, err := fs.walk(rp, false) + _, _, err := fs.walk(ctx, rp, false) if err != nil { return err } @@ -468,7 +468,7 @@ func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ return syserror.EEXIST } - _, _, err := fs.walk(rp, true) + _, _, err := fs.walk(ctx, rp, true) if err != nil { return err } @@ -478,7 +478,7 @@ func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { - _, inode, err := fs.walk(rp, false) + _, inode, err := fs.walk(ctx, rp, false) if err != nil { return err } @@ -492,7 +492,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error // BoundEndpointAt implements FilesystemImpl.BoundEndpointAt. func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { - _, inode, err := fs.walk(rp, false) + _, inode, err := fs.walk(ctx, rp, false) if err != nil { return nil, err } @@ -506,7 +506,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath // ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { - _, _, err := fs.walk(rp, false) + _, _, err := fs.walk(ctx, rp, false) if err != nil { return nil, err } @@ -515,7 +515,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si // GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { - _, _, err := fs.walk(rp, false) + _, _, err := fs.walk(ctx, rp, false) if err != nil { return "", err } @@ -524,7 +524,7 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { - _, _, err := fs.walk(rp, false) + _, _, err := fs.walk(ctx, rp, false) if err != nil { return err } @@ -533,7 +533,7 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { - _, _, err := fs.walk(rp, false) + _, _, err := fs.walk(ctx, rp, false) if err != nil { return err } diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go index 66d14bb95..e73e740d6 100644 --- a/pkg/sentry/fsimpl/ext/regular_file.go +++ b/pkg/sentry/fsimpl/ext/regular_file.go @@ -79,7 +79,7 @@ type regularFileFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *regularFileFD) Release() {} +func (fd *regularFileFD) Release(context.Context) {} // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { diff --git a/pkg/sentry/fsimpl/ext/symlink.go b/pkg/sentry/fsimpl/ext/symlink.go index 62efd4095..2fd0d1fa8 100644 --- a/pkg/sentry/fsimpl/ext/symlink.go +++ b/pkg/sentry/fsimpl/ext/symlink.go @@ -73,7 +73,7 @@ type symlinkFD struct { var _ vfs.FileDescriptionImpl = (*symlinkFD)(nil) // Release implements vfs.FileDescriptionImpl.Release. -func (fd *symlinkFD) Release() {} +func (fd *symlinkFD) Release(context.Context) {} // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *symlinkFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go index 2225076bc..e522ff9a0 100644 --- a/pkg/sentry/fsimpl/fuse/dev.go +++ b/pkg/sentry/fsimpl/fuse/dev.go @@ -99,7 +99,7 @@ type DeviceFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *DeviceFD) Release() { +func (fd *DeviceFD) Release(context.Context) { fd.fs.conn.connected = false } diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go index 84c222ad6..1ffe7ccd2 100644 --- a/pkg/sentry/fsimpl/fuse/dev_test.go +++ b/pkg/sentry/fsimpl/fuse/dev_test.go @@ -356,12 +356,12 @@ func newTestConnection(system *testutil.System, k *kernel.Kernel, maxActiveReque vfsObj := &vfs.VirtualFilesystem{} fuseDev := &DeviceFD{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(system.Ctx); err != nil { return nil, nil, err } vd := vfsObj.NewAnonVirtualDentry("genCountFD") - defer vd.DecRef() + defer vd.DecRef(system.Ctx) if err := fuseDev.vfsfd.Init(fuseDev, linux.O_RDWR|linux.O_CREAT, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{}); err != nil { return nil, nil, err } diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index 200a93bbf..a1405f7c3 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -191,9 +191,9 @@ func NewFUSEFilesystem(ctx context.Context, devMinor uint32, opts *filesystemOpt } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) - fs.Filesystem.Release() + fs.Filesystem.Release(ctx) } // inode implements kernfs.Inode. diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go index 8c7c8e1b3..1679066ba 100644 --- a/pkg/sentry/fsimpl/gofer/directory.go +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -122,7 +122,7 @@ type directoryFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *directoryFD) Release() { +func (fd *directoryFD) Release(context.Context) { } // IterDirents implements vfs.FileDescriptionImpl.IterDirents. @@ -139,7 +139,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba fd.dirents = ds } - d.InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + d.InotifyWithParent(ctx, linux.IN_ACCESS, 0, vfs.PathEvent) if d.cachedMetadataAuthoritative() { d.touchAtime(fd.vfsfd.Mount()) } diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 00e3c99cd..e6af37d0d 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -55,7 +55,7 @@ func (fs *filesystem) Sync(ctx context.Context) error { // Sync regular files. for _, d := range ds { err := d.syncSharedHandle(ctx) - d.DecRef() + d.DecRef(ctx) if err != nil && retErr == nil { retErr = err } @@ -65,7 +65,7 @@ func (fs *filesystem) Sync(ctx context.Context) error { // handles (so they won't be synced by the above). for _, sffd := range sffds { err := sffd.Sync(ctx) - sffd.vfsfd.DecRef() + sffd.vfsfd.DecRef(ctx) if err != nil && retErr == nil { retErr = err } @@ -133,7 +133,7 @@ afterSymlink: return d, nil } if name == ".." { - if isRoot, err := rp.CheckRoot(&d.vfsd); err != nil { + if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil { return nil, err } else if isRoot || d.parent == nil { rp.Advance() @@ -146,7 +146,7 @@ afterSymlink: // // Call rp.CheckMount() before updating d.parent's metadata, since if // we traverse to another mount then d.parent's metadata is irrelevant. - if err := rp.CheckMount(&d.parent.vfsd); err != nil { + if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil { return nil, err } if d != d.parent && !d.cachedMetadataAuthoritative() { @@ -164,7 +164,7 @@ afterSymlink: if child == nil { return nil, syserror.ENOENT } - if err := rp.CheckMount(&child.vfsd); err != nil { + if err := rp.CheckMount(ctx, &child.vfsd); err != nil { return nil, err } if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() { @@ -239,7 +239,7 @@ func (fs *filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir // has 0 references, drop it). Wait to update parent.children until we // know what to replace the existing dentry with (i.e. one of the // returns below), to avoid a redundant map access. - vfsObj.InvalidateDentry(&child.vfsd) + vfsObj.InvalidateDentry(ctx, &child.vfsd) if child.isSynthetic() { // Normally we don't mark invalidated dentries as deleted since // they may still exist (but at a different path), and also for @@ -332,7 +332,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, createInRemoteDir func(parent *dentry, name string) error, createInSyntheticDir func(parent *dentry, name string) error) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) start := rp.Start().Impl().(*dentry) if !start.cachedMetadataAuthoritative() { // Get updated metadata for start as required by @@ -384,7 +384,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir if dir { ev |= linux.IN_ISDIR } - parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) + parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) return nil } if fs.opts.interop == InteropModeShared { @@ -405,7 +405,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir if dir { ev |= linux.IN_ISDIR } - parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) + parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) return nil } if child := parent.children[name]; child != nil { @@ -426,7 +426,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir if dir { ev |= linux.IN_ISDIR } - parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) + parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) return nil } @@ -434,7 +434,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) start := rp.Start().Impl().(*dentry) if !start.cachedMetadataAuthoritative() { // Get updated metadata for start as required by @@ -470,7 +470,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b } vfsObj := rp.VirtualFilesystem() mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) parent.dirMu.Lock() defer parent.dirMu.Unlock() @@ -600,17 +600,17 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b // Generate inotify events for rmdir or unlink. if dir { - parent.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */) + parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */) } else { var cw *vfs.Watches if child != nil { cw = &child.watches } - vfs.InotifyRemoveChild(cw, &parent.watches, name) + vfs.InotifyRemoveChild(ctx, cw, &parent.watches, name) } if child != nil { - vfsObj.CommitDeleteDentry(&child.vfsd) + vfsObj.CommitDeleteDentry(ctx, &child.vfsd) child.setDeleted() if child.isSynthetic() { parent.syntheticChildren-- @@ -637,7 +637,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b // but dentry slices are allocated lazily, and it's much easier to say "defer // fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() { // fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this. -func (fs *filesystem) renameMuRUnlockAndCheckCaching(ds **[]*dentry) { +func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) { fs.renameMu.RUnlock() if *ds == nil { return @@ -645,20 +645,20 @@ func (fs *filesystem) renameMuRUnlockAndCheckCaching(ds **[]*dentry) { if len(**ds) != 0 { fs.renameMu.Lock() for _, d := range **ds { - d.checkCachingLocked() + d.checkCachingLocked(ctx) } fs.renameMu.Unlock() } putDentrySlice(*ds) } -func (fs *filesystem) renameMuUnlockAndCheckCaching(ds **[]*dentry) { +func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) { if *ds == nil { fs.renameMu.Unlock() return } for _, d := range **ds { - d.checkCachingLocked() + d.checkCachingLocked(ctx) } fs.renameMu.Unlock() putDentrySlice(*ds) @@ -668,7 +668,7 @@ func (fs *filesystem) renameMuUnlockAndCheckCaching(ds **[]*dentry) { func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return err @@ -680,7 +680,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return nil, err @@ -701,7 +701,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) start := rp.Start().Impl().(*dentry) if !start.cachedMetadataAuthoritative() { // Get updated metadata for start as required by @@ -812,7 +812,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) start := rp.Start().Impl().(*dentry) if !start.cachedMetadataAuthoritative() { @@ -1126,7 +1126,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } childVFSFD = &fd.vfsfd } - d.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */) + d.watches.Notify(ctx, name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */) return childVFSFD, nil } @@ -1134,7 +1134,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return "", err @@ -1154,7 +1154,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa var ds *[]*dentry fs.renameMu.Lock() - defer fs.renameMuUnlockAndCheckCaching(&ds) + defer fs.renameMuUnlockAndCheckCaching(ctx, &ds) newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds) if err != nil { return err @@ -1244,7 +1244,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa return nil } mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil { return err } @@ -1269,7 +1269,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } // Update the dentry tree. - vfsObj.CommitRenameReplaceDentry(&renamed.vfsd, replacedVFSD) + vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD) if replaced != nil { replaced.setDeleted() if replaced.isSynthetic() { @@ -1331,17 +1331,17 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts fs.renameMu.RLock() d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { - fs.renameMuRUnlockAndCheckCaching(&ds) + fs.renameMuRUnlockAndCheckCaching(ctx, &ds) return err } if err := d.setStat(ctx, rp.Credentials(), &opts, rp.Mount()); err != nil { - fs.renameMuRUnlockAndCheckCaching(&ds) + fs.renameMuRUnlockAndCheckCaching(ctx, &ds) return err } - fs.renameMuRUnlockAndCheckCaching(&ds) + fs.renameMuRUnlockAndCheckCaching(ctx, &ds) if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { - d.InotifyWithParent(ev, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent) } return nil } @@ -1350,7 +1350,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return linux.Statx{}, err @@ -1367,7 +1367,7 @@ func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return linux.Statfs{}, err @@ -1417,7 +1417,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return nil, err @@ -1443,7 +1443,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return nil, err @@ -1455,7 +1455,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckCaching(&ds) + defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return "", err @@ -1469,16 +1469,16 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt fs.renameMu.RLock() d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { - fs.renameMuRUnlockAndCheckCaching(&ds) + fs.renameMuRUnlockAndCheckCaching(ctx, &ds) return err } if err := d.setxattr(ctx, rp.Credentials(), &opts); err != nil { - fs.renameMuRUnlockAndCheckCaching(&ds) + fs.renameMuRUnlockAndCheckCaching(ctx, &ds) return err } - fs.renameMuRUnlockAndCheckCaching(&ds) + fs.renameMuRUnlockAndCheckCaching(ctx, &ds) - d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } @@ -1488,16 +1488,16 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, fs.renameMu.RLock() d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { - fs.renameMuRUnlockAndCheckCaching(&ds) + fs.renameMuRUnlockAndCheckCaching(ctx, &ds) return err } if err := d.removexattr(ctx, rp.Credentials(), name); err != nil { - fs.renameMuRUnlockAndCheckCaching(&ds) + fs.renameMuRUnlockAndCheckCaching(ctx, &ds) return err } - fs.renameMuRUnlockAndCheckCaching(&ds) + fs.renameMuRUnlockAndCheckCaching(ctx, &ds) - d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index e20de84b5..2e5575d8d 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -482,7 +482,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt root, err := fs.newDentry(ctx, attachFile, qid, attrMask, &attr) if err != nil { attachFile.close(ctx) - fs.vfsfs.DecRef() + fs.vfsfs.DecRef(ctx) return nil, nil, err } // Set the root's reference count to 2. One reference is returned to the @@ -495,8 +495,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { - ctx := context.Background() +func (fs *filesystem) Release(ctx context.Context) { mf := fs.mfp.MemoryFile() fs.syncMu.Lock() @@ -1089,10 +1088,10 @@ func (d *dentry) TryIncRef() bool { } // DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef() { +func (d *dentry) DecRef(ctx context.Context) { if refs := atomic.AddInt64(&d.refs, -1); refs == 0 { d.fs.renameMu.Lock() - d.checkCachingLocked() + d.checkCachingLocked(ctx) d.fs.renameMu.Unlock() } else if refs < 0 { panic("gofer.dentry.DecRef() called without holding a reference") @@ -1109,7 +1108,7 @@ func (d *dentry) decRefLocked() { } // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. -func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) { +func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) { if d.isDir() { events |= linux.IN_ISDIR } @@ -1117,9 +1116,9 @@ func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) { d.fs.renameMu.RLock() // The ordering below is important, Linux always notifies the parent first. if d.parent != nil { - d.parent.watches.Notify(d.name, events, cookie, et, d.isDeleted()) + d.parent.watches.Notify(ctx, d.name, events, cookie, et, d.isDeleted()) } - d.watches.Notify("", events, cookie, et, d.isDeleted()) + d.watches.Notify(ctx, "", events, cookie, et, d.isDeleted()) d.fs.renameMu.RUnlock() } @@ -1131,10 +1130,10 @@ func (d *dentry) Watches() *vfs.Watches { // OnZeroWatches implements vfs.DentryImpl.OnZeroWatches. // // If no watches are left on this dentry and it has no references, cache it. -func (d *dentry) OnZeroWatches() { +func (d *dentry) OnZeroWatches(ctx context.Context) { if atomic.LoadInt64(&d.refs) == 0 { d.fs.renameMu.Lock() - d.checkCachingLocked() + d.checkCachingLocked(ctx) d.fs.renameMu.Unlock() } } @@ -1149,7 +1148,7 @@ func (d *dentry) OnZeroWatches() { // do nothing. // // Preconditions: d.fs.renameMu must be locked for writing. -func (d *dentry) checkCachingLocked() { +func (d *dentry) checkCachingLocked(ctx context.Context) { // Dentries with a non-zero reference count must be retained. (The only way // to obtain a reference on a dentry with zero references is via path // resolution, which requires renameMu, so if d.refs is zero then it will @@ -1171,14 +1170,14 @@ func (d *dentry) checkCachingLocked() { // reachable by path resolution and should be dropped immediately. if d.vfsd.IsDead() { if d.isDeleted() { - d.watches.HandleDeletion() + d.watches.HandleDeletion(ctx) } if d.cached { d.fs.cachedDentries.Remove(d) d.fs.cachedDentriesLen-- d.cached = false } - d.destroyLocked() + d.destroyLocked(ctx) return } // If d still has inotify watches and it is not deleted or invalidated, we @@ -1213,7 +1212,7 @@ func (d *dentry) checkCachingLocked() { if !victim.vfsd.IsDead() { // Note that victim can't be a mount point (in any mount // namespace), since VFS holds references on mount points. - d.fs.vfsfs.VirtualFilesystem().InvalidateDentry(&victim.vfsd) + d.fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, &victim.vfsd) delete(victim.parent.children, victim.name) // We're only deleting the dentry, not the file it // represents, so we don't need to update @@ -1221,7 +1220,7 @@ func (d *dentry) checkCachingLocked() { } victim.parent.dirMu.Unlock() } - victim.destroyLocked() + victim.destroyLocked(ctx) } // Whether or not victim was destroyed, we brought fs.cachedDentriesLen // back down to fs.opts.maxCachedDentries, so we don't loop. @@ -1233,7 +1232,7 @@ func (d *dentry) checkCachingLocked() { // // Preconditions: d.fs.renameMu must be locked for writing. d.refs == 0. d is // not a child dentry. -func (d *dentry) destroyLocked() { +func (d *dentry) destroyLocked(ctx context.Context) { switch atomic.LoadInt64(&d.refs) { case 0: // Mark the dentry destroyed. @@ -1244,7 +1243,6 @@ func (d *dentry) destroyLocked() { panic("dentry.destroyLocked() called with references on the dentry") } - ctx := context.Background() d.handleMu.Lock() if !d.handle.file.isNil() { mf := d.fs.mfp.MemoryFile() @@ -1276,7 +1274,7 @@ func (d *dentry) destroyLocked() { // d.fs.renameMu. if d.parent != nil { if refs := atomic.AddInt64(&d.parent.refs, -1); refs == 0 { - d.parent.checkCachingLocked() + d.parent.checkCachingLocked(ctx) } else if refs < 0 { panic("gofer.dentry.DecRef() called without holding a reference") } @@ -1514,7 +1512,7 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) return err } if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { - fd.dentry().InotifyWithParent(ev, 0, vfs.InodeEvent) + fd.dentry().InotifyWithParent(ctx, ev, 0, vfs.InodeEvent) } return nil } @@ -1535,7 +1533,7 @@ func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOption if err := d.setxattr(ctx, auth.CredentialsFromContext(ctx), &opts); err != nil { return err } - d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } @@ -1545,7 +1543,7 @@ func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { if err := d.removexattr(ctx, auth.CredentialsFromContext(ctx), name); err != nil { return err } - d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } diff --git a/pkg/sentry/fsimpl/gofer/gofer_test.go b/pkg/sentry/fsimpl/gofer/gofer_test.go index adff39490..56d80bcf8 100644 --- a/pkg/sentry/fsimpl/gofer/gofer_test.go +++ b/pkg/sentry/fsimpl/gofer/gofer_test.go @@ -50,7 +50,7 @@ func TestDestroyIdempotent(t *testing.T) { } parent.cacheNewChildLocked(child, "child") - child.checkCachingLocked() + child.checkCachingLocked(ctx) if got := atomic.LoadInt64(&child.refs); got != -1 { t.Fatalf("child.refs=%d, want: -1", got) } @@ -58,6 +58,6 @@ func TestDestroyIdempotent(t *testing.T) { if got := atomic.LoadInt64(&parent.refs); got != -1 { t.Fatalf("parent.refs=%d, want: -1", got) } - child.checkCachingLocked() - child.checkCachingLocked() + child.checkCachingLocked(ctx) + child.checkCachingLocked(ctx) } diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 09f142cfc..420e8efe2 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -48,7 +48,7 @@ type regularFileFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *regularFileFD) Release() { +func (fd *regularFileFD) Release(context.Context) { } // OnClose implements vfs.FileDescriptionImpl.OnClose. diff --git a/pkg/sentry/fsimpl/gofer/socket.go b/pkg/sentry/fsimpl/gofer/socket.go index d6dbe9092..85d2bee72 100644 --- a/pkg/sentry/fsimpl/gofer/socket.go +++ b/pkg/sentry/fsimpl/gofer/socket.go @@ -108,7 +108,7 @@ func (e *endpoint) UnidirectionalConnect(ctx context.Context) (transport.Connect // We don't need the receiver. c.CloseRecv() - c.Release() + c.Release(ctx) return c, nil } @@ -136,8 +136,8 @@ func (e *endpoint) newConnectedEndpoint(ctx context.Context, flags p9.ConnectFla } // Release implements transport.BoundEndpoint.Release. -func (e *endpoint) Release() { - e.dentry.DecRef() +func (e *endpoint) Release(ctx context.Context) { + e.dentry.DecRef(ctx) } // Passcred implements transport.BoundEndpoint.Passcred. diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index 811528982..fc269ef2b 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -80,11 +80,11 @@ func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *vfs.FileLocks, } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *specialFileFD) Release() { +func (fd *specialFileFD) Release(ctx context.Context) { if fd.haveQueue { fdnotifier.RemoveFD(fd.handle.fd) } - fd.handle.close(context.Background()) + fd.handle.close(ctx) fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) fs.syncMu.Lock() delete(fs.specialFileFDs, fd) diff --git a/pkg/sentry/fsimpl/host/control.go b/pkg/sentry/fsimpl/host/control.go index b9082a20f..0135e4428 100644 --- a/pkg/sentry/fsimpl/host/control.go +++ b/pkg/sentry/fsimpl/host/control.go @@ -58,7 +58,7 @@ func (c *scmRights) Clone() transport.RightsControlMessage { } // Release implements transport.RightsControlMessage.Release. -func (c *scmRights) Release() { +func (c *scmRights) Release(ctx context.Context) { for _, fd := range c.fds { syscall.Close(fd) } diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index c894f2ca0..bf922c566 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -117,7 +117,7 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) d.Init(i) // i.open will take a reference on d. - defer d.DecRef() + defer d.DecRef(ctx) // For simplicity, fileDescription.offset is set to 0. Technically, we // should only set to 0 on files that are not seekable (sockets, pipes, @@ -168,9 +168,9 @@ type filesystem struct { devMinor uint32 } -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { fs.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) - fs.Filesystem.Release() + fs.Filesystem.Release(ctx) } func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { @@ -431,12 +431,12 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre } // DecRef implements kernfs.Inode. -func (i *inode) DecRef() { - i.AtomicRefCount.DecRefWithDestructor(i.Destroy) +func (i *inode) DecRef(ctx context.Context) { + i.AtomicRefCount.DecRefWithDestructor(ctx, i.Destroy) } // Destroy implements kernfs.Inode. -func (i *inode) Destroy() { +func (i *inode) Destroy(context.Context) { if i.wouldBlock { fdnotifier.RemoveFD(int32(i.hostFD)) } @@ -542,7 +542,7 @@ func (f *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux } // Release implements vfs.FileDescriptionImpl. -func (f *fileDescription) Release() { +func (f *fileDescription) Release(context.Context) { // noop } diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go index fd16bd92d..4979dd0a9 100644 --- a/pkg/sentry/fsimpl/host/socket.go +++ b/pkg/sentry/fsimpl/host/socket.go @@ -139,7 +139,7 @@ func NewConnectedEndpoint(ctx context.Context, hostFD int, addr string, saveable } // Send implements transport.ConnectedEndpoint.Send. -func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) { +func (c *ConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) { c.mu.RLock() defer c.mu.RUnlock() @@ -216,7 +216,7 @@ func (c *ConnectedEndpoint) EventUpdate() { } // Recv implements transport.Receiver.Recv. -func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { +func (c *ConnectedEndpoint) Recv(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { c.mu.RLock() defer c.mu.RUnlock() @@ -317,8 +317,8 @@ func (c *ConnectedEndpoint) destroyLocked() { // Release implements transport.ConnectedEndpoint.Release and // transport.Receiver.Release. -func (c *ConnectedEndpoint) Release() { - c.ref.DecRefWithDestructor(func() { +func (c *ConnectedEndpoint) Release(ctx context.Context) { + c.ref.DecRefWithDestructor(ctx, func(context.Context) { c.mu.Lock() c.destroyLocked() c.mu.Unlock() @@ -347,8 +347,8 @@ func (e *SCMConnectedEndpoint) Init() error { // Release implements transport.ConnectedEndpoint.Release and // transport.Receiver.Release. -func (e *SCMConnectedEndpoint) Release() { - e.ref.DecRefWithDestructor(func() { +func (e *SCMConnectedEndpoint) Release(ctx context.Context) { + e.ref.DecRefWithDestructor(ctx, func(context.Context) { e.mu.Lock() if err := syscall.Close(e.fd); err != nil { log.Warningf("Failed to close host fd %d: %v", err) diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go index 4ee9270cc..d372c60cb 100644 --- a/pkg/sentry/fsimpl/host/tty.go +++ b/pkg/sentry/fsimpl/host/tty.go @@ -67,12 +67,12 @@ func (t *TTYFileDescription) ForegroundProcessGroup() *kernel.ProcessGroup { } // Release implements fs.FileOperations.Release. -func (t *TTYFileDescription) Release() { +func (t *TTYFileDescription) Release(ctx context.Context) { t.mu.Lock() t.fgProcessGroup = nil t.mu.Unlock() - t.fileDescription.Release() + t.fileDescription.Release(ctx) } // PRead implements vfs.FileDescriptionImpl. diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go index c6c4472e7..12adf727a 100644 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go @@ -122,7 +122,7 @@ func (fd *DynamicBytesFD) PWrite(ctx context.Context, src usermem.IOSequence, of } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *DynamicBytesFD) Release() {} +func (fd *DynamicBytesFD) Release(context.Context) {} // Stat implements vfs.FileDescriptionImpl.Stat. func (fd *DynamicBytesFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 1d37ccb98..fcee6200a 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -113,7 +113,7 @@ func (fd *GenericDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *GenericDirectoryFD) Release() {} +func (fd *GenericDirectoryFD) Release(context.Context) {} func (fd *GenericDirectoryFD) filesystem() *vfs.Filesystem { return fd.vfsfd.VirtualDentry().Mount().Filesystem() diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 61a36cff9..d7edb6342 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -56,13 +56,13 @@ afterSymlink: return vfsd, nil } if name == ".." { - if isRoot, err := rp.CheckRoot(vfsd); err != nil { + if isRoot, err := rp.CheckRoot(ctx, vfsd); err != nil { return nil, err } else if isRoot || d.parent == nil { rp.Advance() return vfsd, nil } - if err := rp.CheckMount(&d.parent.vfsd); err != nil { + if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil { return nil, err } rp.Advance() @@ -77,7 +77,7 @@ afterSymlink: if err != nil { return nil, err } - if err := rp.CheckMount(&next.vfsd); err != nil { + if err := rp.CheckMount(ctx, &next.vfsd); err != nil { return nil, err } // Resolve any symlink at current path component. @@ -88,7 +88,7 @@ afterSymlink: } if targetVD.Ok() { err := rp.HandleJump(targetVD) - targetVD.DecRef() + targetVD.DecRef(ctx) if err != nil { return nil, err } @@ -116,7 +116,7 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir // Cached dentry exists, revalidate. if !child.inode.Valid(ctx) { delete(parent.children, name) - vfsObj.InvalidateDentry(&child.vfsd) + vfsObj.InvalidateDentry(ctx, &child.vfsd) fs.deferDecRef(&child.vfsd) // Reference from Lookup. child = nil } @@ -234,7 +234,7 @@ func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Den } // Release implements vfs.FilesystemImpl.Release. -func (fs *Filesystem) Release() { +func (fs *Filesystem) Release(context.Context) { } // Sync implements vfs.FilesystemImpl.Sync. @@ -246,7 +246,7 @@ func (fs *Filesystem) Sync(ctx context.Context) error { // AccessAt implements vfs.Filesystem.Impl.AccessAt. func (fs *Filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { fs.mu.RLock() - defer fs.processDeferredDecRefs() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.RUnlock() _, inode, err := fs.walkExistingLocked(ctx, rp) @@ -259,7 +259,7 @@ func (fs *Filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { fs.mu.RLock() - defer fs.processDeferredDecRefs() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.RUnlock() vfsd, inode, err := fs.walkExistingLocked(ctx, rp) if err != nil { @@ -282,7 +282,7 @@ func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { fs.mu.RLock() - defer fs.processDeferredDecRefs() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.RUnlock() vfsd, _, err := fs.walkParentDirLocked(ctx, rp) if err != nil { @@ -300,7 +300,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. fs.mu.Lock() defer fs.mu.Unlock() parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() + fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -337,7 +337,7 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v fs.mu.Lock() defer fs.mu.Unlock() parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() + fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -365,7 +365,7 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v fs.mu.Lock() defer fs.mu.Unlock() parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() + fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -397,7 +397,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // Do not create new file. if opts.Flags&linux.O_CREAT == 0 { fs.mu.RLock() - defer fs.processDeferredDecRefs() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.RUnlock() vfsd, inode, err := fs.walkExistingLocked(ctx, rp) if err != nil { @@ -429,7 +429,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf } afterTrailingSymlink: parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() + fs.processDeferredDecRefsLocked(ctx) if err != nil { return nil, err } @@ -483,7 +483,7 @@ afterTrailingSymlink: } if targetVD.Ok() { err := rp.HandleJump(targetVD) - targetVD.DecRef() + targetVD.DecRef(ctx) if err != nil { return nil, err } @@ -507,7 +507,7 @@ func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st fs.mu.RLock() d, inode, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() - fs.processDeferredDecRefs() + fs.processDeferredDecRefs(ctx) if err != nil { return "", err } @@ -526,7 +526,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0 fs.mu.Lock() - defer fs.processDeferredDecRefsLocked() + defer fs.processDeferredDecRefsLocked(ctx) defer fs.mu.Unlock() // Resolve the destination directory first to verify that it's on this @@ -584,7 +584,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) virtfs := rp.VirtualFilesystem() // We can't deadlock here due to lock ordering because we're protected from @@ -615,7 +615,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa dstDir.children = make(map[string]*Dentry) } dstDir.children[pc] = src - virtfs.CommitRenameReplaceDentry(srcVFSD, replaced) + virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaced) return nil } @@ -624,7 +624,7 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error fs.mu.Lock() defer fs.mu.Unlock() vfsd, inode, err := fs.walkExistingLocked(ctx, rp) - fs.processDeferredDecRefsLocked() + fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -648,7 +648,7 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error defer parentDentry.dirMu.Unlock() mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { return err } @@ -656,7 +656,7 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error virtfs.AbortDeleteDentry(vfsd) return err } - virtfs.CommitDeleteDentry(vfsd) + virtfs.CommitDeleteDentry(ctx, vfsd) return nil } @@ -665,7 +665,7 @@ func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts fs.mu.RLock() _, inode, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() - fs.processDeferredDecRefs() + fs.processDeferredDecRefs(ctx) if err != nil { return err } @@ -680,7 +680,7 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf fs.mu.RLock() _, inode, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() - fs.processDeferredDecRefs() + fs.processDeferredDecRefs(ctx) if err != nil { return linux.Statx{}, err } @@ -692,7 +692,7 @@ func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() - fs.processDeferredDecRefs() + fs.processDeferredDecRefs(ctx) if err != nil { return linux.Statfs{}, err } @@ -708,7 +708,7 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ fs.mu.Lock() defer fs.mu.Unlock() parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() + fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -733,7 +733,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error fs.mu.Lock() defer fs.mu.Unlock() vfsd, _, err := fs.walkExistingLocked(ctx, rp) - fs.processDeferredDecRefsLocked() + fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -753,7 +753,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error parentDentry.dirMu.Lock() defer parentDentry.dirMu.Unlock() mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { return err } @@ -761,7 +761,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error virtfs.AbortDeleteDentry(vfsd) return err } - virtfs.CommitDeleteDentry(vfsd) + virtfs.CommitDeleteDentry(ctx, vfsd) return nil } @@ -770,7 +770,7 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath fs.mu.RLock() _, inode, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() - fs.processDeferredDecRefs() + fs.processDeferredDecRefs(ctx) if err != nil { return nil, err } @@ -785,7 +785,7 @@ func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() - fs.processDeferredDecRefs() + fs.processDeferredDecRefs(ctx) if err != nil { return nil, err } @@ -798,7 +798,7 @@ func (fs *Filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() - fs.processDeferredDecRefs() + fs.processDeferredDecRefs(ctx) if err != nil { return "", err } @@ -811,7 +811,7 @@ func (fs *Filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() - fs.processDeferredDecRefs() + fs.processDeferredDecRefs(ctx) if err != nil { return err } @@ -824,7 +824,7 @@ func (fs *Filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, fs.mu.RLock() _, _, err := fs.walkExistingLocked(ctx, rp) fs.mu.RUnlock() - fs.processDeferredDecRefs() + fs.processDeferredDecRefs(ctx) if err != nil { return err } diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 579e627f0..c3efcf3ec 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -40,7 +40,7 @@ func (InodeNoopRefCount) IncRef() { } // DecRef implements Inode.DecRef. -func (InodeNoopRefCount) DecRef() { +func (InodeNoopRefCount) DecRef(context.Context) { } // TryIncRef implements Inode.TryIncRef. @@ -49,7 +49,7 @@ func (InodeNoopRefCount) TryIncRef() bool { } // Destroy implements Inode.Destroy. -func (InodeNoopRefCount) Destroy() { +func (InodeNoopRefCount) Destroy(context.Context) { } // InodeDirectoryNoNewChildren partially implements the Inode interface. @@ -366,12 +366,12 @@ func (o *OrderedChildren) Init(opts OrderedChildrenOptions) { } // DecRef implements Inode.DecRef. -func (o *OrderedChildren) DecRef() { - o.AtomicRefCount.DecRefWithDestructor(o.Destroy) +func (o *OrderedChildren) DecRef(ctx context.Context) { + o.AtomicRefCount.DecRefWithDestructor(ctx, o.Destroy) } // Destroy cleans up resources referenced by this OrderedChildren. -func (o *OrderedChildren) Destroy() { +func (o *OrderedChildren) Destroy(context.Context) { o.mu.Lock() defer o.mu.Unlock() o.order.Reset() diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 46f207664..080118841 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -116,17 +116,17 @@ func (fs *Filesystem) deferDecRef(d *vfs.Dentry) { // processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the // droppedDentries list. See comment on Filesystem.mu. -func (fs *Filesystem) processDeferredDecRefs() { +func (fs *Filesystem) processDeferredDecRefs(ctx context.Context) { fs.mu.Lock() - fs.processDeferredDecRefsLocked() + fs.processDeferredDecRefsLocked(ctx) fs.mu.Unlock() } // Precondition: fs.mu must be held for writing. -func (fs *Filesystem) processDeferredDecRefsLocked() { +func (fs *Filesystem) processDeferredDecRefsLocked(ctx context.Context) { fs.droppedDentriesMu.Lock() for _, d := range fs.droppedDentries { - d.DecRef() + d.DecRef(ctx) } fs.droppedDentries = fs.droppedDentries[:0] // Keep slice memory for reuse. fs.droppedDentriesMu.Unlock() @@ -212,16 +212,16 @@ func (d *Dentry) isSymlink() bool { } // DecRef implements vfs.DentryImpl.DecRef. -func (d *Dentry) DecRef() { - d.AtomicRefCount.DecRefWithDestructor(d.destroy) +func (d *Dentry) DecRef(ctx context.Context) { + d.AtomicRefCount.DecRefWithDestructor(ctx, d.destroy) } // Precondition: Dentry must be removed from VFS' dentry cache. -func (d *Dentry) destroy() { - d.inode.DecRef() // IncRef from Init. +func (d *Dentry) destroy(ctx context.Context) { + d.inode.DecRef(ctx) // IncRef from Init. d.inode = nil if d.parent != nil { - d.parent.DecRef() // IncRef from Dentry.InsertChild. + d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild. } } @@ -230,7 +230,7 @@ func (d *Dentry) destroy() { // Although Linux technically supports inotify on pseudo filesystems (inotify // is implemented at the vfs layer), it is not particularly useful. It is left // unimplemented until someone actually needs it. -func (d *Dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) {} +func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) {} // Watches implements vfs.DentryImpl.Watches. func (d *Dentry) Watches() *vfs.Watches { @@ -238,7 +238,7 @@ func (d *Dentry) Watches() *vfs.Watches { } // OnZeroWatches implements vfs.Dentry.OnZeroWatches. -func (d *Dentry) OnZeroWatches() {} +func (d *Dentry) OnZeroWatches(context.Context) {} // InsertChild inserts child into the vfs dentry cache with the given name under // this dentry. This does not update the directory inode, so calling this on @@ -326,12 +326,12 @@ type Inode interface { type inodeRefs interface { IncRef() - DecRef() + DecRef(ctx context.Context) TryIncRef() bool // Destroy is called when the inode reaches zero references. Destroy release // all resources (references) on objects referenced by the inode, including // any child dentries. - Destroy() + Destroy(ctx context.Context) } type inodeMetadata interface { diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index dc407eb1d..c5d5afedf 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -46,7 +46,7 @@ func newTestSystem(t *testing.T, rootFn RootDentryFn) *testutil.System { ctx := contexttest.Context(t) creds := auth.CredentialsFromContext(ctx) v := &vfs.VirtualFilesystem{} - if err := v.Init(); err != nil { + if err := v.Init(ctx); err != nil { t.Fatalf("VFS init: %v", err) } v.MustRegisterFilesystemType("testfs", &fsType{rootFn: rootFn}, &vfs.RegisterFilesystemTypeOptions{ @@ -163,7 +163,7 @@ func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (* dir := d.fs.newDir(creds, opts.Mode, nil) dirVFSD := dir.VFSDentry() if err := d.OrderedChildren.Insert(name, dirVFSD); err != nil { - dir.DecRef() + dir.DecRef(ctx) return nil, err } d.IncLinks(1) @@ -175,7 +175,7 @@ func (d *dir) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (* f := d.fs.newFile(creds, "") fVFSD := f.VFSDentry() if err := d.OrderedChildren.Insert(name, fVFSD); err != nil { - f.DecRef() + f.DecRef(ctx) return nil, err } return fVFSD, nil @@ -213,7 +213,7 @@ func TestBasic(t *testing.T) { }) }) defer sys.Destroy() - sys.GetDentryOrDie(sys.PathOpAtRoot("file1")).DecRef() + sys.GetDentryOrDie(sys.PathOpAtRoot("file1")).DecRef(sys.Ctx) } func TestMkdirGetDentry(t *testing.T) { @@ -228,7 +228,7 @@ func TestMkdirGetDentry(t *testing.T) { if err := sys.VFS.MkdirAt(sys.Ctx, sys.Creds, pop, &vfs.MkdirOptions{Mode: 0755}); err != nil { t.Fatalf("MkdirAt for PathOperation %+v failed: %v", pop, err) } - sys.GetDentryOrDie(pop).DecRef() + sys.GetDentryOrDie(pop).DecRef(sys.Ctx) } func TestReadStaticFile(t *testing.T) { @@ -246,7 +246,7 @@ func TestReadStaticFile(t *testing.T) { if err != nil { t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) } - defer fd.DecRef() + defer fd.DecRef(sys.Ctx) content, err := sys.ReadToEnd(fd) if err != nil { @@ -273,7 +273,7 @@ func TestCreateNewFileInStaticDir(t *testing.T) { } // Close the file. The file should persist. - fd.DecRef() + fd.DecRef(sys.Ctx) fd, err = sys.VFS.OpenAt(sys.Ctx, sys.Creds, pop, &vfs.OpenOptions{ Flags: linux.O_RDONLY, @@ -281,7 +281,7 @@ func TestCreateNewFileInStaticDir(t *testing.T) { if err != nil { t.Fatalf("OpenAt(pop:%+v) = %+v failed: %v", pop, fd, err) } - fd.DecRef() + fd.DecRef(sys.Ctx) } func TestDirFDReadWrite(t *testing.T) { @@ -297,7 +297,7 @@ func TestDirFDReadWrite(t *testing.T) { if err != nil { t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) } - defer fd.DecRef() + defer fd.DecRef(sys.Ctx) // Read/Write should fail for directory FDs. if _, err := fd.Read(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); err != syserror.EISDIR { diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go index 8f8dcfafe..b3d19ff82 100644 --- a/pkg/sentry/fsimpl/overlay/copy_up.go +++ b/pkg/sentry/fsimpl/overlay/copy_up.go @@ -98,7 +98,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { if err != nil { return err } - defer oldFD.DecRef() + defer oldFD.DecRef(ctx) newFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &newpop, &vfs.OpenOptions{ Flags: linux.O_WRONLY | linux.O_CREAT | linux.O_EXCL, Mode: linux.FileMode(d.mode &^ linux.S_IFMT), @@ -106,7 +106,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { if err != nil { return err } - defer newFD.DecRef() + defer newFD.DecRef(ctx) bufIOSeq := usermem.BytesIOSequence(make([]byte, 32*1024)) // arbitrary buffer size for { readN, readErr := oldFD.Read(ctx, bufIOSeq, vfs.ReadOptions{}) @@ -241,13 +241,13 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { Mask: linux.STATX_INO, }) if err != nil { - d.upperVD.DecRef() + d.upperVD.DecRef(ctx) d.upperVD = vfs.VirtualDentry{} cleanupUndoCopyUp() return err } if upperStat.Mask&linux.STATX_INO == 0 { - d.upperVD.DecRef() + d.upperVD.DecRef(ctx) d.upperVD = vfs.VirtualDentry{} cleanupUndoCopyUp() return syserror.EREMOTE diff --git a/pkg/sentry/fsimpl/overlay/directory.go b/pkg/sentry/fsimpl/overlay/directory.go index f5c2462a5..fccb94105 100644 --- a/pkg/sentry/fsimpl/overlay/directory.go +++ b/pkg/sentry/fsimpl/overlay/directory.go @@ -46,7 +46,7 @@ func (d *dentry) collectWhiteoutsForRmdirLocked(ctx context.Context) (map[string readdirErr = err return false } - defer layerFD.DecRef() + defer layerFD.DecRef(ctx) // Reuse slice allocated for maybeWhiteouts from a previous layer to // reduce allocations. @@ -108,7 +108,7 @@ type directoryFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *directoryFD) Release() { +func (fd *directoryFD) Release(ctx context.Context) { } // IterDirents implements vfs.FileDescriptionImpl.IterDirents. @@ -177,7 +177,7 @@ func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) { readdirErr = err return false } - defer layerFD.DecRef() + defer layerFD.DecRef(ctx) // Reuse slice allocated for maybeWhiteouts from a previous layer to // reduce allocations. @@ -282,6 +282,6 @@ func (fd *directoryFD) Sync(ctx context.Context) error { return err } err = upperFD.Sync(ctx) - upperFD.DecRef() + upperFD.DecRef(ctx) return err } diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index 6b705e955..986b36ead 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -77,7 +77,7 @@ func putDentrySlice(ds *[]*dentry) { // but dentry slices are allocated lazily, and it's much easier to say "defer // fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() { // fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this. -func (fs *filesystem) renameMuRUnlockAndCheckDrop(ds **[]*dentry) { +func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { fs.renameMu.RUnlock() if *ds == nil { return @@ -85,20 +85,20 @@ func (fs *filesystem) renameMuRUnlockAndCheckDrop(ds **[]*dentry) { if len(**ds) != 0 { fs.renameMu.Lock() for _, d := range **ds { - d.checkDropLocked() + d.checkDropLocked(ctx) } fs.renameMu.Unlock() } putDentrySlice(*ds) } -func (fs *filesystem) renameMuUnlockAndCheckDrop(ds **[]*dentry) { +func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { if *ds == nil { fs.renameMu.Unlock() return } for _, d := range **ds { - d.checkDropLocked() + d.checkDropLocked(ctx) } fs.renameMu.Unlock() putDentrySlice(*ds) @@ -126,13 +126,13 @@ afterSymlink: return d, nil } if name == ".." { - if isRoot, err := rp.CheckRoot(&d.vfsd); err != nil { + if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil { return nil, err } else if isRoot || d.parent == nil { rp.Advance() return d, nil } - if err := rp.CheckMount(&d.parent.vfsd); err != nil { + if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil { return nil, err } rp.Advance() @@ -142,7 +142,7 @@ afterSymlink: if err != nil { return nil, err } - if err := rp.CheckMount(&child.vfsd); err != nil { + if err := rp.CheckMount(ctx, &child.vfsd); err != nil { return nil, err } if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() { @@ -272,11 +272,11 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str }) if lookupErr != nil { - child.destroyLocked() + child.destroyLocked(ctx) return nil, lookupErr } if !existsOnAnyLayer { - child.destroyLocked() + child.destroyLocked(ctx) return nil, syserror.ENOENT } @@ -430,7 +430,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string, haveUpperWhiteout bool) error) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) start := rp.Start().Impl().(*dentry) parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) if err != nil { @@ -501,7 +501,7 @@ func (fs *filesystem) cleanupRecreateWhiteout(ctx context.Context, vfsObj *vfs.V func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return err @@ -513,7 +513,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return nil, err @@ -532,7 +532,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return nil, err @@ -553,7 +553,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) start := rp.Start().Impl().(*dentry) d, err := fs.walkParentDirLocked(ctx, rp, start, &ds) if err != nil { @@ -720,7 +720,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) start := rp.Start().Impl().(*dentry) if rp.Done() { @@ -825,7 +825,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf fd.LockFD.Init(&d.locks) layerFDOpts := layerFD.Options() if err := fd.vfsfd.Init(fd, layerFlags, mnt, &d.vfsd, &layerFDOpts); err != nil { - layerFD.DecRef() + layerFD.DecRef(ctx) return nil, err } return &fd.vfsfd, nil @@ -920,7 +920,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving fd.LockFD.Init(&child.locks) upperFDOpts := upperFD.Options() if err := fd.vfsfd.Init(fd, upperFlags, mnt, &child.vfsd, &upperFDOpts); err != nil { - upperFD.DecRef() + upperFD.DecRef(ctx) // Don't bother with cleanup; the file was created successfully, we // just can't open it anymore for some reason. return nil, err @@ -932,7 +932,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return "", err @@ -952,7 +952,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa var ds *[]*dentry fs.renameMu.Lock() - defer fs.renameMuUnlockAndCheckDrop(&ds) + defer fs.renameMuUnlockAndCheckDrop(ctx, &ds) newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds) if err != nil { return err @@ -979,7 +979,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) start := rp.Start().Impl().(*dentry) parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) if err != nil { @@ -1001,7 +1001,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error } vfsObj := rp.VirtualFilesystem() mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) parent.dirMu.Lock() defer parent.dirMu.Unlock() @@ -1086,7 +1086,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } - vfsObj.CommitDeleteDentry(&child.vfsd) + vfsObj.CommitDeleteDentry(ctx, &child.vfsd) delete(parent.children, name) ds = appendDentry(ds, child) parent.dirents = nil @@ -1097,7 +1097,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return err @@ -1132,7 +1132,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) d, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return linux.Statx{}, err @@ -1160,7 +1160,7 @@ func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) _, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return linux.Statfs{}, err @@ -1211,7 +1211,7 @@ func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) start := rp.Start().Impl().(*dentry) parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) if err != nil { @@ -1233,7 +1233,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error } vfsObj := rp.VirtualFilesystem() mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) parent.dirMu.Lock() defer parent.dirMu.Unlock() @@ -1298,7 +1298,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error } if child != nil { - vfsObj.CommitDeleteDentry(&child.vfsd) + vfsObj.CommitDeleteDentry(ctx, &child.vfsd) delete(parent.children, name) ds = appendDentry(ds, child) } @@ -1310,7 +1310,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) _, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return nil, err @@ -1324,7 +1324,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) _, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return "", err @@ -1336,7 +1336,7 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) _, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return err @@ -1348,7 +1348,7 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { var ds *[]*dentry fs.renameMu.RLock() - defer fs.renameMuRUnlockAndCheckDrop(&ds) + defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) _, err := fs.resolveLocked(ctx, rp, &ds) if err != nil { return err diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/non_directory.go index c0749e711..d3060a481 100644 --- a/pkg/sentry/fsimpl/overlay/non_directory.go +++ b/pkg/sentry/fsimpl/overlay/non_directory.go @@ -81,11 +81,11 @@ func (fd *nonDirectoryFD) currentFDLocked(ctx context.Context) (*vfs.FileDescrip oldOff, oldOffErr := fd.cachedFD.Seek(ctx, 0, linux.SEEK_CUR) if oldOffErr == nil { if _, err := upperFD.Seek(ctx, oldOff, linux.SEEK_SET); err != nil { - upperFD.DecRef() + upperFD.DecRef(ctx) return nil, err } } - fd.cachedFD.DecRef() + fd.cachedFD.DecRef(ctx) fd.copiedUp = true fd.cachedFD = upperFD fd.cachedFlags = statusFlags @@ -99,8 +99,8 @@ func (fd *nonDirectoryFD) currentFDLocked(ctx context.Context) (*vfs.FileDescrip } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *nonDirectoryFD) Release() { - fd.cachedFD.DecRef() +func (fd *nonDirectoryFD) Release(ctx context.Context) { + fd.cachedFD.DecRef(ctx) fd.cachedFD = nil } @@ -138,7 +138,7 @@ func (fd *nonDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux Mask: layerMask, Sync: opts.Sync, }) - wrappedFD.DecRef() + wrappedFD.DecRef(ctx) if err != nil { return linux.Statx{}, err } @@ -187,7 +187,7 @@ func (fd *nonDirectoryFD) PRead(ctx context.Context, dst usermem.IOSequence, off if err != nil { return 0, err } - defer wrappedFD.DecRef() + defer wrappedFD.DecRef(ctx) return wrappedFD.PRead(ctx, dst, offset, opts) } @@ -209,7 +209,7 @@ func (fd *nonDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence, of if err != nil { return 0, err } - defer wrappedFD.DecRef() + defer wrappedFD.DecRef(ctx) return wrappedFD.PWrite(ctx, src, offset, opts) } @@ -250,7 +250,7 @@ func (fd *nonDirectoryFD) Sync(ctx context.Context) error { return err } wrappedFD.IncRef() - defer wrappedFD.DecRef() + defer wrappedFD.DecRef(ctx) fd.mu.Unlock() return wrappedFD.Sync(ctx) } @@ -261,6 +261,6 @@ func (fd *nonDirectoryFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOp if err != nil { return err } - defer wrappedFD.DecRef() + defer wrappedFD.DecRef(ctx) return wrappedFD.ConfigureMMap(ctx, opts) } diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go index e720d4825..75cc006bf 100644 --- a/pkg/sentry/fsimpl/overlay/overlay.go +++ b/pkg/sentry/fsimpl/overlay/overlay.go @@ -123,7 +123,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // filesystem with any number of lower layers. } else { vfsroot := vfs.RootFromContext(ctx) - defer vfsroot.DecRef() + defer vfsroot.DecRef(ctx) upperPathname, ok := mopts["upperdir"] if ok { delete(mopts, "upperdir") @@ -147,13 +147,13 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt ctx.Warningf("overlay.FilesystemType.GetFilesystem: failed to resolve upperdir %q: %v", upperPathname, err) return nil, nil, err } - defer upperRoot.DecRef() + defer upperRoot.DecRef(ctx) privateUpperRoot, err := clonePrivateMount(vfsObj, upperRoot, false /* forceReadOnly */) if err != nil { ctx.Warningf("overlay.FilesystemType.GetFilesystem: failed to make private bind mount of upperdir %q: %v", upperPathname, err) return nil, nil, err } - defer privateUpperRoot.DecRef() + defer privateUpperRoot.DecRef(ctx) fsopts.UpperRoot = privateUpperRoot } lowerPathnamesStr, ok := mopts["lowerdir"] @@ -190,13 +190,13 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt ctx.Warningf("overlay.FilesystemType.GetFilesystem: failed to resolve lowerdir %q: %v", lowerPathname, err) return nil, nil, err } - defer lowerRoot.DecRef() + defer lowerRoot.DecRef(ctx) privateLowerRoot, err := clonePrivateMount(vfsObj, lowerRoot, true /* forceReadOnly */) if err != nil { ctx.Warningf("overlay.FilesystemType.GetFilesystem: failed to make private bind mount of lowerdir %q: %v", lowerPathname, err) return nil, nil, err } - defer privateLowerRoot.DecRef() + defer privateLowerRoot.DecRef(ctx) fsopts.LowerRoots = append(fsopts.LowerRoots, privateLowerRoot) } } @@ -264,19 +264,19 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt Mask: rootStatMask, }) if err != nil { - root.destroyLocked() - fs.vfsfs.DecRef() + root.destroyLocked(ctx) + fs.vfsfs.DecRef(ctx) return nil, nil, err } if rootStat.Mask&rootStatMask != rootStatMask { - root.destroyLocked() - fs.vfsfs.DecRef() + root.destroyLocked(ctx) + fs.vfsfs.DecRef(ctx) return nil, nil, syserror.EREMOTE } if isWhiteout(&rootStat) { ctx.Warningf("overlay.FilesystemType.GetFilesystem: filesystem root is a whiteout") - root.destroyLocked() - fs.vfsfs.DecRef() + root.destroyLocked(ctx) + fs.vfsfs.DecRef(ctx) return nil, nil, syserror.EINVAL } root.mode = uint32(rootStat.Mode) @@ -319,17 +319,17 @@ func clonePrivateMount(vfsObj *vfs.VirtualFilesystem, vd vfs.VirtualDentry, forc } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { vfsObj := fs.vfsfs.VirtualFilesystem() vfsObj.PutAnonBlockDevMinor(fs.dirDevMinor) for _, lowerDevMinor := range fs.lowerDevMinors { vfsObj.PutAnonBlockDevMinor(lowerDevMinor) } if fs.opts.UpperRoot.Ok() { - fs.opts.UpperRoot.DecRef() + fs.opts.UpperRoot.DecRef(ctx) } for _, lowerRoot := range fs.opts.LowerRoots { - lowerRoot.DecRef() + lowerRoot.DecRef(ctx) } } @@ -452,10 +452,10 @@ func (d *dentry) TryIncRef() bool { } // DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef() { +func (d *dentry) DecRef(ctx context.Context) { if refs := atomic.AddInt64(&d.refs, -1); refs == 0 { d.fs.renameMu.Lock() - d.checkDropLocked() + d.checkDropLocked(ctx) d.fs.renameMu.Unlock() } else if refs < 0 { panic("overlay.dentry.DecRef() called without holding a reference") @@ -466,7 +466,7 @@ func (d *dentry) DecRef() { // becomes deleted. // // Preconditions: d.fs.renameMu must be locked for writing. -func (d *dentry) checkDropLocked() { +func (d *dentry) checkDropLocked(ctx context.Context) { // Dentries with a positive reference count must be retained. (The only way // to obtain a reference on a dentry with zero references is via path // resolution, which requires renameMu, so if d.refs is zero then it will @@ -476,14 +476,14 @@ func (d *dentry) checkDropLocked() { return } // Refs is still zero; destroy it. - d.destroyLocked() + d.destroyLocked(ctx) return } // destroyLocked destroys the dentry. // // Preconditions: d.fs.renameMu must be locked for writing. d.refs == 0. -func (d *dentry) destroyLocked() { +func (d *dentry) destroyLocked(ctx context.Context) { switch atomic.LoadInt64(&d.refs) { case 0: // Mark the dentry destroyed. @@ -495,10 +495,10 @@ func (d *dentry) destroyLocked() { } if d.upperVD.Ok() { - d.upperVD.DecRef() + d.upperVD.DecRef(ctx) } for _, lowerVD := range d.lowerVDs { - lowerVD.DecRef() + lowerVD.DecRef(ctx) } if d.parent != nil { @@ -510,7 +510,7 @@ func (d *dentry) destroyLocked() { // Drop the reference held by d on its parent without recursively // locking d.fs.renameMu. if refs := atomic.AddInt64(&d.parent.refs, -1); refs == 0 { - d.parent.checkDropLocked() + d.parent.checkDropLocked(ctx) } else if refs < 0 { panic("overlay.dentry.DecRef() called without holding a reference") } @@ -518,7 +518,7 @@ func (d *dentry) destroyLocked() { } // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. -func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) { +func (d *dentry) InotifyWithParent(ctx context.Context, events uint32, cookie uint32, et vfs.EventType) { // TODO(gvisor.dev/issue/1479): Implement inotify. } @@ -531,7 +531,7 @@ func (d *dentry) Watches() *vfs.Watches { // OnZeroWatches implements vfs.DentryImpl.OnZeroWatches. // // TODO(gvisor.dev/issue/1479): Implement inotify. -func (d *dentry) OnZeroWatches() {} +func (d *dentry) OnZeroWatches(context.Context) {} // iterLayers invokes yield on each layer comprising d, from top to bottom. If // any call to yield returns false, iterLayer stops iteration. diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index 811f80a5f..2ca793db9 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -63,9 +63,9 @@ func NewFilesystem(vfsObj *vfs.VirtualFilesystem) (*vfs.Filesystem, error) { } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) - fs.Filesystem.Release() + fs.Filesystem.Release(ctx) } // PrependPath implements vfs.FilesystemImpl.PrependPath. @@ -160,6 +160,6 @@ func NewConnectedPipeFDs(ctx context.Context, mnt *vfs.Mount, flags uint32) (*vf inode := newInode(ctx, fs) var d kernfs.Dentry d.Init(inode) - defer d.DecRef() + defer d.DecRef(ctx) return inode.pipe.ReaderWriterPair(mnt, d.VFSDentry(), flags) } diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index 609210253..2463d51cd 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -77,9 +77,9 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) - fs.Filesystem.Release() + fs.Filesystem.Release(ctx) } // dynamicInode is an overfitted interface for common Inodes with diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go index fea29e5f0..f0d3f7f5e 100644 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ b/pkg/sentry/fsimpl/proc/task_fds.go @@ -43,12 +43,12 @@ func getTaskFD(t *kernel.Task, fd int32) (*vfs.FileDescription, kernel.FDFlags) return file, flags } -func taskFDExists(t *kernel.Task, fd int32) bool { +func taskFDExists(ctx context.Context, t *kernel.Task, fd int32) bool { file, _ := getTaskFD(t, fd) if file == nil { return false } - file.DecRef() + file.DecRef(ctx) return true } @@ -68,7 +68,7 @@ func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, off var fds []int32 i.task.WithMuLocked(func(t *kernel.Task) { if fdTable := t.FDTable(); fdTable != nil { - fds = fdTable.GetFDs() + fds = fdTable.GetFDs(ctx) } }) @@ -135,7 +135,7 @@ func (i *fdDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro return nil, syserror.ENOENT } fd := int32(fdInt) - if !taskFDExists(i.task, fd) { + if !taskFDExists(ctx, i.task, fd) { return nil, syserror.ENOENT } taskDentry := i.fs.newFDSymlink(i.task, fd, i.fs.NextIno()) @@ -204,9 +204,9 @@ func (s *fdSymlink) Readlink(ctx context.Context) (string, error) { if file == nil { return "", syserror.ENOENT } - defer file.DecRef() + defer file.DecRef(ctx) root := vfs.RootFromContext(ctx) - defer root.DecRef() + defer root.DecRef(ctx) return s.task.Kernel().VFS().PathnameWithDeleted(ctx, root, file.VirtualDentry()) } @@ -215,7 +215,7 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen if file == nil { return vfs.VirtualDentry{}, "", syserror.ENOENT } - defer file.DecRef() + defer file.DecRef(ctx) vd := file.VirtualDentry() vd.IncRef() return vd, "", nil @@ -258,7 +258,7 @@ func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, return nil, syserror.ENOENT } fd := int32(fdInt) - if !taskFDExists(i.task, fd) { + if !taskFDExists(ctx, i.task, fd) { return nil, syserror.ENOENT } data := &fdInfoData{ @@ -297,7 +297,7 @@ func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { if file == nil { return syserror.ENOENT } - defer file.DecRef() + defer file.DecRef(ctx) // TODO(b/121266871): Include pos, locks, and other data. For now we only // have flags. // See https://www.kernel.org/doc/Documentation/filesystems/proc.txt diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 859b7d727..830b78949 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -677,7 +677,7 @@ func (s *exeSymlink) Readlink(ctx context.Context) (string, error) { if err != nil { return "", err } - defer exec.DecRef() + defer exec.DecRef(ctx) return exec.PathnameWithDeleted(ctx), nil } @@ -692,7 +692,7 @@ func (s *exeSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDent if err != nil { return vfs.VirtualDentry{}, "", err } - defer exec.DecRef() + defer exec.DecRef(ctx) vd := exec.(*fsbridge.VFSFile).FileDescription().VirtualDentry() vd.IncRef() @@ -748,7 +748,7 @@ func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { // Root has been destroyed. Don't try to read mounts. return nil } - defer rootDir.DecRef() + defer rootDir.DecRef(ctx) i.task.Kernel().VFS().GenerateProcMountInfo(ctx, rootDir, buf) return nil } @@ -779,7 +779,7 @@ func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error { // Root has been destroyed. Don't try to read mounts. return nil } - defer rootDir.DecRef() + defer rootDir.DecRef(ctx) i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf) return nil } @@ -825,7 +825,7 @@ func (s *namespaceSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.Vir dentry.Init(&namespaceInode{}) vd := vfs.MakeVirtualDentry(mnt, dentry.VFSDentry()) vd.IncRef() - dentry.DecRef() + dentry.DecRef(ctx) return vd, "", nil } @@ -887,8 +887,8 @@ func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) err } // Release implements FileDescriptionImpl. -func (fd *namespaceFD) Release() { - fd.inode.DecRef() +func (fd *namespaceFD) Release(ctx context.Context) { + fd.inode.DecRef(ctx) } // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go index 6bde27376..a4c884bf9 100644 --- a/pkg/sentry/fsimpl/proc/task_net.go +++ b/pkg/sentry/fsimpl/proc/task_net.go @@ -212,7 +212,7 @@ func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error { continue } if family, _, _ := s.Impl().(socket.SocketVFS2).Type(); family != linux.AF_UNIX { - s.DecRef() + s.DecRef(ctx) // Not a unix socket. continue } @@ -281,7 +281,7 @@ func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error { } fmt.Fprintf(buf, "\n") - s.DecRef() + s.DecRef(ctx) } return nil } @@ -359,7 +359,7 @@ func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s)) } if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) { - s.DecRef() + s.DecRef(ctx) // Not tcp4 sockets. continue } @@ -455,7 +455,7 @@ func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, fmt.Fprintf(buf, "\n") - s.DecRef() + s.DecRef(ctx) } return nil @@ -524,7 +524,7 @@ func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error { panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s)) } if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM { - s.DecRef() + s.DecRef(ctx) // Not udp4 socket. continue } @@ -600,7 +600,7 @@ func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error { fmt.Fprintf(buf, "\n") - s.DecRef() + s.DecRef(ctx) } return nil } diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go index 19abb5034..3c9297dee 100644 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_test.go @@ -218,7 +218,7 @@ func TestTasks(t *testing.T) { if err != nil { t.Fatalf("vfsfs.OpenAt(%q) failed: %v", path, err) } - defer fd.DecRef() + defer fd.DecRef(s.Ctx) buf := make([]byte, 1) bufIOSeq := usermem.BytesIOSequence(buf) if _, err := fd.Read(s.Ctx, bufIOSeq, vfs.ReadOptions{}); err != syserror.EISDIR { @@ -336,7 +336,7 @@ func TestTasksOffset(t *testing.T) { if err != nil { t.Fatalf("vfsfs.OpenAt(/) failed: %v", err) } - defer fd.DecRef() + defer fd.DecRef(s.Ctx) if _, err := fd.Seek(s.Ctx, tc.offset, linux.SEEK_SET); err != nil { t.Fatalf("Seek(%d, SEEK_SET): %v", tc.offset, err) } @@ -441,7 +441,7 @@ func iterateDir(ctx context.Context, t *testing.T, s *testutil.System, fd *vfs.F t.Errorf("vfsfs.OpenAt(%v) failed: %v", absPath, err) continue } - defer child.DecRef() + defer child.DecRef(ctx) stat, err := child.Stat(ctx, vfs.StatOptions{}) if err != nil { t.Errorf("Stat(%v) failed: %v", absPath, err) @@ -476,7 +476,7 @@ func TestTree(t *testing.T) { if err != nil { t.Fatalf("failed to create test file: %v", err) } - defer file.DecRef() + defer file.DecRef(s.Ctx) var tasks []*kernel.Task for i := 0; i < 5; i++ { @@ -501,5 +501,5 @@ func TestTree(t *testing.T) { t.Fatalf("vfsfs.OpenAt(/proc) failed: %v", err) } iterateDir(ctx, t, s, fd) - fd.DecRef() + fd.DecRef(ctx) } diff --git a/pkg/sentry/fsimpl/signalfd/signalfd.go b/pkg/sentry/fsimpl/signalfd/signalfd.go index 242ba9b5d..6297e1df4 100644 --- a/pkg/sentry/fsimpl/signalfd/signalfd.go +++ b/pkg/sentry/fsimpl/signalfd/signalfd.go @@ -54,7 +54,7 @@ var _ vfs.FileDescriptionImpl = (*SignalFileDescription)(nil) // New creates a new signal fd. func New(vfsObj *vfs.VirtualFilesystem, target *kernel.Task, mask linux.SignalSet, flags uint32) (*vfs.FileDescription, error) { vd := vfsObj.NewAnonVirtualDentry("[signalfd]") - defer vd.DecRef() + defer vd.DecRef(target) sfd := &SignalFileDescription{ target: target, mask: mask, @@ -133,4 +133,4 @@ func (sfd *SignalFileDescription) EventUnregister(entry *waiter.Entry) { } // Release implements FileDescriptionImpl.Release() -func (sfd *SignalFileDescription) Release() {} +func (sfd *SignalFileDescription) Release(context.Context) {} diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go index ee0828a15..c61818ff6 100644 --- a/pkg/sentry/fsimpl/sockfs/sockfs.go +++ b/pkg/sentry/fsimpl/sockfs/sockfs.go @@ -67,9 +67,9 @@ func NewFilesystem(vfsObj *vfs.VirtualFilesystem) (*vfs.Filesystem, error) { } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) - fs.Filesystem.Release() + fs.Filesystem.Release(ctx) } // PrependPath implements vfs.FilesystemImpl.PrependPath. diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index 01ce30a4d..f81b0c38f 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -87,9 +87,9 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) - fs.Filesystem.Release() + fs.Filesystem.Release(ctx) } // dir implements kernfs.Inode. diff --git a/pkg/sentry/fsimpl/sys/sys_test.go b/pkg/sentry/fsimpl/sys/sys_test.go index 242d5fd12..9fd38b295 100644 --- a/pkg/sentry/fsimpl/sys/sys_test.go +++ b/pkg/sentry/fsimpl/sys/sys_test.go @@ -59,7 +59,7 @@ func TestReadCPUFile(t *testing.T) { if err != nil { t.Fatalf("OpenAt(pop:%+v) = %+v failed: %v", pop, fd, err) } - defer fd.DecRef() + defer fd.DecRef(s.Ctx) content, err := s.ReadToEnd(fd) if err != nil { t.Fatalf("Read failed: %v", err) diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go index e743e8114..1e57744e8 100644 --- a/pkg/sentry/fsimpl/testutil/kernel.go +++ b/pkg/sentry/fsimpl/testutil/kernel.go @@ -127,7 +127,7 @@ func CreateTask(ctx context.Context, name string, tc *kernel.ThreadGroup, mntns return nil, err } m := mm.NewMemoryManager(k, k, k.SleepForAddressSpaceActivation) - m.SetExecutable(fsbridge.NewVFSFile(exe)) + m.SetExecutable(ctx, fsbridge.NewVFSFile(exe)) config := &kernel.TaskConfig{ Kernel: k, diff --git a/pkg/sentry/fsimpl/testutil/testutil.go b/pkg/sentry/fsimpl/testutil/testutil.go index 0556af877..568132121 100644 --- a/pkg/sentry/fsimpl/testutil/testutil.go +++ b/pkg/sentry/fsimpl/testutil/testutil.go @@ -97,8 +97,8 @@ func (s *System) WithTemporaryContext(ctx context.Context) *System { // Destroy release resources associated with a test system. func (s *System) Destroy() { - s.Root.DecRef() - s.MntNs.DecRef() // Reference on MntNs passed to NewSystem. + s.Root.DecRef(s.Ctx) + s.MntNs.DecRef(s.Ctx) // Reference on MntNs passed to NewSystem. } // ReadToEnd reads the contents of fd until EOF to a string. @@ -149,7 +149,7 @@ func (s *System) ListDirents(pop *vfs.PathOperation) *DirentCollector { if err != nil { s.t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) } - defer fd.DecRef() + defer fd.DecRef(s.Ctx) collector := &DirentCollector{} if err := fd.IterDirents(s.Ctx, collector); err != nil { diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go index 2dc90d484..86beaa0a8 100644 --- a/pkg/sentry/fsimpl/timerfd/timerfd.go +++ b/pkg/sentry/fsimpl/timerfd/timerfd.go @@ -47,9 +47,9 @@ var _ vfs.FileDescriptionImpl = (*TimerFileDescription)(nil) var _ ktime.TimerListener = (*TimerFileDescription)(nil) // New returns a new timer fd. -func New(vfsObj *vfs.VirtualFilesystem, clock ktime.Clock, flags uint32) (*vfs.FileDescription, error) { +func New(ctx context.Context, vfsObj *vfs.VirtualFilesystem, clock ktime.Clock, flags uint32) (*vfs.FileDescription, error) { vd := vfsObj.NewAnonVirtualDentry("[timerfd]") - defer vd.DecRef() + defer vd.DecRef(ctx) tfd := &TimerFileDescription{} tfd.timer = ktime.NewTimer(clock, tfd) if err := tfd.vfsfd.Init(tfd, flags, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{ @@ -129,7 +129,7 @@ func (tfd *TimerFileDescription) ResumeTimer() { } // Release implements FileDescriptionImpl.Release() -func (tfd *TimerFileDescription) Release() { +func (tfd *TimerFileDescription) Release(context.Context) { tfd.timer.Destroy() } diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go index 2fb5c4d84..d263147c2 100644 --- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go +++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go @@ -83,7 +83,7 @@ func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent } err = fn(root, d) - d.DecRef() + d.DecRef(ctx) return err } @@ -105,17 +105,17 @@ func BenchmarkVFS1TmpfsStat(b *testing.B) { if err != nil { b.Fatalf("failed to create mount namespace: %v", err) } - defer mntns.DecRef() + defer mntns.DecRef(ctx) var filePathBuilder strings.Builder filePathBuilder.WriteByte('/') // Create nested directories with given depth. root := mntns.Root() - defer root.DecRef() + defer root.DecRef(ctx) d := root d.IncRef() - defer d.DecRef() + defer d.DecRef(ctx) for i := depth; i > 0; i-- { name := fmt.Sprintf("%d", i) if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { @@ -125,7 +125,7 @@ func BenchmarkVFS1TmpfsStat(b *testing.B) { if err != nil { b.Fatalf("failed to walk to directory %q: %v", name, err) } - d.DecRef() + d.DecRef(ctx) d = next filePathBuilder.WriteString(name) filePathBuilder.WriteByte('/') @@ -136,7 +136,7 @@ func BenchmarkVFS1TmpfsStat(b *testing.B) { if err != nil { b.Fatalf("failed to create file %q: %v", filename, err) } - file.DecRef() + file.DecRef(ctx) filePathBuilder.WriteString(filename) filePath := filePathBuilder.String() @@ -176,7 +176,7 @@ func BenchmarkVFS2TmpfsStat(b *testing.B) { // Create VFS. vfsObj := vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { b.Fatalf("VFS init: %v", err) } vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ @@ -186,14 +186,14 @@ func BenchmarkVFS2TmpfsStat(b *testing.B) { if err != nil { b.Fatalf("failed to create tmpfs root mount: %v", err) } - defer mntns.DecRef() + defer mntns.DecRef(ctx) var filePathBuilder strings.Builder filePathBuilder.WriteByte('/') // Create nested directories with given depth. root := mntns.Root() - defer root.DecRef() + defer root.DecRef(ctx) vd := root vd.IncRef() for i := depth; i > 0; i-- { @@ -212,7 +212,7 @@ func BenchmarkVFS2TmpfsStat(b *testing.B) { if err != nil { b.Fatalf("failed to walk to directory %q: %v", name, err) } - vd.DecRef() + vd.DecRef(ctx) vd = nextVD filePathBuilder.WriteString(name) filePathBuilder.WriteByte('/') @@ -228,12 +228,12 @@ func BenchmarkVFS2TmpfsStat(b *testing.B) { Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, Mode: 0644, }) - vd.DecRef() + vd.DecRef(ctx) vd = vfs.VirtualDentry{} if err != nil { b.Fatalf("failed to create file %q: %v", filename, err) } - defer fd.DecRef() + defer fd.DecRef(ctx) filePathBuilder.WriteString(filename) filePath := filePathBuilder.String() @@ -278,14 +278,14 @@ func BenchmarkVFS1TmpfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to create mount namespace: %v", err) } - defer mntns.DecRef() + defer mntns.DecRef(ctx) var filePathBuilder strings.Builder filePathBuilder.WriteByte('/') // Create and mount the submount. root := mntns.Root() - defer root.DecRef() + defer root.DecRef(ctx) if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil { b.Fatalf("failed to create mount point: %v", err) } @@ -293,7 +293,7 @@ func BenchmarkVFS1TmpfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to walk to mount point: %v", err) } - defer mountPoint.DecRef() + defer mountPoint.DecRef(ctx) submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) if err != nil { b.Fatalf("failed to create tmpfs submount: %v", err) @@ -309,7 +309,7 @@ func BenchmarkVFS1TmpfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to walk to mount root: %v", err) } - defer d.DecRef() + defer d.DecRef(ctx) for i := depth; i > 0; i-- { name := fmt.Sprintf("%d", i) if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { @@ -319,7 +319,7 @@ func BenchmarkVFS1TmpfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to walk to directory %q: %v", name, err) } - d.DecRef() + d.DecRef(ctx) d = next filePathBuilder.WriteString(name) filePathBuilder.WriteByte('/') @@ -330,7 +330,7 @@ func BenchmarkVFS1TmpfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to create file %q: %v", filename, err) } - file.DecRef() + file.DecRef(ctx) filePathBuilder.WriteString(filename) filePath := filePathBuilder.String() @@ -370,7 +370,7 @@ func BenchmarkVFS2TmpfsMountStat(b *testing.B) { // Create VFS. vfsObj := vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { b.Fatalf("VFS init: %v", err) } vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ @@ -380,14 +380,14 @@ func BenchmarkVFS2TmpfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to create tmpfs root mount: %v", err) } - defer mntns.DecRef() + defer mntns.DecRef(ctx) var filePathBuilder strings.Builder filePathBuilder.WriteByte('/') // Create the mount point. root := mntns.Root() - defer root.DecRef() + defer root.DecRef(ctx) pop := vfs.PathOperation{ Root: root, Start: root, @@ -403,7 +403,7 @@ func BenchmarkVFS2TmpfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to walk to mount point: %v", err) } - defer mountPoint.DecRef() + defer mountPoint.DecRef(ctx) // Create and mount the submount. if err := vfsObj.MountAt(ctx, creds, "", &pop, "tmpfs", &vfs.MountOptions{}); err != nil { b.Fatalf("failed to mount tmpfs submount: %v", err) @@ -432,7 +432,7 @@ func BenchmarkVFS2TmpfsMountStat(b *testing.B) { if err != nil { b.Fatalf("failed to walk to directory %q: %v", name, err) } - vd.DecRef() + vd.DecRef(ctx) vd = nextVD filePathBuilder.WriteString(name) filePathBuilder.WriteByte('/') @@ -448,11 +448,11 @@ func BenchmarkVFS2TmpfsMountStat(b *testing.B) { Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, Mode: 0644, }) - vd.DecRef() + vd.DecRef(ctx) if err != nil { b.Fatalf("failed to create file %q: %v", filename, err) } - fd.DecRef() + fd.DecRef(ctx) filePathBuilder.WriteString(filename) filePath := filePathBuilder.String() diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index 0a1ad4765..78b4fc5be 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -95,7 +95,7 @@ type directoryFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *directoryFD) Release() { +func (fd *directoryFD) Release(ctx context.Context) { if fd.iter != nil { dir := fd.inode().impl.(*directory) dir.iterMu.Lock() @@ -110,7 +110,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba fs := fd.filesystem() dir := fd.inode().impl.(*directory) - defer fd.dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + defer fd.dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, vfs.PathEvent) // fs.mu is required to read d.parent and dentry.name. fs.mu.RLock() diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index ef210a69b..fb77f95cc 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -40,7 +40,7 @@ func (fs *filesystem) Sync(ctx context.Context) error { // stepLocked is loosely analogous to fs/namei.c:walk_component(). // // Preconditions: filesystem.mu must be locked. !rp.Done(). -func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { +func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { dir, ok := d.inode.impl.(*directory) if !ok { return nil, syserror.ENOTDIR @@ -55,13 +55,13 @@ afterSymlink: return d, nil } if name == ".." { - if isRoot, err := rp.CheckRoot(&d.vfsd); err != nil { + if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil { return nil, err } else if isRoot || d.parent == nil { rp.Advance() return d, nil } - if err := rp.CheckMount(&d.parent.vfsd); err != nil { + if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil { return nil, err } rp.Advance() @@ -74,7 +74,7 @@ afterSymlink: if !ok { return nil, syserror.ENOENT } - if err := rp.CheckMount(&child.vfsd); err != nil { + if err := rp.CheckMount(ctx, &child.vfsd); err != nil { return nil, err } if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { @@ -98,9 +98,9 @@ afterSymlink: // fs/namei.c:path_parentat(). // // Preconditions: filesystem.mu must be locked. !rp.Done(). -func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*directory, error) { +func walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry) (*directory, error) { for !rp.Final() { - next, err := stepLocked(rp, d) + next, err := stepLocked(ctx, rp, d) if err != nil { return nil, err } @@ -118,10 +118,10 @@ func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*directory, error) { // resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). // // Preconditions: filesystem.mu must be locked. -func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) { +func resolveLocked(ctx context.Context, rp *vfs.ResolvingPath) (*dentry, error) { d := rp.Start().Impl().(*dentry) for !rp.Done() { - next, err := stepLocked(rp, d) + next, err := stepLocked(ctx, rp, d) if err != nil { return nil, err } @@ -141,10 +141,10 @@ func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) { // // Preconditions: !rp.Done(). For the final path component in rp, // !rp.ShouldFollowSymlink(). -func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parentDir *directory, name string) error) error { +func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parentDir *directory, name string) error) error { fs.mu.Lock() defer fs.mu.Unlock() - parentDir, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + parentDir, err := walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry)) if err != nil { return err } @@ -182,7 +182,7 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa if dir { ev |= linux.IN_ISDIR } - parentDir.inode.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) + parentDir.inode.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */) parentDir.inode.touchCMtime() return nil } @@ -191,7 +191,7 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { fs.mu.RLock() defer fs.mu.RUnlock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { return err } @@ -202,7 +202,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { fs.mu.RLock() defer fs.mu.RUnlock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { return nil, err } @@ -222,7 +222,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { fs.mu.RLock() defer fs.mu.RUnlock() - dir, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + dir, err := walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry)) if err != nil { return nil, err } @@ -232,7 +232,7 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa // LinkAt implements vfs.FilesystemImpl.LinkAt. func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { - return fs.doCreateAt(rp, false /* dir */, func(parentDir *directory, name string) error { + return fs.doCreateAt(ctx, rp, false /* dir */, func(parentDir *directory, name string) error { if rp.Mount() != vd.Mount() { return syserror.EXDEV } @@ -251,7 +251,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.EMLINK } i.incLinksLocked() - i.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent, false /* unlinked */) + i.watches.Notify(ctx, "", linux.IN_ATTRIB, 0, vfs.InodeEvent, false /* unlinked */) parentDir.insertChildLocked(fs.newDentry(i), name) return nil }) @@ -259,7 +259,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. // MkdirAt implements vfs.FilesystemImpl.MkdirAt. func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { - return fs.doCreateAt(rp, true /* dir */, func(parentDir *directory, name string) error { + return fs.doCreateAt(ctx, rp, true /* dir */, func(parentDir *directory, name string) error { creds := rp.Credentials() if parentDir.inode.nlink == maxLinks { return syserror.EMLINK @@ -273,7 +273,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v // MknodAt implements vfs.FilesystemImpl.MknodAt. func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { - return fs.doCreateAt(rp, false /* dir */, func(parentDir *directory, name string) error { + return fs.doCreateAt(ctx, rp, false /* dir */, func(parentDir *directory, name string) error { creds := rp.Credentials() var childInode *inode switch opts.Mode.FileType() { @@ -308,7 +308,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if opts.Flags&linux.O_CREAT == 0 { fs.mu.RLock() defer fs.mu.RUnlock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { return nil, err } @@ -330,7 +330,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf return start.open(ctx, rp, &opts, false /* afterCreate */) } afterTrailingSymlink: - parentDir, err := walkParentDirLocked(rp, start) + parentDir, err := walkParentDirLocked(ctx, rp, start) if err != nil { return nil, err } @@ -368,7 +368,7 @@ afterTrailingSymlink: if err != nil { return nil, err } - parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */) + parentDir.inode.watches.Notify(ctx, name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */) parentDir.inode.touchCMtime() return fd, nil } @@ -376,7 +376,7 @@ afterTrailingSymlink: return nil, syserror.EEXIST } // Is the file mounted over? - if err := rp.CheckMount(&child.vfsd); err != nil { + if err := rp.CheckMount(ctx, &child.vfsd); err != nil { return nil, err } // Do we need to resolve a trailing symlink? @@ -445,7 +445,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { return "", err } @@ -467,7 +467,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa // Resolve newParent first to verify that it's on this Mount. fs.mu.Lock() defer fs.mu.Unlock() - newParentDir, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + newParentDir, err := walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry)) if err != nil { return err } @@ -555,7 +555,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa } vfsObj := rp.VirtualFilesystem() mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) var replacedVFSD *vfs.Dentry if replaced != nil { replacedVFSD = &replaced.vfsd @@ -566,17 +566,17 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if replaced != nil { newParentDir.removeChildLocked(replaced) if replaced.inode.isDir() { - newParentDir.inode.decLinksLocked() // from replaced's ".." + newParentDir.inode.decLinksLocked(ctx) // from replaced's ".." } - replaced.inode.decLinksLocked() + replaced.inode.decLinksLocked(ctx) } oldParentDir.removeChildLocked(renamed) newParentDir.insertChildLocked(renamed, newName) - vfsObj.CommitRenameReplaceDentry(&renamed.vfsd, replacedVFSD) + vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD) oldParentDir.inode.touchCMtime() if oldParentDir != newParentDir { if renamed.inode.isDir() { - oldParentDir.inode.decLinksLocked() + oldParentDir.inode.decLinksLocked(ctx) newParentDir.inode.incLinksLocked() } newParentDir.inode.touchCMtime() @@ -591,7 +591,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() defer fs.mu.Unlock() - parentDir, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + parentDir, err := walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry)) if err != nil { return err } @@ -626,17 +626,17 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error defer mnt.EndWrite() vfsObj := rp.VirtualFilesystem() mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { return err } parentDir.removeChildLocked(child) - parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */) + parentDir.inode.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */) // Remove links for child, child/., and child/.. - child.inode.decLinksLocked() - child.inode.decLinksLocked() - parentDir.inode.decLinksLocked() - vfsObj.CommitDeleteDentry(&child.vfsd) + child.inode.decLinksLocked(ctx) + child.inode.decLinksLocked(ctx) + parentDir.inode.decLinksLocked(ctx) + vfsObj.CommitDeleteDentry(ctx, &child.vfsd) parentDir.inode.touchCMtime() return nil } @@ -644,7 +644,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error // SetStatAt implements vfs.FilesystemImpl.SetStatAt. func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { fs.mu.RLock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { fs.mu.RUnlock() return err @@ -656,7 +656,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts fs.mu.RUnlock() if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { - d.InotifyWithParent(ev, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent) } return nil } @@ -665,7 +665,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { fs.mu.RLock() defer fs.mu.RUnlock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { return linux.Statx{}, err } @@ -678,7 +678,7 @@ func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { fs.mu.RLock() defer fs.mu.RUnlock() - if _, err := resolveLocked(rp); err != nil { + if _, err := resolveLocked(ctx, rp); err != nil { return linux.Statfs{}, err } statfs := linux.Statfs{ @@ -695,7 +695,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { - return fs.doCreateAt(rp, false /* dir */, func(parentDir *directory, name string) error { + return fs.doCreateAt(ctx, rp, false /* dir */, func(parentDir *directory, name string) error { creds := rp.Credentials() child := fs.newDentry(fs.newSymlink(creds.EffectiveKUID, creds.EffectiveKGID, 0777, target)) parentDir.insertChildLocked(child, name) @@ -707,7 +707,7 @@ func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() defer fs.mu.Unlock() - parentDir, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) + parentDir, err := walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry)) if err != nil { return err } @@ -738,7 +738,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error defer mnt.EndWrite() vfsObj := rp.VirtualFilesystem() mntns := vfs.MountNamespaceFromContext(ctx) - defer mntns.DecRef() + defer mntns.DecRef(ctx) if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { return err } @@ -746,11 +746,11 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error // Generate inotify events. Note that this must take place before the link // count of the child is decremented, or else the watches may be dropped // before these events are added. - vfs.InotifyRemoveChild(&child.inode.watches, &parentDir.inode.watches, name) + vfs.InotifyRemoveChild(ctx, &child.inode.watches, &parentDir.inode.watches, name) parentDir.removeChildLocked(child) - child.inode.decLinksLocked() - vfsObj.CommitDeleteDentry(&child.vfsd) + child.inode.decLinksLocked(ctx) + vfsObj.CommitDeleteDentry(ctx, &child.vfsd) parentDir.inode.touchCMtime() return nil } @@ -759,7 +759,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { fs.mu.RLock() defer fs.mu.RUnlock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { return nil, err } @@ -778,7 +778,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { return nil, err } @@ -789,7 +789,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath, si func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetxattrOptions) (string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { return "", err } @@ -799,7 +799,7 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { fs.mu.RLock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { fs.mu.RUnlock() return err @@ -810,14 +810,14 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt } fs.mu.RUnlock() - d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { fs.mu.RLock() - d, err := resolveLocked(rp) + d, err := resolveLocked(ctx, rp) if err != nil { fs.mu.RUnlock() return err @@ -828,7 +828,7 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, } fs.mu.RUnlock() - d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go index 1614f2c39..ec2701d8b 100644 --- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go +++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go @@ -32,7 +32,7 @@ const fileName = "mypipe" func TestSeparateFDs(t *testing.T) { ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() + defer root.DecRef(ctx) // Open the read side. This is done in a concurrently because opening // One end the pipe blocks until the other end is opened. @@ -55,13 +55,13 @@ func TestSeparateFDs(t *testing.T) { if err != nil { t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) } - defer wfd.DecRef() + defer wfd.DecRef(ctx) rfd, ok := <-rfdchan if !ok { t.Fatalf("failed to open pipe for reading %q", fileName) } - defer rfd.DecRef() + defer rfd.DecRef(ctx) const msg = "vamos azul" checkEmpty(ctx, t, rfd) @@ -71,7 +71,7 @@ func TestSeparateFDs(t *testing.T) { func TestNonblockingRead(t *testing.T) { ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() + defer root.DecRef(ctx) // Open the read side as nonblocking. pop := vfs.PathOperation{ @@ -85,7 +85,7 @@ func TestNonblockingRead(t *testing.T) { if err != nil { t.Fatalf("failed to open pipe for reading %q: %v", fileName, err) } - defer rfd.DecRef() + defer rfd.DecRef(ctx) // Open the write side. openOpts = vfs.OpenOptions{Flags: linux.O_WRONLY} @@ -93,7 +93,7 @@ func TestNonblockingRead(t *testing.T) { if err != nil { t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) } - defer wfd.DecRef() + defer wfd.DecRef(ctx) const msg = "geh blau" checkEmpty(ctx, t, rfd) @@ -103,7 +103,7 @@ func TestNonblockingRead(t *testing.T) { func TestNonblockingWriteError(t *testing.T) { ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() + defer root.DecRef(ctx) // Open the write side as nonblocking, which should return ENXIO. pop := vfs.PathOperation{ @@ -121,7 +121,7 @@ func TestNonblockingWriteError(t *testing.T) { func TestSingleFD(t *testing.T) { ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() + defer root.DecRef(ctx) // Open the pipe as readable and writable. pop := vfs.PathOperation{ @@ -135,7 +135,7 @@ func TestSingleFD(t *testing.T) { if err != nil { t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) } - defer fd.DecRef() + defer fd.DecRef(ctx) const msg = "forza blu" checkEmpty(ctx, t, fd) @@ -152,7 +152,7 @@ func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesy // Create VFS. vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { t.Fatalf("VFS init: %v", err) } vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index abbaa5d60..0710b65db 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -270,7 +270,7 @@ type regularFileFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *regularFileFD) Release() { +func (fd *regularFileFD) Release(context.Context) { // noop } diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 2545d88e9..68e615e8b 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -185,7 +185,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt case linux.S_IFDIR: root = &fs.newDirectory(rootKUID, rootKGID, rootMode).dentry default: - fs.vfsfs.DecRef() + fs.vfsfs.DecRef(ctx) return nil, nil, fmt.Errorf("invalid tmpfs root file type: %#o", rootFileType) } return &fs.vfsfs, &root.vfsd, nil @@ -197,7 +197,7 @@ func NewFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *au } // Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +func (fs *filesystem) Release(ctx context.Context) { fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) } @@ -249,12 +249,12 @@ func (d *dentry) TryIncRef() bool { } // DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef() { - d.inode.decRef() +func (d *dentry) DecRef(ctx context.Context) { + d.inode.decRef(ctx) } // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. -func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) { +func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) { if d.inode.isDir() { events |= linux.IN_ISDIR } @@ -266,9 +266,9 @@ func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) { d.inode.fs.mu.RLock() // The ordering below is important, Linux always notifies the parent first. if d.parent != nil { - d.parent.inode.watches.Notify(d.name, events, cookie, et, deleted) + d.parent.inode.watches.Notify(ctx, d.name, events, cookie, et, deleted) } - d.inode.watches.Notify("", events, cookie, et, deleted) + d.inode.watches.Notify(ctx, "", events, cookie, et, deleted) d.inode.fs.mu.RUnlock() } @@ -278,7 +278,7 @@ func (d *dentry) Watches() *vfs.Watches { } // OnZeroWatches implements vfs.Dentry.OnZeroWatches. -func (d *dentry) OnZeroWatches() {} +func (d *dentry) OnZeroWatches(context.Context) {} // inode represents a filesystem object. type inode struct { @@ -359,12 +359,12 @@ func (i *inode) incLinksLocked() { // remove a reference on i as well. // // Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. -func (i *inode) decLinksLocked() { +func (i *inode) decLinksLocked(ctx context.Context) { if i.nlink == 0 { panic("tmpfs.inode.decLinksLocked() called with no existing links") } if atomic.AddUint32(&i.nlink, ^uint32(0)) == 0 { - i.decRef() + i.decRef(ctx) } } @@ -386,9 +386,9 @@ func (i *inode) tryIncRef() bool { } } -func (i *inode) decRef() { +func (i *inode) decRef(ctx context.Context) { if refs := atomic.AddInt64(&i.refs, -1); refs == 0 { - i.watches.HandleDeletion() + i.watches.HandleDeletion(ctx) if regFile, ok := i.impl.(*regularFile); ok { // Release memory used by regFile to store data. Since regFile is // no longer usable, we don't need to grab any locks or update any @@ -701,7 +701,7 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) } if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { - d.InotifyWithParent(ev, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent) } return nil } @@ -724,7 +724,7 @@ func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOption } // Generate inotify events. - d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } @@ -736,13 +736,13 @@ func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { } // Generate inotify events. - d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent) return nil } // NewMemfd creates a new tmpfs regular file and file description that can back // an anonymous fd created by memfd_create. -func NewMemfd(mount *vfs.Mount, creds *auth.Credentials, allowSeals bool, name string) (*vfs.FileDescription, error) { +func NewMemfd(ctx context.Context, creds *auth.Credentials, mount *vfs.Mount, allowSeals bool, name string) (*vfs.FileDescription, error) { fs, ok := mount.Filesystem().Impl().(*filesystem) if !ok { panic("NewMemfd() called with non-tmpfs mount") @@ -757,7 +757,7 @@ func NewMemfd(mount *vfs.Mount, creds *auth.Credentials, allowSeals bool, name s } d := fs.newDentry(inode) - defer d.DecRef() + defer d.DecRef(ctx) d.name = name // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go index a240fb276..6f3e3ae6f 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go @@ -34,7 +34,7 @@ func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentr creds := auth.CredentialsFromContext(ctx) vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err) } @@ -47,8 +47,8 @@ func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentr } root := mntns.Root() return vfsObj, root, func() { - root.DecRef() - mntns.DecRef() + root.DecRef(ctx) + mntns.DecRef(ctx) }, nil } diff --git a/pkg/sentry/kernel/abstract_socket_namespace.go b/pkg/sentry/kernel/abstract_socket_namespace.go index 920fe4329..52ed5cea2 100644 --- a/pkg/sentry/kernel/abstract_socket_namespace.go +++ b/pkg/sentry/kernel/abstract_socket_namespace.go @@ -17,6 +17,7 @@ package kernel import ( "syscall" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" @@ -31,7 +32,7 @@ type abstractEndpoint struct { } // WeakRefGone implements refs.WeakRefUser.WeakRefGone. -func (e *abstractEndpoint) WeakRefGone() { +func (e *abstractEndpoint) WeakRefGone(context.Context) { e.ns.mu.Lock() if e.ns.endpoints[e.name].ep == e.ep { delete(e.ns.endpoints, e.name) @@ -64,9 +65,9 @@ type boundEndpoint struct { } // Release implements transport.BoundEndpoint.Release. -func (e *boundEndpoint) Release() { - e.rc.DecRef() - e.BoundEndpoint.Release() +func (e *boundEndpoint) Release(ctx context.Context) { + e.rc.DecRef(ctx) + e.BoundEndpoint.Release(ctx) } // BoundEndpoint retrieves the endpoint bound to the given name. The return @@ -93,13 +94,13 @@ func (a *AbstractSocketNamespace) BoundEndpoint(name string) transport.BoundEndp // // When the last reference managed by rc is dropped, ep may be removed from the // namespace. -func (a *AbstractSocketNamespace) Bind(name string, ep transport.BoundEndpoint, rc refs.RefCounter) error { +func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep transport.BoundEndpoint, rc refs.RefCounter) error { a.mu.Lock() defer a.mu.Unlock() if ep, ok := a.endpoints[name]; ok { if rc := ep.wr.Get(); rc != nil { - rc.DecRef() + rc.DecRef(ctx) return syscall.EADDRINUSE } } diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go index 4c0f1e41f..15519f0df 100644 --- a/pkg/sentry/kernel/epoll/epoll.go +++ b/pkg/sentry/kernel/epoll/epoll.go @@ -76,8 +76,8 @@ type pollEntry struct { // WeakRefGone implements refs.WeakRefUser.WeakRefGone. // weakReferenceGone is called when the file in the weak reference is destroyed. // The poll entry is removed in response to this. -func (p *pollEntry) WeakRefGone() { - p.epoll.RemoveEntry(p.id) +func (p *pollEntry) WeakRefGone(ctx context.Context) { + p.epoll.RemoveEntry(ctx, p.id) } // EventPoll holds all the state associated with an event poll object, that is, @@ -144,14 +144,14 @@ func NewEventPoll(ctx context.Context) *fs.File { // name matches fs/eventpoll.c:epoll_create1. dirent := fs.NewDirent(ctx, anon.NewInode(ctx), fmt.Sprintf("anon_inode:[eventpoll]")) // Release the initial dirent reference after NewFile takes a reference. - defer dirent.DecRef() + defer dirent.DecRef(ctx) return fs.NewFile(ctx, dirent, fs.FileFlags{}, &EventPoll{ files: make(map[FileIdentifier]*pollEntry), }) } // Release implements fs.FileOperations.Release. -func (e *EventPoll) Release() { +func (e *EventPoll) Release(ctx context.Context) { // We need to take the lock now because files may be attempting to // remove entries in parallel if they get destroyed. e.mu.Lock() @@ -160,7 +160,7 @@ func (e *EventPoll) Release() { // Go through all entries and clean up. for _, entry := range e.files { entry.id.File.EventUnregister(&entry.waiter) - entry.file.Drop() + entry.file.Drop(ctx) } e.files = nil } @@ -423,7 +423,7 @@ func (e *EventPoll) UpdateEntry(id FileIdentifier, flags EntryFlags, mask waiter } // RemoveEntry a files from the collection of observed files. -func (e *EventPoll) RemoveEntry(id FileIdentifier) error { +func (e *EventPoll) RemoveEntry(ctx context.Context, id FileIdentifier) error { e.mu.Lock() defer e.mu.Unlock() @@ -445,7 +445,7 @@ func (e *EventPoll) RemoveEntry(id FileIdentifier) error { // Remove file from map, and drop weak reference. delete(e.files, id) - entry.file.Drop() + entry.file.Drop(ctx) return nil } diff --git a/pkg/sentry/kernel/epoll/epoll_test.go b/pkg/sentry/kernel/epoll/epoll_test.go index 22630e9c5..55b505593 100644 --- a/pkg/sentry/kernel/epoll/epoll_test.go +++ b/pkg/sentry/kernel/epoll/epoll_test.go @@ -26,7 +26,8 @@ func TestFileDestroyed(t *testing.T) { f := filetest.NewTestFile(t) id := FileIdentifier{f, 12} - efile := NewEventPoll(contexttest.Context(t)) + ctx := contexttest.Context(t) + efile := NewEventPoll(ctx) e := efile.FileOperations.(*EventPoll) if err := e.AddEntry(id, 0, waiter.EventIn, [2]int32{}); err != nil { t.Fatalf("addEntry failed: %v", err) @@ -44,7 +45,7 @@ func TestFileDestroyed(t *testing.T) { } // Destroy the file. Check that we get no more events. - f.DecRef() + f.DecRef(ctx) evt = e.ReadEvents(1) if len(evt) != 0 { diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go index 87951adeb..bbf568dfc 100644 --- a/pkg/sentry/kernel/eventfd/eventfd.go +++ b/pkg/sentry/kernel/eventfd/eventfd.go @@ -70,7 +70,7 @@ func New(ctx context.Context, initVal uint64, semMode bool) *fs.File { // name matches fs/eventfd.c:eventfd_file_create. dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[eventfd]") // Release the initial dirent reference after NewFile takes a reference. - defer dirent.DecRef() + defer dirent.DecRef(ctx) return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &EventOperations{ val: initVal, semMode: semMode, @@ -106,7 +106,7 @@ func (e *EventOperations) HostFD() (int, error) { } // Release implements fs.FileOperations.Release. -func (e *EventOperations) Release() { +func (e *EventOperations) Release(context.Context) { e.mu.Lock() defer e.mu.Unlock() if e.hostfd >= 0 { diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 4b7d234a4..ce53af69b 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -98,7 +98,7 @@ type FDTable struct { func (f *FDTable) saveDescriptorTable() map[int32]descriptor { m := make(map[int32]descriptor) - f.forEach(func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { + f.forEach(context.Background(), func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { m[fd] = descriptor{ file: file, fileVFS2: fileVFS2, @@ -109,6 +109,7 @@ func (f *FDTable) saveDescriptorTable() map[int32]descriptor { } func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) { + ctx := context.Background() f.init() // Initialize table. for fd, d := range m { f.setAll(fd, d.file, d.fileVFS2, d.flags) @@ -118,9 +119,9 @@ func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) { // reference taken by set above. switch { case d.file != nil: - d.file.DecRef() + d.file.DecRef(ctx) case d.fileVFS2 != nil: - d.fileVFS2.DecRef() + d.fileVFS2.DecRef(ctx) } } } @@ -144,14 +145,15 @@ func (f *FDTable) drop(file *fs.File) { d.InotifyEvent(ev, 0) // Drop the table reference. - file.DecRef() + file.DecRef(context.Background()) } // dropVFS2 drops the table reference. func (f *FDTable) dropVFS2(file *vfs.FileDescription) { // Release any POSIX lock possibly held by the FDTable. Range {0, 0} means the // entire file. - err := file.UnlockPOSIX(context.Background(), f, 0, 0, linux.SEEK_SET) + ctx := context.Background() + err := file.UnlockPOSIX(ctx, f, 0, 0, linux.SEEK_SET) if err != nil && err != syserror.ENOLCK { panic(fmt.Sprintf("UnlockPOSIX failed: %v", err)) } @@ -161,10 +163,10 @@ func (f *FDTable) dropVFS2(file *vfs.FileDescription) { if file.IsWritable() { ev = linux.IN_CLOSE_WRITE } - file.Dentry().InotifyWithParent(ev, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(ctx, ev, 0, vfs.PathEvent) // Drop the table's reference. - file.DecRef() + file.DecRef(ctx) } // NewFDTable allocates a new FDTable that may be used by tasks in k. @@ -175,15 +177,15 @@ func (k *Kernel) NewFDTable() *FDTable { } // destroy removes all of the file descriptors from the map. -func (f *FDTable) destroy() { - f.RemoveIf(func(*fs.File, *vfs.FileDescription, FDFlags) bool { +func (f *FDTable) destroy(ctx context.Context) { + f.RemoveIf(ctx, func(*fs.File, *vfs.FileDescription, FDFlags) bool { return true }) } // DecRef implements RefCounter.DecRef with destructor f.destroy. -func (f *FDTable) DecRef() { - f.DecRefWithDestructor(f.destroy) +func (f *FDTable) DecRef(ctx context.Context) { + f.DecRefWithDestructor(ctx, f.destroy) } // Size returns the number of file descriptor slots currently allocated. @@ -195,7 +197,7 @@ func (f *FDTable) Size() int { // forEach iterates over all non-nil files in sorted order. // // It is the caller's responsibility to acquire an appropriate lock. -func (f *FDTable) forEach(fn func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags)) { +func (f *FDTable) forEach(ctx context.Context, fn func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags)) { // retries tracks the number of failed TryIncRef attempts for the same FD. retries := 0 fd := int32(0) @@ -214,7 +216,7 @@ func (f *FDTable) forEach(fn func(fd int32, file *fs.File, fileVFS2 *vfs.FileDes continue // Race caught. } fn(fd, file, nil, flags) - file.DecRef() + file.DecRef(ctx) case fileVFS2 != nil: if !fileVFS2.TryIncRef() { retries++ @@ -224,7 +226,7 @@ func (f *FDTable) forEach(fn func(fd int32, file *fs.File, fileVFS2 *vfs.FileDes continue // Race caught. } fn(fd, nil, fileVFS2, flags) - fileVFS2.DecRef() + fileVFS2.DecRef(ctx) } retries = 0 fd++ @@ -234,7 +236,8 @@ func (f *FDTable) forEach(fn func(fd int32, file *fs.File, fileVFS2 *vfs.FileDes // String is a stringer for FDTable. func (f *FDTable) String() string { var buf strings.Builder - f.forEach(func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { + ctx := context.Background() + f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { switch { case file != nil: n, _ := file.Dirent.FullName(nil /* root */) @@ -242,7 +245,7 @@ func (f *FDTable) String() string { case fileVFS2 != nil: vfsObj := fileVFS2.Mount().Filesystem().VirtualFilesystem() - name, err := vfsObj.PathnameWithDeleted(context.Background(), vfs.VirtualDentry{}, fileVFS2.VirtualDentry()) + name, err := vfsObj.PathnameWithDeleted(ctx, vfs.VirtualDentry{}, fileVFS2.VirtualDentry()) if err != nil { fmt.Fprintf(&buf, "\n", err) return @@ -541,9 +544,9 @@ func (f *FDTable) GetVFS2(fd int32) (*vfs.FileDescription, FDFlags) { // // Precondition: The caller must be running on the task goroutine, or Task.mu // must be locked. -func (f *FDTable) GetFDs() []int32 { +func (f *FDTable) GetFDs(ctx context.Context) []int32 { fds := make([]int32, 0, int(atomic.LoadInt32(&f.used))) - f.forEach(func(fd int32, _ *fs.File, _ *vfs.FileDescription, _ FDFlags) { + f.forEach(ctx, func(fd int32, _ *fs.File, _ *vfs.FileDescription, _ FDFlags) { fds = append(fds, fd) }) return fds @@ -552,9 +555,9 @@ func (f *FDTable) GetFDs() []int32 { // GetRefs returns a stable slice of references to all files and bumps the // reference count on each. The caller must use DecRef on each reference when // they're done using the slice. -func (f *FDTable) GetRefs() []*fs.File { +func (f *FDTable) GetRefs(ctx context.Context) []*fs.File { files := make([]*fs.File, 0, f.Size()) - f.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { + f.forEach(ctx, func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { file.IncRef() // Acquire a reference for caller. files = append(files, file) }) @@ -564,9 +567,9 @@ func (f *FDTable) GetRefs() []*fs.File { // GetRefsVFS2 returns a stable slice of references to all files and bumps the // reference count on each. The caller must use DecRef on each reference when // they're done using the slice. -func (f *FDTable) GetRefsVFS2() []*vfs.FileDescription { +func (f *FDTable) GetRefsVFS2(ctx context.Context) []*vfs.FileDescription { files := make([]*vfs.FileDescription, 0, f.Size()) - f.forEach(func(_ int32, _ *fs.File, file *vfs.FileDescription, _ FDFlags) { + f.forEach(ctx, func(_ int32, _ *fs.File, file *vfs.FileDescription, _ FDFlags) { file.IncRef() // Acquire a reference for caller. files = append(files, file) }) @@ -574,10 +577,10 @@ func (f *FDTable) GetRefsVFS2() []*vfs.FileDescription { } // Fork returns an independent FDTable. -func (f *FDTable) Fork() *FDTable { +func (f *FDTable) Fork(ctx context.Context) *FDTable { clone := f.k.NewFDTable() - f.forEach(func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { + f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { // The set function here will acquire an appropriate table // reference for the clone. We don't need anything else. switch { @@ -622,11 +625,11 @@ func (f *FDTable) Remove(fd int32) (*fs.File, *vfs.FileDescription) { } // RemoveIf removes all FDs where cond is true. -func (f *FDTable) RemoveIf(cond func(*fs.File, *vfs.FileDescription, FDFlags) bool) { +func (f *FDTable) RemoveIf(ctx context.Context, cond func(*fs.File, *vfs.FileDescription, FDFlags) bool) { f.mu.Lock() defer f.mu.Unlock() - f.forEach(func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { + f.forEach(ctx, func(fd int32, file *fs.File, fileVFS2 *vfs.FileDescription, flags FDFlags) { if cond(file, fileVFS2, flags) { f.set(fd, nil, FDFlags{}) // Clear from table. // Update current available position. diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go index 29f95a2c4..e3f30ba2a 100644 --- a/pkg/sentry/kernel/fd_table_test.go +++ b/pkg/sentry/kernel/fd_table_test.go @@ -154,7 +154,7 @@ func TestFDTable(t *testing.T) { if ref == nil { t.Fatalf("fdTable.Remove(1) for an existing FD: failed, want success") } - ref.DecRef() + ref.DecRef(ctx) if ref, _ := fdTable.Remove(1); ref != nil { t.Fatalf("r.Remove(1) for a removed FD: got success, want failure") @@ -191,7 +191,7 @@ func BenchmarkFDLookupAndDecRef(b *testing.B) { b.StartTimer() // Benchmark. for i := 0; i < b.N; i++ { tf, _ := fdTable.Get(fds[i%len(fds)]) - tf.DecRef() + tf.DecRef(ctx) } }) } @@ -219,7 +219,7 @@ func BenchmarkFDLookupAndDecRefConcurrent(b *testing.B) { defer wg.Done() for i := 0; i < each; i++ { tf, _ := fdTable.Get(fds[i%len(fds)]) - tf.DecRef() + tf.DecRef(ctx) } }() } diff --git a/pkg/sentry/kernel/fs_context.go b/pkg/sentry/kernel/fs_context.go index 47f78df9a..8f2d36d5a 100644 --- a/pkg/sentry/kernel/fs_context.go +++ b/pkg/sentry/kernel/fs_context.go @@ -17,6 +17,7 @@ package kernel import ( "fmt" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/vfs" @@ -89,28 +90,28 @@ func NewFSContextVFS2(root, cwd vfs.VirtualDentry, umask uint) *FSContext { // Note that there may still be calls to WorkingDirectory() or RootDirectory() // (that return nil). This is because valid references may still be held via // proc files or other mechanisms. -func (f *FSContext) destroy() { +func (f *FSContext) destroy(ctx context.Context) { // Hold f.mu so that we don't race with RootDirectory() and // WorkingDirectory(). f.mu.Lock() defer f.mu.Unlock() if VFS2Enabled { - f.rootVFS2.DecRef() + f.rootVFS2.DecRef(ctx) f.rootVFS2 = vfs.VirtualDentry{} - f.cwdVFS2.DecRef() + f.cwdVFS2.DecRef(ctx) f.cwdVFS2 = vfs.VirtualDentry{} } else { - f.root.DecRef() + f.root.DecRef(ctx) f.root = nil - f.cwd.DecRef() + f.cwd.DecRef(ctx) f.cwd = nil } } // DecRef implements RefCounter.DecRef with destructor f.destroy. -func (f *FSContext) DecRef() { - f.DecRefWithDestructor(f.destroy) +func (f *FSContext) DecRef(ctx context.Context) { + f.DecRefWithDestructor(ctx, f.destroy) } // Fork forks this FSContext. @@ -165,7 +166,7 @@ func (f *FSContext) WorkingDirectoryVFS2() vfs.VirtualDentry { // This will take an extra reference on the Dirent. // // This is not a valid call after destroy. -func (f *FSContext) SetWorkingDirectory(d *fs.Dirent) { +func (f *FSContext) SetWorkingDirectory(ctx context.Context, d *fs.Dirent) { if d == nil { panic("FSContext.SetWorkingDirectory called with nil dirent") } @@ -180,21 +181,21 @@ func (f *FSContext) SetWorkingDirectory(d *fs.Dirent) { old := f.cwd f.cwd = d d.IncRef() - old.DecRef() + old.DecRef(ctx) } // SetWorkingDirectoryVFS2 sets the current working directory. // This will take an extra reference on the VirtualDentry. // // This is not a valid call after destroy. -func (f *FSContext) SetWorkingDirectoryVFS2(d vfs.VirtualDentry) { +func (f *FSContext) SetWorkingDirectoryVFS2(ctx context.Context, d vfs.VirtualDentry) { f.mu.Lock() defer f.mu.Unlock() old := f.cwdVFS2 f.cwdVFS2 = d d.IncRef() - old.DecRef() + old.DecRef(ctx) } // RootDirectory returns the current filesystem root. @@ -226,7 +227,7 @@ func (f *FSContext) RootDirectoryVFS2() vfs.VirtualDentry { // This will take an extra reference on the Dirent. // // This is not a valid call after free. -func (f *FSContext) SetRootDirectory(d *fs.Dirent) { +func (f *FSContext) SetRootDirectory(ctx context.Context, d *fs.Dirent) { if d == nil { panic("FSContext.SetRootDirectory called with nil dirent") } @@ -241,13 +242,13 @@ func (f *FSContext) SetRootDirectory(d *fs.Dirent) { old := f.root f.root = d d.IncRef() - old.DecRef() + old.DecRef(ctx) } // SetRootDirectoryVFS2 sets the root directory. It takes a reference on vd. // // This is not a valid call after free. -func (f *FSContext) SetRootDirectoryVFS2(vd vfs.VirtualDentry) { +func (f *FSContext) SetRootDirectoryVFS2(ctx context.Context, vd vfs.VirtualDentry) { if !vd.Ok() { panic("FSContext.SetRootDirectoryVFS2 called with zero-value VirtualDentry") } @@ -263,7 +264,7 @@ func (f *FSContext) SetRootDirectoryVFS2(vd vfs.VirtualDentry) { vd.IncRef() f.rootVFS2 = vd f.mu.Unlock() - old.DecRef() + old.DecRef(ctx) } // Umask returns the current umask. diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD index c5021f2db..daa2dae76 100644 --- a/pkg/sentry/kernel/futex/BUILD +++ b/pkg/sentry/kernel/futex/BUILD @@ -51,6 +51,7 @@ go_test( srcs = ["futex_test.go"], library = ":futex", deps = [ + "//pkg/context", "//pkg/sync", "//pkg/usermem", ], diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index bcc1b29a8..e4dcc4d40 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -19,6 +19,7 @@ package futex import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -66,9 +67,9 @@ type Key struct { Offset uint64 } -func (k *Key) release() { +func (k *Key) release(t Target) { if k.MappingIdentity != nil { - k.MappingIdentity.DecRef() + k.MappingIdentity.DecRef(t) } k.Mappable = nil k.MappingIdentity = nil @@ -94,6 +95,8 @@ func (k *Key) matches(k2 *Key) bool { // Target abstracts memory accesses and keys. type Target interface { + context.Context + // SwapUint32 gives access to usermem.IO.SwapUint32. SwapUint32(addr usermem.Addr, new uint32) (uint32, error) @@ -296,7 +299,7 @@ func (b *bucket) wakeWaiterLocked(w *Waiter) { // bucket "to". // // Preconditions: b and to must be locked. -func (b *bucket) requeueLocked(to *bucket, key, nkey *Key, n int) int { +func (b *bucket) requeueLocked(t Target, to *bucket, key, nkey *Key, n int) int { done := 0 for w := b.waiters.Front(); done < n && w != nil; { if !w.key.matches(key) { @@ -308,7 +311,7 @@ func (b *bucket) requeueLocked(to *bucket, key, nkey *Key, n int) int { requeued := w w = w.Next() // Next iteration. b.waiters.Remove(requeued) - requeued.key.release() + requeued.key.release(t) requeued.key = nkey.clone() to.waiters.PushBack(requeued) requeued.bucket.Store(to) @@ -456,7 +459,7 @@ func (m *Manager) Wake(t Target, addr usermem.Addr, private bool, bitmask uint32 r := b.wakeLocked(&k, bitmask, n) b.mu.Unlock() - k.release() + k.release(t) return r, nil } @@ -465,12 +468,12 @@ func (m *Manager) doRequeue(t Target, addr, naddr usermem.Addr, private bool, ch if err != nil { return 0, err } - defer k1.release() + defer k1.release(t) k2, err := getKey(t, naddr, private) if err != nil { return 0, err } - defer k2.release() + defer k2.release(t) b1, b2 := m.lockBuckets(&k1, &k2) defer b1.mu.Unlock() @@ -488,7 +491,7 @@ func (m *Manager) doRequeue(t Target, addr, naddr usermem.Addr, private bool, ch done := b1.wakeLocked(&k1, ^uint32(0), nwake) // Requeue the number required. - b1.requeueLocked(b2, &k1, &k2, nreq) + b1.requeueLocked(t, b2, &k1, &k2, nreq) return done, nil } @@ -515,12 +518,12 @@ func (m *Manager) WakeOp(t Target, addr1, addr2 usermem.Addr, private bool, nwak if err != nil { return 0, err } - defer k1.release() + defer k1.release(t) k2, err := getKey(t, addr2, private) if err != nil { return 0, err } - defer k2.release() + defer k2.release(t) b1, b2 := m.lockBuckets(&k1, &k2) defer b1.mu.Unlock() @@ -571,7 +574,7 @@ func (m *Manager) WaitPrepare(w *Waiter, t Target, addr usermem.Addr, private bo // Perform our atomic check. if err := check(t, addr, val); err != nil { b.mu.Unlock() - w.key.release() + w.key.release(t) return err } @@ -585,7 +588,7 @@ func (m *Manager) WaitPrepare(w *Waiter, t Target, addr usermem.Addr, private bo // WaitComplete must be called when a Waiter previously added by WaitPrepare is // no longer eligible to be woken. -func (m *Manager) WaitComplete(w *Waiter) { +func (m *Manager) WaitComplete(w *Waiter, t Target) { // Remove w from the bucket it's in. for { b := w.bucket.Load() @@ -617,7 +620,7 @@ func (m *Manager) WaitComplete(w *Waiter) { } // Release references held by the waiter. - w.key.release() + w.key.release(t) } // LockPI attempts to lock the futex following the Priority-inheritance futex @@ -648,13 +651,13 @@ func (m *Manager) LockPI(w *Waiter, t Target, addr usermem.Addr, tid uint32, pri success, err := m.lockPILocked(w, t, addr, tid, b, try) if err != nil { - w.key.release() + w.key.release(t) b.mu.Unlock() return false, err } if success || try { // Release waiter if it's not going to be a wait. - w.key.release() + w.key.release(t) } b.mu.Unlock() return success, nil @@ -730,7 +733,7 @@ func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool err = m.unlockPILocked(t, addr, tid, b, &k) - k.release() + k.release(t) b.mu.Unlock() return err } diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go index 7c5c7665b..d0128c548 100644 --- a/pkg/sentry/kernel/futex/futex_test.go +++ b/pkg/sentry/kernel/futex/futex_test.go @@ -22,6 +22,7 @@ import ( "testing" "unsafe" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/usermem" ) @@ -29,28 +30,33 @@ import ( // testData implements the Target interface, and allows us to // treat the address passed for futex operations as an index in // a byte slice for testing simplicity. -type testData []byte +type testData struct { + context.Context + data []byte +} const sizeofInt32 = 4 func newTestData(size uint) testData { - return make([]byte, size) + return testData{ + data: make([]byte, size), + } } func (t testData) SwapUint32(addr usermem.Addr, new uint32) (uint32, error) { - val := atomic.SwapUint32((*uint32)(unsafe.Pointer(&t[addr])), new) + val := atomic.SwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), new) return val, nil } func (t testData) CompareAndSwapUint32(addr usermem.Addr, old, new uint32) (uint32, error) { - if atomic.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&t[addr])), old, new) { + if atomic.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&t.data[addr])), old, new) { return old, nil } - return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t[addr]))), nil + return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil } func (t testData) LoadUint32(addr usermem.Addr) (uint32, error) { - return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t[addr]))), nil + return atomic.LoadUint32((*uint32)(unsafe.Pointer(&t.data[addr]))), nil } func (t testData) GetSharedKey(addr usermem.Addr) (Key, error) { @@ -83,7 +89,7 @@ func TestFutexWake(t *testing.T) { // Start waiting for wakeup. w := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w) + defer m.WaitComplete(w, d) // Perform a wakeup. if n, err := m.Wake(d, 0, private, ^uint32(0), 1); err != nil || n != 1 { @@ -106,7 +112,7 @@ func TestFutexWakeBitmask(t *testing.T) { // Start waiting for wakeup. w := newPreparedTestWaiter(t, m, d, 0, private, 0, 0x0000ffff) - defer m.WaitComplete(w) + defer m.WaitComplete(w, d) // Perform a wakeup using the wrong bitmask. if n, err := m.Wake(d, 0, private, 0xffff0000, 1); err != nil || n != 0 { @@ -141,7 +147,7 @@ func TestFutexWakeTwo(t *testing.T) { var ws [3]*Waiter for i := range ws { ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i]) + defer m.WaitComplete(ws[i], d) } // Perform two wakeups. @@ -174,9 +180,9 @@ func TestFutexWakeUnrelated(t *testing.T) { // Start two waiters waiting for wakeup on different addresses. w1 := newPreparedTestWaiter(t, m, d, 0*sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) + defer m.WaitComplete(w1, d) w2 := newPreparedTestWaiter(t, m, d, 1*sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) + defer m.WaitComplete(w2, d) // Perform two wakeups on the second address. if n, err := m.Wake(d, 1*sizeofInt32, private, ^uint32(0), 2); err != nil || n != 1 { @@ -216,9 +222,9 @@ func TestWakeOpFirstNonEmpty(t *testing.T) { // Add two waiters on address 0. w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) + defer m.WaitComplete(w1, d) w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) + defer m.WaitComplete(w2, d) // Perform 10 wakeups on address 0. if n, err := m.WakeOp(d, 0, sizeofInt32, private, 10, 0, 0); err != nil || n != 2 { @@ -244,9 +250,9 @@ func TestWakeOpSecondNonEmpty(t *testing.T) { // Add two waiters on address sizeofInt32. w1 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) + defer m.WaitComplete(w1, d) w2 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) + defer m.WaitComplete(w2, d) // Perform 10 wakeups on address sizeofInt32 (contingent on // d.Op(0), which should succeed). @@ -273,9 +279,9 @@ func TestWakeOpSecondNonEmptyFailingOp(t *testing.T) { // Add two waiters on address sizeofInt32. w1 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) + defer m.WaitComplete(w1, d) w2 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) + defer m.WaitComplete(w2, d) // Perform 10 wakeups on address sizeofInt32 (contingent on // d.Op(1), which should fail). @@ -302,15 +308,15 @@ func TestWakeOpAllNonEmpty(t *testing.T) { // Add two waiters on address 0. w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) + defer m.WaitComplete(w1, d) w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) + defer m.WaitComplete(w2, d) // Add two waiters on address sizeofInt32. w3 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w3) + defer m.WaitComplete(w3, d) w4 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w4) + defer m.WaitComplete(w4, d) // Perform 10 wakeups on address 0 (unconditionally), and 10 // wakeups on address sizeofInt32 (contingent on d.Op(0), which @@ -344,15 +350,15 @@ func TestWakeOpAllNonEmptyFailingOp(t *testing.T) { // Add two waiters on address 0. w1 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w1) + defer m.WaitComplete(w1, d) w2 := newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(w2) + defer m.WaitComplete(w2, d) // Add two waiters on address sizeofInt32. w3 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w3) + defer m.WaitComplete(w3, d) w4 := newPreparedTestWaiter(t, m, d, sizeofInt32, private, 0, ^uint32(0)) - defer m.WaitComplete(w4) + defer m.WaitComplete(w4, d) // Perform 10 wakeups on address 0 (unconditionally), and 10 // wakeups on address sizeofInt32 (contingent on d.Op(1), which @@ -388,7 +394,7 @@ func TestWakeOpSameAddress(t *testing.T) { var ws [4]*Waiter for i := range ws { ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i]) + defer m.WaitComplete(ws[i], d) } // Perform 1 wakeup on address 0 (unconditionally), and 1 wakeup @@ -422,7 +428,7 @@ func TestWakeOpSameAddressFailingOp(t *testing.T) { var ws [4]*Waiter for i := range ws { ws[i] = newPreparedTestWaiter(t, m, d, 0, private, 0, ^uint32(0)) - defer m.WaitComplete(ws[i]) + defer m.WaitComplete(ws[i], d) } // Perform 1 wakeup on address 0 (unconditionally), and 1 wakeup @@ -472,7 +478,7 @@ func (t *testMutex) Lock() { for { // Attempt to grab the lock. if atomic.CompareAndSwapUint32( - (*uint32)(unsafe.Pointer(&t.d[t.a])), + (*uint32)(unsafe.Pointer(&t.d.data[t.a])), testMutexUnlocked, testMutexLocked) { // Lock held. @@ -490,7 +496,7 @@ func (t *testMutex) Lock() { panic("WaitPrepare returned unexpected error: " + err.Error()) } <-w.C - t.m.WaitComplete(w) + t.m.WaitComplete(w, t.d) } } @@ -498,7 +504,7 @@ func (t *testMutex) Lock() { // This will notify any waiters via the futex manager. func (t *testMutex) Unlock() { // Unlock. - atomic.StoreUint32((*uint32)(unsafe.Pointer(&t.d[t.a])), testMutexUnlocked) + atomic.StoreUint32((*uint32)(unsafe.Pointer(&t.d.data[t.a])), testMutexUnlocked) // Notify all waiters. t.m.Wake(t.d, t.a, true, ^uint32(0), math.MaxInt32) diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 15dae0f5b..316df249d 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -376,7 +376,8 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.netlinkPorts = port.New() if VFS2Enabled { - if err := k.vfs.Init(); err != nil { + ctx := k.SupervisorContext() + if err := k.vfs.Init(ctx); err != nil { return fmt.Errorf("failed to initialize VFS: %v", err) } @@ -384,19 +385,19 @@ func (k *Kernel) Init(args InitKernelArgs) error { if err != nil { return fmt.Errorf("failed to create pipefs filesystem: %v", err) } - defer pipeFilesystem.DecRef() + defer pipeFilesystem.DecRef(ctx) pipeMount, err := k.vfs.NewDisconnectedMount(pipeFilesystem, nil, &vfs.MountOptions{}) if err != nil { return fmt.Errorf("failed to create pipefs mount: %v", err) } k.pipeMount = pipeMount - tmpfsFilesystem, tmpfsRoot, err := tmpfs.NewFilesystem(k.SupervisorContext(), &k.vfs, auth.NewRootCredentials(k.rootUserNamespace)) + tmpfsFilesystem, tmpfsRoot, err := tmpfs.NewFilesystem(ctx, &k.vfs, auth.NewRootCredentials(k.rootUserNamespace)) if err != nil { return fmt.Errorf("failed to create tmpfs filesystem: %v", err) } - defer tmpfsFilesystem.DecRef() - defer tmpfsRoot.DecRef() + defer tmpfsFilesystem.DecRef(ctx) + defer tmpfsRoot.DecRef(ctx) shmMount, err := k.vfs.NewDisconnectedMount(tmpfsFilesystem, tmpfsRoot, &vfs.MountOptions{}) if err != nil { return fmt.Errorf("failed to create tmpfs mount: %v", err) @@ -407,7 +408,7 @@ func (k *Kernel) Init(args InitKernelArgs) error { if err != nil { return fmt.Errorf("failed to create sockfs filesystem: %v", err) } - defer socketFilesystem.DecRef() + defer socketFilesystem.DecRef(ctx) socketMount, err := k.vfs.NewDisconnectedMount(socketFilesystem, nil, &vfs.MountOptions{}) if err != nil { return fmt.Errorf("failed to create sockfs mount: %v", err) @@ -430,8 +431,8 @@ func (k *Kernel) SaveTo(w wire.Writer) error { defer k.extMu.Unlock() // Stop time. - k.pauseTimeLocked() - defer k.resumeTimeLocked() + k.pauseTimeLocked(ctx) + defer k.resumeTimeLocked(ctx) // Evict all evictable MemoryFile allocations. k.mf.StartEvictions() @@ -447,12 +448,12 @@ func (k *Kernel) SaveTo(w wire.Writer) error { // Remove all epoll waiter objects from underlying wait queues. // NOTE: for programs to resume execution in future snapshot scenarios, // we will need to re-establish these waiter objects after saving. - k.tasks.unregisterEpollWaiters() + k.tasks.unregisterEpollWaiters(ctx) // Clear the dirent cache before saving because Dirents must be Loaded in a // particular order (parents before children), and Loading dirents from a cache // breaks that order. - if err := k.flushMountSourceRefs(); err != nil { + if err := k.flushMountSourceRefs(ctx); err != nil { return err } @@ -505,7 +506,7 @@ func (k *Kernel) SaveTo(w wire.Writer) error { // flushMountSourceRefs flushes the MountSources for all mounted filesystems // and open FDs. -func (k *Kernel) flushMountSourceRefs() error { +func (k *Kernel) flushMountSourceRefs(ctx context.Context) error { // Flush all mount sources for currently mounted filesystems in each task. flushed := make(map[*fs.MountNamespace]struct{}) k.tasks.mu.RLock() @@ -521,7 +522,7 @@ func (k *Kernel) flushMountSourceRefs() error { // There may be some open FDs whose filesystems have been unmounted. We // must flush those as well. - return k.tasks.forEachFDPaused(func(file *fs.File, _ *vfs.FileDescription) error { + return k.tasks.forEachFDPaused(ctx, func(file *fs.File, _ *vfs.FileDescription) error { file.Dirent.Inode.MountSource.FlushDirentRefs() return nil }) @@ -531,7 +532,7 @@ func (k *Kernel) flushMountSourceRefs() error { // each task. // // Precondition: Must be called with the kernel paused. -func (ts *TaskSet) forEachFDPaused(f func(*fs.File, *vfs.FileDescription) error) (err error) { +func (ts *TaskSet) forEachFDPaused(ctx context.Context, f func(*fs.File, *vfs.FileDescription) error) (err error) { // TODO(gvisor.dev/issue/1663): Add save support for VFS2. if VFS2Enabled { return nil @@ -544,7 +545,7 @@ func (ts *TaskSet) forEachFDPaused(f func(*fs.File, *vfs.FileDescription) error) if t.fdTable == nil { continue } - t.fdTable.forEach(func(_ int32, file *fs.File, fileVFS2 *vfs.FileDescription, _ FDFlags) { + t.fdTable.forEach(ctx, func(_ int32, file *fs.File, fileVFS2 *vfs.FileDescription, _ FDFlags) { if lastErr := f(file, fileVFS2); lastErr != nil && err == nil { err = lastErr } @@ -555,7 +556,7 @@ func (ts *TaskSet) forEachFDPaused(f func(*fs.File, *vfs.FileDescription) error) func (ts *TaskSet) flushWritesToFiles(ctx context.Context) error { // TODO(gvisor.dev/issue/1663): Add save support for VFS2. - return ts.forEachFDPaused(func(file *fs.File, _ *vfs.FileDescription) error { + return ts.forEachFDPaused(ctx, func(file *fs.File, _ *vfs.FileDescription) error { if flags := file.Flags(); !flags.Write { return nil } @@ -602,7 +603,7 @@ func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error { return nil } -func (ts *TaskSet) unregisterEpollWaiters() { +func (ts *TaskSet) unregisterEpollWaiters(ctx context.Context) { // TODO(gvisor.dev/issue/1663): Add save support for VFS2. if VFS2Enabled { return @@ -623,7 +624,7 @@ func (ts *TaskSet) unregisterEpollWaiters() { if _, ok := processed[t.fdTable]; ok { continue } - t.fdTable.forEach(func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { + t.fdTable.forEach(ctx, func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { if e, ok := file.FileOperations.(*epoll.EventPoll); ok { e.UnregisterEpollWaiters() } @@ -900,7 +901,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, root := args.MountNamespaceVFS2.Root() // The call to newFSContext below will take a reference on root, so we // don't need to hold this one. - defer root.DecRef() + defer root.DecRef(ctx) // Grab the working directory. wd := root // Default. @@ -918,7 +919,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, if err != nil { return nil, 0, fmt.Errorf("failed to find initial working directory %q: %v", args.WorkingDirectory, err) } - defer wd.DecRef() + defer wd.DecRef(ctx) } opener = fsbridge.NewVFSLookup(mntnsVFS2, root, wd) fsContext = NewFSContextVFS2(root, wd, args.Umask) @@ -933,7 +934,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, root := mntns.Root() // The call to newFSContext below will take a reference on root, so we // don't need to hold this one. - defer root.DecRef() + defer root.DecRef(ctx) // Grab the working directory. remainingTraversals := args.MaxSymlinkTraversals @@ -944,7 +945,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, if err != nil { return nil, 0, fmt.Errorf("failed to find initial working directory %q: %v", args.WorkingDirectory, err) } - defer wd.DecRef() + defer wd.DecRef(ctx) } opener = fsbridge.NewFSLookup(mntns, root, wd) fsContext = newFSContext(root, wd, args.Umask) @@ -1054,7 +1055,7 @@ func (k *Kernel) Start() error { // If k was created by LoadKernelFrom, timers were stopped during // Kernel.SaveTo and need to be resumed. If k was created by NewKernel, // this is a no-op. - k.resumeTimeLocked() + k.resumeTimeLocked(k.SupervisorContext()) // Start task goroutines. k.tasks.mu.RLock() defer k.tasks.mu.RUnlock() @@ -1068,7 +1069,7 @@ func (k *Kernel) Start() error { // // Preconditions: Any task goroutines running in k must be stopped. k.extMu // must be locked. -func (k *Kernel) pauseTimeLocked() { +func (k *Kernel) pauseTimeLocked(ctx context.Context) { // k.cpuClockTicker may be nil since Kernel.SaveTo() may be called before // Kernel.Start(). if k.cpuClockTicker != nil { @@ -1090,7 +1091,7 @@ func (k *Kernel) pauseTimeLocked() { // This means we'll iterate FDTables shared by multiple tasks repeatedly, // but ktime.Timer.Pause is idempotent so this is harmless. if t.fdTable != nil { - t.fdTable.forEach(func(_ int32, file *fs.File, fd *vfs.FileDescription, _ FDFlags) { + t.fdTable.forEach(ctx, func(_ int32, file *fs.File, fd *vfs.FileDescription, _ FDFlags) { if VFS2Enabled { if tfd, ok := fd.Impl().(*timerfd.TimerFileDescription); ok { tfd.PauseTimer() @@ -1112,7 +1113,7 @@ func (k *Kernel) pauseTimeLocked() { // // Preconditions: Any task goroutines running in k must be stopped. k.extMu // must be locked. -func (k *Kernel) resumeTimeLocked() { +func (k *Kernel) resumeTimeLocked(ctx context.Context) { if k.cpuClockTicker != nil { k.cpuClockTicker.Resume() } @@ -1126,7 +1127,7 @@ func (k *Kernel) resumeTimeLocked() { } } if t.fdTable != nil { - t.fdTable.forEach(func(_ int32, file *fs.File, fd *vfs.FileDescription, _ FDFlags) { + t.fdTable.forEach(ctx, func(_ int32, file *fs.File, fd *vfs.FileDescription, _ FDFlags) { if VFS2Enabled { if tfd, ok := fd.Impl().(*timerfd.TimerFileDescription); ok { tfd.ResumeTimer() @@ -1511,7 +1512,7 @@ type SocketEntry struct { } // WeakRefGone implements refs.WeakRefUser.WeakRefGone. -func (s *SocketEntry) WeakRefGone() { +func (s *SocketEntry) WeakRefGone(context.Context) { s.k.extMu.Lock() s.k.sockets.Remove(s) s.k.extMu.Unlock() @@ -1600,7 +1601,7 @@ func (ctx supervisorContext) Value(key interface{}) interface{} { return vfs.VirtualDentry{} } mntns := ctx.k.GlobalInit().Leader().MountNamespaceVFS2() - defer mntns.DecRef() + defer mntns.DecRef(ctx) // Root() takes a reference on the root dirent for us. return mntns.Root() case vfs.CtxMountNamespace: diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go index 4b688c627..6497dc4ba 100644 --- a/pkg/sentry/kernel/pipe/node.go +++ b/pkg/sentry/kernel/pipe/node.go @@ -93,7 +93,7 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi if i.p.isNamed && !flags.NonBlocking && !i.p.HasWriters() { if !waitFor(&i.mu, &i.wWakeup, ctx) { - r.DecRef() + r.DecRef(ctx) return nil, syserror.ErrInterrupted } } @@ -111,12 +111,12 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi // On a nonblocking, write-only open, the open fails with ENXIO if the // read side isn't open yet. if flags.NonBlocking { - w.DecRef() + w.DecRef(ctx) return nil, syserror.ENXIO } if !waitFor(&i.mu, &i.rWakeup, ctx) { - w.DecRef() + w.DecRef(ctx) return nil, syserror.ErrInterrupted } } diff --git a/pkg/sentry/kernel/pipe/node_test.go b/pkg/sentry/kernel/pipe/node_test.go index ab75a87ff..ce0db5583 100644 --- a/pkg/sentry/kernel/pipe/node_test.go +++ b/pkg/sentry/kernel/pipe/node_test.go @@ -167,7 +167,7 @@ func TestClosedReaderBlocksWriteOpen(t *testing.T) { f := NewInodeOperations(ctx, perms, newNamedPipe(t)) rFile, _ := testOpenOrDie(ctx, t, f, fs.FileFlags{Read: true, NonBlocking: true}, nil) - rFile.DecRef() + rFile.DecRef(ctx) wDone := make(chan struct{}) // This open for write should block because the reader is now gone. diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 79645d7d2..297e8f28f 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -152,7 +152,7 @@ func NewConnectedPipe(ctx context.Context, sizeBytes, atomicIOBytes int64) (*fs. d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, ms, sattr), fmt.Sprintf("pipe:[%d]", ino)) // The p.Open calls below will each take a reference on the Dirent. We // must drop the one we already have. - defer d.DecRef() + defer d.DecRef(ctx) return p.Open(ctx, d, fs.FileFlags{Read: true}), p.Open(ctx, d, fs.FileFlags{Write: true}) } diff --git a/pkg/sentry/kernel/pipe/pipe_test.go b/pkg/sentry/kernel/pipe/pipe_test.go index bda739dbe..fe97e9800 100644 --- a/pkg/sentry/kernel/pipe/pipe_test.go +++ b/pkg/sentry/kernel/pipe/pipe_test.go @@ -27,8 +27,8 @@ import ( func TestPipeRW(t *testing.T) { ctx := contexttest.Context(t) r, w := NewConnectedPipe(ctx, 65536, 4096) - defer r.DecRef() - defer w.DecRef() + defer r.DecRef(ctx) + defer w.DecRef(ctx) msg := []byte("here's some bytes") wantN := int64(len(msg)) @@ -47,8 +47,8 @@ func TestPipeRW(t *testing.T) { func TestPipeReadBlock(t *testing.T) { ctx := contexttest.Context(t) r, w := NewConnectedPipe(ctx, 65536, 4096) - defer r.DecRef() - defer w.DecRef() + defer r.DecRef(ctx) + defer w.DecRef(ctx) n, err := r.Readv(ctx, usermem.BytesIOSequence(make([]byte, 1))) if n != 0 || err != syserror.ErrWouldBlock { @@ -62,8 +62,8 @@ func TestPipeWriteBlock(t *testing.T) { ctx := contexttest.Context(t) r, w := NewConnectedPipe(ctx, capacity, atomicIOBytes) - defer r.DecRef() - defer w.DecRef() + defer r.DecRef(ctx) + defer w.DecRef(ctx) msg := make([]byte, capacity+1) n, err := w.Writev(ctx, usermem.BytesIOSequence(msg)) @@ -77,8 +77,8 @@ func TestPipeWriteUntilEnd(t *testing.T) { ctx := contexttest.Context(t) r, w := NewConnectedPipe(ctx, atomicIOBytes, atomicIOBytes) - defer r.DecRef() - defer w.DecRef() + defer r.DecRef(ctx) + defer w.DecRef(ctx) msg := []byte("here's some bytes") diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go index aacf28da2..6d58b682f 100644 --- a/pkg/sentry/kernel/pipe/pipe_util.go +++ b/pkg/sentry/kernel/pipe/pipe_util.go @@ -33,7 +33,7 @@ import ( // the old fs architecture. // Release cleans up the pipe's state. -func (p *Pipe) Release() { +func (p *Pipe) Release(context.Context) { p.rClose() p.wClose() diff --git a/pkg/sentry/kernel/pipe/reader.go b/pkg/sentry/kernel/pipe/reader.go index 7724b4452..ac18785c0 100644 --- a/pkg/sentry/kernel/pipe/reader.go +++ b/pkg/sentry/kernel/pipe/reader.go @@ -15,6 +15,7 @@ package pipe import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/waiter" ) @@ -29,7 +30,7 @@ type Reader struct { // Release implements fs.FileOperations.Release. // // This overrides ReaderWriter.Release. -func (r *Reader) Release() { +func (r *Reader) Release(context.Context) { r.Pipe.rClose() // Wake up writers. diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 45d4c5fc1..28f998e45 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -101,7 +101,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s // If this pipe is being opened as blocking and there's no // writer, we have to wait for a writer to open the other end. if vp.pipe.isNamed && statusFlags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() && !waitFor(&vp.mu, &vp.wWakeup, ctx) { - fd.DecRef() + fd.DecRef(ctx) return nil, syserror.EINTR } @@ -112,12 +112,12 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s // Non-blocking, write-only opens fail with ENXIO when the read // side isn't open yet. if statusFlags&linux.O_NONBLOCK != 0 { - fd.DecRef() + fd.DecRef(ctx) return nil, syserror.ENXIO } // Wait for a reader to open the other end. if !waitFor(&vp.mu, &vp.rWakeup, ctx) { - fd.DecRef() + fd.DecRef(ctx) return nil, syserror.EINTR } } @@ -169,7 +169,7 @@ type VFSPipeFD struct { } // Release implements vfs.FileDescriptionImpl.Release. -func (fd *VFSPipeFD) Release() { +func (fd *VFSPipeFD) Release(context.Context) { var event waiter.EventMask if fd.vfsfd.IsReadable() { fd.pipe.rClose() diff --git a/pkg/sentry/kernel/pipe/writer.go b/pkg/sentry/kernel/pipe/writer.go index 5bc6aa931..ef4b70ca3 100644 --- a/pkg/sentry/kernel/pipe/writer.go +++ b/pkg/sentry/kernel/pipe/writer.go @@ -15,6 +15,7 @@ package pipe import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/waiter" ) @@ -29,7 +30,7 @@ type Writer struct { // Release implements fs.FileOperations.Release. // // This overrides ReaderWriter.Release. -func (w *Writer) Release() { +func (w *Writer) Release(context.Context) { w.Pipe.wClose() // Wake up readers. diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go index 0e19286de..5c4c622c2 100644 --- a/pkg/sentry/kernel/sessions.go +++ b/pkg/sentry/kernel/sessions.go @@ -16,6 +16,7 @@ package kernel import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/syserror" @@ -70,7 +71,7 @@ func (s *Session) incRef() { // // Precondition: callers must hold TaskSet.mu for writing. func (s *Session) decRef() { - s.refs.DecRefWithDestructor(func() { + s.refs.DecRefWithDestructor(nil, func(context.Context) { // Remove translations from the leader. for ns := s.leader.pidns; ns != nil; ns = ns.parent { id := ns.sids[s] @@ -162,7 +163,7 @@ func (pg *ProcessGroup) decRefWithParent(parentPG *ProcessGroup) { } alive := true - pg.refs.DecRefWithDestructor(func() { + pg.refs.DecRefWithDestructor(nil, func(context.Context) { alive = false // don't bother with handleOrphan. // Remove translations from the originator. diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go index 55b4c2cdb..13ec7afe0 100644 --- a/pkg/sentry/kernel/shm/shm.go +++ b/pkg/sentry/kernel/shm/shm.go @@ -431,8 +431,8 @@ func (s *Shm) InodeID() uint64 { // DecRef overrides refs.RefCount.DecRef with a destructor. // // Precondition: Caller must not hold s.mu. -func (s *Shm) DecRef() { - s.DecRefWithDestructor(s.destroy) +func (s *Shm) DecRef(ctx context.Context) { + s.DecRefWithDestructor(ctx, s.destroy) } // Msync implements memmap.MappingIdentity.Msync. Msync is a no-op for shm @@ -642,7 +642,7 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error { return nil } -func (s *Shm) destroy() { +func (s *Shm) destroy(context.Context) { s.mfp.MemoryFile().DecRef(s.fr) s.registry.remove(s) } @@ -651,7 +651,7 @@ func (s *Shm) destroy() { // destroyed once it has no references. MarkDestroyed may be called multiple // times, and is safe to call after a segment has already been destroyed. See // shmctl(IPC_RMID). -func (s *Shm) MarkDestroyed() { +func (s *Shm) MarkDestroyed(ctx context.Context) { s.registry.dissociateKey(s) s.mu.Lock() @@ -663,7 +663,7 @@ func (s *Shm) MarkDestroyed() { // // N.B. This cannot be the final DecRef, as the caller also // holds a reference. - s.DecRef() + s.DecRef(ctx) return } } diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go index 8243bb93e..b07e1c1bd 100644 --- a/pkg/sentry/kernel/signalfd/signalfd.go +++ b/pkg/sentry/kernel/signalfd/signalfd.go @@ -76,7 +76,7 @@ func New(ctx context.Context, mask linux.SignalSet) (*fs.File, error) { } // Release implements fs.FileOperations.Release. -func (s *SignalOperations) Release() {} +func (s *SignalOperations) Release(context.Context) {} // Mask returns the signal mask. func (s *SignalOperations) Mask() linux.SignalSet { diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index c4db05bd8..5aee699e7 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -730,17 +730,17 @@ func (t *Task) SyscallRestartBlock() SyscallRestartBlock { func (t *Task) IsChrooted() bool { if VFS2Enabled { realRoot := t.mountNamespaceVFS2.Root() - defer realRoot.DecRef() + defer realRoot.DecRef(t) root := t.fsContext.RootDirectoryVFS2() - defer root.DecRef() + defer root.DecRef(t) return root != realRoot } realRoot := t.tg.mounts.Root() - defer realRoot.DecRef() + defer realRoot.DecRef(t) root := t.fsContext.RootDirectory() if root != nil { - defer root.DecRef() + defer root.DecRef(t) } return root != realRoot } diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index e1ecca99e..fe6ba6041 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -237,7 +237,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { var fdTable *FDTable if opts.NewFiles { - fdTable = t.fdTable.Fork() + fdTable = t.fdTable.Fork(t) } else { fdTable = t.fdTable fdTable.IncRef() @@ -294,7 +294,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { nt, err := t.tg.pidns.owner.NewTask(cfg) if err != nil { if opts.NewThreadGroup { - tg.release() + tg.release(t) } return 0, nil, err } @@ -510,7 +510,7 @@ func (t *Task) Unshare(opts *SharingOptions) error { var oldFDTable *FDTable if opts.NewFiles { oldFDTable = t.fdTable - t.fdTable = oldFDTable.Fork() + t.fdTable = oldFDTable.Fork(t) } var oldFSContext *FSContext if opts.NewFSContext { @@ -519,10 +519,10 @@ func (t *Task) Unshare(opts *SharingOptions) error { } t.mu.Unlock() if oldFDTable != nil { - oldFDTable.DecRef() + oldFDTable.DecRef(t) } if oldFSContext != nil { - oldFSContext.DecRef() + oldFSContext.DecRef(t) } return nil } diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go index 7803b98d0..47c28b8ff 100644 --- a/pkg/sentry/kernel/task_exec.go +++ b/pkg/sentry/kernel/task_exec.go @@ -199,11 +199,11 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState { t.tg.pidns.owner.mu.Unlock() oldFDTable := t.fdTable - t.fdTable = t.fdTable.Fork() - oldFDTable.DecRef() + t.fdTable = t.fdTable.Fork(t) + oldFDTable.DecRef(t) // Remove FDs with the CloseOnExec flag set. - t.fdTable.RemoveIf(func(_ *fs.File, _ *vfs.FileDescription, flags FDFlags) bool { + t.fdTable.RemoveIf(t, func(_ *fs.File, _ *vfs.FileDescription, flags FDFlags) bool { return flags.CloseOnExec }) diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index 231ac548a..c165d6cb1 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -269,12 +269,12 @@ func (*runExitMain) execute(t *Task) taskRunState { // Releasing the MM unblocks a blocked CLONE_VFORK parent. t.unstopVforkParent() - t.fsContext.DecRef() - t.fdTable.DecRef() + t.fsContext.DecRef(t) + t.fdTable.DecRef(t) t.mu.Lock() if t.mountNamespaceVFS2 != nil { - t.mountNamespaceVFS2.DecRef() + t.mountNamespaceVFS2.DecRef(t) t.mountNamespaceVFS2 = nil } t.mu.Unlock() @@ -282,7 +282,7 @@ func (*runExitMain) execute(t *Task) taskRunState { // If this is the last task to exit from the thread group, release the // thread group's resources. if lastExiter { - t.tg.release() + t.tg.release(t) } // Detach tracees. diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go index eeccaa197..ab86ceedc 100644 --- a/pkg/sentry/kernel/task_log.go +++ b/pkg/sentry/kernel/task_log.go @@ -203,6 +203,6 @@ func (t *Task) traceExecEvent(tc *TaskContext) { trace.Logf(t.traceContext, traceCategory, "exec: << unknown >>") return } - defer file.DecRef() + defer file.DecRef(t) trace.Logf(t.traceContext, traceCategory, "exec: %s", file.PathnameWithDeleted(t)) } diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go index 8485fb4b6..64c1e120a 100644 --- a/pkg/sentry/kernel/task_start.go +++ b/pkg/sentry/kernel/task_start.go @@ -102,10 +102,10 @@ func (ts *TaskSet) NewTask(cfg *TaskConfig) (*Task, error) { t, err := ts.newTask(cfg) if err != nil { cfg.TaskContext.release() - cfg.FSContext.DecRef() - cfg.FDTable.DecRef() + cfg.FSContext.DecRef(t) + cfg.FDTable.DecRef(t) if cfg.MountNamespaceVFS2 != nil { - cfg.MountNamespaceVFS2.DecRef() + cfg.MountNamespaceVFS2.DecRef(t) } return nil, err } diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go index 4dfd2c990..0b34c0099 100644 --- a/pkg/sentry/kernel/thread_group.go +++ b/pkg/sentry/kernel/thread_group.go @@ -308,7 +308,7 @@ func (tg *ThreadGroup) Limits() *limits.LimitSet { } // release releases the thread group's resources. -func (tg *ThreadGroup) release() { +func (tg *ThreadGroup) release(t *Task) { // Timers must be destroyed without holding the TaskSet or signal mutexes // since timers send signals with Timer.mu locked. tg.itimerRealTimer.Destroy() @@ -325,7 +325,7 @@ func (tg *ThreadGroup) release() { it.DestroyTimer() } if tg.mounts != nil { - tg.mounts.DecRef() + tg.mounts.DecRef(t) } } diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go index ddeaff3db..20dd1cc21 100644 --- a/pkg/sentry/loader/elf.go +++ b/pkg/sentry/loader/elf.go @@ -281,7 +281,7 @@ func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr } defer func() { if mopts.MappingIdentity != nil { - mopts.MappingIdentity.DecRef() + mopts.MappingIdentity.DecRef(ctx) } }() if err := f.ConfigureMMap(ctx, &mopts); err != nil { @@ -663,7 +663,7 @@ func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error ctx.Infof("Error opening interpreter %s: %v", bin.interpreter, err) return loadedELF{}, nil, err } - defer intFile.DecRef() + defer intFile.DecRef(ctx) interp, err = loadInterpreterELF(ctx, args.MemoryManager, intFile, bin) if err != nil { diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go index 986c7fb4d..8d6802ea3 100644 --- a/pkg/sentry/loader/loader.go +++ b/pkg/sentry/loader/loader.go @@ -154,7 +154,7 @@ func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context return loadedELF{}, nil, nil, nil, err } // Ensure file is release in case the code loops or errors out. - defer args.File.DecRef() + defer args.File.DecRef(ctx) } else { if err := checkIsRegularFile(ctx, args.File, args.Filename); err != nil { return loadedELF{}, nil, nil, nil, err @@ -223,7 +223,7 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V if err != nil { return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux()) } - defer file.DecRef() + defer file.DecRef(ctx) // Load the VDSO. vdsoAddr, err := loadVDSO(ctx, args.MemoryManager, vdso, loaded) @@ -292,7 +292,7 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V m.SetEnvvStart(sl.EnvvStart) m.SetEnvvEnd(sl.EnvvEnd) m.SetAuxv(auxv) - m.SetExecutable(file) + m.SetExecutable(ctx, file) symbolValue, err := getSymbolValueFromVDSO("rt_sigreturn") if err != nil { diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go index c188f6c29..59c92c7e8 100644 --- a/pkg/sentry/memmap/memmap.go +++ b/pkg/sentry/memmap/memmap.go @@ -238,7 +238,7 @@ type MappingIdentity interface { IncRef() // DecRef decrements the MappingIdentity's reference count. - DecRef() + DecRef(ctx context.Context) // MappedName returns the application-visible name shown in // /proc/[pid]/maps. diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 1999ec706..16fea53c4 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -258,8 +258,8 @@ func newAIOMappable(mfp pgalloc.MemoryFileProvider) (*aioMappable, error) { } // DecRef implements refs.RefCounter.DecRef. -func (m *aioMappable) DecRef() { - m.AtomicRefCount.DecRefWithDestructor(func() { +func (m *aioMappable) DecRef(ctx context.Context) { + m.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) { m.mfp.MemoryFile().DecRef(m.fr) }) } @@ -367,7 +367,7 @@ func (mm *MemoryManager) NewAIOContext(ctx context.Context, events uint32) (uint if err != nil { return 0, err } - defer m.DecRef() + defer m.DecRef(ctx) addr, err := mm.MMap(ctx, memmap.MMapOpts{ Length: aioRingBufferSize, MappingIdentity: m, diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go index aac56679b..4d7773f8b 100644 --- a/pkg/sentry/mm/lifecycle.go +++ b/pkg/sentry/mm/lifecycle.go @@ -258,7 +258,7 @@ func (mm *MemoryManager) DecUsers(ctx context.Context) { mm.executable = nil mm.metadataMu.Unlock() if exe != nil { - exe.DecRef() + exe.DecRef(ctx) } mm.activeMu.Lock() diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go index 28e5057f7..0cfd60f6c 100644 --- a/pkg/sentry/mm/metadata.go +++ b/pkg/sentry/mm/metadata.go @@ -15,6 +15,7 @@ package mm import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fsbridge" "gvisor.dev/gvisor/pkg/usermem" @@ -147,7 +148,7 @@ func (mm *MemoryManager) Executable() fsbridge.File { // SetExecutable sets the executable. // // This takes a reference on d. -func (mm *MemoryManager) SetExecutable(file fsbridge.File) { +func (mm *MemoryManager) SetExecutable(ctx context.Context, file fsbridge.File) { mm.metadataMu.Lock() // Grab a new reference. @@ -164,7 +165,7 @@ func (mm *MemoryManager) SetExecutable(file fsbridge.File) { // Do this without holding the lock, since it may wind up doing some // I/O to sync the dirent, etc. if orig != nil { - orig.DecRef() + orig.DecRef(ctx) } } diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index 0e142fb11..4cdb52eb6 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -50,8 +50,8 @@ func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr memmap.F } // DecRef implements refs.RefCounter.DecRef. -func (m *SpecialMappable) DecRef() { - m.AtomicRefCount.DecRefWithDestructor(func() { +func (m *SpecialMappable) DecRef(ctx context.Context) { + m.AtomicRefCount.DecRefWithDestructor(ctx, func(context.Context) { m.mfp.MemoryFile().DecRef(m.fr) }) } diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index 3f496aa9f..e74d4e1c1 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -101,7 +101,7 @@ func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (userme if err != nil { return 0, err } - defer m.DecRef() + defer m.DecRef(ctx) opts.MappingIdentity = m opts.Mappable = m } @@ -1191,7 +1191,7 @@ func (mm *MemoryManager) MSync(ctx context.Context, addr usermem.Addr, length ui mr := vseg.mappableRangeOf(vseg.Range().Intersect(ar)) mm.mappingMu.RUnlock() err := id.Msync(ctx, mr) - id.DecRef() + id.DecRef(ctx) if err != nil { return err } diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go index 16d8207e9..bd751d696 100644 --- a/pkg/sentry/mm/vma.go +++ b/pkg/sentry/mm/vma.go @@ -377,7 +377,7 @@ func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRa vma.mappable.RemoveMapping(ctx, mm, vmaAR, vma.off, vma.canWriteMappableLocked()) } if vma.id != nil { - vma.id.DecRef() + vma.id.DecRef(ctx) } mm.usageAS -= uint64(vmaAR.Length()) if vma.isPrivateDataLocked() { @@ -446,7 +446,7 @@ func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRa } if vma2.id != nil { - vma2.id.DecRef() + vma2.id.DecRef(context.Background()) } return vma1, true } diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go index 8b439a078..70ccf77a7 100644 --- a/pkg/sentry/socket/control/control.go +++ b/pkg/sentry/socket/control/control.go @@ -68,7 +68,7 @@ func NewSCMRights(t *kernel.Task, fds []int32) (SCMRights, error) { for _, fd := range fds { file := t.GetFile(fd) if file == nil { - files.Release() + files.Release(t) return nil, syserror.EBADF } files = append(files, file) @@ -100,9 +100,9 @@ func (fs *RightsFiles) Clone() transport.RightsControlMessage { } // Release implements transport.RightsControlMessage.Release. -func (fs *RightsFiles) Release() { +func (fs *RightsFiles) Release(ctx context.Context) { for _, f := range *fs { - f.DecRef() + f.DecRef(ctx) } *fs = nil } @@ -115,7 +115,7 @@ func rightsFDs(t *kernel.Task, rights SCMRights, cloexec bool, max int) ([]int32 fd, err := t.NewFDFrom(0, files[0], kernel.FDFlags{ CloseOnExec: cloexec, }) - files[0].DecRef() + files[0].DecRef(t) files = files[1:] if err != nil { t.Warningf("Error inserting FD: %v", err) diff --git a/pkg/sentry/socket/control/control_vfs2.go b/pkg/sentry/socket/control/control_vfs2.go index fd08179be..d9621968c 100644 --- a/pkg/sentry/socket/control/control_vfs2.go +++ b/pkg/sentry/socket/control/control_vfs2.go @@ -46,7 +46,7 @@ func NewSCMRightsVFS2(t *kernel.Task, fds []int32) (SCMRightsVFS2, error) { for _, fd := range fds { file := t.GetFileVFS2(fd) if file == nil { - files.Release() + files.Release(t) return nil, syserror.EBADF } files = append(files, file) @@ -78,9 +78,9 @@ func (fs *RightsFilesVFS2) Clone() transport.RightsControlMessage { } // Release implements transport.RightsControlMessage.Release. -func (fs *RightsFilesVFS2) Release() { +func (fs *RightsFilesVFS2) Release(ctx context.Context) { for _, f := range *fs { - f.DecRef() + f.DecRef(ctx) } *fs = nil } @@ -93,7 +93,7 @@ func rightsFDsVFS2(t *kernel.Task, rights SCMRightsVFS2, cloexec bool, max int) fd, err := t.NewFDFromVFS2(0, files[0], kernel.FDFlags{ CloseOnExec: cloexec, }) - files[0].DecRef() + files[0].DecRef(t) files = files[1:] if err != nil { t.Warningf("Error inserting FD: %v", err) diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go index 532a1ea5d..242e6bf76 100644 --- a/pkg/sentry/socket/hostinet/socket.go +++ b/pkg/sentry/socket/hostinet/socket.go @@ -100,12 +100,12 @@ func newSocketFile(ctx context.Context, family int, stype linux.SockType, protoc return nil, syserr.FromError(err) } dirent := socket.NewDirent(ctx, socketDevice) - defer dirent.DecRef() + defer dirent.DecRef(ctx) return fs.NewFile(ctx, dirent, fs.FileFlags{NonBlocking: nonblock, Read: true, Write: true, NonSeekable: true}, s), nil } // Release implements fs.FileOperations.Release. -func (s *socketOpsCommon) Release() { +func (s *socketOpsCommon) Release(context.Context) { fdnotifier.RemoveFD(int32(s.fd)) syscall.Close(s.fd) } @@ -269,7 +269,7 @@ func (s *socketOpsCommon) Accept(t *kernel.Task, peerRequested bool, flags int, syscall.Close(fd) return 0, nil, 0, err } - defer f.DecRef() + defer f.DecRef(t) kfd, kerr = t.NewFDFromVFS2(0, f, kernel.FDFlags{ CloseOnExec: flags&syscall.SOCK_CLOEXEC != 0, @@ -281,7 +281,7 @@ func (s *socketOpsCommon) Accept(t *kernel.Task, peerRequested bool, flags int, syscall.Close(fd) return 0, nil, 0, err } - defer f.DecRef() + defer f.DecRef(t) kfd, kerr = t.NewFDFrom(0, f, kernel.FDFlags{ CloseOnExec: flags&syscall.SOCK_CLOEXEC != 0, diff --git a/pkg/sentry/socket/netlink/provider.go b/pkg/sentry/socket/netlink/provider.go index 0d45e5053..31e374833 100644 --- a/pkg/sentry/socket/netlink/provider.go +++ b/pkg/sentry/socket/netlink/provider.go @@ -97,7 +97,7 @@ func (*socketProvider) Socket(t *kernel.Task, stype linux.SockType, protocol int } d := socket.NewDirent(t, netlinkSocketDevice) - defer d.DecRef() + defer d.DecRef(t) return fs.NewFile(t, d, fs.FileFlags{Read: true, Write: true, NonSeekable: true}, s), nil } diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go index 98ca7add0..68a9b9a96 100644 --- a/pkg/sentry/socket/netlink/socket.go +++ b/pkg/sentry/socket/netlink/socket.go @@ -140,14 +140,14 @@ func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socke // Bind the endpoint for good measure so we can connect to it. The // bound address will never be exposed. if err := ep.Bind(tcpip.FullAddress{Addr: "dummy"}, nil); err != nil { - ep.Close() + ep.Close(t) return nil, err } // Create a connection from which the kernel can write messages. connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect(t) if err != nil { - ep.Close() + ep.Close(t) return nil, err } @@ -164,9 +164,9 @@ func NewSocket(t *kernel.Task, skType linux.SockType, protocol Protocol) (*Socke } // Release implements fs.FileOperations.Release. -func (s *socketOpsCommon) Release() { - s.connection.Release() - s.ep.Close() +func (s *socketOpsCommon) Release(ctx context.Context) { + s.connection.Release(ctx) + s.ep.Close(ctx) if s.bound { s.ports.Release(s.protocol.Protocol(), s.portID) @@ -621,7 +621,7 @@ func (s *socketOpsCommon) sendResponse(ctx context.Context, ms *MessageSet) *sys if len(bufs) > 0 { // RecvMsg never receives the address, so we don't need to send // one. - _, notify, err := s.connection.Send(bufs, cms, tcpip.FullAddress{}) + _, notify, err := s.connection.Send(ctx, bufs, cms, tcpip.FullAddress{}) // If the buffer is full, we simply drop messages, just like // Linux. if err != nil && err != syserr.ErrWouldBlock { @@ -648,7 +648,7 @@ func (s *socketOpsCommon) sendResponse(ctx context.Context, ms *MessageSet) *sys // Add the dump_done_errno payload. m.Put(int64(0)) - _, notify, err := s.connection.Send([][]byte{m.Finalize()}, cms, tcpip.FullAddress{}) + _, notify, err := s.connection.Send(ctx, [][]byte{m.Finalize()}, cms, tcpip.FullAddress{}) if err != nil && err != syserr.ErrWouldBlock { return err } diff --git a/pkg/sentry/socket/netlink/socket_vfs2.go b/pkg/sentry/socket/netlink/socket_vfs2.go index dbcd8b49a..a38d25da9 100644 --- a/pkg/sentry/socket/netlink/socket_vfs2.go +++ b/pkg/sentry/socket/netlink/socket_vfs2.go @@ -57,14 +57,14 @@ func NewVFS2(t *kernel.Task, skType linux.SockType, protocol Protocol) (*SocketV // Bind the endpoint for good measure so we can connect to it. The // bound address will never be exposed. if err := ep.Bind(tcpip.FullAddress{Addr: "dummy"}, nil); err != nil { - ep.Close() + ep.Close(t) return nil, err } // Create a connection from which the kernel can write messages. connection, err := ep.(transport.BoundEndpoint).UnidirectionalConnect(t) if err != nil { - ep.Close() + ep.Close(t) return nil, err } diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 31a168f7e..e4846bc0b 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -330,7 +330,7 @@ func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue } dirent := socket.NewDirent(t, netstackDevice) - defer dirent.DecRef() + defer dirent.DecRef(t) return fs.NewFile(t, dirent, fs.FileFlags{Read: true, Write: true, NonSeekable: true}, &SocketOperations{ socketOpsCommon: socketOpsCommon{ Queue: queue, @@ -479,7 +479,7 @@ func (s *socketOpsCommon) fetchReadView() *syserr.Error { } // Release implements fs.FileOperations.Release. -func (s *socketOpsCommon) Release() { +func (s *socketOpsCommon) Release(context.Context) { s.Endpoint.Close() } @@ -854,7 +854,7 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, if err != nil { return 0, nil, 0, err } - defer ns.DecRef() + defer ns.DecRef(t) if flags&linux.SOCK_NONBLOCK != 0 { flags := ns.Flags() diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go index a9025b0ec..3335e7430 100644 --- a/pkg/sentry/socket/netstack/netstack_vfs2.go +++ b/pkg/sentry/socket/netstack/netstack_vfs2.go @@ -169,7 +169,7 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block if err != nil { return 0, nil, 0, err } - defer ns.DecRef() + defer ns.DecRef(t) if err := ns.SetStatusFlags(t, t.Credentials(), uint32(flags&linux.SOCK_NONBLOCK)); err != nil { return 0, nil, 0, syserr.FromError(err) diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go index d112757fb..04b259d27 100644 --- a/pkg/sentry/socket/socket.go +++ b/pkg/sentry/socket/socket.go @@ -46,8 +46,8 @@ type ControlMessages struct { } // Release releases Unix domain socket credentials and rights. -func (c *ControlMessages) Release() { - c.Unix.Release() +func (c *ControlMessages) Release(ctx context.Context) { + c.Unix.Release(ctx) } // Socket is an interface combining fs.FileOperations and SocketOps, diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go index a1e49cc57..c67b602f0 100644 --- a/pkg/sentry/socket/unix/transport/connectioned.go +++ b/pkg/sentry/socket/unix/transport/connectioned.go @@ -211,7 +211,7 @@ func (e *connectionedEndpoint) Listening() bool { // The socket will be a fresh state after a call to close and may be reused. // That is, close may be used to "unbind" or "disconnect" the socket in error // paths. -func (e *connectionedEndpoint) Close() { +func (e *connectionedEndpoint) Close(ctx context.Context) { e.Lock() var c ConnectedEndpoint var r Receiver @@ -233,7 +233,7 @@ func (e *connectionedEndpoint) Close() { case e.Listening(): close(e.acceptedChan) for n := range e.acceptedChan { - n.Close() + n.Close(ctx) } e.acceptedChan = nil e.path = "" @@ -241,11 +241,11 @@ func (e *connectionedEndpoint) Close() { e.Unlock() if c != nil { c.CloseNotify() - c.Release() + c.Release(ctx) } if r != nil { r.CloseNotify() - r.Release() + r.Release(ctx) } } @@ -340,7 +340,7 @@ func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce Conn return nil default: // Busy; return ECONNREFUSED per spec. - ne.Close() + ne.Close(ctx) e.Unlock() ce.Unlock() return syserr.ErrConnectionRefused diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go index 4b06d63ac..70ee8f9b8 100644 --- a/pkg/sentry/socket/unix/transport/connectionless.go +++ b/pkg/sentry/socket/unix/transport/connectionless.go @@ -54,10 +54,10 @@ func (e *connectionlessEndpoint) isBound() bool { // Close puts the endpoint in a closed state and frees all resources associated // with it. -func (e *connectionlessEndpoint) Close() { +func (e *connectionlessEndpoint) Close(ctx context.Context) { e.Lock() if e.connected != nil { - e.connected.Release() + e.connected.Release(ctx) e.connected = nil } @@ -71,7 +71,7 @@ func (e *connectionlessEndpoint) Close() { e.Unlock() r.CloseNotify() - r.Release() + r.Release(ctx) } // BidirectionalConnect implements BoundEndpoint.BidirectionalConnect. @@ -108,10 +108,10 @@ func (e *connectionlessEndpoint) SendMsg(ctx context.Context, data [][]byte, c C if err != nil { return 0, syserr.ErrInvalidEndpointState } - defer connected.Release() + defer connected.Release(ctx) e.Lock() - n, notify, err := connected.Send(data, c, tcpip.FullAddress{Addr: tcpip.Address(e.path)}) + n, notify, err := connected.Send(ctx, data, c, tcpip.FullAddress{Addr: tcpip.Address(e.path)}) e.Unlock() if notify { @@ -135,7 +135,7 @@ func (e *connectionlessEndpoint) Connect(ctx context.Context, server BoundEndpoi e.Lock() if e.connected != nil { - e.connected.Release() + e.connected.Release(ctx) } e.connected = connected e.Unlock() diff --git a/pkg/sentry/socket/unix/transport/queue.go b/pkg/sentry/socket/unix/transport/queue.go index d8f3ad63d..ef6043e19 100644 --- a/pkg/sentry/socket/unix/transport/queue.go +++ b/pkg/sentry/socket/unix/transport/queue.go @@ -15,6 +15,7 @@ package transport import ( + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserr" @@ -57,10 +58,10 @@ func (q *queue) Close() { // Both the read and write queues must be notified after resetting: // q.ReaderQueue.Notify(waiter.EventIn) // q.WriterQueue.Notify(waiter.EventOut) -func (q *queue) Reset() { +func (q *queue) Reset(ctx context.Context) { q.mu.Lock() for cur := q.dataList.Front(); cur != nil; cur = cur.Next() { - cur.Release() + cur.Release(ctx) } q.dataList.Reset() q.used = 0 @@ -68,8 +69,8 @@ func (q *queue) Reset() { } // DecRef implements RefCounter.DecRef with destructor q.Reset. -func (q *queue) DecRef() { - q.DecRefWithDestructor(q.Reset) +func (q *queue) DecRef(ctx context.Context) { + q.DecRefWithDestructor(ctx, q.Reset) // We don't need to notify after resetting because no one cares about // this queue after all references have been dropped. } @@ -111,7 +112,7 @@ func (q *queue) IsWritable() bool { // // If notify is true, ReaderQueue.Notify must be called: // q.ReaderQueue.Notify(waiter.EventIn) -func (q *queue) Enqueue(data [][]byte, c ControlMessages, from tcpip.FullAddress, discardEmpty bool, truncate bool) (l int64, notify bool, err *syserr.Error) { +func (q *queue) Enqueue(ctx context.Context, data [][]byte, c ControlMessages, from tcpip.FullAddress, discardEmpty bool, truncate bool) (l int64, notify bool, err *syserr.Error) { q.mu.Lock() if q.closed { @@ -124,7 +125,7 @@ func (q *queue) Enqueue(data [][]byte, c ControlMessages, from tcpip.FullAddress } if discardEmpty && l == 0 { q.mu.Unlock() - c.Release() + c.Release(ctx) return 0, false, nil } diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go index 2f1b127df..475d7177e 100644 --- a/pkg/sentry/socket/unix/transport/unix.go +++ b/pkg/sentry/socket/unix/transport/unix.go @@ -37,7 +37,7 @@ type RightsControlMessage interface { Clone() RightsControlMessage // Release releases any resources owned by the RightsControlMessage. - Release() + Release(ctx context.Context) } // A CredentialsControlMessage is a control message containing Unix credentials. @@ -74,9 +74,9 @@ func (c *ControlMessages) Clone() ControlMessages { } // Release releases both the credentials and the rights. -func (c *ControlMessages) Release() { +func (c *ControlMessages) Release(ctx context.Context) { if c.Rights != nil { - c.Rights.Release() + c.Rights.Release(ctx) } *c = ControlMessages{} } @@ -90,7 +90,7 @@ type Endpoint interface { // Close puts the endpoint in a closed state and frees all resources // associated with it. - Close() + Close(ctx context.Context) // RecvMsg reads data and a control message from the endpoint. This method // does not block if there is no data pending. @@ -252,7 +252,7 @@ type BoundEndpoint interface { // Release releases any resources held by the BoundEndpoint. It must be // called before dropping all references to a BoundEndpoint returned by a // function. - Release() + Release(ctx context.Context) } // message represents a message passed over a Unix domain socket. @@ -281,8 +281,8 @@ func (m *message) Length() int64 { } // Release releases any resources held by the message. -func (m *message) Release() { - m.Control.Release() +func (m *message) Release(ctx context.Context) { + m.Control.Release(ctx) } // Peek returns a copy of the message. @@ -304,7 +304,7 @@ type Receiver interface { // See Endpoint.RecvMsg for documentation on shared arguments. // // notify indicates if RecvNotify should be called. - Recv(data [][]byte, creds bool, numRights int, peek bool) (recvLen, msgLen int64, cm ControlMessages, CMTruncated bool, source tcpip.FullAddress, notify bool, err *syserr.Error) + Recv(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool) (recvLen, msgLen int64, cm ControlMessages, CMTruncated bool, source tcpip.FullAddress, notify bool, err *syserr.Error) // RecvNotify notifies the Receiver of a successful Recv. This must not be // called while holding any endpoint locks. @@ -333,7 +333,7 @@ type Receiver interface { // Release releases any resources owned by the Receiver. It should be // called before droping all references to a Receiver. - Release() + Release(ctx context.Context) } // queueReceiver implements Receiver for datagram sockets. @@ -344,7 +344,7 @@ type queueReceiver struct { } // Recv implements Receiver.Recv. -func (q *queueReceiver) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { +func (q *queueReceiver) Recv(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { var m *message var notify bool var err *syserr.Error @@ -398,8 +398,8 @@ func (q *queueReceiver) RecvMaxQueueSize() int64 { } // Release implements Receiver.Release. -func (q *queueReceiver) Release() { - q.readQueue.DecRef() +func (q *queueReceiver) Release(ctx context.Context) { + q.readQueue.DecRef(ctx) } // streamQueueReceiver implements Receiver for stream sockets. @@ -456,7 +456,7 @@ func (q *streamQueueReceiver) RecvMaxQueueSize() int64 { } // Recv implements Receiver.Recv. -func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { +func (q *streamQueueReceiver) Recv(ctx context.Context, data [][]byte, wantCreds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) { q.mu.Lock() defer q.mu.Unlock() @@ -502,7 +502,7 @@ func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights int, var cmTruncated bool if c.Rights != nil && numRights == 0 { - c.Rights.Release() + c.Rights.Release(ctx) c.Rights = nil cmTruncated = true } @@ -557,7 +557,7 @@ func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights int, // Consume rights. if numRights == 0 { cmTruncated = true - q.control.Rights.Release() + q.control.Rights.Release(ctx) } else { c.Rights = q.control.Rights haveRights = true @@ -582,7 +582,7 @@ type ConnectedEndpoint interface { // // syserr.ErrWouldBlock can be returned along with a partial write if // the caller should block to send the rest of the data. - Send(data [][]byte, c ControlMessages, from tcpip.FullAddress) (n int64, notify bool, err *syserr.Error) + Send(ctx context.Context, data [][]byte, c ControlMessages, from tcpip.FullAddress) (n int64, notify bool, err *syserr.Error) // SendNotify notifies the ConnectedEndpoint of a successful Send. This // must not be called while holding any endpoint locks. @@ -616,7 +616,7 @@ type ConnectedEndpoint interface { // Release releases any resources owned by the ConnectedEndpoint. It should // be called before droping all references to a ConnectedEndpoint. - Release() + Release(ctx context.Context) // CloseUnread sets the fact that this end is closed with unread data to // the peer socket. @@ -654,7 +654,7 @@ func (e *connectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) } // Send implements ConnectedEndpoint.Send. -func (e *connectedEndpoint) Send(data [][]byte, c ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) { +func (e *connectedEndpoint) Send(ctx context.Context, data [][]byte, c ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) { discardEmpty := false truncate := false if e.endpoint.Type() == linux.SOCK_STREAM { @@ -669,7 +669,7 @@ func (e *connectedEndpoint) Send(data [][]byte, c ControlMessages, from tcpip.Fu truncate = true } - return e.writeQueue.Enqueue(data, c, from, discardEmpty, truncate) + return e.writeQueue.Enqueue(ctx, data, c, from, discardEmpty, truncate) } // SendNotify implements ConnectedEndpoint.SendNotify. @@ -707,8 +707,8 @@ func (e *connectedEndpoint) SendMaxQueueSize() int64 { } // Release implements ConnectedEndpoint.Release. -func (e *connectedEndpoint) Release() { - e.writeQueue.DecRef() +func (e *connectedEndpoint) Release(ctx context.Context) { + e.writeQueue.DecRef(ctx) } // CloseUnread implements ConnectedEndpoint.CloseUnread. @@ -798,7 +798,7 @@ func (e *baseEndpoint) RecvMsg(ctx context.Context, data [][]byte, creds bool, n return 0, 0, ControlMessages{}, false, syserr.ErrNotConnected } - recvLen, msgLen, cms, cmt, a, notify, err := e.receiver.Recv(data, creds, numRights, peek) + recvLen, msgLen, cms, cmt, a, notify, err := e.receiver.Recv(ctx, data, creds, numRights, peek) e.Unlock() if err != nil { return 0, 0, ControlMessages{}, false, err @@ -827,7 +827,7 @@ func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMess return 0, syserr.ErrAlreadyConnected } - n, notify, err := e.connected.Send(data, c, tcpip.FullAddress{Addr: tcpip.Address(e.path)}) + n, notify, err := e.connected.Send(ctx, data, c, tcpip.FullAddress{Addr: tcpip.Address(e.path)}) e.Unlock() if notify { @@ -1001,6 +1001,6 @@ func (e *baseEndpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { } // Release implements BoundEndpoint.Release. -func (*baseEndpoint) Release() { +func (*baseEndpoint) Release(context.Context) { // Binding a baseEndpoint doesn't take a reference. } diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go index 0482d33cf..2b8454edb 100644 --- a/pkg/sentry/socket/unix/unix.go +++ b/pkg/sentry/socket/unix/unix.go @@ -62,7 +62,7 @@ type SocketOperations struct { // New creates a new unix socket. func New(ctx context.Context, endpoint transport.Endpoint, stype linux.SockType) *fs.File { dirent := socket.NewDirent(ctx, unixSocketDevice) - defer dirent.DecRef() + defer dirent.DecRef(ctx) return NewWithDirent(ctx, dirent, endpoint, stype, fs.FileFlags{Read: true, Write: true, NonSeekable: true}) } @@ -97,17 +97,17 @@ type socketOpsCommon struct { } // DecRef implements RefCounter.DecRef. -func (s *socketOpsCommon) DecRef() { - s.DecRefWithDestructor(func() { - s.ep.Close() +func (s *socketOpsCommon) DecRef(ctx context.Context) { + s.DecRefWithDestructor(ctx, func(context.Context) { + s.ep.Close(ctx) }) } // Release implemements fs.FileOperations.Release. -func (s *socketOpsCommon) Release() { +func (s *socketOpsCommon) Release(ctx context.Context) { // Release only decrements a reference on s because s may be referenced in // the abstract socket namespace. - s.DecRef() + s.DecRef(ctx) } func (s *socketOpsCommon) isPacket() bool { @@ -234,7 +234,7 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, } ns := New(t, ep, s.stype) - defer ns.DecRef() + defer ns.DecRef(t) if flags&linux.SOCK_NONBLOCK != 0 { flags := ns.Flags() @@ -284,7 +284,7 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { if t.IsNetworkNamespaced() { return syserr.ErrInvalidEndpointState } - if err := t.AbstractSockets().Bind(p[1:], bep, s); err != nil { + if err := t.AbstractSockets().Bind(t, p[1:], bep, s); err != nil { // syserr.ErrPortInUse corresponds to EADDRINUSE. return syserr.ErrPortInUse } @@ -294,7 +294,7 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { var name string cwd := t.FSContext().WorkingDirectory() - defer cwd.DecRef() + defer cwd.DecRef(t) // Is there no slash at all? if !strings.Contains(p, "/") { @@ -302,7 +302,7 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { name = p } else { root := t.FSContext().RootDirectory() - defer root.DecRef() + defer root.DecRef(t) // Find the last path component, we know that something follows // that final slash, otherwise extractPath() would have failed. lastSlash := strings.LastIndex(p, "/") @@ -318,7 +318,7 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { // No path available. return syserr.ErrNoSuchFile } - defer d.DecRef() + defer d.DecRef(t) name = p[lastSlash+1:] } @@ -332,7 +332,7 @@ func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { if err != nil { return syserr.ErrPortInUse } - childDir.DecRef() + childDir.DecRef(t) } return nil @@ -378,9 +378,9 @@ func extractEndpoint(t *kernel.Task, sockaddr []byte) (transport.BoundEndpoint, FollowFinalSymlink: true, } ep, e := t.Kernel().VFS().BoundEndpointAt(t, t.Credentials(), &pop, &vfs.BoundEndpointOptions{path}) - root.DecRef() + root.DecRef(t) if relPath { - start.DecRef() + start.DecRef(t) } if e != nil { return nil, syserr.FromError(e) @@ -393,15 +393,15 @@ func extractEndpoint(t *kernel.Task, sockaddr []byte) (transport.BoundEndpoint, cwd := t.FSContext().WorkingDirectory() remainingTraversals := uint(fs.DefaultTraversalLimit) d, e := t.MountNamespace().FindInode(t, root, cwd, path, &remainingTraversals) - cwd.DecRef() - root.DecRef() + cwd.DecRef(t) + root.DecRef(t) if e != nil { return nil, syserr.FromError(e) } // Extract the endpoint if one is there. ep := d.Inode.BoundEndpoint(path) - d.DecRef() + d.DecRef(t) if ep == nil { // No socket! return nil, syserr.ErrConnectionRefused @@ -415,7 +415,7 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool if err != nil { return err } - defer ep.Release() + defer ep.Release(t) // Connect the server endpoint. err = s.ep.Connect(t, ep) @@ -473,7 +473,7 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b if err != nil { return 0, err } - defer ep.Release() + defer ep.Release(t) w.To = ep if ep.Passcred() && w.Control.Credentials == nil { diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go index 05c16fcfe..dfa25241a 100644 --- a/pkg/sentry/socket/unix/unix_vfs2.go +++ b/pkg/sentry/socket/unix/unix_vfs2.go @@ -136,7 +136,7 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block if err != nil { return 0, nil, 0, err } - defer ns.DecRef() + defer ns.DecRef(t) if flags&linux.SOCK_NONBLOCK != 0 { ns.SetStatusFlags(t, t.Credentials(), linux.SOCK_NONBLOCK) @@ -183,19 +183,19 @@ func (s *SocketVFS2) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { if t.IsNetworkNamespaced() { return syserr.ErrInvalidEndpointState } - if err := t.AbstractSockets().Bind(p[1:], bep, s); err != nil { + if err := t.AbstractSockets().Bind(t, p[1:], bep, s); err != nil { // syserr.ErrPortInUse corresponds to EADDRINUSE. return syserr.ErrPortInUse } } else { path := fspath.Parse(p) root := t.FSContext().RootDirectoryVFS2() - defer root.DecRef() + defer root.DecRef(t) start := root relPath := !path.Absolute if relPath { start = t.FSContext().WorkingDirectoryVFS2() - defer start.DecRef() + defer start.DecRef(t) } pop := vfs.PathOperation{ Root: root, @@ -333,7 +333,7 @@ func (*providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int) f, err := NewSockfsFile(t, ep, stype) if err != nil { - ep.Close() + ep.Close(t) return nil, err } return f, nil @@ -357,14 +357,14 @@ func (*providerVFS2) Pair(t *kernel.Task, stype linux.SockType, protocol int) (* ep1, ep2 := transport.NewPair(t, stype, t.Kernel()) s1, err := NewSockfsFile(t, ep1, stype) if err != nil { - ep1.Close() - ep2.Close() + ep1.Close(t) + ep2.Close(t) return nil, nil, err } s2, err := NewSockfsFile(t, ep2, stype) if err != nil { - s1.DecRef() - ep2.Close() + s1.DecRef(t) + ep2.Close(t) return nil, nil, err } diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go index 68ca537c8..87b239730 100644 --- a/pkg/sentry/strace/strace.go +++ b/pkg/sentry/strace/strace.go @@ -147,14 +147,14 @@ func fd(t *kernel.Task, fd int32) string { root := t.FSContext().RootDirectory() if root != nil { - defer root.DecRef() + defer root.DecRef(t) } if fd == linux.AT_FDCWD { wd := t.FSContext().WorkingDirectory() var name string if wd != nil { - defer wd.DecRef() + defer wd.DecRef(t) name, _ = wd.FullName(root) } else { name = "(unknown cwd)" @@ -167,7 +167,7 @@ func fd(t *kernel.Task, fd int32) string { // Cast FD to uint64 to avoid printing negative hex. return fmt.Sprintf("%#x (bad FD)", uint64(fd)) } - defer file.DecRef() + defer file.DecRef(t) name, _ := file.Dirent.FullName(root) return fmt.Sprintf("%#x %s", fd, name) @@ -175,12 +175,12 @@ func fd(t *kernel.Task, fd int32) string { func fdVFS2(t *kernel.Task, fd int32) string { root := t.FSContext().RootDirectoryVFS2() - defer root.DecRef() + defer root.DecRef(t) vfsObj := root.Mount().Filesystem().VirtualFilesystem() if fd == linux.AT_FDCWD { wd := t.FSContext().WorkingDirectoryVFS2() - defer wd.DecRef() + defer wd.DecRef(t) name, _ := vfsObj.PathnameWithDeleted(t, root, wd) return fmt.Sprintf("AT_FDCWD %s", name) @@ -191,7 +191,7 @@ func fdVFS2(t *kernel.Task, fd int32) string { // Cast FD to uint64 to avoid printing negative hex. return fmt.Sprintf("%#x (bad FD)", uint64(fd)) } - defer file.DecRef() + defer file.DecRef(t) name, _ := vfsObj.PathnameWithDeleted(t, root, file.VirtualDentry()) return fmt.Sprintf("%#x %s", fd, name) diff --git a/pkg/sentry/syscalls/epoll.go b/pkg/sentry/syscalls/epoll.go index d9fb808c0..d23a0068a 100644 --- a/pkg/sentry/syscalls/epoll.go +++ b/pkg/sentry/syscalls/epoll.go @@ -28,7 +28,7 @@ import ( // CreateEpoll implements the epoll_create(2) linux syscall. func CreateEpoll(t *kernel.Task, closeOnExec bool) (int32, error) { file := epoll.NewEventPoll(t) - defer file.DecRef() + defer file.DecRef(t) fd, err := t.NewFDFrom(0, file, kernel.FDFlags{ CloseOnExec: closeOnExec, @@ -47,14 +47,14 @@ func AddEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, mask if epollfile == nil { return syserror.EBADF } - defer epollfile.DecRef() + defer epollfile.DecRef(t) // Get the target file id. file := t.GetFile(fd) if file == nil { return syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the epollPoll operations. e, ok := epollfile.FileOperations.(*epoll.EventPoll) @@ -73,14 +73,14 @@ func UpdateEpoll(t *kernel.Task, epfd int32, fd int32, flags epoll.EntryFlags, m if epollfile == nil { return syserror.EBADF } - defer epollfile.DecRef() + defer epollfile.DecRef(t) // Get the target file id. file := t.GetFile(fd) if file == nil { return syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the epollPoll operations. e, ok := epollfile.FileOperations.(*epoll.EventPoll) @@ -99,14 +99,14 @@ func RemoveEpoll(t *kernel.Task, epfd int32, fd int32) error { if epollfile == nil { return syserror.EBADF } - defer epollfile.DecRef() + defer epollfile.DecRef(t) // Get the target file id. file := t.GetFile(fd) if file == nil { return syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the epollPoll operations. e, ok := epollfile.FileOperations.(*epoll.EventPoll) @@ -115,7 +115,7 @@ func RemoveEpoll(t *kernel.Task, epfd int32, fd int32) error { } // Try to remove the entry. - return e.RemoveEntry(epoll.FileIdentifier{file, fd}) + return e.RemoveEntry(t, epoll.FileIdentifier{file, fd}) } // WaitEpoll implements the epoll_wait(2) linux syscall. @@ -125,7 +125,7 @@ func WaitEpoll(t *kernel.Task, fd int32, max int, timeout int) ([]linux.EpollEve if epollfile == nil { return nil, syserror.EBADF } - defer epollfile.DecRef() + defer epollfile.DecRef(t) // Extract the epollPoll operations. e, ok := epollfile.FileOperations.(*epoll.EventPoll) diff --git a/pkg/sentry/syscalls/linux/sys_aio.go b/pkg/sentry/syscalls/linux/sys_aio.go index ba2557c52..e9d64dec5 100644 --- a/pkg/sentry/syscalls/linux/sys_aio.go +++ b/pkg/sentry/syscalls/linux/sys_aio.go @@ -247,7 +247,7 @@ func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *linu ev.Result = -int64(kernel.ExtractErrno(err, 0)) } - file.DecRef() + file.DecRef(ctx) // Queue the result for delivery. actx.FinishRequest(ev) @@ -257,7 +257,7 @@ func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr usermem.Addr, cb *linu // wake up. if eventFile != nil { eventFile.FileOperations.(*eventfd.EventOperations).Signal(1) - eventFile.DecRef() + eventFile.DecRef(ctx) } } } @@ -269,7 +269,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr user // File not found. return syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Was there an eventFD? Extract it. var eventFile *fs.File @@ -279,7 +279,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr user // Bad FD. return syserror.EBADF } - defer eventFile.DecRef() + defer eventFile.DecRef(t) // Check that it is an eventfd. if _, ok := eventFile.FileOperations.(*eventfd.EventOperations); !ok { diff --git a/pkg/sentry/syscalls/linux/sys_eventfd.go b/pkg/sentry/syscalls/linux/sys_eventfd.go index ed3413ca6..3b4f879e4 100644 --- a/pkg/sentry/syscalls/linux/sys_eventfd.go +++ b/pkg/sentry/syscalls/linux/sys_eventfd.go @@ -37,7 +37,7 @@ func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc event.SetFlags(fs.SettableFileFlags{ NonBlocking: flags&linux.EFD_NONBLOCK != 0, }) - defer event.DecRef() + defer event.DecRef(t) fd, err := t.NewFDFrom(0, event, kernel.FDFlags{ CloseOnExec: flags&linux.EFD_CLOEXEC != 0, diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 8cf6401e7..1bc9b184e 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -40,7 +40,7 @@ func fileOpAt(t *kernel.Task, dirFD int32, path string, fn func(root *fs.Dirent, // Common case: we are accessing a file in the root. root := t.FSContext().RootDirectory() err := fn(root, root, name, linux.MaxSymlinkTraversals) - root.DecRef() + root.DecRef(t) return err } else if dir == "." && dirFD == linux.AT_FDCWD { // Common case: we are accessing a file relative to the current @@ -48,8 +48,8 @@ func fileOpAt(t *kernel.Task, dirFD int32, path string, fn func(root *fs.Dirent, wd := t.FSContext().WorkingDirectory() root := t.FSContext().RootDirectory() err := fn(root, wd, name, linux.MaxSymlinkTraversals) - wd.DecRef() - root.DecRef() + wd.DecRef(t) + root.DecRef(t) return err } @@ -97,19 +97,19 @@ func fileOpOn(t *kernel.Task, dirFD int32, path string, resolve bool, fn func(ro } else { d, err = t.MountNamespace().FindLink(t, root, rel, path, &remainingTraversals) } - root.DecRef() + root.DecRef(t) if wd != nil { - wd.DecRef() + wd.DecRef(t) } if f != nil { - f.DecRef() + f.DecRef(t) } if err != nil { return err } err = fn(root, d, remainingTraversals) - d.DecRef() + d.DecRef(t) return err } @@ -186,7 +186,7 @@ func openAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint) (fd uint if err != nil { return syserror.ConvertIntr(err, kernel.ERESTARTSYS) } - defer file.DecRef() + defer file.DecRef(t) // Success. newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{ @@ -242,7 +242,7 @@ func mknodAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode if err != nil { return err } - file.DecRef() + file.DecRef(t) return nil case linux.ModeNamedPipe: @@ -332,7 +332,7 @@ func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode l if err != nil { break } - defer found.DecRef() + defer found.DecRef(t) // We found something (possibly a symlink). If the // O_EXCL flag was passed, then we can immediately @@ -357,7 +357,7 @@ func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode l resolved, err = found.Inode.Getlink(t) if err == nil { // No more resolution necessary. - defer resolved.DecRef() + defer resolved.DecRef(t) break } if err != fs.ErrResolveViaReadlink { @@ -384,7 +384,7 @@ func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode l if err != nil { break } - defer newParent.DecRef() + defer newParent.DecRef(t) // Repeat the process with the parent and name of the // symlink target. @@ -416,7 +416,7 @@ func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode l if err != nil { return syserror.ConvertIntr(err, kernel.ERESTARTSYS) } - defer newFile.DecRef() + defer newFile.DecRef(t) case syserror.ENOENT: // File does not exist. Proceed with creation. @@ -432,7 +432,7 @@ func createAt(t *kernel.Task, dirFD int32, addr usermem.Addr, flags uint, mode l // No luck, bail. return err } - defer newFile.DecRef() + defer newFile.DecRef(t) found = newFile.Dirent default: return err @@ -596,7 +596,7 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Shared flags between file and socket. switch request { @@ -671,9 +671,9 @@ func Getcwd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal addr := args[0].Pointer() size := args[1].SizeT() cwd := t.FSContext().WorkingDirectory() - defer cwd.DecRef() + defer cwd.DecRef(t) root := t.FSContext().RootDirectory() - defer root.DecRef() + defer root.DecRef(t) // Get our fullname from the root and preprend unreachable if the root was // unreachable from our current dirent this is the same behavior as on linux. @@ -722,7 +722,7 @@ func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return err } - t.FSContext().SetRootDirectory(d) + t.FSContext().SetRootDirectory(t, d) return nil }) } @@ -747,7 +747,7 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return err } - t.FSContext().SetWorkingDirectory(d) + t.FSContext().SetWorkingDirectory(t, d) return nil }) } @@ -760,7 +760,7 @@ func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Is it a directory? if !fs.IsDir(file.Dirent.Inode.StableAttr) { @@ -772,7 +772,7 @@ func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } - t.FSContext().SetWorkingDirectory(file.Dirent) + t.FSContext().SetWorkingDirectory(t, file.Dirent) return 0, nil, nil } @@ -791,7 +791,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) err := file.Flush(t) return 0, nil, handleIOError(t, false /* partial */, err, syserror.EINTR, "close", file) @@ -805,7 +805,7 @@ func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{}) if err != nil { @@ -826,7 +826,7 @@ func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if oldFile == nil { return 0, nil, syserror.EBADF } - defer oldFile.DecRef() + defer oldFile.DecRef(t) return uintptr(newfd), nil, nil } @@ -850,7 +850,7 @@ func Dup3(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if oldFile == nil { return 0, nil, syserror.EBADF } - defer oldFile.DecRef() + defer oldFile.DecRef(t) err := t.NewFDAt(newfd, oldFile, kernel.FDFlags{CloseOnExec: flags&linux.O_CLOEXEC != 0}) if err != nil { @@ -925,7 +925,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) switch cmd { case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC: @@ -1132,7 +1132,7 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // If the FD refers to a pipe or FIFO, return error. if fs.IsPipe(file.Dirent.Inode.StableAttr) { @@ -1171,7 +1171,7 @@ func mkdirAt(t *kernel.Task, dirFD int32, addr usermem.Addr, mode linux.FileMode switch err { case nil: // The directory existed. - defer f.DecRef() + defer f.DecRef(t) return syserror.EEXIST case syserror.EACCES: // Permission denied while walking to the directory. @@ -1349,7 +1349,7 @@ func linkAt(t *kernel.Task, oldDirFD int32, oldAddr usermem.Addr, newDirFD int32 if target == nil { return syserror.EBADF } - defer target.DecRef() + defer target.DecRef(t) if err := mayLinkAt(t, target.Dirent.Inode); err != nil { return err } @@ -1602,7 +1602,7 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Reject truncation if the file flags do not permit this operation. // This is different from truncate(2) above. @@ -1730,7 +1730,7 @@ func chownAt(t *kernel.Task, fd int32, addr usermem.Addr, resolve, allowEmpty bo if file == nil { return syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) return chown(t, file.Dirent, uid, gid) } @@ -1768,7 +1768,7 @@ func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) return 0, nil, chown(t, file.Dirent, uid, gid) } @@ -1833,7 +1833,7 @@ func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) return 0, nil, chmod(t, file.Dirent, mode) } @@ -1893,10 +1893,10 @@ func utimes(t *kernel.Task, dirFD int32, addr usermem.Addr, ts fs.TimeSpec, reso if f == nil { return syserror.EBADF } - defer f.DecRef() + defer f.DecRef(t) root := t.FSContext().RootDirectory() - defer root.DecRef() + defer root.DecRef(t) return setTimestamp(root, f.Dirent, linux.MaxSymlinkTraversals) } @@ -2088,7 +2088,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) if offset < 0 || length <= 0 { return 0, nil, syserror.EINVAL @@ -2141,7 +2141,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // flock(2): EBADF fd is not an open file descriptor. return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) nonblocking := operation&linux.LOCK_NB != 0 operation &^= linux.LOCK_NB @@ -2224,8 +2224,8 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S return 0, nil, err } - defer dirent.DecRef() - defer file.DecRef() + defer dirent.DecRef(t) + defer file.DecRef(t) newFD, err := t.NewFDFrom(0, file, kernel.FDFlags{ CloseOnExec: cloExec, diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go index f04d78856..9d1b2edb1 100644 --- a/pkg/sentry/syscalls/linux/sys_futex.go +++ b/pkg/sentry/syscalls/linux/sys_futex.go @@ -73,7 +73,7 @@ func futexWaitAbsolute(t *kernel.Task, clockRealtime bool, ts linux.Timespec, fo err = t.BlockWithDeadline(w.C, true, ktime.FromTimespec(ts)) } - t.Futex().WaitComplete(w) + t.Futex().WaitComplete(w, t) return 0, syserror.ConvertIntr(err, kernel.ERESTARTSYS) } @@ -95,7 +95,7 @@ func futexWaitDuration(t *kernel.Task, duration time.Duration, forever bool, add } remaining, err := t.BlockWithTimeout(w.C, !forever, duration) - t.Futex().WaitComplete(w) + t.Futex().WaitComplete(w, t) if err == nil { return 0, nil } @@ -148,7 +148,7 @@ func futexLockPI(t *kernel.Task, ts linux.Timespec, forever bool, addr usermem.A timer.Destroy() } - t.Futex().WaitComplete(w) + t.Futex().WaitComplete(w, t) return syserror.ConvertIntr(err, kernel.ERESTARTSYS) } diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go index b126fecc0..f5699e55d 100644 --- a/pkg/sentry/syscalls/linux/sys_getdents.go +++ b/pkg/sentry/syscalls/linux/sys_getdents.go @@ -68,7 +68,7 @@ func getdents(t *kernel.Task, fd int32, addr usermem.Addr, size int, f func(*dir if dir == nil { return 0, syserror.EBADF } - defer dir.DecRef() + defer dir.DecRef(t) w := &usermem.IOReadWriter{ Ctx: t, diff --git a/pkg/sentry/syscalls/linux/sys_inotify.go b/pkg/sentry/syscalls/linux/sys_inotify.go index b2c7b3444..cf47bb9dd 100644 --- a/pkg/sentry/syscalls/linux/sys_inotify.go +++ b/pkg/sentry/syscalls/linux/sys_inotify.go @@ -40,7 +40,7 @@ func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. NonBlocking: flags&linux.IN_NONBLOCK != 0, } n := fs.NewFile(t, dirent, fileFlags, fs.NewInotify(t)) - defer n.DecRef() + defer n.DecRef(t) fd, err := t.NewFDFrom(0, n, kernel.FDFlags{ CloseOnExec: flags&linux.IN_CLOEXEC != 0, @@ -71,7 +71,7 @@ func fdToInotify(t *kernel.Task, fd int32) (*fs.Inotify, *fs.File, error) { ino, ok := file.FileOperations.(*fs.Inotify) if !ok { // Not an inotify fd. - file.DecRef() + file.DecRef(t) return nil, nil, syserror.EINVAL } @@ -98,7 +98,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern if err != nil { return 0, nil, err } - defer file.DecRef() + defer file.DecRef(t) path, _, err := copyInPath(t, addr, false /* allowEmpty */) if err != nil { @@ -128,6 +128,6 @@ func InotifyRmWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne if err != nil { return 0, nil, err } - defer file.DecRef() - return 0, nil, ino.RmWatch(wd) + defer file.DecRef(t) + return 0, nil, ino.RmWatch(t, wd) } diff --git a/pkg/sentry/syscalls/linux/sys_lseek.go b/pkg/sentry/syscalls/linux/sys_lseek.go index 3f7691eae..1c38f8f4f 100644 --- a/pkg/sentry/syscalls/linux/sys_lseek.go +++ b/pkg/sentry/syscalls/linux/sys_lseek.go @@ -33,7 +33,7 @@ func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) var sw fs.SeekWhence switch whence { diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go index 91694d374..72786b032 100644 --- a/pkg/sentry/syscalls/linux/sys_mmap.go +++ b/pkg/sentry/syscalls/linux/sys_mmap.go @@ -75,7 +75,7 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC } defer func() { if opts.MappingIdentity != nil { - opts.MappingIdentity.DecRef() + opts.MappingIdentity.DecRef(t) } }() @@ -85,7 +85,7 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) flags := file.Flags() // mmap unconditionally requires that the FD is readable. diff --git a/pkg/sentry/syscalls/linux/sys_mount.go b/pkg/sentry/syscalls/linux/sys_mount.go index eb5ff48f5..bd0633564 100644 --- a/pkg/sentry/syscalls/linux/sys_mount.go +++ b/pkg/sentry/syscalls/linux/sys_mount.go @@ -115,7 +115,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall }); err != nil { // Something went wrong. Drop our ref on rootInode before // returning the error. - rootInode.DecRef() + rootInode.DecRef(t) return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/sys_pipe.go b/pkg/sentry/syscalls/linux/sys_pipe.go index 43c510930..3149e4aad 100644 --- a/pkg/sentry/syscalls/linux/sys_pipe.go +++ b/pkg/sentry/syscalls/linux/sys_pipe.go @@ -34,10 +34,10 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) { r, w := pipe.NewConnectedPipe(t, pipe.DefaultPipeSize, usermem.PageSize) r.SetFlags(linuxToFlags(flags).Settable()) - defer r.DecRef() + defer r.DecRef(t) w.SetFlags(linuxToFlags(flags).Settable()) - defer w.DecRef() + defer w.DecRef(t) fds, err := t.NewFDs(0, []*fs.File{r, w}, kernel.FDFlags{ CloseOnExec: flags&linux.O_CLOEXEC != 0, @@ -49,7 +49,7 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags uint) (uintptr, error) { if _, err := t.CopyOut(addr, fds); err != nil { for _, fd := range fds { if file, _ := t.FDTable().Remove(fd); file != nil { - file.DecRef() + file.DecRef(t) } } return 0, err diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index f0198141c..3435bdf77 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -70,7 +70,7 @@ func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan } if ch == nil { - defer file.DecRef() + defer file.DecRef(t) } else { state.file = file state.waiter, _ = waiter.NewChannelEntry(ch) @@ -82,11 +82,11 @@ func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan } // releaseState releases all the pollState in "state". -func releaseState(state []pollState) { +func releaseState(t *kernel.Task, state []pollState) { for i := range state { if state[i].file != nil { state[i].file.EventUnregister(&state[i].waiter) - state[i].file.DecRef() + state[i].file.DecRef(t) } } } @@ -107,7 +107,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time. // result, we stop registering for events but still go through all files // to get their ready masks. state := make([]pollState, len(pfd)) - defer releaseState(state) + defer releaseState(t, state) n := uintptr(0) for i := range pfd { initReadiness(t, &pfd[i], &state[i], ch) @@ -266,7 +266,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add if file == nil { return 0, syserror.EBADF } - file.DecRef() + file.DecRef(t) var mask int16 if (rV & m) != 0 { diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index f92bf8096..64a725296 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -128,7 +128,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // They trying to set exe to a non-file? if !fs.IsFile(file.Dirent.Inode.StableAttr) { @@ -136,7 +136,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } // Set the underlying executable. - t.MemoryManager().SetExecutable(fsbridge.NewFSFile(file)) + t.MemoryManager().SetExecutable(t, fsbridge.NewFSFile(file)) case linux.PR_SET_MM_AUXV, linux.PR_SET_MM_START_CODE, diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go index 071b4bacc..3bbc3fa4b 100644 --- a/pkg/sentry/syscalls/linux/sys_read.go +++ b/pkg/sentry/syscalls/linux/sys_read.go @@ -48,7 +48,7 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the file is readable. if !file.Flags().Read { @@ -84,7 +84,7 @@ func Readahead(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the file is readable. if !file.Flags().Read { @@ -118,7 +118,7 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate and does not overflow. if offset < 0 || offset+int64(size) < 0 { @@ -164,7 +164,7 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the file is readable. if !file.Flags().Read { @@ -195,7 +195,7 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate. if offset < 0 { @@ -244,7 +244,7 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate. if offset < -1 { diff --git a/pkg/sentry/syscalls/linux/sys_shm.go b/pkg/sentry/syscalls/linux/sys_shm.go index 4a8bc24a2..f0ae8fa8e 100644 --- a/pkg/sentry/syscalls/linux/sys_shm.go +++ b/pkg/sentry/syscalls/linux/sys_shm.go @@ -39,7 +39,7 @@ func Shmget(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, err } - defer segment.DecRef() + defer segment.DecRef(t) return uintptr(segment.ID), nil, nil } @@ -66,7 +66,7 @@ func Shmat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if err != nil { return 0, nil, syserror.EINVAL } - defer segment.DecRef() + defer segment.DecRef(t) opts, err := segment.ConfigureAttach(t, addr, shm.AttachOpts{ Execute: flag&linux.SHM_EXEC == linux.SHM_EXEC, @@ -108,7 +108,7 @@ func Shmctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, syserror.EINVAL } - defer segment.DecRef() + defer segment.DecRef(t) stat, err := segment.IPCStat(t) if err == nil { @@ -132,7 +132,7 @@ func Shmctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, syserror.EINVAL } - defer segment.DecRef() + defer segment.DecRef(t) switch cmd { case linux.IPC_SET: @@ -145,7 +145,7 @@ func Shmctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err case linux.IPC_RMID: - segment.MarkDestroyed() + segment.MarkDestroyed(t) return 0, nil, nil case linux.SHM_LOCK, linux.SHM_UNLOCK: diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go index d2b0012ae..20cb1a5cb 100644 --- a/pkg/sentry/syscalls/linux/sys_signal.go +++ b/pkg/sentry/syscalls/linux/sys_signal.go @@ -536,7 +536,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize ui if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Is this a signalfd? if s, ok := file.FileOperations.(*signalfd.SignalOperations); ok { @@ -553,7 +553,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize ui if err != nil { return 0, nil, err } - defer file.DecRef() + defer file.DecRef(t) // Set appropriate flags. file.SetFlags(fs.SettableFileFlags{ diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go index 414fce8e3..fec1c1974 100644 --- a/pkg/sentry/syscalls/linux/sys_socket.go +++ b/pkg/sentry/syscalls/linux/sys_socket.go @@ -200,7 +200,7 @@ func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal s.SetFlags(fs.SettableFileFlags{ NonBlocking: stype&linux.SOCK_NONBLOCK != 0, }) - defer s.DecRef() + defer s.DecRef(t) fd, err := t.NewFDFrom(0, s, kernel.FDFlags{ CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, @@ -235,8 +235,8 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy } s1.SetFlags(fileFlags) s2.SetFlags(fileFlags) - defer s1.DecRef() - defer s2.DecRef() + defer s1.DecRef(t) + defer s2.DecRef(t) // Create the FDs for the sockets. fds, err := t.NewFDs(0, []*fs.File{s1, s2}, kernel.FDFlags{ @@ -250,7 +250,7 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy if _, err := t.CopyOut(socks, fds); err != nil { for _, fd := range fds { if file, _ := t.FDTable().Remove(fd); file != nil { - file.DecRef() + file.DecRef(t) } } return 0, nil, err @@ -270,7 +270,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -301,7 +301,7 @@ func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, f if file == nil { return 0, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -360,7 +360,7 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -387,7 +387,7 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -416,7 +416,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -447,7 +447,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -529,7 +529,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -567,7 +567,7 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -595,7 +595,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -628,7 +628,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -681,7 +681,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -775,7 +775,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i } if !cms.Unix.Empty() { mflags |= linux.MSG_CTRUNC - cms.Release() + cms.Release(t) } if int(msg.Flags) != mflags { @@ -795,7 +795,7 @@ func recvSingleMsg(t *kernel.Task, s socket.Socket, msgPtr usermem.Addr, flags i if e != nil { return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS) } - defer cms.Release() + defer cms.Release(t) controlData := make([]byte, 0, msg.ControlLen) controlData = control.PackControlMessages(t, cms, controlData) @@ -851,7 +851,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flag if file == nil { return 0, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -880,7 +880,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flag } n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0) - cm.Release() + cm.Release(t) if e != nil { return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS) } @@ -924,7 +924,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -962,7 +962,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) @@ -1066,7 +1066,7 @@ func sendSingleMsg(t *kernel.Task, s socket.Socket, file *fs.File, msgPtr userme n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages) err = handleIOError(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendmsg", file) if err != nil { - controlMessages.Release() + controlMessages.Release(t) } return uintptr(n), err } @@ -1084,7 +1084,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags if file == nil { return 0, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.FileOperations.(socket.Socket) diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index 77c78889d..b8846a10a 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -101,7 +101,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if inFile == nil { return 0, nil, syserror.EBADF } - defer inFile.DecRef() + defer inFile.DecRef(t) if !inFile.Flags().Read { return 0, nil, syserror.EBADF @@ -111,7 +111,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if outFile == nil { return 0, nil, syserror.EBADF } - defer outFile.DecRef() + defer outFile.DecRef(t) if !outFile.Flags().Write { return 0, nil, syserror.EBADF @@ -192,13 +192,13 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if outFile == nil { return 0, nil, syserror.EBADF } - defer outFile.DecRef() + defer outFile.DecRef(t) inFile := t.GetFile(inFD) if inFile == nil { return 0, nil, syserror.EBADF } - defer inFile.DecRef() + defer inFile.DecRef(t) // The operation is non-blocking if anything is non-blocking. // @@ -300,13 +300,13 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo if outFile == nil { return 0, nil, syserror.EBADF } - defer outFile.DecRef() + defer outFile.DecRef(t) inFile := t.GetFile(inFD) if inFile == nil { return 0, nil, syserror.EBADF } - defer inFile.DecRef() + defer inFile.DecRef(t) // All files must be pipes. if !fs.IsPipe(inFile.Dirent.Inode.StableAttr) || !fs.IsPipe(outFile.Dirent.Inode.StableAttr) { diff --git a/pkg/sentry/syscalls/linux/sys_stat.go b/pkg/sentry/syscalls/linux/sys_stat.go index 46ebf27a2..a5826f2dd 100644 --- a/pkg/sentry/syscalls/linux/sys_stat.go +++ b/pkg/sentry/syscalls/linux/sys_stat.go @@ -58,7 +58,7 @@ func Fstatat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) return 0, nil, fstat(t, file, statAddr) } @@ -100,7 +100,7 @@ func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) return 0, nil, fstat(t, file, statAddr) } @@ -158,7 +158,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) uattr, err := file.UnstableAttr(t) if err != nil { return 0, nil, err @@ -249,7 +249,7 @@ func Fstatfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) return 0, nil, statfsImpl(t, file.Dirent, statfsAddr) } diff --git a/pkg/sentry/syscalls/linux/sys_sync.go b/pkg/sentry/syscalls/linux/sys_sync.go index 5ad465ae3..f2c0e5069 100644 --- a/pkg/sentry/syscalls/linux/sys_sync.go +++ b/pkg/sentry/syscalls/linux/sys_sync.go @@ -39,7 +39,7 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Use "sync-the-world" for now, it's guaranteed that fd is at least // on the root filesystem. @@ -54,7 +54,7 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) err := file.Fsync(t, 0, fs.FileMaxOffset, fs.SyncAll) return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS) @@ -70,7 +70,7 @@ func Fdatasync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) err := file.Fsync(t, 0, fs.FileMaxOffset, fs.SyncData) return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS) @@ -103,7 +103,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // SYNC_FILE_RANGE_WAIT_BEFORE waits upon write-out of all pages in the // specified range that have already been submitted to the device diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 00915fdde..2d16e4933 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -117,7 +117,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user resolveFinal := flags&linux.AT_SYMLINK_NOFOLLOW == 0 root := t.FSContext().RootDirectory() - defer root.DecRef() + defer root.DecRef(t) var wd *fs.Dirent var executable fsbridge.File @@ -133,7 +133,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user if f == nil { return 0, nil, syserror.EBADF } - defer f.DecRef() + defer f.DecRef(t) closeOnExec = fdFlags.CloseOnExec if atEmptyPath && len(pathname) == 0 { @@ -155,7 +155,7 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user } } if wd != nil { - defer wd.DecRef() + defer wd.DecRef(t) } // Load the new TaskContext. diff --git a/pkg/sentry/syscalls/linux/sys_timerfd.go b/pkg/sentry/syscalls/linux/sys_timerfd.go index cf49b43db..34b03e4ee 100644 --- a/pkg/sentry/syscalls/linux/sys_timerfd.go +++ b/pkg/sentry/syscalls/linux/sys_timerfd.go @@ -43,7 +43,7 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel return 0, nil, syserror.EINVAL } f := timerfd.NewFile(t, c) - defer f.DecRef() + defer f.DecRef(t) f.SetFlags(fs.SettableFileFlags{ NonBlocking: flags&linux.TFD_NONBLOCK != 0, }) @@ -73,7 +73,7 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne if f == nil { return 0, nil, syserror.EBADF } - defer f.DecRef() + defer f.DecRef(t) tf, ok := f.FileOperations.(*timerfd.TimerOperations) if !ok { @@ -107,7 +107,7 @@ func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne if f == nil { return 0, nil, syserror.EBADF } - defer f.DecRef() + defer f.DecRef(t) tf, ok := f.FileOperations.(*timerfd.TimerOperations) if !ok { diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go index 6ec0de96e..485526e28 100644 --- a/pkg/sentry/syscalls/linux/sys_write.go +++ b/pkg/sentry/syscalls/linux/sys_write.go @@ -48,7 +48,7 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the file is writable. if !file.Flags().Write { @@ -85,7 +85,7 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate and does not overflow. if offset < 0 || offset+int64(size) < 0 { @@ -131,7 +131,7 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the file is writable. if !file.Flags().Write { @@ -162,7 +162,7 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate. if offset < 0 { @@ -215,7 +215,7 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate. if offset < -1 { diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index c24946160..97474fd3c 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -49,7 +49,7 @@ func FGetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if f == nil { return 0, nil, syserror.EBADF } - defer f.DecRef() + defer f.DecRef(t) n, err := getXattr(t, f.Dirent, nameAddr, valueAddr, size) if err != nil { @@ -153,7 +153,7 @@ func FSetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if f == nil { return 0, nil, syserror.EBADF } - defer f.DecRef() + defer f.DecRef(t) return 0, nil, setXattr(t, f.Dirent, nameAddr, valueAddr, uint64(size), flags) } @@ -270,7 +270,7 @@ func FListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy if f == nil { return 0, nil, syserror.EBADF } - defer f.DecRef() + defer f.DecRef(t) n, err := listXattr(t, f.Dirent, listAddr, size) if err != nil { @@ -384,7 +384,7 @@ func FRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. if f == nil { return 0, nil, syserror.EBADF } - defer f.DecRef() + defer f.DecRef(t) return 0, nil, removeXattr(t, f.Dirent, nameAddr) } diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go index e5cdefc50..399b4f60c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/aio.go +++ b/pkg/sentry/syscalls/linux/vfs2/aio.go @@ -88,7 +88,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr user if fd == nil { return syserror.EBADF } - defer fd.DecRef() + defer fd.DecRef(t) // Was there an eventFD? Extract it. var eventFD *vfs.FileDescription @@ -97,7 +97,7 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr user if eventFD == nil { return syserror.EBADF } - defer eventFD.DecRef() + defer eventFD.DecRef(t) // Check that it is an eventfd. if _, ok := eventFD.Impl().(*eventfd.EventFileDescription); !ok { @@ -169,7 +169,7 @@ func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr use ev.Result = -int64(kernel.ExtractErrno(err, 0)) } - fd.DecRef() + fd.DecRef(ctx) // Queue the result for delivery. aioCtx.FinishRequest(ev) @@ -179,7 +179,7 @@ func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr use // wake up. if eventFD != nil { eventFD.Impl().(*eventfd.EventFileDescription).Signal(1) - eventFD.DecRef() + eventFD.DecRef(ctx) } } } diff --git a/pkg/sentry/syscalls/linux/vfs2/epoll.go b/pkg/sentry/syscalls/linux/vfs2/epoll.go index 34c90ae3e..c62f03509 100644 --- a/pkg/sentry/syscalls/linux/vfs2/epoll.go +++ b/pkg/sentry/syscalls/linux/vfs2/epoll.go @@ -37,11 +37,11 @@ func EpollCreate1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. return 0, nil, syserror.EINVAL } - file, err := t.Kernel().VFS().NewEpollInstanceFD() + file, err := t.Kernel().VFS().NewEpollInstanceFD(t) if err != nil { return 0, nil, err } - defer file.DecRef() + defer file.DecRef(t) fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{ CloseOnExec: flags&linux.EPOLL_CLOEXEC != 0, @@ -62,11 +62,11 @@ func EpollCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S return 0, nil, syserror.EINVAL } - file, err := t.Kernel().VFS().NewEpollInstanceFD() + file, err := t.Kernel().VFS().NewEpollInstanceFD(t) if err != nil { return 0, nil, err } - defer file.DecRef() + defer file.DecRef(t) fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{}) if err != nil { @@ -86,7 +86,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if epfile == nil { return 0, nil, syserror.EBADF } - defer epfile.DecRef() + defer epfile.DecRef(t) ep, ok := epfile.Impl().(*vfs.EpollInstance) if !ok { return 0, nil, syserror.EINVAL @@ -95,7 +95,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) if epfile == file { return 0, nil, syserror.EINVAL } @@ -135,7 +135,7 @@ func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if epfile == nil { return 0, nil, syserror.EBADF } - defer epfile.DecRef() + defer epfile.DecRef(t) ep, ok := epfile.Impl().(*vfs.EpollInstance) if !ok { return 0, nil, syserror.EINVAL diff --git a/pkg/sentry/syscalls/linux/vfs2/eventfd.go b/pkg/sentry/syscalls/linux/vfs2/eventfd.go index aff1a2070..807f909da 100644 --- a/pkg/sentry/syscalls/linux/vfs2/eventfd.go +++ b/pkg/sentry/syscalls/linux/vfs2/eventfd.go @@ -38,11 +38,11 @@ func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc fileFlags |= linux.O_NONBLOCK } semMode := flags&linux.EFD_SEMAPHORE != 0 - eventfd, err := eventfd.New(vfsObj, initVal, semMode, fileFlags) + eventfd, err := eventfd.New(t, vfsObj, initVal, semMode, fileFlags) if err != nil { return 0, nil, err } - defer eventfd.DecRef() + defer eventfd.DecRef(t) fd, err := t.NewFDFromVFS2(0, eventfd, kernel.FDFlags{ CloseOnExec: flags&linux.EFD_CLOEXEC != 0, diff --git a/pkg/sentry/syscalls/linux/vfs2/execve.go b/pkg/sentry/syscalls/linux/vfs2/execve.go index aef0078a8..066ee0863 100644 --- a/pkg/sentry/syscalls/linux/vfs2/execve.go +++ b/pkg/sentry/syscalls/linux/vfs2/execve.go @@ -71,7 +71,7 @@ func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr user } root := t.FSContext().RootDirectoryVFS2() - defer root.DecRef() + defer root.DecRef(t) var executable fsbridge.File closeOnExec := false if path := fspath.Parse(pathname); dirfd != linux.AT_FDCWD && !path.Absolute { @@ -90,7 +90,7 @@ func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr user } start := dirfile.VirtualDentry() start.IncRef() - dirfile.DecRef() + dirfile.DecRef(t) closeOnExec = dirfileFlags.CloseOnExec file, err := t.Kernel().VFS().OpenAt(t, t.Credentials(), &vfs.PathOperation{ Root: root, @@ -101,19 +101,19 @@ func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr user Flags: linux.O_RDONLY, FileExec: true, }) - start.DecRef() + start.DecRef(t) if err != nil { return 0, nil, err } - defer file.DecRef() + defer file.DecRef(t) executable = fsbridge.NewVFSFile(file) } // Load the new TaskContext. mntns := t.MountNamespaceVFS2() // FIXME(jamieliu): useless refcount change - defer mntns.DecRef() + defer mntns.DecRef(t) wd := t.FSContext().WorkingDirectoryVFS2() - defer wd.DecRef() + defer wd.DecRef(t) remainingTraversals := uint(linux.MaxSymlinkTraversals) loadArgs := loader.LoadArgs{ Opener: fsbridge.NewVFSLookup(mntns, root, wd), diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 67f191551..72ca916a0 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -38,7 +38,7 @@ func Close(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) err := file.OnClose(t) return 0, nil, slinux.HandleIOErrorVFS2(t, false /* partial */, err, syserror.EINTR, "close", file) @@ -52,7 +52,7 @@ func Dup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) newFD, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{}) if err != nil { @@ -72,7 +72,7 @@ func Dup2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if file == nil { return 0, nil, syserror.EBADF } - file.DecRef() + file.DecRef(t) return uintptr(newfd), nil, nil } @@ -101,7 +101,7 @@ func dup3(t *kernel.Task, oldfd, newfd int32, flags uint32) (uintptr, *kernel.Sy if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) err := t.NewFDAtVFS2(newfd, file, kernel.FDFlags{ CloseOnExec: flags&linux.O_CLOEXEC != 0, @@ -121,7 +121,7 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) switch cmd { case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC: @@ -332,7 +332,7 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // If the FD refers to a pipe or FIFO, return error. if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe { diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go index b6d2ddd65..01e0f9010 100644 --- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go +++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go @@ -56,7 +56,7 @@ func linkat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd i if err != nil { return err } - defer oldtpop.Release() + defer oldtpop.Release(t) newpath, err := copyInPath(t, newpathAddr) if err != nil { @@ -66,7 +66,7 @@ func linkat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd i if err != nil { return err } - defer newtpop.Release() + defer newtpop.Release(t) return t.Kernel().VFS().LinkAt(t, t.Credentials(), &oldtpop.pop, &newtpop.pop) } @@ -95,7 +95,7 @@ func mkdirat(t *kernel.Task, dirfd int32, addr usermem.Addr, mode uint) error { if err != nil { return err } - defer tpop.Release() + defer tpop.Release(t) return t.Kernel().VFS().MkdirAt(t, t.Credentials(), &tpop.pop, &vfs.MkdirOptions{ Mode: linux.FileMode(mode & (0777 | linux.S_ISVTX) &^ t.FSContext().Umask()), }) @@ -127,7 +127,7 @@ func mknodat(t *kernel.Task, dirfd int32, addr usermem.Addr, mode linux.FileMode if err != nil { return err } - defer tpop.Release() + defer tpop.Release(t) // "Zero file type is equivalent to type S_IFREG." - mknod(2) if mode.FileType() == 0 { @@ -174,7 +174,7 @@ func openat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, flags uint32, mo if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) file, err := t.Kernel().VFS().OpenAt(t, t.Credentials(), &tpop.pop, &vfs.OpenOptions{ Flags: flags | linux.O_LARGEFILE, @@ -183,7 +183,7 @@ func openat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, flags uint32, mo if err != nil { return 0, nil, err } - defer file.DecRef() + defer file.DecRef(t) fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{ CloseOnExec: flags&linux.O_CLOEXEC != 0, @@ -227,7 +227,7 @@ func renameat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd if err != nil { return err } - defer oldtpop.Release() + defer oldtpop.Release(t) newpath, err := copyInPath(t, newpathAddr) if err != nil { @@ -237,7 +237,7 @@ func renameat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd if err != nil { return err } - defer newtpop.Release() + defer newtpop.Release(t) return t.Kernel().VFS().RenameAt(t, t.Credentials(), &oldtpop.pop, &newtpop.pop, &vfs.RenameOptions{ Flags: flags, @@ -259,7 +259,7 @@ func rmdirat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr) error { if err != nil { return err } - defer tpop.Release() + defer tpop.Release(t) return t.Kernel().VFS().RmdirAt(t, t.Credentials(), &tpop.pop) } @@ -278,7 +278,7 @@ func unlinkat(t *kernel.Task, dirfd int32, pathAddr usermem.Addr) error { if err != nil { return err } - defer tpop.Release() + defer tpop.Release(t) return t.Kernel().VFS().UnlinkAt(t, t.Credentials(), &tpop.pop) } @@ -329,6 +329,6 @@ func symlinkat(t *kernel.Task, targetAddr usermem.Addr, newdirfd int32, linkpath if err != nil { return err } - defer tpop.Release() + defer tpop.Release(t) return t.Kernel().VFS().SymlinkAt(t, t.Credentials(), &tpop.pop, target) } diff --git a/pkg/sentry/syscalls/linux/vfs2/fscontext.go b/pkg/sentry/syscalls/linux/vfs2/fscontext.go index 317409a18..a7d4d2a36 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fscontext.go +++ b/pkg/sentry/syscalls/linux/vfs2/fscontext.go @@ -31,8 +31,8 @@ func Getcwd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal root := t.FSContext().RootDirectoryVFS2() wd := t.FSContext().WorkingDirectoryVFS2() s, err := t.Kernel().VFS().PathnameForGetcwd(t, root, wd) - root.DecRef() - wd.DecRef() + root.DecRef(t) + wd.DecRef(t) if err != nil { return 0, nil, err } @@ -67,7 +67,7 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) vd, err := t.Kernel().VFS().GetDentryAt(t, t.Credentials(), &tpop.pop, &vfs.GetDentryOptions{ CheckSearchable: true, @@ -75,8 +75,8 @@ func Chdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if err != nil { return 0, nil, err } - t.FSContext().SetWorkingDirectoryVFS2(vd) - vd.DecRef() + t.FSContext().SetWorkingDirectoryVFS2(t, vd) + vd.DecRef(t) return 0, nil, nil } @@ -88,7 +88,7 @@ func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) vd, err := t.Kernel().VFS().GetDentryAt(t, t.Credentials(), &tpop.pop, &vfs.GetDentryOptions{ CheckSearchable: true, @@ -96,8 +96,8 @@ func Fchdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, err } - t.FSContext().SetWorkingDirectoryVFS2(vd) - vd.DecRef() + t.FSContext().SetWorkingDirectoryVFS2(t, vd) + vd.DecRef(t) return 0, nil, nil } @@ -117,7 +117,7 @@ func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) vd, err := t.Kernel().VFS().GetDentryAt(t, t.Credentials(), &tpop.pop, &vfs.GetDentryOptions{ CheckSearchable: true, @@ -125,7 +125,7 @@ func Chroot(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, err } - t.FSContext().SetRootDirectoryVFS2(vd) - vd.DecRef() + t.FSContext().SetRootDirectoryVFS2(t, vd) + vd.DecRef(t) return 0, nil, nil } diff --git a/pkg/sentry/syscalls/linux/vfs2/getdents.go b/pkg/sentry/syscalls/linux/vfs2/getdents.go index c7c7bf7ce..5517595b5 100644 --- a/pkg/sentry/syscalls/linux/vfs2/getdents.go +++ b/pkg/sentry/syscalls/linux/vfs2/getdents.go @@ -44,7 +44,7 @@ func getdents(t *kernel.Task, args arch.SyscallArguments, isGetdents64 bool) (ui if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) cb := getGetdentsCallback(t, addr, size, isGetdents64) err := file.IterDirents(t, cb) diff --git a/pkg/sentry/syscalls/linux/vfs2/inotify.go b/pkg/sentry/syscalls/linux/vfs2/inotify.go index 5d98134a5..11753d8e5 100644 --- a/pkg/sentry/syscalls/linux/vfs2/inotify.go +++ b/pkg/sentry/syscalls/linux/vfs2/inotify.go @@ -35,7 +35,7 @@ func InotifyInit1(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. if err != nil { return 0, nil, err } - defer ino.DecRef() + defer ino.DecRef(t) fd, err := t.NewFDFromVFS2(0, ino, kernel.FDFlags{ CloseOnExec: flags&linux.IN_CLOEXEC != 0, @@ -66,7 +66,7 @@ func fdToInotify(t *kernel.Task, fd int32) (*vfs.Inotify, *vfs.FileDescription, ino, ok := f.Impl().(*vfs.Inotify) if !ok { // Not an inotify fd. - f.DecRef() + f.DecRef(t) return nil, nil, syserror.EINVAL } @@ -96,7 +96,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern if err != nil { return 0, nil, err } - defer f.DecRef() + defer f.DecRef(t) path, err := copyInPath(t, addr) if err != nil { @@ -109,12 +109,12 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) d, err := t.Kernel().VFS().GetDentryAt(t, t.Credentials(), &tpop.pop, &vfs.GetDentryOptions{}) if err != nil { return 0, nil, err } - defer d.DecRef() + defer d.DecRef(t) fd, err = ino.AddWatch(d.Dentry(), mask) if err != nil { @@ -132,6 +132,6 @@ func InotifyRmWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne if err != nil { return 0, nil, err } - defer f.DecRef() - return 0, nil, ino.RmWatch(wd) + defer f.DecRef(t) + return 0, nil, ino.RmWatch(t, wd) } diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go index fd6ab94b2..38778a388 100644 --- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go +++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go @@ -29,7 +29,7 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Handle ioctls that apply to all FDs. switch args[1].Int() { diff --git a/pkg/sentry/syscalls/linux/vfs2/lock.go b/pkg/sentry/syscalls/linux/vfs2/lock.go index bf19028c4..b910b5a74 100644 --- a/pkg/sentry/syscalls/linux/vfs2/lock.go +++ b/pkg/sentry/syscalls/linux/vfs2/lock.go @@ -32,7 +32,7 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // flock(2): EBADF fd is not an open file descriptor. return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) nonblocking := operation&linux.LOCK_NB != 0 operation &^= linux.LOCK_NB diff --git a/pkg/sentry/syscalls/linux/vfs2/memfd.go b/pkg/sentry/syscalls/linux/vfs2/memfd.go index bbe248d17..519583e4e 100644 --- a/pkg/sentry/syscalls/linux/vfs2/memfd.go +++ b/pkg/sentry/syscalls/linux/vfs2/memfd.go @@ -47,7 +47,7 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S } shmMount := t.Kernel().ShmMount() - file, err := tmpfs.NewMemfd(shmMount, t.Credentials(), allowSeals, memfdPrefix+name) + file, err := tmpfs.NewMemfd(t, t.Credentials(), shmMount, allowSeals, memfdPrefix+name) if err != nil { return 0, nil, err } diff --git a/pkg/sentry/syscalls/linux/vfs2/mmap.go b/pkg/sentry/syscalls/linux/vfs2/mmap.go index 60a43f0a0..dc05c2994 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mmap.go +++ b/pkg/sentry/syscalls/linux/vfs2/mmap.go @@ -61,7 +61,7 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC } defer func() { if opts.MappingIdentity != nil { - opts.MappingIdentity.DecRef() + opts.MappingIdentity.DecRef(t) } }() @@ -71,7 +71,7 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // mmap unconditionally requires that the FD is readable. if !file.IsReadable() { diff --git a/pkg/sentry/syscalls/linux/vfs2/mount.go b/pkg/sentry/syscalls/linux/vfs2/mount.go index ea337de7c..4bd5c7ca2 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mount.go +++ b/pkg/sentry/syscalls/linux/vfs2/mount.go @@ -108,7 +108,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if err != nil { return 0, nil, err } - defer target.Release() + defer target.Release(t) return 0, nil, t.Kernel().VFS().MountAt(t, creds, source, &target.pop, fsType, &opts) } @@ -140,7 +140,7 @@ func Umount2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) opts := vfs.UmountOptions{ Flags: uint32(flags), diff --git a/pkg/sentry/syscalls/linux/vfs2/path.go b/pkg/sentry/syscalls/linux/vfs2/path.go index 97da6c647..90a511d9a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/path.go +++ b/pkg/sentry/syscalls/linux/vfs2/path.go @@ -42,7 +42,7 @@ func getTaskPathOperation(t *kernel.Task, dirfd int32, path fspath.Path, shouldA haveStartRef := false if !path.Absolute { if !path.HasComponents() && !bool(shouldAllowEmptyPath) { - root.DecRef() + root.DecRef(t) return taskPathOperation{}, syserror.ENOENT } if dirfd == linux.AT_FDCWD { @@ -51,13 +51,13 @@ func getTaskPathOperation(t *kernel.Task, dirfd int32, path fspath.Path, shouldA } else { dirfile := t.GetFileVFS2(dirfd) if dirfile == nil { - root.DecRef() + root.DecRef(t) return taskPathOperation{}, syserror.EBADF } start = dirfile.VirtualDentry() start.IncRef() haveStartRef = true - dirfile.DecRef() + dirfile.DecRef(t) } } return taskPathOperation{ @@ -71,10 +71,10 @@ func getTaskPathOperation(t *kernel.Task, dirfd int32, path fspath.Path, shouldA }, nil } -func (tpop *taskPathOperation) Release() { - tpop.pop.Root.DecRef() +func (tpop *taskPathOperation) Release(t *kernel.Task) { + tpop.pop.Root.DecRef(t) if tpop.haveStartRef { - tpop.pop.Start.DecRef() + tpop.pop.Start.DecRef(t) tpop.haveStartRef = false } } diff --git a/pkg/sentry/syscalls/linux/vfs2/pipe.go b/pkg/sentry/syscalls/linux/vfs2/pipe.go index 4a01e4209..9b4848d9e 100644 --- a/pkg/sentry/syscalls/linux/vfs2/pipe.go +++ b/pkg/sentry/syscalls/linux/vfs2/pipe.go @@ -42,8 +42,8 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags int32) error { return syserror.EINVAL } r, w := pipefs.NewConnectedPipeFDs(t, t.Kernel().PipeMount(), uint32(flags&linux.O_NONBLOCK)) - defer r.DecRef() - defer w.DecRef() + defer r.DecRef(t) + defer w.DecRef(t) fds, err := t.NewFDsVFS2(0, []*vfs.FileDescription{r, w}, kernel.FDFlags{ CloseOnExec: flags&linux.O_CLOEXEC != 0, @@ -54,7 +54,7 @@ func pipe2(t *kernel.Task, addr usermem.Addr, flags int32) error { if _, err := t.CopyOut(addr, fds); err != nil { for _, fd := range fds { if _, file := t.FDTable().Remove(fd); file != nil { - file.DecRef() + file.DecRef(t) } } return err diff --git a/pkg/sentry/syscalls/linux/vfs2/poll.go b/pkg/sentry/syscalls/linux/vfs2/poll.go index ff1b25d7b..7b9d5e18a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/poll.go +++ b/pkg/sentry/syscalls/linux/vfs2/poll.go @@ -73,7 +73,7 @@ func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan } if ch == nil { - defer file.DecRef() + defer file.DecRef(t) } else { state.file = file state.waiter, _ = waiter.NewChannelEntry(ch) @@ -85,11 +85,11 @@ func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan } // releaseState releases all the pollState in "state". -func releaseState(state []pollState) { +func releaseState(t *kernel.Task, state []pollState) { for i := range state { if state[i].file != nil { state[i].file.EventUnregister(&state[i].waiter) - state[i].file.DecRef() + state[i].file.DecRef(t) } } } @@ -110,7 +110,7 @@ func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time. // result, we stop registering for events but still go through all files // to get their ready masks. state := make([]pollState, len(pfd)) - defer releaseState(state) + defer releaseState(t, state) n := uintptr(0) for i := range pfd { initReadiness(t, &pfd[i], &state[i], ch) @@ -269,7 +269,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add if file == nil { return 0, syserror.EBADF } - file.DecRef() + file.DecRef(t) var mask int16 if (rV & m) != 0 { diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go index cd25597a7..a905dae0a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/read_write.go +++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go @@ -44,7 +44,7 @@ func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the size is legitimate. si := int(size) @@ -75,7 +75,7 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Get the destination of the read. dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ @@ -94,7 +94,7 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt n, err := file.Read(t, dst, opts) if err != syserror.ErrWouldBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -102,7 +102,7 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -135,7 +135,7 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt file.EventUnregister(&w) if total > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) } return total, err } @@ -151,7 +151,7 @@ func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate and does not overflow. if offset < 0 || offset+int64(size) < 0 { @@ -188,7 +188,7 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate. if offset < 0 { @@ -226,7 +226,7 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate. if offset < -1 { @@ -258,7 +258,7 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of n, err := file.PRead(t, dst, offset, opts) if err != syserror.ErrWouldBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -266,7 +266,7 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -299,7 +299,7 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of file.EventUnregister(&w) if total > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) } return total, err } @@ -314,7 +314,7 @@ func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the size is legitimate. si := int(size) @@ -345,7 +345,7 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Get the source of the write. src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ @@ -364,7 +364,7 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op n, err := file.Write(t, src, opts) if err != syserror.ErrWouldBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) } return n, err } @@ -372,7 +372,7 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) } return n, err } @@ -405,7 +405,7 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op file.EventUnregister(&w) if total > 0 { - file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) } return total, err } @@ -421,7 +421,7 @@ func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate and does not overflow. if offset < 0 || offset+int64(size) < 0 { @@ -458,7 +458,7 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate. if offset < 0 { @@ -496,7 +496,7 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the offset is legitimate. if offset < -1 { @@ -528,7 +528,7 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o n, err := file.PWrite(t, src, offset, opts) if err != syserror.ErrWouldBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) } return n, err } @@ -536,7 +536,7 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { if n > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) } return n, err } @@ -569,7 +569,7 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o file.EventUnregister(&w) if total > 0 { - file.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) } return total, err } @@ -601,7 +601,7 @@ func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) newoff, err := file.Seek(t, offset, whence) return uintptr(newoff), nil, err @@ -617,7 +617,7 @@ func Readahead(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Check that the file is readable. if !file.IsReadable() { diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go index 25cdb7a55..5e6eb13ba 100644 --- a/pkg/sentry/syscalls/linux/vfs2/setstat.go +++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go @@ -66,7 +66,7 @@ func Fchmod(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) return 0, nil, file.SetStat(t, vfs.SetStatOptions{ Stat: linux.Statx{ @@ -151,7 +151,7 @@ func Fchown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) var opts vfs.SetStatOptions if err := populateSetStatOptionsForChown(t, owner, group, &opts); err != nil { @@ -197,7 +197,7 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) if !file.IsWritable() { return 0, nil, syserror.EINVAL @@ -224,7 +224,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) if !file.IsWritable() { return 0, nil, syserror.EBADF @@ -258,7 +258,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, err } - file.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) return 0, nil, nil } @@ -438,7 +438,7 @@ func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr usermem.Addr, op func setstatat(t *kernel.Task, dirfd int32, path fspath.Path, shouldAllowEmptyPath shouldAllowEmptyPath, shouldFollowFinalSymlink shouldFollowFinalSymlink, opts *vfs.SetStatOptions) error { root := t.FSContext().RootDirectoryVFS2() - defer root.DecRef() + defer root.DecRef(t) start := root if !path.Absolute { if !path.HasComponents() && !bool(shouldAllowEmptyPath) { @@ -446,7 +446,7 @@ func setstatat(t *kernel.Task, dirfd int32, path fspath.Path, shouldAllowEmptyPa } if dirfd == linux.AT_FDCWD { start = t.FSContext().WorkingDirectoryVFS2() - defer start.DecRef() + defer start.DecRef(t) } else { dirfile := t.GetFileVFS2(dirfd) if dirfile == nil { @@ -457,13 +457,13 @@ func setstatat(t *kernel.Task, dirfd int32, path fspath.Path, shouldAllowEmptyPa // VirtualFilesystem.SetStatAt(), since the former may be able // to use opened file state to expedite the SetStat. err := dirfile.SetStat(t, *opts) - dirfile.DecRef() + dirfile.DecRef(t) return err } start = dirfile.VirtualDentry() start.IncRef() - defer start.DecRef() - dirfile.DecRef() + defer start.DecRef(t) + dirfile.DecRef(t) } } return t.Kernel().VFS().SetStatAt(t, t.Credentials(), &vfs.PathOperation{ diff --git a/pkg/sentry/syscalls/linux/vfs2/signal.go b/pkg/sentry/syscalls/linux/vfs2/signal.go index 623992f6f..b89f34cdb 100644 --- a/pkg/sentry/syscalls/linux/vfs2/signal.go +++ b/pkg/sentry/syscalls/linux/vfs2/signal.go @@ -45,7 +45,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize ui if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Is this a signalfd? if sfd, ok := file.Impl().(*signalfd.SignalFileDescription); ok { @@ -68,7 +68,7 @@ func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize ui if err != nil { return 0, nil, err } - defer file.DecRef() + defer file.DecRef(t) // Create a new descriptor. fd, err = t.NewFDFromVFS2(0, file, kernel.FDFlags{ diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go index 8096a8f9c..4a68c64f3 100644 --- a/pkg/sentry/syscalls/linux/vfs2/socket.go +++ b/pkg/sentry/syscalls/linux/vfs2/socket.go @@ -196,7 +196,7 @@ func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if e != nil { return 0, nil, e.ToError() } - defer s.DecRef() + defer s.DecRef(t) if err := s.SetStatusFlags(t, t.Credentials(), uint32(stype&linux.SOCK_NONBLOCK)); err != nil { return 0, nil, err @@ -230,8 +230,8 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy return 0, nil, e.ToError() } // Adding to the FD table will cause an extra reference to be acquired. - defer s1.DecRef() - defer s2.DecRef() + defer s1.DecRef(t) + defer s2.DecRef(t) nonblocking := uint32(stype & linux.SOCK_NONBLOCK) if err := s1.SetStatusFlags(t, t.Credentials(), nonblocking); err != nil { @@ -253,7 +253,7 @@ func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy if _, err := t.CopyOut(addr, fds); err != nil { for _, fd := range fds { if _, file := t.FDTable().Remove(fd); file != nil { - file.DecRef() + file.DecRef(t) } } return 0, nil, err @@ -273,7 +273,7 @@ func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -304,7 +304,7 @@ func accept(t *kernel.Task, fd int32, addr usermem.Addr, addrLen usermem.Addr, f if file == nil { return 0, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -363,7 +363,7 @@ func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -390,7 +390,7 @@ func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -419,7 +419,7 @@ func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -450,7 +450,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -532,7 +532,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -570,7 +570,7 @@ func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -598,7 +598,7 @@ func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -631,7 +631,7 @@ func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -684,7 +684,7 @@ func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -778,7 +778,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla } if !cms.Unix.Empty() { mflags |= linux.MSG_CTRUNC - cms.Release() + cms.Release(t) } if int(msg.Flags) != mflags { @@ -798,7 +798,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla if e != nil { return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS) } - defer cms.Release() + defer cms.Release(t) controlData := make([]byte, 0, msg.ControlLen) controlData = control.PackControlMessages(t, cms, controlData) @@ -854,7 +854,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flag if file == nil { return 0, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -883,7 +883,7 @@ func recvFrom(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flag } n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0) - cm.Release() + cm.Release(t) if e != nil { return 0, syserror.ConvertIntr(e.ToError(), kernel.ERESTARTSYS) } @@ -927,7 +927,7 @@ func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -965,7 +965,7 @@ func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) @@ -1069,7 +1069,7 @@ func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescriptio n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages) err = slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), kernel.ERESTARTSYS, "sendmsg", file) if err != nil { - controlMessages.Release() + controlMessages.Release(t) } return uintptr(n), err } @@ -1087,7 +1087,7 @@ func sendTo(t *kernel.Task, fd int32, bufPtr usermem.Addr, bufLen uint64, flags if file == nil { return 0, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // Extract the socket. s, ok := file.Impl().(socket.SocketVFS2) diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 63ab11f8c..16f59fce9 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -53,12 +53,12 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if inFile == nil { return 0, nil, syserror.EBADF } - defer inFile.DecRef() + defer inFile.DecRef(t) outFile := t.GetFileVFS2(outFD) if outFile == nil { return 0, nil, syserror.EBADF } - defer outFile.DecRef() + defer outFile.DecRef(t) // Check that both files support the required directionality. if !inFile.IsReadable() || !outFile.IsWritable() { @@ -175,7 +175,7 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal // On Linux, inotify behavior is not very consistent with splice(2). We try // our best to emulate Linux for very basic calls to splice, where for some // reason, events are generated for output files, but not input files. - outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) return uintptr(n), nil, nil } @@ -203,12 +203,12 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo if inFile == nil { return 0, nil, syserror.EBADF } - defer inFile.DecRef() + defer inFile.DecRef(t) outFile := t.GetFileVFS2(outFD) if outFile == nil { return 0, nil, syserror.EBADF } - defer outFile.DecRef() + defer outFile.DecRef(t) // Check that both files support the required directionality. if !inFile.IsReadable() || !outFile.IsWritable() { @@ -251,7 +251,7 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo if n == 0 { return 0, nil, err } - outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) return uintptr(n), nil, nil } @@ -266,7 +266,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if inFile == nil { return 0, nil, syserror.EBADF } - defer inFile.DecRef() + defer inFile.DecRef(t) if !inFile.IsReadable() { return 0, nil, syserror.EBADF } @@ -275,7 +275,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if outFile == nil { return 0, nil, syserror.EBADF } - defer outFile.DecRef() + defer outFile.DecRef(t) if !outFile.IsWritable() { return 0, nil, syserror.EBADF } @@ -419,8 +419,8 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc return 0, nil, err } - inFile.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) - outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) + inFile.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) + outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) return uintptr(n), nil, nil } diff --git a/pkg/sentry/syscalls/linux/vfs2/stat.go b/pkg/sentry/syscalls/linux/vfs2/stat.go index bb1d5cac4..0f5d5189c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/stat.go +++ b/pkg/sentry/syscalls/linux/vfs2/stat.go @@ -65,7 +65,7 @@ func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr usermem.Addr, flags } root := t.FSContext().RootDirectoryVFS2() - defer root.DecRef() + defer root.DecRef(t) start := root if !path.Absolute { if !path.HasComponents() && flags&linux.AT_EMPTY_PATH == 0 { @@ -73,7 +73,7 @@ func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr usermem.Addr, flags } if dirfd == linux.AT_FDCWD { start = t.FSContext().WorkingDirectoryVFS2() - defer start.DecRef() + defer start.DecRef(t) } else { dirfile := t.GetFileVFS2(dirfd) if dirfile == nil { @@ -85,7 +85,7 @@ func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr usermem.Addr, flags // former may be able to use opened file state to expedite the // Stat. statx, err := dirfile.Stat(t, opts) - dirfile.DecRef() + dirfile.DecRef(t) if err != nil { return err } @@ -96,8 +96,8 @@ func fstatat(t *kernel.Task, dirfd int32, pathAddr, statAddr usermem.Addr, flags } start = dirfile.VirtualDentry() start.IncRef() - defer start.DecRef() - dirfile.DecRef() + defer start.DecRef(t) + dirfile.DecRef(t) } } @@ -132,7 +132,7 @@ func Fstat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) statx, err := file.Stat(t, vfs.StatOptions{ Mask: linux.STATX_BASIC_STATS, @@ -177,7 +177,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } root := t.FSContext().RootDirectoryVFS2() - defer root.DecRef() + defer root.DecRef(t) start := root if !path.Absolute { if !path.HasComponents() && flags&linux.AT_EMPTY_PATH == 0 { @@ -185,7 +185,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } if dirfd == linux.AT_FDCWD { start = t.FSContext().WorkingDirectoryVFS2() - defer start.DecRef() + defer start.DecRef(t) } else { dirfile := t.GetFileVFS2(dirfd) if dirfile == nil { @@ -197,7 +197,7 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // former may be able to use opened file state to expedite the // Stat. statx, err := dirfile.Stat(t, opts) - dirfile.DecRef() + dirfile.DecRef(t) if err != nil { return 0, nil, err } @@ -207,8 +207,8 @@ func Statx(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } start = dirfile.VirtualDentry() start.IncRef() - defer start.DecRef() - dirfile.DecRef() + defer start.DecRef(t) + dirfile.DecRef(t) } } @@ -282,7 +282,7 @@ func accessAt(t *kernel.Task, dirfd int32, pathAddr usermem.Addr, mode uint) err if err != nil { return err } - defer tpop.Release() + defer tpop.Release(t) // access(2) and faccessat(2) check permissions using real // UID/GID, not effective UID/GID. @@ -328,7 +328,7 @@ func readlinkat(t *kernel.Task, dirfd int32, pathAddr, bufAddr usermem.Addr, siz if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) target, err := t.Kernel().VFS().ReadlinkAt(t, t.Credentials(), &tpop.pop) if err != nil { @@ -358,7 +358,7 @@ func Statfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) statfs, err := t.Kernel().VFS().StatFSAt(t, t.Credentials(), &tpop.pop) if err != nil { @@ -377,7 +377,7 @@ func Fstatfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) statfs, err := t.Kernel().VFS().StatFSAt(t, t.Credentials(), &tpop.pop) if err != nil { diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go index 0d0ebf46a..a6491ac37 100644 --- a/pkg/sentry/syscalls/linux/vfs2/sync.go +++ b/pkg/sentry/syscalls/linux/vfs2/sync.go @@ -34,7 +34,7 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) return 0, nil, file.SyncFS(t) } @@ -47,7 +47,7 @@ func Fsync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) return 0, nil, file.Sync(t) } @@ -77,7 +77,7 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) // TODO(gvisor.dev/issue/1897): Currently, the only file syncing we support // is a full-file sync, i.e. fsync(2). As a result, there are severe diff --git a/pkg/sentry/syscalls/linux/vfs2/timerfd.go b/pkg/sentry/syscalls/linux/vfs2/timerfd.go index 5ac79bc09..7a26890ef 100644 --- a/pkg/sentry/syscalls/linux/vfs2/timerfd.go +++ b/pkg/sentry/syscalls/linux/vfs2/timerfd.go @@ -50,11 +50,11 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel return 0, nil, syserror.EINVAL } vfsObj := t.Kernel().VFS() - file, err := timerfd.New(vfsObj, clock, fileFlags) + file, err := timerfd.New(t, vfsObj, clock, fileFlags) if err != nil { return 0, nil, err } - defer file.DecRef() + defer file.DecRef(t) fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{ CloseOnExec: flags&linux.TFD_CLOEXEC != 0, }) @@ -79,7 +79,7 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) tfd, ok := file.Impl().(*timerfd.TimerFileDescription) if !ok { @@ -113,7 +113,7 @@ func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) tfd, ok := file.Impl().(*timerfd.TimerFileDescription) if !ok { diff --git a/pkg/sentry/syscalls/linux/vfs2/xattr.go b/pkg/sentry/syscalls/linux/vfs2/xattr.go index af455d5c1..ef99246ed 100644 --- a/pkg/sentry/syscalls/linux/vfs2/xattr.go +++ b/pkg/sentry/syscalls/linux/vfs2/xattr.go @@ -49,7 +49,7 @@ func listxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSyml if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) names, err := t.Kernel().VFS().ListxattrAt(t, t.Credentials(), &tpop.pop, uint64(size)) if err != nil { @@ -72,7 +72,7 @@ func Flistxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) names, err := file.Listxattr(t, uint64(size)) if err != nil { @@ -109,7 +109,7 @@ func getxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSymli if err != nil { return 0, nil, err } - defer tpop.Release() + defer tpop.Release(t) name, err := copyInXattrName(t, nameAddr) if err != nil { @@ -141,7 +141,7 @@ func Fgetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) name, err := copyInXattrName(t, nameAddr) if err != nil { @@ -188,7 +188,7 @@ func setxattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSymli if err != nil { return err } - defer tpop.Release() + defer tpop.Release(t) name, err := copyInXattrName(t, nameAddr) if err != nil { @@ -222,7 +222,7 @@ func Fsetxattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) name, err := copyInXattrName(t, nameAddr) if err != nil { @@ -262,7 +262,7 @@ func removexattr(t *kernel.Task, args arch.SyscallArguments, shouldFollowFinalSy if err != nil { return err } - defer tpop.Release() + defer tpop.Release(t) name, err := copyInXattrName(t, nameAddr) if err != nil { @@ -281,7 +281,7 @@ func Fremovexattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel. if file == nil { return 0, nil, syserror.EBADF } - defer file.DecRef() + defer file.DecRef(t) name, err := copyInXattrName(t, nameAddr) if err != nil { diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index 641e3e502..5a0e3e6b5 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -82,7 +82,7 @@ type anonDentry struct { } // Release implements FilesystemImpl.Release. -func (fs *anonFilesystem) Release() { +func (fs *anonFilesystem) Release(ctx context.Context) { } // Sync implements FilesystemImpl.Sync. @@ -294,7 +294,7 @@ func (d *anonDentry) TryIncRef() bool { } // DecRef implements DentryImpl.DecRef. -func (d *anonDentry) DecRef() { +func (d *anonDentry) DecRef(ctx context.Context) { // no-op } @@ -303,7 +303,7 @@ func (d *anonDentry) DecRef() { // Although Linux technically supports inotify on pseudo filesystems (inotify // is implemented at the vfs layer), it is not particularly useful. It is left // unimplemented until someone actually needs it. -func (d *anonDentry) InotifyWithParent(events, cookie uint32, et EventType) {} +func (d *anonDentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) {} // Watches implements DentryImpl.Watches. func (d *anonDentry) Watches() *Watches { @@ -311,4 +311,4 @@ func (d *anonDentry) Watches() *Watches { } // OnZeroWatches implements Dentry.OnZeroWatches. -func (d *anonDentry) OnZeroWatches() {} +func (d *anonDentry) OnZeroWatches(context.Context) {} diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go index cea3e6955..bc7ea93ea 100644 --- a/pkg/sentry/vfs/dentry.go +++ b/pkg/sentry/vfs/dentry.go @@ -17,6 +17,7 @@ package vfs import ( "sync/atomic" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" ) @@ -102,7 +103,7 @@ type DentryImpl interface { TryIncRef() bool // DecRef decrements the Dentry's reference count. - DecRef() + DecRef(ctx context.Context) // InotifyWithParent notifies all watches on the targets represented by this // dentry and its parent. The parent's watches are notified first, followed @@ -113,7 +114,7 @@ type DentryImpl interface { // // Note that the events may not actually propagate up to the user, depending // on the event masks. - InotifyWithParent(events, cookie uint32, et EventType) + InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) // Watches returns the set of inotify watches for the file corresponding to // the Dentry. Dentries that are hard links to the same underlying file @@ -135,7 +136,7 @@ type DentryImpl interface { // The caller does not need to hold a reference on the dentry. OnZeroWatches // may acquire inotify locks, so to prevent deadlock, no inotify locks should // be held by the caller. - OnZeroWatches() + OnZeroWatches(ctx context.Context) } // IncRef increments d's reference count. @@ -150,8 +151,8 @@ func (d *Dentry) TryIncRef() bool { } // DecRef decrements d's reference count. -func (d *Dentry) DecRef() { - d.impl.DecRef() +func (d *Dentry) DecRef(ctx context.Context) { + d.impl.DecRef(ctx) } // IsDead returns true if d has been deleted or invalidated by its owning @@ -168,8 +169,8 @@ func (d *Dentry) isMounted() bool { // InotifyWithParent notifies all watches on the targets represented by d and // its parent of events. -func (d *Dentry) InotifyWithParent(events, cookie uint32, et EventType) { - d.impl.InotifyWithParent(events, cookie, et) +func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) { + d.impl.InotifyWithParent(ctx, events, cookie, et) } // Watches returns the set of inotify watches associated with d. @@ -182,8 +183,8 @@ func (d *Dentry) Watches() *Watches { // OnZeroWatches performs cleanup tasks whenever the number of watches on a // dentry drops to zero. -func (d *Dentry) OnZeroWatches() { - d.impl.OnZeroWatches() +func (d *Dentry) OnZeroWatches(ctx context.Context) { + d.impl.OnZeroWatches(ctx) } // The following functions are exported so that filesystem implementations can @@ -214,11 +215,11 @@ func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) { // CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion // succeeds. -func (vfs *VirtualFilesystem) CommitDeleteDentry(d *Dentry) { +func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) { d.dead = true d.mu.Unlock() if d.isMounted() { - vfs.forgetDeadMountpoint(d) + vfs.forgetDeadMountpoint(ctx, d) } } @@ -226,12 +227,12 @@ func (vfs *VirtualFilesystem) CommitDeleteDentry(d *Dentry) { // did for reasons outside of VFS' control (e.g. d represents the local state // of a file on a remote filesystem on which the file has already been // deleted). -func (vfs *VirtualFilesystem) InvalidateDentry(d *Dentry) { +func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) { d.mu.Lock() d.dead = true d.mu.Unlock() if d.isMounted() { - vfs.forgetDeadMountpoint(d) + vfs.forgetDeadMountpoint(ctx, d) } } @@ -278,13 +279,13 @@ func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) { // that was replaced by from. // // Preconditions: PrepareRenameDentry was previously called on from and to. -func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(from, to *Dentry) { +func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) { from.mu.Unlock() if to != nil { to.dead = true to.mu.Unlock() if to.isMounted() { - vfs.forgetDeadMountpoint(to) + vfs.forgetDeadMountpoint(ctx, to) } } } @@ -303,7 +304,7 @@ func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) { // // forgetDeadMountpoint is analogous to Linux's // fs/namespace.c:__detach_mounts(). -func (vfs *VirtualFilesystem) forgetDeadMountpoint(d *Dentry) { +func (vfs *VirtualFilesystem) forgetDeadMountpoint(ctx context.Context, d *Dentry) { var ( vdsToDecRef []VirtualDentry mountsToDecRef []*Mount @@ -316,9 +317,9 @@ func (vfs *VirtualFilesystem) forgetDeadMountpoint(d *Dentry) { vfs.mounts.seq.EndWrite() vfs.mountMu.Unlock() for _, vd := range vdsToDecRef { - vd.DecRef() + vd.DecRef(ctx) } for _, mnt := range mountsToDecRef { - mnt.DecRef() + mnt.DecRef(ctx) } } diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go index 5b009b928..1b5af9f73 100644 --- a/pkg/sentry/vfs/epoll.go +++ b/pkg/sentry/vfs/epoll.go @@ -93,9 +93,9 @@ type epollInterest struct { // NewEpollInstanceFD returns a FileDescription representing a new epoll // instance. A reference is taken on the returned FileDescription. -func (vfs *VirtualFilesystem) NewEpollInstanceFD() (*FileDescription, error) { +func (vfs *VirtualFilesystem) NewEpollInstanceFD(ctx context.Context) (*FileDescription, error) { vd := vfs.NewAnonVirtualDentry("[eventpoll]") - defer vd.DecRef() + defer vd.DecRef(ctx) ep := &EpollInstance{ interest: make(map[epollInterestKey]*epollInterest), } @@ -110,7 +110,7 @@ func (vfs *VirtualFilesystem) NewEpollInstanceFD() (*FileDescription, error) { } // Release implements FileDescriptionImpl.Release. -func (ep *EpollInstance) Release() { +func (ep *EpollInstance) Release(ctx context.Context) { // Unregister all polled fds. ep.interestMu.Lock() defer ep.interestMu.Unlock() diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 93861fb4a..576ab3920 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -171,7 +171,7 @@ func (fd *FileDescription) TryIncRef() bool { } // DecRef decrements fd's reference count. -func (fd *FileDescription) DecRef() { +func (fd *FileDescription) DecRef(ctx context.Context) { if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 { // Unregister fd from all epoll instances. fd.epollMu.Lock() @@ -196,11 +196,11 @@ func (fd *FileDescription) DecRef() { } // Release implementation resources. - fd.impl.Release() + fd.impl.Release(ctx) if fd.writable { fd.vd.mount.EndWrite() } - fd.vd.DecRef() + fd.vd.DecRef(ctx) fd.flagsMu.Lock() // TODO(gvisor.dev/issue/1663): We may need to unregister during save, as we do in VFS1. if fd.statusFlags&linux.O_ASYNC != 0 && fd.asyncHandler != nil { @@ -335,7 +335,7 @@ func (fd *FileDescription) Impl() FileDescriptionImpl { type FileDescriptionImpl interface { // Release is called when the associated FileDescription reaches zero // references. - Release() + Release(ctx context.Context) // OnClose is called when a file descriptor representing the // FileDescription is closed. Note that returning a non-nil error does not @@ -526,7 +526,7 @@ func (fd *FileDescription) Stat(ctx context.Context, opts StatOptions) (linux.St Start: fd.vd, }) stat, err := fd.vd.mount.fs.impl.StatAt(ctx, rp, opts) - vfsObj.putResolvingPath(rp) + vfsObj.putResolvingPath(ctx, rp) return stat, err } return fd.impl.Stat(ctx, opts) @@ -541,7 +541,7 @@ func (fd *FileDescription) SetStat(ctx context.Context, opts SetStatOptions) err Start: fd.vd, }) err := fd.vd.mount.fs.impl.SetStatAt(ctx, rp, opts) - vfsObj.putResolvingPath(rp) + vfsObj.putResolvingPath(ctx, rp) return err } return fd.impl.SetStat(ctx, opts) @@ -557,7 +557,7 @@ func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { Start: fd.vd, }) statfs, err := fd.vd.mount.fs.impl.StatFSAt(ctx, rp) - vfsObj.putResolvingPath(rp) + vfsObj.putResolvingPath(ctx, rp) return statfs, err } return fd.impl.StatFS(ctx) @@ -674,7 +674,7 @@ func (fd *FileDescription) Listxattr(ctx context.Context, size uint64) ([]string Start: fd.vd, }) names, err := fd.vd.mount.fs.impl.ListxattrAt(ctx, rp, size) - vfsObj.putResolvingPath(rp) + vfsObj.putResolvingPath(ctx, rp) return names, err } names, err := fd.impl.Listxattr(ctx, size) @@ -703,7 +703,7 @@ func (fd *FileDescription) Getxattr(ctx context.Context, opts *GetxattrOptions) Start: fd.vd, }) val, err := fd.vd.mount.fs.impl.GetxattrAt(ctx, rp, *opts) - vfsObj.putResolvingPath(rp) + vfsObj.putResolvingPath(ctx, rp) return val, err } return fd.impl.Getxattr(ctx, *opts) @@ -719,7 +719,7 @@ func (fd *FileDescription) Setxattr(ctx context.Context, opts *SetxattrOptions) Start: fd.vd, }) err := fd.vd.mount.fs.impl.SetxattrAt(ctx, rp, *opts) - vfsObj.putResolvingPath(rp) + vfsObj.putResolvingPath(ctx, rp) return err } return fd.impl.Setxattr(ctx, *opts) @@ -735,7 +735,7 @@ func (fd *FileDescription) Removexattr(ctx context.Context, name string) error { Start: fd.vd, }) err := fd.vd.mount.fs.impl.RemovexattrAt(ctx, rp, name) - vfsObj.putResolvingPath(rp) + vfsObj.putResolvingPath(ctx, rp) return err } return fd.impl.Removexattr(ctx, name) @@ -752,7 +752,7 @@ func (fd *FileDescription) MappedName(ctx context.Context) string { vfsroot := RootFromContext(ctx) s, _ := fd.vd.mount.vfs.PathnameWithDeleted(ctx, vfsroot, fd.vd) if vfsroot.Ok() { - vfsroot.DecRef() + vfsroot.DecRef(ctx) } return s } diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go index 3b7e1c273..1cd607c0a 100644 --- a/pkg/sentry/vfs/file_description_impl_util_test.go +++ b/pkg/sentry/vfs/file_description_impl_util_test.go @@ -80,9 +80,9 @@ type testFD struct { data DynamicBytesSource } -func newTestFD(vfsObj *VirtualFilesystem, statusFlags uint32, data DynamicBytesSource) *FileDescription { +func newTestFD(ctx context.Context, vfsObj *VirtualFilesystem, statusFlags uint32, data DynamicBytesSource) *FileDescription { vd := vfsObj.NewAnonVirtualDentry("genCountFD") - defer vd.DecRef() + defer vd.DecRef(ctx) var fd testFD fd.vfsfd.Init(&fd, statusFlags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{}) fd.DynamicBytesFileDescriptionImpl.SetDataSource(data) @@ -90,7 +90,7 @@ func newTestFD(vfsObj *VirtualFilesystem, statusFlags uint32, data DynamicBytesS } // Release implements FileDescriptionImpl.Release. -func (fd *testFD) Release() { +func (fd *testFD) Release(context.Context) { } // SetStatusFlags implements FileDescriptionImpl.SetStatusFlags. @@ -109,11 +109,11 @@ func TestGenCountFD(t *testing.T) { ctx := contexttest.Context(t) vfsObj := &VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { t.Fatalf("VFS init: %v", err) } - fd := newTestFD(vfsObj, linux.O_RDWR, &genCount{}) - defer fd.DecRef() + fd := newTestFD(ctx, vfsObj, linux.O_RDWR, &genCount{}) + defer fd.DecRef(ctx) // The first read causes Generate to be called to fill the FD's buffer. buf := make([]byte, 2) @@ -167,11 +167,11 @@ func TestWritable(t *testing.T) { ctx := contexttest.Context(t) vfsObj := &VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { + if err := vfsObj.Init(ctx); err != nil { t.Fatalf("VFS init: %v", err) } - fd := newTestFD(vfsObj, linux.O_RDWR, &storeData{data: "init"}) - defer fd.DecRef() + fd := newTestFD(ctx, vfsObj, linux.O_RDWR, &storeData{data: "init"}) + defer fd.DecRef(ctx) buf := make([]byte, 10) ioseq := usermem.BytesIOSequence(buf) diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index 6bb9ca180..df3758fd1 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -100,12 +100,12 @@ func (fs *Filesystem) TryIncRef() bool { } // DecRef decrements fs' reference count. -func (fs *Filesystem) DecRef() { +func (fs *Filesystem) DecRef(ctx context.Context) { if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 { fs.vfs.filesystemsMu.Lock() delete(fs.vfs.filesystems, fs) fs.vfs.filesystemsMu.Unlock() - fs.impl.Release() + fs.impl.Release(ctx) } else if refs < 0 { panic("Filesystem.decRef() called without holding a reference") } @@ -149,7 +149,7 @@ func (fs *Filesystem) DecRef() { type FilesystemImpl interface { // Release is called when the associated Filesystem reaches zero // references. - Release() + Release(ctx context.Context) // Sync "causes all pending modifications to filesystem metadata and cached // file data to be written to the underlying [filesystem]", as by syncfs(2). diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 167b731ac..aff220a61 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -100,7 +100,7 @@ func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32) id := uniqueid.GlobalFromContext(ctx) vd := vfsObj.NewAnonVirtualDentry(fmt.Sprintf("[inotifyfd:%d]", id)) - defer vd.DecRef() + defer vd.DecRef(ctx) fd := &Inotify{ id: id, scratch: make([]byte, inotifyEventBaseSize), @@ -118,7 +118,7 @@ func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32) // Release implements FileDescriptionImpl.Release. Release removes all // watches and frees all resources for an inotify instance. -func (i *Inotify) Release() { +func (i *Inotify) Release(ctx context.Context) { var ds []*Dentry // We need to hold i.mu to avoid a race with concurrent calls to @@ -144,7 +144,7 @@ func (i *Inotify) Release() { i.mu.Unlock() for _, d := range ds { - d.OnZeroWatches() + d.OnZeroWatches(ctx) } } @@ -350,7 +350,7 @@ func (i *Inotify) AddWatch(target *Dentry, mask uint32) (int32, error) { // RmWatch looks up an inotify watch for the given 'wd' and configures the // target to stop sending events to this inotify instance. -func (i *Inotify) RmWatch(wd int32) error { +func (i *Inotify) RmWatch(ctx context.Context, wd int32) error { i.mu.Lock() // Find the watch we were asked to removed. @@ -374,7 +374,7 @@ func (i *Inotify) RmWatch(wd int32) error { i.mu.Unlock() if remaining == 0 { - w.target.OnZeroWatches() + w.target.OnZeroWatches(ctx) } // Generate the event for the removal. @@ -462,7 +462,7 @@ func (w *Watches) Remove(id uint64) { // Notify queues a new event with watches in this set. Watches with // IN_EXCL_UNLINK are skipped if the event is coming from a child that has been // unlinked. -func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlinked bool) { +func (w *Watches) Notify(ctx context.Context, name string, events, cookie uint32, et EventType, unlinked bool) { var hasExpired bool w.mu.RLock() for _, watch := range w.ws { @@ -476,13 +476,13 @@ func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlin w.mu.RUnlock() if hasExpired { - w.cleanupExpiredWatches() + w.cleanupExpiredWatches(ctx) } } // This function is relatively expensive and should only be called where there // are expired watches. -func (w *Watches) cleanupExpiredWatches() { +func (w *Watches) cleanupExpiredWatches(ctx context.Context) { // Because of lock ordering, we cannot acquire Inotify.mu for each watch // owner while holding w.mu. As a result, store expired watches locally // before removing. @@ -495,15 +495,15 @@ func (w *Watches) cleanupExpiredWatches() { } w.mu.RUnlock() for _, watch := range toRemove { - watch.owner.RmWatch(watch.wd) + watch.owner.RmWatch(ctx, watch.wd) } } // HandleDeletion is called when the watch target is destroyed. Clear the // watch set, detach watches from the inotify instances they belong to, and // generate the appropriate events. -func (w *Watches) HandleDeletion() { - w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */) +func (w *Watches) HandleDeletion(ctx context.Context) { + w.Notify(ctx, "", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */) // As in Watches.Notify, we can't hold w.mu while acquiring Inotify.mu for // the owner of each watch being deleted. Instead, atomically store the @@ -744,12 +744,12 @@ func InotifyEventFromStatMask(mask uint32) uint32 { // InotifyRemoveChild sends the appriopriate notifications to the watch sets of // the child being removed and its parent. Note that unlike most pairs of // parent/child notifications, the child is notified first in this case. -func InotifyRemoveChild(self, parent *Watches, name string) { +func InotifyRemoveChild(ctx context.Context, self, parent *Watches, name string) { if self != nil { - self.Notify("", linux.IN_ATTRIB, 0, InodeEvent, true /* unlinked */) + self.Notify(ctx, "", linux.IN_ATTRIB, 0, InodeEvent, true /* unlinked */) } if parent != nil { - parent.Notify(name, linux.IN_DELETE, 0, InodeEvent, true /* unlinked */) + parent.Notify(ctx, name, linux.IN_DELETE, 0, InodeEvent, true /* unlinked */) } } @@ -762,13 +762,13 @@ func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches, } cookie := uniqueid.InotifyCookie(ctx) if oldParent != nil { - oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent, false /* unlinked */) + oldParent.Notify(ctx, oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent, false /* unlinked */) } if newParent != nil { - newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent, false /* unlinked */) + newParent.Notify(ctx, newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent, false /* unlinked */) } // Somewhat surprisingly, self move events do not have a cookie. if renamed != nil { - renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent, false /* unlinked */) + renamed.Notify(ctx, "", linux.IN_MOVE_SELF, 0, InodeEvent, false /* unlinked */) } } diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index 32f901bd8..d1d29d0cd 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -200,8 +200,8 @@ func (vfs *VirtualFilesystem) MountDisconnected(ctx context.Context, creds *auth if err != nil { return nil, err } - defer root.DecRef() - defer fs.DecRef() + defer root.DecRef(ctx) + defer fs.DecRef(ctx) return vfs.NewDisconnectedMount(fs, root, opts) } @@ -221,7 +221,7 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr if vd.dentry.dead { vd.dentry.mu.Unlock() vfs.mountMu.Unlock() - vd.DecRef() + vd.DecRef(ctx) return syserror.ENOENT } // vd might have been mounted over between vfs.GetDentryAt() and @@ -243,7 +243,7 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr // This can't fail since we're holding vfs.mountMu. nextmnt.root.IncRef() vd.dentry.mu.Unlock() - vd.DecRef() + vd.DecRef(ctx) vd = VirtualDentry{ mount: nextmnt, dentry: nextmnt.root, @@ -268,7 +268,7 @@ func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentia if err != nil { return err } - defer mnt.DecRef() + defer mnt.DecRef(ctx) if err := vfs.ConnectMountAt(ctx, creds, mnt, target); err != nil { return err } @@ -293,13 +293,13 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti if err != nil { return err } - defer vd.DecRef() + defer vd.DecRef(ctx) if vd.dentry != vd.mount.root { return syserror.EINVAL } vfs.mountMu.Lock() if mntns := MountNamespaceFromContext(ctx); mntns != nil { - defer mntns.DecRef() + defer mntns.DecRef(ctx) if mntns != vd.mount.ns { vfs.mountMu.Unlock() return syserror.EINVAL @@ -335,10 +335,10 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti vfs.mounts.seq.EndWrite() vfs.mountMu.Unlock() for _, vd := range vdsToDecRef { - vd.DecRef() + vd.DecRef(ctx) } for _, mnt := range mountsToDecRef { - mnt.DecRef() + mnt.DecRef(ctx) } return nil } @@ -479,7 +479,7 @@ func (mnt *Mount) IncRef() { } // DecRef decrements mnt's reference count. -func (mnt *Mount) DecRef() { +func (mnt *Mount) DecRef(ctx context.Context) { refs := atomic.AddInt64(&mnt.refs, -1) if refs&^math.MinInt64 == 0 { // mask out MSB var vd VirtualDentry @@ -490,10 +490,10 @@ func (mnt *Mount) DecRef() { mnt.vfs.mounts.seq.EndWrite() mnt.vfs.mountMu.Unlock() } - mnt.root.DecRef() - mnt.fs.DecRef() + mnt.root.DecRef(ctx) + mnt.fs.DecRef(ctx) if vd.Ok() { - vd.DecRef() + vd.DecRef(ctx) } } } @@ -506,7 +506,7 @@ func (mntns *MountNamespace) IncRef() { } // DecRef decrements mntns' reference count. -func (mntns *MountNamespace) DecRef() { +func (mntns *MountNamespace) DecRef(ctx context.Context) { vfs := mntns.root.fs.VirtualFilesystem() if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 { vfs.mountMu.Lock() @@ -517,10 +517,10 @@ func (mntns *MountNamespace) DecRef() { vfs.mounts.seq.EndWrite() vfs.mountMu.Unlock() for _, vd := range vdsToDecRef { - vd.DecRef() + vd.DecRef(ctx) } for _, mnt := range mountsToDecRef { - mnt.DecRef() + mnt.DecRef(ctx) } } else if refs < 0 { panic("MountNamespace.DecRef() called without holding a reference") @@ -534,7 +534,7 @@ func (mntns *MountNamespace) DecRef() { // getMountAt is analogous to Linux's fs/namei.c:follow_mount(). // // Preconditions: References are held on mnt and d. -func (vfs *VirtualFilesystem) getMountAt(mnt *Mount, d *Dentry) *Mount { +func (vfs *VirtualFilesystem) getMountAt(ctx context.Context, mnt *Mount, d *Dentry) *Mount { // The first mount is special-cased: // // - The caller is assumed to have checked d.isMounted() already. (This @@ -565,7 +565,7 @@ retryFirst: // Raced with umount. continue } - mnt.DecRef() + mnt.DecRef(ctx) mnt = next d = next.root } @@ -578,7 +578,7 @@ retryFirst: // // Preconditions: References are held on mnt and root. vfsroot is not (mnt, // mnt.root). -func (vfs *VirtualFilesystem) getMountpointAt(mnt *Mount, vfsroot VirtualDentry) VirtualDentry { +func (vfs *VirtualFilesystem) getMountpointAt(ctx context.Context, mnt *Mount, vfsroot VirtualDentry) VirtualDentry { // The first mount is special-cased: // // - The caller must have already checked mnt against vfsroot. @@ -602,12 +602,12 @@ retryFirst: if !point.TryIncRef() { // Since Mount holds a reference on Mount.key.point, this can only // happen due to a racing change to Mount.key. - parent.DecRef() + parent.DecRef(ctx) goto retryFirst } if !vfs.mounts.seq.ReadOk(epoch) { - point.DecRef() - parent.DecRef() + point.DecRef(ctx) + parent.DecRef(ctx) goto retryFirst } mnt = parent @@ -635,16 +635,16 @@ retryFirst: if !point.TryIncRef() { // Since Mount holds a reference on Mount.key.point, this can // only happen due to a racing change to Mount.key. - parent.DecRef() + parent.DecRef(ctx) goto retryNotFirst } if !vfs.mounts.seq.ReadOk(epoch) { - point.DecRef() - parent.DecRef() + point.DecRef(ctx) + parent.DecRef(ctx) goto retryNotFirst } - d.DecRef() - mnt.DecRef() + d.DecRef(ctx) + mnt.DecRef(ctx) mnt = parent d = point } diff --git a/pkg/sentry/vfs/pathname.go b/pkg/sentry/vfs/pathname.go index cd78d66bc..e4da15009 100644 --- a/pkg/sentry/vfs/pathname.go +++ b/pkg/sentry/vfs/pathname.go @@ -47,7 +47,7 @@ func (vfs *VirtualFilesystem) PathnameWithDeleted(ctx context.Context, vfsroot, haveRef := false defer func() { if haveRef { - vd.DecRef() + vd.DecRef(ctx) } }() @@ -64,12 +64,12 @@ loop: // of FilesystemImpl.PrependPath() may return nil instead. break loop } - nextVD := vfs.getMountpointAt(vd.mount, vfsroot) + nextVD := vfs.getMountpointAt(ctx, vd.mount, vfsroot) if !nextVD.Ok() { break loop } if haveRef { - vd.DecRef() + vd.DecRef(ctx) } vd = nextVD haveRef = true @@ -101,7 +101,7 @@ func (vfs *VirtualFilesystem) PathnameReachable(ctx context.Context, vfsroot, vd haveRef := false defer func() { if haveRef { - vd.DecRef() + vd.DecRef(ctx) } }() loop: @@ -112,12 +112,12 @@ loop: if vd.mount == vfsroot.mount && vd.mount.root == vfsroot.dentry { break loop } - nextVD := vfs.getMountpointAt(vd.mount, vfsroot) + nextVD := vfs.getMountpointAt(ctx, vd.mount, vfsroot) if !nextVD.Ok() { return "", nil } if haveRef { - vd.DecRef() + vd.DecRef(ctx) } vd = nextVD haveRef = true @@ -145,7 +145,7 @@ func (vfs *VirtualFilesystem) PathnameForGetcwd(ctx context.Context, vfsroot, vd haveRef := false defer func() { if haveRef { - vd.DecRef() + vd.DecRef(ctx) } }() unreachable := false @@ -157,13 +157,13 @@ loop: if vd.mount == vfsroot.mount && vd.mount.root == vfsroot.dentry { break loop } - nextVD := vfs.getMountpointAt(vd.mount, vfsroot) + nextVD := vfs.getMountpointAt(ctx, vd.mount, vfsroot) if !nextVD.Ok() { unreachable = true break loop } if haveRef { - vd.DecRef() + vd.DecRef(ctx) } vd = nextVD haveRef = true diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go index 9d047ff88..3304372d9 100644 --- a/pkg/sentry/vfs/resolving_path.go +++ b/pkg/sentry/vfs/resolving_path.go @@ -18,6 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sync" @@ -136,31 +137,31 @@ func (vfs *VirtualFilesystem) getResolvingPath(creds *auth.Credentials, pop *Pat return rp } -func (vfs *VirtualFilesystem) putResolvingPath(rp *ResolvingPath) { +func (vfs *VirtualFilesystem) putResolvingPath(ctx context.Context, rp *ResolvingPath) { rp.root = VirtualDentry{} - rp.decRefStartAndMount() + rp.decRefStartAndMount(ctx) rp.mount = nil rp.start = nil - rp.releaseErrorState() + rp.releaseErrorState(ctx) resolvingPathPool.Put(rp) } -func (rp *ResolvingPath) decRefStartAndMount() { +func (rp *ResolvingPath) decRefStartAndMount(ctx context.Context) { if rp.flags&rpflagsHaveStartRef != 0 { - rp.start.DecRef() + rp.start.DecRef(ctx) } if rp.flags&rpflagsHaveMountRef != 0 { - rp.mount.DecRef() + rp.mount.DecRef(ctx) } } -func (rp *ResolvingPath) releaseErrorState() { +func (rp *ResolvingPath) releaseErrorState(ctx context.Context) { if rp.nextStart != nil { - rp.nextStart.DecRef() + rp.nextStart.DecRef(ctx) rp.nextStart = nil } if rp.nextMount != nil { - rp.nextMount.DecRef() + rp.nextMount.DecRef(ctx) rp.nextMount = nil } } @@ -236,13 +237,13 @@ func (rp *ResolvingPath) Advance() { // Restart resets the stream of path components represented by rp to its state // on entry to the current FilesystemImpl method. -func (rp *ResolvingPath) Restart() { +func (rp *ResolvingPath) Restart(ctx context.Context) { rp.pit = rp.origParts[rp.numOrigParts-1] rp.mustBeDir = rp.mustBeDirOrig rp.symlinks = rp.symlinksOrig rp.curPart = rp.numOrigParts - 1 copy(rp.parts[:], rp.origParts[:rp.numOrigParts]) - rp.releaseErrorState() + rp.releaseErrorState(ctx) } func (rp *ResolvingPath) relpathCommit() { @@ -260,13 +261,13 @@ func (rp *ResolvingPath) relpathCommit() { // Mount, CheckRoot returns (unspecified, non-nil error). Otherwise, path // resolution should resolve d's parent normally, and CheckRoot returns (false, // nil). -func (rp *ResolvingPath) CheckRoot(d *Dentry) (bool, error) { +func (rp *ResolvingPath) CheckRoot(ctx context.Context, d *Dentry) (bool, error) { if d == rp.root.dentry && rp.mount == rp.root.mount { // At contextual VFS root (due to e.g. chroot(2)). return true, nil } else if d == rp.mount.root { // At mount root ... - vd := rp.vfs.getMountpointAt(rp.mount, rp.root) + vd := rp.vfs.getMountpointAt(ctx, rp.mount, rp.root) if vd.Ok() { // ... of non-root mount. rp.nextMount = vd.mount @@ -283,11 +284,11 @@ func (rp *ResolvingPath) CheckRoot(d *Dentry) (bool, error) { // to d. If d is a mount point, such that path resolution should switch to // another Mount, CheckMount returns a non-nil error. Otherwise, CheckMount // returns nil. -func (rp *ResolvingPath) CheckMount(d *Dentry) error { +func (rp *ResolvingPath) CheckMount(ctx context.Context, d *Dentry) error { if !d.isMounted() { return nil } - if mnt := rp.vfs.getMountAt(rp.mount, d); mnt != nil { + if mnt := rp.vfs.getMountAt(ctx, rp.mount, d); mnt != nil { rp.nextMount = mnt return resolveMountPointError{} } @@ -389,11 +390,11 @@ func (rp *ResolvingPath) HandleJump(target VirtualDentry) error { return resolveMountRootOrJumpError{} } -func (rp *ResolvingPath) handleError(err error) bool { +func (rp *ResolvingPath) handleError(ctx context.Context, err error) bool { switch err.(type) { case resolveMountRootOrJumpError: // Switch to the new Mount. We hold references on the Mount and Dentry. - rp.decRefStartAndMount() + rp.decRefStartAndMount(ctx) rp.mount = rp.nextMount rp.start = rp.nextStart rp.flags |= rpflagsHaveMountRef | rpflagsHaveStartRef @@ -412,7 +413,7 @@ func (rp *ResolvingPath) handleError(err error) bool { case resolveMountPointError: // Switch to the new Mount. We hold a reference on the Mount, but // borrow the reference on the mount root from the Mount. - rp.decRefStartAndMount() + rp.decRefStartAndMount(ctx) rp.mount = rp.nextMount rp.start = rp.nextMount.root rp.flags = rp.flags&^rpflagsHaveStartRef | rpflagsHaveMountRef @@ -423,12 +424,12 @@ func (rp *ResolvingPath) handleError(err error) bool { // path. rp.relpathCommit() // Restart path resolution on the new Mount. - rp.releaseErrorState() + rp.releaseErrorState(ctx) return true case resolveAbsSymlinkError: // Switch to the new Mount. References are borrowed from rp.root. - rp.decRefStartAndMount() + rp.decRefStartAndMount(ctx) rp.mount = rp.root.mount rp.start = rp.root.dentry rp.flags &^= rpflagsHaveMountRef | rpflagsHaveStartRef @@ -440,7 +441,7 @@ func (rp *ResolvingPath) handleError(err error) bool { // path, including the symlink target we just prepended. rp.relpathCommit() // Restart path resolution on the new Mount. - rp.releaseErrorState() + rp.releaseErrorState(ctx) return true default: diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go index 522e27475..9c2420683 100644 --- a/pkg/sentry/vfs/vfs.go +++ b/pkg/sentry/vfs/vfs.go @@ -122,7 +122,7 @@ type VirtualFilesystem struct { } // Init initializes a new VirtualFilesystem with no mounts or FilesystemTypes. -func (vfs *VirtualFilesystem) Init() error { +func (vfs *VirtualFilesystem) Init(ctx context.Context) error { if vfs.mountpoints != nil { panic("VFS already initialized") } @@ -145,7 +145,7 @@ func (vfs *VirtualFilesystem) Init() error { devMinor: anonfsDevMinor, } anonfs.vfsfs.Init(vfs, &anonFilesystemType{}, &anonfs) - defer anonfs.vfsfs.DecRef() + defer anonfs.vfsfs.DecRef(ctx) anonMount, err := vfs.NewDisconnectedMount(&anonfs.vfsfs, nil, &MountOptions{}) if err != nil { // We should not be passing any MountOptions that would cause @@ -192,11 +192,11 @@ func (vfs *VirtualFilesystem) AccessAt(ctx context.Context, creds *auth.Credenti for { err := rp.mount.fs.impl.AccessAt(ctx, rp, creds, ats) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return err } } @@ -214,11 +214,11 @@ func (vfs *VirtualFilesystem) GetDentryAt(ctx context.Context, creds *auth.Crede dentry: d, } rp.mount.IncRef() - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return vd, nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return VirtualDentry{}, err } } @@ -236,7 +236,7 @@ func (vfs *VirtualFilesystem) getParentDirAndName(ctx context.Context, creds *au } rp.mount.IncRef() name := rp.Component() - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return parentVD, name, nil } if checkInvariants { @@ -244,8 +244,8 @@ func (vfs *VirtualFilesystem) getParentDirAndName(ctx context.Context, creds *au panic(fmt.Sprintf("%T.GetParentDentryAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return VirtualDentry{}, "", err } } @@ -260,14 +260,14 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential } if !newpop.Path.Begin.Ok() { - oldVD.DecRef() + oldVD.DecRef(ctx) if newpop.Path.Absolute { return syserror.EEXIST } return syserror.ENOENT } if newpop.FollowFinalSymlink { - oldVD.DecRef() + oldVD.DecRef(ctx) ctx.Warningf("VirtualFilesystem.LinkAt: file creation paths can't follow final symlink") return syserror.EINVAL } @@ -276,8 +276,8 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential for { err := rp.mount.fs.impl.LinkAt(ctx, rp, oldVD) if err == nil { - vfs.putResolvingPath(rp) - oldVD.DecRef() + vfs.putResolvingPath(ctx, rp) + oldVD.DecRef(ctx) return nil } if checkInvariants { @@ -285,9 +285,9 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential panic(fmt.Sprintf("%T.LinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) - oldVD.DecRef() + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) + oldVD.DecRef(ctx) return err } } @@ -313,7 +313,7 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia for { err := rp.mount.fs.impl.MkdirAt(ctx, rp, *opts) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil } if checkInvariants { @@ -321,8 +321,8 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia panic(fmt.Sprintf("%T.MkdirAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return err } } @@ -346,7 +346,7 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia for { err := rp.mount.fs.impl.MknodAt(ctx, rp, *opts) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil } if checkInvariants { @@ -354,8 +354,8 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia panic(fmt.Sprintf("%T.MknodAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return err } } @@ -408,31 +408,31 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential for { fd, err := rp.mount.fs.impl.OpenAt(ctx, rp, *opts) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) if opts.FileExec { if fd.Mount().Flags.NoExec { - fd.DecRef() + fd.DecRef(ctx) return nil, syserror.EACCES } // Only a regular file can be executed. stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_TYPE}) if err != nil { - fd.DecRef() + fd.DecRef(ctx) return nil, err } if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.S_IFMT != linux.S_IFREG { - fd.DecRef() + fd.DecRef(ctx) return nil, syserror.EACCES } } - fd.Dentry().InotifyWithParent(linux.IN_OPEN, 0, PathEvent) + fd.Dentry().InotifyWithParent(ctx, linux.IN_OPEN, 0, PathEvent) return fd, nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return nil, err } } @@ -444,11 +444,11 @@ func (vfs *VirtualFilesystem) ReadlinkAt(ctx context.Context, creds *auth.Creden for { target, err := rp.mount.fs.impl.ReadlinkAt(ctx, rp) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return target, nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return "", err } } @@ -472,19 +472,19 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti return err } if oldName == "." || oldName == ".." { - oldParentVD.DecRef() + oldParentVD.DecRef(ctx) return syserror.EBUSY } if !newpop.Path.Begin.Ok() { - oldParentVD.DecRef() + oldParentVD.DecRef(ctx) if newpop.Path.Absolute { return syserror.EBUSY } return syserror.ENOENT } if newpop.FollowFinalSymlink { - oldParentVD.DecRef() + oldParentVD.DecRef(ctx) ctx.Warningf("VirtualFilesystem.RenameAt: destination path can't follow final symlink") return syserror.EINVAL } @@ -497,8 +497,8 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti for { err := rp.mount.fs.impl.RenameAt(ctx, rp, oldParentVD, oldName, renameOpts) if err == nil { - vfs.putResolvingPath(rp) - oldParentVD.DecRef() + vfs.putResolvingPath(ctx, rp) + oldParentVD.DecRef(ctx) return nil } if checkInvariants { @@ -506,9 +506,9 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti panic(fmt.Sprintf("%T.RenameAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) - oldParentVD.DecRef() + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) + oldParentVD.DecRef(ctx) return err } } @@ -531,7 +531,7 @@ func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentia for { err := rp.mount.fs.impl.RmdirAt(ctx, rp) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil } if checkInvariants { @@ -539,8 +539,8 @@ func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentia panic(fmt.Sprintf("%T.RmdirAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return err } } @@ -552,11 +552,11 @@ func (vfs *VirtualFilesystem) SetStatAt(ctx context.Context, creds *auth.Credent for { err := rp.mount.fs.impl.SetStatAt(ctx, rp, *opts) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return err } } @@ -568,11 +568,11 @@ func (vfs *VirtualFilesystem) StatAt(ctx context.Context, creds *auth.Credential for { stat, err := rp.mount.fs.impl.StatAt(ctx, rp, *opts) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return stat, nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return linux.Statx{}, err } } @@ -585,11 +585,11 @@ func (vfs *VirtualFilesystem) StatFSAt(ctx context.Context, creds *auth.Credenti for { statfs, err := rp.mount.fs.impl.StatFSAt(ctx, rp) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return statfs, nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return linux.Statfs{}, err } } @@ -612,7 +612,7 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent for { err := rp.mount.fs.impl.SymlinkAt(ctx, rp, target) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil } if checkInvariants { @@ -620,8 +620,8 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent panic(fmt.Sprintf("%T.SymlinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return err } } @@ -644,7 +644,7 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti for { err := rp.mount.fs.impl.UnlinkAt(ctx, rp) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil } if checkInvariants { @@ -652,8 +652,8 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti panic(fmt.Sprintf("%T.UnlinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return err } } @@ -671,7 +671,7 @@ func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.C for { bep, err := rp.mount.fs.impl.BoundEndpointAt(ctx, rp, *opts) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return bep, nil } if checkInvariants { @@ -679,8 +679,8 @@ func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.C panic(fmt.Sprintf("%T.BoundEndpointAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return nil, err } } @@ -693,7 +693,7 @@ func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Crede for { names, err := rp.mount.fs.impl.ListxattrAt(ctx, rp, size) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return names, nil } if err == syserror.ENOTSUP { @@ -701,11 +701,11 @@ func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Crede // fs/xattr.c:vfs_listxattr() falls back to allowing the security // subsystem to return security extended attributes, which by // default don't exist. - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil, nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return nil, err } } @@ -718,11 +718,11 @@ func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Creden for { val, err := rp.mount.fs.impl.GetxattrAt(ctx, rp, *opts) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return val, nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return "", err } } @@ -735,11 +735,11 @@ func (vfs *VirtualFilesystem) SetxattrAt(ctx context.Context, creds *auth.Creden for { err := rp.mount.fs.impl.SetxattrAt(ctx, rp, *opts) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return err } } @@ -751,11 +751,11 @@ func (vfs *VirtualFilesystem) RemovexattrAt(ctx context.Context, creds *auth.Cre for { err := rp.mount.fs.impl.RemovexattrAt(ctx, rp, name) if err == nil { - vfs.putResolvingPath(rp) + vfs.putResolvingPath(ctx, rp) return nil } - if !rp.handleError(err) { - vfs.putResolvingPath(rp) + if !rp.handleError(ctx, err) { + vfs.putResolvingPath(ctx, rp) return err } } @@ -777,7 +777,7 @@ func (vfs *VirtualFilesystem) SyncAllFilesystems(ctx context.Context) error { if err := fs.impl.Sync(ctx); err != nil && retErr == nil { retErr = err } - fs.DecRef() + fs.DecRef(ctx) } return retErr } @@ -831,9 +831,9 @@ func (vd VirtualDentry) IncRef() { // DecRef decrements the reference counts on the Mount and Dentry represented // by vd. -func (vd VirtualDentry) DecRef() { - vd.dentry.DecRef() - vd.mount.DecRef() +func (vd VirtualDentry) DecRef(ctx context.Context) { + vd.dentry.DecRef(ctx) + vd.mount.DecRef(ctx) } // Mount returns the Mount associated with vd. It does not take a reference on diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD index e0db6cf54..6c137f693 100644 --- a/pkg/tcpip/link/tun/BUILD +++ b/pkg/tcpip/link/tun/BUILD @@ -12,6 +12,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/abi/linux", + "//pkg/context", "//pkg/refs", "//pkg/sync", "//pkg/syserror", diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index 04ae58e59..22b0a12bd 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -18,6 +18,7 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -64,14 +65,14 @@ func (d *Device) beforeSave() { } // Release implements fs.FileOperations.Release. -func (d *Device) Release() { +func (d *Device) Release(ctx context.Context) { d.mu.Lock() defer d.mu.Unlock() // Decrease refcount if there is an endpoint associated with this file. if d.endpoint != nil { d.endpoint.RemoveNotify(d.notifyHandle) - d.endpoint.DecRef() + d.endpoint.DecRef(ctx) d.endpoint = nil } } @@ -341,8 +342,8 @@ type tunEndpoint struct { } // DecRef decrements refcount of e, removes NIC if refcount goes to 0. -func (e *tunEndpoint) DecRef() { - e.DecRefWithDestructor(func() { +func (e *tunEndpoint) DecRef(ctx context.Context) { + e.DecRefWithDestructor(ctx, func(context.Context) { e.stack.RemoveNIC(e.nicID) }) } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 59639ba19..9dd5b0184 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -640,7 +640,7 @@ func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Confi func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace) error { root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) for _, m := range c.mounts { log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options) @@ -868,7 +868,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns if err != nil { return fmt.Errorf("can't find mount destination %q: %v", m.Destination, err) } - defer dirent.DecRef() + defer dirent.DecRef(ctx) if err := mns.Mount(ctx, dirent, inode); err != nil { return fmt.Errorf("mount %q error: %v", m.Destination, err) } @@ -889,12 +889,12 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun if err != nil { return fmt.Errorf("can't find mount destination %q: %v", mount.Destination, err) } - defer target.DecRef() + defer target.DecRef(ctx) // Take a ref on the inode that is about to be (re)-mounted. source.root.IncRef() if err := mns.Mount(ctx, target, source.root); err != nil { - source.root.DecRef() + source.root.DecRef(ctx) return fmt.Errorf("bind mount %q error: %v", mount.Destination, err) } @@ -997,12 +997,12 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M switch err { case nil: // Found '/tmp' in filesystem, check if it's empty. - defer tmp.DecRef() + defer tmp.DecRef(ctx) f, err := tmp.Inode.GetFile(ctx, tmp, fs.FileFlags{Read: true, Directory: true}) if err != nil { return err } - defer f.DecRef() + defer f.DecRef(ctx) serializer := &fs.CollectEntriesSerializer{} if err := f.Readdir(ctx, serializer); err != nil { return err diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 9cd9c5909..e0d077f5a 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -346,7 +346,7 @@ func New(args Args) (*Loader, error) { if err != nil { return nil, fmt.Errorf("failed to create hostfs filesystem: %v", err) } - defer hostFilesystem.DecRef() + defer hostFilesystem.DecRef(k.SupervisorContext()) hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{}) if err != nil { return nil, fmt.Errorf("failed to create hostfs mount: %v", err) @@ -755,7 +755,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn return nil, fmt.Errorf("creating process: %v", err) } // CreateProcess takes a reference on FDTable if successful. - info.procArgs.FDTable.DecRef() + info.procArgs.FDTable.DecRef(ctx) // Set the foreground process group on the TTY to the global init process // group, since that is what we are about to start running. @@ -890,22 +890,20 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { // Add the HOME environment variable if it is not already set. if kernel.VFS2Enabled { - defer args.MountNamespaceVFS2.DecRef() - root := args.MountNamespaceVFS2.Root() - defer root.DecRef() ctx := vfs.WithRoot(l.k.SupervisorContext(), root) + defer args.MountNamespaceVFS2.DecRef(ctx) + defer root.DecRef(ctx) envv, err := user.MaybeAddExecUserHomeVFS2(ctx, args.MountNamespaceVFS2, args.KUID, args.Envv) if err != nil { return 0, err } args.Envv = envv } else { - defer args.MountNamespace.DecRef() - root := args.MountNamespace.Root() - defer root.DecRef() ctx := fs.WithRoot(l.k.SupervisorContext(), root) + defer args.MountNamespace.DecRef(ctx) + defer root.DecRef(ctx) envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) if err != nil { return 0, err @@ -1263,7 +1261,7 @@ func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.F fdTable := k.NewFDTable() ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs) if err != nil { - fdTable.DecRef() + fdTable.DecRef(ctx) return nil, nil, nil, err } return fdTable, ttyFile, ttyFileVFS2, nil diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 8e6fe57e1..aa3fdf96c 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -450,13 +450,13 @@ func TestCreateMountNamespace(t *testing.T) { } root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) for _, p := range tc.expectedPaths { maxTraversals := uint(0) if d, err := mns.FindInode(ctx, root, root, p, &maxTraversals); err != nil { t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err) } else { - d.DecRef() + d.DecRef(ctx) } } }) @@ -491,7 +491,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) { } root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) for _, p := range tc.expectedPaths { target := &vfs.PathOperation{ Root: root, @@ -502,7 +502,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) { if d, err := l.k.VFS().GetDentryAt(ctx, l.root.procArgs.Credentials, target, &vfs.GetDentryOptions{}); err != nil { t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err) } else { - d.DecRef() + d.DecRef(ctx) } } }) diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index cfe2d36aa..252ca07e3 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -103,7 +103,7 @@ func registerFilesystems(k *kernel.Kernel) error { if err != nil { return fmt.Errorf("creating devtmpfs accessor: %w", err) } - defer a.Release() + defer a.Release(ctx) if err := a.UserspaceInit(ctx); err != nil { return fmt.Errorf("initializing userspace: %w", err) @@ -252,7 +252,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error { root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) target := &vfs.PathOperation{ Root: root, Start: root, @@ -387,7 +387,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds } root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) pop := vfs.PathOperation{ Root: root, Start: root, @@ -481,10 +481,10 @@ func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *Co if err != nil { return err } - defer newMnt.DecRef() + defer newMnt.DecRef(ctx) root := mns.Root() - defer root.DecRef() + defer root.DecRef(ctx) if err := c.makeSyntheticMount(ctx, mount.Destination, root, creds); err != nil { return err } -- cgit v1.2.3 From 35312a95c4c8626365b4ece5ffb0bcab44b4bede Mon Sep 17 00:00:00 2001 From: Nayana Bidari Date: Wed, 5 Aug 2020 20:45:02 -0700 Subject: Add loss recovery option for TCP. /proc/sys/net/ipv4/tcp_recovery is used to enable RACK loss recovery in TCP. PiperOrigin-RevId: 325157807 --- pkg/sentry/fs/proc/sys_net.go | 95 +++++++++++++++++++++++++++++++++++++ pkg/sentry/fsimpl/proc/tasks_sys.go | 49 ++++++++++++++++++- pkg/sentry/inet/inet.go | 17 +++++++ pkg/sentry/inet/test_stack.go | 12 +++++ pkg/sentry/socket/hostinet/stack.go | 11 +++++ pkg/sentry/socket/netstack/stack.go | 14 ++++++ pkg/tcpip/transport/tcp/protocol.go | 33 +++++++++++++ test/syscalls/linux/proc_net.cc | 38 +++++++++++++++ 8 files changed, 268 insertions(+), 1 deletion(-) (limited to 'pkg/sentry/fs') diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index 702fdd392..8615b60f0 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -272,6 +272,96 @@ func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSeque return n, f.tcpSack.stack.SetTCPSACKEnabled(*f.tcpSack.enabled) } +// +stateify savable +type tcpRecovery struct { + fsutil.SimpleFileInode + + stack inet.Stack `state:"wait"` + recovery inet.TCPLossRecovery +} + +func newTCPRecoveryInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + ts := &tcpRecovery{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), + stack: s, + } + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: fs.SpecialFile, + } + return fs.NewInode(ctx, ts, msrc, sattr) +} + +// Truncate implements fs.InodeOperations.Truncate. +func (*tcpRecovery) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + +// GetFile implements fs.InodeOperations.GetFile. +func (r *tcpRecovery) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + flags.Pread = true + flags.Pwrite = true + return fs.NewFile(ctx, dirent, flags, &tcpRecoveryFile{ + tcpRecovery: r, + stack: r.stack, + }), nil +} + +// +stateify savable +type tcpRecoveryFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + tcpRecovery *tcpRecovery + + stack inet.Stack `state:"wait"` +} + +// Read implements fs.FileOperations.Read. +func (f *tcpRecoveryFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + + recovery, err := f.stack.TCPRecovery() + if err != nil { + return 0, err + } + f.tcpRecovery.recovery = recovery + s := fmt.Sprintf("%d\n", f.tcpRecovery.recovery) + n, err := dst.CopyOut(ctx, []byte(s)) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. +func (f *tcpRecoveryFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + f.tcpRecovery.recovery = inet.TCPLossRecovery(v) + if err := f.tcpRecovery.stack.SetTCPRecovery(f.tcpRecovery.recovery); err != nil { + return 0, err + } + return n, nil +} + func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { // The following files are simple stubs until they are implemented in // netstack, most of these files are configuration related. We use the @@ -351,6 +441,11 @@ func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s ine contents["tcp_wmem"] = newTCPMemInode(ctx, msrc, s, tcpWMem) } + // Add tcp_recovery. + if _, err := s.TCPRecovery(); err == nil { + contents["tcp_recovery"] = newTCPRecoveryInode(ctx, msrc, s) + } + d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)) return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil) } diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index 6dac2afa4..b71778128 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -55,7 +55,8 @@ func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *ke if stack := k.RootNetworkNamespace().Stack(); stack != nil { contents = map[string]*kernfs.Dentry{ "ipv4": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{ - "tcp_sack": fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}), + "tcp_recovery": fs.newDentry(root, fs.NextIno(), 0644, &tcpRecoveryData{stack: stack}), + "tcp_sack": fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}), // The following files are simple stubs until they are implemented in // netstack, most of these files are configuration related. We use the @@ -207,3 +208,49 @@ func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset *d.enabled = v != 0 return n, d.stack.SetTCPSACKEnabled(*d.enabled) } + +// tcpRecoveryData implements vfs.WritableDynamicBytesSource for +// /proc/sys/net/ipv4/tcp_recovery. +// +// +stateify savable +type tcpRecoveryData struct { + kernfs.DynamicBytesFile + + stack inet.Stack `state:"wait"` +} + +var _ vfs.WritableDynamicBytesSource = (*tcpRecoveryData)(nil) + +// Generate implements vfs.DynamicBytesSource. +func (d *tcpRecoveryData) Generate(ctx context.Context, buf *bytes.Buffer) error { + recovery, err := d.stack.TCPRecovery() + if err != nil { + return err + } + + buf.WriteString(fmt.Sprintf("%d\n", recovery)) + return nil +} + +func (d *tcpRecoveryData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + // No need to handle partial writes thus far. + return 0, syserror.EINVAL + } + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit the amount of memory allocated. + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + if err := d.stack.SetTCPRecovery(inet.TCPLossRecovery(v)); err != nil { + return 0, err + } + return n, nil +} diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go index 2916a0644..c0b4831d1 100644 --- a/pkg/sentry/inet/inet.go +++ b/pkg/sentry/inet/inet.go @@ -56,6 +56,12 @@ type Stack interface { // settings. SetTCPSACKEnabled(enabled bool) error + // TCPRecovery returns the TCP loss detection algorithm. + TCPRecovery() (TCPLossRecovery, error) + + // SetTCPRecovery attempts to change TCP loss detection algorithm. + SetTCPRecovery(recovery TCPLossRecovery) error + // Statistics reports stack statistics. Statistics(stat interface{}, arg string) error @@ -189,3 +195,14 @@ type StatSNMPUDP [8]uint64 // StatSNMPUDPLite describes UdpLite line of /proc/net/snmp. type StatSNMPUDPLite [8]uint64 + +// TCPLossRecovery indicates TCP loss detection and recovery methods to use. +type TCPLossRecovery int32 + +// Loss recovery constants from include/net/tcp.h which are used to set +// /proc/sys/net/ipv4/tcp_recovery. +const ( + TCP_RACK_LOSS_DETECTION TCPLossRecovery = 1 << iota + TCP_RACK_STATIC_REO_WND + TCP_RACK_NO_DUPTHRESH +) diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go index d8961fc94..9771f01fc 100644 --- a/pkg/sentry/inet/test_stack.go +++ b/pkg/sentry/inet/test_stack.go @@ -25,6 +25,7 @@ type TestStack struct { TCPRecvBufSize TCPBufferSize TCPSendBufSize TCPBufferSize TCPSACKFlag bool + Recovery TCPLossRecovery } // NewTestStack returns a TestStack with no network interfaces. The value of @@ -91,6 +92,17 @@ func (s *TestStack) SetTCPSACKEnabled(enabled bool) error { return nil } +// TCPRecovery implements Stack.TCPRecovery. +func (s *TestStack) TCPRecovery() (TCPLossRecovery, error) { + return s.Recovery, nil +} + +// SetTCPRecovery implements Stack.SetTCPRecovery. +func (s *TestStack) SetTCPRecovery(recovery TCPLossRecovery) error { + s.Recovery = recovery + return nil +} + // Statistics implements inet.Stack.Statistics. func (s *TestStack) Statistics(stat interface{}, arg string) error { return nil diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index a48082631..fda3dcb35 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -53,6 +53,7 @@ type Stack struct { interfaceAddrs map[int32][]inet.InterfaceAddr routes []inet.Route supportsIPv6 bool + tcpRecovery inet.TCPLossRecovery tcpRecvBufSize inet.TCPBufferSize tcpSendBufSize inet.TCPBufferSize tcpSACKEnabled bool @@ -350,6 +351,16 @@ func (s *Stack) SetTCPSACKEnabled(enabled bool) error { return syserror.EACCES } +// TCPRecovery implements inet.Stack.TCPRecovery. +func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) { + return s.tcpRecovery, nil +} + +// SetTCPRecovery implements inet.Stack.SetTCPRecovery. +func (s *Stack) SetTCPRecovery(recovery inet.TCPLossRecovery) error { + return syserror.EACCES +} + // getLine reads one line from proc file, with specified prefix. // The last argument, withHeader, specifies if it contains line header. func getLine(f *os.File, prefix string, withHeader bool) string { diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 67737ae87..f0fe18684 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -207,6 +207,20 @@ func (s *Stack) SetTCPSACKEnabled(enabled bool) error { return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enabled))).ToError() } +// TCPRecovery implements inet.Stack.TCPRecovery. +func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) { + var recovery tcp.Recovery + if err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &recovery); err != nil { + return 0, syserr.TranslateNetstackError(err).ToError() + } + return inet.TCPLossRecovery(recovery), nil +} + +// SetTCPRecovery implements inet.Stack.SetTCPRecovery. +func (s *Stack) SetTCPRecovery(recovery inet.TCPLossRecovery) error { + return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.Recovery(recovery))).ToError() +} + // Statistics implements inet.Stack.Statistics. func (s *Stack) Statistics(stat interface{}, arg string) error { switch stats := stat.(type) { diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index b34e47bbd..d9abb8d94 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -80,6 +80,25 @@ const ( // enable/disable SACK support in TCP. See: https://tools.ietf.org/html/rfc2018. type SACKEnabled bool +// Recovery is used by stack.(*Stack).TransportProtocolOption to +// set loss detection algorithm in TCP. +type Recovery int32 + +const ( + // RACKLossDetection indicates RACK is used for loss detection and + // recovery. + RACKLossDetection Recovery = 1 << iota + + // RACKStaticReoWnd indicates the reordering window should not be + // adjusted when DSACK is received. + RACKStaticReoWnd + + // RACKNoDupTh indicates RACK should not consider the classic three + // duplicate acknowledgements rule to mark the segments as lost. This + // is used when reordering is not detected. + RACKNoDupTh +) + // DelayEnabled is used by stack.(Stack*).TransportProtocolOption to // enable/disable Nagle's algorithm in TCP. type DelayEnabled bool @@ -161,6 +180,7 @@ func (s *synRcvdCounter) Threshold() uint64 { type protocol struct { mu sync.RWMutex sackEnabled bool + recovery Recovery delayEnabled bool sendBufferSize SendBufferSizeOption recvBufferSize ReceiveBufferSizeOption @@ -280,6 +300,12 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error { p.mu.Unlock() return nil + case Recovery: + p.mu.Lock() + p.recovery = Recovery(v) + p.mu.Unlock() + return nil + case DelayEnabled: p.mu.Lock() p.delayEnabled = bool(v) @@ -394,6 +420,12 @@ func (p *protocol) Option(option interface{}) *tcpip.Error { p.mu.RUnlock() return nil + case *Recovery: + p.mu.RLock() + *v = Recovery(p.recovery) + p.mu.RUnlock() + return nil + case *DelayEnabled: p.mu.RLock() *v = DelayEnabled(p.delayEnabled) @@ -535,6 +567,7 @@ func NewProtocol() stack.TransportProtocol { minRTO: MinRTO, maxRTO: MaxRTO, maxRetries: MaxRetries, + recovery: RACKLossDetection, } p.dispatcher.init(runtime.GOMAXPROCS(0)) return &p diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc index 3377b65cf..4fab097f4 100644 --- a/test/syscalls/linux/proc_net.cc +++ b/test/syscalls/linux/proc_net.cc @@ -477,6 +477,44 @@ TEST(ProcNetSnmp, CheckSnmp) { EXPECT_EQ(value_count, 1); } +TEST(ProcSysNetIpv4Recovery, Exists) { + EXPECT_THAT(open("/proc/sys/net/ipv4/tcp_recovery", O_RDONLY), + SyscallSucceeds()); +} + +TEST(ProcSysNetIpv4Recovery, CanReadAndWrite) { + // TODO(b/162988252): Enable save/restore for this test after the bug is + // fixed. + DisableSave ds; + + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_DAC_OVERRIDE)))); + + auto const fd = ASSERT_NO_ERRNO_AND_VALUE( + Open("/proc/sys/net/ipv4/tcp_recovery", O_RDWR)); + + char buf[10] = {'\0'}; + char to_write = '2'; + + // Check initial value is set to 1. + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(to_write) + 1)); + EXPECT_EQ(strcmp(buf, "1\n"), 0); + + // Set tcp_recovery to one of the allowed constants. + EXPECT_THAT(PwriteFd(fd.get(), &to_write, sizeof(to_write), 0), + SyscallSucceedsWithValue(sizeof(to_write))); + EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0), + SyscallSucceedsWithValue(sizeof(to_write) + 1)); + EXPECT_EQ(strcmp(buf, "2\n"), 0); + + // Set tcp_recovery to any random value. + char kMessage[] = "100"; + EXPECT_THAT(PwriteFd(fd.get(), kMessage, strlen(kMessage), 0), + SyscallSucceedsWithValue(strlen(kMessage))); + EXPECT_THAT(PreadFd(fd.get(), buf, sizeof(kMessage), 0), + SyscallSucceedsWithValue(sizeof(kMessage))); + EXPECT_EQ(strcmp(buf, "100\n"), 0); +} } // namespace } // namespace testing } // namespace gvisor -- cgit v1.2.3