diff options
Diffstat (limited to 'pkg/sentry/fsimpl/tmpfs')
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/BUILD | 92 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/benchmark_test.go | 487 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/directory.go | 187 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/filesystem.go | 698 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/named_pipe.go | 60 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/pipe_test.go | 235 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/regular_file.go | 357 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/regular_file_test.go | 224 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/symlink.go | 36 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/tmpfs.go | 299 |
10 files changed, 0 insertions, 2675 deletions
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD deleted file mode 100644 index a5b285987..000000000 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ /dev/null @@ -1,92 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "dentry_list", - out = "dentry_list.go", - package = "tmpfs", - prefix = "dentry", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*dentry", - "Linker": "*dentry", - }, -) - -go_library( - name = "tmpfs", - srcs = [ - "dentry_list.go", - "directory.go", - "filesystem.go", - "named_pipe.go", - "regular_file.go", - "symlink.go", - "tmpfs.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs", - deps = [ - "//pkg/abi/linux", - "//pkg/amutex", - "//pkg/fspath", - "//pkg/log", - "//pkg/sentry/arch", - "//pkg/sentry/context", - "//pkg/sentry/fs", - "//pkg/sentry/fs/fsutil", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", - "//pkg/sentry/memmap", - "//pkg/sentry/pgalloc", - "//pkg/sentry/platform", - "//pkg/sentry/safemem", - "//pkg/sentry/usage", - "//pkg/sentry/usermem", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) - -go_test( - name = "benchmark_test", - size = "small", - srcs = ["benchmark_test.go"], - deps = [ - ":tmpfs", - "//pkg/abi/linux", - "//pkg/fspath", - "//pkg/refs", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fs", - "//pkg/sentry/fs/tmpfs", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) - -go_test( - name = "tmpfs_test", - size = "small", - srcs = [ - "pipe_test.go", - "regular_file_test.go", - ], - embed = [":tmpfs"], - deps = [ - "//pkg/abi/linux", - "//pkg/fspath", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/contexttest", - "//pkg/sentry/usermem", - "//pkg/sentry/vfs", - "//pkg/syserror", - ], -) diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go deleted file mode 100644 index d88c83499..000000000 --- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go +++ /dev/null @@ -1,487 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package benchmark_test - -import ( - "fmt" - "runtime" - "strings" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" - _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// Differences from stat_benchmark: -// -// - Syscall interception, CopyInPath, copyOutStat, and overlayfs overheads are -// not included. -// -// - *MountStat benchmarks use a tmpfs root mount and a tmpfs submount at /tmp. -// Non-MountStat benchmarks use a tmpfs root mount and no submounts. -// stat_benchmark uses a varying root mount, a tmpfs submount at /tmp, and a -// subdirectory /tmp/<top_dir> (assuming TEST_TMPDIR == "/tmp"). Thus -// stat_benchmark at depth 1 does a comparable amount of work to *MountStat -// benchmarks at depth 2, and non-MountStat benchmarks at depth 3. -var depths = []int{1, 2, 3, 8, 64, 100} - -const ( - mountPointName = "tmp" - filename = "gvisor_test_temp_0_1557494568" -) - -// This is copied from syscalls/linux/sys_file.go, with the dependency on -// kernel.Task stripped out. -func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error { - var ( - d *fs.Dirent // The file. - rel *fs.Dirent // The relative directory for search (if required.) - err error - ) - - // Extract the working directory (maybe). - if len(path) > 0 && path[0] == '/' { - // Absolute path; rel can be nil. - } else if dirFD == linux.AT_FDCWD { - // Need to reference the working directory. - rel = wd - } else { - // Need to extract the given FD. - return syserror.EBADF - } - - // Lookup the node. - remainingTraversals := uint(linux.MaxSymlinkTraversals) - if resolve { - d, err = mntns.FindInode(ctx, root, rel, path, &remainingTraversals) - } else { - d, err = mntns.FindLink(ctx, root, rel, path, &remainingTraversals) - } - if err != nil { - return err - } - - err = fn(root, d) - d.DecRef() - return err -} - -func BenchmarkVFS1TmpfsStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - - // Create VFS. - tmpfsFS, ok := fs.FindFilesystem("tmpfs") - if !ok { - b.Fatalf("failed to find tmpfs filesystem type") - } - rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - mntns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - b.Fatalf("failed to create mount namespace: %v", err) - } - defer mntns.DecRef() - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - root := mntns.Root() - defer root.DecRef() - d := root - d.IncRef() - defer d.DecRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - next, err := d.Walk(ctx, root, name) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - d.DecRef() - d = next - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - file.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - dirPath := false - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR - } - uattr, err := d.Inode.UnstableAttr(ctx) - if err != nil { - return err - } - // Sanity check. - if uattr.Perms.User.Execute { - b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) - } - return nil - }) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func BenchmarkVFS2MemfsStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - defer mntns.DecRef(vfsObj) - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - root := mntns.Root() - defer root.DecRef() - vd := root - vd.IncRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - pop := vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(name), - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - vd.DecRef() - vd = nextVD - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(filename), - FollowFinalSymlink: true, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: 0644, - }) - vd.DecRef() - vd = vfs.VirtualDentry{} - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - defer fd.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filePath), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. - if stat.Mode&^linux.S_IFMT != 0644 { - b.Fatalf("got wrong permissions (%0o)", stat.Mode) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func BenchmarkVFS1TmpfsMountStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - - // Create VFS. - tmpfsFS, ok := fs.FindFilesystem("tmpfs") - if !ok { - b.Fatalf("failed to find tmpfs filesystem type") - } - rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - mntns, err := fs.NewMountNamespace(ctx, rootInode) - if err != nil { - b.Fatalf("failed to create mount namespace: %v", err) - } - defer mntns.DecRef() - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create and mount the submount. - root := mntns.Root() - defer root.DecRef() - if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create mount point: %v", err) - } - mountPoint, err := root.Walk(ctx, root, mountPointName) - if err != nil { - b.Fatalf("failed to walk to mount point: %v", err) - } - defer mountPoint.DecRef() - submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil) - if err != nil { - b.Fatalf("failed to create tmpfs submount: %v", err) - } - if err := mntns.Mount(ctx, mountPoint, submountInode); err != nil { - b.Fatalf("failed to mount tmpfs submount: %v", err) - } - filePathBuilder.WriteString(mountPointName) - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - d, err := root.Walk(ctx, root, mountPointName) - if err != nil { - b.Fatalf("failed to walk to mount root: %v", err) - } - defer d.DecRef() - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - next, err := d.Walk(ctx, root, name) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - d.DecRef() - d = next - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Create the file that will be stat'd. - file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644)) - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - file.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - dirPath := false - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error { - if dirPath && !fs.IsDir(d.Inode.StableAttr) { - return syserror.ENOTDIR - } - uattr, err := d.Inode.UnstableAttr(ctx) - if err != nil { - return err - } - // Sanity check. - if uattr.Perms.User.Execute { - b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode()) - } - return nil - }) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func BenchmarkVFS2MemfsMountStat(b *testing.B) { - for _, depth := range depths { - b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { - ctx := contexttest.Context(b) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) - if err != nil { - b.Fatalf("failed to create tmpfs root mount: %v", err) - } - defer mntns.DecRef(vfsObj) - - var filePathBuilder strings.Builder - filePathBuilder.WriteByte('/') - - // Create the mount point. - root := mntns.Root() - defer root.DecRef() - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(mountPointName), - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create mount point: %v", err) - } - // Save the mount point for later use. - mountPoint, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to mount point: %v", err) - } - defer mountPoint.DecRef() - // Create and mount the submount. - if err := vfsObj.MountAt(ctx, creds, "", &pop, "tmpfs", &vfs.MountOptions{}); err != nil { - b.Fatalf("failed to mount tmpfs submount: %v", err) - } - filePathBuilder.WriteString(mountPointName) - filePathBuilder.WriteByte('/') - - // Create nested directories with given depth. - vd, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to mount root: %v", err) - } - for i := depth; i > 0; i-- { - name := fmt.Sprintf("%d", i) - pop := vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(name), - } - if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ - Mode: 0755, - }); err != nil { - b.Fatalf("failed to create directory %q: %v", name, err) - } - nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{}) - if err != nil { - b.Fatalf("failed to walk to directory %q: %v", name, err) - } - vd.DecRef() - vd = nextVD - filePathBuilder.WriteString(name) - filePathBuilder.WriteByte('/') - } - - // Verify that we didn't create any directories under the mount - // point (i.e. they were all created on the submount). - firstDirName := fmt.Sprintf("%d", depth) - if child := mountPoint.Dentry().Child(firstDirName); child != nil { - b.Fatalf("created directory %q under root mount, not submount", firstDirName) - } - - // Create the file that will be stat'd. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: vd, - Path: fspath.Parse(filename), - FollowFinalSymlink: true, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: 0644, - }) - vd.DecRef() - if err != nil { - b.Fatalf("failed to create file %q: %v", filename, err) - } - fd.DecRef() - filePathBuilder.WriteString(filename) - filePath := filePathBuilder.String() - - runtime.GC() - b.ResetTimer() - for i := 0; i < b.N; i++ { - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filePath), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - b.Fatalf("stat(%q) failed: %v", filePath, err) - } - // Sanity check. - if stat.Mode&^linux.S_IFMT != 0644 { - b.Fatalf("got wrong permissions (%0o)", stat.Mode) - } - } - // Don't include deferred cleanup in benchmark time. - b.StopTimer() - }) - } -} - -func init() { - // Turn off reference leak checking for a fair comparison between vfs1 and - // vfs2. - refs.SetLeakMode(refs.NoLeakChecking) -} diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go deleted file mode 100644 index 887ca2619..000000000 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -type directory struct { - inode inode - - // childList is a list containing (1) child Dentries and (2) fake Dentries - // (with inode == nil) that represent the iteration position of - // directoryFDs. childList is used to support directoryFD.IterDirents() - // efficiently. childList is protected by filesystem.mu. - childList dentryList -} - -func (fs *filesystem) newDirectory(creds *auth.Credentials, mode linux.FileMode) *inode { - dir := &directory{} - dir.inode.init(dir, fs, creds, mode) - dir.inode.nlink = 2 // from "." and parent directory or ".." for root - return &dir.inode -} - -func (i *inode) isDir() bool { - _, ok := i.impl.(*directory) - return ok -} - -type directoryFD struct { - fileDescription - vfs.DirectoryFileDescriptionDefaultImpl - - // Protected by filesystem.mu. - iter *dentry - off int64 -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *directoryFD) Release() { - if fd.iter != nil { - fs := fd.filesystem() - dir := fd.inode().impl.(*directory) - fs.mu.Lock() - dir.childList.Remove(fd.iter) - fs.mu.Unlock() - fd.iter = nil - } -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - fs := fd.filesystem() - vfsd := fd.vfsfd.VirtualDentry().Dentry() - - fs.mu.Lock() - defer fs.mu.Unlock() - - if fd.off == 0 { - if !cb.Handle(vfs.Dirent{ - Name: ".", - Type: linux.DT_DIR, - Ino: vfsd.Impl().(*dentry).inode.ino, - NextOff: 1, - }) { - return nil - } - fd.off++ - } - if fd.off == 1 { - parentInode := vfsd.ParentOrSelf().Impl().(*dentry).inode - if !cb.Handle(vfs.Dirent{ - Name: "..", - Type: parentInode.direntType(), - Ino: parentInode.ino, - NextOff: 2, - }) { - return nil - } - fd.off++ - } - - dir := vfsd.Impl().(*dentry).inode.impl.(*directory) - var child *dentry - if fd.iter == nil { - // Start iteration at the beginning of dir. - child = dir.childList.Front() - fd.iter = &dentry{} - } else { - // Continue iteration from where we left off. - child = fd.iter.Next() - dir.childList.Remove(fd.iter) - } - for child != nil { - // Skip other directoryFD iterators. - if child.inode != nil { - if !cb.Handle(vfs.Dirent{ - Name: child.vfsd.Name(), - Type: child.inode.direntType(), - Ino: child.inode.ino, - NextOff: fd.off + 1, - }) { - dir.childList.InsertBefore(child, fd.iter) - return nil - } - fd.off++ - } - child = child.Next() - } - dir.childList.PushBack(fd.iter) - return nil -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fs := fd.filesystem() - fs.mu.Lock() - defer fs.mu.Unlock() - - switch whence { - case linux.SEEK_SET: - // Use offset as given. - case linux.SEEK_CUR: - offset += fd.off - default: - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - - // If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't - // seek even if doing so might reposition the iterator due to concurrent - // mutation of the directory. Compare fs/libfs.c:dcache_dir_lseek(). - if fd.off == offset { - return offset, nil - } - - fd.off = offset - // Compensate for "." and "..". - remChildren := int64(0) - if offset >= 2 { - remChildren = offset - 2 - } - - dir := fd.inode().impl.(*directory) - - // Ensure that fd.iter exists and is not linked into dir.childList. - if fd.iter == nil { - fd.iter = &dentry{} - } else { - dir.childList.Remove(fd.iter) - } - // Insert fd.iter before the remChildren'th child, or at the end of the - // list if remChildren >= number of children. - child := dir.childList.Front() - for child != nil { - // Skip other directoryFD iterators. - if child.inode != nil { - if remChildren == 0 { - dir.childList.InsertBefore(child, fd.iter) - return offset, nil - } - remChildren-- - } - child = child.Next() - } - dir.childList.PushBack(fd.iter) - return offset, nil -} diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go deleted file mode 100644 index 26979729e..000000000 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ /dev/null @@ -1,698 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "fmt" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// Sync implements vfs.FilesystemImpl.Sync. -func (fs *filesystem) Sync(ctx context.Context) error { - // All filesystem state is in-memory. - return nil -} - -// stepLocked resolves rp.Component() to an existing file, starting from the -// given directory. -// -// stepLocked is loosely analogous to fs/namei.c:walk_component(). -// -// Preconditions: filesystem.mu must be locked. !rp.Done(). -func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { - if !d.inode.isDir() { - return nil, syserror.ENOTDIR - } - if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { - return nil, err - } -afterSymlink: - nextVFSD, err := rp.ResolveComponent(&d.vfsd) - if err != nil { - return nil, err - } - if nextVFSD == nil { - // Since the Dentry tree is the sole source of truth for tmpfs, if it's - // not in the Dentry tree, it doesn't exist. - return nil, syserror.ENOENT - } - next := nextVFSD.Impl().(*dentry) - if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO: symlink traversals update access time - if err := rp.HandleSymlink(symlink.target); err != nil { - return nil, err - } - goto afterSymlink // don't check the current directory again - } - rp.Advance() - return next, nil -} - -// walkParentDirLocked resolves all but the last path component of rp to an -// existing directory, starting from the given directory (which is usually -// rp.Start().Impl().(*dentry)). It does not check that the returned directory -// is searchable by the provider of rp. -// -// walkParentDirLocked is loosely analogous to Linux's -// fs/namei.c:path_parentat(). -// -// Preconditions: filesystem.mu must be locked. !rp.Done(). -func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { - for !rp.Final() { - next, err := stepLocked(rp, d) - if err != nil { - return nil, err - } - d = next - } - if !d.inode.isDir() { - return nil, syserror.ENOTDIR - } - return d, nil -} - -// resolveLocked resolves rp to an existing file. -// -// resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). -// -// Preconditions: filesystem.mu must be locked. -func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) { - d := rp.Start().Impl().(*dentry) - for !rp.Done() { - next, err := stepLocked(rp, d) - if err != nil { - return nil, err - } - d = next - } - if rp.MustBeDir() && !d.inode.isDir() { - return nil, syserror.ENOTDIR - } - return d, nil -} - -// doCreateAt checks that creating a file at rp is permitted, then invokes -// create to do so. -// -// doCreateAt is loosely analogous to a conjunction of Linux's -// fs/namei.c:filename_create() and done_path_create(). -// -// Preconditions: !rp.Done(). For the final path component in rp, -// !rp.ShouldFollowSymlink(). -func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error { - fs.mu.Lock() - defer fs.mu.Unlock() - parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return err - } - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - name := rp.Component() - if name == "." || name == ".." { - return syserror.EEXIST - } - // Call parent.vfsd.Child() instead of stepLocked() or rp.ResolveChild(), - // because if the child exists we want to return EEXIST immediately instead - // of attempting symlink/mount traversal. - if parent.vfsd.Child(name) != nil { - return syserror.EEXIST - } - if !dir && rp.MustBeDir() { - return syserror.ENOENT - } - // In memfs, the only way to cause a dentry to be disowned is by removing - // it from the filesystem, so this check is equivalent to checking if - // parent has been removed. - if parent.vfsd.IsDisowned() { - return syserror.ENOENT - } - mnt := rp.Mount() - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - return create(parent, name) -} - -// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. -func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := resolveLocked(rp) - if err != nil { - return nil, err - } - if opts.CheckSearchable { - if !d.inode.isDir() { - return nil, syserror.ENOTDIR - } - if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true /* isDir */); err != nil { - return nil, err - } - } - d.IncRef() - return &d.vfsd, nil -} - -// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. -func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return nil, err - } - d.IncRef() - return &d.vfsd, nil -} - -// LinkAt implements vfs.FilesystemImpl.LinkAt. -func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { - return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { - if rp.Mount() != vd.Mount() { - return syserror.EXDEV - } - d := vd.Dentry().Impl().(*dentry) - if d.inode.isDir() { - return syserror.EPERM - } - if d.inode.nlink == 0 { - return syserror.ENOENT - } - if d.inode.nlink == maxLinks { - return syserror.EMLINK - } - d.inode.incLinksLocked() - child := fs.newDentry(d.inode) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - }) -} - -// MkdirAt implements vfs.FilesystemImpl.MkdirAt. -func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { - return fs.doCreateAt(rp, true /* dir */, func(parent *dentry, name string) error { - if parent.inode.nlink == maxLinks { - return syserror.EMLINK - } - parent.inode.incLinksLocked() // from child's ".." - child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - }) -} - -// MknodAt implements vfs.FilesystemImpl.MknodAt. -func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { - return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { - switch opts.Mode.FileType() { - case 0, linux.S_IFREG: - child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - case linux.S_IFIFO: - child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK: - // Not yet supported. - return syserror.EPERM - default: - return syserror.EINVAL - } - }) -} - -// OpenAt implements vfs.FilesystemImpl.OpenAt. -func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - if opts.Flags&linux.O_TMPFILE != 0 { - // Not yet supported. - return nil, syserror.EOPNOTSUPP - } - - // Handle O_CREAT and !O_CREAT separately, since in the latter case we - // don't need fs.mu for writing. - if opts.Flags&linux.O_CREAT == 0 { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := resolveLocked(rp) - if err != nil { - return nil, err - } - return d.open(ctx, rp, opts.Flags, false /* afterCreate */) - } - - mustCreate := opts.Flags&linux.O_EXCL != 0 - start := rp.Start().Impl().(*dentry) - fs.mu.Lock() - defer fs.mu.Unlock() - if rp.Done() { - // Reject attempts to open directories with O_CREAT. - if rp.MustBeDir() { - return nil, syserror.EISDIR - } - if mustCreate { - return nil, syserror.EEXIST - } - return start.open(ctx, rp, opts.Flags, false /* afterCreate */) - } -afterTrailingSymlink: - parent, err := walkParentDirLocked(rp, start) - if err != nil { - return nil, err - } - // Check for search permission in the parent directory. - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { - return nil, err - } - // Reject attempts to open directories with O_CREAT. - if rp.MustBeDir() { - return nil, syserror.EISDIR - } - name := rp.Component() - if name == "." || name == ".." { - return nil, syserror.EISDIR - } - // Determine whether or not we need to create a file. - child, err := stepLocked(rp, parent) - if err == syserror.ENOENT { - // Already checked for searchability above; now check for writability. - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { - return nil, err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return nil, err - } - defer rp.Mount().EndWrite() - // Create and open the child. - child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return child.open(ctx, rp, opts.Flags, true) - } - if err != nil { - return nil, err - } - // Do we need to resolve a trailing symlink? - if !rp.Done() { - start = parent - goto afterTrailingSymlink - } - // Open existing file. - if mustCreate { - return nil, syserror.EEXIST - } - return child.open(ctx, rp, opts.Flags, false) -} - -func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { - ats := vfs.AccessTypesForOpenFlags(flags) - if !afterCreate { - if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil { - return nil, err - } - } - mnt := rp.Mount() - switch impl := d.inode.impl.(type) { - case *regularFile: - var fd regularFileFD - fd.readable = vfs.MayReadFileWithOpenFlags(flags) - fd.writable = vfs.MayWriteFileWithOpenFlags(flags) - if fd.writable { - if err := mnt.CheckBeginWrite(); err != nil { - return nil, err - } - // mnt.EndWrite() is called by regularFileFD.Release(). - } - fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) - if flags&linux.O_TRUNC != 0 { - impl.mu.Lock() - impl.data.Truncate(0, impl.memFile) - atomic.StoreUint64(&impl.size, 0) - impl.mu.Unlock() - } - return &fd.vfsfd, nil - case *directory: - // Can't open directories writably. - if ats&vfs.MayWrite != 0 { - return nil, syserror.EISDIR - } - var fd directoryFD - fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) - return &fd.vfsfd, nil - case *symlink: - // Can't open symlinks without O_PATH (which is unimplemented). - return nil, syserror.ELOOP - case *namedPipe: - return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags) - default: - panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl)) - } -} - -// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. -func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := resolveLocked(rp) - if err != nil { - return "", err - } - symlink, ok := d.inode.impl.(*symlink) - if !ok { - return "", syserror.EINVAL - } - return symlink.target, nil -} - -// RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { - if opts.Flags != 0 { - // TODO(b/145974740): Support renameat2 flags. - return syserror.EINVAL - } - - // Resolve newParent first to verify that it's on this Mount. - fs.mu.Lock() - defer fs.mu.Unlock() - newParent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return err - } - newName := rp.Component() - if newName == "." || newName == ".." { - return syserror.EBUSY - } - mnt := rp.Mount() - if mnt != oldParentVD.Mount() { - return syserror.EXDEV - } - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - - oldParent := oldParentVD.Dentry().Impl().(*dentry) - if err := oldParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - // Call vfs.Dentry.Child() instead of stepLocked() or rp.ResolveChild(), - // because if the existing child is a symlink or mount point then we want - // to rename over it rather than follow it. - renamedVFSD := oldParent.vfsd.Child(oldName) - if renamedVFSD == nil { - return syserror.ENOENT - } - renamed := renamedVFSD.Impl().(*dentry) - if renamed.inode.isDir() { - if renamed == newParent || renamedVFSD.IsAncestorOf(&newParent.vfsd) { - return syserror.EINVAL - } - if oldParent != newParent { - // Writability is needed to change renamed's "..". - if err := renamed.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true /* isDir */); err != nil { - return err - } - } - } else { - if opts.MustBeDir || rp.MustBeDir() { - return syserror.ENOTDIR - } - } - - if err := newParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - replacedVFSD := newParent.vfsd.Child(newName) - var replaced *dentry - if replacedVFSD != nil { - replaced = replacedVFSD.Impl().(*dentry) - if replaced.inode.isDir() { - if !renamed.inode.isDir() { - return syserror.EISDIR - } - if replaced.vfsd.HasChildren() { - return syserror.ENOTEMPTY - } - } else { - if rp.MustBeDir() { - return syserror.ENOTDIR - } - if renamed.inode.isDir() { - return syserror.ENOTDIR - } - } - } else { - if renamed.inode.isDir() && newParent.inode.nlink == maxLinks { - return syserror.EMLINK - } - } - if newParent.vfsd.IsDisowned() { - return syserror.ENOENT - } - - // Linux places this check before some of those above; we do it here for - // simplicity, under the assumption that applications are not intentionally - // doing noop renames expecting them to succeed where non-noop renames - // would fail. - if renamedVFSD == replacedVFSD { - return nil - } - vfsObj := rp.VirtualFilesystem() - oldParentDir := oldParent.inode.impl.(*directory) - newParentDir := newParent.inode.impl.(*directory) - if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil { - return err - } - if replaced != nil { - newParentDir.childList.Remove(replaced) - if replaced.inode.isDir() { - newParent.inode.decLinksLocked() // from replaced's ".." - } - replaced.inode.decLinksLocked() - } - oldParentDir.childList.Remove(renamed) - newParentDir.childList.PushBack(renamed) - if renamed.inode.isDir() { - oldParent.inode.decLinksLocked() - newParent.inode.incLinksLocked() - } - // TODO: update timestamps and parent directory sizes - vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD) - return nil -} - -// RmdirAt implements vfs.FilesystemImpl.RmdirAt. -func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { - fs.mu.Lock() - defer fs.mu.Unlock() - parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return err - } - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - name := rp.Component() - if name == "." { - return syserror.EINVAL - } - if name == ".." { - return syserror.ENOTEMPTY - } - childVFSD := parent.vfsd.Child(name) - if childVFSD == nil { - return syserror.ENOENT - } - child := childVFSD.Impl().(*dentry) - if !child.inode.isDir() { - return syserror.ENOTDIR - } - if childVFSD.HasChildren() { - return syserror.ENOTEMPTY - } - mnt := rp.Mount() - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - vfsObj := rp.VirtualFilesystem() - if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { - return err - } - parent.inode.impl.(*directory).childList.Remove(child) - parent.inode.decLinksLocked() // from child's ".." - child.inode.decLinksLocked() - vfsObj.CommitDeleteDentry(childVFSD) - return nil -} - -// SetStatAt implements vfs.FilesystemImpl.SetStatAt. -func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return err - } - if opts.Stat.Mask == 0 { - return nil - } - // TODO: implement inode.setStat - return syserror.EPERM -} - -// StatAt implements vfs.FilesystemImpl.StatAt. -func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - d, err := resolveLocked(rp) - if err != nil { - return linux.Statx{}, err - } - var stat linux.Statx - d.inode.statTo(&stat) - return stat, nil -} - -// StatFSAt implements vfs.FilesystemImpl.StatFSAt. -func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return linux.Statfs{}, err - } - // TODO: actually implement statfs - return linux.Statfs{}, syserror.ENOSYS -} - -// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. -func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { - return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { - child := fs.newDentry(fs.newSymlink(rp.Credentials(), target)) - parent.vfsd.InsertChild(&child.vfsd, name) - parent.inode.impl.(*directory).childList.PushBack(child) - return nil - }) -} - -// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. -func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { - fs.mu.Lock() - defer fs.mu.Unlock() - parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) - if err != nil { - return err - } - if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { - return err - } - name := rp.Component() - if name == "." || name == ".." { - return syserror.EISDIR - } - childVFSD := parent.vfsd.Child(name) - if childVFSD == nil { - return syserror.ENOENT - } - child := childVFSD.Impl().(*dentry) - if child.inode.isDir() { - return syserror.EISDIR - } - if !rp.MustBeDir() { - return syserror.ENOTDIR - } - mnt := rp.Mount() - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - vfsObj := rp.VirtualFilesystem() - if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { - return err - } - parent.inode.impl.(*directory).childList.Remove(child) - child.inode.decLinksLocked() - vfsObj.CommitDeleteDentry(childVFSD) - return nil -} - -// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return nil, err - } - // TODO(b/127675828): support extended attributes - return nil, syserror.ENOTSUP -} - -// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. -func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return "", err - } - // TODO(b/127675828): support extended attributes - return "", syserror.ENOTSUP -} - -// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. -func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return err - } - // TODO(b/127675828): support extended attributes - return syserror.ENOTSUP -} - -// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. -func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { - return err - } - // TODO(b/127675828): support extended attributes - return syserror.ENOTSUP -} - -// PrependPath implements vfs.FilesystemImpl.PrependPath. -func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - return vfs.GenericPrependPath(vfsroot, vd, b) -} diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go deleted file mode 100644 index 40bde54de..000000000 --- a/pkg/sentry/fsimpl/tmpfs/named_pipe.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -type namedPipe struct { - inode inode - - pipe *pipe.VFSPipe -} - -// Preconditions: -// * fs.mu must be locked. -// * rp.Mount().CheckBeginWrite() has been called successfully. -func (fs *filesystem) newNamedPipe(creds *auth.Credentials, mode linux.FileMode) *inode { - file := &namedPipe{pipe: pipe.NewVFSPipe(pipe.DefaultPipeSize, usermem.PageSize)} - file.inode.init(file, fs, creds, mode) - file.inode.nlink = 1 // Only the parent has a link. - return &file.inode -} - -// namedPipeFD implements vfs.FileDescriptionImpl. Methods are implemented -// entirely via struct embedding. -type namedPipeFD struct { - fileDescription - - *pipe.VFSPipeFD -} - -func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { - var err error - var fd namedPipeFD - fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, rp, vfsd, &fd.vfsfd, flags) - if err != nil { - return nil, err - } - mnt := rp.Mount() - fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) - return &fd.vfsfd, nil -} diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go deleted file mode 100644 index 70b42a6ec..000000000 --- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "bytes" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -const fileName = "mypipe" - -func TestSeparateFDs(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() - - // Open the read side. This is done in a concurrently because opening - // One end the pipe blocks until the other end is opened. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - rfdchan := make(chan *vfs.FileDescription) - go func() { - openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY} - rfd, _ := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - rfdchan <- rfd - }() - - // Open the write side. - openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY} - wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) - } - defer wfd.DecRef() - - rfd, ok := <-rfdchan - if !ok { - t.Fatalf("failed to open pipe for reading %q", fileName) - } - defer rfd.DecRef() - - const msg = "vamos azul" - checkEmpty(ctx, t, rfd) - checkWrite(ctx, t, wfd, msg) - checkRead(ctx, t, rfd, msg) -} - -func TestNonblockingRead(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() - - // Open the read side as nonblocking. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK} - rfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for reading %q: %v", fileName, err) - } - defer rfd.DecRef() - - // Open the write side. - openOpts = vfs.OpenOptions{Flags: linux.O_WRONLY} - wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) - } - defer wfd.DecRef() - - const msg = "geh blau" - checkEmpty(ctx, t, rfd) - checkWrite(ctx, t, wfd, msg) - checkRead(ctx, t, rfd, msg) -} - -func TestNonblockingWriteError(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() - - // Open the write side as nonblocking, which should return ENXIO. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK} - _, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != syserror.ENXIO { - t.Fatalf("expected ENXIO, but got error: %v", err) - } -} - -func TestSingleFD(t *testing.T) { - ctx, creds, vfsObj, root := setup(t) - defer root.DecRef() - - // Open the pipe as readable and writable. - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - } - openOpts := vfs.OpenOptions{Flags: linux.O_RDWR} - fd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) - if err != nil { - t.Fatalf("failed to open pipe for writing %q: %v", fileName, err) - } - defer fd.DecRef() - - const msg = "forza blu" - checkEmpty(ctx, t, fd) - checkWrite(ctx, t, fd, msg) - checkRead(ctx, t, fd, msg) -} - -// setup creates a VFS with a pipe in the root directory at path fileName. The -// returned VirtualDentry must be DecRef()'d be the caller. It calls t.Fatal -// upon failure. -func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesystem, vfs.VirtualDentry) { - ctx := contexttest.Context(t) - creds := auth.CredentialsFromContext(ctx) - - // Create VFS. - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) - if err != nil { - t.Fatalf("failed to create tmpfs root mount: %v", err) - } - - // Create the pipe. - root := mntns.Root() - pop := vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - } - mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644} - if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil { - t.Fatalf("failed to create file %q: %v", fileName, err) - } - - // Sanity check: the file pipe exists and has the correct mode. - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(fileName), - FollowFinalSymlink: true, - }, &vfs.StatOptions{}) - if err != nil { - t.Fatalf("stat(%q) failed: %v", fileName, err) - } - if stat.Mode&^linux.S_IFMT != 0644 { - t.Errorf("got wrong permissions (%0o)", stat.Mode) - } - if stat.Mode&linux.S_IFMT != linux.ModeNamedPipe { - t.Errorf("got wrong file type (%0o)", stat.Mode) - } - - return ctx, creds, vfsObj, root -} - -// checkEmpty calls t.Fatal if the pipe in fd is not empty. -func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) { - readData := make([]byte, 1) - dst := usermem.BytesIOSequence(readData) - bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{}) - if err != syserror.ErrWouldBlock { - t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err) - } - if bytesRead != 0 { - t.Fatalf("expected to read 0 bytes, but got %d", bytesRead) - } -} - -// checkWrite calls t.Fatal if it fails to write all of msg to fd. -func checkWrite(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) { - writeData := []byte(msg) - src := usermem.BytesIOSequence(writeData) - bytesWritten, err := fd.Write(ctx, src, vfs.WriteOptions{}) - if err != nil { - t.Fatalf("error writing to pipe %q: %v", fileName, err) - } - if bytesWritten != int64(len(writeData)) { - t.Fatalf("expected to write %d bytes, but wrote %d", len(writeData), bytesWritten) - } -} - -// checkRead calls t.Fatal if it fails to read msg from fd. -func checkRead(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) { - readData := make([]byte, len(msg)) - dst := usermem.BytesIOSequence(readData) - bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{}) - if err != nil { - t.Fatalf("error reading from pipe %q: %v", fileName, err) - } - if bytesRead != int64(len(msg)) { - t.Fatalf("expected to read %d bytes, but got %d", len(msg), bytesRead) - } - if !bytes.Equal(readData, []byte(msg)) { - t.Fatalf("expected to read %q from pipe, but got %q", msg, string(readData)) - } -} diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go deleted file mode 100644 index f51e247a7..000000000 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ /dev/null @@ -1,357 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "io" - "math" - "sync" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/safemem" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -type regularFile struct { - inode inode - - // memFile is a platform.File used to allocate pages to this regularFile. - memFile *pgalloc.MemoryFile - - // mu protects the fields below. - mu sync.RWMutex - - // data maps offsets into the file to offsets into memFile that store - // the file's data. - data fsutil.FileRangeSet - - // size is the size of data, but accessed using atomic memory - // operations to avoid locking in inode.stat(). - size uint64 - - // seals represents file seals on this inode. - seals uint32 -} - -func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode { - file := ®ularFile{ - memFile: fs.memFile, - } - file.inode.init(file, fs, creds, mode) - file.inode.nlink = 1 // from parent directory - return &file.inode -} - -type regularFileFD struct { - fileDescription - - // These are immutable. - readable bool - writable bool - - // off is the file offset. off is accessed using atomic memory operations. - // offMu serializes operations that may mutate off. - off int64 - offMu sync.Mutex -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *regularFileFD) Release() { - if fd.writable { - fd.vfsfd.VirtualDentry().Mount().EndWrite() - } -} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - if !fd.readable { - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - if dst.NumBytes() == 0 { - return 0, nil - } - f := fd.inode().impl.(*regularFile) - rw := getRegularFileReadWriter(f, offset) - n, err := dst.CopyOutFrom(ctx, rw) - putRegularFileReadWriter(rw) - return int64(n), err -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - fd.offMu.Lock() - n, err := fd.PRead(ctx, dst, fd.off, opts) - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - if !fd.writable { - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - srclen := src.NumBytes() - if srclen == 0 { - return 0, nil - } - f := fd.inode().impl.(*regularFile) - end := offset + srclen - if end < offset { - // Overflow. - return 0, syserror.EFBIG - } - rw := getRegularFileReadWriter(f, offset) - n, err := src.CopyInTo(ctx, rw) - putRegularFileReadWriter(rw) - return n, err -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - fd.offMu.Lock() - n, err := fd.PWrite(ctx, src, fd.off, opts) - fd.off += n - fd.offMu.Unlock() - return n, err -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fd.offMu.Lock() - defer fd.offMu.Unlock() - switch whence { - case linux.SEEK_SET: - // use offset as specified - case linux.SEEK_CUR: - offset += fd.off - case linux.SEEK_END: - offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size)) - default: - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - fd.off = offset - return offset, nil -} - -// Sync implements vfs.FileDescriptionImpl.Sync. -func (fd *regularFileFD) Sync(ctx context.Context) error { - return nil -} - -// regularFileReadWriter implements safemem.Reader and Safemem.Writer. -type regularFileReadWriter struct { - file *regularFile - - // Offset into the file to read/write at. Note that this may be - // different from the FD offset if PRead/PWrite is used. - off uint64 -} - -var regularFileReadWriterPool = sync.Pool{ - New: func() interface{} { - return ®ularFileReadWriter{} - }, -} - -func getRegularFileReadWriter(file *regularFile, offset int64) *regularFileReadWriter { - rw := regularFileReadWriterPool.Get().(*regularFileReadWriter) - rw.file = file - rw.off = uint64(offset) - return rw -} - -func putRegularFileReadWriter(rw *regularFileReadWriter) { - rw.file = nil - regularFileReadWriterPool.Put(rw) -} - -// ReadToBlocks implements safemem.Reader.ReadToBlocks. -func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { - rw.file.mu.RLock() - - // Compute the range to read (limited by file size and overflow-checked). - if rw.off >= rw.file.size { - rw.file.mu.RUnlock() - return 0, io.EOF - } - end := rw.file.size - if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end { - end = rend - } - - var done uint64 - seg, gap := rw.file.data.Find(uint64(rw.off)) - for rw.off < end { - mr := memmap.MappableRange{uint64(rw.off), uint64(end)} - switch { - case seg.Ok(): - // Get internal mappings. - ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read) - if err != nil { - rw.file.mu.RUnlock() - return done, err - } - - // Copy from internal mappings. - n, err := safemem.CopySeq(dsts, ims) - done += n - rw.off += uint64(n) - dsts = dsts.DropFirst64(n) - if err != nil { - rw.file.mu.RUnlock() - return done, err - } - - // Continue. - seg, gap = seg.NextNonEmpty() - - case gap.Ok(): - // Tmpfs holes are zero-filled. - gapmr := gap.Range().Intersect(mr) - dst := dsts.TakeFirst64(gapmr.Length()) - n, err := safemem.ZeroSeq(dst) - done += n - rw.off += uint64(n) - dsts = dsts.DropFirst64(n) - if err != nil { - rw.file.mu.RUnlock() - return done, err - } - - // Continue. - seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{} - } - } - rw.file.mu.RUnlock() - return done, nil -} - -// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. -func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { - rw.file.mu.Lock() - - // Compute the range to write (overflow-checked). - end := rw.off + srcs.NumBytes() - if end <= rw.off { - end = math.MaxInt64 - } - - // Check if seals prevent either file growth or all writes. - switch { - case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed - rw.file.mu.Unlock() - return 0, syserror.EPERM - case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed - // When growth is sealed, Linux effectively allows writes which would - // normally grow the file to partially succeed up to the current EOF, - // rounded down to the page boundary before the EOF. - // - // This happens because writes (and thus the growth check) for tmpfs - // files proceed page-by-page on Linux, and the final write to the page - // containing EOF fails, resulting in a partial write up to the start of - // that page. - // - // To emulate this behaviour, artifically truncate the write to the - // start of the page containing the current EOF. - // - // See Linux, mm/filemap.c:generic_perform_write() and - // mm/shmem.c:shmem_write_begin(). - if pgstart := uint64(usermem.Addr(rw.file.size).RoundDown()); end > pgstart { - end = pgstart - } - if end <= rw.off { - // Truncation would result in no data being written. - rw.file.mu.Unlock() - return 0, syserror.EPERM - } - } - - // Page-aligned mr for when we need to allocate memory. RoundUp can't - // overflow since end is an int64. - pgstartaddr := usermem.Addr(rw.off).RoundDown() - pgendaddr, _ := usermem.Addr(end).RoundUp() - pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)} - - var ( - done uint64 - retErr error - ) - seg, gap := rw.file.data.Find(uint64(rw.off)) - for rw.off < end { - mr := memmap.MappableRange{uint64(rw.off), uint64(end)} - switch { - case seg.Ok(): - // Get internal mappings. - ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write) - if err != nil { - retErr = err - goto exitLoop - } - - // Copy to internal mappings. - n, err := safemem.CopySeq(ims, srcs) - done += n - rw.off += uint64(n) - srcs = srcs.DropFirst64(n) - if err != nil { - retErr = err - goto exitLoop - } - - // Continue. - seg, gap = seg.NextNonEmpty() - - case gap.Ok(): - // Allocate memory for the write. - gapMR := gap.Range().Intersect(pgMR) - fr, err := rw.file.memFile.Allocate(gapMR.Length(), usage.Tmpfs) - if err != nil { - retErr = err - goto exitLoop - } - - // Write to that memory as usual. - seg, gap = rw.file.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{} - } - } -exitLoop: - // If the write ends beyond the file's previous size, it causes the - // file to grow. - if rw.off > rw.file.size { - atomic.StoreUint64(&rw.file.size, rw.off) - } - - rw.file.mu.Unlock() - return done, retErr -} diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go deleted file mode 100644 index 3731c5b6f..000000000 --- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "bytes" - "fmt" - "io" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If -// the returned err is not nil, then cleanup should be called when the FD is no -// longer needed. -func newFileFD(ctx context.Context, filename string) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - - vfsObj := vfs.New() - vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) - if err != nil { - return nil, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err) - } - root := mntns.Root() - - // Create the file that will be write/read. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filename), - FollowFinalSymlink: true, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: 0644, - }) - if err != nil { - root.DecRef() - mntns.DecRef(vfsObj) - return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err) - } - - return fd, func() { - root.DecRef() - mntns.DecRef(vfsObj) - }, nil -} - -// Test that we can write some data to a file and read it back.` -func TestSimpleWriteRead(t *testing.T) { - ctx := contexttest.Context(t) - fd, cleanup, err := newFileFD(ctx, "simpleReadWrite") - if err != nil { - t.Fatal(err) - } - defer cleanup() - - // Write. - data := []byte("foobarbaz") - n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) - if err != nil { - t.Fatalf("fd.Write failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.Write got short write length %d, want %d", n, len(data)) - } - if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want { - t.Errorf("fd.Write left offset at %d, want %d", got, want) - } - - // Seek back to beginning. - if _, err := fd.Seek(ctx, 0, linux.SEEK_SET); err != nil { - t.Fatalf("fd.Seek failed: %v", err) - } - if got, want := fd.Impl().(*regularFileFD).off, int64(0); got != want { - t.Errorf("fd.Seek(0) left offset at %d, want %d", got, want) - } - - // Read. - buf := make([]byte, len(data)) - n, err = fd.Read(ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) - if err != nil && err != io.EOF { - t.Fatalf("fd.Read failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.Read got short read length %d, want %d", n, len(data)) - } - if got, want := string(buf), string(data); got != want { - t.Errorf("Read got %q want %s", got, want) - } - if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want { - t.Errorf("fd.Write left offset at %d, want %d", got, want) - } -} - -func TestPWrite(t *testing.T) { - ctx := contexttest.Context(t) - fd, cleanup, err := newFileFD(ctx, "PRead") - if err != nil { - t.Fatal(err) - } - defer cleanup() - - // Fill file with 1k 'a's. - data := bytes.Repeat([]byte{'a'}, 1000) - n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) - if err != nil { - t.Fatalf("fd.Write failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.Write got short write length %d, want %d", n, len(data)) - } - - // Write "gVisor is awesome" at various offsets. - buf := []byte("gVisor is awesome") - offsets := []int{0, 1, 2, 10, 20, 50, 100, len(data) - 100, len(data) - 1, len(data), len(data) + 1} - for _, offset := range offsets { - name := fmt.Sprintf("PWrite offset=%d", offset) - t.Run(name, func(t *testing.T) { - n, err := fd.PWrite(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.WriteOptions{}) - if err != nil { - t.Errorf("fd.PWrite got err %v want nil", err) - } - if n != int64(len(buf)) { - t.Errorf("fd.PWrite got %d bytes want %d", n, len(buf)) - } - - // Update data to reflect expected file contents. - if len(data) < offset+len(buf) { - data = append(data, make([]byte, (offset+len(buf))-len(data))...) - } - copy(data[offset:], buf) - - // Read the whole file and compare with data. - readBuf := make([]byte, len(data)) - n, err = fd.PRead(ctx, usermem.BytesIOSequence(readBuf), 0, vfs.ReadOptions{}) - if err != nil { - t.Fatalf("fd.PRead failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.PRead got short read length %d, want %d", n, len(data)) - } - if got, want := string(readBuf), string(data); got != want { - t.Errorf("PRead got %q want %s", got, want) - } - - }) - } -} - -func TestPRead(t *testing.T) { - ctx := contexttest.Context(t) - fd, cleanup, err := newFileFD(ctx, "PRead") - if err != nil { - t.Fatal(err) - } - defer cleanup() - - // Write 100 sequences of 'gVisor is awesome'. - data := bytes.Repeat([]byte("gVisor is awsome"), 100) - n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{}) - if err != nil { - t.Fatalf("fd.Write failed: %v", err) - } - if n != int64(len(data)) { - t.Errorf("fd.Write got short write length %d, want %d", n, len(data)) - } - - // Read various sizes from various offsets. - sizes := []int{0, 1, 2, 10, 20, 50, 100, 1000} - offsets := []int{0, 1, 2, 10, 20, 50, 100, 1000, len(data) - 100, len(data) - 1, len(data), len(data) + 1} - - for _, size := range sizes { - for _, offset := range offsets { - name := fmt.Sprintf("PRead offset=%d size=%d", offset, size) - t.Run(name, func(t *testing.T) { - var ( - wantRead []byte - wantErr error - ) - if offset < len(data) { - wantRead = data[offset:] - } else if size > 0 { - wantErr = io.EOF - } - if offset+size < len(data) { - wantRead = wantRead[:size] - } - buf := make([]byte, size) - n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.ReadOptions{}) - if err != wantErr { - t.Errorf("fd.PRead got err %v want %v", err, wantErr) - } - if n != int64(len(wantRead)) { - t.Errorf("fd.PRead got %d bytes want %d", n, len(wantRead)) - } - if got := string(buf[:n]); got != string(wantRead) { - t.Errorf("fd.PRead got %q want %q", got, string(wantRead)) - } - }) - } - } -} diff --git a/pkg/sentry/fsimpl/tmpfs/symlink.go b/pkg/sentry/fsimpl/tmpfs/symlink.go deleted file mode 100644 index 5246aca84..000000000 --- a/pkg/sentry/fsimpl/tmpfs/symlink.go +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tmpfs - -import ( - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" -) - -type symlink struct { - inode inode - target string // immutable -} - -func (fs *filesystem) newSymlink(creds *auth.Credentials, target string) *inode { - link := &symlink{ - target: target, - } - link.inode.init(link, fs, creds, 0777) - link.inode.nlink = 1 // from parent directory - return &link.inode -} - -// O_PATH is unimplemented, so there's no way to get a FileDescription -// representing a symlink yet. diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go deleted file mode 100644 index 7be6faa5b..000000000 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package tmpfs provides a filesystem implementation that behaves like tmpfs: -// the Dentry tree is the sole source of truth for the state of the filesystem. -// -// Lock order: -// -// filesystem.mu -// regularFileFD.offMu -// regularFile.mu -// inode.mu -package tmpfs - -import ( - "fmt" - "math" - "sync" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/pgalloc" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// FilesystemType implements vfs.FilesystemType. -type FilesystemType struct{} - -// filesystem implements vfs.FilesystemImpl. -type filesystem struct { - vfsfs vfs.Filesystem - - // memFile is used to allocate pages to for regular files. - memFile *pgalloc.MemoryFile - - // mu serializes changes to the Dentry tree. - mu sync.RWMutex - - nextInoMinusOne uint64 // accessed using atomic memory operations -} - -// GetFilesystem implements vfs.FilesystemType.GetFilesystem. -func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - memFileProvider := pgalloc.MemoryFileProviderFromContext(ctx) - if memFileProvider == nil { - panic("MemoryFileProviderFromContext returned nil") - } - fs := filesystem{ - memFile: memFileProvider.MemoryFile(), - } - fs.vfsfs.Init(vfsObj, &fs) - root := fs.newDentry(fs.newDirectory(creds, 01777)) - return &fs.vfsfs, &root.vfsd, nil -} - -// Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { -} - -// dentry implements vfs.DentryImpl. -type dentry struct { - vfsd vfs.Dentry - - // inode is the inode represented by this dentry. Multiple Dentries may - // share a single non-directory inode (with hard links). inode is - // immutable. - inode *inode - - // tmpfs doesn't count references on dentries; because the dentry tree is - // the sole source of truth, it is by definition always consistent with the - // state of the filesystem. However, it does count references on inodes, - // because inode resources are released when all references are dropped. - // (tmpfs doesn't really have resources to release, but we implement - // reference counting because tmpfs regular files will.) - - // dentryEntry (ugh) links dentries into their parent directory.childList. - dentryEntry -} - -func (fs *filesystem) newDentry(inode *inode) *dentry { - d := &dentry{ - inode: inode, - } - d.vfsd.Init(d) - return d -} - -// IncRef implements vfs.DentryImpl.IncRef. -func (d *dentry) IncRef() { - d.inode.incRef() -} - -// TryIncRef implements vfs.DentryImpl.TryIncRef. -func (d *dentry) TryIncRef() bool { - return d.inode.tryIncRef() -} - -// DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef() { - d.inode.decRef() -} - -// inode represents a filesystem object. -type inode struct { - // refs is a reference count. refs is accessed using atomic memory - // operations. - // - // A reference is held on all inodes that are reachable in the filesystem - // tree. For non-directories (which may have multiple hard links), this - // means that a reference is dropped when nlink reaches 0. For directories, - // nlink never reaches 0 due to the "." entry; instead, - // filesystem.RmdirAt() drops the reference. - refs int64 - - // Inode metadata; protected by mu and accessed using atomic memory - // operations unless otherwise specified. - mu sync.RWMutex - mode uint32 // excluding file type bits, which are based on impl - nlink uint32 // protected by filesystem.mu instead of inode.mu - uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic - gid uint32 // auth.KGID, but ... - ino uint64 // immutable - - impl interface{} // immutable -} - -const maxLinks = math.MaxUint32 - -func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) { - i.refs = 1 - i.mode = uint32(mode) - i.uid = uint32(creds.EffectiveKUID) - i.gid = uint32(creds.EffectiveKGID) - i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1) - // i.nlink initialized by caller - i.impl = impl -} - -// incLinksLocked increments i's link count. -// -// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. -// i.nlink < maxLinks. -func (i *inode) incLinksLocked() { - if i.nlink == 0 { - panic("tmpfs.inode.incLinksLocked() called with no existing links") - } - if i.nlink == maxLinks { - panic("memfs.inode.incLinksLocked() called with maximum link count") - } - atomic.AddUint32(&i.nlink, 1) -} - -// decLinksLocked decrements i's link count. -// -// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. -func (i *inode) decLinksLocked() { - if i.nlink == 0 { - panic("tmpfs.inode.decLinksLocked() called with no existing links") - } - atomic.AddUint32(&i.nlink, ^uint32(0)) -} - -func (i *inode) incRef() { - if atomic.AddInt64(&i.refs, 1) <= 1 { - panic("tmpfs.inode.incRef() called without holding a reference") - } -} - -func (i *inode) tryIncRef() bool { - for { - refs := atomic.LoadInt64(&i.refs) - if refs == 0 { - return false - } - if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) { - return true - } - } -} - -func (i *inode) decRef() { - if refs := atomic.AddInt64(&i.refs, -1); refs == 0 { - // This is unnecessary; it's mostly to simulate what tmpfs would do. - if regFile, ok := i.impl.(*regularFile); ok { - regFile.mu.Lock() - regFile.data.DropAll(regFile.memFile) - atomic.StoreUint64(®File.size, 0) - regFile.mu.Unlock() - } - } else if refs < 0 { - panic("tmpfs.inode.decRef() called without holding a reference") - } -} - -func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error { - return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))) -} - -// Go won't inline this function, and returning linux.Statx (which is quite -// big) means spending a lot of time in runtime.duffcopy(), so instead it's an -// output parameter. -func (i *inode) statTo(stat *linux.Statx) { - stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO - stat.Blksize = 1 // usermem.PageSize in tmpfs - stat.Nlink = atomic.LoadUint32(&i.nlink) - stat.UID = atomic.LoadUint32(&i.uid) - stat.GID = atomic.LoadUint32(&i.gid) - stat.Mode = uint16(atomic.LoadUint32(&i.mode)) - stat.Ino = i.ino - // TODO: device number - switch impl := i.impl.(type) { - case *regularFile: - stat.Mode |= linux.S_IFREG - stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS - stat.Size = uint64(atomic.LoadUint64(&impl.size)) - // In tmpfs, this will be FileRangeSet.Span() / 512 (but also cached in - // a uint64 accessed using atomic memory operations to avoid taking - // locks). - stat.Blocks = allocatedBlocksForSize(stat.Size) - case *directory: - stat.Mode |= linux.S_IFDIR - case *symlink: - stat.Mode |= linux.S_IFLNK - stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS - stat.Size = uint64(len(impl.target)) - stat.Blocks = allocatedBlocksForSize(stat.Size) - case *namedPipe: - stat.Mode |= linux.S_IFIFO - default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) - } -} - -// allocatedBlocksForSize returns the number of 512B blocks needed to -// accommodate the given size in bytes, as appropriate for struct -// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block -// size is independent of the "preferred block size for I/O", struct -// stat::st_blksize and struct statx::stx_blksize.) -func allocatedBlocksForSize(size uint64) uint64 { - return (size + 511) / 512 -} - -func (i *inode) direntType() uint8 { - switch i.impl.(type) { - case *regularFile: - return linux.DT_REG - case *directory: - return linux.DT_DIR - case *symlink: - return linux.DT_LNK - default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) - } -} - -// fileDescription is embedded by tmpfs implementations of -// vfs.FileDescriptionImpl. -type fileDescription struct { - vfsfd vfs.FileDescription - vfs.FileDescriptionDefaultImpl -} - -func (fd *fileDescription) filesystem() *filesystem { - return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) -} - -func (fd *fileDescription) inode() *inode { - return fd.vfsfd.Dentry().Impl().(*dentry).inode -} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - var stat linux.Statx - fd.inode().statTo(&stat) - return stat, nil -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - if opts.Stat.Mask == 0 { - return nil - } - // TODO: implement inode.setStat - return syserror.EPERM -} |