summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl/tmpfs
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/tmpfs')
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD95
-rw-r--r--pkg/sentry/fsimpl/tmpfs/benchmark_test.go487
-rw-r--r--pkg/sentry/fsimpl/tmpfs/directory.go187
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go696
-rw-r--r--pkg/sentry/fsimpl/tmpfs/named_pipe.go60
-rw-r--r--pkg/sentry/fsimpl/tmpfs/pipe_test.go235
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go392
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file_test.go436
-rw-r--r--pkg/sentry/fsimpl/tmpfs/stat_test.go232
-rw-r--r--pkg/sentry/fsimpl/tmpfs/symlink.go36
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go393
11 files changed, 0 insertions, 3249 deletions
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
deleted file mode 100644
index 7601c7c04..000000000
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ /dev/null
@@ -1,95 +0,0 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
-load("//tools/go_generics:defs.bzl", "go_template_instance")
-
-package(licenses = ["notice"])
-
-go_template_instance(
- name = "dentry_list",
- out = "dentry_list.go",
- package = "tmpfs",
- prefix = "dentry",
- template = "//pkg/ilist:generic_list",
- types = {
- "Element": "*dentry",
- "Linker": "*dentry",
- },
-)
-
-go_library(
- name = "tmpfs",
- srcs = [
- "dentry_list.go",
- "directory.go",
- "filesystem.go",
- "named_pipe.go",
- "regular_file.go",
- "symlink.go",
- "tmpfs.go",
- ],
- importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs",
- deps = [
- "//pkg/abi/linux",
- "//pkg/amutex",
- "//pkg/fspath",
- "//pkg/log",
- "//pkg/sentry/arch",
- "//pkg/sentry/context",
- "//pkg/sentry/fs",
- "//pkg/sentry/fs/fsutil",
- "//pkg/sentry/kernel",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/pipe",
- "//pkg/sentry/kernel/time",
- "//pkg/sentry/memmap",
- "//pkg/sentry/pgalloc",
- "//pkg/sentry/platform",
- "//pkg/sentry/safemem",
- "//pkg/sentry/usage",
- "//pkg/sentry/usermem",
- "//pkg/sentry/vfs",
- "//pkg/sync",
- "//pkg/syserror",
- ],
-)
-
-go_test(
- name = "benchmark_test",
- size = "small",
- srcs = ["benchmark_test.go"],
- deps = [
- ":tmpfs",
- "//pkg/abi/linux",
- "//pkg/fspath",
- "//pkg/refs",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
- "//pkg/sentry/fs",
- "//pkg/sentry/fs/tmpfs",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/vfs",
- "//pkg/syserror",
- ],
-)
-
-go_test(
- name = "tmpfs_test",
- size = "small",
- srcs = [
- "pipe_test.go",
- "regular_file_test.go",
- "stat_test.go",
- ],
- embed = [":tmpfs"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/fspath",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/contexttest",
- "//pkg/sentry/usermem",
- "//pkg/sentry/vfs",
- "//pkg/syserror",
- ],
-)
diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
deleted file mode 100644
index d88c83499..000000000
--- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
+++ /dev/null
@@ -1,487 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package benchmark_test
-
-import (
- "fmt"
- "runtime"
- "strings"
- "testing"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/refs"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// Differences from stat_benchmark:
-//
-// - Syscall interception, CopyInPath, copyOutStat, and overlayfs overheads are
-// not included.
-//
-// - *MountStat benchmarks use a tmpfs root mount and a tmpfs submount at /tmp.
-// Non-MountStat benchmarks use a tmpfs root mount and no submounts.
-// stat_benchmark uses a varying root mount, a tmpfs submount at /tmp, and a
-// subdirectory /tmp/<top_dir> (assuming TEST_TMPDIR == "/tmp"). Thus
-// stat_benchmark at depth 1 does a comparable amount of work to *MountStat
-// benchmarks at depth 2, and non-MountStat benchmarks at depth 3.
-var depths = []int{1, 2, 3, 8, 64, 100}
-
-const (
- mountPointName = "tmp"
- filename = "gvisor_test_temp_0_1557494568"
-)
-
-// This is copied from syscalls/linux/sys_file.go, with the dependency on
-// kernel.Task stripped out.
-func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error {
- var (
- d *fs.Dirent // The file.
- rel *fs.Dirent // The relative directory for search (if required.)
- err error
- )
-
- // Extract the working directory (maybe).
- if len(path) > 0 && path[0] == '/' {
- // Absolute path; rel can be nil.
- } else if dirFD == linux.AT_FDCWD {
- // Need to reference the working directory.
- rel = wd
- } else {
- // Need to extract the given FD.
- return syserror.EBADF
- }
-
- // Lookup the node.
- remainingTraversals := uint(linux.MaxSymlinkTraversals)
- if resolve {
- d, err = mntns.FindInode(ctx, root, rel, path, &remainingTraversals)
- } else {
- d, err = mntns.FindLink(ctx, root, rel, path, &remainingTraversals)
- }
- if err != nil {
- return err
- }
-
- err = fn(root, d)
- d.DecRef()
- return err
-}
-
-func BenchmarkVFS1TmpfsStat(b *testing.B) {
- for _, depth := range depths {
- b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
- ctx := contexttest.Context(b)
-
- // Create VFS.
- tmpfsFS, ok := fs.FindFilesystem("tmpfs")
- if !ok {
- b.Fatalf("failed to find tmpfs filesystem type")
- }
- rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
- if err != nil {
- b.Fatalf("failed to create tmpfs root mount: %v", err)
- }
- mntns, err := fs.NewMountNamespace(ctx, rootInode)
- if err != nil {
- b.Fatalf("failed to create mount namespace: %v", err)
- }
- defer mntns.DecRef()
-
- var filePathBuilder strings.Builder
- filePathBuilder.WriteByte('/')
-
- // Create nested directories with given depth.
- root := mntns.Root()
- defer root.DecRef()
- d := root
- d.IncRef()
- defer d.DecRef()
- for i := depth; i > 0; i-- {
- name := fmt.Sprintf("%d", i)
- if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil {
- b.Fatalf("failed to create directory %q: %v", name, err)
- }
- next, err := d.Walk(ctx, root, name)
- if err != nil {
- b.Fatalf("failed to walk to directory %q: %v", name, err)
- }
- d.DecRef()
- d = next
- filePathBuilder.WriteString(name)
- filePathBuilder.WriteByte('/')
- }
-
- // Create the file that will be stat'd.
- file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644))
- if err != nil {
- b.Fatalf("failed to create file %q: %v", filename, err)
- }
- file.DecRef()
- filePathBuilder.WriteString(filename)
- filePath := filePathBuilder.String()
-
- dirPath := false
- runtime.GC()
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
- if dirPath && !fs.IsDir(d.Inode.StableAttr) {
- return syserror.ENOTDIR
- }
- uattr, err := d.Inode.UnstableAttr(ctx)
- if err != nil {
- return err
- }
- // Sanity check.
- if uattr.Perms.User.Execute {
- b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode())
- }
- return nil
- })
- if err != nil {
- b.Fatalf("stat(%q) failed: %v", filePath, err)
- }
- }
- // Don't include deferred cleanup in benchmark time.
- b.StopTimer()
- })
- }
-}
-
-func BenchmarkVFS2MemfsStat(b *testing.B) {
- for _, depth := range depths {
- b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
- ctx := contexttest.Context(b)
- creds := auth.CredentialsFromContext(ctx)
-
- // Create VFS.
- vfsObj := vfs.New()
- vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- })
- mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
- if err != nil {
- b.Fatalf("failed to create tmpfs root mount: %v", err)
- }
- defer mntns.DecRef(vfsObj)
-
- var filePathBuilder strings.Builder
- filePathBuilder.WriteByte('/')
-
- // Create nested directories with given depth.
- root := mntns.Root()
- defer root.DecRef()
- vd := root
- vd.IncRef()
- for i := depth; i > 0; i-- {
- name := fmt.Sprintf("%d", i)
- pop := vfs.PathOperation{
- Root: root,
- Start: vd,
- Path: fspath.Parse(name),
- }
- if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
- Mode: 0755,
- }); err != nil {
- b.Fatalf("failed to create directory %q: %v", name, err)
- }
- nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
- if err != nil {
- b.Fatalf("failed to walk to directory %q: %v", name, err)
- }
- vd.DecRef()
- vd = nextVD
- filePathBuilder.WriteString(name)
- filePathBuilder.WriteByte('/')
- }
-
- // Create the file that will be stat'd.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: vd,
- Path: fspath.Parse(filename),
- FollowFinalSymlink: true,
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
- Mode: 0644,
- })
- vd.DecRef()
- vd = vfs.VirtualDentry{}
- if err != nil {
- b.Fatalf("failed to create file %q: %v", filename, err)
- }
- defer fd.DecRef()
- filePathBuilder.WriteString(filename)
- filePath := filePathBuilder.String()
-
- runtime.GC()
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(filePath),
- FollowFinalSymlink: true,
- }, &vfs.StatOptions{})
- if err != nil {
- b.Fatalf("stat(%q) failed: %v", filePath, err)
- }
- // Sanity check.
- if stat.Mode&^linux.S_IFMT != 0644 {
- b.Fatalf("got wrong permissions (%0o)", stat.Mode)
- }
- }
- // Don't include deferred cleanup in benchmark time.
- b.StopTimer()
- })
- }
-}
-
-func BenchmarkVFS1TmpfsMountStat(b *testing.B) {
- for _, depth := range depths {
- b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
- ctx := contexttest.Context(b)
-
- // Create VFS.
- tmpfsFS, ok := fs.FindFilesystem("tmpfs")
- if !ok {
- b.Fatalf("failed to find tmpfs filesystem type")
- }
- rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
- if err != nil {
- b.Fatalf("failed to create tmpfs root mount: %v", err)
- }
- mntns, err := fs.NewMountNamespace(ctx, rootInode)
- if err != nil {
- b.Fatalf("failed to create mount namespace: %v", err)
- }
- defer mntns.DecRef()
-
- var filePathBuilder strings.Builder
- filePathBuilder.WriteByte('/')
-
- // Create and mount the submount.
- root := mntns.Root()
- defer root.DecRef()
- if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil {
- b.Fatalf("failed to create mount point: %v", err)
- }
- mountPoint, err := root.Walk(ctx, root, mountPointName)
- if err != nil {
- b.Fatalf("failed to walk to mount point: %v", err)
- }
- defer mountPoint.DecRef()
- submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
- if err != nil {
- b.Fatalf("failed to create tmpfs submount: %v", err)
- }
- if err := mntns.Mount(ctx, mountPoint, submountInode); err != nil {
- b.Fatalf("failed to mount tmpfs submount: %v", err)
- }
- filePathBuilder.WriteString(mountPointName)
- filePathBuilder.WriteByte('/')
-
- // Create nested directories with given depth.
- d, err := root.Walk(ctx, root, mountPointName)
- if err != nil {
- b.Fatalf("failed to walk to mount root: %v", err)
- }
- defer d.DecRef()
- for i := depth; i > 0; i-- {
- name := fmt.Sprintf("%d", i)
- if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil {
- b.Fatalf("failed to create directory %q: %v", name, err)
- }
- next, err := d.Walk(ctx, root, name)
- if err != nil {
- b.Fatalf("failed to walk to directory %q: %v", name, err)
- }
- d.DecRef()
- d = next
- filePathBuilder.WriteString(name)
- filePathBuilder.WriteByte('/')
- }
-
- // Create the file that will be stat'd.
- file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644))
- if err != nil {
- b.Fatalf("failed to create file %q: %v", filename, err)
- }
- file.DecRef()
- filePathBuilder.WriteString(filename)
- filePath := filePathBuilder.String()
-
- dirPath := false
- runtime.GC()
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
- if dirPath && !fs.IsDir(d.Inode.StableAttr) {
- return syserror.ENOTDIR
- }
- uattr, err := d.Inode.UnstableAttr(ctx)
- if err != nil {
- return err
- }
- // Sanity check.
- if uattr.Perms.User.Execute {
- b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode())
- }
- return nil
- })
- if err != nil {
- b.Fatalf("stat(%q) failed: %v", filePath, err)
- }
- }
- // Don't include deferred cleanup in benchmark time.
- b.StopTimer()
- })
- }
-}
-
-func BenchmarkVFS2MemfsMountStat(b *testing.B) {
- for _, depth := range depths {
- b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
- ctx := contexttest.Context(b)
- creds := auth.CredentialsFromContext(ctx)
-
- // Create VFS.
- vfsObj := vfs.New()
- vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- })
- mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
- if err != nil {
- b.Fatalf("failed to create tmpfs root mount: %v", err)
- }
- defer mntns.DecRef(vfsObj)
-
- var filePathBuilder strings.Builder
- filePathBuilder.WriteByte('/')
-
- // Create the mount point.
- root := mntns.Root()
- defer root.DecRef()
- pop := vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(mountPointName),
- }
- if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
- Mode: 0755,
- }); err != nil {
- b.Fatalf("failed to create mount point: %v", err)
- }
- // Save the mount point for later use.
- mountPoint, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
- if err != nil {
- b.Fatalf("failed to walk to mount point: %v", err)
- }
- defer mountPoint.DecRef()
- // Create and mount the submount.
- if err := vfsObj.MountAt(ctx, creds, "", &pop, "tmpfs", &vfs.MountOptions{}); err != nil {
- b.Fatalf("failed to mount tmpfs submount: %v", err)
- }
- filePathBuilder.WriteString(mountPointName)
- filePathBuilder.WriteByte('/')
-
- // Create nested directories with given depth.
- vd, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
- if err != nil {
- b.Fatalf("failed to walk to mount root: %v", err)
- }
- for i := depth; i > 0; i-- {
- name := fmt.Sprintf("%d", i)
- pop := vfs.PathOperation{
- Root: root,
- Start: vd,
- Path: fspath.Parse(name),
- }
- if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
- Mode: 0755,
- }); err != nil {
- b.Fatalf("failed to create directory %q: %v", name, err)
- }
- nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
- if err != nil {
- b.Fatalf("failed to walk to directory %q: %v", name, err)
- }
- vd.DecRef()
- vd = nextVD
- filePathBuilder.WriteString(name)
- filePathBuilder.WriteByte('/')
- }
-
- // Verify that we didn't create any directories under the mount
- // point (i.e. they were all created on the submount).
- firstDirName := fmt.Sprintf("%d", depth)
- if child := mountPoint.Dentry().Child(firstDirName); child != nil {
- b.Fatalf("created directory %q under root mount, not submount", firstDirName)
- }
-
- // Create the file that will be stat'd.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: vd,
- Path: fspath.Parse(filename),
- FollowFinalSymlink: true,
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
- Mode: 0644,
- })
- vd.DecRef()
- if err != nil {
- b.Fatalf("failed to create file %q: %v", filename, err)
- }
- fd.DecRef()
- filePathBuilder.WriteString(filename)
- filePath := filePathBuilder.String()
-
- runtime.GC()
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(filePath),
- FollowFinalSymlink: true,
- }, &vfs.StatOptions{})
- if err != nil {
- b.Fatalf("stat(%q) failed: %v", filePath, err)
- }
- // Sanity check.
- if stat.Mode&^linux.S_IFMT != 0644 {
- b.Fatalf("got wrong permissions (%0o)", stat.Mode)
- }
- }
- // Don't include deferred cleanup in benchmark time.
- b.StopTimer()
- })
- }
-}
-
-func init() {
- // Turn off reference leak checking for a fair comparison between vfs1 and
- // vfs2.
- refs.SetLeakMode(refs.NoLeakChecking)
-}
diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go
deleted file mode 100644
index 887ca2619..000000000
--- a/pkg/sentry/fsimpl/tmpfs/directory.go
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tmpfs
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-type directory struct {
- inode inode
-
- // childList is a list containing (1) child Dentries and (2) fake Dentries
- // (with inode == nil) that represent the iteration position of
- // directoryFDs. childList is used to support directoryFD.IterDirents()
- // efficiently. childList is protected by filesystem.mu.
- childList dentryList
-}
-
-func (fs *filesystem) newDirectory(creds *auth.Credentials, mode linux.FileMode) *inode {
- dir := &directory{}
- dir.inode.init(dir, fs, creds, mode)
- dir.inode.nlink = 2 // from "." and parent directory or ".." for root
- return &dir.inode
-}
-
-func (i *inode) isDir() bool {
- _, ok := i.impl.(*directory)
- return ok
-}
-
-type directoryFD struct {
- fileDescription
- vfs.DirectoryFileDescriptionDefaultImpl
-
- // Protected by filesystem.mu.
- iter *dentry
- off int64
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *directoryFD) Release() {
- if fd.iter != nil {
- fs := fd.filesystem()
- dir := fd.inode().impl.(*directory)
- fs.mu.Lock()
- dir.childList.Remove(fd.iter)
- fs.mu.Unlock()
- fd.iter = nil
- }
-}
-
-// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
-func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
- fs := fd.filesystem()
- vfsd := fd.vfsfd.VirtualDentry().Dentry()
-
- fs.mu.Lock()
- defer fs.mu.Unlock()
-
- if fd.off == 0 {
- if !cb.Handle(vfs.Dirent{
- Name: ".",
- Type: linux.DT_DIR,
- Ino: vfsd.Impl().(*dentry).inode.ino,
- NextOff: 1,
- }) {
- return nil
- }
- fd.off++
- }
- if fd.off == 1 {
- parentInode := vfsd.ParentOrSelf().Impl().(*dentry).inode
- if !cb.Handle(vfs.Dirent{
- Name: "..",
- Type: parentInode.direntType(),
- Ino: parentInode.ino,
- NextOff: 2,
- }) {
- return nil
- }
- fd.off++
- }
-
- dir := vfsd.Impl().(*dentry).inode.impl.(*directory)
- var child *dentry
- if fd.iter == nil {
- // Start iteration at the beginning of dir.
- child = dir.childList.Front()
- fd.iter = &dentry{}
- } else {
- // Continue iteration from where we left off.
- child = fd.iter.Next()
- dir.childList.Remove(fd.iter)
- }
- for child != nil {
- // Skip other directoryFD iterators.
- if child.inode != nil {
- if !cb.Handle(vfs.Dirent{
- Name: child.vfsd.Name(),
- Type: child.inode.direntType(),
- Ino: child.inode.ino,
- NextOff: fd.off + 1,
- }) {
- dir.childList.InsertBefore(child, fd.iter)
- return nil
- }
- fd.off++
- }
- child = child.Next()
- }
- dir.childList.PushBack(fd.iter)
- return nil
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- fs := fd.filesystem()
- fs.mu.Lock()
- defer fs.mu.Unlock()
-
- switch whence {
- case linux.SEEK_SET:
- // Use offset as given.
- case linux.SEEK_CUR:
- offset += fd.off
- default:
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
-
- // If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't
- // seek even if doing so might reposition the iterator due to concurrent
- // mutation of the directory. Compare fs/libfs.c:dcache_dir_lseek().
- if fd.off == offset {
- return offset, nil
- }
-
- fd.off = offset
- // Compensate for "." and "..".
- remChildren := int64(0)
- if offset >= 2 {
- remChildren = offset - 2
- }
-
- dir := fd.inode().impl.(*directory)
-
- // Ensure that fd.iter exists and is not linked into dir.childList.
- if fd.iter == nil {
- fd.iter = &dentry{}
- } else {
- dir.childList.Remove(fd.iter)
- }
- // Insert fd.iter before the remChildren'th child, or at the end of the
- // list if remChildren >= number of children.
- child := dir.childList.Front()
- for child != nil {
- // Skip other directoryFD iterators.
- if child.inode != nil {
- if remChildren == 0 {
- dir.childList.InsertBefore(child, fd.iter)
- return offset, nil
- }
- remChildren--
- }
- child = child.Next()
- }
- dir.childList.PushBack(fd.iter)
- return offset, nil
-}
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
deleted file mode 100644
index 4cd7e9aea..000000000
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ /dev/null
@@ -1,696 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tmpfs
-
-import (
- "fmt"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// Sync implements vfs.FilesystemImpl.Sync.
-func (fs *filesystem) Sync(ctx context.Context) error {
- // All filesystem state is in-memory.
- return nil
-}
-
-// stepLocked resolves rp.Component() to an existing file, starting from the
-// given directory.
-//
-// stepLocked is loosely analogous to fs/namei.c:walk_component().
-//
-// Preconditions: filesystem.mu must be locked. !rp.Done().
-func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
- if !d.inode.isDir() {
- return nil, syserror.ENOTDIR
- }
- if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
- return nil, err
- }
-afterSymlink:
- nextVFSD, err := rp.ResolveComponent(&d.vfsd)
- if err != nil {
- return nil, err
- }
- if nextVFSD == nil {
- // Since the Dentry tree is the sole source of truth for tmpfs, if it's
- // not in the Dentry tree, it doesn't exist.
- return nil, syserror.ENOENT
- }
- next := nextVFSD.Impl().(*dentry)
- if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
- // TODO(gvisor.dev/issues/1197): Symlink traversals updates
- // access time.
- if err := rp.HandleSymlink(symlink.target); err != nil {
- return nil, err
- }
- goto afterSymlink // don't check the current directory again
- }
- rp.Advance()
- return next, nil
-}
-
-// walkParentDirLocked resolves all but the last path component of rp to an
-// existing directory, starting from the given directory (which is usually
-// rp.Start().Impl().(*dentry)). It does not check that the returned directory
-// is searchable by the provider of rp.
-//
-// walkParentDirLocked is loosely analogous to Linux's
-// fs/namei.c:path_parentat().
-//
-// Preconditions: filesystem.mu must be locked. !rp.Done().
-func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
- for !rp.Final() {
- next, err := stepLocked(rp, d)
- if err != nil {
- return nil, err
- }
- d = next
- }
- if !d.inode.isDir() {
- return nil, syserror.ENOTDIR
- }
- return d, nil
-}
-
-// resolveLocked resolves rp to an existing file.
-//
-// resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat().
-//
-// Preconditions: filesystem.mu must be locked.
-func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) {
- d := rp.Start().Impl().(*dentry)
- for !rp.Done() {
- next, err := stepLocked(rp, d)
- if err != nil {
- return nil, err
- }
- d = next
- }
- if rp.MustBeDir() && !d.inode.isDir() {
- return nil, syserror.ENOTDIR
- }
- return d, nil
-}
-
-// doCreateAt checks that creating a file at rp is permitted, then invokes
-// create to do so.
-//
-// doCreateAt is loosely analogous to a conjunction of Linux's
-// fs/namei.c:filename_create() and done_path_create().
-//
-// Preconditions: !rp.Done(). For the final path component in rp,
-// !rp.ShouldFollowSymlink().
-func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error {
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
- if err != nil {
- return err
- }
- if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
- return err
- }
- name := rp.Component()
- if name == "." || name == ".." {
- return syserror.EEXIST
- }
- // Call parent.vfsd.Child() instead of stepLocked() or rp.ResolveChild(),
- // because if the child exists we want to return EEXIST immediately instead
- // of attempting symlink/mount traversal.
- if parent.vfsd.Child(name) != nil {
- return syserror.EEXIST
- }
- if !dir && rp.MustBeDir() {
- return syserror.ENOENT
- }
- // In memfs, the only way to cause a dentry to be disowned is by removing
- // it from the filesystem, so this check is equivalent to checking if
- // parent has been removed.
- if parent.vfsd.IsDisowned() {
- return syserror.ENOENT
- }
- mnt := rp.Mount()
- if err := mnt.CheckBeginWrite(); err != nil {
- return err
- }
- defer mnt.EndWrite()
- return create(parent, name)
-}
-
-// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
-func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- d, err := resolveLocked(rp)
- if err != nil {
- return nil, err
- }
- if opts.CheckSearchable {
- if !d.inode.isDir() {
- return nil, syserror.ENOTDIR
- }
- if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true /* isDir */); err != nil {
- return nil, err
- }
- }
- d.IncRef()
- return &d.vfsd, nil
-}
-
-// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
-func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- d, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
- if err != nil {
- return nil, err
- }
- d.IncRef()
- return &d.vfsd, nil
-}
-
-// LinkAt implements vfs.FilesystemImpl.LinkAt.
-func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
- return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
- if rp.Mount() != vd.Mount() {
- return syserror.EXDEV
- }
- d := vd.Dentry().Impl().(*dentry)
- if d.inode.isDir() {
- return syserror.EPERM
- }
- if d.inode.nlink == 0 {
- return syserror.ENOENT
- }
- if d.inode.nlink == maxLinks {
- return syserror.EMLINK
- }
- d.inode.incLinksLocked()
- child := fs.newDentry(d.inode)
- parent.vfsd.InsertChild(&child.vfsd, name)
- parent.inode.impl.(*directory).childList.PushBack(child)
- return nil
- })
-}
-
-// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
-func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
- return fs.doCreateAt(rp, true /* dir */, func(parent *dentry, name string) error {
- if parent.inode.nlink == maxLinks {
- return syserror.EMLINK
- }
- parent.inode.incLinksLocked() // from child's ".."
- child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode))
- parent.vfsd.InsertChild(&child.vfsd, name)
- parent.inode.impl.(*directory).childList.PushBack(child)
- return nil
- })
-}
-
-// MknodAt implements vfs.FilesystemImpl.MknodAt.
-func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
- return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
- switch opts.Mode.FileType() {
- case 0, linux.S_IFREG:
- child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
- parent.vfsd.InsertChild(&child.vfsd, name)
- parent.inode.impl.(*directory).childList.PushBack(child)
- return nil
- case linux.S_IFIFO:
- child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode))
- parent.vfsd.InsertChild(&child.vfsd, name)
- parent.inode.impl.(*directory).childList.PushBack(child)
- return nil
- case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK:
- // Not yet supported.
- return syserror.EPERM
- default:
- return syserror.EINVAL
- }
- })
-}
-
-// OpenAt implements vfs.FilesystemImpl.OpenAt.
-func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- if opts.Flags&linux.O_TMPFILE != 0 {
- // Not yet supported.
- return nil, syserror.EOPNOTSUPP
- }
-
- // Handle O_CREAT and !O_CREAT separately, since in the latter case we
- // don't need fs.mu for writing.
- if opts.Flags&linux.O_CREAT == 0 {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- d, err := resolveLocked(rp)
- if err != nil {
- return nil, err
- }
- return d.open(ctx, rp, opts.Flags, false /* afterCreate */)
- }
-
- mustCreate := opts.Flags&linux.O_EXCL != 0
- start := rp.Start().Impl().(*dentry)
- fs.mu.Lock()
- defer fs.mu.Unlock()
- if rp.Done() {
- // Reject attempts to open directories with O_CREAT.
- if rp.MustBeDir() {
- return nil, syserror.EISDIR
- }
- if mustCreate {
- return nil, syserror.EEXIST
- }
- return start.open(ctx, rp, opts.Flags, false /* afterCreate */)
- }
-afterTrailingSymlink:
- parent, err := walkParentDirLocked(rp, start)
- if err != nil {
- return nil, err
- }
- // Check for search permission in the parent directory.
- if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
- return nil, err
- }
- // Reject attempts to open directories with O_CREAT.
- if rp.MustBeDir() {
- return nil, syserror.EISDIR
- }
- name := rp.Component()
- if name == "." || name == ".." {
- return nil, syserror.EISDIR
- }
- // Determine whether or not we need to create a file.
- child, err := stepLocked(rp, parent)
- if err == syserror.ENOENT {
- // Already checked for searchability above; now check for writability.
- if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
- return nil, err
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return nil, err
- }
- defer rp.Mount().EndWrite()
- // Create and open the child.
- child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
- parent.vfsd.InsertChild(&child.vfsd, name)
- parent.inode.impl.(*directory).childList.PushBack(child)
- return child.open(ctx, rp, opts.Flags, true)
- }
- if err != nil {
- return nil, err
- }
- // Do we need to resolve a trailing symlink?
- if !rp.Done() {
- start = parent
- goto afterTrailingSymlink
- }
- // Open existing file.
- if mustCreate {
- return nil, syserror.EEXIST
- }
- return child.open(ctx, rp, opts.Flags, false)
-}
-
-func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) {
- ats := vfs.AccessTypesForOpenFlags(flags)
- if !afterCreate {
- if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil {
- return nil, err
- }
- }
- mnt := rp.Mount()
- switch impl := d.inode.impl.(type) {
- case *regularFile:
- var fd regularFileFD
- fd.readable = vfs.MayReadFileWithOpenFlags(flags)
- fd.writable = vfs.MayWriteFileWithOpenFlags(flags)
- if fd.writable {
- if err := mnt.CheckBeginWrite(); err != nil {
- return nil, err
- }
- // mnt.EndWrite() is called by regularFileFD.Release().
- }
- fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
- if flags&linux.O_TRUNC != 0 {
- impl.mu.Lock()
- impl.data.Truncate(0, impl.memFile)
- atomic.StoreUint64(&impl.size, 0)
- impl.mu.Unlock()
- }
- return &fd.vfsfd, nil
- case *directory:
- // Can't open directories writably.
- if ats&vfs.MayWrite != 0 {
- return nil, syserror.EISDIR
- }
- var fd directoryFD
- fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
- return &fd.vfsfd, nil
- case *symlink:
- // Can't open symlinks without O_PATH (which is unimplemented).
- return nil, syserror.ELOOP
- case *namedPipe:
- return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags)
- default:
- panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl))
- }
-}
-
-// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
-func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- d, err := resolveLocked(rp)
- if err != nil {
- return "", err
- }
- symlink, ok := d.inode.impl.(*symlink)
- if !ok {
- return "", syserror.EINVAL
- }
- return symlink.target, nil
-}
-
-// RenameAt implements vfs.FilesystemImpl.RenameAt.
-func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
- if opts.Flags != 0 {
- // TODO(b/145974740): Support renameat2 flags.
- return syserror.EINVAL
- }
-
- // Resolve newParent first to verify that it's on this Mount.
- fs.mu.Lock()
- defer fs.mu.Unlock()
- newParent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
- if err != nil {
- return err
- }
- newName := rp.Component()
- if newName == "." || newName == ".." {
- return syserror.EBUSY
- }
- mnt := rp.Mount()
- if mnt != oldParentVD.Mount() {
- return syserror.EXDEV
- }
- if err := mnt.CheckBeginWrite(); err != nil {
- return err
- }
- defer mnt.EndWrite()
-
- oldParent := oldParentVD.Dentry().Impl().(*dentry)
- if err := oldParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
- return err
- }
- // Call vfs.Dentry.Child() instead of stepLocked() or rp.ResolveChild(),
- // because if the existing child is a symlink or mount point then we want
- // to rename over it rather than follow it.
- renamedVFSD := oldParent.vfsd.Child(oldName)
- if renamedVFSD == nil {
- return syserror.ENOENT
- }
- renamed := renamedVFSD.Impl().(*dentry)
- if renamed.inode.isDir() {
- if renamed == newParent || renamedVFSD.IsAncestorOf(&newParent.vfsd) {
- return syserror.EINVAL
- }
- if oldParent != newParent {
- // Writability is needed to change renamed's "..".
- if err := renamed.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true /* isDir */); err != nil {
- return err
- }
- }
- } else {
- if opts.MustBeDir || rp.MustBeDir() {
- return syserror.ENOTDIR
- }
- }
-
- if err := newParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
- return err
- }
- replacedVFSD := newParent.vfsd.Child(newName)
- var replaced *dentry
- if replacedVFSD != nil {
- replaced = replacedVFSD.Impl().(*dentry)
- if replaced.inode.isDir() {
- if !renamed.inode.isDir() {
- return syserror.EISDIR
- }
- if replaced.vfsd.HasChildren() {
- return syserror.ENOTEMPTY
- }
- } else {
- if rp.MustBeDir() {
- return syserror.ENOTDIR
- }
- if renamed.inode.isDir() {
- return syserror.ENOTDIR
- }
- }
- } else {
- if renamed.inode.isDir() && newParent.inode.nlink == maxLinks {
- return syserror.EMLINK
- }
- }
- if newParent.vfsd.IsDisowned() {
- return syserror.ENOENT
- }
-
- // Linux places this check before some of those above; we do it here for
- // simplicity, under the assumption that applications are not intentionally
- // doing noop renames expecting them to succeed where non-noop renames
- // would fail.
- if renamedVFSD == replacedVFSD {
- return nil
- }
- vfsObj := rp.VirtualFilesystem()
- oldParentDir := oldParent.inode.impl.(*directory)
- newParentDir := newParent.inode.impl.(*directory)
- if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil {
- return err
- }
- if replaced != nil {
- newParentDir.childList.Remove(replaced)
- if replaced.inode.isDir() {
- newParent.inode.decLinksLocked() // from replaced's ".."
- }
- replaced.inode.decLinksLocked()
- }
- oldParentDir.childList.Remove(renamed)
- newParentDir.childList.PushBack(renamed)
- if renamed.inode.isDir() {
- oldParent.inode.decLinksLocked()
- newParent.inode.incLinksLocked()
- }
- // TODO(gvisor.dev/issues/1197): Update timestamps and parent directory
- // sizes.
- vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD)
- return nil
-}
-
-// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
-func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
- if err != nil {
- return err
- }
- if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
- return err
- }
- name := rp.Component()
- if name == "." {
- return syserror.EINVAL
- }
- if name == ".." {
- return syserror.ENOTEMPTY
- }
- childVFSD := parent.vfsd.Child(name)
- if childVFSD == nil {
- return syserror.ENOENT
- }
- child := childVFSD.Impl().(*dentry)
- if !child.inode.isDir() {
- return syserror.ENOTDIR
- }
- if childVFSD.HasChildren() {
- return syserror.ENOTEMPTY
- }
- mnt := rp.Mount()
- if err := mnt.CheckBeginWrite(); err != nil {
- return err
- }
- defer mnt.EndWrite()
- vfsObj := rp.VirtualFilesystem()
- if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil {
- return err
- }
- parent.inode.impl.(*directory).childList.Remove(child)
- parent.inode.decLinksLocked() // from child's ".."
- child.inode.decLinksLocked()
- vfsObj.CommitDeleteDentry(childVFSD)
- return nil
-}
-
-// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
-func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- d, err := resolveLocked(rp)
- if err != nil {
- return err
- }
- return d.inode.setStat(opts.Stat)
-}
-
-// StatAt implements vfs.FilesystemImpl.StatAt.
-func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- d, err := resolveLocked(rp)
- if err != nil {
- return linux.Statx{}, err
- }
- var stat linux.Statx
- d.inode.statTo(&stat)
- return stat, nil
-}
-
-// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
-func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- _, err := resolveLocked(rp)
- if err != nil {
- return linux.Statfs{}, err
- }
- // TODO(gvisor.dev/issues/1197): Actually implement statfs.
- return linux.Statfs{}, syserror.ENOSYS
-}
-
-// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
-func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
- return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
- child := fs.newDentry(fs.newSymlink(rp.Credentials(), target))
- parent.vfsd.InsertChild(&child.vfsd, name)
- parent.inode.impl.(*directory).childList.PushBack(child)
- return nil
- })
-}
-
-// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
-func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
- if err != nil {
- return err
- }
- if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
- return err
- }
- name := rp.Component()
- if name == "." || name == ".." {
- return syserror.EISDIR
- }
- childVFSD := parent.vfsd.Child(name)
- if childVFSD == nil {
- return syserror.ENOENT
- }
- child := childVFSD.Impl().(*dentry)
- if child.inode.isDir() {
- return syserror.EISDIR
- }
- if !rp.MustBeDir() {
- return syserror.ENOTDIR
- }
- mnt := rp.Mount()
- if err := mnt.CheckBeginWrite(); err != nil {
- return err
- }
- defer mnt.EndWrite()
- vfsObj := rp.VirtualFilesystem()
- if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil {
- return err
- }
- parent.inode.impl.(*directory).childList.Remove(child)
- child.inode.decLinksLocked()
- vfsObj.CommitDeleteDentry(childVFSD)
- return nil
-}
-
-// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
-func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- _, err := resolveLocked(rp)
- if err != nil {
- return nil, err
- }
- // TODO(b/127675828): support extended attributes
- return nil, syserror.ENOTSUP
-}
-
-// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
-func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- _, err := resolveLocked(rp)
- if err != nil {
- return "", err
- }
- // TODO(b/127675828): support extended attributes
- return "", syserror.ENOTSUP
-}
-
-// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
-func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- _, err := resolveLocked(rp)
- if err != nil {
- return err
- }
- // TODO(b/127675828): support extended attributes
- return syserror.ENOTSUP
-}
-
-// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
-func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- _, err := resolveLocked(rp)
- if err != nil {
- return err
- }
- // TODO(b/127675828): support extended attributes
- return syserror.ENOTSUP
-}
-
-// PrependPath implements vfs.FilesystemImpl.PrependPath.
-func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- return vfs.GenericPrependPath(vfsroot, vd, b)
-}
diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go
deleted file mode 100644
index 40bde54de..000000000
--- a/pkg/sentry/fsimpl/tmpfs/named_pipe.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tmpfs
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-type namedPipe struct {
- inode inode
-
- pipe *pipe.VFSPipe
-}
-
-// Preconditions:
-// * fs.mu must be locked.
-// * rp.Mount().CheckBeginWrite() has been called successfully.
-func (fs *filesystem) newNamedPipe(creds *auth.Credentials, mode linux.FileMode) *inode {
- file := &namedPipe{pipe: pipe.NewVFSPipe(pipe.DefaultPipeSize, usermem.PageSize)}
- file.inode.init(file, fs, creds, mode)
- file.inode.nlink = 1 // Only the parent has a link.
- return &file.inode
-}
-
-// namedPipeFD implements vfs.FileDescriptionImpl. Methods are implemented
-// entirely via struct embedding.
-type namedPipeFD struct {
- fileDescription
-
- *pipe.VFSPipeFD
-}
-
-func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
- var err error
- var fd namedPipeFD
- fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, rp, vfsd, &fd.vfsfd, flags)
- if err != nil {
- return nil, err
- }
- mnt := rp.Mount()
- fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{})
- return &fd.vfsfd, nil
-}
diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
deleted file mode 100644
index 70b42a6ec..000000000
--- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go
+++ /dev/null
@@ -1,235 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tmpfs
-
-import (
- "bytes"
- "testing"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-const fileName = "mypipe"
-
-func TestSeparateFDs(t *testing.T) {
- ctx, creds, vfsObj, root := setup(t)
- defer root.DecRef()
-
- // Open the read side. This is done in a concurrently because opening
- // One end the pipe blocks until the other end is opened.
- pop := vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(fileName),
- FollowFinalSymlink: true,
- }
- rfdchan := make(chan *vfs.FileDescription)
- go func() {
- openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY}
- rfd, _ := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
- rfdchan <- rfd
- }()
-
- // Open the write side.
- openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY}
- wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
- if err != nil {
- t.Fatalf("failed to open pipe for writing %q: %v", fileName, err)
- }
- defer wfd.DecRef()
-
- rfd, ok := <-rfdchan
- if !ok {
- t.Fatalf("failed to open pipe for reading %q", fileName)
- }
- defer rfd.DecRef()
-
- const msg = "vamos azul"
- checkEmpty(ctx, t, rfd)
- checkWrite(ctx, t, wfd, msg)
- checkRead(ctx, t, rfd, msg)
-}
-
-func TestNonblockingRead(t *testing.T) {
- ctx, creds, vfsObj, root := setup(t)
- defer root.DecRef()
-
- // Open the read side as nonblocking.
- pop := vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(fileName),
- FollowFinalSymlink: true,
- }
- openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK}
- rfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
- if err != nil {
- t.Fatalf("failed to open pipe for reading %q: %v", fileName, err)
- }
- defer rfd.DecRef()
-
- // Open the write side.
- openOpts = vfs.OpenOptions{Flags: linux.O_WRONLY}
- wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
- if err != nil {
- t.Fatalf("failed to open pipe for writing %q: %v", fileName, err)
- }
- defer wfd.DecRef()
-
- const msg = "geh blau"
- checkEmpty(ctx, t, rfd)
- checkWrite(ctx, t, wfd, msg)
- checkRead(ctx, t, rfd, msg)
-}
-
-func TestNonblockingWriteError(t *testing.T) {
- ctx, creds, vfsObj, root := setup(t)
- defer root.DecRef()
-
- // Open the write side as nonblocking, which should return ENXIO.
- pop := vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(fileName),
- FollowFinalSymlink: true,
- }
- openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK}
- _, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
- if err != syserror.ENXIO {
- t.Fatalf("expected ENXIO, but got error: %v", err)
- }
-}
-
-func TestSingleFD(t *testing.T) {
- ctx, creds, vfsObj, root := setup(t)
- defer root.DecRef()
-
- // Open the pipe as readable and writable.
- pop := vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(fileName),
- FollowFinalSymlink: true,
- }
- openOpts := vfs.OpenOptions{Flags: linux.O_RDWR}
- fd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
- if err != nil {
- t.Fatalf("failed to open pipe for writing %q: %v", fileName, err)
- }
- defer fd.DecRef()
-
- const msg = "forza blu"
- checkEmpty(ctx, t, fd)
- checkWrite(ctx, t, fd, msg)
- checkRead(ctx, t, fd, msg)
-}
-
-// setup creates a VFS with a pipe in the root directory at path fileName. The
-// returned VirtualDentry must be DecRef()'d be the caller. It calls t.Fatal
-// upon failure.
-func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesystem, vfs.VirtualDentry) {
- ctx := contexttest.Context(t)
- creds := auth.CredentialsFromContext(ctx)
-
- // Create VFS.
- vfsObj := vfs.New()
- vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- })
- mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
- if err != nil {
- t.Fatalf("failed to create tmpfs root mount: %v", err)
- }
-
- // Create the pipe.
- root := mntns.Root()
- pop := vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(fileName),
- }
- mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644}
- if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil {
- t.Fatalf("failed to create file %q: %v", fileName, err)
- }
-
- // Sanity check: the file pipe exists and has the correct mode.
- stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(fileName),
- FollowFinalSymlink: true,
- }, &vfs.StatOptions{})
- if err != nil {
- t.Fatalf("stat(%q) failed: %v", fileName, err)
- }
- if stat.Mode&^linux.S_IFMT != 0644 {
- t.Errorf("got wrong permissions (%0o)", stat.Mode)
- }
- if stat.Mode&linux.S_IFMT != linux.ModeNamedPipe {
- t.Errorf("got wrong file type (%0o)", stat.Mode)
- }
-
- return ctx, creds, vfsObj, root
-}
-
-// checkEmpty calls t.Fatal if the pipe in fd is not empty.
-func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) {
- readData := make([]byte, 1)
- dst := usermem.BytesIOSequence(readData)
- bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{})
- if err != syserror.ErrWouldBlock {
- t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err)
- }
- if bytesRead != 0 {
- t.Fatalf("expected to read 0 bytes, but got %d", bytesRead)
- }
-}
-
-// checkWrite calls t.Fatal if it fails to write all of msg to fd.
-func checkWrite(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) {
- writeData := []byte(msg)
- src := usermem.BytesIOSequence(writeData)
- bytesWritten, err := fd.Write(ctx, src, vfs.WriteOptions{})
- if err != nil {
- t.Fatalf("error writing to pipe %q: %v", fileName, err)
- }
- if bytesWritten != int64(len(writeData)) {
- t.Fatalf("expected to write %d bytes, but wrote %d", len(writeData), bytesWritten)
- }
-}
-
-// checkRead calls t.Fatal if it fails to read msg from fd.
-func checkRead(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) {
- readData := make([]byte, len(msg))
- dst := usermem.BytesIOSequence(readData)
- bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{})
- if err != nil {
- t.Fatalf("error reading from pipe %q: %v", fileName, err)
- }
- if bytesRead != int64(len(msg)) {
- t.Fatalf("expected to read %d bytes, but got %d", len(msg), bytesRead)
- }
- if !bytes.Equal(readData, []byte(msg)) {
- t.Fatalf("expected to read %q from pipe, but got %q", msg, string(readData))
- }
-}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
deleted file mode 100644
index 5fa70cc6d..000000000
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ /dev/null
@@ -1,392 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tmpfs
-
-import (
- "io"
- "math"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/pgalloc"
- "gvisor.dev/gvisor/pkg/sentry/safemem"
- "gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-type regularFile struct {
- inode inode
-
- // memFile is a platform.File used to allocate pages to this regularFile.
- memFile *pgalloc.MemoryFile
-
- // mu protects the fields below.
- mu sync.RWMutex
-
- // data maps offsets into the file to offsets into memFile that store
- // the file's data.
- data fsutil.FileRangeSet
-
- // size is the size of data, but accessed using atomic memory
- // operations to avoid locking in inode.stat().
- size uint64
-
- // seals represents file seals on this inode.
- seals uint32
-}
-
-func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode {
- file := &regularFile{
- memFile: fs.memFile,
- }
- file.inode.init(file, fs, creds, mode)
- file.inode.nlink = 1 // from parent directory
- return &file.inode
-}
-
-// truncate grows or shrinks the file to the given size. It returns true if the
-// file size was updated.
-func (rf *regularFile) truncate(size uint64) (bool, error) {
- rf.mu.Lock()
- defer rf.mu.Unlock()
-
- if size == rf.size {
- // Nothing to do.
- return false, nil
- }
-
- if size > rf.size {
- // Growing the file.
- if rf.seals&linux.F_SEAL_GROW != 0 {
- // Seal does not allow growth.
- return false, syserror.EPERM
- }
- rf.size = size
- return true, nil
- }
-
- // Shrinking the file
- if rf.seals&linux.F_SEAL_SHRINK != 0 {
- // Seal does not allow shrink.
- return false, syserror.EPERM
- }
-
- // TODO(gvisor.dev/issues/1197): Invalidate mappings once we have
- // mappings.
-
- rf.data.Truncate(size, rf.memFile)
- rf.size = size
- return true, nil
-}
-
-type regularFileFD struct {
- fileDescription
-
- // These are immutable.
- readable bool
- writable bool
-
- // off is the file offset. off is accessed using atomic memory operations.
- // offMu serializes operations that may mutate off.
- off int64
- offMu sync.Mutex
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *regularFileFD) Release() {
- if fd.writable {
- fd.vfsfd.VirtualDentry().Mount().EndWrite()
- }
-}
-
-// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- if !fd.readable {
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- if dst.NumBytes() == 0 {
- return 0, nil
- }
- f := fd.inode().impl.(*regularFile)
- rw := getRegularFileReadWriter(f, offset)
- n, err := dst.CopyOutFrom(ctx, rw)
- putRegularFileReadWriter(rw)
- return int64(n), err
-}
-
-// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- fd.offMu.Lock()
- n, err := fd.PRead(ctx, dst, fd.off, opts)
- fd.off += n
- fd.offMu.Unlock()
- return n, err
-}
-
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- if !fd.writable {
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- srclen := src.NumBytes()
- if srclen == 0 {
- return 0, nil
- }
- f := fd.inode().impl.(*regularFile)
- end := offset + srclen
- if end < offset {
- // Overflow.
- return 0, syserror.EFBIG
- }
- rw := getRegularFileReadWriter(f, offset)
- n, err := src.CopyInTo(ctx, rw)
- putRegularFileReadWriter(rw)
- return n, err
-}
-
-// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- fd.offMu.Lock()
- n, err := fd.PWrite(ctx, src, fd.off, opts)
- fd.off += n
- fd.offMu.Unlock()
- return n, err
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- fd.offMu.Lock()
- defer fd.offMu.Unlock()
- switch whence {
- case linux.SEEK_SET:
- // use offset as specified
- case linux.SEEK_CUR:
- offset += fd.off
- case linux.SEEK_END:
- offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size))
- default:
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- fd.off = offset
- return offset, nil
-}
-
-// Sync implements vfs.FileDescriptionImpl.Sync.
-func (fd *regularFileFD) Sync(ctx context.Context) error {
- return nil
-}
-
-// regularFileReadWriter implements safemem.Reader and Safemem.Writer.
-type regularFileReadWriter struct {
- file *regularFile
-
- // Offset into the file to read/write at. Note that this may be
- // different from the FD offset if PRead/PWrite is used.
- off uint64
-}
-
-var regularFileReadWriterPool = sync.Pool{
- New: func() interface{} {
- return &regularFileReadWriter{}
- },
-}
-
-func getRegularFileReadWriter(file *regularFile, offset int64) *regularFileReadWriter {
- rw := regularFileReadWriterPool.Get().(*regularFileReadWriter)
- rw.file = file
- rw.off = uint64(offset)
- return rw
-}
-
-func putRegularFileReadWriter(rw *regularFileReadWriter) {
- rw.file = nil
- regularFileReadWriterPool.Put(rw)
-}
-
-// ReadToBlocks implements safemem.Reader.ReadToBlocks.
-func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
- rw.file.mu.RLock()
-
- // Compute the range to read (limited by file size and overflow-checked).
- if rw.off >= rw.file.size {
- rw.file.mu.RUnlock()
- return 0, io.EOF
- }
- end := rw.file.size
- if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end {
- end = rend
- }
-
- var done uint64
- seg, gap := rw.file.data.Find(uint64(rw.off))
- for rw.off < end {
- mr := memmap.MappableRange{uint64(rw.off), uint64(end)}
- switch {
- case seg.Ok():
- // Get internal mappings.
- ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
- if err != nil {
- rw.file.mu.RUnlock()
- return done, err
- }
-
- // Copy from internal mappings.
- n, err := safemem.CopySeq(dsts, ims)
- done += n
- rw.off += uint64(n)
- dsts = dsts.DropFirst64(n)
- if err != nil {
- rw.file.mu.RUnlock()
- return done, err
- }
-
- // Continue.
- seg, gap = seg.NextNonEmpty()
-
- case gap.Ok():
- // Tmpfs holes are zero-filled.
- gapmr := gap.Range().Intersect(mr)
- dst := dsts.TakeFirst64(gapmr.Length())
- n, err := safemem.ZeroSeq(dst)
- done += n
- rw.off += uint64(n)
- dsts = dsts.DropFirst64(n)
- if err != nil {
- rw.file.mu.RUnlock()
- return done, err
- }
-
- // Continue.
- seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
- }
- }
- rw.file.mu.RUnlock()
- return done, nil
-}
-
-// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
-func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
- rw.file.mu.Lock()
-
- // Compute the range to write (overflow-checked).
- end := rw.off + srcs.NumBytes()
- if end <= rw.off {
- end = math.MaxInt64
- }
-
- // Check if seals prevent either file growth or all writes.
- switch {
- case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed
- rw.file.mu.Unlock()
- return 0, syserror.EPERM
- case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed
- // When growth is sealed, Linux effectively allows writes which would
- // normally grow the file to partially succeed up to the current EOF,
- // rounded down to the page boundary before the EOF.
- //
- // This happens because writes (and thus the growth check) for tmpfs
- // files proceed page-by-page on Linux, and the final write to the page
- // containing EOF fails, resulting in a partial write up to the start of
- // that page.
- //
- // To emulate this behaviour, artifically truncate the write to the
- // start of the page containing the current EOF.
- //
- // See Linux, mm/filemap.c:generic_perform_write() and
- // mm/shmem.c:shmem_write_begin().
- if pgstart := uint64(usermem.Addr(rw.file.size).RoundDown()); end > pgstart {
- end = pgstart
- }
- if end <= rw.off {
- // Truncation would result in no data being written.
- rw.file.mu.Unlock()
- return 0, syserror.EPERM
- }
- }
-
- // Page-aligned mr for when we need to allocate memory. RoundUp can't
- // overflow since end is an int64.
- pgstartaddr := usermem.Addr(rw.off).RoundDown()
- pgendaddr, _ := usermem.Addr(end).RoundUp()
- pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)}
-
- var (
- done uint64
- retErr error
- )
- seg, gap := rw.file.data.Find(uint64(rw.off))
- for rw.off < end {
- mr := memmap.MappableRange{uint64(rw.off), uint64(end)}
- switch {
- case seg.Ok():
- // Get internal mappings.
- ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write)
- if err != nil {
- retErr = err
- goto exitLoop
- }
-
- // Copy to internal mappings.
- n, err := safemem.CopySeq(ims, srcs)
- done += n
- rw.off += uint64(n)
- srcs = srcs.DropFirst64(n)
- if err != nil {
- retErr = err
- goto exitLoop
- }
-
- // Continue.
- seg, gap = seg.NextNonEmpty()
-
- case gap.Ok():
- // Allocate memory for the write.
- gapMR := gap.Range().Intersect(pgMR)
- fr, err := rw.file.memFile.Allocate(gapMR.Length(), usage.Tmpfs)
- if err != nil {
- retErr = err
- goto exitLoop
- }
-
- // Write to that memory as usual.
- seg, gap = rw.file.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{}
- }
- }
-exitLoop:
- // If the write ends beyond the file's previous size, it causes the
- // file to grow.
- if rw.off > rw.file.size {
- atomic.StoreUint64(&rw.file.size, rw.off)
- }
-
- rw.file.mu.Unlock()
- return done, retErr
-}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
deleted file mode 100644
index 034a29fdb..000000000
--- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
+++ /dev/null
@@ -1,436 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tmpfs
-
-import (
- "bytes"
- "fmt"
- "io"
- "sync/atomic"
- "testing"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-// nextFileID is used to generate unique file names.
-var nextFileID int64
-
-// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error
-// is not nil, then cleanup should be called when the root is no longer needed.
-func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
-
- vfsObj := vfs.New()
- vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- })
- mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
- if err != nil {
- return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err)
- }
- root := mntns.Root()
- return vfsObj, root, func() {
- root.DecRef()
- mntns.DecRef(vfsObj)
- }, nil
-}
-
-// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If
-// the returned err is not nil, then cleanup should be called when the FD is no
-// longer needed.
-func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
- vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
- if err != nil {
- return nil, nil, err
- }
-
- filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1))
-
- // Create the file that will be write/read.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(filename),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
- Mode: linux.ModeRegular | mode,
- })
- if err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err)
- }
-
- return fd, cleanup, nil
-}
-
-// newDirFD is like newFileFD, but for directories.
-func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
- vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
- if err != nil {
- return nil, nil, err
- }
-
- dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1))
-
- // Create the dir.
- if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(dirname),
- }, &vfs.MkdirOptions{
- Mode: linux.ModeDirectory | mode,
- }); err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err)
- }
-
- // Open the dir and return it.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(dirname),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDONLY | linux.O_DIRECTORY,
- })
- if err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err)
- }
-
- return fd, cleanup, nil
-}
-
-// newPipeFD is like newFileFD, but for pipes.
-func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
- vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
- if err != nil {
- return nil, nil, err
- }
-
- pipename := fmt.Sprintf("tmpfs-test-pipe-%d", atomic.AddInt64(&nextFileID, 1))
-
- // Create the pipe.
- if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(pipename),
- }, &vfs.MknodOptions{
- Mode: linux.ModeNamedPipe | mode,
- }); err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to create pipe %q: %v", pipename, err)
- }
-
- // Open the pipe and return it.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(pipename),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR,
- })
- if err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to open pipe %q: %v", pipename, err)
- }
-
- return fd, cleanup, nil
-}
-
-// Test that we can write some data to a file and read it back.`
-func TestSimpleWriteRead(t *testing.T) {
- ctx := contexttest.Context(t)
- fd, cleanup, err := newFileFD(ctx, 0644)
- if err != nil {
- t.Fatal(err)
- }
- defer cleanup()
-
- // Write.
- data := []byte("foobarbaz")
- n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{})
- if err != nil {
- t.Fatalf("fd.Write failed: %v", err)
- }
- if n != int64(len(data)) {
- t.Errorf("fd.Write got short write length %d, want %d", n, len(data))
- }
- if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want {
- t.Errorf("fd.Write left offset at %d, want %d", got, want)
- }
-
- // Seek back to beginning.
- if _, err := fd.Seek(ctx, 0, linux.SEEK_SET); err != nil {
- t.Fatalf("fd.Seek failed: %v", err)
- }
- if got, want := fd.Impl().(*regularFileFD).off, int64(0); got != want {
- t.Errorf("fd.Seek(0) left offset at %d, want %d", got, want)
- }
-
- // Read.
- buf := make([]byte, len(data))
- n, err = fd.Read(ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{})
- if err != nil && err != io.EOF {
- t.Fatalf("fd.Read failed: %v", err)
- }
- if n != int64(len(data)) {
- t.Errorf("fd.Read got short read length %d, want %d", n, len(data))
- }
- if got, want := string(buf), string(data); got != want {
- t.Errorf("Read got %q want %s", got, want)
- }
- if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want {
- t.Errorf("fd.Write left offset at %d, want %d", got, want)
- }
-}
-
-func TestPWrite(t *testing.T) {
- ctx := contexttest.Context(t)
- fd, cleanup, err := newFileFD(ctx, 0644)
- if err != nil {
- t.Fatal(err)
- }
- defer cleanup()
-
- // Fill file with 1k 'a's.
- data := bytes.Repeat([]byte{'a'}, 1000)
- n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{})
- if err != nil {
- t.Fatalf("fd.Write failed: %v", err)
- }
- if n != int64(len(data)) {
- t.Errorf("fd.Write got short write length %d, want %d", n, len(data))
- }
-
- // Write "gVisor is awesome" at various offsets.
- buf := []byte("gVisor is awesome")
- offsets := []int{0, 1, 2, 10, 20, 50, 100, len(data) - 100, len(data) - 1, len(data), len(data) + 1}
- for _, offset := range offsets {
- name := fmt.Sprintf("PWrite offset=%d", offset)
- t.Run(name, func(t *testing.T) {
- n, err := fd.PWrite(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.WriteOptions{})
- if err != nil {
- t.Errorf("fd.PWrite got err %v want nil", err)
- }
- if n != int64(len(buf)) {
- t.Errorf("fd.PWrite got %d bytes want %d", n, len(buf))
- }
-
- // Update data to reflect expected file contents.
- if len(data) < offset+len(buf) {
- data = append(data, make([]byte, (offset+len(buf))-len(data))...)
- }
- copy(data[offset:], buf)
-
- // Read the whole file and compare with data.
- readBuf := make([]byte, len(data))
- n, err = fd.PRead(ctx, usermem.BytesIOSequence(readBuf), 0, vfs.ReadOptions{})
- if err != nil {
- t.Fatalf("fd.PRead failed: %v", err)
- }
- if n != int64(len(data)) {
- t.Errorf("fd.PRead got short read length %d, want %d", n, len(data))
- }
- if got, want := string(readBuf), string(data); got != want {
- t.Errorf("PRead got %q want %s", got, want)
- }
-
- })
- }
-}
-
-func TestPRead(t *testing.T) {
- ctx := contexttest.Context(t)
- fd, cleanup, err := newFileFD(ctx, 0644)
- if err != nil {
- t.Fatal(err)
- }
- defer cleanup()
-
- // Write 100 sequences of 'gVisor is awesome'.
- data := bytes.Repeat([]byte("gVisor is awsome"), 100)
- n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{})
- if err != nil {
- t.Fatalf("fd.Write failed: %v", err)
- }
- if n != int64(len(data)) {
- t.Errorf("fd.Write got short write length %d, want %d", n, len(data))
- }
-
- // Read various sizes from various offsets.
- sizes := []int{0, 1, 2, 10, 20, 50, 100, 1000}
- offsets := []int{0, 1, 2, 10, 20, 50, 100, 1000, len(data) - 100, len(data) - 1, len(data), len(data) + 1}
-
- for _, size := range sizes {
- for _, offset := range offsets {
- name := fmt.Sprintf("PRead offset=%d size=%d", offset, size)
- t.Run(name, func(t *testing.T) {
- var (
- wantRead []byte
- wantErr error
- )
- if offset < len(data) {
- wantRead = data[offset:]
- } else if size > 0 {
- wantErr = io.EOF
- }
- if offset+size < len(data) {
- wantRead = wantRead[:size]
- }
- buf := make([]byte, size)
- n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.ReadOptions{})
- if err != wantErr {
- t.Errorf("fd.PRead got err %v want %v", err, wantErr)
- }
- if n != int64(len(wantRead)) {
- t.Errorf("fd.PRead got %d bytes want %d", n, len(wantRead))
- }
- if got := string(buf[:n]); got != string(wantRead) {
- t.Errorf("fd.PRead got %q want %q", got, string(wantRead))
- }
- })
- }
- }
-}
-
-func TestTruncate(t *testing.T) {
- ctx := contexttest.Context(t)
- fd, cleanup, err := newFileFD(ctx, 0644)
- if err != nil {
- t.Fatal(err)
- }
- defer cleanup()
-
- // Fill the file with some data.
- data := bytes.Repeat([]byte("gVisor is awsome"), 100)
- written, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{})
- if err != nil {
- t.Fatalf("fd.Write failed: %v", err)
- }
-
- // Size should be same as written.
- sizeStatOpts := vfs.StatOptions{Mask: linux.STATX_SIZE}
- stat, err := fd.Stat(ctx, sizeStatOpts)
- if err != nil {
- t.Fatalf("fd.Stat failed: %v", err)
- }
- if got, want := int64(stat.Size), written; got != want {
- t.Errorf("fd.Stat got size %d, want %d", got, want)
- }
-
- // Truncate down.
- newSize := uint64(10)
- if err := fd.SetStat(ctx, vfs.SetStatOptions{
- Stat: linux.Statx{
- Mask: linux.STATX_SIZE,
- Size: newSize,
- },
- }); err != nil {
- t.Errorf("fd.Truncate failed: %v", err)
- }
- // Size should be updated.
- statAfterTruncateDown, err := fd.Stat(ctx, sizeStatOpts)
- if err != nil {
- t.Fatalf("fd.Stat failed: %v", err)
- }
- if got, want := statAfterTruncateDown.Size, newSize; got != want {
- t.Errorf("fd.Stat got size %d, want %d", got, want)
- }
- // We should only read newSize worth of data.
- buf := make([]byte, 1000)
- if n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), 0, vfs.ReadOptions{}); err != nil && err != io.EOF {
- t.Fatalf("fd.PRead failed: %v", err)
- } else if uint64(n) != newSize {
- t.Errorf("fd.PRead got size %d, want %d", n, newSize)
- }
- // Mtime and Ctime should be bumped.
- if got := statAfterTruncateDown.Mtime.ToNsec(); got <= stat.Mtime.ToNsec() {
- t.Errorf("fd.Stat got Mtime %v, want > %v", got, stat.Mtime)
- }
- if got := statAfterTruncateDown.Ctime.ToNsec(); got <= stat.Ctime.ToNsec() {
- t.Errorf("fd.Stat got Ctime %v, want > %v", got, stat.Ctime)
- }
-
- // Truncate up.
- newSize = 100
- if err := fd.SetStat(ctx, vfs.SetStatOptions{
- Stat: linux.Statx{
- Mask: linux.STATX_SIZE,
- Size: newSize,
- },
- }); err != nil {
- t.Errorf("fd.Truncate failed: %v", err)
- }
- // Size should be updated.
- statAfterTruncateUp, err := fd.Stat(ctx, sizeStatOpts)
- if err != nil {
- t.Fatalf("fd.Stat failed: %v", err)
- }
- if got, want := statAfterTruncateUp.Size, newSize; got != want {
- t.Errorf("fd.Stat got size %d, want %d", got, want)
- }
- // We should read newSize worth of data.
- buf = make([]byte, 1000)
- if n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), 0, vfs.ReadOptions{}); err != nil && err != io.EOF {
- t.Fatalf("fd.PRead failed: %v", err)
- } else if uint64(n) != newSize {
- t.Errorf("fd.PRead got size %d, want %d", n, newSize)
- }
- // Bytes should be null after 10, since we previously truncated to 10.
- for i := uint64(10); i < newSize; i++ {
- if buf[i] != 0 {
- t.Errorf("fd.PRead got byte %d=%x, want 0", i, buf[i])
- break
- }
- }
- // Mtime and Ctime should be bumped.
- if got := statAfterTruncateUp.Mtime.ToNsec(); got <= statAfterTruncateDown.Mtime.ToNsec() {
- t.Errorf("fd.Stat got Mtime %v, want > %v", got, statAfterTruncateDown.Mtime)
- }
- if got := statAfterTruncateUp.Ctime.ToNsec(); got <= statAfterTruncateDown.Ctime.ToNsec() {
- t.Errorf("fd.Stat got Ctime %v, want > %v", got, stat.Ctime)
- }
-
- // Truncate to the current size.
- newSize = statAfterTruncateUp.Size
- if err := fd.SetStat(ctx, vfs.SetStatOptions{
- Stat: linux.Statx{
- Mask: linux.STATX_SIZE,
- Size: newSize,
- },
- }); err != nil {
- t.Errorf("fd.Truncate failed: %v", err)
- }
- statAfterTruncateNoop, err := fd.Stat(ctx, sizeStatOpts)
- if err != nil {
- t.Fatalf("fd.Stat failed: %v", err)
- }
- // Mtime and Ctime should not be bumped, since operation is a noop.
- if got := statAfterTruncateNoop.Mtime.ToNsec(); got != statAfterTruncateUp.Mtime.ToNsec() {
- t.Errorf("fd.Stat got Mtime %v, want %v", got, statAfterTruncateUp.Mtime)
- }
- if got := statAfterTruncateNoop.Ctime.ToNsec(); got != statAfterTruncateUp.Ctime.ToNsec() {
- t.Errorf("fd.Stat got Ctime %v, want %v", got, statAfterTruncateUp.Ctime)
- }
-}
diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go
deleted file mode 100644
index ebe035dee..000000000
--- a/pkg/sentry/fsimpl/tmpfs/stat_test.go
+++ /dev/null
@@ -1,232 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tmpfs
-
-import (
- "fmt"
- "testing"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-func TestStatAfterCreate(t *testing.T) {
- ctx := contexttest.Context(t)
- mode := linux.FileMode(0644)
-
- // Run with different file types.
- // TODO(gvisor.dev/issues/1197): Also test symlinks and sockets.
- for _, typ := range []string{"file", "dir", "pipe"} {
- t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) {
- var (
- fd *vfs.FileDescription
- cleanup func()
- err error
- )
- switch typ {
- case "file":
- fd, cleanup, err = newFileFD(ctx, mode)
- case "dir":
- fd, cleanup, err = newDirFD(ctx, mode)
- case "pipe":
- fd, cleanup, err = newPipeFD(ctx, mode)
- default:
- panic(fmt.Sprintf("unknown typ %q", typ))
- }
- if err != nil {
- t.Fatal(err)
- }
- defer cleanup()
-
- got, err := fd.Stat(ctx, vfs.StatOptions{})
- if err != nil {
- t.Fatalf("Stat failed: %v", err)
- }
-
- // Atime, Ctime, Mtime should all be current time (non-zero).
- atime, ctime, mtime := got.Atime.ToNsec(), got.Ctime.ToNsec(), got.Mtime.ToNsec()
- if atime != ctime || ctime != mtime {
- t.Errorf("got atime=%d ctime=%d mtime=%d, wanted equal values", atime, ctime, mtime)
- }
- if atime == 0 {
- t.Errorf("got atime=%d, want non-zero", atime)
- }
-
- // Btime should be 0, as it is not set by tmpfs.
- if btime := got.Btime.ToNsec(); btime != 0 {
- t.Errorf("got btime %d, want 0", got.Btime.ToNsec())
- }
-
- // Size should be 0.
- if got.Size != 0 {
- t.Errorf("got size %d, want 0", got.Size)
- }
-
- // Nlink should be 1 for files, 2 for dirs.
- wantNlink := uint32(1)
- if typ == "dir" {
- wantNlink = 2
- }
- if got.Nlink != wantNlink {
- t.Errorf("got nlink %d, want %d", got.Nlink, wantNlink)
- }
-
- // UID and GID are set from context creds.
- creds := auth.CredentialsFromContext(ctx)
- if got.UID != uint32(creds.EffectiveKUID) {
- t.Errorf("got uid %d, want %d", got.UID, uint32(creds.EffectiveKUID))
- }
- if got.GID != uint32(creds.EffectiveKGID) {
- t.Errorf("got gid %d, want %d", got.GID, uint32(creds.EffectiveKGID))
- }
-
- // Mode.
- wantMode := uint16(mode)
- switch typ {
- case "file":
- wantMode |= linux.S_IFREG
- case "dir":
- wantMode |= linux.S_IFDIR
- case "pipe":
- wantMode |= linux.S_IFIFO
- default:
- panic(fmt.Sprintf("unknown typ %q", typ))
- }
-
- if got.Mode != wantMode {
- t.Errorf("got mode %x, want %x", got.Mode, wantMode)
- }
-
- // Ino.
- if got.Ino == 0 {
- t.Errorf("got ino %d, want not 0", got.Ino)
- }
- })
- }
-}
-
-func TestSetStatAtime(t *testing.T) {
- ctx := contexttest.Context(t)
- fd, cleanup, err := newFileFD(ctx, 0644)
- if err != nil {
- t.Fatal(err)
- }
- defer cleanup()
-
- allStatOptions := vfs.StatOptions{Mask: linux.STATX_ALL}
-
- // Get initial stat.
- initialStat, err := fd.Stat(ctx, allStatOptions)
- if err != nil {
- t.Fatalf("Stat failed: %v", err)
- }
-
- // Set atime, but without the mask.
- if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: linux.Statx{
- Mask: 0,
- Atime: linux.NsecToStatxTimestamp(100),
- }}); err != nil {
- t.Errorf("SetStat atime without mask failed: %v")
- }
- // Atime should be unchanged.
- if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil {
- t.Errorf("Stat got error: %v", err)
- } else if gotStat.Atime != initialStat.Atime {
- t.Errorf("Stat got atime %d, want %d", gotStat.Atime, initialStat.Atime)
- }
-
- // Set atime, this time included in the mask.
- setStat := linux.Statx{
- Mask: linux.STATX_ATIME,
- Atime: linux.NsecToStatxTimestamp(100),
- }
- if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil {
- t.Errorf("SetStat atime with mask failed: %v")
- }
- if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil {
- t.Errorf("Stat got error: %v", err)
- } else if gotStat.Atime != setStat.Atime {
- t.Errorf("Stat got atime %d, want %d", gotStat.Atime, setStat.Atime)
- }
-}
-
-func TestSetStat(t *testing.T) {
- ctx := contexttest.Context(t)
- mode := linux.FileMode(0644)
-
- // Run with different file types.
- // TODO(gvisor.dev/issues/1197): Also test symlinks and sockets.
- for _, typ := range []string{"file", "dir", "pipe"} {
- t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) {
- var (
- fd *vfs.FileDescription
- cleanup func()
- err error
- )
- switch typ {
- case "file":
- fd, cleanup, err = newFileFD(ctx, mode)
- case "dir":
- fd, cleanup, err = newDirFD(ctx, mode)
- case "pipe":
- fd, cleanup, err = newPipeFD(ctx, mode)
- default:
- panic(fmt.Sprintf("unknown typ %q", typ))
- }
- if err != nil {
- t.Fatal(err)
- }
- defer cleanup()
-
- allStatOptions := vfs.StatOptions{Mask: linux.STATX_ALL}
-
- // Get initial stat.
- initialStat, err := fd.Stat(ctx, allStatOptions)
- if err != nil {
- t.Fatalf("Stat failed: %v", err)
- }
-
- // Set atime, but without the mask.
- if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: linux.Statx{
- Mask: 0,
- Atime: linux.NsecToStatxTimestamp(100),
- }}); err != nil {
- t.Errorf("SetStat atime without mask failed: %v")
- }
- // Atime should be unchanged.
- if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil {
- t.Errorf("Stat got error: %v", err)
- } else if gotStat.Atime != initialStat.Atime {
- t.Errorf("Stat got atime %d, want %d", gotStat.Atime, initialStat.Atime)
- }
-
- // Set atime, this time included in the mask.
- setStat := linux.Statx{
- Mask: linux.STATX_ATIME,
- Atime: linux.NsecToStatxTimestamp(100),
- }
- if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil {
- t.Errorf("SetStat atime with mask failed: %v")
- }
- if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil {
- t.Errorf("Stat got error: %v", err)
- } else if gotStat.Atime != setStat.Atime {
- t.Errorf("Stat got atime %d, want %d", gotStat.Atime, setStat.Atime)
- }
- })
- }
-}
diff --git a/pkg/sentry/fsimpl/tmpfs/symlink.go b/pkg/sentry/fsimpl/tmpfs/symlink.go
deleted file mode 100644
index 5246aca84..000000000
--- a/pkg/sentry/fsimpl/tmpfs/symlink.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tmpfs
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-)
-
-type symlink struct {
- inode inode
- target string // immutable
-}
-
-func (fs *filesystem) newSymlink(creds *auth.Credentials, target string) *inode {
- link := &symlink{
- target: target,
- }
- link.inode.init(link, fs, creds, 0777)
- link.inode.nlink = 1 // from parent directory
- return &link.inode
-}
-
-// O_PATH is unimplemented, so there's no way to get a FileDescription
-// representing a symlink yet.
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
deleted file mode 100644
index 1d4889c89..000000000
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ /dev/null
@@ -1,393 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package tmpfs provides a filesystem implementation that behaves like tmpfs:
-// the Dentry tree is the sole source of truth for the state of the filesystem.
-//
-// Lock order:
-//
-// filesystem.mu
-// regularFileFD.offMu
-// regularFile.mu
-// inode.mu
-package tmpfs
-
-import (
- "fmt"
- "math"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/sentry/pgalloc"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// FilesystemType implements vfs.FilesystemType.
-type FilesystemType struct{}
-
-// filesystem implements vfs.FilesystemImpl.
-type filesystem struct {
- vfsfs vfs.Filesystem
-
- // memFile is used to allocate pages to for regular files.
- memFile *pgalloc.MemoryFile
-
- // clock is a realtime clock used to set timestamps in file operations.
- clock time.Clock
-
- // mu serializes changes to the Dentry tree.
- mu sync.RWMutex
-
- nextInoMinusOne uint64 // accessed using atomic memory operations
-}
-
-// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
-func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
- memFileProvider := pgalloc.MemoryFileProviderFromContext(ctx)
- if memFileProvider == nil {
- panic("MemoryFileProviderFromContext returned nil")
- }
- clock := time.RealtimeClockFromContext(ctx)
- fs := filesystem{
- memFile: memFileProvider.MemoryFile(),
- clock: clock,
- }
- fs.vfsfs.Init(vfsObj, &fs)
- root := fs.newDentry(fs.newDirectory(creds, 01777))
- return &fs.vfsfs, &root.vfsd, nil
-}
-
-// Release implements vfs.FilesystemImpl.Release.
-func (fs *filesystem) Release() {
-}
-
-// dentry implements vfs.DentryImpl.
-type dentry struct {
- vfsd vfs.Dentry
-
- // inode is the inode represented by this dentry. Multiple Dentries may
- // share a single non-directory inode (with hard links). inode is
- // immutable.
- inode *inode
-
- // tmpfs doesn't count references on dentries; because the dentry tree is
- // the sole source of truth, it is by definition always consistent with the
- // state of the filesystem. However, it does count references on inodes,
- // because inode resources are released when all references are dropped.
- // (tmpfs doesn't really have resources to release, but we implement
- // reference counting because tmpfs regular files will.)
-
- // dentryEntry (ugh) links dentries into their parent directory.childList.
- dentryEntry
-}
-
-func (fs *filesystem) newDentry(inode *inode) *dentry {
- d := &dentry{
- inode: inode,
- }
- d.vfsd.Init(d)
- return d
-}
-
-// IncRef implements vfs.DentryImpl.IncRef.
-func (d *dentry) IncRef() {
- d.inode.incRef()
-}
-
-// TryIncRef implements vfs.DentryImpl.TryIncRef.
-func (d *dentry) TryIncRef() bool {
- return d.inode.tryIncRef()
-}
-
-// DecRef implements vfs.DentryImpl.DecRef.
-func (d *dentry) DecRef() {
- d.inode.decRef()
-}
-
-// inode represents a filesystem object.
-type inode struct {
- // clock is a realtime clock used to set timestamps in file operations.
- clock time.Clock
-
- // refs is a reference count. refs is accessed using atomic memory
- // operations.
- //
- // A reference is held on all inodes that are reachable in the filesystem
- // tree. For non-directories (which may have multiple hard links), this
- // means that a reference is dropped when nlink reaches 0. For directories,
- // nlink never reaches 0 due to the "." entry; instead,
- // filesystem.RmdirAt() drops the reference.
- refs int64
-
- // Inode metadata. Writing multiple fields atomically requires holding
- // mu, othewise atomic operations can be used.
- mu sync.Mutex
- mode uint32 // excluding file type bits, which are based on impl
- nlink uint32 // protected by filesystem.mu instead of inode.mu
- uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
- gid uint32 // auth.KGID, but ...
- ino uint64 // immutable
-
- // Linux's tmpfs has no concept of btime.
- atime int64 // nanoseconds
- ctime int64 // nanoseconds
- mtime int64 // nanoseconds
-
- impl interface{} // immutable
-}
-
-const maxLinks = math.MaxUint32
-
-func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) {
- i.clock = fs.clock
- i.refs = 1
- i.mode = uint32(mode)
- i.uid = uint32(creds.EffectiveKUID)
- i.gid = uint32(creds.EffectiveKGID)
- i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1)
- // Tmpfs creation sets atime, ctime, and mtime to current time.
- now := i.clock.Now().Nanoseconds()
- i.atime = now
- i.ctime = now
- i.mtime = now
- // i.nlink initialized by caller
- i.impl = impl
-}
-
-// incLinksLocked increments i's link count.
-//
-// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
-// i.nlink < maxLinks.
-func (i *inode) incLinksLocked() {
- if i.nlink == 0 {
- panic("tmpfs.inode.incLinksLocked() called with no existing links")
- }
- if i.nlink == maxLinks {
- panic("memfs.inode.incLinksLocked() called with maximum link count")
- }
- atomic.AddUint32(&i.nlink, 1)
-}
-
-// decLinksLocked decrements i's link count.
-//
-// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
-func (i *inode) decLinksLocked() {
- if i.nlink == 0 {
- panic("tmpfs.inode.decLinksLocked() called with no existing links")
- }
- atomic.AddUint32(&i.nlink, ^uint32(0))
-}
-
-func (i *inode) incRef() {
- if atomic.AddInt64(&i.refs, 1) <= 1 {
- panic("tmpfs.inode.incRef() called without holding a reference")
- }
-}
-
-func (i *inode) tryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&i.refs)
- if refs == 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) {
- return true
- }
- }
-}
-
-func (i *inode) decRef() {
- if refs := atomic.AddInt64(&i.refs, -1); refs == 0 {
- // This is unnecessary; it's mostly to simulate what tmpfs would do.
- if regFile, ok := i.impl.(*regularFile); ok {
- regFile.mu.Lock()
- regFile.data.DropAll(regFile.memFile)
- atomic.StoreUint64(&regFile.size, 0)
- regFile.mu.Unlock()
- }
- } else if refs < 0 {
- panic("tmpfs.inode.decRef() called without holding a reference")
- }
-}
-
-func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error {
- return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid)))
-}
-
-// Go won't inline this function, and returning linux.Statx (which is quite
-// big) means spending a lot of time in runtime.duffcopy(), so instead it's an
-// output parameter.
-//
-// Note that Linux does not guarantee to return consistent data (in the case of
-// a concurrent modification), so we do not require holding inode.mu.
-func (i *inode) statTo(stat *linux.Statx) {
- stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK |
- linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_ATIME |
- linux.STATX_BTIME | linux.STATX_CTIME | linux.STATX_MTIME
- stat.Blksize = 1 // usermem.PageSize in tmpfs
- stat.Nlink = atomic.LoadUint32(&i.nlink)
- stat.UID = atomic.LoadUint32(&i.uid)
- stat.GID = atomic.LoadUint32(&i.gid)
- stat.Mode = uint16(atomic.LoadUint32(&i.mode))
- stat.Ino = i.ino
- // Linux's tmpfs has no concept of btime, so zero-value is returned.
- stat.Atime = linux.NsecToStatxTimestamp(i.atime)
- stat.Ctime = linux.NsecToStatxTimestamp(i.ctime)
- stat.Mtime = linux.NsecToStatxTimestamp(i.mtime)
- // TODO(gvisor.dev/issues/1197): Device number.
- switch impl := i.impl.(type) {
- case *regularFile:
- stat.Mode |= linux.S_IFREG
- stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
- stat.Size = uint64(atomic.LoadUint64(&impl.size))
- // In tmpfs, this will be FileRangeSet.Span() / 512 (but also cached in
- // a uint64 accessed using atomic memory operations to avoid taking
- // locks).
- stat.Blocks = allocatedBlocksForSize(stat.Size)
- case *directory:
- stat.Mode |= linux.S_IFDIR
- case *symlink:
- stat.Mode |= linux.S_IFLNK
- stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
- stat.Size = uint64(len(impl.target))
- stat.Blocks = allocatedBlocksForSize(stat.Size)
- case *namedPipe:
- stat.Mode |= linux.S_IFIFO
- default:
- panic(fmt.Sprintf("unknown inode type: %T", i.impl))
- }
-}
-
-func (i *inode) setStat(stat linux.Statx) error {
- if stat.Mask == 0 {
- return nil
- }
- i.mu.Lock()
- var (
- needsMtimeBump bool
- needsCtimeBump bool
- )
- mask := stat.Mask
- if mask&linux.STATX_MODE != 0 {
- atomic.StoreUint32(&i.mode, uint32(stat.Mode))
- needsCtimeBump = true
- }
- if mask&linux.STATX_UID != 0 {
- atomic.StoreUint32(&i.uid, stat.UID)
- needsCtimeBump = true
- }
- if mask&linux.STATX_GID != 0 {
- atomic.StoreUint32(&i.gid, stat.GID)
- needsCtimeBump = true
- }
- if mask&linux.STATX_SIZE != 0 {
- switch impl := i.impl.(type) {
- case *regularFile:
- updated, err := impl.truncate(stat.Size)
- if err != nil {
- return err
- }
- if updated {
- needsMtimeBump = true
- needsCtimeBump = true
- }
- case *directory:
- return syserror.EISDIR
- case *symlink:
- return syserror.EINVAL
- case *namedPipe:
- // Nothing.
- default:
- panic(fmt.Sprintf("unknown inode type: %T", i.impl))
- }
- }
- if mask&linux.STATX_ATIME != 0 {
- atomic.StoreInt64(&i.atime, stat.Atime.ToNsecCapped())
- needsCtimeBump = true
- }
- if mask&linux.STATX_MTIME != 0 {
- atomic.StoreInt64(&i.mtime, stat.Mtime.ToNsecCapped())
- needsCtimeBump = true
- // Ignore the mtime bump, since we just set it ourselves.
- needsMtimeBump = false
- }
- if mask&linux.STATX_CTIME != 0 {
- atomic.StoreInt64(&i.ctime, stat.Ctime.ToNsecCapped())
- // Ignore the ctime bump, since we just set it ourselves.
- needsCtimeBump = false
- }
- now := i.clock.Now().Nanoseconds()
- if needsMtimeBump {
- atomic.StoreInt64(&i.mtime, now)
- }
- if needsCtimeBump {
- atomic.StoreInt64(&i.ctime, now)
- }
- i.mu.Unlock()
- return nil
-}
-
-// allocatedBlocksForSize returns the number of 512B blocks needed to
-// accommodate the given size in bytes, as appropriate for struct
-// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block
-// size is independent of the "preferred block size for I/O", struct
-// stat::st_blksize and struct statx::stx_blksize.)
-func allocatedBlocksForSize(size uint64) uint64 {
- return (size + 511) / 512
-}
-
-func (i *inode) direntType() uint8 {
- switch i.impl.(type) {
- case *regularFile:
- return linux.DT_REG
- case *directory:
- return linux.DT_DIR
- case *symlink:
- return linux.DT_LNK
- default:
- panic(fmt.Sprintf("unknown inode type: %T", i.impl))
- }
-}
-
-// fileDescription is embedded by tmpfs implementations of
-// vfs.FileDescriptionImpl.
-type fileDescription struct {
- vfsfd vfs.FileDescription
- vfs.FileDescriptionDefaultImpl
-}
-
-func (fd *fileDescription) filesystem() *filesystem {
- return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
-}
-
-func (fd *fileDescription) inode() *inode {
- return fd.vfsfd.Dentry().Impl().(*dentry).inode
-}
-
-// Stat implements vfs.FileDescriptionImpl.Stat.
-func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
- var stat linux.Statx
- fd.inode().statTo(&stat)
- return stat, nil
-}
-
-// SetStat implements vfs.FileDescriptionImpl.SetStat.
-func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
- return fd.inode().setStat(opts.Stat)
-}