summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl')
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/BUILD3
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/base.go13
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/cgroupfs.go194
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/memory.go30
-rw-r--r--pkg/sentry/fsimpl/devpts/BUILD3
-rw-r--r--pkg/sentry/fsimpl/devpts/devpts.go10
-rw-r--r--pkg/sentry/fsimpl/devpts/devpts_test.go34
-rw-r--r--pkg/sentry/fsimpl/devpts/line_discipline.go67
-rw-r--r--pkg/sentry/fsimpl/devpts/master.go8
-rw-r--r--pkg/sentry/fsimpl/devpts/queue.go46
-rw-r--r--pkg/sentry/fsimpl/devpts/replica.go8
-rw-r--r--pkg/sentry/fsimpl/eventfd/BUILD2
-rw-r--r--pkg/sentry/fsimpl/eventfd/eventfd.go10
-rw-r--r--pkg/sentry/fsimpl/ext/BUILD103
-rw-r--r--pkg/sentry/fsimpl/ext/README.md117
-rw-r--r--pkg/sentry/fsimpl/ext/assets/README.md36
-rw-r--r--pkg/sentry/fsimpl/ext/assets/bigfile.txt41
-rw-r--r--pkg/sentry/fsimpl/ext/assets/file.txt1
l---------pkg/sentry/fsimpl/ext/assets/symlink.txt1
-rw-r--r--pkg/sentry/fsimpl/ext/assets/tiny.ext2bin65536 -> 0 bytes
-rw-r--r--pkg/sentry/fsimpl/ext/assets/tiny.ext3bin65536 -> 0 bytes
-rw-r--r--pkg/sentry/fsimpl/ext/assets/tiny.ext4bin65536 -> 0 bytes
-rw-r--r--pkg/sentry/fsimpl/ext/benchmark/BUILD17
-rw-r--r--pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go211
-rwxr-xr-xpkg/sentry/fsimpl/ext/benchmark/make_deep_ext4.sh72
-rw-r--r--pkg/sentry/fsimpl/ext/block_map_file.go203
-rw-r--r--pkg/sentry/fsimpl/ext/block_map_test.go160
-rw-r--r--pkg/sentry/fsimpl/ext/dentry.go82
-rw-r--r--pkg/sentry/fsimpl/ext/directory.go312
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/BUILD48
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/block_group.go143
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/block_group_32.go74
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/block_group_64.go95
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/block_group_test.go28
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/dirent.go75
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/dirent_new.go63
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/dirent_old.go51
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/dirent_test.go28
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/disklayout.go48
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/extent.go155
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/extent_test.go30
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/inode.go277
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/inode_new.go98
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/inode_old.go119
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/inode_test.go224
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/superblock.go477
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/superblock_32.go78
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/superblock_64.go97
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/superblock_old.go107
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/superblock_test.go30
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/test_utils.go30
-rw-r--r--pkg/sentry/fsimpl/ext/ext.go159
-rw-r--r--pkg/sentry/fsimpl/ext/ext_test.go926
-rw-r--r--pkg/sentry/fsimpl/ext/extent_file.go239
-rw-r--r--pkg/sentry/fsimpl/ext/extent_test.go266
-rw-r--r--pkg/sentry/fsimpl/ext/file_description.go65
-rw-r--r--pkg/sentry/fsimpl/ext/filesystem.go555
-rw-r--r--pkg/sentry/fsimpl/ext/inode.go246
-rw-r--r--pkg/sentry/fsimpl/ext/regular_file.go155
-rw-r--r--pkg/sentry/fsimpl/ext/symlink.go115
-rw-r--r--pkg/sentry/fsimpl/ext/utils.go94
-rw-r--r--pkg/sentry/fsimpl/fuse/BUILD4
-rw-r--r--pkg/sentry/fsimpl/fuse/connection.go8
-rw-r--r--pkg/sentry/fsimpl/fuse/connection_test.go4
-rw-r--r--pkg/sentry/fsimpl/fuse/dev.go36
-rw-r--r--pkg/sentry/fsimpl/fuse/dev_test.go4
-rw-r--r--pkg/sentry/fsimpl/fuse/directory.go12
-rw-r--r--pkg/sentry/fsimpl/fuse/fusefs.go60
-rw-r--r--pkg/sentry/fsimpl/fuse/read_write.go8
-rw-r--r--pkg/sentry/fsimpl/fuse/regular_file.go22
-rw-r--r--pkg/sentry/fsimpl/gofer/BUILD2
-rw-r--r--pkg/sentry/fsimpl/gofer/directory.go8
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go174
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go168
-rw-r--r--pkg/sentry/fsimpl/gofer/handle.go41
-rw-r--r--pkg/sentry/fsimpl/gofer/host_named_pipe.go6
-rw-r--r--pkg/sentry/fsimpl/gofer/p9file.go4
-rw-r--r--pkg/sentry/fsimpl/gofer/regular_file.go101
-rw-r--r--pkg/sentry/fsimpl/gofer/revalidate.go10
-rw-r--r--pkg/sentry/fsimpl/gofer/save_restore.go8
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go166
-rw-r--r--pkg/sentry/fsimpl/gofer/symlink.go2
-rw-r--r--pkg/sentry/fsimpl/host/BUILD3
-rw-r--r--pkg/sentry/fsimpl/host/host.go215
-rw-r--r--pkg/sentry/fsimpl/host/socket.go6
-rw-r--r--pkg/sentry/fsimpl/host/socket_iovec.go10
-rw-r--r--pkg/sentry/fsimpl/host/tty.go26
-rw-r--r--pkg/sentry/fsimpl/host/util.go6
-rw-r--r--pkg/sentry/fsimpl/kernfs/BUILD5
-rw-r--r--pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go6
-rw-r--r--pkg/sentry/fsimpl/kernfs/fd_impl_util.go8
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go98
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go69
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go121
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs_test.go75
-rw-r--r--pkg/sentry/fsimpl/kernfs/symlink.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/synthetic_directory.go12
-rw-r--r--pkg/sentry/fsimpl/overlay/BUILD2
-rw-r--r--pkg/sentry/fsimpl/overlay/copy_up.go21
-rw-r--r--pkg/sentry/fsimpl/overlay/directory.go12
-rw-r--r--pkg/sentry/fsimpl/overlay/filesystem.go134
-rw-r--r--pkg/sentry/fsimpl/overlay/overlay.go24
-rw-r--r--pkg/sentry/fsimpl/overlay/regular_file.go4
-rw-r--r--pkg/sentry/fsimpl/pipefs/BUILD2
-rw-r--r--pkg/sentry/fsimpl/pipefs/pipefs.go4
-rw-r--r--pkg/sentry/fsimpl/proc/BUILD4
-rw-r--r--pkg/sentry/fsimpl/proc/filesystem.go4
-rw-r--r--pkg/sentry/fsimpl/proc/subtasks.go20
-rw-r--r--pkg/sentry/fsimpl/proc/task.go12
-rw-r--r--pkg/sentry/fsimpl/proc/task_fds.go40
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go247
-rw-r--r--pkg/sentry/fsimpl/proc/task_net.go4
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go6
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_files.go14
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys.go14
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_test.go6
-rw-r--r--pkg/sentry/fsimpl/proc/yama.go6
-rw-r--r--pkg/sentry/fsimpl/signalfd/BUILD2
-rw-r--r--pkg/sentry/fsimpl/signalfd/signalfd.go4
-rw-r--r--pkg/sentry/fsimpl/sockfs/BUILD2
-rw-r--r--pkg/sentry/fsimpl/sockfs/sockfs.go4
-rw-r--r--pkg/sentry/fsimpl/sys/BUILD2
-rw-r--r--pkg/sentry/fsimpl/sys/kcov.go6
-rw-r--r--pkg/sentry/fsimpl/sys/sys.go6
-rw-r--r--pkg/sentry/fsimpl/testutil/kernel.go7
-rw-r--r--pkg/sentry/fsimpl/timerfd/BUILD2
-rw-r--r--pkg/sentry/fsimpl/timerfd/timerfd.go6
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD6
-rw-r--r--pkg/sentry/fsimpl/tmpfs/benchmark_test.go8
-rw-r--r--pkg/sentry/fsimpl/tmpfs/directory.go6
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go110
-rw-r--r--pkg/sentry/fsimpl/tmpfs/pipe_test.go6
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go34
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file_test.go6
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go26
-rw-r--r--pkg/sentry/fsimpl/verity/BUILD18
-rw-r--r--pkg/sentry/fsimpl/verity/filesystem.go159
-rw-r--r--pkg/sentry/fsimpl/verity/verity.go363
-rw-r--r--pkg/sentry/fsimpl/verity/verity_test.go22
139 files changed, 2149 insertions, 7727 deletions
diff --git a/pkg/sentry/fsimpl/cgroupfs/BUILD b/pkg/sentry/fsimpl/cgroupfs/BUILD
index 37efb641a..e5fdcc776 100644
--- a/pkg/sentry/fsimpl/cgroupfs/BUILD
+++ b/pkg/sentry/fsimpl/cgroupfs/BUILD
@@ -31,6 +31,8 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/coverage",
+ "//pkg/errors/linuxerr",
+ "//pkg/fspath",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
@@ -42,7 +44,6 @@ go_library(
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go
index fe9871bdd..71bb0a9c8 100644
--- a/pkg/sentry/fsimpl/cgroupfs/base.go
+++ b/pkg/sentry/fsimpl/cgroupfs/base.go
@@ -23,10 +23,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -88,7 +88,6 @@ type controller interface {
// +stateify savable
type cgroupInode struct {
dir
- fs *filesystem
// ts is the list of tasks in this cgroup. The kernel is responsible for
// removing tasks from this list before they're destroyed, so any tasks on
@@ -102,9 +101,10 @@ var _ kernel.CgroupImpl = (*cgroupInode)(nil)
func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
c := &cgroupInode{
- fs: fs,
- ts: make(map[*kernel.Task]struct{}),
+ dir: dir{fs: fs},
+ ts: make(map[*kernel.Task]struct{}),
}
+ c.dir.cgi = c
contents := make(map[string]kernfs.Inode)
contents["cgroup.procs"] = fs.newControllerFile(ctx, creds, &cgroupProcsData{c})
@@ -115,8 +115,7 @@ func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credential
}
c.dir.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555))
- c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
- c.dir.InitRefs()
+ c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true})
c.dir.IncLinks(c.dir.OrderedChildren.Populate(contents))
atomic.AddUint64(&fs.numCgroups, 1)
@@ -253,7 +252,7 @@ func parseInt64FromString(ctx context.Context, src usermem.IOSequence, offset in
// Note: This also handles zero-len writes if offset is beyond the end
// of src, or src is empty.
ctx.Warningf("cgroupfs.parseInt64FromString: failed to parse %q: %v", string(buf), err)
- return 0, int64(n), syserror.EINVAL
+ return 0, int64(n), linuxerr.EINVAL
}
return val, int64(n), nil
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
index 05d7eb4ce..edc3b50b9 100644
--- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
+++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
@@ -32,7 +32,8 @@
// controllers associated with them.
//
// Since cgroupfs doesn't allow hardlinks, there is a unique mapping between
-// cgroupfs dentries and inodes.
+// cgroupfs dentries and inodes. Thus, cgroupfs inodes don't need to be ref
+// counted and exist until they're unlinked once or the FS is destroyed.
//
// # Synchronization
//
@@ -48,10 +49,11 @@
// Lock order:
//
// kernel.CgroupRegistry.mu
-// cgroupfs.filesystem.mu
-// kernel.TaskSet.mu
-// kernel.Task.mu
-// cgroupfs.filesystem.tasksMu.
+// kernfs.filesystem.mu
+// kernel.TaskSet.mu
+// kernel.Task.mu
+// cgroupfs.filesystem.tasksMu.
+// cgroupfs.dir.OrderedChildren.mu
package cgroupfs
import (
@@ -62,12 +64,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -108,6 +111,7 @@ type FilesystemType struct{}
// +stateify savable
type InternalData struct {
DefaultControlValues map[string]int64
+ InitialCgroupPath string
}
// filesystem implements vfs.FilesystemImpl and kernel.cgroupFS.
@@ -134,6 +138,11 @@ type filesystem struct {
numCgroups uint64 // Protected by atomic ops.
root *kernfs.Dentry
+ // effectiveRoot is the initial cgroup new tasks are created in. Unless
+ // overwritten by internal mount options, root == effectiveRoot. If
+ // effectiveRoot != root, an extra reference is held on effectiveRoot for
+ // the lifetime of the filesystem.
+ effectiveRoot *kernfs.Dentry
// tasksMu serializes task membership changes across all cgroups within a
// filesystem.
@@ -167,7 +176,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
maxCachedDentries, err = strconv.ParseUint(str, 10, 64)
if err != nil {
ctx.Warningf("sys.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
}
@@ -195,7 +204,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if _, ok := mopts["all"]; ok {
if len(wantControllers) > 0 {
ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: other controllers specified with all: %v", wantControllers)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
delete(mopts, "all")
@@ -209,7 +218,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if len(mopts) != 0 {
ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: unknown options: %v", mopts)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
k := kernel.KernelFromContext(ctx)
@@ -229,6 +238,9 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fs := vfsfs.Impl().(*filesystem)
ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: mounting new view to hierarchy %v", fs.hierarchyID)
fs.root.IncRef()
+ if fs.effectiveRoot != fs.root {
+ fs.effectiveRoot.IncRef()
+ }
return vfsfs, fs.root.VFSDentry(), nil
}
@@ -245,8 +257,8 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
var defaults map[string]int64
if opts.InternalData != nil {
- ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults)
defaults = opts.InternalData.(*InternalData).DefaultControlValues
+ ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults)
}
for _, ty := range wantControllers {
@@ -286,6 +298,14 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
var rootD kernfs.Dentry
rootD.InitRoot(&fs.Filesystem, root)
fs.root = &rootD
+ fs.effectiveRoot = fs.root
+
+ if err := fs.prepareInitialCgroup(ctx, vfsObj, opts); err != nil {
+ ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: failed to prepare initial cgroup: %v", err)
+ rootD.DecRef(ctx)
+ fs.VFSFilesystem().DecRef(ctx)
+ return nil, nil, err
+ }
// Register controllers. The registry may be modified concurrently, so if we
// get an error, we raced with someone else who registered the same
@@ -294,7 +314,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
ctx.Infof("cgroupfs.FilesystemType.GetFilesystem: failed to register new hierarchy with controllers %v: %v", wantControllers, err)
rootD.DecRef(ctx)
fs.VFSFilesystem().DecRef(ctx)
- return nil, nil, syserror.EBUSY
+ return nil, nil, linuxerr.EBUSY
}
// Move all existing tasks to the root of the new hierarchy.
@@ -303,10 +323,47 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return fs.VFSFilesystem(), rootD.VFSDentry(), nil
}
+// prepareInitialCgroup creates the initial cgroup according to opts. An initial
+// cgroup is optional, and if not specified, this function is a no-op.
+func (fs *filesystem) prepareInitialCgroup(ctx context.Context, vfsObj *vfs.VirtualFilesystem, opts vfs.GetFilesystemOptions) error {
+ if opts.InternalData == nil {
+ return nil
+ }
+ initPathStr := opts.InternalData.(*InternalData).InitialCgroupPath
+ if initPathStr == "" {
+ return nil
+ }
+ ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path: %v", initPathStr)
+ initPath := fspath.Parse(initPathStr)
+ if !initPath.Absolute || !initPath.HasComponents() {
+ ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path invalid: %+v", initPath)
+ return linuxerr.EINVAL
+ }
+
+ // Have initial cgroup target, create the tree.
+ cgDir := fs.root.Inode().(*cgroupInode)
+ for pit := initPath.Begin; pit.Ok(); pit = pit.Next() {
+ cgDirI, err := cgDir.NewDir(ctx, pit.String(), vfs.MkdirOptions{})
+ if err != nil {
+ return err
+ }
+ cgDir = cgDirI.(*cgroupInode)
+ }
+
+ // Walk to target dentry.
+ initDentry, err := fs.root.WalkDentryTree(ctx, vfsObj, initPath)
+ if err != nil {
+ ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup dentry not found: %v", err)
+ return linuxerr.ENOENT
+ }
+ fs.effectiveRoot = initDentry // Reference from WalkDentryTree transferred here.
+ return nil
+}
+
func (fs *filesystem) rootCgroup() kernel.Cgroup {
return kernel.Cgroup{
- Dentry: fs.root,
- CgroupImpl: fs.root.Inode().(kernel.CgroupImpl),
+ Dentry: fs.effectiveRoot,
+ CgroupImpl: fs.effectiveRoot.Inode().(kernel.CgroupImpl),
}
}
@@ -320,6 +377,10 @@ func (fs *filesystem) Release(ctx context.Context) {
r.Unregister(fs.hierarchyID)
}
+ if fs.root != fs.effectiveRoot {
+ fs.effectiveRoot.DecRef(ctx)
+ }
+
fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
fs.Filesystem.Release(ctx)
}
@@ -346,15 +407,18 @@ func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error
//
// +stateify savable
type dir struct {
- dirRefs
+ kernfs.InodeNoopRefCount
kernfs.InodeAlwaysValid
kernfs.InodeAttrs
kernfs.InodeNotSymlink
- kernfs.InodeDirectoryNoNewChildren // TODO(b/183137098): Implement mkdir.
+ kernfs.InodeDirectoryNoNewChildren
kernfs.OrderedChildren
implStatFS
locks vfs.FileLocks
+
+ fs *filesystem // Immutable.
+ cgi *cgroupInode // Immutable.
}
// Keep implements kernfs.Inode.Keep.
@@ -364,7 +428,7 @@ func (*dir) Keep() bool {
// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// Open implements kernfs.Inode.Open.
@@ -378,14 +442,100 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry
return fd.VFSFileDescription(), nil
}
-// DecRef implements kernfs.Inode.DecRef.
-func (d *dir) DecRef(ctx context.Context) {
- d.dirRefs.DecRef(func() { d.Destroy(ctx) })
+// NewDir implements kernfs.Inode.NewDir.
+func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) {
+ // "Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable."
+ // -- Linux, kernel/cgroup.c:cgroup_mkdir().
+ if strings.Contains(name, "\n") {
+ return nil, linuxerr.EINVAL
+ }
+ return d.OrderedChildren.Inserter(name, func() kernfs.Inode {
+ d.IncLinks(1)
+ return d.fs.newCgroupInode(ctx, auth.CredentialsFromContext(ctx))
+ })
}
-// StatFS implements kernfs.Inode.StatFS.
-func (d *dir) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) {
- return vfs.GenericStatFS(linux.CGROUP_SUPER_MAGIC), nil
+// Rename implements kernfs.Inode.Rename. Cgroupfs only allows renaming of
+// cgroup directories, and the rename may only change the name within the same
+// parent. See linux, kernel/cgroup.c:cgroup_rename().
+func (d *dir) Rename(ctx context.Context, oldname, newname string, child, dst kernfs.Inode) error {
+ if _, ok := child.(*cgroupInode); !ok {
+ // Not a cgroup directory. Control files are backed by different types.
+ return linuxerr.ENOTDIR
+ }
+
+ dstCGInode, ok := dst.(*cgroupInode)
+ if !ok {
+ // Not a cgroup inode, so definitely can't be *this* inode.
+ return linuxerr.EIO
+ }
+ // Note: We're intentionally comparing addresses, since two different dirs
+ // could plausibly be identical in memory, but would occupy different
+ // locations in memory.
+ if d != &dstCGInode.dir {
+ // Destination dir is a different cgroup inode. Cross directory renames
+ // aren't allowed.
+ return linuxerr.EIO
+ }
+
+ // Rename moves oldname to newname within d. Proceed.
+ return d.OrderedChildren.Rename(ctx, oldname, newname, child, dst)
+}
+
+// Unlink implements kernfs.Inode.Unlink. Cgroupfs disallows unlink, as the only
+// files in the filesystem are control files, which can't be deleted.
+func (d *dir) Unlink(ctx context.Context, name string, child kernfs.Inode) error {
+ return linuxerr.EPERM
+}
+
+// hasChildrenLocked returns whether the cgroup dir contains any objects that
+// prevent it from being deleted.
+func (d *dir) hasChildrenLocked() bool {
+ // Subdirs take a link on the parent, so checks if there are any direct
+ // children cgroups. Exclude the dir's self link and the link from ".".
+ if d.InodeAttrs.Links()-2 > 0 {
+ return true
+ }
+ return len(d.cgi.ts) > 0
+}
+
+// HasChildren implements kernfs.Inode.HasChildren.
+//
+// The empty check for a cgroupfs directory is unlike a regular directory since
+// a cgroupfs directory will always have control files. A cgroupfs directory can
+// be deleted if cgroup contains no tasks and has no sub-cgroups.
+func (d *dir) HasChildren() bool {
+ d.fs.tasksMu.RLock()
+ defer d.fs.tasksMu.RUnlock()
+ return d.hasChildrenLocked()
+}
+
+// RmDir implements kernfs.Inode.RmDir.
+func (d *dir) RmDir(ctx context.Context, name string, child kernfs.Inode) error {
+ // Unlike a normal directory, we need to recheck if d is empty again, since
+ // vfs/kernfs can't stop tasks from entering or leaving the cgroup.
+ d.fs.tasksMu.RLock()
+ defer d.fs.tasksMu.RUnlock()
+
+ cgi, ok := child.(*cgroupInode)
+ if !ok {
+ return linuxerr.ENOTDIR
+ }
+ if cgi.dir.hasChildrenLocked() {
+ return linuxerr.ENOTEMPTY
+ }
+
+ // Disallow deletion of the effective root cgroup.
+ if cgi == d.fs.effectiveRoot.Inode().(*cgroupInode) {
+ ctx.Warningf("Cannot delete initial cgroup for new tasks %q", d.fs.effectiveRoot.FSLocalPath())
+ return linuxerr.EBUSY
+ }
+
+ err := d.OrderedChildren.RmDir(ctx, name, child)
+ if err == nil {
+ d.InodeAttrs.DecLinks()
+ }
+ return err
}
// controllerFile represents a generic control file that appears within a cgroup
diff --git a/pkg/sentry/fsimpl/cgroupfs/memory.go b/pkg/sentry/fsimpl/cgroupfs/memory.go
index 485c98376..d880c9bc4 100644
--- a/pkg/sentry/fsimpl/cgroupfs/memory.go
+++ b/pkg/sentry/fsimpl/cgroupfs/memory.go
@@ -31,22 +31,34 @@ import (
type memoryController struct {
controllerCommon
- limitBytes int64
+ limitBytes int64
+ softLimitBytes int64
+ moveChargeAtImmigrate int64
}
var _ controller = (*memoryController)(nil)
func newMemoryController(fs *filesystem, defaults map[string]int64) *memoryController {
c := &memoryController{
- // Linux sets this to (PAGE_COUNTER_MAX * PAGE_SIZE) by default, which
- // is ~ 2**63 on a 64-bit system. So essentially, inifinity. The exact
- // value isn't very important.
- limitBytes: math.MaxInt64,
+ // Linux sets these limits to (PAGE_COUNTER_MAX * PAGE_SIZE) by default,
+ // which is ~ 2**63 on a 64-bit system. So essentially, inifinity. The
+ // exact value isn't very important.
+
+ limitBytes: math.MaxInt64,
+ softLimitBytes: math.MaxInt64,
}
- if val, ok := defaults["memory.limit_in_bytes"]; ok {
- c.limitBytes = val
- delete(defaults, "memory.limit_in_bytes")
+
+ consumeDefault := func(name string, valPtr *int64) {
+ if val, ok := defaults[name]; ok {
+ *valPtr = val
+ delete(defaults, name)
+ }
}
+
+ consumeDefault("memory.limit_in_bytes", &c.limitBytes)
+ consumeDefault("memory.soft_limit_in_bytes", &c.softLimitBytes)
+ consumeDefault("memory.move_charge_at_immigrate", &c.moveChargeAtImmigrate)
+
c.controllerCommon.init(controllerMemory, fs)
return c
}
@@ -55,6 +67,8 @@ func newMemoryController(fs *filesystem, defaults map[string]int64) *memoryContr
func (c *memoryController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) {
contents["memory.usage_in_bytes"] = c.fs.newControllerFile(ctx, creds, &memoryUsageInBytesData{})
contents["memory.limit_in_bytes"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.limitBytes))
+ contents["memory.soft_limit_in_bytes"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.softLimitBytes))
+ contents["memory.move_charge_at_immigrate"] = c.fs.newStaticControllerFile(ctx, creds, linux.FileMode(0644), fmt.Sprintf("%d\n", c.moveChargeAtImmigrate))
}
// +stateify savable
diff --git a/pkg/sentry/fsimpl/devpts/BUILD b/pkg/sentry/fsimpl/devpts/BUILD
index 6af3c3781..e0b879339 100644
--- a/pkg/sentry/fsimpl/devpts/BUILD
+++ b/pkg/sentry/fsimpl/devpts/BUILD
@@ -29,6 +29,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/log",
"//pkg/marshal",
"//pkg/marshal/primitive",
@@ -44,7 +45,6 @@ go_library(
"//pkg/sentry/unimpl",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
@@ -59,5 +59,6 @@ go_test(
"//pkg/abi/linux",
"//pkg/sentry/contexttest",
"//pkg/usermem",
+ "//pkg/waiter",
],
)
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index e75954105..e711debcb 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -25,10 +25,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Name is the filesystem name.
@@ -56,7 +56,7 @@ func (*FilesystemType) Name() string {
func (fstype *FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
// No data allowed.
if opts.Data != "" {
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
fstype.initOnce.Do(func() {
@@ -179,7 +179,7 @@ func (i *rootInode) allocateTerminal(ctx context.Context, creds *auth.Credential
i.mu.Lock()
defer i.mu.Unlock()
if i.nextIdx == math.MaxUint32 {
- return nil, syserror.ENOMEM
+ return nil, linuxerr.ENOMEM
}
idx := i.nextIdx
i.nextIdx++
@@ -240,7 +240,7 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (kernfs.Inode, erro
// Not a static entry.
idx, err := strconv.ParseUint(name, 10, 32)
if err != nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
i.mu.Lock()
defer i.mu.Unlock()
@@ -249,7 +249,7 @@ func (i *rootInode) Lookup(ctx context.Context, name string) (kernfs.Inode, erro
return ri, nil
}
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
// IterDirents implements kernfs.Inode.IterDirents.
diff --git a/pkg/sentry/fsimpl/devpts/devpts_test.go b/pkg/sentry/fsimpl/devpts/devpts_test.go
index 448390cfe..1ef07d702 100644
--- a/pkg/sentry/fsimpl/devpts/devpts_test.go
+++ b/pkg/sentry/fsimpl/devpts/devpts_test.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
func TestSimpleMasterToReplica(t *testing.T) {
@@ -54,3 +55,36 @@ func TestSimpleMasterToReplica(t *testing.T) {
t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr)
}
}
+
+type callback func(*waiter.Entry, waiter.EventMask)
+
+func (cb callback) Callback(entry *waiter.Entry, mask waiter.EventMask) {
+ cb(entry, mask)
+}
+
+func TestEchoDeadlock(t *testing.T) {
+ ctx := contexttest.Context(t)
+ termios := linux.DefaultReplicaTermios
+ termios.LocalFlags |= linux.ECHO
+ ld := newLineDiscipline(termios)
+ outBytes := make([]byte, 32)
+ dst := usermem.BytesIOSequence(outBytes)
+ entry := &waiter.Entry{Callback: callback(func(*waiter.Entry, waiter.EventMask) {
+ ld.inputQueueRead(ctx, dst)
+ })}
+ ld.masterWaiter.EventRegister(entry, waiter.ReadableEvents)
+ defer ld.masterWaiter.EventUnregister(entry)
+ inBytes := []byte("hello, tty\n")
+ n, err := ld.inputQueueWrite(ctx, usermem.BytesIOSequence(inBytes))
+ if err != nil {
+ t.Fatalf("inputQueueWrite: %v", err)
+ }
+ if int(n) != len(inBytes) {
+ t.Fatalf("read wrong length: got %d, want %d", n, len(inBytes))
+ }
+ outStr := string(outBytes[:n])
+ inStr := string(inBytes)
+ if outStr != inStr {
+ t.Fatalf("written and read strings do not match: got %q, want %q", outStr, inStr)
+ }
+}
diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go
index e94a5bac3..609623f9f 100644
--- a/pkg/sentry/fsimpl/devpts/line_discipline.go
+++ b/pkg/sentry/fsimpl/devpts/line_discipline.go
@@ -20,10 +20,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -70,6 +70,10 @@ const (
// +------------------------| output queue |<--------------------------+
// (outputQueueRead) +--------------+ (outputQueueWrite)
//
+// There is special handling for the ECHO option, where bytes written to the
+// input queue are also output back to the terminal by being written to
+// l.outQueue by the input queue transformer.
+//
// Lock order:
// termiosMu
// inQueue.mu
@@ -126,7 +130,6 @@ func (l *lineDiscipline) getTermios(task *kernel.Task, args arch.SyscallArgument
// setTermios sets a linux.Termios for the tty.
func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArguments) (uintptr, error) {
l.termiosMu.Lock()
- defer l.termiosMu.Unlock()
oldCanonEnabled := l.termios.LEnabled(linux.ICANON)
// We must copy a Termios struct, not KernelTermios.
var t linux.Termios
@@ -141,7 +144,10 @@ func (l *lineDiscipline) setTermios(task *kernel.Task, args arch.SyscallArgument
l.inQueue.pushWaitBufLocked(l)
l.inQueue.readable = true
l.inQueue.mu.Unlock()
+ l.termiosMu.Unlock()
l.replicaWaiter.Notify(waiter.ReadableEvents)
+ } else {
+ l.termiosMu.Unlock()
}
return 0, err
@@ -179,33 +185,42 @@ func (l *lineDiscipline) inputQueueReadSize(t *kernel.Task, io usermem.IO, args
func (l *lineDiscipline) inputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) {
l.termiosMu.RLock()
- defer l.termiosMu.RUnlock()
- n, pushed, err := l.inQueue.read(ctx, dst, l)
+ n, pushed, notifyEcho, err := l.inQueue.read(ctx, dst, l)
+ l.termiosMu.RUnlock()
if err != nil {
return 0, err
}
if n > 0 {
- l.masterWaiter.Notify(waiter.WritableEvents)
+ if notifyEcho {
+ l.masterWaiter.Notify(waiter.ReadableEvents | waiter.WritableEvents)
+ } else {
+ l.masterWaiter.Notify(waiter.WritableEvents)
+ }
if pushed {
l.replicaWaiter.Notify(waiter.ReadableEvents)
}
return n, nil
+ } else if notifyEcho {
+ l.masterWaiter.Notify(waiter.ReadableEvents)
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
func (l *lineDiscipline) inputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) {
l.termiosMu.RLock()
- defer l.termiosMu.RUnlock()
- n, err := l.inQueue.write(ctx, src, l)
+ n, notifyEcho, err := l.inQueue.write(ctx, src, l)
+ l.termiosMu.RUnlock()
if err != nil {
return 0, err
}
+ if notifyEcho {
+ l.masterWaiter.Notify(waiter.ReadableEvents)
+ }
if n > 0 {
l.replicaWaiter.Notify(waiter.ReadableEvents)
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, io usermem.IO, args arch.SyscallArguments) error {
@@ -214,8 +229,9 @@ func (l *lineDiscipline) outputQueueReadSize(t *kernel.Task, io usermem.IO, args
func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequence) (int64, error) {
l.termiosMu.RLock()
- defer l.termiosMu.RUnlock()
- n, pushed, err := l.outQueue.read(ctx, dst, l)
+ // Ignore notifyEcho, as it cannot happen when reading from the output queue.
+ n, pushed, _, err := l.outQueue.read(ctx, dst, l)
+ l.termiosMu.RUnlock()
if err != nil {
return 0, err
}
@@ -226,13 +242,14 @@ func (l *lineDiscipline) outputQueueRead(ctx context.Context, dst usermem.IOSequ
}
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSequence) (int64, error) {
l.termiosMu.RLock()
- defer l.termiosMu.RUnlock()
- n, err := l.outQueue.write(ctx, src, l)
+ // Ignore notifyEcho, as it cannot happen when writing to the output queue.
+ n, _, err := l.outQueue.write(ctx, src, l)
+ l.termiosMu.RUnlock()
if err != nil {
return 0, err
}
@@ -240,13 +257,14 @@ func (l *lineDiscipline) outputQueueWrite(ctx context.Context, src usermem.IOSeq
l.masterWaiter.Notify(waiter.ReadableEvents)
return n, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// transformer is a helper interface to make it easier to stateify queue.
type transformer interface {
// transform functions require queue's mutex to be held.
- transform(*lineDiscipline, *queue, []byte) int
+ // The boolean indicates whether there was any echoed bytes.
+ transform(*lineDiscipline, *queue, []byte) (int, bool)
}
// outputQueueTransformer implements transformer. It performs line discipline
@@ -261,7 +279,7 @@ type outputQueueTransformer struct{}
// Preconditions:
// * l.termiosMu must be held for reading.
// * q.mu must be held.
-func (*outputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) int {
+func (*outputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) (int, bool) {
// transformOutput is effectively always in noncanonical mode, as the
// master termios never has ICANON set.
@@ -270,7 +288,7 @@ func (*outputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte
if len(q.readBuf) > 0 {
q.readable = true
}
- return len(buf)
+ return len(buf), false
}
var ret int
@@ -321,7 +339,7 @@ func (*outputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte
if len(q.readBuf) > 0 {
q.readable = true
}
- return ret
+ return ret, false
}
// inputQueueTransformer implements transformer. It performs line discipline
@@ -334,15 +352,17 @@ type inputQueueTransformer struct{}
// transformed according to flags set in the termios struct. See
// drivers/tty/n_tty.c:n_tty_receive_char_special for an analogous kernel
// function.
+// It returns an extra boolean indicating whether any characters need to be
+// echoed, in which case we need to notify readers.
//
// Preconditions:
// * l.termiosMu must be held for reading.
// * q.mu must be held.
-func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) int {
+func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte) (int, bool) {
// If there's a line waiting to be read in canonical mode, don't write
// anything else to the read buffer.
if l.termios.LEnabled(linux.ICANON) && q.readable {
- return 0
+ return 0, false
}
maxBytes := nonCanonMaxBytes
@@ -351,6 +371,7 @@ func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte)
}
var ret int
+ var notifyEcho bool
for len(buf) > 0 && len(q.readBuf) < canonMaxBytes {
size := l.peek(buf)
cBytes := append([]byte{}, buf[:size]...)
@@ -397,7 +418,7 @@ func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte)
// Anything written to the readBuf will have to be echoed.
if l.termios.LEnabled(linux.ECHO) {
l.outQueue.writeBytes(cBytes, l)
- l.masterWaiter.Notify(waiter.ReadableEvents)
+ notifyEcho = true
}
// If we finish a line, make it available for reading.
@@ -412,7 +433,7 @@ func (*inputQueueTransformer) transform(l *lineDiscipline, q *queue, buf []byte)
q.readable = true
}
- return ret
+ return ret, notifyEcho
}
// shouldDiscard returns whether c should be discarded. In canonical mode, if
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 93c031c89..9a1a245dc 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -17,6 +17,7 @@ package devpts
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
@@ -24,7 +25,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/unimpl"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -80,7 +80,7 @@ func (mi *masterInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs
// SetStat implements kernfs.Inode.SetStat
func (mi *masterInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
if opts.Stat.Mask&linux.STATX_SIZE != 0 {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
return mi.InodeAttrs.SetStat(ctx, vfsfs, creds, opts)
}
@@ -132,7 +132,7 @@ func (mfd *masterFileDescription) Ioctl(ctx context.Context, io usermem.IO, args
t := kernel.TaskFromContext(ctx)
if t == nil {
// ioctl(2) may only be called from a task goroutine.
- return 0, syserror.ENOTTY
+ return 0, linuxerr.ENOTTY
}
switch cmd := args[1].Uint(); cmd {
@@ -177,7 +177,7 @@ func (mfd *masterFileDescription) Ioctl(ctx context.Context, io usermem.IO, args
return mfd.t.setForegroundProcessGroup(ctx, args, true /* isMaster */)
default:
maybeEmitUnimplementedEvent(ctx, cmd)
- return 0, syserror.ENOTTY
+ return 0, linuxerr.ENOTTY
}
}
diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go
index 47b0f1599..85aeefa43 100644
--- a/pkg/sentry/fsimpl/devpts/queue.go
+++ b/pkg/sentry/fsimpl/devpts/queue.go
@@ -17,12 +17,12 @@ package devpts
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -98,17 +98,19 @@ func (q *queue) readableSize(t *kernel.Task, io usermem.IO, args arch.SyscallArg
}
-// read reads from q to userspace. It returns the number of bytes read as well
-// as whether the read caused more readable data to become available (whether
+// read reads from q to userspace. It returns:
+// - The number of bytes read
+// - Whether the read caused more readable data to become available (whether
// data was pushed from the wait buffer to the read buffer).
+// - Whether any data was echoed back (need to notify readers).
//
// Preconditions: l.termiosMu must be held for reading.
-func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipline) (int64, bool, error) {
+func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipline) (int64, bool, bool, error) {
q.mu.Lock()
defer q.mu.Unlock()
if !q.readable {
- return 0, false, syserror.ErrWouldBlock
+ return 0, false, false, linuxerr.ErrWouldBlock
}
if dst.NumBytes() > canonMaxBytes {
@@ -131,19 +133,20 @@ func (q *queue) read(ctx context.Context, dst usermem.IOSequence, l *lineDiscipl
return n, nil
}))
if err != nil {
- return 0, false, err
+ return 0, false, false, err
}
// Move data from the queue's wait buffer to its read buffer.
- nPushed := q.pushWaitBufLocked(l)
+ nPushed, notifyEcho := q.pushWaitBufLocked(l)
- return int64(n), nPushed > 0, nil
+ return int64(n), nPushed > 0, notifyEcho, nil
}
// write writes to q from userspace.
+// The returned boolean indicates whether any data was echoed back.
//
// Preconditions: l.termiosMu must be held for reading.
-func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscipline) (int64, error) {
+func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscipline) (int64, bool, error) {
q.mu.Lock()
defer q.mu.Unlock()
@@ -153,7 +156,7 @@ func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscip
room := waitBufMaxBytes - q.waitBufLen
// If out of room, return EAGAIN.
if room == 0 && copyLen > 0 {
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// Cap the size of the wait buffer.
if copyLen > room {
@@ -173,44 +176,49 @@ func (q *queue) write(ctx context.Context, src usermem.IOSequence, l *lineDiscip
return n, nil
}))
if err != nil {
- return 0, err
+ return 0, false, err
}
// Push data from the wait to the read buffer.
- q.pushWaitBufLocked(l)
+ _, notifyEcho := q.pushWaitBufLocked(l)
- return n, nil
+ return n, notifyEcho, nil
}
// writeBytes writes to q from b.
+// The returned boolean indicates whether any data was echoed back.
//
// Preconditions: l.termiosMu must be held for reading.
-func (q *queue) writeBytes(b []byte, l *lineDiscipline) {
+func (q *queue) writeBytes(b []byte, l *lineDiscipline) bool {
q.mu.Lock()
defer q.mu.Unlock()
// Write to the wait buffer.
q.waitBufAppend(b)
- q.pushWaitBufLocked(l)
+ _, notifyEcho := q.pushWaitBufLocked(l)
+ return notifyEcho
}
// pushWaitBufLocked fills the queue's read buffer with data from the wait
// buffer.
+// The returned boolean indicates whether any data was echoed back.
//
// Preconditions:
// * l.termiosMu must be held for reading.
// * q.mu must be locked.
-func (q *queue) pushWaitBufLocked(l *lineDiscipline) int {
+func (q *queue) pushWaitBufLocked(l *lineDiscipline) (int, bool) {
if q.waitBufLen == 0 {
- return 0
+ return 0, false
}
// Move data from the wait to the read buffer.
var total int
var i int
+ var notifyEcho bool
for i = 0; i < len(q.waitBuf); i++ {
- n := q.transform(l, q, q.waitBuf[i])
+ n, echo := q.transform(l, q, q.waitBuf[i])
total += n
+ notifyEcho = notifyEcho || echo
if n != len(q.waitBuf[i]) {
// The read buffer filled up without consuming the
// entire buffer.
@@ -223,7 +231,7 @@ func (q *queue) pushWaitBufLocked(l *lineDiscipline) int {
q.waitBuf = q.waitBuf[i:]
q.waitBufLen -= uint64(total)
- return total
+ return total, notifyEcho
}
// Precondition: q.mu must be locked.
diff --git a/pkg/sentry/fsimpl/devpts/replica.go b/pkg/sentry/fsimpl/devpts/replica.go
index 96d2054cb..e251897b4 100644
--- a/pkg/sentry/fsimpl/devpts/replica.go
+++ b/pkg/sentry/fsimpl/devpts/replica.go
@@ -17,13 +17,13 @@ package devpts
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -92,7 +92,7 @@ func (ri *replicaInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vf
// SetStat implements kernfs.Inode.SetStat
func (ri *replicaInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
if opts.Stat.Mask&linux.STATX_SIZE != 0 {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
return ri.InodeAttrs.SetStat(ctx, vfsfs, creds, opts)
}
@@ -141,7 +141,7 @@ func (rfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, arg
t := kernel.TaskFromContext(ctx)
if t == nil {
// ioctl(2) may only be called from a task goroutine.
- return 0, syserror.ENOTTY
+ return 0, linuxerr.ENOTTY
}
switch cmd := args[1].Uint(); cmd {
@@ -179,7 +179,7 @@ func (rfd *replicaFileDescription) Ioctl(ctx context.Context, io usermem.IO, arg
return rfd.inode.t.setForegroundProcessGroup(ctx, args, false /* isMaster */)
default:
maybeEmitUnimplementedEvent(ctx, cmd)
- return 0, syserror.ENOTTY
+ return 0, linuxerr.ENOTTY
}
}
diff --git a/pkg/sentry/fsimpl/eventfd/BUILD b/pkg/sentry/fsimpl/eventfd/BUILD
index c09fdc7f9..1cb049a29 100644
--- a/pkg/sentry/fsimpl/eventfd/BUILD
+++ b/pkg/sentry/fsimpl/eventfd/BUILD
@@ -9,11 +9,11 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fdnotifier",
"//pkg/hostarch",
"//pkg/log",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/fsimpl/eventfd/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go
index 4f79cfcb7..af5ba5131 100644
--- a/pkg/sentry/fsimpl/eventfd/eventfd.go
+++ b/pkg/sentry/fsimpl/eventfd/eventfd.go
@@ -22,11 +22,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -149,7 +149,7 @@ func (efd *EventFileDescription) hostReadLocked(ctx context.Context, dst usermem
var buf [8]byte
if _, err := unix.Read(efd.hostfd, buf[:]); err != nil {
if err == unix.EWOULDBLOCK {
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
return err
}
@@ -167,7 +167,7 @@ func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequenc
// We can't complete the read if the value is currently zero.
if efd.val == 0 {
efd.mu.Unlock()
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
// Update the value based on the mode the event is operating in.
@@ -200,7 +200,7 @@ func (efd *EventFileDescription) hostWriteLocked(val uint64) error {
hostarch.ByteOrder.PutUint64(buf[:], val)
_, err := unix.Write(efd.hostfd, buf[:])
if err == unix.EWOULDBLOCK {
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
return err
}
@@ -232,7 +232,7 @@ func (efd *EventFileDescription) Signal(val uint64) error {
// uint64 minus 1.
if val > math.MaxUint64-1-efd.val {
efd.mu.Unlock()
- return syserror.ErrWouldBlock
+ return linuxerr.ErrWouldBlock
}
efd.val += val
diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD
index 2dbc6bfd5..e69de29bb 100644
--- a/pkg/sentry/fsimpl/ext/BUILD
+++ b/pkg/sentry/fsimpl/ext/BUILD
@@ -1,103 +0,0 @@
-load("//tools:defs.bzl", "go_library", "go_test")
-load("//tools/go_generics:defs.bzl", "go_template_instance")
-
-package(licenses = ["notice"])
-
-go_template_instance(
- name = "dirent_list",
- out = "dirent_list.go",
- package = "ext",
- prefix = "dirent",
- template = "//pkg/ilist:generic_list",
- types = {
- "Element": "*dirent",
- "Linker": "*dirent",
- },
-)
-
-go_template_instance(
- name = "fstree",
- out = "fstree.go",
- package = "ext",
- prefix = "generic",
- template = "//pkg/sentry/vfs/genericfstree:generic_fstree",
- types = {
- "Dentry": "dentry",
- },
-)
-
-go_library(
- name = "ext",
- srcs = [
- "block_map_file.go",
- "dentry.go",
- "directory.go",
- "dirent_list.go",
- "ext.go",
- "extent_file.go",
- "file_description.go",
- "filesystem.go",
- "fstree.go",
- "inode.go",
- "regular_file.go",
- "symlink.go",
- "utils.go",
- ],
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/context",
- "//pkg/fd",
- "//pkg/fspath",
- "//pkg/log",
- "//pkg/marshal",
- "//pkg/marshal/primitive",
- "//pkg/safemem",
- "//pkg/sentry/arch",
- "//pkg/sentry/fs",
- "//pkg/sentry/fs/lock",
- "//pkg/sentry/fsimpl/ext/disklayout",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/memmap",
- "//pkg/sentry/socket/unix/transport",
- "//pkg/sentry/syscalls/linux",
- "//pkg/sentry/vfs",
- "//pkg/sync",
- "//pkg/syserror",
- "//pkg/usermem",
- "//pkg/waiter",
- ],
-)
-
-go_test(
- name = "ext_test",
- size = "small",
- srcs = [
- "block_map_test.go",
- "ext_test.go",
- "extent_test.go",
- ],
- data = [
- "//pkg/sentry/fsimpl/ext:assets/bigfile.txt",
- "//pkg/sentry/fsimpl/ext:assets/file.txt",
- "//pkg/sentry/fsimpl/ext:assets/tiny.ext2",
- "//pkg/sentry/fsimpl/ext:assets/tiny.ext3",
- "//pkg/sentry/fsimpl/ext:assets/tiny.ext4",
- ],
- library = ":ext",
- deps = [
- "//pkg/abi/linux",
- "//pkg/context",
- "//pkg/fspath",
- "//pkg/marshal/primitive",
- "//pkg/sentry/contexttest",
- "//pkg/sentry/fsimpl/ext/disklayout",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/vfs",
- "//pkg/syserror",
- "//pkg/test/testutil",
- "//pkg/usermem",
- "@com_github_google_go_cmp//cmp:go_default_library",
- "@com_github_google_go_cmp//cmp/cmpopts:go_default_library",
- ],
-)
diff --git a/pkg/sentry/fsimpl/ext/README.md b/pkg/sentry/fsimpl/ext/README.md
deleted file mode 100644
index af00cfda8..000000000
--- a/pkg/sentry/fsimpl/ext/README.md
+++ /dev/null
@@ -1,117 +0,0 @@
-## EXT(2/3/4) File System
-
-This is a filesystem driver which supports ext2, ext3 and ext4 filesystems.
-Linux has specialized drivers for each variant but none which supports all. This
-library takes advantage of ext's backward compatibility and understands the
-internal organization of on-disk structures to support all variants.
-
-This driver implementation diverges from the Linux implementations in being more
-forgiving about versioning. For instance, if a filesystem contains both extent
-based inodes and classical block map based inodes, this driver will not complain
-and interpret them both correctly. While in Linux this would be an issue. This
-blurs the line between the three ext fs variants.
-
-Ext2 is considered deprecated as of Red Hat Enterprise Linux 7, and ext3 has
-been superseded by ext4 by large performance gains. Thus it is recommended to
-upgrade older filesystem images to ext4 using e2fsprogs for better performance.
-
-### Read Only
-
-This driver currently only allows read only operations. A lot of the design
-decisions are based on this feature. There are plans to implement write (the
-process for which is documented in the future work section).
-
-### Performance
-
-One of the biggest wins about this driver is that it directly talks to the
-underlying block device (or whatever persistent storage is being used), instead
-of making expensive RPCs to a gofer.
-
-Another advantage is that ext fs supports fast concurrent reads. Currently the
-device is represented using a `io.ReaderAt` which allows for concurrent reads.
-All reads are directly passed to the device driver which intelligently serves
-the read requests in the optimal order. There is no congestion due to locking
-while reading in the filesystem level.
-
-Reads are optimized further in the way file data is transferred over to user
-memory. Ext fs directly copies over file data from disk into user memory with no
-additional allocations on the way. We can only get faster by preloading file
-data into memory (see future work section).
-
-The internal structures used to represent files, inodes and file descriptors use
-a lot of inheritance. With the level of indirection that an interface adds with
-an internal pointer, it can quickly fragment a structure across memory. As this
-runs along side a full blown kernel (which is memory intensive), having a
-fragmented struct might hurt performance. Hence these internal structures,
-though interfaced, are tightly packed in memory using the same inheritance
-pattern that pkg/sentry/vfs uses. The pkg/sentry/fsimpl/ext/disklayout package
-makes an execption to this pattern for reasons documented in the package.
-
-### Security
-
-This driver also intends to help sandbox the container better by reducing the
-surface of the host kernel that the application touches. It prevents the
-application from exploiting vulnerabilities in the host filesystem driver. All
-`io.ReaderAt.ReadAt()` calls are translated to `pread(2)` which are directly
-passed to the device driver in the kernel. Hence this reduces the surface for
-attack.
-
-The application can not affect any host filesystems other than the one passed
-via block device by the user.
-
-### Future Work
-
-#### Write
-
-To support write operations we would need to modify the block device underneath.
-Currently, the driver does not modify the device at all, not even for updating
-the access times for reads. Modifying the filesystem incorrectly can corrupt it
-and render it unreadable for other correct ext(x) drivers. Hence caution must be
-maintained while modifying metadata structures.
-
-Ext4 specifically is built for performance and has added a lot of complexity as
-to how metadata structures are modified. For instance, files that are organized
-via an extent tree which must be balanced and file data blocks must be placed in
-the same extent as much as possible to increase locality. Such properties must
-be maintained while modifying the tree.
-
-Ext filesystems boast a lot about locality, which plays a big role in them being
-performant. The block allocation algorithm in Linux does a good job in keeping
-related data together. This behavior must be maintained as much as possible,
-else we might end up degrading the filesystem performance over time.
-
-Ext4 also supports a wide variety of features which are specialized for varying
-use cases. Implementing all of them can get difficult very quickly.
-
-Ext(x) checksums all its metadata structures to check for corruption, so
-modification of any metadata struct must correspond with re-checksumming the
-struct. Linux filesystem drivers also order on-disk updates intelligently to not
-corrupt the filesystem and also remain performant. The in-memory metadata
-structures must be kept in sync with what is on disk.
-
-There is also replication of some important structures across the filesystem.
-All replicas must be updated when their original copy is updated. There is also
-provisioning for snapshotting which must be kept in mind, although it should not
-affect this implementation unless we allow users to create filesystem snapshots.
-
-Ext4 also introduced journaling (jbd2). The journal must be updated
-appropriately.
-
-#### Performance
-
-To improve performance we should implement a buffer cache, and optionally, read
-ahead for small files. While doing so we must also keep in mind the memory usage
-and have a reasonable cap on how much file data we want to hold in memory.
-
-#### Features
-
-Our current implementation will work with most ext4 filesystems for readonly
-purposed. However, the following features are not supported yet:
-
-- Journal
-- Snapshotting
-- Extended Attributes
-- Hash Tree Directories
-- Meta Block Groups
-- Multiple Mount Protection
-- Bigalloc
diff --git a/pkg/sentry/fsimpl/ext/assets/README.md b/pkg/sentry/fsimpl/ext/assets/README.md
deleted file mode 100644
index 6f1e81b3a..000000000
--- a/pkg/sentry/fsimpl/ext/assets/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-### Tiny Ext(2/3/4) Images
-
-The images are of size 64Kb which supports 64 1k blocks and 16 inodes. This is
-the smallest size mkfs.ext(2/3/4) works with.
-
-These images were generated using the following commands.
-
-```bash
-fallocate -l 64K tiny.ext$VERSION
-mkfs.ext$VERSION -j tiny.ext$VERSION
-```
-
-where `VERSION` is `2`, `3` or `4`.
-
-You can mount it using:
-
-```bash
-sudo mount -o loop tiny.ext$VERSION $MOUNTPOINT
-```
-
-`file.txt`, `bigfile.txt` and `symlink.txt` were added to this image by just
-mounting it and copying (while preserving links) those files to the mountpoint
-directory using:
-
-```bash
-sudo cp -P {file.txt,symlink.txt,bigfile.txt} $MOUNTPOINT
-```
-
-The files in this directory mirror the contents and organisation of the files
-stored in the image.
-
-You can umount the filesystem using:
-
-```bash
-sudo umount $MOUNTPOINT
-```
diff --git a/pkg/sentry/fsimpl/ext/assets/bigfile.txt b/pkg/sentry/fsimpl/ext/assets/bigfile.txt
deleted file mode 100644
index 3857cf516..000000000
--- a/pkg/sentry/fsimpl/ext/assets/bigfile.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus faucibus eleifend orci, ut ornare nibh faucibus eu. Cras at condimentum massa. Nullam luctus, elit non porttitor congue, sapien diam feugiat sapien, sed eleifend nulla mauris non arcu. Sed lacinia mauris magna, eu mollis libero varius sit amet. Donec mollis, quam convallis commodo posuere, dolor nisi placerat nisi, in faucibus augue mi eu lorem. In pharetra consectetur faucibus. Ut euismod ex efficitur egestas tincidunt. Maecenas condimentum ut ante in rutrum. Vivamus sed arcu tempor, faucibus turpis et, lacinia diam.
-
-Sed in lacus vel nisl interdum bibendum in sed justo. Nunc tellus risus, molestie vitae arcu sed, molestie tempus ligula. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc risus neque, volutpat et ante non, ullamcorper condimentum ante. Aliquam sed metus in urna condimentum convallis. Vivamus ut libero mauris. Proin mollis posuere consequat. Vestibulum placerat mollis est et pulvinar.
-
-Donec rutrum odio ac diam pharetra, id fermentum magna cursus. Pellentesque in dapibus elit, et condimentum orci. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse euismod dapibus est, id vestibulum mauris. Nulla facilisi. Nulla cursus gravida nisi. Phasellus vestibulum rutrum lectus, a dignissim mauris hendrerit vitae. In at elementum mauris. Integer vel efficitur velit. Nullam fringilla sapien mi, quis luctus neque efficitur ac. Aenean nec quam dapibus nunc commodo pharetra. Proin sapien mi, fermentum aliquet vulputate non, aliquet porttitor diam. Quisque lacinia, urna et finibus fermentum, nunc lacus vehicula ex, sed congue metus lectus ac quam. Aliquam erat volutpat. Suspendisse sodales, dolor ut tincidunt finibus, augue erat varius tellus, a interdum erat sem at nunc. Vestibulum cursus iaculis sapien, vitae feugiat dui auctor quis.
-
-Pellentesque nec maximus nulla, eu blandit diam. Maecenas quis arcu ornare, congue ante at, vehicula ipsum. Praesent feugiat mauris rutrum sem fermentum, nec luctus ipsum placerat. Pellentesque placerat ipsum at dignissim fringilla. Vivamus et posuere sem, eget hendrerit felis. Aenean vulputate, augue vel mollis feugiat, justo ipsum mollis dolor, eu mollis elit neque ut ipsum. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Fusce bibendum sem quam, vulputate laoreet mi dapibus imperdiet. Sed a purus non nibh pretium aliquet. Integer eget luctus augue, vitae tincidunt magna. Ut eros enim, egestas eu nulla et, lobortis egestas arcu. Cras id ipsum ac justo lacinia rutrum. Vivamus lectus leo, ultricies sed justo at, pellentesque feugiat magna. Ut sollicitudin neque elit, vel ornare mauris commodo id.
-
-Duis dapibus orci et sapien finibus finibus. Mauris eleifend, lacus at vestibulum maximus, quam ligula pharetra erat, sit amet dapibus neque elit vitae neque. In bibendum sollicitudin erat, eget ultricies tortor malesuada at. Sed sit amet orci turpis. Donec feugiat ligula nibh, molestie tincidunt lectus elementum id. Donec volutpat maximus nibh, in vulputate felis posuere eu. Cras tincidunt ullamcorper lacus. Phasellus porta lorem auctor, congue magna a, commodo elit.
-
-Etiam auctor mi quis elit sodales, eu pulvinar arcu condimentum. Aenean imperdiet risus et dapibus tincidunt. Nullam tincidunt dictum dui, sed commodo urna rutrum id. Ut mollis libero vel elit laoreet bibendum. Quisque arcu arcu, tincidunt at ultricies id, vulputate nec metus. In tristique posuere quam sit amet volutpat. Vivamus scelerisque et nunc at dapibus. Fusce finibus libero ut ligula pretium rhoncus. Mauris non elit in arcu finibus imperdiet. Pellentesque nec massa odio. Proin rutrum mauris non sagittis efficitur. Aliquam auctor quam at dignissim faucibus. Ut eget ligula in magna posuere ultricies vitae sit amet turpis. Duis maximus odio nulla. Donec gravida sem tristique tempus scelerisque.
-
-Interdum et malesuada fames ac ante ipsum primis in faucibus. Fusce pharetra magna vulputate aliquet tempus. Duis id hendrerit arcu. Quisque ut ex elit. Integer velit orci, venenatis ut sapien ac, placerat porttitor dui. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nunc hendrerit cursus diam, hendrerit finibus ipsum scelerisque ut. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos.
-
-Nulla non euismod neque. Phasellus vel sapien eu metus pulvinar rhoncus. Suspendisse eu mollis tellus, quis vestibulum tortor. Maecenas interdum dolor sed nulla fermentum maximus. Donec imperdiet ullamcorper condimentum. Nam quis nibh ante. Praesent quis tellus ut tortor pulvinar blandit sit amet ut sapien. Vestibulum est orci, pellentesque vitae tristique sit amet, tristique non felis.
-
-Vivamus sodales pellentesque varius. Sed vel tempus ligula. Nulla tristique nisl vel dui facilisis, ac sodales augue hendrerit. Proin augue nisi, vestibulum quis augue nec, sagittis tincidunt velit. Vestibulum euismod, nulla nec sodales faucibus, urna sapien vulputate magna, id varius metus sapien ut neque. Duis in mollis urna, in scelerisque enim. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc condimentum dictum turpis, et egestas neque dapibus eget. Quisque fringilla, dui eu venenatis eleifend, erat nibh lacinia urna, at lacinia lacus sapien eu dui. Duis eu erat ut mi lacinia convallis a sed ex.
-
-Fusce elit metus, tincidunt nec eleifend a, hendrerit nec ligula. Duis placerat finibus sollicitudin. In euismod porta tellus, in luctus justo bibendum bibendum. Maecenas at magna eleifend lectus tincidunt suscipit ut a ligula. Nulla tempor accumsan felis, fermentum dapibus est eleifend vitae. Mauris urna sem, fringilla at ultricies non, ultrices in arcu. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nam vehicula nunc at laoreet imperdiet. Nunc tristique ut risus id aliquet. Integer eleifend massa orci.
-
-Vestibulum sed ante sollicitudin nisi fringilla bibendum nec vel quam. Sed pretium augue eu ligula congue pulvinar. Donec vitae magna tincidunt, pharetra lacus id, convallis nulla. Cras viverra nisl nisl, varius convallis leo vulputate nec. Morbi at consequat dui, sed aliquet metus. Sed suscipit fermentum mollis. Maecenas nec mi sodales, tincidunt purus in, tristique mauris. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec interdum mi in velit efficitur, quis ultrices ex imperdiet. Sed vestibulum, magna ut tristique pretium, mi ipsum placerat tellus, non tempor enim augue et ex. Pellentesque eget felis quis ante sodales viverra ac sed lacus. Donec suscipit tempus massa, eget laoreet massa molestie at.
-
-Aenean fringilla dui non aliquet consectetur. Fusce cursus quam nec orci hendrerit faucibus. Donec consequat suscipit enim, non volutpat lectus auctor interdum. Proin lorem purus, maximus vel orci vitae, suscipit egestas turpis. Donec risus urna, congue a sem eu, aliquet placerat odio. Morbi gravida tristique turpis, quis efficitur enim. Nunc interdum gravida ipsum vel facilisis. Nunc congue finibus sollicitudin. Quisque euismod aliquet lectus et tincidunt. Curabitur ultrices sem ut mi fringilla fermentum. Morbi pretium, nisi sit amet dapibus congue, dolor enim consectetur risus, a interdum ligula odio sed odio. Quisque facilisis, mi at suscipit gravida, nunc sapien cursus justo, ut luctus odio nulla quis leo. Integer condimentum lobortis mauris, non egestas tellus lobortis sit amet.
-
-In sollicitudin velit ac ante vehicula, vitae varius tortor mollis. In hac habitasse platea dictumst. Quisque et orci lorem. Integer malesuada fringilla luctus. Pellentesque malesuada, mi non lobortis porttitor, ante ligula vulputate ante, nec dictum risus eros sit amet sapien. Nulla aliquam lorem libero, ac varius nulla tristique eget. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Ut pellentesque mauris orci, vel consequat mi varius a. Ut sit amet elit vulputate, lacinia metus non, fermentum nisl. Pellentesque eu nisi sed quam egestas blandit. Duis sit amet lobortis dolor. Donec consectetur sem interdum, tristique elit sit amet, sodales lacus. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Fusce id aliquam augue. Sed pretium congue risus vitae lacinia. Vestibulum non vulputate risus, ut malesuada justo.
-
-Sed odio elit, consectetur ac mauris quis, consequat commodo libero. Fusce sodales velit vulputate pulvinar fermentum. Donec iaculis nec nisl eget faucibus. Mauris at dictum velit. Donec fermentum lectus eu viverra volutpat. Aliquam consequat facilisis lorem, cursus consequat dui bibendum ullamcorper. Pellentesque nulla magna, imperdiet at magna et, cursus egestas enim. Nullam semper molestie lectus sit amet semper. Duis eget tincidunt est. Integer id neque risus. Integer ultricies hendrerit vestibulum. Donec blandit blandit sagittis. Nunc consectetur vitae nisi consectetur volutpat.
-
-Nulla id lorem fermentum, efficitur magna a, hendrerit dui. Vivamus sagittis orci gravida, bibendum quam eget, molestie est. Phasellus nec enim tincidunt, volutpat sapien non, laoreet diam. Nulla posuere enim nec porttitor lobortis. Donec auctor odio ut orci eleifend, ut eleifend purus convallis. Interdum et malesuada fames ac ante ipsum primis in faucibus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut hendrerit, purus eget viverra tincidunt, sem magna imperdiet libero, et aliquam turpis neque vitae elit. Maecenas semper varius iaculis. Cras non lorem quis quam bibendum eleifend in et libero. Curabitur at purus mauris. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus porta diam sed elit eleifend gravida.
-
-Nulla facilisi. Ut ultricies diam vel diam consectetur, vel porta augue molestie. Fusce interdum sapien et metus facilisis pellentesque. Nulla convallis sem at nunc vehicula facilisis. Nam ac rutrum purus. Nunc bibendum, dolor sit amet tempus ullamcorper, lorem leo tempor sem, id fringilla nunc augue scelerisque augue. Nullam sit amet rutrum nisl. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Donec sed mauris gravida eros vehicula sagittis at eget orci. Cras elementum, eros at accumsan bibendum, libero neque blandit purus, vitae vestibulum libero massa ac nibh. Integer at placerat nulla. Mauris eu eleifend orci. Aliquam consequat ligula vitae erat porta lobortis. Duis fermentum elit ac aliquet ornare.
-
-Mauris eget cursus tellus, eget sodales purus. Aliquam malesuada, augue id vulputate finibus, nisi ex bibendum nisl, sit amet laoreet quam urna a dolor. Nullam ultricies, sapien eu laoreet consequat, erat eros dignissim diam, ultrices sodales lectus mauris et leo. Morbi lacinia eu ante at tempus. Sed iaculis finibus magna malesuada efficitur. Donec faucibus erat sit amet elementum feugiat. Praesent a placerat nisi. Etiam lacinia gravida diam, et sollicitudin sapien tincidunt ut.
-
-Maecenas felis quam, tincidunt vitae venenatis scelerisque, viverra vitae odio. Phasellus enim neque, ultricies suscipit malesuada sit amet, vehicula sit amet purus. Nulla placerat sit amet dui vel tincidunt. Nam quis neque vel magna commodo egestas. Vestibulum sagittis rutrum lorem ut congue. Maecenas vel ultrices tellus. Donec efficitur, urna ac consequat iaculis, lorem felis pharetra eros, eget faucibus orci lectus sit amet arcu.
-
-Ut a tempus nisi. Nulla facilisi. Praesent vulputate maximus mi et dapibus. Sed sit amet libero ac augue hendrerit efficitur in a sapien. Mauris placerat velit sit amet tellus sollicitudin faucibus. Donec egestas a magna ac suscipit. Duis enim sapien, mollis sed egestas et, vestibulum vel leo.
-
-Proin quis dapibus dui. Donec eu tincidunt nunc. Vivamus eget purus consectetur, maximus ante vitae, tincidunt elit. Aenean mattis dolor a gravida aliquam. Praesent quis tellus id sem maximus vulputate nec sed nulla. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur metus nulla, volutpat volutpat est eu, hendrerit congue erat. Aliquam sollicitudin augue ante. Sed sollicitudin, magna eu consequat elementum, mi augue ullamcorper felis, molestie imperdiet erat metus iaculis est. Proin ac tortor nisi. Pellentesque quis nisi risus. Integer enim sapien, tincidunt quis tortor id, accumsan venenatis mi. Nulla facilisi.
-
-Cras pretium sit amet quam congue maximus. Morbi lacus libero, imperdiet commodo massa sed, scelerisque placerat libero. Cras nisl nisi, consectetur sed bibendum eu, venenatis at enim. Proin sodales justo at quam aliquam, a consectetur mi ornare. Donec porta ac est sit amet efficitur. Suspendisse vestibulum tortor id neque imperdiet, id lacinia risus vehicula. Phasellus ac eleifend purus. Mauris vel gravida ante. Aliquam vitae lobortis risus. Sed vehicula consectetur tincidunt. Nam et justo vitae purus molestie consequat. Pellentesque ipsum ex, convallis quis blandit non, gravida et urna. Donec diam ligula amet.
diff --git a/pkg/sentry/fsimpl/ext/assets/file.txt b/pkg/sentry/fsimpl/ext/assets/file.txt
deleted file mode 100644
index 980a0d5f1..000000000
--- a/pkg/sentry/fsimpl/ext/assets/file.txt
+++ /dev/null
@@ -1 +0,0 @@
-Hello World!
diff --git a/pkg/sentry/fsimpl/ext/assets/symlink.txt b/pkg/sentry/fsimpl/ext/assets/symlink.txt
deleted file mode 120000
index 4c330738c..000000000
--- a/pkg/sentry/fsimpl/ext/assets/symlink.txt
+++ /dev/null
@@ -1 +0,0 @@
-file.txt \ No newline at end of file
diff --git a/pkg/sentry/fsimpl/ext/assets/tiny.ext2 b/pkg/sentry/fsimpl/ext/assets/tiny.ext2
deleted file mode 100644
index 381ade9bf..000000000
--- a/pkg/sentry/fsimpl/ext/assets/tiny.ext2
+++ /dev/null
Binary files differ
diff --git a/pkg/sentry/fsimpl/ext/assets/tiny.ext3 b/pkg/sentry/fsimpl/ext/assets/tiny.ext3
deleted file mode 100644
index 0e97a324c..000000000
--- a/pkg/sentry/fsimpl/ext/assets/tiny.ext3
+++ /dev/null
Binary files differ
diff --git a/pkg/sentry/fsimpl/ext/assets/tiny.ext4 b/pkg/sentry/fsimpl/ext/assets/tiny.ext4
deleted file mode 100644
index a6859736d..000000000
--- a/pkg/sentry/fsimpl/ext/assets/tiny.ext4
+++ /dev/null
Binary files differ
diff --git a/pkg/sentry/fsimpl/ext/benchmark/BUILD b/pkg/sentry/fsimpl/ext/benchmark/BUILD
deleted file mode 100644
index 6c5a559fd..000000000
--- a/pkg/sentry/fsimpl/ext/benchmark/BUILD
+++ /dev/null
@@ -1,17 +0,0 @@
-load("//tools:defs.bzl", "go_test")
-
-package(licenses = ["notice"])
-
-go_test(
- name = "benchmark_test",
- size = "small",
- srcs = ["benchmark_test.go"],
- deps = [
- "//pkg/context",
- "//pkg/fspath",
- "//pkg/sentry/contexttest",
- "//pkg/sentry/fsimpl/ext",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/vfs",
- ],
-)
diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
deleted file mode 100644
index 2ee7cc7ac..000000000
--- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
+++ /dev/null
@@ -1,211 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// These benchmarks emulate memfs benchmarks. Ext4 images must be created
-// before this benchmark is run using the `make_deep_ext4.sh` script at
-// /tmp/image-{depth}.ext4 for all the depths tested below.
-//
-// The benchmark itself cannot run the script because the script requires
-// sudo privileges to create the file system images.
-package benchmark_test
-
-import (
- "fmt"
- "os"
- "runtime"
- "strings"
- "testing"
-
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-var depths = []int{1, 2, 3, 8, 64, 100}
-
-const filename = "file.txt"
-
-// setUp opens imagePath as an ext Filesystem and returns all necessary
-// elements required to run tests. If error is nil, it also returns a tear
-// down function which must be called after the test is run for clean up.
-func setUp(b *testing.B, imagePath string) (context.Context, *vfs.VirtualFilesystem, *vfs.VirtualDentry, func(), error) {
- f, err := os.Open(imagePath)
- if err != nil {
- return nil, nil, nil, nil, err
- }
-
- ctx := contexttest.Context(b)
- creds := auth.CredentialsFromContext(ctx)
-
- // Create VFS.
- vfsObj := &vfs.VirtualFilesystem{}
- if err := vfsObj.Init(ctx); err != nil {
- return nil, nil, nil, nil, err
- }
- vfsObj.MustRegisterFilesystemType("extfs", ext.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- })
- mntns, err := vfsObj.NewMountNamespace(ctx, creds, imagePath, "extfs", &vfs.MountOptions{
- GetFilesystemOptions: vfs.GetFilesystemOptions{
- InternalData: int(f.Fd()),
- },
- })
- if err != nil {
- f.Close()
- return nil, nil, nil, nil, err
- }
-
- root := mntns.Root()
- root.IncRef()
-
- tearDown := func() {
- root.DecRef(ctx)
-
- if err := f.Close(); err != nil {
- b.Fatalf("tearDown failed: %v", err)
- }
- }
- return ctx, vfsObj, &root, tearDown, nil
-}
-
-// mount mounts extfs at the path operation passed. Returns a tear down
-// function which must be called after the test is run for clean up.
-func mount(b *testing.B, imagePath string, vfsfs *vfs.VirtualFilesystem, pop *vfs.PathOperation) func() {
- b.Helper()
-
- f, err := os.Open(imagePath)
- if err != nil {
- b.Fatalf("could not open image at %s: %v", imagePath, err)
- }
-
- ctx := contexttest.Context(b)
- creds := auth.CredentialsFromContext(ctx)
-
- if _, err := vfsfs.MountAt(ctx, creds, imagePath, pop, "extfs", &vfs.MountOptions{
- GetFilesystemOptions: vfs.GetFilesystemOptions{
- InternalData: int(f.Fd()),
- },
- }); err != nil {
- b.Fatalf("failed to mount tmpfs submount: %v", err)
- }
- return func() {
- if err := f.Close(); err != nil {
- b.Fatalf("tearDown failed: %v", err)
- }
- }
-}
-
-// BenchmarkVFS2Ext4fsStat emulates BenchmarkVFS2MemfsStat.
-func BenchmarkVFS2Ext4fsStat(b *testing.B) {
- for _, depth := range depths {
- b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
- ctx, vfsfs, root, tearDown, err := setUp(b, fmt.Sprintf("/tmp/image-%d.ext4", depth))
- if err != nil {
- b.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- creds := auth.CredentialsFromContext(ctx)
- var filePathBuilder strings.Builder
- filePathBuilder.WriteByte('/')
- for i := 1; i <= depth; i++ {
- filePathBuilder.WriteString(fmt.Sprintf("%d", i))
- filePathBuilder.WriteByte('/')
- }
- filePathBuilder.WriteString(filename)
- filePath := filePathBuilder.String()
-
- runtime.GC()
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- stat, err := vfsfs.StatAt(ctx, creds, &vfs.PathOperation{
- Root: *root,
- Start: *root,
- Path: fspath.Parse(filePath),
- FollowFinalSymlink: true,
- }, &vfs.StatOptions{})
- if err != nil {
- b.Fatalf("stat(%q) failed: %v", filePath, err)
- }
- // Sanity check.
- if stat.Size > 0 {
- b.Fatalf("got wrong file size (%d)", stat.Size)
- }
- }
- })
- }
-}
-
-// BenchmarkVFS2ExtfsMountStat emulates BenchmarkVFS2MemfsMountStat.
-func BenchmarkVFS2ExtfsMountStat(b *testing.B) {
- for _, depth := range depths {
- b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
- // Create root extfs with depth 1 so we can mount extfs again at /1/.
- ctx, vfsfs, root, tearDown, err := setUp(b, fmt.Sprintf("/tmp/image-%d.ext4", 1))
- if err != nil {
- b.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- creds := auth.CredentialsFromContext(ctx)
- mountPointName := "/1/"
- pop := vfs.PathOperation{
- Root: *root,
- Start: *root,
- Path: fspath.Parse(mountPointName),
- }
-
- // Save the mount point for later use.
- mountPoint, err := vfsfs.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
- if err != nil {
- b.Fatalf("failed to walk to mount point: %v", err)
- }
- defer mountPoint.DecRef(ctx)
-
- // Create extfs submount.
- mountTearDown := mount(b, fmt.Sprintf("/tmp/image-%d.ext4", depth), vfsfs, &pop)
- defer mountTearDown()
-
- var filePathBuilder strings.Builder
- filePathBuilder.WriteString(mountPointName)
- for i := 1; i <= depth; i++ {
- filePathBuilder.WriteString(fmt.Sprintf("%d", i))
- filePathBuilder.WriteByte('/')
- }
- filePathBuilder.WriteString(filename)
- filePath := filePathBuilder.String()
-
- runtime.GC()
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- stat, err := vfsfs.StatAt(ctx, creds, &vfs.PathOperation{
- Root: *root,
- Start: *root,
- Path: fspath.Parse(filePath),
- FollowFinalSymlink: true,
- }, &vfs.StatOptions{})
- if err != nil {
- b.Fatalf("stat(%q) failed: %v", filePath, err)
- }
- // Sanity check. touch(1) always creates files of size 0 (empty).
- if stat.Size > 0 {
- b.Fatalf("got wrong file size (%d)", stat.Size)
- }
- }
- })
- }
-}
diff --git a/pkg/sentry/fsimpl/ext/benchmark/make_deep_ext4.sh b/pkg/sentry/fsimpl/ext/benchmark/make_deep_ext4.sh
deleted file mode 100755
index d0910da1f..000000000
--- a/pkg/sentry/fsimpl/ext/benchmark/make_deep_ext4.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/bash
-
-# Copyright 2019 The gVisor Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script creates an ext4 image with $1 depth of directories and a file in
-# the inner most directory. The created file is at path /1/2/.../depth/file.txt.
-# The ext4 image is written to $2. The image is temporarily mounted at
-# /tmp/mountpoint. This script must be run with sudo privileges.
-
-# Usage:
-# sudo bash make_deep_ext4.sh {depth} {output path}
-
-# Check positional arguments.
-if [ "$#" -ne 2 ]; then
- echo "Usage: sudo bash make_deep_ext4.sh {depth} {output path}"
- exit 1
-fi
-
-# Make sure depth is a non-negative number.
-if ! [[ "$1" =~ ^[0-9]+$ ]]; then
- echo "Depth must be a non-negative number."
- exit 1
-fi
-
-# Create a 1 MB filesystem image at the requested output path.
-rm -f $2
-fallocate -l 1M $2
-if [ $? -ne 0 ]; then
- echo "fallocate failed"
- exit $?
-fi
-
-# Convert that blank into an ext4 image.
-mkfs.ext4 -j $2
-if [ $? -ne 0 ]; then
- echo "mkfs.ext4 failed"
- exit $?
-fi
-
-# Mount the image.
-MOUNTPOINT=/tmp/mountpoint
-mkdir -p $MOUNTPOINT
-mount -o loop $2 $MOUNTPOINT
-if [ $? -ne 0 ]; then
- echo "mount failed"
- exit $?
-fi
-
-# Create nested directories and the file.
-if [ "$1" -eq 0 ]; then
- FILEPATH=$MOUNTPOINT/file.txt
-else
- FILEPATH=$MOUNTPOINT/$(seq -s '/' 1 $1)/file.txt
-fi
-mkdir -p $(dirname $FILEPATH) || exit
-touch $FILEPATH
-
-# Clean up.
-umount $MOUNTPOINT
-rm -rf $MOUNTPOINT
diff --git a/pkg/sentry/fsimpl/ext/block_map_file.go b/pkg/sentry/fsimpl/ext/block_map_file.go
deleted file mode 100644
index 1165234f9..000000000
--- a/pkg/sentry/fsimpl/ext/block_map_file.go
+++ /dev/null
@@ -1,203 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "io"
- "math"
-
- "gvisor.dev/gvisor/pkg/marshal/primitive"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-const (
- // numDirectBlks is the number of direct blocks in ext block map inodes.
- numDirectBlks = 12
-)
-
-// blockMapFile is a type of regular file which uses direct/indirect block
-// addressing to store file data. This was deprecated in ext4.
-type blockMapFile struct {
- regFile regularFile
-
- // directBlks are the direct blocks numbers. The physical blocks pointed by
- // these holds file data. Contains file blocks 0 to 11.
- directBlks [numDirectBlks]primitive.Uint32
-
- // indirectBlk is the physical block which contains (blkSize/4) direct block
- // numbers (as uint32 integers).
- indirectBlk primitive.Uint32
-
- // doubleIndirectBlk is the physical block which contains (blkSize/4) indirect
- // block numbers (as uint32 integers).
- doubleIndirectBlk primitive.Uint32
-
- // tripleIndirectBlk is the physical block which contains (blkSize/4) doubly
- // indirect block numbers (as uint32 integers).
- tripleIndirectBlk primitive.Uint32
-
- // coverage at (i)th index indicates the amount of file data a node at
- // height (i) covers. Height 0 is the direct block.
- coverage [4]uint64
-}
-
-// Compiles only if blockMapFile implements io.ReaderAt.
-var _ io.ReaderAt = (*blockMapFile)(nil)
-
-// newBlockMapFile is the blockMapFile constructor. It initializes the file to
-// physical blocks map with (at most) the first 12 (direct) blocks.
-func newBlockMapFile(args inodeArgs) (*blockMapFile, error) {
- file := &blockMapFile{}
- file.regFile.impl = file
- file.regFile.inode.init(args, &file.regFile)
-
- for i := uint(0); i < 4; i++ {
- file.coverage[i] = getCoverage(file.regFile.inode.blkSize, i)
- }
-
- blkMap := file.regFile.inode.diskInode.Data()
- for i := 0; i < numDirectBlks; i++ {
- file.directBlks[i].UnmarshalBytes(blkMap[i*4 : (i+1)*4])
- }
- file.indirectBlk.UnmarshalBytes(blkMap[numDirectBlks*4 : (numDirectBlks+1)*4])
- file.doubleIndirectBlk.UnmarshalBytes(blkMap[(numDirectBlks+1)*4 : (numDirectBlks+2)*4])
- file.tripleIndirectBlk.UnmarshalBytes(blkMap[(numDirectBlks+2)*4 : (numDirectBlks+3)*4])
- return file, nil
-}
-
-// ReadAt implements io.ReaderAt.ReadAt.
-func (f *blockMapFile) ReadAt(dst []byte, off int64) (int, error) {
- if len(dst) == 0 {
- return 0, nil
- }
-
- if off < 0 {
- return 0, syserror.EINVAL
- }
-
- offset := uint64(off)
- size := f.regFile.inode.diskInode.Size()
- if offset >= size {
- return 0, io.EOF
- }
-
- // dirBlksEnd is the file offset until which direct blocks cover file data.
- // Direct blocks cover 0 <= file offset < dirBlksEnd.
- dirBlksEnd := numDirectBlks * f.coverage[0]
-
- // indirBlkEnd is the file offset until which the indirect block covers file
- // data. The indirect block covers dirBlksEnd <= file offset < indirBlkEnd.
- indirBlkEnd := dirBlksEnd + f.coverage[1]
-
- // doubIndirBlkEnd is the file offset until which the double indirect block
- // covers file data. The double indirect block covers the range
- // indirBlkEnd <= file offset < doubIndirBlkEnd.
- doubIndirBlkEnd := indirBlkEnd + f.coverage[2]
-
- read := 0
- toRead := len(dst)
- if uint64(toRead)+offset > size {
- toRead = int(size - offset)
- }
- for read < toRead {
- var err error
- var curR int
-
- // Figure out which block to delegate the read to.
- switch {
- case offset < dirBlksEnd:
- // Direct block.
- curR, err = f.read(uint32(f.directBlks[offset/f.regFile.inode.blkSize]), offset%f.regFile.inode.blkSize, 0, dst[read:])
- case offset < indirBlkEnd:
- // Indirect block.
- curR, err = f.read(uint32(f.indirectBlk), offset-dirBlksEnd, 1, dst[read:])
- case offset < doubIndirBlkEnd:
- // Doubly indirect block.
- curR, err = f.read(uint32(f.doubleIndirectBlk), offset-indirBlkEnd, 2, dst[read:])
- default:
- // Triply indirect block.
- curR, err = f.read(uint32(f.tripleIndirectBlk), offset-doubIndirBlkEnd, 3, dst[read:])
- }
-
- read += curR
- offset += uint64(curR)
- if err != nil {
- return read, err
- }
- }
-
- if read < len(dst) {
- return read, io.EOF
- }
- return read, nil
-}
-
-// read is the recursive step of the ReadAt function. It relies on knowing the
-// current node's location on disk (curPhyBlk) and its height in the block map
-// tree. A height of 0 shows that the current node is actually holding file
-// data. relFileOff tells the offset from which we need to start to reading
-// under the current node. It is completely relative to the current node.
-func (f *blockMapFile) read(curPhyBlk uint32, relFileOff uint64, height uint, dst []byte) (int, error) {
- curPhyBlkOff := int64(curPhyBlk) * int64(f.regFile.inode.blkSize)
- if height == 0 {
- toRead := int(f.regFile.inode.blkSize - relFileOff)
- if len(dst) < toRead {
- toRead = len(dst)
- }
-
- n, _ := f.regFile.inode.fs.dev.ReadAt(dst[:toRead], curPhyBlkOff+int64(relFileOff))
- if n < toRead {
- return n, syserror.EIO
- }
- return n, nil
- }
-
- childCov := f.coverage[height-1]
- startIdx := relFileOff / childCov
- endIdx := f.regFile.inode.blkSize / 4 // This is exclusive.
- wantEndIdx := (relFileOff + uint64(len(dst))) / childCov
- wantEndIdx++ // Make this exclusive.
- if wantEndIdx < endIdx {
- endIdx = wantEndIdx
- }
-
- read := 0
- curChildOff := relFileOff % childCov
- for i := startIdx; i < endIdx; i++ {
- var childPhyBlk primitive.Uint32
- err := readFromDisk(f.regFile.inode.fs.dev, curPhyBlkOff+int64(i*4), &childPhyBlk)
- if err != nil {
- return read, err
- }
-
- n, err := f.read(uint32(childPhyBlk), curChildOff, height-1, dst[read:])
- read += n
- if err != nil {
- return read, err
- }
-
- curChildOff = 0
- }
-
- return read, nil
-}
-
-// getCoverage returns the number of bytes a node at the given height covers.
-// Height 0 is the file data block itself. Height 1 is the indirect block.
-//
-// Formula: blkSize * ((blkSize / 4)^height)
-func getCoverage(blkSize uint64, height uint) uint64 {
- return blkSize * uint64(math.Pow(float64(blkSize/4), float64(height)))
-}
diff --git a/pkg/sentry/fsimpl/ext/block_map_test.go b/pkg/sentry/fsimpl/ext/block_map_test.go
deleted file mode 100644
index ed98b482e..000000000
--- a/pkg/sentry/fsimpl/ext/block_map_test.go
+++ /dev/null
@@ -1,160 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "bytes"
- "math/rand"
- "testing"
-
- "github.com/google/go-cmp/cmp"
- "gvisor.dev/gvisor/pkg/marshal/primitive"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
-)
-
-// These consts are for mocking the block map tree.
-const (
- mockBMBlkSize = uint32(16)
- mockBMDiskSize = 2500
-)
-
-// TestBlockMapReader stress tests block map reader functionality. It performs
-// random length reads from all possible positions in the block map structure.
-func TestBlockMapReader(t *testing.T) {
- mockBMFile, want := blockMapSetUp(t)
- n := len(want)
-
- for from := 0; from < n; from++ {
- got := make([]byte, n-from)
-
- if read, err := mockBMFile.ReadAt(got, int64(from)); err != nil {
- t.Fatalf("file read operation from offset %d to %d only read %d bytes: %v", from, n, read, err)
- }
-
- if diff := cmp.Diff(got, want[from:]); diff != "" {
- t.Fatalf("file data from offset %d to %d mismatched (-want +got):\n%s", from, n, diff)
- }
- }
-}
-
-// blkNumGen is a number generator which gives block numbers for building the
-// block map file on disk. It gives unique numbers in a random order which
-// facilitates in creating an extremely fragmented filesystem.
-type blkNumGen struct {
- nums []uint32
-}
-
-// newBlkNumGen is the blkNumGen constructor.
-func newBlkNumGen() *blkNumGen {
- blkNums := &blkNumGen{}
- lim := mockBMDiskSize / mockBMBlkSize
- blkNums.nums = make([]uint32, lim)
- for i := uint32(0); i < lim; i++ {
- blkNums.nums[i] = i
- }
-
- rand.Shuffle(int(lim), func(i, j int) {
- blkNums.nums[i], blkNums.nums[j] = blkNums.nums[j], blkNums.nums[i]
- })
- return blkNums
-}
-
-// next returns the next random block number.
-func (n *blkNumGen) next() uint32 {
- ret := n.nums[0]
- n.nums = n.nums[1:]
- return ret
-}
-
-// blockMapSetUp creates a mock disk and a block map file. It initializes the
-// block map file with 12 direct block, 1 indirect block, 1 double indirect
-// block and 1 triple indirect block (basically fill it till the rim). It
-// initializes the disk to reflect the inode. Also returns the file data that
-// the inode covers and that is written to disk.
-func blockMapSetUp(t *testing.T) (*blockMapFile, []byte) {
- mockDisk := make([]byte, mockBMDiskSize)
- var fileData []byte
- blkNums := newBlkNumGen()
- off := 0
- data := make([]byte, (numDirectBlks+3)*(*primitive.Uint32)(nil).SizeBytes())
-
- // Write the direct blocks.
- for i := 0; i < numDirectBlks; i++ {
- curBlkNum := primitive.Uint32(blkNums.next())
- curBlkNum.MarshalBytes(data[off:])
- off += curBlkNum.SizeBytes()
- fileData = append(fileData, writeFileDataToBlock(mockDisk, uint32(curBlkNum), 0, blkNums)...)
- }
-
- // Write to indirect block.
- indirectBlk := primitive.Uint32(blkNums.next())
- indirectBlk.MarshalBytes(data[off:])
- off += indirectBlk.SizeBytes()
- fileData = append(fileData, writeFileDataToBlock(mockDisk, uint32(indirectBlk), 1, blkNums)...)
-
- // Write to double indirect block.
- doublyIndirectBlk := primitive.Uint32(blkNums.next())
- doublyIndirectBlk.MarshalBytes(data[off:])
- off += doublyIndirectBlk.SizeBytes()
- fileData = append(fileData, writeFileDataToBlock(mockDisk, uint32(doublyIndirectBlk), 2, blkNums)...)
-
- // Write to triple indirect block.
- triplyIndirectBlk := primitive.Uint32(blkNums.next())
- triplyIndirectBlk.MarshalBytes(data[off:])
- fileData = append(fileData, writeFileDataToBlock(mockDisk, uint32(triplyIndirectBlk), 3, blkNums)...)
-
- args := inodeArgs{
- fs: &filesystem{
- dev: bytes.NewReader(mockDisk),
- },
- diskInode: &disklayout.InodeNew{
- InodeOld: disklayout.InodeOld{
- SizeLo: getMockBMFileFize(),
- },
- },
- blkSize: uint64(mockBMBlkSize),
- }
- copy(args.diskInode.Data(), data)
-
- mockFile, err := newBlockMapFile(args)
- if err != nil {
- t.Fatalf("newBlockMapFile failed: %v", err)
- }
- return mockFile, fileData
-}
-
-// writeFileDataToBlock writes random bytes to the block on disk.
-func writeFileDataToBlock(disk []byte, blkNum uint32, height uint, blkNums *blkNumGen) []byte {
- if height == 0 {
- start := blkNum * mockBMBlkSize
- end := start + mockBMBlkSize
- rand.Read(disk[start:end])
- return disk[start:end]
- }
-
- var fileData []byte
- for off := blkNum * mockBMBlkSize; off < (blkNum+1)*mockBMBlkSize; off += 4 {
- curBlkNum := primitive.Uint32(blkNums.next())
- curBlkNum.MarshalBytes(disk[off : off+4])
- fileData = append(fileData, writeFileDataToBlock(disk, uint32(curBlkNum), height-1, blkNums)...)
- }
- return fileData
-}
-
-// getMockBMFileFize gets the size of the mock block map file which is used for
-// testing.
-func getMockBMFileFize() uint32 {
- return uint32(numDirectBlks*getCoverage(uint64(mockBMBlkSize), 0) + getCoverage(uint64(mockBMBlkSize), 1) + getCoverage(uint64(mockBMBlkSize), 2) + getCoverage(uint64(mockBMBlkSize), 3))
-}
diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go
deleted file mode 100644
index 9bfed883a..000000000
--- a/pkg/sentry/fsimpl/ext/dentry.go
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-// dentry implements vfs.DentryImpl.
-//
-// +stateify savable
-type dentry struct {
- vfsd vfs.Dentry
-
- // Protected by filesystem.mu.
- parent *dentry
- name string
-
- // inode is the inode represented by this dentry. Multiple Dentries may
- // share a single non-directory Inode (with hard links). inode is
- // immutable.
- inode *inode
-}
-
-// Compiles only if dentry implements vfs.DentryImpl.
-var _ vfs.DentryImpl = (*dentry)(nil)
-
-// newDentry is the dentry constructor.
-func newDentry(in *inode) *dentry {
- d := &dentry{
- inode: in,
- }
- d.vfsd.Init(d)
- return d
-}
-
-// IncRef implements vfs.DentryImpl.IncRef.
-func (d *dentry) IncRef() {
- d.inode.incRef()
-}
-
-// TryIncRef implements vfs.DentryImpl.TryIncRef.
-func (d *dentry) TryIncRef() bool {
- return d.inode.tryIncRef()
-}
-
-// DecRef implements vfs.DentryImpl.DecRef.
-func (d *dentry) DecRef(ctx context.Context) {
- // FIXME(b/134676337): filesystem.mu may not be locked as required by
- // inode.decRef().
- d.inode.decRef()
-}
-
-// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
-//
-// TODO(b/134676337): Implement inotify.
-func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) {}
-
-// Watches implements vfs.DentryImpl.Watches.
-//
-// TODO(b/134676337): Implement inotify.
-func (d *dentry) Watches() *vfs.Watches {
- return nil
-}
-
-// OnZeroWatches implements vfs.Dentry.OnZeroWatches.
-//
-// TODO(b/134676337): Implement inotify.
-func (d *dentry) OnZeroWatches(context.Context) {}
diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go
deleted file mode 100644
index 512b70ede..000000000
--- a/pkg/sentry/fsimpl/ext/directory.go
+++ /dev/null
@@ -1,312 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// directory represents a directory inode. It holds the childList in memory.
-//
-// +stateify savable
-type directory struct {
- inode inode
-
- // childCache maps filenames to dentries for children for which dentries
- // have been instantiated. childCache is protected by filesystem.mu.
- childCache map[string]*dentry
-
- // mu serializes the changes to childList.
- // Lock Order (outermost locks must be taken first):
- // directory.mu
- // filesystem.mu
- mu sync.Mutex `state:"nosave"`
-
- // childList is a list containing (1) child dirents and (2) fake dirents
- // (with diskDirent == nil) that represent the iteration position of
- // directoryFDs. childList is used to support directoryFD.IterDirents()
- // efficiently. childList is protected by mu.
- childList direntList
-
- // childMap maps the child's filename to the dirent structure stored in
- // childList. This adds some data replication but helps in faster path
- // traversal. For consistency, key == childMap[key].diskDirent.FileName().
- // Immutable.
- childMap map[string]*dirent
-}
-
-// newDirectory is the directory constructor.
-func newDirectory(args inodeArgs, newDirent bool) (*directory, error) {
- file := &directory{
- childCache: make(map[string]*dentry),
- childMap: make(map[string]*dirent),
- }
- file.inode.init(args, file)
-
- // Initialize childList by reading dirents from the underlying file.
- if args.diskInode.Flags().Index {
- // TODO(b/134676337): Support hash tree directories. Currently only the '.'
- // and '..' entries are read in.
-
- // Users cannot navigate this hash tree directory yet.
- log.Warningf("hash tree directory being used which is unsupported")
- return file, nil
- }
-
- // The dirents are organized in a linear array in the file data.
- // Extract the file data and decode the dirents.
- regFile, err := newRegularFile(args)
- if err != nil {
- return nil, err
- }
-
- // buf is used as scratch space for reading in dirents from disk and
- // unmarshalling them into dirent structs.
- buf := make([]byte, disklayout.DirentSize)
- size := args.diskInode.Size()
- for off, inc := uint64(0), uint64(0); off < size; off += inc {
- toRead := size - off
- if toRead > disklayout.DirentSize {
- toRead = disklayout.DirentSize
- }
- if n, err := regFile.impl.ReadAt(buf[:toRead], int64(off)); uint64(n) < toRead {
- return nil, err
- }
-
- var curDirent dirent
- if newDirent {
- curDirent.diskDirent = &disklayout.DirentNew{}
- } else {
- curDirent.diskDirent = &disklayout.DirentOld{}
- }
- curDirent.diskDirent.UnmarshalBytes(buf)
-
- if curDirent.diskDirent.Inode() != 0 && len(curDirent.diskDirent.FileName()) != 0 {
- // Inode number and name length fields being set to 0 is used to indicate
- // an unused dirent.
- file.childList.PushBack(&curDirent)
- file.childMap[curDirent.diskDirent.FileName()] = &curDirent
- }
-
- // The next dirent is placed exactly after this dirent record on disk.
- inc = uint64(curDirent.diskDirent.RecordSize())
- }
-
- return file, nil
-}
-
-func (i *inode) isDir() bool {
- _, ok := i.impl.(*directory)
- return ok
-}
-
-// dirent is the directory.childList node.
-//
-// +stateify savable
-type dirent struct {
- diskDirent disklayout.Dirent
-
- // direntEntry links dirents into their parent directory.childList.
- direntEntry
-}
-
-// directoryFD represents a directory file description. It implements
-// vfs.FileDescriptionImpl.
-//
-// +stateify savable
-type directoryFD struct {
- fileDescription
- vfs.DirectoryFileDescriptionDefaultImpl
-
- // Protected by directory.mu.
- iter *dirent
- off int64
-}
-
-// Compiles only if directoryFD implements vfs.FileDescriptionImpl.
-var _ vfs.FileDescriptionImpl = (*directoryFD)(nil)
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *directoryFD) Release(ctx context.Context) {
- if fd.iter == nil {
- return
- }
-
- dir := fd.inode().impl.(*directory)
- dir.mu.Lock()
- dir.childList.Remove(fd.iter)
- dir.mu.Unlock()
- fd.iter = nil
-}
-
-// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
-func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
- extfs := fd.filesystem()
- dir := fd.inode().impl.(*directory)
-
- dir.mu.Lock()
- defer dir.mu.Unlock()
-
- // Ensure that fd.iter exists and is not linked into dir.childList.
- var child *dirent
- if fd.iter == nil {
- // Start iteration at the beginning of dir.
- child = dir.childList.Front()
- fd.iter = &dirent{}
- } else {
- // Continue iteration from where we left off.
- child = fd.iter.Next()
- dir.childList.Remove(fd.iter)
- }
- for ; child != nil; child = child.Next() {
- // Skip other directoryFD iterators.
- if child.diskDirent != nil {
- childType, ok := child.diskDirent.FileType()
- if !ok {
- // We will need to read the inode off disk. Do not increment
- // ref count here because this inode is not being added to the
- // dentry tree.
- extfs.mu.Lock()
- childInode, err := extfs.getOrCreateInodeLocked(child.diskDirent.Inode())
- extfs.mu.Unlock()
- if err != nil {
- // Usage of the file description after the error is
- // undefined. This implementation would continue reading
- // from the next dirent.
- fd.off++
- dir.childList.InsertAfter(child, fd.iter)
- return err
- }
- childType = fs.ToInodeType(childInode.diskInode.Mode().FileType())
- }
-
- if err := cb.Handle(vfs.Dirent{
- Name: child.diskDirent.FileName(),
- Type: fs.ToDirentType(childType),
- Ino: uint64(child.diskDirent.Inode()),
- NextOff: fd.off + 1,
- }); err != nil {
- dir.childList.InsertBefore(child, fd.iter)
- return err
- }
- fd.off++
- }
- }
- dir.childList.PushBack(fd.iter)
- return nil
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- if whence != linux.SEEK_SET && whence != linux.SEEK_CUR {
- return 0, syserror.EINVAL
- }
-
- dir := fd.inode().impl.(*directory)
-
- dir.mu.Lock()
- defer dir.mu.Unlock()
-
- // Find resulting offset.
- if whence == linux.SEEK_CUR {
- offset += fd.off
- }
-
- if offset < 0 {
- // lseek(2) specifies that EINVAL should be returned if the resulting offset
- // is negative.
- return 0, syserror.EINVAL
- }
-
- n := int64(len(dir.childMap))
- realWantOff := offset
- if realWantOff > n {
- realWantOff = n
- }
- realCurOff := fd.off
- if realCurOff > n {
- realCurOff = n
- }
-
- // Ensure that fd.iter exists and is linked into dir.childList so we can
- // intelligently seek from the optimal position.
- if fd.iter == nil {
- fd.iter = &dirent{}
- dir.childList.PushFront(fd.iter)
- }
-
- // Guess that iterating from the current position is optimal.
- child := fd.iter
- diff := realWantOff - realCurOff // Shows direction and magnitude of travel.
-
- // See if starting from the beginning or end is better.
- abDiff := diff
- if diff < 0 {
- abDiff = -diff
- }
- if abDiff > realWantOff {
- // Starting from the beginning is best.
- child = dir.childList.Front()
- diff = realWantOff
- } else if abDiff > (n - realWantOff) {
- // Starting from the end is best.
- child = dir.childList.Back()
- // (n - 1) because the last non-nil dirent represents the (n-1)th offset.
- diff = realWantOff - (n - 1)
- }
-
- for child != nil {
- // Skip other directoryFD iterators.
- if child.diskDirent != nil {
- if diff == 0 {
- if child != fd.iter {
- dir.childList.Remove(fd.iter)
- dir.childList.InsertBefore(child, fd.iter)
- }
-
- fd.off = offset
- return offset, nil
- }
-
- if diff < 0 {
- diff++
- child = child.Prev()
- } else {
- diff--
- child = child.Next()
- }
- continue
- }
-
- if diff < 0 {
- child = child.Prev()
- } else {
- child = child.Next()
- }
- }
-
- // Reaching here indicates that the offset is beyond the end of the childList.
- dir.childList.Remove(fd.iter)
- dir.childList.PushBack(fd.iter)
- fd.off = offset
- return offset, nil
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/BUILD b/pkg/sentry/fsimpl/ext/disklayout/BUILD
deleted file mode 100644
index d98a05dd8..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/BUILD
+++ /dev/null
@@ -1,48 +0,0 @@
-load("//tools:defs.bzl", "go_library", "go_test")
-
-package(licenses = ["notice"])
-
-go_library(
- name = "disklayout",
- srcs = [
- "block_group.go",
- "block_group_32.go",
- "block_group_64.go",
- "dirent.go",
- "dirent_new.go",
- "dirent_old.go",
- "disklayout.go",
- "extent.go",
- "inode.go",
- "inode_new.go",
- "inode_old.go",
- "superblock.go",
- "superblock_32.go",
- "superblock_64.go",
- "superblock_old.go",
- "test_utils.go",
- ],
- marshal = True,
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/marshal",
- "//pkg/sentry/fs",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/time",
- ],
-)
-
-go_test(
- name = "disklayout_test",
- size = "small",
- srcs = [
- "block_group_test.go",
- "dirent_test.go",
- "extent_test.go",
- "inode_test.go",
- "superblock_test.go",
- ],
- library = ":disklayout",
- deps = ["//pkg/sentry/kernel/time"],
-)
diff --git a/pkg/sentry/fsimpl/ext/disklayout/block_group.go b/pkg/sentry/fsimpl/ext/disklayout/block_group.go
deleted file mode 100644
index 0d56ae9da..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/block_group.go
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "gvisor.dev/gvisor/pkg/marshal"
-)
-
-// BlockGroup represents a Linux ext block group descriptor. An ext file system
-// is split into a series of block groups. This provides an access layer to
-// information needed to access and use a block group.
-//
-// Location:
-// - The block group descriptor table is always placed in the blocks
-// immediately after the block containing the superblock.
-// - The 1st block group descriptor in the original table is in the
-// (sb.FirstDataBlock() + 1)th block.
-// - See SuperBlock docs to see where the block group descriptor table is
-// replicated.
-// - sb.BgDescSize() must be used as the block group descriptor entry size
-// while reading the table from disk.
-//
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#block-group-descriptors.
-type BlockGroup interface {
- marshal.Marshallable
-
- // InodeTable returns the absolute block number of the block containing the
- // inode table. This points to an array of Inode structs. Inode tables are
- // statically allocated at mkfs time. The superblock records the number of
- // inodes per group (length of this table) and the size of each inode struct.
- InodeTable() uint64
-
- // BlockBitmap returns the absolute block number of the block containing the
- // block bitmap. This bitmap tracks the usage of data blocks within this block
- // group and has its own checksum.
- BlockBitmap() uint64
-
- // InodeBitmap returns the absolute block number of the block containing the
- // inode bitmap. This bitmap tracks the usage of this group's inode table
- // entries and has its own checksum.
- InodeBitmap() uint64
-
- // ExclusionBitmap returns the absolute block number of the snapshot exclusion
- // bitmap.
- ExclusionBitmap() uint64
-
- // FreeBlocksCount returns the number of free blocks in the group.
- FreeBlocksCount() uint32
-
- // FreeInodesCount returns the number of free inodes in the group.
- FreeInodesCount() uint32
-
- // DirectoryCount returns the number of inodes that represent directories
- // under this block group.
- DirectoryCount() uint32
-
- // UnusedInodeCount returns the number of unused inodes beyond the last used
- // inode in this group's inode table. As a result, we needn’t scan past the
- // (InodesPerGroup - UnusedInodeCount())th entry in the inode table.
- UnusedInodeCount() uint32
-
- // BlockBitmapChecksum returns the block bitmap checksum. This is calculated
- // using crc32c(FS UUID + group number + entire bitmap).
- BlockBitmapChecksum() uint32
-
- // InodeBitmapChecksum returns the inode bitmap checksum. This is calculated
- // using crc32c(FS UUID + group number + entire bitmap).
- InodeBitmapChecksum() uint32
-
- // Checksum returns this block group's checksum.
- //
- // If SbMetadataCsum feature is set:
- // - checksum is crc32c(FS UUID + group number + group descriptor
- // structure) & 0xFFFF.
- //
- // If SbGdtCsum feature is set:
- // - checksum is crc16(FS UUID + group number + group descriptor
- // structure).
- //
- // SbMetadataCsum and SbGdtCsum should not be both set.
- // If they are, Linux warns and asks to run fsck.
- Checksum() uint16
-
- // Flags returns BGFlags which represents the block group flags.
- Flags() BGFlags
-}
-
-// These are the different block group flags.
-const (
- // BgInodeUninit indicates that inode table and bitmap are not initialized.
- BgInodeUninit uint16 = 0x1
-
- // BgBlockUninit indicates that block bitmap is not initialized.
- BgBlockUninit uint16 = 0x2
-
- // BgInodeZeroed indicates that inode table is zeroed.
- BgInodeZeroed uint16 = 0x4
-)
-
-// BGFlags represents all the different combinations of block group flags.
-type BGFlags struct {
- InodeUninit bool
- BlockUninit bool
- InodeZeroed bool
-}
-
-// ToInt converts a BGFlags struct back to its 16-bit representation.
-func (f BGFlags) ToInt() uint16 {
- var res uint16
-
- if f.InodeUninit {
- res |= BgInodeUninit
- }
- if f.BlockUninit {
- res |= BgBlockUninit
- }
- if f.InodeZeroed {
- res |= BgInodeZeroed
- }
-
- return res
-}
-
-// BGFlagsFromInt converts the 16-bit flag representation to a BGFlags struct.
-func BGFlagsFromInt(flags uint16) BGFlags {
- return BGFlags{
- InodeUninit: flags&BgInodeUninit > 0,
- BlockUninit: flags&BgBlockUninit > 0,
- InodeZeroed: flags&BgInodeZeroed > 0,
- }
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/block_group_32.go b/pkg/sentry/fsimpl/ext/disklayout/block_group_32.go
deleted file mode 100644
index a35fa22a0..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/block_group_32.go
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// BlockGroup32Bit emulates the first half of struct ext4_group_desc in
-// fs/ext4/ext4.h. It is the block group descriptor struct for ext2, ext3 and
-// 32-bit ext4 filesystems. It implements BlockGroup interface.
-//
-// +marshal
-type BlockGroup32Bit struct {
- BlockBitmapLo uint32
- InodeBitmapLo uint32
- InodeTableLo uint32
- FreeBlocksCountLo uint16
- FreeInodesCountLo uint16
- UsedDirsCountLo uint16
- FlagsRaw uint16
- ExcludeBitmapLo uint32
- BlockBitmapChecksumLo uint16
- InodeBitmapChecksumLo uint16
- ItableUnusedLo uint16
- ChecksumRaw uint16
-}
-
-// Compiles only if BlockGroup32Bit implements BlockGroup.
-var _ BlockGroup = (*BlockGroup32Bit)(nil)
-
-// InodeTable implements BlockGroup.InodeTable.
-func (bg *BlockGroup32Bit) InodeTable() uint64 { return uint64(bg.InodeTableLo) }
-
-// BlockBitmap implements BlockGroup.BlockBitmap.
-func (bg *BlockGroup32Bit) BlockBitmap() uint64 { return uint64(bg.BlockBitmapLo) }
-
-// InodeBitmap implements BlockGroup.InodeBitmap.
-func (bg *BlockGroup32Bit) InodeBitmap() uint64 { return uint64(bg.InodeBitmapLo) }
-
-// ExclusionBitmap implements BlockGroup.ExclusionBitmap.
-func (bg *BlockGroup32Bit) ExclusionBitmap() uint64 { return uint64(bg.ExcludeBitmapLo) }
-
-// FreeBlocksCount implements BlockGroup.FreeBlocksCount.
-func (bg *BlockGroup32Bit) FreeBlocksCount() uint32 { return uint32(bg.FreeBlocksCountLo) }
-
-// FreeInodesCount implements BlockGroup.FreeInodesCount.
-func (bg *BlockGroup32Bit) FreeInodesCount() uint32 { return uint32(bg.FreeInodesCountLo) }
-
-// DirectoryCount implements BlockGroup.DirectoryCount.
-func (bg *BlockGroup32Bit) DirectoryCount() uint32 { return uint32(bg.UsedDirsCountLo) }
-
-// UnusedInodeCount implements BlockGroup.UnusedInodeCount.
-func (bg *BlockGroup32Bit) UnusedInodeCount() uint32 { return uint32(bg.ItableUnusedLo) }
-
-// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum.
-func (bg *BlockGroup32Bit) BlockBitmapChecksum() uint32 { return uint32(bg.BlockBitmapChecksumLo) }
-
-// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum.
-func (bg *BlockGroup32Bit) InodeBitmapChecksum() uint32 { return uint32(bg.InodeBitmapChecksumLo) }
-
-// Checksum implements BlockGroup.Checksum.
-func (bg *BlockGroup32Bit) Checksum() uint16 { return bg.ChecksumRaw }
-
-// Flags implements BlockGroup.Flags.
-func (bg *BlockGroup32Bit) Flags() BGFlags { return BGFlagsFromInt(bg.FlagsRaw) }
diff --git a/pkg/sentry/fsimpl/ext/disklayout/block_group_64.go b/pkg/sentry/fsimpl/ext/disklayout/block_group_64.go
deleted file mode 100644
index d54d1d345..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/block_group_64.go
+++ /dev/null
@@ -1,95 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// BlockGroup64Bit emulates struct ext4_group_desc in fs/ext4/ext4.h.
-// It is the block group descriptor struct for 64-bit ext4 filesystems.
-// It implements BlockGroup interface. It is an extension of the 32-bit
-// version of BlockGroup.
-//
-// +marshal
-type BlockGroup64Bit struct {
- // We embed the 32-bit struct here because 64-bit version is just an extension
- // of the 32-bit version.
- BlockGroup32Bit
-
- // 64-bit specific fields.
- BlockBitmapHi uint32
- InodeBitmapHi uint32
- InodeTableHi uint32
- FreeBlocksCountHi uint16
- FreeInodesCountHi uint16
- UsedDirsCountHi uint16
- ItableUnusedHi uint16
- ExcludeBitmapHi uint32
- BlockBitmapChecksumHi uint16
- InodeBitmapChecksumHi uint16
- _ uint32 // Padding to 64 bytes.
-}
-
-// Compiles only if BlockGroup64Bit implements BlockGroup.
-var _ BlockGroup = (*BlockGroup64Bit)(nil)
-
-// Methods to override. Checksum() and Flags() are not overridden.
-
-// InodeTable implements BlockGroup.InodeTable.
-func (bg *BlockGroup64Bit) InodeTable() uint64 {
- return (uint64(bg.InodeTableHi) << 32) | uint64(bg.InodeTableLo)
-}
-
-// BlockBitmap implements BlockGroup.BlockBitmap.
-func (bg *BlockGroup64Bit) BlockBitmap() uint64 {
- return (uint64(bg.BlockBitmapHi) << 32) | uint64(bg.BlockBitmapLo)
-}
-
-// InodeBitmap implements BlockGroup.InodeBitmap.
-func (bg *BlockGroup64Bit) InodeBitmap() uint64 {
- return (uint64(bg.InodeBitmapHi) << 32) | uint64(bg.InodeBitmapLo)
-}
-
-// ExclusionBitmap implements BlockGroup.ExclusionBitmap.
-func (bg *BlockGroup64Bit) ExclusionBitmap() uint64 {
- return (uint64(bg.ExcludeBitmapHi) << 32) | uint64(bg.ExcludeBitmapLo)
-}
-
-// FreeBlocksCount implements BlockGroup.FreeBlocksCount.
-func (bg *BlockGroup64Bit) FreeBlocksCount() uint32 {
- return (uint32(bg.FreeBlocksCountHi) << 16) | uint32(bg.FreeBlocksCountLo)
-}
-
-// FreeInodesCount implements BlockGroup.FreeInodesCount.
-func (bg *BlockGroup64Bit) FreeInodesCount() uint32 {
- return (uint32(bg.FreeInodesCountHi) << 16) | uint32(bg.FreeInodesCountLo)
-}
-
-// DirectoryCount implements BlockGroup.DirectoryCount.
-func (bg *BlockGroup64Bit) DirectoryCount() uint32 {
- return (uint32(bg.UsedDirsCountHi) << 16) | uint32(bg.UsedDirsCountLo)
-}
-
-// UnusedInodeCount implements BlockGroup.UnusedInodeCount.
-func (bg *BlockGroup64Bit) UnusedInodeCount() uint32 {
- return (uint32(bg.ItableUnusedHi) << 16) | uint32(bg.ItableUnusedLo)
-}
-
-// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum.
-func (bg *BlockGroup64Bit) BlockBitmapChecksum() uint32 {
- return (uint32(bg.BlockBitmapChecksumHi) << 16) | uint32(bg.BlockBitmapChecksumLo)
-}
-
-// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum.
-func (bg *BlockGroup64Bit) InodeBitmapChecksum() uint32 {
- return (uint32(bg.InodeBitmapChecksumHi) << 16) | uint32(bg.InodeBitmapChecksumLo)
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/block_group_test.go b/pkg/sentry/fsimpl/ext/disklayout/block_group_test.go
deleted file mode 100644
index e4ce484e4..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/block_group_test.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "testing"
-)
-
-// TestBlockGroupSize tests that the block group descriptor structs are of the
-// correct size.
-func TestBlockGroupSize(t *testing.T) {
- var bgSmall BlockGroup32Bit
- assertSize(t, &bgSmall, 32)
- var bgBig BlockGroup64Bit
- assertSize(t, &bgBig, 64)
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/dirent.go b/pkg/sentry/fsimpl/ext/disklayout/dirent.go
deleted file mode 100644
index 568c8cb4c..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/dirent.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "gvisor.dev/gvisor/pkg/marshal"
- "gvisor.dev/gvisor/pkg/sentry/fs"
-)
-
-const (
- // MaxFileName is the maximum length of an ext fs file's name.
- MaxFileName = 255
-
- // DirentSize is the size of ext dirent structures.
- DirentSize = 263
-)
-
-var (
- // inodeTypeByFileType maps ext4 file types to vfs inode types.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#ftype.
- inodeTypeByFileType = map[uint8]fs.InodeType{
- 0: fs.Anonymous,
- 1: fs.RegularFile,
- 2: fs.Directory,
- 3: fs.CharacterDevice,
- 4: fs.BlockDevice,
- 5: fs.Pipe,
- 6: fs.Socket,
- 7: fs.Symlink,
- }
-)
-
-// The Dirent interface should be implemented by structs representing ext
-// directory entries. These are for the linear classical directories which
-// just store a list of dirent structs. A directory is a series of data blocks
-// where is each data block contains a linear array of dirents. The last entry
-// of the block has a record size that takes it to the end of the block. The
-// end of the directory is when you read dirInode.Size() bytes from the blocks.
-//
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#linear-classic-directories.
-type Dirent interface {
- marshal.Marshallable
-
- // Inode returns the absolute inode number of the underlying inode.
- // Inode number 0 signifies an unused dirent.
- Inode() uint32
-
- // RecordSize returns the record length of this dirent on disk. The next
- // dirent in the dirent list should be read after these many bytes from
- // the current dirent. Must be a multiple of 4.
- RecordSize() uint16
-
- // FileName returns the name of the file. Can be at most 255 is length.
- FileName() string
-
- // FileType returns the inode type of the underlying inode. This is a
- // performance hack so that we do not have to read the underlying inode struct
- // to know the type of inode. This will only work when the SbDirentFileType
- // feature is set. If not, the second returned value will be false indicating
- // that user code has to use the inode mode to extract the file type.
- FileType() (fs.InodeType, bool)
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/dirent_new.go b/pkg/sentry/fsimpl/ext/disklayout/dirent_new.go
deleted file mode 100644
index 51f9c2946..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/dirent_new.go
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "fmt"
-
- "gvisor.dev/gvisor/pkg/sentry/fs"
-)
-
-// DirentNew represents the ext4 directory entry struct. This emulates Linux's
-// ext4_dir_entry_2 struct. The FileName can not be more than 255 bytes so we
-// only need 8 bits to store the NameLength. As a result, NameLength has been
-// shortened and the other 8 bits are used to encode the file type. Use the
-// FileTypeRaw field only if the SbDirentFileType feature is set.
-//
-// Note: This struct can be of variable size on disk. The one described below
-// is of maximum size and the FileName beyond NameLength bytes might contain
-// garbage.
-//
-// +marshal
-type DirentNew struct {
- InodeNumber uint32
- RecordLength uint16
- NameLength uint8
- FileTypeRaw uint8
- FileNameRaw [MaxFileName]byte `marshal:"unaligned"`
-}
-
-// Compiles only if DirentNew implements Dirent.
-var _ Dirent = (*DirentNew)(nil)
-
-// Inode implements Dirent.Inode.
-func (d *DirentNew) Inode() uint32 { return d.InodeNumber }
-
-// RecordSize implements Dirent.RecordSize.
-func (d *DirentNew) RecordSize() uint16 { return d.RecordLength }
-
-// FileName implements Dirent.FileName.
-func (d *DirentNew) FileName() string {
- return string(d.FileNameRaw[:d.NameLength])
-}
-
-// FileType implements Dirent.FileType.
-func (d *DirentNew) FileType() (fs.InodeType, bool) {
- if inodeType, ok := inodeTypeByFileType[d.FileTypeRaw]; ok {
- return inodeType, true
- }
-
- panic(fmt.Sprintf("unknown file type %v", d.FileTypeRaw))
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/dirent_old.go b/pkg/sentry/fsimpl/ext/disklayout/dirent_old.go
deleted file mode 100644
index d4b19e086..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/dirent_old.go
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import "gvisor.dev/gvisor/pkg/sentry/fs"
-
-// DirentOld represents the old directory entry struct which does not contain
-// the file type. This emulates Linux's ext4_dir_entry struct.
-//
-// Note: This struct can be of variable size on disk. The one described below
-// is of maximum size and the FileName beyond NameLength bytes might contain
-// garbage.
-//
-// +marshal
-type DirentOld struct {
- InodeNumber uint32
- RecordLength uint16
- NameLength uint16
- FileNameRaw [MaxFileName]byte `marshal:"unaligned"`
-}
-
-// Compiles only if DirentOld implements Dirent.
-var _ Dirent = (*DirentOld)(nil)
-
-// Inode implements Dirent.Inode.
-func (d *DirentOld) Inode() uint32 { return d.InodeNumber }
-
-// RecordSize implements Dirent.RecordSize.
-func (d *DirentOld) RecordSize() uint16 { return d.RecordLength }
-
-// FileName implements Dirent.FileName.
-func (d *DirentOld) FileName() string {
- return string(d.FileNameRaw[:d.NameLength])
-}
-
-// FileType implements Dirent.FileType.
-func (d *DirentOld) FileType() (fs.InodeType, bool) {
- return fs.Anonymous, false
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/dirent_test.go b/pkg/sentry/fsimpl/ext/disklayout/dirent_test.go
deleted file mode 100644
index 3486864dc..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/dirent_test.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "testing"
-)
-
-// TestDirentSize tests that the dirent structs are of the correct
-// size.
-func TestDirentSize(t *testing.T) {
- var dOld DirentOld
- assertSize(t, &dOld, DirentSize)
- var dNew DirentNew
- assertSize(t, &dNew, DirentSize)
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/disklayout.go b/pkg/sentry/fsimpl/ext/disklayout/disklayout.go
deleted file mode 100644
index 0834e9ba8..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/disklayout.go
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package disklayout provides Linux ext file system's disk level structures
-// which can be directly read into from the underlying device. Structs aim to
-// emulate structures `exactly` how they are layed out on disk.
-//
-// This library aims to be compatible with all ext(2/3/4) systems so it
-// provides a generic interface for all major structures and various
-// implementations (for different versions). The user code is responsible for
-// using appropriate implementations based on the underlying device.
-//
-// Interfacing all major structures here serves a few purposes:
-// - Abstracts away the complexity of the underlying structure from client
-// code. The client only has to figure out versioning on set up and then
-// can use these as black boxes and pass it higher up the stack.
-// - Having pointer receivers forces the user to use pointers to these
-// heavy structs. Hence, prevents the client code from unintentionally
-// copying these by value while passing the interface around.
-// - Version-based implementation selection is resolved on set up hence
-// avoiding per call overhead of choosing implementation.
-// - All interface methods are pretty light weight (do not take in any
-// parameters by design). Passing pointer arguments to interface methods
-// can lead to heap allocation as the compiler won't be able to perform
-// escape analysis on an unknown implementation at compile time.
-//
-// Notes:
-// - All structures on disk are in little-endian order. Only jbd2 (journal)
-// structures are in big-endian order.
-// - All OS dependent fields in these structures will be interpretted using
-// the Linux version of that field.
-// - The suffix `Lo` in field names stands for lower bits of that field.
-// - The suffix `Hi` in field names stands for upper bits of that field.
-// - The suffix `Raw` has been added to indicate that the field is not split
-// into Lo and Hi fields and also to resolve name collision with the
-// respective interface.
-package disklayout
diff --git a/pkg/sentry/fsimpl/ext/disklayout/extent.go b/pkg/sentry/fsimpl/ext/disklayout/extent.go
deleted file mode 100644
index b13999bfc..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/extent.go
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "gvisor.dev/gvisor/pkg/marshal"
-)
-
-// Extents were introduced in ext4 and provide huge performance gains in terms
-// data locality and reduced metadata block usage. Extents are organized in
-// extent trees. The root node is contained in inode.BlocksRaw.
-//
-// Terminology:
-// - Physical Block:
-// Filesystem data block which is addressed normally wrt the entire
-// filesystem (addressed with 48 bits).
-//
-// - File Block:
-// Data block containing *only* file data and addressed wrt to the file
-// with only 32 bits. The (i)th file block contains file data from
-// byte (i * sb.BlockSize()) to ((i+1) * sb.BlockSize()).
-
-const (
- // ExtentHeaderSize is the size of the header of an extent tree node.
- ExtentHeaderSize = 12
-
- // ExtentEntrySize is the size of an entry in an extent tree node.
- // This size is the same for both leaf and internal nodes.
- ExtentEntrySize = 12
-
- // ExtentMagic is the magic number which must be present in the header.
- ExtentMagic = 0xf30a
-)
-
-// ExtentEntryPair couples an in-memory ExtendNode with the ExtentEntry that
-// points to it. We want to cache these structs in memory to avoid repeated
-// disk reads.
-//
-// Note: This struct itself does not represent an on-disk struct.
-type ExtentEntryPair struct {
- // Entry points to the child node on disk.
- Entry ExtentEntry
- // Node points to child node in memory. Is nil if the current node is a leaf.
- Node *ExtentNode
-}
-
-// ExtentNode represents an extent tree node. For internal nodes, all Entries
-// will be ExtendIdxs. For leaf nodes, they will all be Extents.
-//
-// Note: This struct itself does not represent an on-disk struct.
-type ExtentNode struct {
- Header ExtentHeader
- Entries []ExtentEntryPair
-}
-
-// ExtentEntry represents an extent tree node entry. The entry can either be
-// an ExtentIdx or Extent itself. This exists to simplify navigation logic.
-type ExtentEntry interface {
- marshal.Marshallable
-
- // FileBlock returns the first file block number covered by this entry.
- FileBlock() uint32
-
- // PhysicalBlock returns the child physical block that this entry points to.
- PhysicalBlock() uint64
-}
-
-// ExtentHeader emulates the ext4_extent_header struct in ext4. Each extent
-// tree node begins with this and is followed by `NumEntries` number of:
-// - Extent if `Depth` == 0
-// - ExtentIdx otherwise
-//
-// +marshal
-type ExtentHeader struct {
- // Magic in the extent magic number, must be 0xf30a.
- Magic uint16
-
- // NumEntries indicates the number of valid entries following the header.
- NumEntries uint16
-
- // MaxEntries that could follow the header. Used while adding entries.
- MaxEntries uint16
-
- // Height represents the distance of this node from the farthest leaf. Please
- // note that Linux incorrectly calls this `Depth` (which means the distance
- // of the node from the root).
- Height uint16
- _ uint32
-}
-
-// ExtentIdx emulates the ext4_extent_idx struct in ext4. Only present in
-// internal nodes. Sorted in ascending order based on FirstFileBlock since
-// Linux does a binary search on this. This points to a block containing the
-// child node.
-//
-// +marshal
-type ExtentIdx struct {
- FirstFileBlock uint32
- ChildBlockLo uint32
- ChildBlockHi uint16
- _ uint16
-}
-
-// Compiles only if ExtentIdx implements ExtentEntry.
-var _ ExtentEntry = (*ExtentIdx)(nil)
-
-// FileBlock implements ExtentEntry.FileBlock.
-func (ei *ExtentIdx) FileBlock() uint32 {
- return ei.FirstFileBlock
-}
-
-// PhysicalBlock implements ExtentEntry.PhysicalBlock. It returns the
-// physical block number of the child block.
-func (ei *ExtentIdx) PhysicalBlock() uint64 {
- return (uint64(ei.ChildBlockHi) << 32) | uint64(ei.ChildBlockLo)
-}
-
-// Extent represents the ext4_extent struct in ext4. Only present in leaf
-// nodes. Sorted in ascending order based on FirstFileBlock since Linux does a
-// binary search on this. This points to an array of data blocks containing the
-// file data. It covers `Length` data blocks starting from `StartBlock`.
-//
-// +marshal
-type Extent struct {
- FirstFileBlock uint32
- Length uint16
- StartBlockHi uint16
- StartBlockLo uint32
-}
-
-// Compiles only if Extent implements ExtentEntry.
-var _ ExtentEntry = (*Extent)(nil)
-
-// FileBlock implements ExtentEntry.FileBlock.
-func (e *Extent) FileBlock() uint32 {
- return e.FirstFileBlock
-}
-
-// PhysicalBlock implements ExtentEntry.PhysicalBlock. It returns the
-// physical block number of the first data block this extent covers.
-func (e *Extent) PhysicalBlock() uint64 {
- return (uint64(e.StartBlockHi) << 32) | uint64(e.StartBlockLo)
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/extent_test.go b/pkg/sentry/fsimpl/ext/disklayout/extent_test.go
deleted file mode 100644
index c96002e19..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/extent_test.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "testing"
-)
-
-// TestExtentSize tests that the extent structs are of the correct
-// size.
-func TestExtentSize(t *testing.T) {
- var h ExtentHeader
- assertSize(t, &h, ExtentHeaderSize)
- var i ExtentIdx
- assertSize(t, &i, ExtentEntrySize)
- var e Extent
- assertSize(t, &e, ExtentEntrySize)
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/inode.go b/pkg/sentry/fsimpl/ext/disklayout/inode.go
deleted file mode 100644
index ef25040a9..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/inode.go
+++ /dev/null
@@ -1,277 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/marshal"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/time"
-)
-
-// Special inodes. See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#special-inodes.
-const (
- // RootDirInode is the inode number of the root directory inode.
- RootDirInode = 2
-)
-
-// The Inode interface must be implemented by structs representing ext inodes.
-// The inode stores all the metadata pertaining to the file (except for the
-// file name which is held by the directory entry). It does NOT expose all
-// fields and should be extended if need be.
-//
-// Some file systems (e.g. FAT) use the directory entry to store all this
-// information. Ext file systems do not so that they can support hard links.
-// However, ext4 cheats a little bit and duplicates the file type in the
-// directory entry for performance gains.
-//
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#index-nodes.
-type Inode interface {
- marshal.Marshallable
-
- // Mode returns the linux file mode which is majorly used to extract
- // information like:
- // - File permissions (read/write/execute by user/group/others).
- // - Sticky, set UID and GID bits.
- // - File type.
- //
- // Masks to extract this information are provided in pkg/abi/linux/file.go.
- Mode() linux.FileMode
-
- // UID returns the owner UID.
- UID() auth.KUID
-
- // GID returns the owner GID.
- GID() auth.KGID
-
- // Size returns the size of the file in bytes.
- Size() uint64
-
- // InodeSize returns the size of this inode struct in bytes.
- // In ext2 and ext3, the inode struct and inode disk record size was fixed at
- // 128 bytes. Ext4 makes it possible for the inode struct to be bigger.
- // However, accessing any field beyond the 128 bytes marker must be verified
- // using this method.
- InodeSize() uint16
-
- // AccessTime returns the last access time. Shows when the file was last read.
- //
- // If InExtendedAttr is set, then this should NOT be used because the
- // underlying field is used to store the extended attribute value checksum.
- AccessTime() time.Time
-
- // ChangeTime returns the last change time. Shows when the file meta data
- // (like permissions) was last changed.
- //
- // If InExtendedAttr is set, then this should NOT be used because the
- // underlying field is used to store the lower 32 bits of the attribute
- // value’s reference count.
- ChangeTime() time.Time
-
- // ModificationTime returns the last modification time. Shows when the file
- // content was last modified.
- //
- // If InExtendedAttr is set, then this should NOT be used because
- // the underlying field contains the number of the inode that owns the
- // extended attribute.
- ModificationTime() time.Time
-
- // DeletionTime returns the deletion time. Inodes are marked as deleted by
- // writing to the underlying field. FS tools can restore files until they are
- // actually overwritten.
- DeletionTime() time.Time
-
- // LinksCount returns the number of hard links to this inode.
- //
- // Normally there is an upper limit on the number of hard links:
- // - ext2/ext3 = 32,000
- // - ext4 = 65,000
- //
- // This implies that an ext4 directory cannot have more than 64,998
- // subdirectories because each subdirectory will have a hard link to the
- // directory via the `..` entry. The directory has hard link via the `.` entry
- // of its own. And finally the inode is initiated with 1 hard link (itself).
- //
- // The underlying value is reset to 1 if all the following hold:
- // - Inode is a directory.
- // - SbDirNlink is enabled.
- // - Number of hard links is incremented past 64,999.
- // Hard link value of 1 for a directory would indicate that the number of hard
- // links is unknown because a directory can have minimum 2 hard links (itself
- // and `.` entry).
- LinksCount() uint16
-
- // Flags returns InodeFlags which represents the inode flags.
- Flags() InodeFlags
-
- // Data returns the underlying inode.i_block array as a slice so it's
- // modifiable. This field is special and is used to store various kinds of
- // things depending on the filesystem version and inode type. The underlying
- // field name in Linux is a little misleading.
- // - In ext2/ext3, it contains the block map.
- // - In ext4, it contains the extent tree root node.
- // - For inline files, it contains the file contents.
- // - For symlinks, it contains the link path (if it fits here).
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#the-contents-of-inode-i-block.
- Data() []byte
-}
-
-// Inode flags. This is not comprehensive and flags which were not used in
-// the Linux kernel have been excluded.
-const (
- // InSync indicates that all writes to the file must be synchronous.
- InSync = 0x8
-
- // InImmutable indicates that this file is immutable.
- InImmutable = 0x10
-
- // InAppend indicates that this file can only be appended to.
- InAppend = 0x20
-
- // InNoDump indicates that teh dump(1) utility should not dump this file.
- InNoDump = 0x40
-
- // InNoAccessTime indicates that the access time of this inode must not be
- // updated.
- InNoAccessTime = 0x80
-
- // InIndex indicates that this directory has hashed indexes.
- InIndex = 0x1000
-
- // InJournalData indicates that file data must always be written through a
- // journal device.
- InJournalData = 0x4000
-
- // InDirSync indicates that all the directory entiry data must be written
- // synchronously.
- InDirSync = 0x10000
-
- // InTopDir indicates that this inode is at the top of the directory hierarchy.
- InTopDir = 0x20000
-
- // InHugeFile indicates that this is a huge file.
- InHugeFile = 0x40000
-
- // InExtents indicates that this inode uses extents.
- InExtents = 0x80000
-
- // InExtendedAttr indicates that this inode stores a large extended attribute
- // value in its data blocks.
- InExtendedAttr = 0x200000
-
- // InInline indicates that this inode has inline data.
- InInline = 0x10000000
-
- // InReserved indicates that this inode is reserved for the ext4 library.
- InReserved = 0x80000000
-)
-
-// InodeFlags represents all possible combinations of inode flags. It aims to
-// cover the bit masks and provide a more user-friendly interface.
-type InodeFlags struct {
- Sync bool
- Immutable bool
- Append bool
- NoDump bool
- NoAccessTime bool
- Index bool
- JournalData bool
- DirSync bool
- TopDir bool
- HugeFile bool
- Extents bool
- ExtendedAttr bool
- Inline bool
- Reserved bool
-}
-
-// ToInt converts inode flags back to its 32-bit rep.
-func (f InodeFlags) ToInt() uint32 {
- var res uint32
-
- if f.Sync {
- res |= InSync
- }
- if f.Immutable {
- res |= InImmutable
- }
- if f.Append {
- res |= InAppend
- }
- if f.NoDump {
- res |= InNoDump
- }
- if f.NoAccessTime {
- res |= InNoAccessTime
- }
- if f.Index {
- res |= InIndex
- }
- if f.JournalData {
- res |= InJournalData
- }
- if f.DirSync {
- res |= InDirSync
- }
- if f.TopDir {
- res |= InTopDir
- }
- if f.HugeFile {
- res |= InHugeFile
- }
- if f.Extents {
- res |= InExtents
- }
- if f.ExtendedAttr {
- res |= InExtendedAttr
- }
- if f.Inline {
- res |= InInline
- }
- if f.Reserved {
- res |= InReserved
- }
-
- return res
-}
-
-// InodeFlagsFromInt converts the integer representation of inode flags to
-// a InodeFlags struct.
-func InodeFlagsFromInt(f uint32) InodeFlags {
- return InodeFlags{
- Sync: f&InSync > 0,
- Immutable: f&InImmutable > 0,
- Append: f&InAppend > 0,
- NoDump: f&InNoDump > 0,
- NoAccessTime: f&InNoAccessTime > 0,
- Index: f&InIndex > 0,
- JournalData: f&InJournalData > 0,
- DirSync: f&InDirSync > 0,
- TopDir: f&InTopDir > 0,
- HugeFile: f&InHugeFile > 0,
- Extents: f&InExtents > 0,
- ExtendedAttr: f&InExtendedAttr > 0,
- Inline: f&InInline > 0,
- Reserved: f&InReserved > 0,
- }
-}
-
-// These masks define how users can view/modify inode flags. The rest of the
-// flags are for internal kernel usage only.
-const (
- InUserReadFlagMask = 0x4BDFFF
- InUserWriteFlagMask = 0x4B80FF
-)
diff --git a/pkg/sentry/fsimpl/ext/disklayout/inode_new.go b/pkg/sentry/fsimpl/ext/disklayout/inode_new.go
deleted file mode 100644
index a4503f5cf..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/inode_new.go
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import "gvisor.dev/gvisor/pkg/sentry/kernel/time"
-
-// InodeNew represents ext4 inode structure which can be bigger than
-// OldInodeSize. The actual size of this struct should be determined using
-// inode.ExtraInodeSize. Accessing any field here should be verified with the
-// actual size. The extra space between the end of the inode struct and end of
-// the inode record can be used to store extended attr.
-//
-// If the TimeExtra fields are in scope, the lower 2 bits of those are used
-// to extend their counter part to be 34 bits wide; the rest (upper) 30 bits
-// are used to provide nanoscond precision. Hence, these timestamps will now
-// overflow in May 2446.
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps.
-//
-// +marshal
-type InodeNew struct {
- InodeOld
-
- ExtraInodeSize uint16
- ChecksumHi uint16
- ChangeTimeExtra uint32
- ModificationTimeExtra uint32
- AccessTimeExtra uint32
- CreationTime uint32
- CreationTimeExtra uint32
- VersionHi uint32
- ProjectID uint32
-}
-
-// Compiles only if InodeNew implements Inode.
-var _ Inode = (*InodeNew)(nil)
-
-// fromExtraTime decodes the extra time and constructs the kernel time struct
-// with nanosecond precision.
-func fromExtraTime(lo int32, extra uint32) time.Time {
- // See description above InodeNew for format.
- seconds := (int64(extra&0x3) << 32) + int64(lo)
- nanoseconds := int64(extra >> 2)
- return time.FromUnix(seconds, nanoseconds)
-}
-
-// Only override methods which change due to ext4 specific fields.
-
-// Size implements Inode.Size.
-func (in *InodeNew) Size() uint64 {
- return (uint64(in.SizeHi) << 32) | uint64(in.SizeLo)
-}
-
-// InodeSize implements Inode.InodeSize.
-func (in *InodeNew) InodeSize() uint16 {
- return OldInodeSize + in.ExtraInodeSize
-}
-
-// ChangeTime implements Inode.ChangeTime.
-func (in *InodeNew) ChangeTime() time.Time {
- // Apply new timestamp logic if inode.ChangeTimeExtra is in scope.
- if in.ExtraInodeSize >= 8 {
- return fromExtraTime(in.ChangeTimeRaw, in.ChangeTimeExtra)
- }
-
- return in.InodeOld.ChangeTime()
-}
-
-// ModificationTime implements Inode.ModificationTime.
-func (in *InodeNew) ModificationTime() time.Time {
- // Apply new timestamp logic if inode.ModificationTimeExtra is in scope.
- if in.ExtraInodeSize >= 12 {
- return fromExtraTime(in.ModificationTimeRaw, in.ModificationTimeExtra)
- }
-
- return in.InodeOld.ModificationTime()
-}
-
-// AccessTime implements Inode.AccessTime.
-func (in *InodeNew) AccessTime() time.Time {
- // Apply new timestamp logic if inode.AccessTimeExtra is in scope.
- if in.ExtraInodeSize >= 16 {
- return fromExtraTime(in.AccessTimeRaw, in.AccessTimeExtra)
- }
-
- return in.InodeOld.AccessTime()
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/inode_old.go b/pkg/sentry/fsimpl/ext/disklayout/inode_old.go
deleted file mode 100644
index e6b28babf..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/inode_old.go
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/time"
-)
-
-const (
- // OldInodeSize is the inode size in ext2/ext3.
- OldInodeSize = 128
-)
-
-// InodeOld implements Inode interface. It emulates ext2/ext3 inode struct.
-// Inode struct size and record size are both 128 bytes for this.
-//
-// All fields representing time are in seconds since the epoch. Which means that
-// they will overflow in January 2038.
-//
-// +marshal
-type InodeOld struct {
- ModeRaw uint16
- UIDLo uint16
- SizeLo uint32
-
- // The time fields are signed integers because they could be negative to
- // represent time before the epoch.
- AccessTimeRaw int32
- ChangeTimeRaw int32
- ModificationTimeRaw int32
- DeletionTimeRaw int32
-
- GIDLo uint16
- LinksCountRaw uint16
- BlocksCountLo uint32
- FlagsRaw uint32
- VersionLo uint32 // This is OS dependent.
- DataRaw [60]byte
- Generation uint32
- FileACLLo uint32
- SizeHi uint32
- ObsoFaddr uint32
-
- // OS dependent fields have been inlined here.
- BlocksCountHi uint16
- FileACLHi uint16
- UIDHi uint16
- GIDHi uint16
- ChecksumLo uint16
- _ uint16
-}
-
-// Compiles only if InodeOld implements Inode.
-var _ Inode = (*InodeOld)(nil)
-
-// Mode implements Inode.Mode.
-func (in *InodeOld) Mode() linux.FileMode { return linux.FileMode(in.ModeRaw) }
-
-// UID implements Inode.UID.
-func (in *InodeOld) UID() auth.KUID {
- return auth.KUID((uint32(in.UIDHi) << 16) | uint32(in.UIDLo))
-}
-
-// GID implements Inode.GID.
-func (in *InodeOld) GID() auth.KGID {
- return auth.KGID((uint32(in.GIDHi) << 16) | uint32(in.GIDLo))
-}
-
-// Size implements Inode.Size.
-func (in *InodeOld) Size() uint64 {
- // In ext2/ext3, in.SizeHi did not exist, it was instead named in.DirACL.
- return uint64(in.SizeLo)
-}
-
-// InodeSize implements Inode.InodeSize.
-func (in *InodeOld) InodeSize() uint16 { return OldInodeSize }
-
-// AccessTime implements Inode.AccessTime.
-func (in *InodeOld) AccessTime() time.Time {
- return time.FromUnix(int64(in.AccessTimeRaw), 0)
-}
-
-// ChangeTime implements Inode.ChangeTime.
-func (in *InodeOld) ChangeTime() time.Time {
- return time.FromUnix(int64(in.ChangeTimeRaw), 0)
-}
-
-// ModificationTime implements Inode.ModificationTime.
-func (in *InodeOld) ModificationTime() time.Time {
- return time.FromUnix(int64(in.ModificationTimeRaw), 0)
-}
-
-// DeletionTime implements Inode.DeletionTime.
-func (in *InodeOld) DeletionTime() time.Time {
- return time.FromUnix(int64(in.DeletionTimeRaw), 0)
-}
-
-// LinksCount implements Inode.LinksCount.
-func (in *InodeOld) LinksCount() uint16 { return in.LinksCountRaw }
-
-// Flags implements Inode.Flags.
-func (in *InodeOld) Flags() InodeFlags { return InodeFlagsFromInt(in.FlagsRaw) }
-
-// Data implements Inode.Data.
-func (in *InodeOld) Data() []byte { return in.DataRaw[:] }
diff --git a/pkg/sentry/fsimpl/ext/disklayout/inode_test.go b/pkg/sentry/fsimpl/ext/disklayout/inode_test.go
deleted file mode 100644
index 90744e956..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/inode_test.go
+++ /dev/null
@@ -1,224 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "fmt"
- "strconv"
- "testing"
-
- "gvisor.dev/gvisor/pkg/sentry/kernel/time"
-)
-
-// TestInodeSize tests that the inode structs are of the correct size.
-func TestInodeSize(t *testing.T) {
- var iOld InodeOld
- assertSize(t, &iOld, OldInodeSize)
-
- // This was updated from 156 bytes to 160 bytes in Oct 2015.
- var iNew InodeNew
- assertSize(t, &iNew, 160)
-}
-
-// TestTimestampSeconds tests that the seconds part of [a/c/m] timestamps in
-// ext4 inode structs are decoded correctly.
-//
-// These tests are derived from the table under https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps.
-func TestTimestampSeconds(t *testing.T) {
- type timestampTest struct {
- // msbSet tells if the most significant bit of InodeOld.[X]TimeRaw is set.
- // If this is set then the 32-bit time is negative.
- msbSet bool
-
- // lowerBound tells if we should take the lowest possible value of
- // InodeOld.[X]TimeRaw while satisfying test.msbSet condition. If set to
- // false it tells to take the highest possible value.
- lowerBound bool
-
- // extraBits is InodeNew.[X]TimeExtra.
- extraBits uint32
-
- // want is the kernel time struct that is expected.
- want time.Time
- }
-
- tests := []timestampTest{
- // 1901-12-13
- {
- msbSet: true,
- lowerBound: true,
- extraBits: 0,
- want: time.FromUnix(int64(-0x80000000), 0),
- },
-
- // 1969-12-31
- {
- msbSet: true,
- lowerBound: false,
- extraBits: 0,
- want: time.FromUnix(int64(-1), 0),
- },
-
- // 1970-01-01
- {
- msbSet: false,
- lowerBound: true,
- extraBits: 0,
- want: time.FromUnix(int64(0), 0),
- },
-
- // 2038-01-19
- {
- msbSet: false,
- lowerBound: false,
- extraBits: 0,
- want: time.FromUnix(int64(0x7fffffff), 0),
- },
-
- // 2038-01-19
- {
- msbSet: true,
- lowerBound: true,
- extraBits: 1,
- want: time.FromUnix(int64(0x80000000), 0),
- },
-
- // 2106-02-07
- {
- msbSet: true,
- lowerBound: false,
- extraBits: 1,
- want: time.FromUnix(int64(0xffffffff), 0),
- },
-
- // 2106-02-07
- {
- msbSet: false,
- lowerBound: true,
- extraBits: 1,
- want: time.FromUnix(int64(0x100000000), 0),
- },
-
- // 2174-02-25
- {
- msbSet: false,
- lowerBound: false,
- extraBits: 1,
- want: time.FromUnix(int64(0x17fffffff), 0),
- },
-
- // 2174-02-25
- {
- msbSet: true,
- lowerBound: true,
- extraBits: 2,
- want: time.FromUnix(int64(0x180000000), 0),
- },
-
- // 2242-03-16
- {
- msbSet: true,
- lowerBound: false,
- extraBits: 2,
- want: time.FromUnix(int64(0x1ffffffff), 0),
- },
-
- // 2242-03-16
- {
- msbSet: false,
- lowerBound: true,
- extraBits: 2,
- want: time.FromUnix(int64(0x200000000), 0),
- },
-
- // 2310-04-04
- {
- msbSet: false,
- lowerBound: false,
- extraBits: 2,
- want: time.FromUnix(int64(0x27fffffff), 0),
- },
-
- // 2310-04-04
- {
- msbSet: true,
- lowerBound: true,
- extraBits: 3,
- want: time.FromUnix(int64(0x280000000), 0),
- },
-
- // 2378-04-22
- {
- msbSet: true,
- lowerBound: false,
- extraBits: 3,
- want: time.FromUnix(int64(0x2ffffffff), 0),
- },
-
- // 2378-04-22
- {
- msbSet: false,
- lowerBound: true,
- extraBits: 3,
- want: time.FromUnix(int64(0x300000000), 0),
- },
-
- // 2446-05-10
- {
- msbSet: false,
- lowerBound: false,
- extraBits: 3,
- want: time.FromUnix(int64(0x37fffffff), 0),
- },
- }
-
- lowerMSB0 := int32(0) // binary: 00000000 00000000 00000000 00000000
- upperMSB0 := int32(0x7fffffff) // binary: 01111111 11111111 11111111 11111111
- lowerMSB1 := int32(-0x80000000) // binary: 10000000 00000000 00000000 00000000
- upperMSB1 := int32(-1) // binary: 11111111 11111111 11111111 11111111
-
- get32BitTime := func(test timestampTest) int32 {
- if test.msbSet {
- if test.lowerBound {
- return lowerMSB1
- }
-
- return upperMSB1
- }
-
- if test.lowerBound {
- return lowerMSB0
- }
-
- return upperMSB0
- }
-
- getTestName := func(test timestampTest) string {
- return fmt.Sprintf(
- "Tests time decoding with epoch bits 0b%s and 32-bit raw time: MSB set=%t, lower bound=%t",
- strconv.FormatInt(int64(test.extraBits), 2),
- test.msbSet,
- test.lowerBound,
- )
- }
-
- for _, test := range tests {
- t.Run(getTestName(test), func(t *testing.T) {
- if got := fromExtraTime(get32BitTime(test), test.extraBits); got != test.want {
- t.Errorf("Expected: %v, Got: %v", test.want, got)
- }
- })
- }
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock.go b/pkg/sentry/fsimpl/ext/disklayout/superblock.go
deleted file mode 100644
index 70948ebe9..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/superblock.go
+++ /dev/null
@@ -1,477 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "gvisor.dev/gvisor/pkg/marshal"
-)
-
-const (
- // SbOffset is the absolute offset at which the superblock is placed.
- SbOffset = 1024
-)
-
-// SuperBlock should be implemented by structs representing the ext superblock.
-// The superblock holds a lot of information about the enclosing filesystem.
-// This interface aims to provide access methods to important information held
-// by the superblock. It does NOT expose all fields of the superblock, only the
-// ones necessary. This can be expanded when need be.
-//
-// Location and replication:
-// - The superblock is located at offset 1024 in block group 0.
-// - Redundant copies of the superblock and group descriptors are kept in
-// all groups if SbSparse feature flag is NOT set. If it is set, the
-// replicas only exist in groups whose group number is either 0 or a
-// power of 3, 5, or 7.
-// - There is also a sparse superblock feature v2 in which there are just
-// two replicas saved in the block groups pointed by sb.s_backup_bgs.
-//
-// Replicas should eventually be updated if the superblock is updated.
-//
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#super-block.
-type SuperBlock interface {
- marshal.Marshallable
-
- // InodesCount returns the total number of inodes in this filesystem.
- InodesCount() uint32
-
- // BlocksCount returns the total number of data blocks in this filesystem.
- BlocksCount() uint64
-
- // FreeBlocksCount returns the number of free blocks in this filesystem.
- FreeBlocksCount() uint64
-
- // FreeInodesCount returns the number of free inodes in this filesystem.
- FreeInodesCount() uint32
-
- // MountCount returns the number of mounts since the last fsck.
- MountCount() uint16
-
- // MaxMountCount returns the number of mounts allowed beyond which a fsck is
- // needed.
- MaxMountCount() uint16
-
- // FirstDataBlock returns the absolute block number of the first data block,
- // which contains the super block itself.
- //
- // If the filesystem has 1kb data blocks then this should return 1. For all
- // other configurations, this typically returns 0.
- FirstDataBlock() uint32
-
- // BlockSize returns the size of one data block in this filesystem.
- // This can be calculated by 2^(10 + sb.s_log_block_size). This ensures that
- // the smallest block size is 1kb.
- BlockSize() uint64
-
- // BlocksPerGroup returns the number of data blocks in a block group.
- BlocksPerGroup() uint32
-
- // ClusterSize returns block cluster size (set during mkfs time by admin).
- // This can be calculated by 2^(10 + sb.s_log_cluster_size). This ensures that
- // the smallest cluster size is 1kb.
- //
- // sb.s_log_cluster_size must equal sb.s_log_block_size if bigalloc feature
- // is NOT set and consequently BlockSize() = ClusterSize() in that case.
- ClusterSize() uint64
-
- // ClustersPerGroup returns:
- // - number of clusters per group if bigalloc is enabled.
- // - BlocksPerGroup() otherwise.
- ClustersPerGroup() uint32
-
- // InodeSize returns the size of the inode disk record size in bytes. Use this
- // to iterate over inode arrays on disk.
- //
- // In ext2 and ext3:
- // - Each inode had a disk record of 128 bytes.
- // - The inode struct size was fixed at 128 bytes.
- //
- // In ext4 its possible to allocate larger on-disk inodes:
- // - Inode disk record size = sb.s_inode_size (function return value).
- // = 256 (default)
- // - Inode struct size = 128 + inode.i_extra_isize.
- // = 128 + 32 = 160 (default)
- InodeSize() uint16
-
- // InodesPerGroup returns the number of inodes in a block group.
- InodesPerGroup() uint32
-
- // BgDescSize returns the size of the block group descriptor struct.
- //
- // In ext2, ext3, ext4 (without 64-bit feature), the block group descriptor
- // is only 32 bytes long.
- // In ext4 with 64-bit feature, the block group descriptor expands to AT LEAST
- // 64 bytes. It might be bigger than that.
- BgDescSize() uint16
-
- // CompatibleFeatures returns the CompatFeatures struct which holds all the
- // compatible features this fs supports.
- CompatibleFeatures() CompatFeatures
-
- // IncompatibleFeatures returns the CompatFeatures struct which holds all the
- // incompatible features this fs supports.
- IncompatibleFeatures() IncompatFeatures
-
- // ReadOnlyCompatibleFeatures returns the CompatFeatures struct which holds all the
- // readonly compatible features this fs supports.
- ReadOnlyCompatibleFeatures() RoCompatFeatures
-
- // Magic() returns the magic signature which must be 0xef53.
- Magic() uint16
-
- // Revision returns the superblock revision. Superblock struct fields from
- // offset 0x54 till 0x150 should only be used if superblock has DynamicRev.
- Revision() SbRevision
-}
-
-// SbRevision is the type for superblock revisions.
-type SbRevision uint32
-
-// Super block revisions.
-const (
- // OldRev is the good old (original) format.
- OldRev SbRevision = 0
-
- // DynamicRev is v2 format w/ dynamic inode sizes.
- DynamicRev SbRevision = 1
-)
-
-// Superblock compatible features.
-// This is not exhaustive, unused features are not listed.
-const (
- // SbDirPrealloc indicates directory preallocation.
- SbDirPrealloc = 0x1
-
- // SbHasJournal indicates the presence of a journal. jbd2 should only work
- // with this being set.
- SbHasJournal = 0x4
-
- // SbExtAttr indicates extended attributes support.
- SbExtAttr = 0x8
-
- // SbResizeInode indicates that the fs has reserved GDT blocks (right after
- // group descriptors) for fs expansion.
- SbResizeInode = 0x10
-
- // SbDirIndex indicates that the fs has directory indices.
- SbDirIndex = 0x20
-
- // SbSparseV2 stands for Sparse superblock version 2.
- SbSparseV2 = 0x200
-)
-
-// CompatFeatures represents a superblock's compatible feature set. If the
-// kernel does not understand any of these feature, it can still read/write
-// to this fs.
-type CompatFeatures struct {
- DirPrealloc bool
- HasJournal bool
- ExtAttr bool
- ResizeInode bool
- DirIndex bool
- SparseV2 bool
-}
-
-// ToInt converts superblock compatible features back to its 32-bit rep.
-func (f CompatFeatures) ToInt() uint32 {
- var res uint32
-
- if f.DirPrealloc {
- res |= SbDirPrealloc
- }
- if f.HasJournal {
- res |= SbHasJournal
- }
- if f.ExtAttr {
- res |= SbExtAttr
- }
- if f.ResizeInode {
- res |= SbResizeInode
- }
- if f.DirIndex {
- res |= SbDirIndex
- }
- if f.SparseV2 {
- res |= SbSparseV2
- }
-
- return res
-}
-
-// CompatFeaturesFromInt converts the integer representation of superblock
-// compatible features to CompatFeatures struct.
-func CompatFeaturesFromInt(f uint32) CompatFeatures {
- return CompatFeatures{
- DirPrealloc: f&SbDirPrealloc > 0,
- HasJournal: f&SbHasJournal > 0,
- ExtAttr: f&SbExtAttr > 0,
- ResizeInode: f&SbResizeInode > 0,
- DirIndex: f&SbDirIndex > 0,
- SparseV2: f&SbSparseV2 > 0,
- }
-}
-
-// Superblock incompatible features.
-// This is not exhaustive, unused features are not listed.
-const (
- // SbDirentFileType indicates that directory entries record the file type.
- // We should use struct DirentNew for dirents then.
- SbDirentFileType = 0x2
-
- // SbRecovery indicates that the filesystem needs recovery.
- SbRecovery = 0x4
-
- // SbJournalDev indicates that the filesystem has a separate journal device.
- SbJournalDev = 0x8
-
- // SbMetaBG indicates that the filesystem is using Meta block groups. Moves
- // the group descriptors from the congested first block group into the first
- // group of each metablock group to increase the maximum block groups limit
- // and hence support much larger filesystems.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#meta-block-groups.
- SbMetaBG = 0x10
-
- // SbExtents indicates that the filesystem uses extents. Must be set in ext4
- // filesystems.
- SbExtents = 0x40
-
- // SbIs64Bit indicates that this filesystem addresses blocks with 64-bits.
- // Hence can support 2^64 data blocks.
- SbIs64Bit = 0x80
-
- // SbMMP indicates that this filesystem has multiple mount protection.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#multiple-mount-protection.
- SbMMP = 0x100
-
- // SbFlexBg indicates that this filesystem has flexible block groups. Several
- // block groups are tied into one logical block group so that all the metadata
- // for the block groups (bitmaps and inode tables) are close together for
- // faster loading. Consequently, large files will be continuous on disk.
- // However, this does not affect the placement of redundant superblocks and
- // group descriptors.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#flexible-block-groups.
- SbFlexBg = 0x200
-
- // SbLargeDir shows that large directory enabled. Directory htree can be 3
- // levels deep. Directory htrees are allowed to be 2 levels deep otherwise.
- SbLargeDir = 0x4000
-
- // SbInlineData allows inline data in inodes for really small files.
- SbInlineData = 0x8000
-
- // SbEncrypted indicates that this fs contains encrypted inodes.
- SbEncrypted = 0x10000
-)
-
-// IncompatFeatures represents a superblock's incompatible feature set. If the
-// kernel does not understand any of these feature, it should refuse to mount.
-type IncompatFeatures struct {
- DirentFileType bool
- Recovery bool
- JournalDev bool
- MetaBG bool
- Extents bool
- Is64Bit bool
- MMP bool
- FlexBg bool
- LargeDir bool
- InlineData bool
- Encrypted bool
-}
-
-// ToInt converts superblock incompatible features back to its 32-bit rep.
-func (f IncompatFeatures) ToInt() uint32 {
- var res uint32
-
- if f.DirentFileType {
- res |= SbDirentFileType
- }
- if f.Recovery {
- res |= SbRecovery
- }
- if f.JournalDev {
- res |= SbJournalDev
- }
- if f.MetaBG {
- res |= SbMetaBG
- }
- if f.Extents {
- res |= SbExtents
- }
- if f.Is64Bit {
- res |= SbIs64Bit
- }
- if f.MMP {
- res |= SbMMP
- }
- if f.FlexBg {
- res |= SbFlexBg
- }
- if f.LargeDir {
- res |= SbLargeDir
- }
- if f.InlineData {
- res |= SbInlineData
- }
- if f.Encrypted {
- res |= SbEncrypted
- }
-
- return res
-}
-
-// IncompatFeaturesFromInt converts the integer representation of superblock
-// incompatible features to IncompatFeatures struct.
-func IncompatFeaturesFromInt(f uint32) IncompatFeatures {
- return IncompatFeatures{
- DirentFileType: f&SbDirentFileType > 0,
- Recovery: f&SbRecovery > 0,
- JournalDev: f&SbJournalDev > 0,
- MetaBG: f&SbMetaBG > 0,
- Extents: f&SbExtents > 0,
- Is64Bit: f&SbIs64Bit > 0,
- MMP: f&SbMMP > 0,
- FlexBg: f&SbFlexBg > 0,
- LargeDir: f&SbLargeDir > 0,
- InlineData: f&SbInlineData > 0,
- Encrypted: f&SbEncrypted > 0,
- }
-}
-
-// Superblock readonly compatible features.
-// This is not exhaustive, unused features are not listed.
-const (
- // SbSparse indicates sparse superblocks. Only groups with number either 0 or
- // a power of 3, 5, or 7 will have redundant copies of the superblock and
- // block descriptors.
- SbSparse = 0x1
-
- // SbLargeFile indicates that this fs has been used to store a file >= 2GiB.
- SbLargeFile = 0x2
-
- // SbHugeFile indicates that this fs contains files whose sizes are
- // represented in units of logicals blocks, not 512-byte sectors.
- SbHugeFile = 0x8
-
- // SbGdtCsum indicates that group descriptors have checksums.
- SbGdtCsum = 0x10
-
- // SbDirNlink indicates that the new subdirectory limit is 64,999. Ext3 has a
- // 32,000 subdirectory limit.
- SbDirNlink = 0x20
-
- // SbExtraIsize indicates that large inodes exist on this filesystem.
- SbExtraIsize = 0x40
-
- // SbHasSnapshot indicates the existence of a snapshot.
- SbHasSnapshot = 0x80
-
- // SbQuota enables usage tracking for all quota types.
- SbQuota = 0x100
-
- // SbBigalloc maps to the bigalloc feature. When set, the minimum allocation
- // unit becomes a cluster rather than a data block. Then block bitmaps track
- // clusters, not data blocks.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#bigalloc.
- SbBigalloc = 0x200
-
- // SbMetadataCsum indicates that the fs supports metadata checksumming.
- SbMetadataCsum = 0x400
-
- // SbReadOnly marks this filesystem as readonly. Should refuse to mount in
- // read/write mode.
- SbReadOnly = 0x1000
-)
-
-// RoCompatFeatures represents a superblock's readonly compatible feature set.
-// If the kernel does not understand any of these feature, it can still mount
-// readonly. But if the user wants to mount read/write, the kernel should
-// refuse to mount.
-type RoCompatFeatures struct {
- Sparse bool
- LargeFile bool
- HugeFile bool
- GdtCsum bool
- DirNlink bool
- ExtraIsize bool
- HasSnapshot bool
- Quota bool
- Bigalloc bool
- MetadataCsum bool
- ReadOnly bool
-}
-
-// ToInt converts superblock readonly compatible features to its 32-bit rep.
-func (f RoCompatFeatures) ToInt() uint32 {
- var res uint32
-
- if f.Sparse {
- res |= SbSparse
- }
- if f.LargeFile {
- res |= SbLargeFile
- }
- if f.HugeFile {
- res |= SbHugeFile
- }
- if f.GdtCsum {
- res |= SbGdtCsum
- }
- if f.DirNlink {
- res |= SbDirNlink
- }
- if f.ExtraIsize {
- res |= SbExtraIsize
- }
- if f.HasSnapshot {
- res |= SbHasSnapshot
- }
- if f.Quota {
- res |= SbQuota
- }
- if f.Bigalloc {
- res |= SbBigalloc
- }
- if f.MetadataCsum {
- res |= SbMetadataCsum
- }
- if f.ReadOnly {
- res |= SbReadOnly
- }
-
- return res
-}
-
-// RoCompatFeaturesFromInt converts the integer representation of superblock
-// readonly compatible features to RoCompatFeatures struct.
-func RoCompatFeaturesFromInt(f uint32) RoCompatFeatures {
- return RoCompatFeatures{
- Sparse: f&SbSparse > 0,
- LargeFile: f&SbLargeFile > 0,
- HugeFile: f&SbHugeFile > 0,
- GdtCsum: f&SbGdtCsum > 0,
- DirNlink: f&SbDirNlink > 0,
- ExtraIsize: f&SbExtraIsize > 0,
- HasSnapshot: f&SbHasSnapshot > 0,
- Quota: f&SbQuota > 0,
- Bigalloc: f&SbBigalloc > 0,
- MetadataCsum: f&SbMetadataCsum > 0,
- ReadOnly: f&SbReadOnly > 0,
- }
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock_32.go b/pkg/sentry/fsimpl/ext/disklayout/superblock_32.go
deleted file mode 100644
index 4dc6080fb..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/superblock_32.go
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// SuperBlock32Bit implements SuperBlock and represents the 32-bit version of
-// the ext4_super_block struct in fs/ext4/ext4.h. Should be used only if
-// RevLevel = DynamicRev and 64-bit feature is disabled.
-//
-// +marshal
-type SuperBlock32Bit struct {
- // We embed the old superblock struct here because the 32-bit version is just
- // an extension of the old version.
- SuperBlockOld
-
- FirstInode uint32
- InodeSizeRaw uint16
- BlockGroupNumber uint16
- FeatureCompat uint32
- FeatureIncompat uint32
- FeatureRoCompat uint32
- UUID [16]byte
- VolumeName [16]byte
- LastMounted [64]byte
- AlgoUsageBitmap uint32
- PreallocBlocks uint8
- PreallocDirBlocks uint8
- ReservedGdtBlocks uint16
- JournalUUID [16]byte
- JournalInum uint32
- JournalDev uint32
- LastOrphan uint32
- HashSeed [4]uint32
- DefaultHashVersion uint8
- JnlBackupType uint8
- BgDescSizeRaw uint16
- DefaultMountOpts uint32
- FirstMetaBg uint32
- MkfsTime uint32
- JnlBlocks [17]uint32
-}
-
-// Compiles only if SuperBlock32Bit implements SuperBlock.
-var _ SuperBlock = (*SuperBlock32Bit)(nil)
-
-// Only override methods which change based on the additional fields above.
-// Not overriding SuperBlock.BgDescSize because it would still return 32 here.
-
-// InodeSize implements SuperBlock.InodeSize.
-func (sb *SuperBlock32Bit) InodeSize() uint16 {
- return sb.InodeSizeRaw
-}
-
-// CompatibleFeatures implements SuperBlock.CompatibleFeatures.
-func (sb *SuperBlock32Bit) CompatibleFeatures() CompatFeatures {
- return CompatFeaturesFromInt(sb.FeatureCompat)
-}
-
-// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures.
-func (sb *SuperBlock32Bit) IncompatibleFeatures() IncompatFeatures {
- return IncompatFeaturesFromInt(sb.FeatureIncompat)
-}
-
-// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures.
-func (sb *SuperBlock32Bit) ReadOnlyCompatibleFeatures() RoCompatFeatures {
- return RoCompatFeaturesFromInt(sb.FeatureRoCompat)
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock_64.go b/pkg/sentry/fsimpl/ext/disklayout/superblock_64.go
deleted file mode 100644
index 2c9039327..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/superblock_64.go
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// SuperBlock64Bit implements SuperBlock and represents the 64-bit version of
-// the ext4_super_block struct in fs/ext4/ext4.h. This sums up to be exactly
-// 1024 bytes (smallest possible block size) and hence the superblock always
-// fits in no more than one data block. Should only be used when the 64-bit
-// feature is set.
-//
-// +marshal
-type SuperBlock64Bit struct {
- // We embed the 32-bit struct here because 64-bit version is just an extension
- // of the 32-bit version.
- SuperBlock32Bit
-
- BlocksCountHi uint32
- ReservedBlocksCountHi uint32
- FreeBlocksCountHi uint32
- MinInodeSize uint16
- WantInodeSize uint16
- Flags uint32
- RaidStride uint16
- MmpInterval uint16
- MmpBlock uint64
- RaidStripeWidth uint32
- LogGroupsPerFlex uint8
- ChecksumType uint8
- _ uint16
- KbytesWritten uint64
- SnapshotInum uint32
- SnapshotID uint32
- SnapshotRsrvBlocksCount uint64
- SnapshotList uint32
- ErrorCount uint32
- FirstErrorTime uint32
- FirstErrorInode uint32
- FirstErrorBlock uint64
- FirstErrorFunction [32]byte
- FirstErrorLine uint32
- LastErrorTime uint32
- LastErrorInode uint32
- LastErrorLine uint32
- LastErrorBlock uint64
- LastErrorFunction [32]byte
- MountOpts [64]byte
- UserQuotaInum uint32
- GroupQuotaInum uint32
- OverheadBlocks uint32
- BackupBgs [2]uint32
- EncryptAlgos [4]uint8
- EncryptPwSalt [16]uint8
- LostFoundInode uint32
- ProjectQuotaInode uint32
- ChecksumSeed uint32
- WtimeHi uint8
- MtimeHi uint8
- MkfsTimeHi uint8
- LastCheckHi uint8
- FirstErrorTimeHi uint8
- LastErrorTimeHi uint8
- _ [2]uint8
- Encoding uint16
- EncodingFlags uint16
- _ [95]uint32
- Checksum uint32
-}
-
-// Compiles only if SuperBlock64Bit implements SuperBlock.
-var _ SuperBlock = (*SuperBlock64Bit)(nil)
-
-// Only override methods which change based on the 64-bit feature.
-
-// BlocksCount implements SuperBlock.BlocksCount.
-func (sb *SuperBlock64Bit) BlocksCount() uint64 {
- return (uint64(sb.BlocksCountHi) << 32) | uint64(sb.BlocksCountLo)
-}
-
-// FreeBlocksCount implements SuperBlock.FreeBlocksCount.
-func (sb *SuperBlock64Bit) FreeBlocksCount() uint64 {
- return (uint64(sb.FreeBlocksCountHi) << 32) | uint64(sb.FreeBlocksCountLo)
-}
-
-// BgDescSize implements SuperBlock.BgDescSize.
-func (sb *SuperBlock64Bit) BgDescSize() uint16 { return sb.BgDescSizeRaw }
diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock_old.go b/pkg/sentry/fsimpl/ext/disklayout/superblock_old.go
deleted file mode 100644
index e4709f23c..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/superblock_old.go
+++ /dev/null
@@ -1,107 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// SuperBlockOld implements SuperBlock and represents the old version of the
-// superblock struct. Should be used only if RevLevel = OldRev.
-//
-// +marshal
-type SuperBlockOld struct {
- InodesCountRaw uint32
- BlocksCountLo uint32
- ReservedBlocksCount uint32
- FreeBlocksCountLo uint32
- FreeInodesCountRaw uint32
- FirstDataBlockRaw uint32
- LogBlockSize uint32
- LogClusterSize uint32
- BlocksPerGroupRaw uint32
- ClustersPerGroupRaw uint32
- InodesPerGroupRaw uint32
- Mtime uint32
- Wtime uint32
- MountCountRaw uint16
- MaxMountCountRaw uint16
- MagicRaw uint16
- State uint16
- Errors uint16
- MinorRevLevel uint16
- LastCheck uint32
- CheckInterval uint32
- CreatorOS uint32
- RevLevel uint32
- DefResUID uint16
- DefResGID uint16
-}
-
-// Compiles only if SuperBlockOld implements SuperBlock.
-var _ SuperBlock = (*SuperBlockOld)(nil)
-
-// InodesCount implements SuperBlock.InodesCount.
-func (sb *SuperBlockOld) InodesCount() uint32 { return sb.InodesCountRaw }
-
-// BlocksCount implements SuperBlock.BlocksCount.
-func (sb *SuperBlockOld) BlocksCount() uint64 { return uint64(sb.BlocksCountLo) }
-
-// FreeBlocksCount implements SuperBlock.FreeBlocksCount.
-func (sb *SuperBlockOld) FreeBlocksCount() uint64 { return uint64(sb.FreeBlocksCountLo) }
-
-// FreeInodesCount implements SuperBlock.FreeInodesCount.
-func (sb *SuperBlockOld) FreeInodesCount() uint32 { return sb.FreeInodesCountRaw }
-
-// MountCount implements SuperBlock.MountCount.
-func (sb *SuperBlockOld) MountCount() uint16 { return sb.MountCountRaw }
-
-// MaxMountCount implements SuperBlock.MaxMountCount.
-func (sb *SuperBlockOld) MaxMountCount() uint16 { return sb.MaxMountCountRaw }
-
-// FirstDataBlock implements SuperBlock.FirstDataBlock.
-func (sb *SuperBlockOld) FirstDataBlock() uint32 { return sb.FirstDataBlockRaw }
-
-// BlockSize implements SuperBlock.BlockSize.
-func (sb *SuperBlockOld) BlockSize() uint64 { return 1 << (10 + sb.LogBlockSize) }
-
-// BlocksPerGroup implements SuperBlock.BlocksPerGroup.
-func (sb *SuperBlockOld) BlocksPerGroup() uint32 { return sb.BlocksPerGroupRaw }
-
-// ClusterSize implements SuperBlock.ClusterSize.
-func (sb *SuperBlockOld) ClusterSize() uint64 { return 1 << (10 + sb.LogClusterSize) }
-
-// ClustersPerGroup implements SuperBlock.ClustersPerGroup.
-func (sb *SuperBlockOld) ClustersPerGroup() uint32 { return sb.ClustersPerGroupRaw }
-
-// InodeSize implements SuperBlock.InodeSize.
-func (sb *SuperBlockOld) InodeSize() uint16 { return OldInodeSize }
-
-// InodesPerGroup implements SuperBlock.InodesPerGroup.
-func (sb *SuperBlockOld) InodesPerGroup() uint32 { return sb.InodesPerGroupRaw }
-
-// BgDescSize implements SuperBlock.BgDescSize.
-func (sb *SuperBlockOld) BgDescSize() uint16 { return 32 }
-
-// CompatibleFeatures implements SuperBlock.CompatibleFeatures.
-func (sb *SuperBlockOld) CompatibleFeatures() CompatFeatures { return CompatFeatures{} }
-
-// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures.
-func (sb *SuperBlockOld) IncompatibleFeatures() IncompatFeatures { return IncompatFeatures{} }
-
-// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures.
-func (sb *SuperBlockOld) ReadOnlyCompatibleFeatures() RoCompatFeatures { return RoCompatFeatures{} }
-
-// Magic implements SuperBlock.Magic.
-func (sb *SuperBlockOld) Magic() uint16 { return sb.MagicRaw }
-
-// Revision implements SuperBlock.Revision.
-func (sb *SuperBlockOld) Revision() SbRevision { return SbRevision(sb.RevLevel) }
diff --git a/pkg/sentry/fsimpl/ext/disklayout/superblock_test.go b/pkg/sentry/fsimpl/ext/disklayout/superblock_test.go
deleted file mode 100644
index b734b6987..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/superblock_test.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "testing"
-)
-
-// TestSuperBlockSize tests that the superblock structs are of the correct
-// size.
-func TestSuperBlockSize(t *testing.T) {
- var sbOld SuperBlockOld
- assertSize(t, &sbOld, 84)
- var sb32 SuperBlock32Bit
- assertSize(t, &sb32, 336)
- var sb64 SuperBlock64Bit
- assertSize(t, &sb64, 1024)
-}
diff --git a/pkg/sentry/fsimpl/ext/disklayout/test_utils.go b/pkg/sentry/fsimpl/ext/disklayout/test_utils.go
deleted file mode 100644
index a4bc08411..000000000
--- a/pkg/sentry/fsimpl/ext/disklayout/test_utils.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "reflect"
- "testing"
-
- "gvisor.dev/gvisor/pkg/marshal"
-)
-
-func assertSize(t *testing.T, v marshal.Marshallable, want int) {
- t.Helper()
-
- if got := v.SizeBytes(); got != want {
- t.Errorf("struct %s should be exactly %d bytes but is %d bytes", reflect.TypeOf(v).Name(), want, got)
- }
-}
diff --git a/pkg/sentry/fsimpl/ext/ext.go b/pkg/sentry/fsimpl/ext/ext.go
deleted file mode 100644
index 38fb7962b..000000000
--- a/pkg/sentry/fsimpl/ext/ext.go
+++ /dev/null
@@ -1,159 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package ext implements readonly ext(2/3/4) filesystems.
-package ext
-
-import (
- "errors"
- "fmt"
- "io"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fd"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// Name is the name of this filesystem.
-const Name = "ext"
-
-// FilesystemType implements vfs.FilesystemType.
-//
-// +stateify savable
-type FilesystemType struct{}
-
-// getDeviceFd returns an io.ReaderAt to the underlying device.
-// Currently there are two ways of mounting an ext(2/3/4) fs:
-// 1. Specify a mount with our internal special MountType in the OCI spec.
-// 2. Expose the device to the container and mount it from application layer.
-func getDeviceFd(source string, opts vfs.GetFilesystemOptions) (io.ReaderAt, error) {
- if opts.InternalData == nil {
- // User mount call.
- // TODO(b/134676337): Open the device specified by `source` and return that.
- panic("unimplemented")
- }
-
- // GetFilesystem call originated from within the sentry.
- devFd, ok := opts.InternalData.(int)
- if !ok {
- return nil, errors.New("internal data for ext fs must be an int containing the file descriptor to device")
- }
-
- if devFd < 0 {
- return nil, fmt.Errorf("ext device file descriptor is not valid: %d", devFd)
- }
-
- // The fd.ReadWriter returned from fd.NewReadWriter() does not take ownership
- // of the file descriptor and hence will not close it when it is garbage
- // collected.
- return fd.NewReadWriter(devFd), nil
-}
-
-// isCompatible checks if the superblock has feature sets which are compatible.
-// We only need to check the superblock incompatible feature set since we are
-// mounting readonly. We will also need to check readonly compatible feature
-// set when mounting for read/write.
-func isCompatible(sb disklayout.SuperBlock) bool {
- // Please note that what is being checked is limited based on the fact that we
- // are mounting readonly and that we are not journaling. When mounting
- // read/write or with a journal, this must be reevaluated.
- incompatFeatures := sb.IncompatibleFeatures()
- if incompatFeatures.MetaBG {
- log.Warningf("ext fs: meta block groups are not supported")
- return false
- }
- if incompatFeatures.MMP {
- log.Warningf("ext fs: multiple mount protection is not supported")
- return false
- }
- if incompatFeatures.Encrypted {
- log.Warningf("ext fs: encrypted inodes not supported")
- return false
- }
- if incompatFeatures.InlineData {
- log.Warningf("ext fs: inline files not supported")
- return false
- }
- return true
-}
-
-// Name implements vfs.FilesystemType.Name.
-func (FilesystemType) Name() string {
- return Name
-}
-
-// Release implements vfs.FilesystemType.Release.
-func (FilesystemType) Release(ctx context.Context) {}
-
-// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
-func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
- // TODO(b/134676337): Ensure that the user is mounting readonly. If not,
- // EACCESS should be returned according to mount(2). Filesystem independent
- // flags (like readonly) are currently not available in pkg/sentry/vfs.
-
- devMinor, err := vfsObj.GetAnonBlockDevMinor()
- if err != nil {
- return nil, nil, err
- }
-
- dev, err := getDeviceFd(source, opts)
- if err != nil {
- return nil, nil, err
- }
-
- fs := filesystem{
- dev: dev,
- inodeCache: make(map[uint32]*inode),
- devMinor: devMinor,
- }
- fs.vfsfs.Init(vfsObj, &fsType, &fs)
- fs.sb, err = readSuperBlock(dev)
- if err != nil {
- fs.vfsfs.DecRef(ctx)
- return nil, nil, err
- }
-
- if fs.sb.Magic() != linux.EXT_SUPER_MAGIC {
- // mount(2) specifies that EINVAL should be returned if the superblock is
- // invalid.
- fs.vfsfs.DecRef(ctx)
- return nil, nil, syserror.EINVAL
- }
-
- // Refuse to mount if the filesystem is incompatible.
- if !isCompatible(fs.sb) {
- fs.vfsfs.DecRef(ctx)
- return nil, nil, syserror.EINVAL
- }
-
- fs.bgs, err = readBlockGroups(dev, fs.sb)
- if err != nil {
- fs.vfsfs.DecRef(ctx)
- return nil, nil, err
- }
-
- rootInode, err := fs.getOrCreateInodeLocked(disklayout.RootDirInode)
- if err != nil {
- fs.vfsfs.DecRef(ctx)
- return nil, nil, err
- }
- rootInode.incRef()
-
- return &fs.vfsfs, &newDentry(rootInode).vfsd, nil
-}
diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go
deleted file mode 100644
index d9fd4590c..000000000
--- a/pkg/sentry/fsimpl/ext/ext_test.go
+++ /dev/null
@@ -1,926 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "fmt"
- "io"
- "os"
- "path"
- "sort"
- "testing"
-
- "github.com/google/go-cmp/cmp"
- "github.com/google/go-cmp/cmp/cmpopts"
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/test/testutil"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-const (
- assetsDir = "pkg/sentry/fsimpl/ext/assets"
-)
-
-var (
- ext2ImagePath = path.Join(assetsDir, "tiny.ext2")
- ext3ImagePath = path.Join(assetsDir, "tiny.ext3")
- ext4ImagePath = path.Join(assetsDir, "tiny.ext4")
-)
-
-// setUp opens imagePath as an ext Filesystem and returns all necessary
-// elements required to run tests. If error is non-nil, it also returns a tear
-// down function which must be called after the test is run for clean up.
-func setUp(t *testing.T, imagePath string) (context.Context, *vfs.VirtualFilesystem, *vfs.VirtualDentry, func(), error) {
- localImagePath, err := testutil.FindFile(imagePath)
- if err != nil {
- return nil, nil, nil, nil, fmt.Errorf("failed to open local image at path %s: %v", imagePath, err)
- }
-
- f, err := os.Open(localImagePath)
- if err != nil {
- return nil, nil, nil, nil, err
- }
-
- ctx := contexttest.Context(t)
- creds := auth.CredentialsFromContext(ctx)
-
- // Create VFS.
- vfsObj := &vfs.VirtualFilesystem{}
- if err := vfsObj.Init(ctx); err != nil {
- t.Fatalf("VFS init: %v", err)
- }
- vfsObj.MustRegisterFilesystemType("extfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- })
- mntns, err := vfsObj.NewMountNamespace(ctx, creds, localImagePath, "extfs", &vfs.MountOptions{
- GetFilesystemOptions: vfs.GetFilesystemOptions{
- InternalData: int(f.Fd()),
- },
- })
- if err != nil {
- f.Close()
- return nil, nil, nil, nil, err
- }
-
- root := mntns.Root()
- root.IncRef()
-
- tearDown := func() {
- root.DecRef(ctx)
-
- if err := f.Close(); err != nil {
- t.Fatalf("tearDown failed: %v", err)
- }
- }
- return ctx, vfsObj, &root, tearDown, nil
-}
-
-// TODO(b/134676337): Test vfs.FilesystemImpl.ReadlinkAt and
-// vfs.FilesystemImpl.StatFSAt which are not implemented in
-// vfs.VirtualFilesystem yet.
-
-// TestSeek tests vfs.FileDescriptionImpl.Seek functionality.
-func TestSeek(t *testing.T) {
- type seekTest struct {
- name string
- image string
- path string
- }
-
- tests := []seekTest{
- {
- name: "ext4 root dir seek",
- image: ext4ImagePath,
- path: "/",
- },
- {
- name: "ext3 root dir seek",
- image: ext3ImagePath,
- path: "/",
- },
- {
- name: "ext2 root dir seek",
- image: ext2ImagePath,
- path: "/",
- },
- {
- name: "ext4 reg file seek",
- image: ext4ImagePath,
- path: "/file.txt",
- },
- {
- name: "ext3 reg file seek",
- image: ext3ImagePath,
- path: "/file.txt",
- },
- {
- name: "ext2 reg file seek",
- image: ext2ImagePath,
- path: "/file.txt",
- },
- }
-
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- ctx, vfsfs, root, tearDown, err := setUp(t, test.image)
- if err != nil {
- t.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- fd, err := vfsfs.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)},
- &vfs.OpenOptions{},
- )
- if err != nil {
- t.Fatalf("vfsfs.OpenAt failed: %v", err)
- }
-
- if n, err := fd.Seek(ctx, 0, linux.SEEK_SET); n != 0 || err != nil {
- t.Errorf("expected seek position 0, got %d and error %v", n, err)
- }
-
- stat, err := fd.Stat(ctx, vfs.StatOptions{})
- if err != nil {
- t.Errorf("fd.stat failed for file %s in image %s: %v", test.path, test.image, err)
- }
-
- // We should be able to seek beyond the end of file.
- size := int64(stat.Size)
- if n, err := fd.Seek(ctx, size, linux.SEEK_SET); n != size || err != nil {
- t.Errorf("expected seek position %d, got %d and error %v", size, n, err)
- }
-
- // EINVAL should be returned if the resulting offset is negative.
- if _, err := fd.Seek(ctx, -1, linux.SEEK_SET); err != syserror.EINVAL {
- t.Errorf("expected error EINVAL but got %v", err)
- }
-
- if n, err := fd.Seek(ctx, 3, linux.SEEK_CUR); n != size+3 || err != nil {
- t.Errorf("expected seek position %d, got %d and error %v", size+3, n, err)
- }
-
- // Make sure negative offsets work with SEEK_CUR.
- if n, err := fd.Seek(ctx, -2, linux.SEEK_CUR); n != size+1 || err != nil {
- t.Errorf("expected seek position %d, got %d and error %v", size+1, n, err)
- }
-
- // EINVAL should be returned if the resulting offset is negative.
- if _, err := fd.Seek(ctx, -(size + 2), linux.SEEK_CUR); err != syserror.EINVAL {
- t.Errorf("expected error EINVAL but got %v", err)
- }
-
- // Make sure SEEK_END works with regular files.
- if _, ok := fd.Impl().(*regularFileFD); ok {
- // Seek back to 0.
- if n, err := fd.Seek(ctx, -size, linux.SEEK_END); n != 0 || err != nil {
- t.Errorf("expected seek position %d, got %d and error %v", 0, n, err)
- }
-
- // Seek forward beyond EOF.
- if n, err := fd.Seek(ctx, 1, linux.SEEK_END); n != size+1 || err != nil {
- t.Errorf("expected seek position %d, got %d and error %v", size+1, n, err)
- }
-
- // EINVAL should be returned if the resulting offset is negative.
- if _, err := fd.Seek(ctx, -(size + 1), linux.SEEK_END); err != syserror.EINVAL {
- t.Errorf("expected error EINVAL but got %v", err)
- }
- }
- })
- }
-}
-
-// TestStatAt tests filesystem.StatAt functionality.
-func TestStatAt(t *testing.T) {
- type statAtTest struct {
- name string
- image string
- path string
- want linux.Statx
- }
-
- tests := []statAtTest{
- {
- name: "ext4 statx small file",
- image: ext4ImagePath,
- path: "/file.txt",
- want: linux.Statx{
- Blksize: 0x400,
- Nlink: 1,
- UID: 0,
- GID: 0,
- Mode: 0644 | linux.ModeRegular,
- Size: 13,
- },
- },
- {
- name: "ext3 statx small file",
- image: ext3ImagePath,
- path: "/file.txt",
- want: linux.Statx{
- Blksize: 0x400,
- Nlink: 1,
- UID: 0,
- GID: 0,
- Mode: 0644 | linux.ModeRegular,
- Size: 13,
- },
- },
- {
- name: "ext2 statx small file",
- image: ext2ImagePath,
- path: "/file.txt",
- want: linux.Statx{
- Blksize: 0x400,
- Nlink: 1,
- UID: 0,
- GID: 0,
- Mode: 0644 | linux.ModeRegular,
- Size: 13,
- },
- },
- {
- name: "ext4 statx big file",
- image: ext4ImagePath,
- path: "/bigfile.txt",
- want: linux.Statx{
- Blksize: 0x400,
- Nlink: 1,
- UID: 0,
- GID: 0,
- Mode: 0644 | linux.ModeRegular,
- Size: 13042,
- },
- },
- {
- name: "ext3 statx big file",
- image: ext3ImagePath,
- path: "/bigfile.txt",
- want: linux.Statx{
- Blksize: 0x400,
- Nlink: 1,
- UID: 0,
- GID: 0,
- Mode: 0644 | linux.ModeRegular,
- Size: 13042,
- },
- },
- {
- name: "ext2 statx big file",
- image: ext2ImagePath,
- path: "/bigfile.txt",
- want: linux.Statx{
- Blksize: 0x400,
- Nlink: 1,
- UID: 0,
- GID: 0,
- Mode: 0644 | linux.ModeRegular,
- Size: 13042,
- },
- },
- {
- name: "ext4 statx symlink file",
- image: ext4ImagePath,
- path: "/symlink.txt",
- want: linux.Statx{
- Blksize: 0x400,
- Nlink: 1,
- UID: 0,
- GID: 0,
- Mode: 0777 | linux.ModeSymlink,
- Size: 8,
- },
- },
- {
- name: "ext3 statx symlink file",
- image: ext3ImagePath,
- path: "/symlink.txt",
- want: linux.Statx{
- Blksize: 0x400,
- Nlink: 1,
- UID: 0,
- GID: 0,
- Mode: 0777 | linux.ModeSymlink,
- Size: 8,
- },
- },
- {
- name: "ext2 statx symlink file",
- image: ext2ImagePath,
- path: "/symlink.txt",
- want: linux.Statx{
- Blksize: 0x400,
- Nlink: 1,
- UID: 0,
- GID: 0,
- Mode: 0777 | linux.ModeSymlink,
- Size: 8,
- },
- },
- }
-
- // Ignore the fields that are not supported by filesystem.StatAt yet and
- // those which are likely to change as the image does.
- ignoredFields := map[string]bool{
- "Attributes": true,
- "AttributesMask": true,
- "Atime": true,
- "Blocks": true,
- "Btime": true,
- "Ctime": true,
- "DevMajor": true,
- "DevMinor": true,
- "Ino": true,
- "Mask": true,
- "Mtime": true,
- "RdevMajor": true,
- "RdevMinor": true,
- }
-
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- ctx, vfsfs, root, tearDown, err := setUp(t, test.image)
- if err != nil {
- t.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- got, err := vfsfs.StatAt(ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)},
- &vfs.StatOptions{},
- )
- if err != nil {
- t.Fatalf("vfsfs.StatAt failed for file %s in image %s: %v", test.path, test.image, err)
- }
-
- cmpIgnoreFields := cmp.FilterPath(func(p cmp.Path) bool {
- _, ok := ignoredFields[p.String()]
- return ok
- }, cmp.Ignore())
- if diff := cmp.Diff(got, test.want, cmpIgnoreFields, cmpopts.IgnoreUnexported(linux.Statx{})); diff != "" {
- t.Errorf("stat mismatch (-want +got):\n%s", diff)
- }
- })
- }
-}
-
-// TestRead tests the read functionality for vfs file descriptions.
-func TestRead(t *testing.T) {
- type readTest struct {
- name string
- image string
- absPath string
- }
-
- tests := []readTest{
- {
- name: "ext4 read small file",
- image: ext4ImagePath,
- absPath: "/file.txt",
- },
- {
- name: "ext3 read small file",
- image: ext3ImagePath,
- absPath: "/file.txt",
- },
- {
- name: "ext2 read small file",
- image: ext2ImagePath,
- absPath: "/file.txt",
- },
- {
- name: "ext4 read big file",
- image: ext4ImagePath,
- absPath: "/bigfile.txt",
- },
- {
- name: "ext3 read big file",
- image: ext3ImagePath,
- absPath: "/bigfile.txt",
- },
- {
- name: "ext2 read big file",
- image: ext2ImagePath,
- absPath: "/bigfile.txt",
- },
- }
-
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- ctx, vfsfs, root, tearDown, err := setUp(t, test.image)
- if err != nil {
- t.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- fd, err := vfsfs.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.absPath)},
- &vfs.OpenOptions{},
- )
- if err != nil {
- t.Fatalf("vfsfs.OpenAt failed: %v", err)
- }
-
- // Get a local file descriptor and compare its functionality with a vfs file
- // description for the same file.
- localFile, err := testutil.FindFile(path.Join(assetsDir, test.absPath))
- if err != nil {
- t.Fatalf("testutil.FindFile failed for %s: %v", test.absPath, err)
- }
-
- f, err := os.Open(localFile)
- if err != nil {
- t.Fatalf("os.Open failed for %s: %v", localFile, err)
- }
- defer f.Close()
-
- // Read the entire file by reading one byte repeatedly. Doing this stress
- // tests the underlying file reader implementation.
- got := make([]byte, 1)
- want := make([]byte, 1)
- for {
- n, err := f.Read(want)
- fd.Read(ctx, usermem.BytesIOSequence(got), vfs.ReadOptions{})
-
- if diff := cmp.Diff(got, want); diff != "" {
- t.Errorf("file data mismatch (-want +got):\n%s", diff)
- }
-
- // Make sure there is no more file data left after getting EOF.
- if n == 0 || err == io.EOF {
- if n, _ := fd.Read(ctx, usermem.BytesIOSequence(got), vfs.ReadOptions{}); n != 0 {
- t.Errorf("extra unexpected file data in file %s in image %s", test.absPath, test.image)
- }
-
- break
- }
-
- if err != nil {
- t.Fatalf("read failed: %v", err)
- }
- }
- })
- }
-}
-
-// iterDirentsCb is a simple callback which just keeps adding the dirents to an
-// internal list. Implements vfs.IterDirentsCallback.
-type iterDirentsCb struct {
- dirents []vfs.Dirent
-}
-
-// Compiles only if iterDirentCb implements vfs.IterDirentsCallback.
-var _ vfs.IterDirentsCallback = (*iterDirentsCb)(nil)
-
-// newIterDirentsCb is the iterDirent
-func newIterDirentCb() *iterDirentsCb {
- return &iterDirentsCb{dirents: make([]vfs.Dirent, 0)}
-}
-
-// Handle implements vfs.IterDirentsCallback.Handle.
-func (cb *iterDirentsCb) Handle(dirent vfs.Dirent) error {
- cb.dirents = append(cb.dirents, dirent)
- return nil
-}
-
-// TestIterDirents tests the FileDescriptionImpl.IterDirents functionality.
-func TestIterDirents(t *testing.T) {
- type iterDirentTest struct {
- name string
- image string
- path string
- want []vfs.Dirent
- }
-
- wantDirents := []vfs.Dirent{
- {
- Name: ".",
- Type: linux.DT_DIR,
- },
- {
- Name: "..",
- Type: linux.DT_DIR,
- },
- {
- Name: "lost+found",
- Type: linux.DT_DIR,
- },
- {
- Name: "file.txt",
- Type: linux.DT_REG,
- },
- {
- Name: "bigfile.txt",
- Type: linux.DT_REG,
- },
- {
- Name: "symlink.txt",
- Type: linux.DT_LNK,
- },
- }
- tests := []iterDirentTest{
- {
- name: "ext4 root dir iteration",
- image: ext4ImagePath,
- path: "/",
- want: wantDirents,
- },
- {
- name: "ext3 root dir iteration",
- image: ext3ImagePath,
- path: "/",
- want: wantDirents,
- },
- {
- name: "ext2 root dir iteration",
- image: ext2ImagePath,
- path: "/",
- want: wantDirents,
- },
- }
-
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- ctx, vfsfs, root, tearDown, err := setUp(t, test.image)
- if err != nil {
- t.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- fd, err := vfsfs.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)},
- &vfs.OpenOptions{},
- )
- if err != nil {
- t.Fatalf("vfsfs.OpenAt failed: %v", err)
- }
-
- cb := &iterDirentsCb{}
- if err = fd.IterDirents(ctx, cb); err != nil {
- t.Fatalf("dir fd.IterDirents() failed: %v", err)
- }
-
- sort.Slice(cb.dirents, func(i int, j int) bool { return cb.dirents[i].Name < cb.dirents[j].Name })
- sort.Slice(test.want, func(i int, j int) bool { return test.want[i].Name < test.want[j].Name })
-
- // Ignore the inode number and offset of dirents because those are likely to
- // change as the underlying image changes.
- cmpIgnoreFields := cmp.FilterPath(func(p cmp.Path) bool {
- return p.String() == "Ino" || p.String() == "NextOff"
- }, cmp.Ignore())
- if diff := cmp.Diff(cb.dirents, test.want, cmpIgnoreFields); diff != "" {
- t.Errorf("dirents mismatch (-want +got):\n%s", diff)
- }
- })
- }
-}
-
-// TestRootDir tests that the root directory inode is correctly initialized and
-// returned from setUp.
-func TestRootDir(t *testing.T) {
- type inodeProps struct {
- Mode linux.FileMode
- UID auth.KUID
- GID auth.KGID
- Size uint64
- InodeSize uint16
- Links uint16
- Flags disklayout.InodeFlags
- }
-
- type rootDirTest struct {
- name string
- image string
- wantInode inodeProps
- }
-
- tests := []rootDirTest{
- {
- name: "ext4 root dir",
- image: ext4ImagePath,
- wantInode: inodeProps{
- Mode: linux.ModeDirectory | 0755,
- Size: 0x400,
- InodeSize: 0x80,
- Links: 3,
- Flags: disklayout.InodeFlags{Extents: true},
- },
- },
- {
- name: "ext3 root dir",
- image: ext3ImagePath,
- wantInode: inodeProps{
- Mode: linux.ModeDirectory | 0755,
- Size: 0x400,
- InodeSize: 0x80,
- Links: 3,
- },
- },
- {
- name: "ext2 root dir",
- image: ext2ImagePath,
- wantInode: inodeProps{
- Mode: linux.ModeDirectory | 0755,
- Size: 0x400,
- InodeSize: 0x80,
- Links: 3,
- },
- },
- }
-
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- _, _, vd, tearDown, err := setUp(t, test.image)
- if err != nil {
- t.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- d, ok := vd.Dentry().Impl().(*dentry)
- if !ok {
- t.Fatalf("ext dentry of incorrect type: %T", vd.Dentry().Impl())
- }
-
- // Offload inode contents into local structs for comparison.
- gotInode := inodeProps{
- Mode: d.inode.diskInode.Mode(),
- UID: d.inode.diskInode.UID(),
- GID: d.inode.diskInode.GID(),
- Size: d.inode.diskInode.Size(),
- InodeSize: d.inode.diskInode.InodeSize(),
- Links: d.inode.diskInode.LinksCount(),
- Flags: d.inode.diskInode.Flags(),
- }
-
- if diff := cmp.Diff(gotInode, test.wantInode); diff != "" {
- t.Errorf("inode mismatch (-want +got):\n%s", diff)
- }
- })
- }
-}
-
-// TestFilesystemInit tests that the filesystem superblock and block group
-// descriptors are correctly read in and initialized.
-func TestFilesystemInit(t *testing.T) {
- // sb only contains the immutable properties of the superblock.
- type sb struct {
- InodesCount uint32
- BlocksCount uint64
- MaxMountCount uint16
- FirstDataBlock uint32
- BlockSize uint64
- BlocksPerGroup uint32
- ClusterSize uint64
- ClustersPerGroup uint32
- InodeSize uint16
- InodesPerGroup uint32
- BgDescSize uint16
- Magic uint16
- Revision disklayout.SbRevision
- CompatFeatures disklayout.CompatFeatures
- IncompatFeatures disklayout.IncompatFeatures
- RoCompatFeatures disklayout.RoCompatFeatures
- }
-
- // bg only contains the immutable properties of the block group descriptor.
- type bg struct {
- InodeTable uint64
- BlockBitmap uint64
- InodeBitmap uint64
- ExclusionBitmap uint64
- Flags disklayout.BGFlags
- }
-
- type fsInitTest struct {
- name string
- image string
- wantSb sb
- wantBgs []bg
- }
-
- tests := []fsInitTest{
- {
- name: "ext4 filesystem init",
- image: ext4ImagePath,
- wantSb: sb{
- InodesCount: 0x10,
- BlocksCount: 0x40,
- MaxMountCount: 0xffff,
- FirstDataBlock: 0x1,
- BlockSize: 0x400,
- BlocksPerGroup: 0x2000,
- ClusterSize: 0x400,
- ClustersPerGroup: 0x2000,
- InodeSize: 0x80,
- InodesPerGroup: 0x10,
- BgDescSize: 0x40,
- Magic: linux.EXT_SUPER_MAGIC,
- Revision: disklayout.DynamicRev,
- CompatFeatures: disklayout.CompatFeatures{
- ExtAttr: true,
- ResizeInode: true,
- DirIndex: true,
- },
- IncompatFeatures: disklayout.IncompatFeatures{
- DirentFileType: true,
- Extents: true,
- Is64Bit: true,
- FlexBg: true,
- },
- RoCompatFeatures: disklayout.RoCompatFeatures{
- Sparse: true,
- LargeFile: true,
- HugeFile: true,
- DirNlink: true,
- ExtraIsize: true,
- MetadataCsum: true,
- },
- },
- wantBgs: []bg{
- {
- InodeTable: 0x23,
- BlockBitmap: 0x3,
- InodeBitmap: 0x13,
- Flags: disklayout.BGFlags{
- InodeZeroed: true,
- },
- },
- },
- },
- {
- name: "ext3 filesystem init",
- image: ext3ImagePath,
- wantSb: sb{
- InodesCount: 0x10,
- BlocksCount: 0x40,
- MaxMountCount: 0xffff,
- FirstDataBlock: 0x1,
- BlockSize: 0x400,
- BlocksPerGroup: 0x2000,
- ClusterSize: 0x400,
- ClustersPerGroup: 0x2000,
- InodeSize: 0x80,
- InodesPerGroup: 0x10,
- BgDescSize: 0x20,
- Magic: linux.EXT_SUPER_MAGIC,
- Revision: disklayout.DynamicRev,
- CompatFeatures: disklayout.CompatFeatures{
- ExtAttr: true,
- ResizeInode: true,
- DirIndex: true,
- },
- IncompatFeatures: disklayout.IncompatFeatures{
- DirentFileType: true,
- },
- RoCompatFeatures: disklayout.RoCompatFeatures{
- Sparse: true,
- LargeFile: true,
- },
- },
- wantBgs: []bg{
- {
- InodeTable: 0x5,
- BlockBitmap: 0x3,
- InodeBitmap: 0x4,
- Flags: disklayout.BGFlags{
- InodeZeroed: true,
- },
- },
- },
- },
- {
- name: "ext2 filesystem init",
- image: ext2ImagePath,
- wantSb: sb{
- InodesCount: 0x10,
- BlocksCount: 0x40,
- MaxMountCount: 0xffff,
- FirstDataBlock: 0x1,
- BlockSize: 0x400,
- BlocksPerGroup: 0x2000,
- ClusterSize: 0x400,
- ClustersPerGroup: 0x2000,
- InodeSize: 0x80,
- InodesPerGroup: 0x10,
- BgDescSize: 0x20,
- Magic: linux.EXT_SUPER_MAGIC,
- Revision: disklayout.DynamicRev,
- CompatFeatures: disklayout.CompatFeatures{
- ExtAttr: true,
- ResizeInode: true,
- DirIndex: true,
- },
- IncompatFeatures: disklayout.IncompatFeatures{
- DirentFileType: true,
- },
- RoCompatFeatures: disklayout.RoCompatFeatures{
- Sparse: true,
- LargeFile: true,
- },
- },
- wantBgs: []bg{
- {
- InodeTable: 0x5,
- BlockBitmap: 0x3,
- InodeBitmap: 0x4,
- Flags: disklayout.BGFlags{
- InodeZeroed: true,
- },
- },
- },
- },
- }
-
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- _, _, vd, tearDown, err := setUp(t, test.image)
- if err != nil {
- t.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- fs, ok := vd.Mount().Filesystem().Impl().(*filesystem)
- if !ok {
- t.Fatalf("ext filesystem of incorrect type: %T", vd.Mount().Filesystem().Impl())
- }
-
- // Offload superblock and block group descriptors contents into
- // local structs for comparison.
- totalFreeInodes := uint32(0)
- totalFreeBlocks := uint64(0)
- gotSb := sb{
- InodesCount: fs.sb.InodesCount(),
- BlocksCount: fs.sb.BlocksCount(),
- MaxMountCount: fs.sb.MaxMountCount(),
- FirstDataBlock: fs.sb.FirstDataBlock(),
- BlockSize: fs.sb.BlockSize(),
- BlocksPerGroup: fs.sb.BlocksPerGroup(),
- ClusterSize: fs.sb.ClusterSize(),
- ClustersPerGroup: fs.sb.ClustersPerGroup(),
- InodeSize: fs.sb.InodeSize(),
- InodesPerGroup: fs.sb.InodesPerGroup(),
- BgDescSize: fs.sb.BgDescSize(),
- Magic: fs.sb.Magic(),
- Revision: fs.sb.Revision(),
- CompatFeatures: fs.sb.CompatibleFeatures(),
- IncompatFeatures: fs.sb.IncompatibleFeatures(),
- RoCompatFeatures: fs.sb.ReadOnlyCompatibleFeatures(),
- }
- gotNumBgs := len(fs.bgs)
- gotBgs := make([]bg, gotNumBgs)
- for i := 0; i < gotNumBgs; i++ {
- gotBgs[i].InodeTable = fs.bgs[i].InodeTable()
- gotBgs[i].BlockBitmap = fs.bgs[i].BlockBitmap()
- gotBgs[i].InodeBitmap = fs.bgs[i].InodeBitmap()
- gotBgs[i].ExclusionBitmap = fs.bgs[i].ExclusionBitmap()
- gotBgs[i].Flags = fs.bgs[i].Flags()
-
- totalFreeInodes += fs.bgs[i].FreeInodesCount()
- totalFreeBlocks += uint64(fs.bgs[i].FreeBlocksCount())
- }
-
- if diff := cmp.Diff(gotSb, test.wantSb); diff != "" {
- t.Errorf("superblock mismatch (-want +got):\n%s", diff)
- }
-
- if diff := cmp.Diff(gotBgs, test.wantBgs); diff != "" {
- t.Errorf("block group descriptors mismatch (-want +got):\n%s", diff)
- }
-
- if diff := cmp.Diff(totalFreeInodes, fs.sb.FreeInodesCount()); diff != "" {
- t.Errorf("total free inodes mismatch (-want +got):\n%s", diff)
- }
-
- if diff := cmp.Diff(totalFreeBlocks, fs.sb.FreeBlocksCount()); diff != "" {
- t.Errorf("total free blocks mismatch (-want +got):\n%s", diff)
- }
- })
- }
-}
diff --git a/pkg/sentry/fsimpl/ext/extent_file.go b/pkg/sentry/fsimpl/ext/extent_file.go
deleted file mode 100644
index 778460107..000000000
--- a/pkg/sentry/fsimpl/ext/extent_file.go
+++ /dev/null
@@ -1,239 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "io"
- "sort"
-
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// extentFile is a type of regular file which uses extents to store file data.
-//
-// +stateify savable
-type extentFile struct {
- regFile regularFile
-
- // root is the root extent node. This lives in the 60 byte diskInode.Data().
- // Immutable.
- root disklayout.ExtentNode
-}
-
-// Compiles only if extentFile implements io.ReaderAt.
-var _ io.ReaderAt = (*extentFile)(nil)
-
-// newExtentFile is the extent file constructor. It reads the entire extent
-// tree into memory.
-// TODO(b/134676337): Build extent tree on demand to reduce memory usage.
-func newExtentFile(args inodeArgs) (*extentFile, error) {
- file := &extentFile{}
- file.regFile.impl = file
- file.regFile.inode.init(args, &file.regFile)
- err := file.buildExtTree()
- if err != nil {
- return nil, err
- }
- return file, nil
-}
-
-// buildExtTree builds the extent tree by reading it from disk by doing
-// running a simple DFS. It first reads the root node from the inode struct in
-// memory. Then it recursively builds the rest of the tree by reading it off
-// disk.
-//
-// Precondition: inode flag InExtents must be set.
-func (f *extentFile) buildExtTree() error {
- rootNodeData := f.regFile.inode.diskInode.Data()
-
- f.root.Header.UnmarshalBytes(rootNodeData[:disklayout.ExtentHeaderSize])
-
- // Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries.
- if f.root.Header.NumEntries > 4 {
- // read(2) specifies that EINVAL should be returned if the file is unsuitable
- // for reading.
- return syserror.EINVAL
- }
-
- f.root.Entries = make([]disklayout.ExtentEntryPair, f.root.Header.NumEntries)
- for i, off := uint16(0), disklayout.ExtentEntrySize; i < f.root.Header.NumEntries; i, off = i+1, off+disklayout.ExtentEntrySize {
- var curEntry disklayout.ExtentEntry
- if f.root.Header.Height == 0 {
- // Leaf node.
- curEntry = &disklayout.Extent{}
- } else {
- // Internal node.
- curEntry = &disklayout.ExtentIdx{}
- }
- curEntry.UnmarshalBytes(rootNodeData[off : off+disklayout.ExtentEntrySize])
- f.root.Entries[i].Entry = curEntry
- }
-
- // If this node is internal, perform DFS.
- if f.root.Header.Height > 0 {
- for i := uint16(0); i < f.root.Header.NumEntries; i++ {
- var err error
- if f.root.Entries[i].Node, err = f.buildExtTreeFromDisk(f.root.Entries[i].Entry); err != nil {
- return err
- }
- }
- }
-
- return nil
-}
-
-// buildExtTreeFromDisk reads the extent tree nodes from disk and recursively
-// builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to
-// by the ExtentEntry.
-func (f *extentFile) buildExtTreeFromDisk(entry disklayout.ExtentEntry) (*disklayout.ExtentNode, error) {
- var header disklayout.ExtentHeader
- off := entry.PhysicalBlock() * f.regFile.inode.blkSize
- err := readFromDisk(f.regFile.inode.fs.dev, int64(off), &header)
- if err != nil {
- return nil, err
- }
-
- entries := make([]disklayout.ExtentEntryPair, header.NumEntries)
- for i, off := uint16(0), off+disklayout.ExtentEntrySize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentEntrySize {
- var curEntry disklayout.ExtentEntry
- if header.Height == 0 {
- // Leaf node.
- curEntry = &disklayout.Extent{}
- } else {
- // Internal node.
- curEntry = &disklayout.ExtentIdx{}
- }
-
- err := readFromDisk(f.regFile.inode.fs.dev, int64(off), curEntry)
- if err != nil {
- return nil, err
- }
- entries[i].Entry = curEntry
- }
-
- // If this node is internal, perform DFS.
- if header.Height > 0 {
- for i := uint16(0); i < header.NumEntries; i++ {
- var err error
- entries[i].Node, err = f.buildExtTreeFromDisk(entries[i].Entry)
- if err != nil {
- return nil, err
- }
- }
- }
-
- return &disklayout.ExtentNode{header, entries}, nil
-}
-
-// ReadAt implements io.ReaderAt.ReadAt.
-func (f *extentFile) ReadAt(dst []byte, off int64) (int, error) {
- if len(dst) == 0 {
- return 0, nil
- }
-
- if off < 0 {
- return 0, syserror.EINVAL
- }
-
- if uint64(off) >= f.regFile.inode.diskInode.Size() {
- return 0, io.EOF
- }
-
- n, err := f.read(&f.root, uint64(off), dst)
- if n < len(dst) && err == nil {
- err = io.EOF
- }
- return n, err
-}
-
-// read is the recursive step of extentFile.ReadAt which traverses the extent
-// tree from the node passed and reads file data.
-func (f *extentFile) read(node *disklayout.ExtentNode, off uint64, dst []byte) (int, error) {
- // Perform a binary search for the node covering bytes starting at r.fileOff.
- // A highly fragmented filesystem can have upto 340 entries and so linear
- // search should be avoided. Finds the first entry which does not cover the
- // file block we want and subtracts 1 to get the desired index.
- fileBlk := uint32(off / f.regFile.inode.blkSize)
- n := len(node.Entries)
- found := sort.Search(n, func(i int) bool {
- return node.Entries[i].Entry.FileBlock() > fileBlk
- }) - 1
-
- // We should be in this recursive step only if the data we want exists under
- // the current node.
- if found < 0 {
- panic("searching for a file block in an extent entry which does not cover it")
- }
-
- read := 0
- toRead := len(dst)
- var curR int
- var err error
- for i := found; i < n && read < toRead; i++ {
- if node.Header.Height == 0 {
- curR, err = f.readFromExtent(node.Entries[i].Entry.(*disklayout.Extent), off, dst[read:])
- } else {
- curR, err = f.read(node.Entries[i].Node, off, dst[read:])
- }
-
- read += curR
- off += uint64(curR)
- if err != nil {
- return read, err
- }
- }
-
- return read, nil
-}
-
-// readFromExtent reads file data from the extent. It takes advantage of the
-// sequential nature of extents and reads file data from multiple blocks in one
-// call.
-//
-// A non-nil error indicates that this is a partial read and there is probably
-// more to read from this extent. The caller should propagate the error upward
-// and not move to the next extent in the tree.
-//
-// A subsequent call to extentReader.Read should continue reading from where we
-// left off as expected.
-func (f *extentFile) readFromExtent(ex *disklayout.Extent, off uint64, dst []byte) (int, error) {
- curFileBlk := uint32(off / f.regFile.inode.blkSize)
- exFirstFileBlk := ex.FileBlock()
- exLastFileBlk := exFirstFileBlk + uint32(ex.Length) // This is exclusive.
-
- // We should be in this recursive step only if the data we want exists under
- // the current extent.
- if curFileBlk < exFirstFileBlk || exLastFileBlk <= curFileBlk {
- panic("searching for a file block in an extent which does not cover it")
- }
-
- curPhyBlk := uint64(curFileBlk-exFirstFileBlk) + ex.PhysicalBlock()
- readStart := curPhyBlk*f.regFile.inode.blkSize + (off % f.regFile.inode.blkSize)
-
- endPhyBlk := ex.PhysicalBlock() + uint64(ex.Length)
- extentEnd := endPhyBlk * f.regFile.inode.blkSize // This is exclusive.
-
- toRead := int(extentEnd - readStart)
- if len(dst) < toRead {
- toRead = len(dst)
- }
-
- n, _ := f.regFile.inode.fs.dev.ReadAt(dst[:toRead], int64(readStart))
- if n < toRead {
- return n, syserror.EIO
- }
- return n, nil
-}
diff --git a/pkg/sentry/fsimpl/ext/extent_test.go b/pkg/sentry/fsimpl/ext/extent_test.go
deleted file mode 100644
index 985f76ac0..000000000
--- a/pkg/sentry/fsimpl/ext/extent_test.go
+++ /dev/null
@@ -1,266 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "bytes"
- "math/rand"
- "testing"
-
- "github.com/google/go-cmp/cmp"
- "github.com/google/go-cmp/cmp/cmpopts"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
-)
-
-const (
- // mockExtentBlkSize is the mock block size used for testing.
- // No block has more than 1 header + 4 entries.
- mockExtentBlkSize = uint64(64)
-)
-
-// The tree described below looks like:
-//
-// 0.{Head}[Idx][Idx]
-// / \
-// / \
-// 1.{Head}[Ext][Ext] 2.{Head}[Idx]
-// / | \
-// [Phy] [Phy, Phy] 3.{Head}[Ext]
-// |
-// [Phy, Phy, Phy]
-//
-// Legend:
-// - Head = ExtentHeader
-// - Idx = ExtentIdx
-// - Ext = Extent
-// - Phy = Physical Block
-//
-// Please note that ext4 might not construct extent trees looking like this.
-// This is purely for testing the tree traversal logic.
-var (
- node3 = &disklayout.ExtentNode{
- Header: disklayout.ExtentHeader{
- Magic: disklayout.ExtentMagic,
- NumEntries: 1,
- MaxEntries: 4,
- Height: 0,
- },
- Entries: []disklayout.ExtentEntryPair{
- {
- Entry: &disklayout.Extent{
- FirstFileBlock: 3,
- Length: 3,
- StartBlockLo: 6,
- },
- Node: nil,
- },
- },
- }
-
- node2 = &disklayout.ExtentNode{
- Header: disklayout.ExtentHeader{
- Magic: disklayout.ExtentMagic,
- NumEntries: 1,
- MaxEntries: 4,
- Height: 1,
- },
- Entries: []disklayout.ExtentEntryPair{
- {
- Entry: &disklayout.ExtentIdx{
- FirstFileBlock: 3,
- ChildBlockLo: 2,
- },
- Node: node3,
- },
- },
- }
-
- node1 = &disklayout.ExtentNode{
- Header: disklayout.ExtentHeader{
- Magic: disklayout.ExtentMagic,
- NumEntries: 2,
- MaxEntries: 4,
- Height: 0,
- },
- Entries: []disklayout.ExtentEntryPair{
- {
- Entry: &disklayout.Extent{
- FirstFileBlock: 0,
- Length: 1,
- StartBlockLo: 3,
- },
- Node: nil,
- },
- {
- Entry: &disklayout.Extent{
- FirstFileBlock: 1,
- Length: 2,
- StartBlockLo: 4,
- },
- Node: nil,
- },
- },
- }
-
- node0 = &disklayout.ExtentNode{
- Header: disklayout.ExtentHeader{
- Magic: disklayout.ExtentMagic,
- NumEntries: 2,
- MaxEntries: 4,
- Height: 2,
- },
- Entries: []disklayout.ExtentEntryPair{
- {
- Entry: &disklayout.ExtentIdx{
- FirstFileBlock: 0,
- ChildBlockLo: 0,
- },
- Node: node1,
- },
- {
- Entry: &disklayout.ExtentIdx{
- FirstFileBlock: 3,
- ChildBlockLo: 1,
- },
- Node: node2,
- },
- },
- }
-)
-
-// TestExtentReader stress tests extentReader functionality. It performs random
-// length reads from all possible positions in the extent tree.
-func TestExtentReader(t *testing.T) {
- mockExtentFile, want := extentTreeSetUp(t, node0)
- n := len(want)
-
- for from := 0; from < n; from++ {
- got := make([]byte, n-from)
-
- if read, err := mockExtentFile.ReadAt(got, int64(from)); err != nil {
- t.Fatalf("file read operation from offset %d to %d only read %d bytes: %v", from, n, read, err)
- }
-
- if diff := cmp.Diff(got, want[from:]); diff != "" {
- t.Fatalf("file data from offset %d to %d mismatched (-want +got):\n%s", from, n, diff)
- }
- }
-}
-
-// TestBuildExtentTree tests the extent tree building logic.
-func TestBuildExtentTree(t *testing.T) {
- mockExtentFile, _ := extentTreeSetUp(t, node0)
-
- opt := cmpopts.IgnoreUnexported(disklayout.ExtentIdx{}, disklayout.ExtentHeader{})
- if diff := cmp.Diff(&mockExtentFile.root, node0, opt); diff != "" {
- t.Errorf("extent tree mismatch (-want +got):\n%s", diff)
- }
-}
-
-// extentTreeSetUp writes the passed extent tree to a mock disk as an extent
-// tree. It also constucts a mock extent file with the same tree built in it.
-// It also writes random data file data and returns it.
-func extentTreeSetUp(t *testing.T, root *disklayout.ExtentNode) (*extentFile, []byte) {
- t.Helper()
-
- mockDisk := make([]byte, mockExtentBlkSize*10)
- mockExtentFile := &extentFile{}
- args := inodeArgs{
- fs: &filesystem{
- dev: bytes.NewReader(mockDisk),
- },
- diskInode: &disklayout.InodeNew{
- InodeOld: disklayout.InodeOld{
- SizeLo: uint32(mockExtentBlkSize) * getNumPhyBlks(root),
- },
- },
- blkSize: mockExtentBlkSize,
- }
- mockExtentFile.regFile.inode.init(args, &mockExtentFile.regFile)
-
- fileData := writeTree(&mockExtentFile.regFile.inode, mockDisk, node0, mockExtentBlkSize)
-
- if err := mockExtentFile.buildExtTree(); err != nil {
- t.Fatalf("inode.buildExtTree failed: %v", err)
- }
- return mockExtentFile, fileData
-}
-
-// writeTree writes the tree represented by `root` to the inode and disk. It
-// also writes random file data on disk.
-func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, mockExtentBlkSize uint64) []byte {
- rootData := in.diskInode.Data()
- root.Header.MarshalBytes(rootData)
- off := root.Header.SizeBytes()
- for _, ep := range root.Entries {
- ep.Entry.MarshalBytes(rootData[off:])
- off += ep.Entry.SizeBytes()
- }
-
- var fileData []byte
- for _, ep := range root.Entries {
- if root.Header.Height == 0 {
- fileData = append(fileData, writeFileDataToExtent(disk, ep.Entry.(*disklayout.Extent))...)
- } else {
- fileData = append(fileData, writeTreeToDisk(disk, ep)...)
- }
- }
- return fileData
-}
-
-// writeTreeToDisk is the recursive step for writeTree which writes the tree
-// on the disk only. Also writes random file data on disk.
-func writeTreeToDisk(disk []byte, curNode disklayout.ExtentEntryPair) []byte {
- nodeData := disk[curNode.Entry.PhysicalBlock()*mockExtentBlkSize:]
- curNode.Node.Header.MarshalBytes(nodeData)
- off := curNode.Node.Header.SizeBytes()
- for _, ep := range curNode.Node.Entries {
- ep.Entry.MarshalBytes(nodeData[off:])
- off += ep.Entry.SizeBytes()
- }
-
- var fileData []byte
- for _, ep := range curNode.Node.Entries {
- if curNode.Node.Header.Height == 0 {
- fileData = append(fileData, writeFileDataToExtent(disk, ep.Entry.(*disklayout.Extent))...)
- } else {
- fileData = append(fileData, writeTreeToDisk(disk, ep)...)
- }
- }
- return fileData
-}
-
-// writeFileDataToExtent writes random bytes to the blocks on disk that the
-// passed extent points to.
-func writeFileDataToExtent(disk []byte, ex *disklayout.Extent) []byte {
- phyExStartBlk := ex.PhysicalBlock()
- phyExStartOff := phyExStartBlk * mockExtentBlkSize
- phyExEndOff := phyExStartOff + uint64(ex.Length)*mockExtentBlkSize
- rand.Read(disk[phyExStartOff:phyExEndOff])
- return disk[phyExStartOff:phyExEndOff]
-}
-
-// getNumPhyBlks returns the number of physical blocks covered under the node.
-func getNumPhyBlks(node *disklayout.ExtentNode) uint32 {
- var res uint32
- for _, ep := range node.Entries {
- if node.Header.Height == 0 {
- res += uint32(ep.Entry.(*disklayout.Extent).Length)
- } else {
- res += getNumPhyBlks(ep.Node)
- }
- }
- return res
-}
diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go
deleted file mode 100644
index 90b086468..000000000
--- a/pkg/sentry/fsimpl/ext/file_description.go
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// fileDescription is embedded by ext implementations of
-// vfs.FileDescriptionImpl.
-type fileDescription struct {
- vfsfd vfs.FileDescription
- vfs.FileDescriptionDefaultImpl
- vfs.LockFD
-}
-
-func (fd *fileDescription) filesystem() *filesystem {
- return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
-}
-
-func (fd *fileDescription) inode() *inode {
- return fd.vfsfd.Dentry().Impl().(*dentry).inode
-}
-
-// Stat implements vfs.FileDescriptionImpl.Stat.
-func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
- var stat linux.Statx
- fd.inode().statTo(&stat)
- return stat, nil
-}
-
-// SetStat implements vfs.FileDescriptionImpl.SetStat.
-func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
- if opts.Stat.Mask == 0 {
- return nil
- }
- return syserror.EPERM
-}
-
-// SetStat implements vfs.FileDescriptionImpl.StatFS.
-func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) {
- var stat linux.Statfs
- fd.filesystem().statTo(&stat)
- return stat, nil
-}
-
-// Sync implements vfs.FileDescriptionImpl.Sync.
-func (fd *fileDescription) Sync(ctx context.Context) error {
- return nil
-}
diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go
deleted file mode 100644
index d4fc484a2..000000000
--- a/pkg/sentry/fsimpl/ext/filesystem.go
+++ /dev/null
@@ -1,555 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "errors"
- "io"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-var (
- // errResolveDirent indicates that the vfs.ResolvingPath.Component() does
- // not exist on the dentry tree but does exist on disk. So it has to be read in
- // using the in-memory dirent and added to the dentry tree. Usually indicates
- // the need to lock filesystem.mu for writing.
- errResolveDirent = errors.New("resolve path component using dirent")
-)
-
-// filesystem implements vfs.FilesystemImpl.
-//
-// +stateify savable
-type filesystem struct {
- vfsfs vfs.Filesystem
-
- // mu serializes changes to the Dentry tree.
- mu sync.RWMutex `state:"nosave"`
-
- // dev represents the underlying fs device. It does not require protection
- // because io.ReaderAt permits concurrent read calls to it. It translates to
- // the pread syscall which passes on the read request directly to the device
- // driver. Device drivers are intelligent in serving multiple concurrent read
- // requests in the optimal order (taking locality into consideration).
- dev io.ReaderAt
-
- // inodeCache maps absolute inode numbers to the corresponding Inode struct.
- // Inodes should be removed from this once their reference count hits 0.
- //
- // Protected by mu because most additions (see IterDirents) and all removals
- // from this corresponds to a change in the dentry tree.
- inodeCache map[uint32]*inode
-
- // sb represents the filesystem superblock. Immutable after initialization.
- sb disklayout.SuperBlock
-
- // bgs represents all the block group descriptors for the filesystem.
- // Immutable after initialization.
- bgs []disklayout.BlockGroup
-
- // devMinor is this filesystem's device minor number. Immutable after
- // initialization.
- devMinor uint32
-}
-
-// Compiles only if filesystem implements vfs.FilesystemImpl.
-var _ vfs.FilesystemImpl = (*filesystem)(nil)
-
-// stepLocked resolves rp.Component() in parent directory vfsd. The write
-// parameter passed tells if the caller has acquired filesystem.mu for writing
-// or not. If set to true, an existing inode on disk can be added to the dentry
-// tree if not present already.
-//
-// stepLocked is loosely analogous to fs/namei.c:walk_component().
-//
-// Preconditions:
-// * filesystem.mu must be locked (for writing if write param is true).
-// * !rp.Done().
-// * inode == vfsd.Impl().(*Dentry).inode.
-func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write bool) (*vfs.Dentry, *inode, error) {
- if !inode.isDir() {
- return nil, nil, syserror.ENOTDIR
- }
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
- return nil, nil, err
- }
-
- for {
- name := rp.Component()
- if name == "." {
- rp.Advance()
- return vfsd, inode, nil
- }
- d := vfsd.Impl().(*dentry)
- if name == ".." {
- isRoot, err := rp.CheckRoot(ctx, vfsd)
- if err != nil {
- return nil, nil, err
- }
- if isRoot || d.parent == nil {
- rp.Advance()
- return vfsd, inode, nil
- }
- if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
- return nil, nil, err
- }
- rp.Advance()
- return &d.parent.vfsd, d.parent.inode, nil
- }
-
- dir := inode.impl.(*directory)
- child, ok := dir.childCache[name]
- if !ok {
- // We may need to instantiate a new dentry for this child.
- childDirent, ok := dir.childMap[name]
- if !ok {
- // The underlying inode does not exist on disk.
- return nil, nil, syserror.ENOENT
- }
-
- if !write {
- // filesystem.mu must be held for writing to add to the dentry tree.
- return nil, nil, errResolveDirent
- }
-
- // Create and add the component's dirent to the dentry tree.
- fs := rp.Mount().Filesystem().Impl().(*filesystem)
- childInode, err := fs.getOrCreateInodeLocked(childDirent.diskDirent.Inode())
- if err != nil {
- return nil, nil, err
- }
- // incRef because this is being added to the dentry tree.
- childInode.incRef()
- child = newDentry(childInode)
- child.parent = d
- child.name = name
- dir.childCache[name] = child
- }
- if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
- return nil, nil, err
- }
- if child.inode.isSymlink() && rp.ShouldFollowSymlink() {
- if err := rp.HandleSymlink(child.inode.impl.(*symlink).target); err != nil {
- return nil, nil, err
- }
- continue
- }
- rp.Advance()
- return &child.vfsd, child.inode, nil
- }
-}
-
-// walkLocked resolves rp to an existing file. The write parameter
-// passed tells if the caller has acquired filesystem.mu for writing or not.
-// If set to true, additions can be made to the dentry tree while walking.
-// If errResolveDirent is returned, the walk needs to be continued with an
-// upgraded filesystem.mu.
-//
-// walkLocked is loosely analogous to Linux's fs/namei.c:path_lookupat().
-//
-// Preconditions:
-// * filesystem.mu must be locked (for writing if write param is true).
-func walkLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) {
- vfsd := rp.Start()
- inode := vfsd.Impl().(*dentry).inode
- for !rp.Done() {
- var err error
- vfsd, inode, err = stepLocked(ctx, rp, vfsd, inode, write)
- if err != nil {
- return nil, nil, err
- }
- }
- if rp.MustBeDir() && !inode.isDir() {
- return nil, nil, syserror.ENOTDIR
- }
- return vfsd, inode, nil
-}
-
-// walkParentLocked resolves all but the last path component of rp to an
-// existing directory. It does not check that the returned directory is
-// searchable by the provider of rp. The write parameter passed tells if the
-// caller has acquired filesystem.mu for writing or not. If set to true,
-// additions can be made to the dentry tree while walking.
-// If errResolveDirent is returned, the walk needs to be continued with an
-// upgraded filesystem.mu.
-//
-// walkParentLocked is loosely analogous to Linux's fs/namei.c:path_parentat().
-//
-// Preconditions:
-// * filesystem.mu must be locked (for writing if write param is true).
-// * !rp.Done().
-func walkParentLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) {
- vfsd := rp.Start()
- inode := vfsd.Impl().(*dentry).inode
- for !rp.Final() {
- var err error
- vfsd, inode, err = stepLocked(ctx, rp, vfsd, inode, write)
- if err != nil {
- return nil, nil, err
- }
- }
- if !inode.isDir() {
- return nil, nil, syserror.ENOTDIR
- }
- return vfsd, inode, nil
-}
-
-// walk resolves rp to an existing file. If parent is set to true, it resolves
-// the rp till the parent of the last component which should be an existing
-// directory. If parent is false then resolves rp entirely. Attemps to resolve
-// the path as far as it can with a read lock and upgrades the lock if needed.
-func (fs *filesystem) walk(ctx context.Context, rp *vfs.ResolvingPath, parent bool) (*vfs.Dentry, *inode, error) {
- var (
- vfsd *vfs.Dentry
- inode *inode
- err error
- )
-
- // Try walking with the hopes that all dentries have already been pulled out
- // of disk. This reduces congestion (allows concurrent walks).
- fs.mu.RLock()
- if parent {
- vfsd, inode, err = walkParentLocked(ctx, rp, false)
- } else {
- vfsd, inode, err = walkLocked(ctx, rp, false)
- }
- fs.mu.RUnlock()
-
- if err == errResolveDirent {
- // Upgrade lock and continue walking. Lock upgrading in the middle of the
- // walk is fine as this is a read only filesystem.
- fs.mu.Lock()
- if parent {
- vfsd, inode, err = walkParentLocked(ctx, rp, true)
- } else {
- vfsd, inode, err = walkLocked(ctx, rp, true)
- }
- fs.mu.Unlock()
- }
-
- return vfsd, inode, err
-}
-
-// getOrCreateInodeLocked gets the inode corresponding to the inode number passed in.
-// It creates a new one with the given inode number if one does not exist.
-// The caller must increment the ref count if adding this to the dentry tree.
-//
-// Precondition: must be holding fs.mu for writing.
-func (fs *filesystem) getOrCreateInodeLocked(inodeNum uint32) (*inode, error) {
- if in, ok := fs.inodeCache[inodeNum]; ok {
- return in, nil
- }
-
- in, err := newInode(fs, inodeNum)
- if err != nil {
- return nil, err
- }
-
- fs.inodeCache[inodeNum] = in
- return in, nil
-}
-
-// statTo writes the statfs fields to the output parameter.
-func (fs *filesystem) statTo(stat *linux.Statfs) {
- stat.Type = uint64(fs.sb.Magic())
- stat.BlockSize = int64(fs.sb.BlockSize())
- stat.Blocks = fs.sb.BlocksCount()
- stat.BlocksFree = fs.sb.FreeBlocksCount()
- stat.BlocksAvailable = fs.sb.FreeBlocksCount()
- stat.Files = uint64(fs.sb.InodesCount())
- stat.FilesFree = uint64(fs.sb.FreeInodesCount())
- stat.NameLength = disklayout.MaxFileName
- stat.FragmentSize = int64(fs.sb.BlockSize())
- // TODO(b/134676337): Set Statfs.Flags and Statfs.FSID.
-}
-
-// AccessAt implements vfs.Filesystem.Impl.AccessAt.
-func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
- _, inode, err := fs.walk(ctx, rp, false)
- if err != nil {
- return err
- }
- return inode.checkPermissions(rp.Credentials(), ats)
-}
-
-// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
-func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
- vfsd, inode, err := fs.walk(ctx, rp, false)
- if err != nil {
- return nil, err
- }
-
- if opts.CheckSearchable {
- if !inode.isDir() {
- return nil, syserror.ENOTDIR
- }
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
- return nil, err
- }
- }
-
- inode.incRef()
- return vfsd, nil
-}
-
-// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
-func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
- vfsd, inode, err := fs.walk(ctx, rp, true)
- if err != nil {
- return nil, err
- }
- inode.incRef()
- return vfsd, nil
-}
-
-// OpenAt implements vfs.FilesystemImpl.OpenAt.
-func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- vfsd, inode, err := fs.walk(ctx, rp, false)
- if err != nil {
- return nil, err
- }
-
- // EROFS is returned if write access is needed.
- if vfs.MayWriteFileWithOpenFlags(opts.Flags) || opts.Flags&(linux.O_CREAT|linux.O_EXCL|linux.O_TMPFILE) != 0 {
- return nil, syserror.EROFS
- }
- return inode.open(rp, vfsd, &opts)
-}
-
-// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
-func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
- _, inode, err := fs.walk(ctx, rp, false)
- if err != nil {
- return "", err
- }
- symlink, ok := inode.impl.(*symlink)
- if !ok {
- return "", syserror.EINVAL
- }
- return symlink.target, nil
-}
-
-// StatAt implements vfs.FilesystemImpl.StatAt.
-func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
- _, inode, err := fs.walk(ctx, rp, false)
- if err != nil {
- return linux.Statx{}, err
- }
- var stat linux.Statx
- inode.statTo(&stat)
- return stat, nil
-}
-
-// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
-func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
- if _, _, err := fs.walk(ctx, rp, false); err != nil {
- return linux.Statfs{}, err
- }
-
- var stat linux.Statfs
- fs.statTo(&stat)
- return stat, nil
-}
-
-// Release implements vfs.FilesystemImpl.Release.
-func (fs *filesystem) Release(ctx context.Context) {
- fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
-}
-
-// Sync implements vfs.FilesystemImpl.Sync.
-func (fs *filesystem) Sync(ctx context.Context) error {
- // This is a readonly filesystem for now.
- return nil
-}
-
-// The vfs.FilesystemImpl functions below return EROFS because their respective
-// man pages say that EROFS must be returned if the path resolves to a file on
-// this read-only filesystem.
-
-// LinkAt implements vfs.FilesystemImpl.LinkAt.
-func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
- if rp.Done() {
- return syserror.EEXIST
- }
-
- if _, _, err := fs.walk(ctx, rp, true); err != nil {
- return err
- }
-
- return syserror.EROFS
-}
-
-// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
-func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
- if rp.Done() {
- return syserror.EEXIST
- }
-
- if _, _, err := fs.walk(ctx, rp, true); err != nil {
- return err
- }
-
- return syserror.EROFS
-}
-
-// MknodAt implements vfs.FilesystemImpl.MknodAt.
-func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
- if rp.Done() {
- return syserror.EEXIST
- }
-
- _, _, err := fs.walk(ctx, rp, true)
- if err != nil {
- return err
- }
-
- return syserror.EROFS
-}
-
-// RenameAt implements vfs.FilesystemImpl.RenameAt.
-func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
- if rp.Done() {
- return syserror.ENOENT
- }
-
- _, _, err := fs.walk(ctx, rp, false)
- if err != nil {
- return err
- }
-
- return syserror.EROFS
-}
-
-// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
-func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- _, inode, err := fs.walk(ctx, rp, false)
- if err != nil {
- return err
- }
-
- if !inode.isDir() {
- return syserror.ENOTDIR
- }
-
- return syserror.EROFS
-}
-
-// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
-func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
- _, _, err := fs.walk(ctx, rp, false)
- if err != nil {
- return err
- }
-
- return syserror.EROFS
-}
-
-// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
-func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
- if rp.Done() {
- return syserror.EEXIST
- }
-
- _, _, err := fs.walk(ctx, rp, true)
- if err != nil {
- return err
- }
-
- return syserror.EROFS
-}
-
-// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
-func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- _, inode, err := fs.walk(ctx, rp, false)
- if err != nil {
- return err
- }
-
- if inode.isDir() {
- return syserror.EISDIR
- }
-
- return syserror.EROFS
-}
-
-// BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
-func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
- _, inode, err := fs.walk(ctx, rp, false)
- if err != nil {
- return nil, err
- }
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
- return nil, err
- }
-
- // TODO(b/134676337): Support sockets.
- return nil, syserror.ECONNREFUSED
-}
-
-// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
-func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
- _, _, err := fs.walk(ctx, rp, false)
- if err != nil {
- return nil, err
- }
- return nil, syserror.ENOTSUP
-}
-
-// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
-func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
- _, _, err := fs.walk(ctx, rp, false)
- if err != nil {
- return "", err
- }
- return "", syserror.ENOTSUP
-}
-
-// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
-func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
- _, _, err := fs.walk(ctx, rp, false)
- if err != nil {
- return err
- }
- return syserror.ENOTSUP
-}
-
-// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
-func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
- _, _, err := fs.walk(ctx, rp, false)
- if err != nil {
- return err
- }
- return syserror.ENOTSUP
-}
-
-// PrependPath implements vfs.FilesystemImpl.PrependPath.
-func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
-}
-
-// MountOptions implements vfs.FilesystemImpl.MountOptions.
-func (fs *filesystem) MountOptions() string {
- return ""
-}
diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go
deleted file mode 100644
index 4a555bf72..000000000
--- a/pkg/sentry/fsimpl/ext/inode.go
+++ /dev/null
@@ -1,246 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "fmt"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// inode represents an ext inode.
-//
-// inode uses the same inheritance pattern that pkg/sentry/vfs structures use.
-// This has been done to increase memory locality.
-//
-// Implementations:
-// inode --
-// |-- dir
-// |-- symlink
-// |-- regular--
-// |-- extent file
-// |-- block map file
-//
-// +stateify savable
-type inode struct {
- // refs is a reference count. refs is accessed using atomic memory operations.
- refs int64
-
- // fs is the containing filesystem.
- fs *filesystem
-
- // inodeNum is the inode number of this inode on disk. This is used to
- // identify inodes within the ext filesystem.
- inodeNum uint32
-
- // blkSize is the fs data block size. Same as filesystem.sb.BlockSize().
- blkSize uint64
-
- // diskInode gives us access to the inode struct on disk. Immutable.
- diskInode disklayout.Inode
-
- locks vfs.FileLocks
-
- // This is immutable. The first field of the implementations must have inode
- // as the first field to ensure temporality.
- impl interface{}
-}
-
-// incRef increments the inode ref count.
-func (in *inode) incRef() {
- atomic.AddInt64(&in.refs, 1)
-}
-
-// tryIncRef tries to increment the ref count. Returns true if successful.
-func (in *inode) tryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&in.refs)
- if refs == 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&in.refs, refs, refs+1) {
- return true
- }
- }
-}
-
-// decRef decrements the inode ref count and releases the inode resources if
-// the ref count hits 0.
-//
-// Precondition: Must have locked filesystem.mu.
-func (in *inode) decRef() {
- if refs := atomic.AddInt64(&in.refs, -1); refs == 0 {
- delete(in.fs.inodeCache, in.inodeNum)
- } else if refs < 0 {
- panic("ext.inode.decRef() called without holding a reference")
- }
-}
-
-// newInode is the inode constructor. Reads the inode off disk. Identifies
-// inodes based on the absolute inode number on disk.
-func newInode(fs *filesystem, inodeNum uint32) (*inode, error) {
- if inodeNum == 0 {
- panic("inode number 0 on ext filesystems is not possible")
- }
-
- inodeRecordSize := fs.sb.InodeSize()
- var diskInode disklayout.Inode
- if inodeRecordSize == disklayout.OldInodeSize {
- diskInode = &disklayout.InodeOld{}
- } else {
- diskInode = &disklayout.InodeNew{}
- }
-
- // Calculate where the inode is actually placed.
- inodesPerGrp := fs.sb.InodesPerGroup()
- blkSize := fs.sb.BlockSize()
- inodeTableOff := fs.bgs[getBGNum(inodeNum, inodesPerGrp)].InodeTable() * blkSize
- inodeOff := inodeTableOff + uint64(uint32(inodeRecordSize)*getBGOff(inodeNum, inodesPerGrp))
-
- if err := readFromDisk(fs.dev, int64(inodeOff), diskInode); err != nil {
- return nil, err
- }
-
- // Build the inode based on its type.
- args := inodeArgs{
- fs: fs,
- inodeNum: inodeNum,
- blkSize: blkSize,
- diskInode: diskInode,
- }
-
- switch diskInode.Mode().FileType() {
- case linux.ModeSymlink:
- f, err := newSymlink(args)
- if err != nil {
- return nil, err
- }
- return &f.inode, nil
- case linux.ModeRegular:
- f, err := newRegularFile(args)
- if err != nil {
- return nil, err
- }
- return &f.inode, nil
- case linux.ModeDirectory:
- f, err := newDirectory(args, fs.sb.IncompatibleFeatures().DirentFileType)
- if err != nil {
- return nil, err
- }
- return &f.inode, nil
- default:
- // TODO(b/134676337): Return appropriate errors for sockets, pipes and devices.
- return nil, syserror.EINVAL
- }
-}
-
-type inodeArgs struct {
- fs *filesystem
- inodeNum uint32
- blkSize uint64
- diskInode disklayout.Inode
-}
-
-func (in *inode) init(args inodeArgs, impl interface{}) {
- in.fs = args.fs
- in.inodeNum = args.inodeNum
- in.blkSize = args.blkSize
- in.diskInode = args.diskInode
- in.impl = impl
-}
-
-// open creates and returns a file description for the dentry passed in.
-func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
- ats := vfs.AccessTypesForOpenFlags(opts)
- if err := in.checkPermissions(rp.Credentials(), ats); err != nil {
- return nil, err
- }
- mnt := rp.Mount()
- switch in.impl.(type) {
- case *regularFile:
- var fd regularFileFD
- fd.LockFD.Init(&in.locks)
- if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil {
- return nil, err
- }
- return &fd.vfsfd, nil
- case *directory:
- // Can't open directories writably. This check is not necessary for a read
- // only filesystem but will be required when write is implemented.
- if ats&vfs.MayWrite != 0 {
- return nil, syserror.EISDIR
- }
- var fd directoryFD
- fd.LockFD.Init(&in.locks)
- if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil {
- return nil, err
- }
- return &fd.vfsfd, nil
- case *symlink:
- if opts.Flags&linux.O_PATH == 0 {
- // Can't open symlinks without O_PATH.
- return nil, syserror.ELOOP
- }
- var fd symlinkFD
- fd.LockFD.Init(&in.locks)
- if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil {
- return nil, err
- }
- return &fd.vfsfd, nil
- default:
- panic(fmt.Sprintf("unknown inode type: %T", in.impl))
- }
-}
-
-func (in *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
- return vfs.GenericCheckPermissions(creds, ats, in.diskInode.Mode(), in.diskInode.UID(), in.diskInode.GID())
-}
-
-// statTo writes the statx fields to the output parameter.
-func (in *inode) statTo(stat *linux.Statx) {
- stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK |
- linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_SIZE |
- linux.STATX_ATIME | linux.STATX_CTIME | linux.STATX_MTIME
- stat.Blksize = uint32(in.blkSize)
- stat.Mode = uint16(in.diskInode.Mode())
- stat.Nlink = uint32(in.diskInode.LinksCount())
- stat.UID = uint32(in.diskInode.UID())
- stat.GID = uint32(in.diskInode.GID())
- stat.Ino = uint64(in.inodeNum)
- stat.Size = in.diskInode.Size()
- stat.Atime = in.diskInode.AccessTime().StatxTimestamp()
- stat.Ctime = in.diskInode.ChangeTime().StatxTimestamp()
- stat.Mtime = in.diskInode.ModificationTime().StatxTimestamp()
- stat.DevMajor = linux.UNNAMED_MAJOR
- stat.DevMinor = in.fs.devMinor
- // TODO(b/134676337): Set stat.Blocks which is the number of 512 byte blocks
- // (including metadata blocks) required to represent this file.
-}
-
-// getBGNum returns the block group number that a given inode belongs to.
-func getBGNum(inodeNum uint32, inodesPerGrp uint32) uint32 {
- return (inodeNum - 1) / inodesPerGrp
-}
-
-// getBGOff returns the offset at which the given inode lives in the block
-// group's inode table, i.e. the index of the inode in the inode table.
-func getBGOff(inodeNum uint32, inodesPerGrp uint32) uint32 {
- return (inodeNum - 1) % inodesPerGrp
-}
diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go
deleted file mode 100644
index 5ad9befcd..000000000
--- a/pkg/sentry/fsimpl/ext/regular_file.go
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "io"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/safemem"
- "gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// regularFile represents a regular file's inode. This too follows the
-// inheritance pattern prevelant in the vfs layer described in
-// pkg/sentry/vfs/README.md.
-//
-// +stateify savable
-type regularFile struct {
- inode inode
-
- // This is immutable. The first field of fileReader implementations must be
- // regularFile to ensure temporality.
- // io.ReaderAt is more strict than io.Reader in the sense that a partial read
- // is always accompanied by an error. If a read spans past the end of file, a
- // partial read (within file range) is done and io.EOF is returned.
- impl io.ReaderAt
-}
-
-// newRegularFile is the regularFile constructor. It figures out what kind of
-// file this is and initializes the fileReader.
-func newRegularFile(args inodeArgs) (*regularFile, error) {
- if args.diskInode.Flags().Extents {
- file, err := newExtentFile(args)
- if err != nil {
- return nil, err
- }
- return &file.regFile, nil
- }
-
- file, err := newBlockMapFile(args)
- if err != nil {
- return nil, err
- }
- return &file.regFile, nil
-}
-
-func (in *inode) isRegular() bool {
- _, ok := in.impl.(*regularFile)
- return ok
-}
-
-// directoryFD represents a directory file description. It implements
-// vfs.FileDescriptionImpl.
-//
-// +stateify savable
-type regularFileFD struct {
- fileDescription
- vfs.LockFD
-
- // off is the file offset. off is accessed using atomic memory operations.
- off int64
-
- // offMu serializes operations that may mutate off.
- offMu sync.Mutex `state:"nosave"`
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *regularFileFD) Release(context.Context) {}
-
-// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- safeReader := safemem.FromIOReaderAt{
- ReaderAt: fd.inode().impl.(*regularFile).impl,
- Offset: offset,
- }
-
- // Copies data from disk directly into usermem without any intermediate
- // allocations (if dst is converted into BlockSeq such that it does not need
- // safe copying).
- return dst.CopyOutFrom(ctx, safeReader)
-}
-
-// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- n, err := fd.PRead(ctx, dst, fd.off, opts)
- fd.offMu.Lock()
- fd.off += n
- fd.offMu.Unlock()
- return n, err
-}
-
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- // write(2) specifies that EBADF must be returned if the fd is not open for
- // writing.
- return 0, syserror.EBADF
-}
-
-// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- n, err := fd.PWrite(ctx, src, fd.off, opts)
- fd.offMu.Lock()
- fd.off += n
- fd.offMu.Unlock()
- return n, err
-}
-
-// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
-func (fd *regularFileFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
- return syserror.ENOTDIR
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- fd.offMu.Lock()
- defer fd.offMu.Unlock()
- switch whence {
- case linux.SEEK_SET:
- // Use offset as specified.
- case linux.SEEK_CUR:
- offset += fd.off
- case linux.SEEK_END:
- offset += int64(fd.inode().diskInode.Size())
- default:
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- fd.off = offset
- return offset, nil
-}
-
-// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
-func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
- // TODO(b/134676337): Implement mmap(2).
- return syserror.ENODEV
-}
diff --git a/pkg/sentry/fsimpl/ext/symlink.go b/pkg/sentry/fsimpl/ext/symlink.go
deleted file mode 100644
index 5e2bcc837..000000000
--- a/pkg/sentry/fsimpl/ext/symlink.go
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// symlink represents a symlink inode.
-//
-// +stateify savable
-type symlink struct {
- inode inode
- target string // immutable
-}
-
-// newSymlink is the symlink constructor. It reads out the symlink target from
-// the inode (however it might have been stored).
-func newSymlink(args inodeArgs) (*symlink, error) {
- var link []byte
-
- // If the symlink target is lesser than 60 bytes, its stores in inode.Data().
- // Otherwise either extents or block maps will be used to store the link.
- size := args.diskInode.Size()
- if size < 60 {
- link = args.diskInode.Data()[:size]
- } else {
- // Create a regular file out of this inode and read out the target.
- regFile, err := newRegularFile(args)
- if err != nil {
- return nil, err
- }
-
- link = make([]byte, size)
- if n, err := regFile.impl.ReadAt(link, 0); uint64(n) < size {
- return nil, err
- }
- }
-
- file := &symlink{target: string(link)}
- file.inode.init(args, file)
- return file, nil
-}
-
-func (in *inode) isSymlink() bool {
- _, ok := in.impl.(*symlink)
- return ok
-}
-
-// symlinkFD represents a symlink file description and implements
-// vfs.FileDescriptionImpl. which may only be used if open options contains
-// O_PATH. For this reason most of the functions return EBADF.
-//
-// +stateify savable
-type symlinkFD struct {
- fileDescription
- vfs.NoLockFD
-}
-
-// Compiles only if symlinkFD implements vfs.FileDescriptionImpl.
-var _ vfs.FileDescriptionImpl = (*symlinkFD)(nil)
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *symlinkFD) Release(context.Context) {}
-
-// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *symlinkFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- return 0, syserror.EBADF
-}
-
-// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *symlinkFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- return 0, syserror.EBADF
-}
-
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *symlinkFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EBADF
-}
-
-// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *symlinkFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EBADF
-}
-
-// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
-func (fd *symlinkFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
- return syserror.ENOTDIR
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *symlinkFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- return 0, syserror.EBADF
-}
-
-// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
-func (fd *symlinkFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
- return syserror.EBADF
-}
diff --git a/pkg/sentry/fsimpl/ext/utils.go b/pkg/sentry/fsimpl/ext/utils.go
deleted file mode 100644
index 58ef7b9b8..000000000
--- a/pkg/sentry/fsimpl/ext/utils.go
+++ /dev/null
@@ -1,94 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "io"
-
- "gvisor.dev/gvisor/pkg/marshal"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// readFromDisk performs a binary read from disk into the given struct from
-// the absolute offset provided.
-func readFromDisk(dev io.ReaderAt, abOff int64, v marshal.Marshallable) error {
- n := v.SizeBytes()
- buf := make([]byte, n)
- if read, _ := dev.ReadAt(buf, abOff); read < int(n) {
- return syserror.EIO
- }
-
- v.UnmarshalBytes(buf)
- return nil
-}
-
-// readSuperBlock reads the SuperBlock from block group 0 in the underlying
-// device. There are three versions of the superblock. This function identifies
-// and returns the correct version.
-func readSuperBlock(dev io.ReaderAt) (disklayout.SuperBlock, error) {
- var sb disklayout.SuperBlock = &disklayout.SuperBlockOld{}
- if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil {
- return nil, err
- }
- if sb.Revision() == disklayout.OldRev {
- return sb, nil
- }
-
- sb = &disklayout.SuperBlock32Bit{}
- if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil {
- return nil, err
- }
- if !sb.IncompatibleFeatures().Is64Bit {
- return sb, nil
- }
-
- sb = &disklayout.SuperBlock64Bit{}
- if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil {
- return nil, err
- }
- return sb, nil
-}
-
-// blockGroupsCount returns the number of block groups in the ext fs.
-func blockGroupsCount(sb disklayout.SuperBlock) uint64 {
- blocksCount := sb.BlocksCount()
- blocksPerGroup := uint64(sb.BlocksPerGroup())
-
- // Round up the result. float64 can compromise precision so do it manually.
- return (blocksCount + blocksPerGroup - 1) / blocksPerGroup
-}
-
-// readBlockGroups reads the block group descriptor table from block group 0 in
-// the underlying device.
-func readBlockGroups(dev io.ReaderAt, sb disklayout.SuperBlock) ([]disklayout.BlockGroup, error) {
- bgCount := blockGroupsCount(sb)
- bgdSize := uint64(sb.BgDescSize())
- is64Bit := sb.IncompatibleFeatures().Is64Bit
- bgds := make([]disklayout.BlockGroup, bgCount)
-
- for i, off := uint64(0), uint64(sb.FirstDataBlock()+1)*sb.BlockSize(); i < bgCount; i, off = i+1, off+bgdSize {
- if is64Bit {
- bgds[i] = &disklayout.BlockGroup64Bit{}
- } else {
- bgds[i] = &disklayout.BlockGroup32Bit{}
- }
-
- if err := readFromDisk(dev, int64(off), bgds[i]); err != nil {
- return nil, err
- }
- }
- return bgds, nil
-}
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 3a4777fbe..05c4fbeb2 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -46,6 +46,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/hostarch",
"//pkg/log",
"//pkg/marshal",
@@ -58,7 +59,6 @@ go_library(
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
@@ -76,13 +76,13 @@ go_test(
library = ":fuse",
deps = [
"//pkg/abi/linux",
+ "//pkg/errors/linuxerr",
"//pkg/hostarch",
"//pkg/marshal",
"//pkg/sentry/fsimpl/testutil",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index 077bf9307..d404edaf0 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -19,9 +19,9 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -252,11 +252,11 @@ func (conn *connection) Call(t *kernel.Task, r *Request) (*Response, error) {
}
if !conn.connected {
- return nil, syserror.ENOTCONN
+ return nil, linuxerr.ENOTCONN
}
if conn.connInitError {
- return nil, syserror.ECONNREFUSED
+ return nil, linuxerr.ECONNREFUSED
}
fut, err := conn.callFuture(t, r)
@@ -306,7 +306,7 @@ func (conn *connection) callFutureLocked(t *kernel.Task, r *Request) (*futureRes
conn.mu.Unlock()
// we checked connected before,
// this must be due to aborted connection.
- return nil, syserror.ECONNABORTED
+ return nil, linuxerr.ECONNABORTED
}
conn.mu.Unlock()
diff --git a/pkg/sentry/fsimpl/fuse/connection_test.go b/pkg/sentry/fsimpl/fuse/connection_test.go
index 78ea6a31e..1fddd858e 100644
--- a/pkg/sentry/fsimpl/fuse/connection_test.go
+++ b/pkg/sentry/fsimpl/fuse/connection_test.go
@@ -19,9 +19,9 @@ import (
"testing"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/syserror"
)
// TestConnectionInitBlock tests if initialization
@@ -104,7 +104,7 @@ func TestConnectionAbort(t *testing.T) {
// After abort, Call() should return directly with ENOTCONN.
req := conn.NewRequest(creds, 0, 0, 0, testObj)
_, err = conn.Call(task, req)
- if err != syserror.ENOTCONN {
+ if !linuxerr.Equals(linuxerr.ENOTCONN, err) {
t.Fatalf("Incorrect error code received for Call() after connection aborted")
}
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index 5d2bae14e..0f855ac59 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -18,11 +18,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -37,7 +37,7 @@ type fuseDevice struct{}
// Open implements vfs.Device.Open.
func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
if !kernel.FUSEEnabled {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
var fd DeviceFD
@@ -122,17 +122,17 @@ func (fd *DeviceFD) Release(ctx context.Context) {
func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
if fd.fs == nil {
- return 0, syserror.EPERM
+ return 0, linuxerr.EPERM
}
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// Read implements vfs.FileDescriptionImpl.Read.
func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
if fd.fs == nil {
- return 0, syserror.EPERM
+ return 0, linuxerr.EPERM
}
// We require that any Read done on this filesystem have a sane minimum
@@ -149,7 +149,7 @@ func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.R
// If the read buffer is too small, error out.
if dst.NumBytes() < int64(minBuffSize) {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
fd.mu.Lock()
@@ -191,7 +191,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
}
if req == nil {
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// We already checked the size: dst must be able to fit the whole request.
@@ -204,7 +204,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
return 0, err
}
if n != len(req.data) {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
if req.hdr.Opcode == linux.FUSE_WRITE {
@@ -213,7 +213,7 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
return 0, err
}
if written != len(req.payload) {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
n += int(written)
}
@@ -234,10 +234,10 @@ func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts
func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
if fd.fs == nil {
- return 0, syserror.EPERM
+ return 0, linuxerr.EPERM
}
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// Write implements vfs.FileDescriptionImpl.Write.
@@ -251,12 +251,12 @@ func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.
func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
if fd.fs == nil {
- return 0, syserror.EPERM
+ return 0, linuxerr.EPERM
}
// Return ENODEV if the filesystem is umounted.
if fd.fs.umounted {
- return 0, syserror.ENODEV
+ return 0, linuxerr.ENODEV
}
var cn, n int64
@@ -293,7 +293,7 @@ func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opt
// Assert that the header isn't read into the writeBuf yet.
if fd.writeCursor >= hdrLen {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// We don't have the full common response header yet.
@@ -322,7 +322,7 @@ func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opt
if !ok {
// Server sent us a response for a request we never sent,
// or for which we already received a reply (e.g. aborted), an unlikely event.
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
delete(fd.completions, hdr.Unique)
@@ -391,10 +391,10 @@ func (fd *DeviceFD) EventUnregister(e *waiter.Entry) {
func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
if fd.fs == nil {
- return 0, syserror.EPERM
+ return 0, linuxerr.EPERM
}
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
// sendResponse sends a response to the waiting task (if any).
@@ -434,7 +434,7 @@ func (fd *DeviceFD) sendError(ctx context.Context, errno int32, unique linux.FUS
if !ok {
// A response for a request we never sent,
// or for which we already received a reply (e.g. aborted).
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
delete(fd.completions, respHdr.Unique)
diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go
index 04250d796..8951b5ba8 100644
--- a/pkg/sentry/fsimpl/fuse/dev_test.go
+++ b/pkg/sentry/fsimpl/fuse/dev_test.go
@@ -20,11 +20,11 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -186,7 +186,7 @@ func ReadTest(serverTask *kernel.Task, fd *vfs.FileDescription, inIOseq usermem.
// "would block".
n, err = dev.Read(serverTask, inIOseq, vfs.ReadOptions{})
total += n
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
break
}
diff --git a/pkg/sentry/fsimpl/fuse/directory.go b/pkg/sentry/fsimpl/fuse/directory.go
index fcc5d9a2a..9611edd5a 100644
--- a/pkg/sentry/fsimpl/fuse/directory.go
+++ b/pkg/sentry/fsimpl/fuse/directory.go
@@ -19,10 +19,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -32,27 +32,27 @@ type directoryFD struct {
// Allocate implements directoryFD.Allocate.
func (*directoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
// PRead implements vfs.FileDescriptionImpl.PRead.
func (*directoryFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// Read implements vfs.FileDescriptionImpl.Read.
func (*directoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (*directoryFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// Write implements vfs.FileDescriptionImpl.Write.
func (*directoryFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EISDIR
+ return 0, linuxerr.EISDIR
}
// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 167c899e2..af16098d2 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -23,13 +23,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -121,30 +121,30 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
deviceDescriptorStr, ok := mopts["fd"]
if !ok {
ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option fd missing")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
delete(mopts, "fd")
deviceDescriptor, err := strconv.ParseInt(deviceDescriptorStr, 10 /* base */, 32 /* bitSize */)
if err != nil {
ctx.Debugf("fusefs.FilesystemType.GetFilesystem: invalid fd: %q (%v)", deviceDescriptorStr, err)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("%s.GetFilesystem: couldn't get kernel task from context", fsType.Name())
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
fuseFDGeneric := kernelTask.GetFileVFS2(int32(deviceDescriptor))
if fuseFDGeneric == nil {
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
defer fuseFDGeneric.DecRef(ctx)
fuseFD, ok := fuseFDGeneric.Impl().(*DeviceFD)
if !ok {
log.Warningf("%s.GetFilesystem: device FD is %T, not a FUSE device", fsType.Name, fuseFDGeneric)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
// Parse and set all the other supported FUSE mount options.
@@ -154,17 +154,17 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
uid, err := strconv.ParseUint(uidStr, 10, 32)
if err != nil {
log.Warningf("%s.GetFilesystem: invalid user_id: user_id=%s", fsType.Name(), uidStr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
kuid := creds.UserNamespace.MapToKUID(auth.UID(uid))
if !kuid.Ok() {
ctx.Warningf("fusefs.FilesystemType.GetFilesystem: unmapped uid: %d", uid)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
fsopts.uid = kuid
} else {
ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option user_id missing")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
if gidStr, ok := mopts["group_id"]; ok {
@@ -172,17 +172,17 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
gid, err := strconv.ParseUint(gidStr, 10, 32)
if err != nil {
log.Warningf("%s.GetFilesystem: invalid group_id: group_id=%s", fsType.Name(), gidStr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
kgid := creds.UserNamespace.MapToKGID(auth.GID(gid))
if !kgid.Ok() {
ctx.Warningf("fusefs.FilesystemType.GetFilesystem: unmapped gid: %d", gid)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
fsopts.gid = kgid
} else {
ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option group_id missing")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
if modeStr, ok := mopts["rootmode"]; ok {
@@ -190,12 +190,12 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
mode, err := strconv.ParseUint(modeStr, 8, 32)
if err != nil {
log.Warningf("%s.GetFilesystem: invalid mode: %q", fsType.Name(), modeStr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
fsopts.rootMode = linux.FileMode(mode)
} else {
ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option rootmode missing")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
// Set the maxInFlightRequests option.
@@ -206,7 +206,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
maxRead, err := strconv.ParseUint(maxReadStr, 10, 32)
if err != nil {
log.Warningf("%s.GetFilesystem: invalid max_read: max_read=%s", fsType.Name(), maxReadStr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
if maxRead < fuseMinMaxRead {
maxRead = fuseMinMaxRead
@@ -229,7 +229,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// Check for unparsed options.
if len(mopts) != 0 {
log.Warningf("%s.GetFilesystem: unsupported or unknown options: %v", fsType.Name(), mopts)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
// Create a new FUSE filesystem.
@@ -258,7 +258,7 @@ func newFUSEFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, fsTyp
conn, err := newFUSEConnection(ctx, fuseFD, opts)
if err != nil {
log.Warningf("fuse.NewFUSEFilesystem: NewFUSEConnection failed with error: %v", err)
- return nil, syserror.EINVAL
+ return nil, linuxerr.EINVAL
}
fs := &filesystem{
@@ -375,7 +375,7 @@ func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, a
creds.RealKGID != i.fs.opts.gid ||
creds.EffectiveKGID != i.fs.opts.gid ||
creds.SavedKGID != i.fs.opts.gid {
- return syserror.EACCES
+ return linuxerr.EACCES
}
}
@@ -393,10 +393,10 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr
isDir := i.InodeAttrs.Mode().IsDir()
// return error if specified to open directory but inode is not a directory.
if !isDir && opts.Mode.IsDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
if opts.Flags&linux.O_LARGEFILE == 0 && atomic.LoadUint64(&i.size) > linux.MAX_NON_LFS {
- return nil, syserror.EOVERFLOW
+ return nil, linuxerr.EOVERFLOW
}
var fd *fileDescription
@@ -418,7 +418,7 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.Open: couldn't get kernel task from context")
- return nil, syserror.EINVAL
+ return nil, linuxerr.EINVAL
}
// Build the request.
@@ -440,7 +440,7 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr
if err != nil {
return nil, err
}
- if err := res.Error(); err == syserror.ENOSYS && !isDir {
+ if err := res.Error(); linuxerr.Equals(linuxerr.ENOSYS, err) && !isDir {
i.fs.conn.noOpen = true
} else if err != nil {
return nil, err
@@ -512,7 +512,7 @@ func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions)
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.NewFile: couldn't get kernel task from context", i.nodeID)
- return nil, syserror.EINVAL
+ return nil, linuxerr.EINVAL
}
in := linux.FUSECreateIn{
CreateMeta: linux.FUSECreateMeta{
@@ -552,7 +552,7 @@ func (i *inode) Unlink(ctx context.Context, name string, child kernfs.Inode) err
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID)
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
in := linux.FUSEUnlinkIn{Name: name}
req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, linux.FUSE_UNLINK, &in)
@@ -596,7 +596,7 @@ func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMo
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID)
- return nil, syserror.EINVAL
+ return nil, linuxerr.EINVAL
}
req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, opcode, payload)
res, err := i.fs.conn.Call(kernelTask, req)
@@ -611,7 +611,7 @@ func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMo
return nil, err
}
if opcode != linux.FUSE_LOOKUP && ((out.Attr.Mode&linux.S_IFMT)^uint32(fileType) != 0 || out.NodeID == 0 || out.NodeID == linux.FUSE_ROOT_ID) {
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
child := i.fs.newInode(ctx, out.NodeID, out.Attr)
return child, nil
@@ -626,13 +626,13 @@ func (i *inode) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry,
// Readlink implements kernfs.Inode.Readlink.
func (i *inode) Readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
if i.Mode().FileType()&linux.S_IFLNK == 0 {
- return "", syserror.EINVAL
+ return "", linuxerr.EINVAL
}
if len(i.link) == 0 {
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.Readlink: couldn't get kernel task from context")
- return "", syserror.EINVAL
+ return "", linuxerr.EINVAL
}
req := i.fs.conn.NewRequest(auth.CredentialsFromContext(ctx), uint32(kernelTask.ThreadID()), i.nodeID, linux.FUSE_READLINK, &linux.FUSEEmptyIn{})
res, err := i.fs.conn.Call(kernelTask, req)
@@ -728,7 +728,7 @@ func (i *inode) getAttr(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOp
task := kernel.TaskFromContext(ctx)
if task == nil {
log.Warningf("couldn't get kernel task from context")
- return linux.FUSEAttr{}, syserror.EINVAL
+ return linux.FUSEAttr{}, linuxerr.EINVAL
}
creds := auth.CredentialsFromContext(ctx)
@@ -833,7 +833,7 @@ func (i *inode) setAttr(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
task := kernel.TaskFromContext(ctx)
if task == nil {
log.Warningf("couldn't get kernel task from context")
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
// We should retain the original file type when assigning new mode.
diff --git a/pkg/sentry/fsimpl/fuse/read_write.go b/pkg/sentry/fsimpl/fuse/read_write.go
index 66ea889f9..fe119aa43 100644
--- a/pkg/sentry/fsimpl/fuse/read_write.go
+++ b/pkg/sentry/fsimpl/fuse/read_write.go
@@ -20,11 +20,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/syserror"
)
// ReadInPages sends FUSE_READ requests for the size after round it up to
@@ -39,7 +39,7 @@ func (fs *filesystem) ReadInPages(ctx context.Context, fd *regularFileFD, off ui
t := kernel.TaskFromContext(ctx)
if t == nil {
log.Warningf("fusefs.Read: couldn't get kernel task from context")
- return nil, 0, syserror.EINVAL
+ return nil, 0, linuxerr.EINVAL
}
// Round up to a multiple of page size.
@@ -155,7 +155,7 @@ func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64,
t := kernel.TaskFromContext(ctx)
if t == nil {
log.Warningf("fusefs.Read: couldn't get kernel task from context")
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// One request cannnot exceed either maxWrite or maxPages.
@@ -220,7 +220,7 @@ func (fs *filesystem) Write(ctx context.Context, fd *regularFileFD, off uint64,
// Write more than requested? EIO.
if out.Size > toWrite {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
written += out.Size
diff --git a/pkg/sentry/fsimpl/fuse/regular_file.go b/pkg/sentry/fsimpl/fuse/regular_file.go
index 5bdd096c3..38cde8208 100644
--- a/pkg/sentry/fsimpl/fuse/regular_file.go
+++ b/pkg/sentry/fsimpl/fuse/regular_file.go
@@ -22,8 +22,8 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -39,14 +39,14 @@ type regularFileFD struct {
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// Check that flags are supported.
//
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, linuxerr.EOPNOTSUPP
}
size := dst.NumBytes()
@@ -56,7 +56,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
} else if size > math.MaxUint32 {
// FUSE only supports uint32 for size.
// Overflow.
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// TODO(gvisor.dev/issue/3678): Add direct IO support.
@@ -107,7 +107,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
return 0, err
}
if int64(cp) != toCopy {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
copied += toCopy
}
@@ -143,14 +143,14 @@ func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts
// final offset should be ignored by PWrite.
func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
if offset < 0 {
- return 0, offset, syserror.EINVAL
+ return 0, offset, linuxerr.EINVAL
}
// Check that flags are supported.
//
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, offset, syserror.EOPNOTSUPP
+ return 0, offset, linuxerr.EOPNOTSUPP
}
inode := fd.inode()
@@ -171,11 +171,11 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
if srclen > math.MaxUint32 {
// FUSE only supports uint32 for size.
// Overflow.
- return 0, offset, syserror.EINVAL
+ return 0, offset, linuxerr.EINVAL
}
if end := offset + srclen; end < offset {
// Overflow.
- return 0, offset, syserror.EINVAL
+ return 0, offset, linuxerr.EINVAL
}
srclen, err = vfs.CheckLimit(ctx, offset, srclen)
@@ -204,7 +204,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
return 0, offset, err
}
if int64(cp) != srclen {
- return 0, offset, syserror.EIO
+ return 0, offset, linuxerr.EIO
}
n, err := fd.inode().fs.Write(ctx, fd, uint64(offset), uint32(srclen), data)
@@ -215,7 +215,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
if n == 0 {
// We have checked srclen != 0 previously.
// If err == nil, then it's a short write and we return EIO.
- return 0, offset, syserror.EIO
+ return 0, offset, linuxerr.EIO
}
written = int64(n)
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
index 368272f12..4244f2cf5 100644
--- a/pkg/sentry/fsimpl/gofer/BUILD
+++ b/pkg/sentry/fsimpl/gofer/BUILD
@@ -49,6 +49,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fd",
"//pkg/fdnotifier",
"//pkg/fspath",
@@ -78,7 +79,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/unet",
"//pkg/usermem",
"//pkg/waiter",
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index 177e42649..5c48a9fee 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/refsvfs2"
@@ -28,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
func (d *dentry) isDir() bool {
@@ -297,7 +297,7 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in
switch whence {
case linux.SEEK_SET:
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if offset == 0 {
// Ensure that the next call to fd.IterDirents() calls
@@ -309,13 +309,13 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in
case linux.SEEK_CUR:
offset += fd.off
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// Don't clear fd.dirents in this case, even if offset == 0.
fd.off = offset
return fd.off, nil
default:
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index eb09d54c3..00228c469 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
@@ -32,32 +33,19 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Sync implements vfs.FilesystemImpl.Sync.
func (fs *filesystem) Sync(ctx context.Context) error {
// Snapshot current syncable dentries and special file FDs.
- fs.renameMu.RLock()
fs.syncMu.Lock()
ds := make([]*dentry, 0, len(fs.syncableDentries))
for d := range fs.syncableDentries {
- // It's safe to use IncRef here even though fs.syncableDentries doesn't
- // hold references since we hold fs.renameMu. Note that we can't use
- // TryIncRef since cached dentries at zero references should still be
- // synced.
- d.IncRef()
ds = append(ds, d)
}
- fs.renameMu.RUnlock()
sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs))
for sffd := range fs.specialFileFDs {
- // As above, fs.specialFileFDs doesn't hold references. However, unlike
- // dentries, an FD that has reached zero references can't be
- // resurrected, so we can use TryIncRef.
- if sffd.vfsfd.TryIncRef() {
- sffds = append(sffds, sffd)
- }
+ sffds = append(sffds, sffd)
}
fs.syncMu.Unlock()
@@ -67,9 +55,7 @@ func (fs *filesystem) Sync(ctx context.Context) error {
// Sync syncable dentries.
for _, d := range ds {
- err := d.syncCachedFile(ctx, true /* forFilesystemSync */)
- d.DecRef(ctx)
- if err != nil {
+ if err := d.syncCachedFile(ctx, true /* forFilesystemSync */); err != nil {
ctx.Infof("gofer.filesystem.Sync: dentry.syncCachedFile failed: %v", err)
if retErr == nil {
retErr = err
@@ -80,9 +66,7 @@ func (fs *filesystem) Sync(ctx context.Context) error {
// Sync special files, which may be writable but do not use dentry shared
// handles (so they won't be synced by the above).
for _, sffd := range sffds {
- err := sffd.sync(ctx, true /* forFilesystemSync */)
- sffd.vfsfd.DecRef(ctx)
- if err != nil {
+ if err := sffd.sync(ctx, true /* forFilesystemSync */); err != nil {
ctx.Infof("gofer.filesystem.Sync: specialFileFD.sync failed: %v", err)
if retErr == nil {
retErr = err
@@ -146,6 +130,7 @@ func putDentrySlice(ds *[]*dentry) {
// but dentry slices are allocated lazily, and it's much easier to say "defer
// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **[]*dentry) {
fs.renameMu.RUnlock()
if *dsp == nil {
@@ -158,6 +143,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **
putDentrySlice(*dsp)
}
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
if *ds == nil {
fs.renameMu.Unlock()
@@ -186,7 +172,7 @@ func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]
// Postconditions: The returned dentry's cached metadata is up to date.
func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, bool, error) {
if !d.isDir() {
- return nil, false, syserror.ENOTDIR
+ return nil, false, linuxerr.ENOTDIR
}
if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
return nil, false, err
@@ -244,18 +230,18 @@ afterSymlink:
// * dentry at name has been revalidated
func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
if len(name) > maxFilenameLen {
- return nil, syserror.ENAMETOOLONG
+ return nil, linuxerr.ENAMETOOLONG
}
if child, ok := parent.children[name]; ok || parent.isSynthetic() {
if child == nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
return child, nil
}
qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name)
if err != nil {
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
parent.cacheNegativeLookupLocked(name)
}
return nil, err
@@ -302,7 +288,7 @@ func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving
}
}
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return d, nil
}
@@ -330,7 +316,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath,
}
}
if rp.MustBeDir() && !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return d, nil
}
@@ -359,10 +345,10 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
}
name := rp.Component()
if name == "." || name == ".." {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
if parent.isDeleted() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, name, &ds); err != nil {
return err
@@ -372,20 +358,20 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
defer parent.dirMu.Unlock()
if len(name) > maxFilenameLen {
- return syserror.ENAMETOOLONG
+ return linuxerr.ENAMETOOLONG
}
// Check for existence only if caching information is available. Otherwise,
// don't check for existence just yet. We will check for existence if the
// checks for writability fail below. Existence check is done by the creation
// RPCs themselves.
if child, ok := parent.children[name]; ok && child != nil {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
checkExistence := func() error {
- if child, err := fs.getChildLocked(ctx, parent, name, &ds); err != nil && err != syserror.ENOENT {
+ if child, err := fs.getChildLocked(ctx, parent, name, &ds); err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
return err
} else if child != nil {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
return nil
}
@@ -408,11 +394,11 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
return err
}
if !dir && rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if parent.isSynthetic() {
if createInSyntheticDir == nil {
- return syserror.EPERM
+ return linuxerr.EPERM
}
if err := createInSyntheticDir(parent, name); err != nil {
return err
@@ -469,14 +455,14 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
name := rp.Component()
if dir {
if name == "." {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
if name == ".." {
- return syserror.ENOTEMPTY
+ return linuxerr.ENOTEMPTY
}
} else {
if name == "." || name == ".." {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
}
@@ -499,7 +485,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
child, ok = parent.children[name]
if ok && child == nil {
// Hit a negative cached entry, child doesn't exist.
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
} else {
child, _, err = fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
@@ -539,8 +525,8 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
if child.syntheticChildren != 0 {
// This is definitely not an empty directory, irrespective of
// fs.opts.interop.
- vfsObj.AbortDeleteDentry(&child.vfsd)
- return syserror.ENOTEMPTY
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: PrepareDeleteDentry called if child != nil.
+ return linuxerr.ENOTEMPTY
}
// If InteropModeShared is in effect and the first call to
// PrepareDeleteDentry above succeeded, then child wasn't
@@ -549,13 +535,13 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
// still exist) would be a waste of time.
if child.cachedMetadataAuthoritative() {
if !child.isDir() {
- vfsObj.AbortDeleteDentry(&child.vfsd)
- return syserror.ENOTDIR
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
+ return linuxerr.ENOTDIR
}
for _, grandchild := range child.children {
if grandchild != nil {
- vfsObj.AbortDeleteDentry(&child.vfsd)
- return syserror.ENOTEMPTY
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
+ return linuxerr.ENOTEMPTY
}
}
}
@@ -564,25 +550,25 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
} else {
// child must be a non-directory file.
if child != nil && child.isDir() {
- vfsObj.AbortDeleteDentry(&child.vfsd)
- return syserror.EISDIR
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
+ return linuxerr.EISDIR
}
if rp.MustBeDir() {
if child != nil {
- vfsObj.AbortDeleteDentry(&child.vfsd)
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
}
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
}
if parent.isSynthetic() {
if child == nil {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
} else if child == nil || !child.isSynthetic() {
err = parent.file.unlinkAt(ctx, name, flags)
if err != nil {
if child != nil {
- vfsObj.AbortDeleteDentry(&child.vfsd)
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
}
return err
}
@@ -600,7 +586,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
}
if child != nil {
- vfsObj.CommitDeleteDentry(ctx, &child.vfsd)
+ vfsObj.CommitDeleteDentry(ctx, &child.vfsd) // +checklocksforce: see above.
child.setDeleted()
if child.isSynthetic() {
parent.syntheticChildren--
@@ -642,7 +628,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
}
if opts.CheckSearchable {
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
@@ -674,11 +660,11 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa
func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, _ **[]*dentry) error {
if rp.Mount() != vd.Mount() {
- return syserror.EXDEV
+ return linuxerr.EXDEV
}
d := vd.Dentry().Impl().(*dentry)
if d.isDir() {
- return syserror.EPERM
+ return linuxerr.EPERM
}
gid := auth.KGID(atomic.LoadUint32(&d.gid))
uid := auth.KUID(atomic.LoadUint32(&d.uid))
@@ -687,10 +673,10 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return err
}
if d.nlink == 0 {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if d.nlink == math.MaxUint32 {
- return syserror.EMLINK
+ return linuxerr.EMLINK
}
if err := parent.file.link(ctx, d.file, childName); err != nil {
return err
@@ -715,7 +701,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
mode |= linux.S_ISGID
}
if _, err := parent.file.mkdir(ctx, name, p9.FileMode(mode), (p9.UID)(creds.EffectiveKUID), p9.GID(kgid)); err != nil {
- if !opts.ForSyntheticMountpoint || err == syserror.EEXIST {
+ if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) {
return err
}
ctx.Infof("Failed to create remote directory %q: %v; falling back to synthetic directory", name, err)
@@ -734,7 +720,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
}, func(parent *dentry, name string) error {
if !opts.ForSyntheticMountpoint {
// Can't create non-synthetic files in synthetic directories.
- return syserror.EPERM
+ return linuxerr.EPERM
}
parent.createSyntheticChildLocked(&createSyntheticOpts{
name: name,
@@ -752,7 +738,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) error {
creds := rp.Credentials()
_, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
- if err != syserror.EPERM {
+ if !linuxerr.Equals(linuxerr.EPERM, err) {
return err
}
@@ -764,8 +750,8 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
switch {
case err == nil:
// Step succeeded, another file exists.
- return syserror.EEXIST
- case err != syserror.ENOENT:
+ return linuxerr.EEXIST
+ case !linuxerr.Equals(linuxerr.ENOENT, err):
// Unexpected error.
return err
}
@@ -793,7 +779,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return nil
}
// Retain error from gofer if synthetic file cannot be created internally.
- return syserror.EPERM
+ return linuxerr.EPERM
}, nil)
}
@@ -804,7 +790,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
// support, and it isn't clear that there's any way to implement this in
// 9P.
if opts.Flags&linux.O_TMPFILE != 0 {
- return nil, syserror.EOPNOTSUPP
+ return nil, linuxerr.EOPNOTSUPP
}
mayCreate := opts.Flags&linux.O_CREAT != 0
mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)
@@ -824,10 +810,10 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if rp.Done() {
// Reject attempts to open mount root directory with O_CREAT.
if mayCreate && rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if mustCreate {
- return nil, syserror.EEXIST
+ return nil, linuxerr.EEXIST
}
if !start.cachedMetadataAuthoritative() {
// Refresh dentry's attributes before opening.
@@ -854,7 +840,7 @@ afterTrailingSymlink:
}
// Reject attempts to open directories with O_CREAT.
if mayCreate && rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, rp.Component(), &ds); err != nil {
return nil, err
@@ -862,10 +848,10 @@ afterTrailingSymlink:
// Determine whether or not we need to create a file.
parent.dirMu.Lock()
child, _, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
- if err == syserror.ENOENT && mayCreate {
+ if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate {
if parent.isSynthetic() {
parent.dirMu.Unlock()
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts, &ds)
parent.dirMu.Unlock()
@@ -876,7 +862,7 @@ afterTrailingSymlink:
return nil, err
}
if mustCreate {
- return nil, syserror.EEXIST
+ return nil, linuxerr.EEXIST
}
// Open existing child or follow symlink.
if child.isSymlink() && rp.ShouldFollowSymlink() {
@@ -891,7 +877,7 @@ afterTrailingSymlink:
goto afterTrailingSymlink
}
if rp.MustBeDir() && !child.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
child.IncRef()
defer child.DecRef(ctx)
@@ -935,14 +921,14 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
case linux.S_IFDIR:
// Can't open directories with O_CREAT.
if opts.Flags&linux.O_CREAT != 0 {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
// Can't open directories writably.
if ats&vfs.MayWrite != 0 {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if opts.Flags&linux.O_DIRECT != 0 {
- return nil, syserror.EINVAL
+ return nil, linuxerr.EINVAL
}
if !d.isSynthetic() {
if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil {
@@ -962,10 +948,10 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
return &fd.vfsfd, nil
case linux.S_IFLNK:
// Can't open symlinks without O_PATH, which is handled at the VFS layer.
- return nil, syserror.ELOOP
+ return nil, linuxerr.ELOOP
case linux.S_IFSOCK:
if d.isSynthetic() {
- return nil, syserror.ENXIO
+ return nil, linuxerr.ENXIO
}
if d.fs.iopts.OpenSocketsByConnecting {
return d.openSocketByConnecting(ctx, opts)
@@ -998,7 +984,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
if opts.Flags&linux.O_DIRECT != 0 {
- return nil, syserror.EINVAL
+ return nil, linuxerr.EINVAL
}
fdObj, err := d.file.connect(ctx, p9.AnonymousSocket)
if err != nil {
@@ -1019,7 +1005,7 @@ func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptio
func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
ats := vfs.AccessTypesForOpenFlags(opts)
if opts.Flags&linux.O_DIRECT != 0 {
- return nil, syserror.EINVAL
+ return nil, linuxerr.EINVAL
}
// We assume that the server silently inserts O_NONBLOCK in the open flags
// for all named pipes (because all existing gofers do this).
@@ -1033,7 +1019,7 @@ func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs.
retry:
h, err := openHandle(ctx, d.file, ats.MayRead(), ats.MayWrite(), opts.Flags&linux.O_TRUNC != 0)
if err != nil {
- if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && err == syserror.ENXIO {
+ if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && linuxerr.Equals(linuxerr.ENXIO, err) {
// An attempt to open a named pipe with O_WRONLY|O_NONBLOCK fails
// with ENXIO if opening the same named pipe with O_WRONLY would
// block because there are no readers of the pipe.
@@ -1067,7 +1053,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
return nil, err
}
if d.isDeleted() {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
mnt := rp.Mount()
if err := mnt.CheckBeginWrite(); err != nil {
@@ -1187,7 +1173,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
return "", err
}
if !d.isSymlink() {
- return "", syserror.EINVAL
+ return "", linuxerr.EINVAL
}
return d.readlink(ctx, rp.Mount())
}
@@ -1204,24 +1190,24 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
if fs.opts.interop == InteropModeShared && opts.Flags&linux.RENAME_NOREPLACE != 0 {
// Requires 9P support to synchronize with other remote filesystem
// users.
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
newName := rp.Component()
if newName == "." || newName == ".." {
if opts.Flags&linux.RENAME_NOREPLACE != 0 {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
- return syserror.EBUSY
+ return linuxerr.EBUSY
}
mnt := rp.Mount()
if mnt != oldParentVD.Mount() {
- return syserror.EXDEV
+ return linuxerr.EXDEV
}
if err := mnt.CheckBeginWrite(); err != nil {
return err
@@ -1260,7 +1246,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
if renamed.isDir() {
if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
if oldParent != newParent {
if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil {
@@ -1269,7 +1255,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
} else {
if opts.MustBeDir || rp.MustBeDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
}
@@ -1281,28 +1267,28 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
defer newParent.dirMu.Unlock()
}
if newParent.isDeleted() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
replaced, err := fs.getChildLocked(ctx, newParent, newName, &ds)
- if err != nil && err != syserror.ENOENT {
+ if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
return err
}
var replacedVFSD *vfs.Dentry
if replaced != nil {
if opts.Flags&linux.RENAME_NOREPLACE != 0 {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
replacedVFSD = &replaced.vfsd
if replaced.isDir() {
if !renamed.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if genericIsAncestorDentry(replaced, renamed) {
- return syserror.ENOTEMPTY
+ return linuxerr.ENOTEMPTY
}
} else {
if rp.MustBeDir() || renamed.isDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
}
}
@@ -1507,7 +1493,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
return d.endpoint, nil
}
}
- return nil, syserror.ECONNREFUSED
+ return nil, linuxerr.ECONNREFUSED
}
// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index cf69e1b7a..43440ec19 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -46,6 +46,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
@@ -61,7 +62,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/unet"
)
@@ -318,7 +318,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
mfp := pgalloc.MemoryFileProviderFromContext(ctx)
if mfp == nil {
ctx.Warningf("gofer.FilesystemType.GetFilesystem: context does not provide a pgalloc.MemoryFileProvider")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
mopts := vfs.GenericParseMountOptions(opts.Data)
@@ -354,7 +354,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fsopts.interop = InteropModeShared
default:
ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: %s=%s", moptCache, cache)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
}
@@ -365,7 +365,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
dfltuid, err := strconv.ParseUint(dfltuidstr, 10, 32)
if err != nil {
ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: %s=%s", moptDfltUID, dfltuidstr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
// In Linux, dfltuid is interpreted as a UID and is converted to a KUID
// in the caller's user namespace, but goferfs isn't
@@ -378,7 +378,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
dfltgid, err := strconv.ParseUint(dfltgidstr, 10, 32)
if err != nil {
ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: %s=%s", moptDfltGID, dfltgidstr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
fsopts.dfltgid = auth.KGID(dfltgid)
}
@@ -390,7 +390,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
msize, err := strconv.ParseUint(msizestr, 10, 32)
if err != nil {
ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: %s=%s", moptMsize, msizestr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
fsopts.msize = uint32(msize)
}
@@ -409,7 +409,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
maxCachedDentries, err := strconv.ParseUint(str, 10, 64)
if err != nil {
ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: %s=%s", moptDentryCacheLimit, str)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
fsopts.maxCachedDentries = maxCachedDentries
}
@@ -433,14 +433,14 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// Check for unparsed options.
if len(mopts) != 0 {
ctx.Warningf("gofer.FilesystemType.GetFilesystem: unknown options: %v", mopts)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
// Handle internal options.
iopts, ok := opts.InternalData.(InternalFilesystemOptions)
if opts.InternalData != nil && !ok {
ctx.Warningf("gofer.FilesystemType.GetFilesystem: GetFilesystemOptions.InternalData has type %T, wanted gofer.InternalFilesystemOptions", opts.InternalData)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
// If !ok, iopts being the zero value is correct.
@@ -503,7 +503,7 @@ func getFDFromMountOptionsMap(ctx context.Context, mopts map[string]string) (int
trans, ok := mopts[moptTransport]
if !ok || trans != transportModeFD {
ctx.Warningf("gofer.getFDFromMountOptionsMap: transport must be specified as '%s=%s'", moptTransport, transportModeFD)
- return -1, syserror.EINVAL
+ return -1, linuxerr.EINVAL
}
delete(mopts, moptTransport)
@@ -511,28 +511,28 @@ func getFDFromMountOptionsMap(ctx context.Context, mopts map[string]string) (int
rfdstr, ok := mopts[moptReadFD]
if !ok {
ctx.Warningf("gofer.getFDFromMountOptionsMap: read FD must be specified as '%s=<file descriptor>'", moptReadFD)
- return -1, syserror.EINVAL
+ return -1, linuxerr.EINVAL
}
delete(mopts, moptReadFD)
rfd, err := strconv.Atoi(rfdstr)
if err != nil {
ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid read FD: %s=%s", moptReadFD, rfdstr)
- return -1, syserror.EINVAL
+ return -1, linuxerr.EINVAL
}
wfdstr, ok := mopts[moptWriteFD]
if !ok {
ctx.Warningf("gofer.getFDFromMountOptionsMap: write FD must be specified as '%s=<file descriptor>'", moptWriteFD)
- return -1, syserror.EINVAL
+ return -1, linuxerr.EINVAL
}
delete(mopts, moptWriteFD)
wfd, err := strconv.Atoi(wfdstr)
if err != nil {
ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid write FD: %s=%s", moptWriteFD, wfdstr)
- return -1, syserror.EINVAL
+ return -1, linuxerr.EINVAL
}
if rfd != wfd {
ctx.Warningf("gofer.getFDFromMountOptionsMap: read FD (%d) and write FD (%d) must be equal", rfd, wfd)
- return -1, syserror.EINVAL
+ return -1, linuxerr.EINVAL
}
return rfd, nil
}
@@ -581,10 +581,10 @@ func (fs *filesystem) Release(ctx context.Context) {
d.dataMu.Unlock()
// Close host FDs if they exist.
if d.readFD >= 0 {
- unix.Close(int(d.readFD))
+ _ = unix.Close(int(d.readFD))
}
if d.writeFD >= 0 && d.readFD != d.writeFD {
- unix.Close(int(d.writeFD))
+ _ = unix.Close(int(d.writeFD))
}
d.readFD = -1
d.writeFD = -1
@@ -864,11 +864,11 @@ func dentryAttrMask() p9.AttrMask {
func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, mask p9.AttrMask, attr *p9.Attr) (*dentry, error) {
if !mask.Mode {
ctx.Warningf("can't create gofer.dentry without file type")
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
if attr.Mode.FileType() == p9.ModeRegular && !mask.Size {
ctx.Warningf("can't create regular file gofer.dentry without file size")
- return nil, syserror.EIO
+ return nil, linuxerr.EIO
}
d := &dentry{
@@ -946,10 +946,10 @@ func (d *dentry) cachedMetadataAuthoritative() bool {
// updateFromP9Attrs is called to update d's metadata after an update from the
// remote filesystem.
// Precondition: d.metadataMu must be locked.
+// +checklocks:d.metadataMu
func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) {
if mask.Mode {
if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want {
- d.metadataMu.Unlock()
panic(fmt.Sprintf("gofer.dentry file type changed from %#o to %#o", want, got))
}
atomic.StoreUint32(&d.mode, uint32(attr.Mode))
@@ -988,13 +988,14 @@ func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) {
// Preconditions: !d.isSynthetic().
// Preconditions: d.metadataMu is locked.
+// +checklocks:d.metadataMu
func (d *dentry) refreshSizeLocked(ctx context.Context) error {
d.handleMu.RLock()
if d.writeFD < 0 {
d.handleMu.RUnlock()
// Ask the gofer if we don't have a host FD.
- return d.updateFromGetattrLocked(ctx)
+ return d.updateFromGetattrLocked(ctx, p9file{})
}
var stat unix.Statx_t
@@ -1013,37 +1014,41 @@ func (d *dentry) updateFromGetattr(ctx context.Context) error {
// updating stale attributes in d.updateFromP9AttrsLocked().
d.metadataMu.Lock()
defer d.metadataMu.Unlock()
- return d.updateFromGetattrLocked(ctx)
+ return d.updateFromGetattrLocked(ctx, p9file{})
}
// Preconditions:
// * !d.isSynthetic().
// * d.metadataMu is locked.
-func (d *dentry) updateFromGetattrLocked(ctx context.Context) error {
- // Use d.readFile or d.writeFile, which represent 9P FIDs that have been
- // opened, in preference to d.file, which represents a 9P fid that has not.
- // This may be significantly more efficient in some implementations. Prefer
- // d.writeFile over d.readFile since some filesystem implementations may
- // update a writable handle's metadata after writes to that handle, without
- // making metadata updates immediately visible to read-only handles
- // representing the same file.
- d.handleMu.RLock()
- handleMuRLocked := true
- var file p9file
- switch {
- case !d.writeFile.isNil():
- file = d.writeFile
- case !d.readFile.isNil():
- file = d.readFile
- default:
- file = d.file
- d.handleMu.RUnlock()
- handleMuRLocked = false
+// +checklocks:d.metadataMu
+func (d *dentry) updateFromGetattrLocked(ctx context.Context, file p9file) error {
+ handleMuRLocked := false
+ if file.isNil() {
+ // Use d.readFile or d.writeFile, which represent 9P FIDs that have
+ // been opened, in preference to d.file, which represents a 9P fid that
+ // has not. This may be significantly more efficient in some
+ // implementations. Prefer d.writeFile over d.readFile since some
+ // filesystem implementations may update a writable handle's metadata
+ // after writes to that handle, without making metadata updates
+ // immediately visible to read-only handles representing the same file.
+ d.handleMu.RLock()
+ switch {
+ case !d.writeFile.isNil():
+ file = d.writeFile
+ handleMuRLocked = true
+ case !d.readFile.isNil():
+ file = d.readFile
+ handleMuRLocked = true
+ default:
+ file = d.file
+ d.handleMu.RUnlock()
+ }
}
_, attrMask, attr, err := file.getAttr(ctx, dentryAttrMask())
if handleMuRLocked {
- d.handleMu.RUnlock() // must be released before updateFromP9AttrsLocked()
+ // handleMu must be released before updateFromP9AttrsLocked().
+ d.handleMu.RUnlock() // +checklocksforce: complex case.
}
if err != nil {
return err
@@ -1090,7 +1095,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs
return nil
}
if stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_SIZE) != 0 {
- return syserror.EPERM
+ return linuxerr.EPERM
}
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
if err := vfs.CheckSetStat(ctx, creds, opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
@@ -1108,9 +1113,9 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs
case linux.S_IFREG:
// ok
case linux.S_IFDIR:
- return syserror.EISDIR
+ return linuxerr.EISDIR
default:
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
}
@@ -1157,6 +1162,13 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs
if !d.isSynthetic() {
if stat.Mask != 0 {
+ if stat.Mask&linux.STATX_SIZE != 0 {
+ // d.dataMu must be held around the update to both the remote
+ // file's size and d.size to serialize with writeback (which
+ // might otherwise write data back up to the old d.size after
+ // the remote file has been truncated).
+ d.dataMu.Lock()
+ }
if err := d.file.setAttr(ctx, p9.SetAttrMask{
Permissions: stat.Mask&linux.STATX_MODE != 0,
UID: stat.Mask&linux.STATX_UID != 0,
@@ -1176,13 +1188,16 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs
MTimeSeconds: uint64(stat.Mtime.Sec),
MTimeNanoSeconds: uint64(stat.Mtime.Nsec),
}); err != nil {
+ if stat.Mask&linux.STATX_SIZE != 0 {
+ d.dataMu.Unlock() // +checklocksforce: locked conditionally above
+ }
return err
}
if stat.Mask&linux.STATX_SIZE != 0 {
// d.size should be kept up to date, and privatized
// copy-on-write mappings of truncated pages need to be
// invalidated, even if InteropModeShared is in effect.
- d.updateSizeLocked(stat.Size)
+ d.updateSizeAndUnlockDataMuLocked(stat.Size) // +checklocksforce: locked conditionally above
}
}
if d.fs.opts.interop == InteropModeShared {
@@ -1245,6 +1260,14 @@ func (d *dentry) doAllocate(ctx context.Context, offset, length uint64, allocate
// Preconditions: d.metadataMu must be locked.
func (d *dentry) updateSizeLocked(newSize uint64) {
d.dataMu.Lock()
+ d.updateSizeAndUnlockDataMuLocked(newSize)
+}
+
+// Preconditions: d.metadataMu and d.dataMu must be locked.
+//
+// Postconditions: d.dataMu is unlocked.
+// +checklocksrelease:d.dataMu
+func (d *dentry) updateSizeAndUnlockDataMuLocked(newSize uint64) {
oldSize := d.size
atomic.StoreUint64(&d.size, newSize)
// d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings
@@ -1253,9 +1276,9 @@ func (d *dentry) updateSizeLocked(newSize uint64) {
// contents beyond the new d.size. (We are still holding d.metadataMu,
// so we can't race with Write or another truncate.)
d.dataMu.Unlock()
- if d.size < oldSize {
+ if newSize < oldSize {
oldpgend, _ := hostarch.PageRoundUp(oldSize)
- newpgend, _ := hostarch.PageRoundUp(d.size)
+ newpgend, _ := hostarch.PageRoundUp(newSize)
if oldpgend != newpgend {
d.mapsMu.Lock()
d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
@@ -1271,8 +1294,8 @@ func (d *dentry) updateSizeLocked(newSize uint64) {
// truncated pages have been removed from the remote file, they
// should be dropped without being written back.
d.dataMu.Lock()
- d.cache.Truncate(d.size, d.fs.mfp.MemoryFile())
- d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend})
+ d.cache.Truncate(newSize, d.fs.mfp.MemoryFile())
+ d.dirty.KeepClean(memmap.MappableRange{newSize, oldpgend})
d.dataMu.Unlock()
}
}
@@ -1288,7 +1311,7 @@ func (d *dentry) checkXattrPermissions(creds *auth.Credentials, name string, ats
// to the remote filesystem. This is inconsistent with Linux's 9p client,
// but consistent with other filesystems (e.g. FUSE).
if strings.HasPrefix(name, linux.XATTR_SECURITY_PREFIX) || strings.HasPrefix(name, linux.XATTR_SYSTEM_PREFIX) {
- return syserror.EOPNOTSUPP
+ return linuxerr.EOPNOTSUPP
}
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
kuid := auth.KUID(atomic.LoadUint32(&d.uid))
@@ -1469,7 +1492,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo
if d.isDeleted() {
d.watches.HandleDeletion(ctx)
}
- d.destroyLocked(ctx)
+ d.destroyLocked(ctx) // +checklocksforce: renameMu must be acquired at this point.
return
}
// If d still has inotify watches and it is not deleted or invalidated, it
@@ -1497,7 +1520,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo
delete(d.parent.children, d.name)
d.parent.dirMu.Unlock()
}
- d.destroyLocked(ctx)
+ d.destroyLocked(ctx) // +checklocksforce: see above.
return
}
@@ -1526,7 +1549,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo
d.fs.renameMu.Lock()
defer d.fs.renameMu.Unlock()
}
- d.fs.evictCachedDentryLocked(ctx)
+ d.fs.evictCachedDentryLocked(ctx) // +checklocksforce: see above.
}
}
@@ -1543,6 +1566,7 @@ func (d *dentry) removeFromCacheLocked() {
// Precondition: fs.renameMu must be locked for writing; it may be temporarily
// unlocked.
+// +checklocks:fs.renameMu
func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) {
for fs.cachedDentriesLen != 0 {
fs.evictCachedDentryLocked(ctx)
@@ -1551,6 +1575,7 @@ func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) {
// Preconditions:
// * fs.renameMu must be locked for writing; it may be temporarily unlocked.
+// +checklocks:fs.renameMu
func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
fs.cacheMu.Lock()
victim := fs.cachedDentries.Back()
@@ -1587,7 +1612,7 @@ func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
// will try to acquire fs.renameMu (which we have already acquired). Hence,
// fs.renameMu will synchronize the destroy attempts.
victim.cachingMu.Unlock()
- victim.destroyLocked(ctx)
+ victim.destroyLocked(ctx) // +checklocksforce: owned as precondition, victim.fs == fs.
}
// destroyLocked destroys the dentry.
@@ -1597,6 +1622,7 @@ func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
// * d.refs == 0.
// * d.parent.children[d.name] != d, i.e. d is not reachable by path traversal
// from its former parent dentry.
+// +checklocks:d.fs.renameMu
func (d *dentry) destroyLocked(ctx context.Context) {
switch atomic.LoadInt64(&d.refs) {
case 0:
@@ -1630,18 +1656,18 @@ func (d *dentry) destroyLocked(ctx context.Context) {
d.dataMu.Unlock()
// Clunk open fids and close open host FDs.
if !d.readFile.isNil() {
- d.readFile.close(ctx)
+ _ = d.readFile.close(ctx)
}
if !d.writeFile.isNil() && d.readFile != d.writeFile {
- d.writeFile.close(ctx)
+ _ = d.writeFile.close(ctx)
}
d.readFile = p9file{}
d.writeFile = p9file{}
if d.readFD >= 0 {
- unix.Close(int(d.readFD))
+ _ = unix.Close(int(d.readFD))
}
if d.writeFD >= 0 && d.readFD != d.writeFD {
- unix.Close(int(d.writeFD))
+ _ = unix.Close(int(d.writeFD))
}
d.readFD = -1
d.writeFD = -1
@@ -1703,7 +1729,7 @@ func (d *dentry) listXattr(ctx context.Context, creds *auth.Credentials, size ui
func (d *dentry) getXattr(ctx context.Context, creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) {
if d.file.isNil() {
- return "", syserror.ENODATA
+ return "", linuxerr.ENODATA
}
if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil {
return "", err
@@ -1713,7 +1739,7 @@ func (d *dentry) getXattr(ctx context.Context, creds *auth.Credentials, opts *vf
func (d *dentry) setXattr(ctx context.Context, creds *auth.Credentials, opts *vfs.SetXattrOptions) error {
if d.file.isNil() {
- return syserror.EPERM
+ return linuxerr.EPERM
}
if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil {
return err
@@ -1723,7 +1749,7 @@ func (d *dentry) setXattr(ctx context.Context, creds *auth.Credentials, opts *vf
func (d *dentry) removeXattr(ctx context.Context, creds *auth.Credentials, name string) error {
if d.file.isNil() {
- return syserror.EPERM
+ return linuxerr.EPERM
}
if err := d.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil {
return err
@@ -1763,7 +1789,7 @@ func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool
openReadable := !d.readFile.isNil() || read
openWritable := !d.writeFile.isNil() || write
h, err := openHandle(ctx, d.file, openReadable, openWritable, trunc)
- if err == syserror.EACCES && (openReadable != read || openWritable != write) {
+ if linuxerr.Equals(linuxerr.EACCES, err) && (openReadable != read || openWritable != write) {
// It may not be possible to use a single handle for both
// reading and writing, since permissions on the file may have
// changed to e.g. disallow reading after previously being
@@ -2020,9 +2046,17 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
d := fd.dentry()
const validMask = uint32(linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_SIZE | linux.STATX_BLOCKS | linux.STATX_BTIME)
if !d.cachedMetadataAuthoritative() && opts.Mask&validMask != 0 && opts.Sync != linux.AT_STATX_DONT_SYNC {
- // TODO(jamieliu): Use specialFileFD.handle.file for the getattr if
- // available?
- if err := d.updateFromGetattr(ctx); err != nil {
+ // Use specialFileFD.handle.file for the getattr if available, for the
+ // same reason that we try to use open file handles in
+ // dentry.updateFromGetattrLocked().
+ var file p9file
+ if sffd, ok := fd.vfsfd.Impl().(*specialFileFD); ok {
+ file = sffd.handle.file
+ }
+ d.metadataMu.Lock()
+ err := d.updateFromGetattrLocked(ctx, file)
+ d.metadataMu.Unlock()
+ if err != nil {
return linux.Statx{}, err
}
}
diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go
index 5c57f6fea..02540a754 100644
--- a/pkg/sentry/fsimpl/gofer/handle.go
+++ b/pkg/sentry/fsimpl/gofer/handle.go
@@ -20,6 +20,7 @@ import (
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
+ "gvisor.dev/gvisor/pkg/sync"
)
// handle represents a remote "open file descriptor", consisting of an opened
@@ -130,3 +131,43 @@ func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, o
}
return uint64(n), cperr
}
+
+type handleReadWriter struct {
+ ctx context.Context
+ h *handle
+ off uint64
+}
+
+var handleReadWriterPool = sync.Pool{
+ New: func() interface{} {
+ return &handleReadWriter{}
+ },
+}
+
+func getHandleReadWriter(ctx context.Context, h *handle, offset int64) *handleReadWriter {
+ rw := handleReadWriterPool.Get().(*handleReadWriter)
+ rw.ctx = ctx
+ rw.h = h
+ rw.off = uint64(offset)
+ return rw
+}
+
+func putHandleReadWriter(rw *handleReadWriter) {
+ rw.ctx = nil
+ rw.h = nil
+ handleReadWriterPool.Put(rw)
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks.
+func (rw *handleReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+ n, err := rw.h.readToBlocksAt(rw.ctx, dsts, rw.off)
+ rw.off += n
+ return n, err
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+func (rw *handleReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+ n, err := rw.h.writeFromBlocksAt(rw.ctx, srcs, rw.off)
+ rw.off += n
+ return n, err
+}
diff --git a/pkg/sentry/fsimpl/gofer/host_named_pipe.go b/pkg/sentry/fsimpl/gofer/host_named_pipe.go
index c7bf10007..505916a57 100644
--- a/pkg/sentry/fsimpl/gofer/host_named_pipe.go
+++ b/pkg/sentry/fsimpl/gofer/host_named_pipe.go
@@ -21,7 +21,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
)
// Global pipe used by blockUntilNonblockingPipeHasWriter since we can't create
@@ -78,7 +78,7 @@ func nonblockingPipeHasWriter(fd int32) (bool, error) {
defer tempPipeMu.Unlock()
// Copy 1 byte from fd into the temporary pipe.
n, err := unix.Tee(int(fd), tempPipeWriteFD, 1, unix.SPLICE_F_NONBLOCK)
- if err == syserror.EAGAIN {
+ if linuxerr.Equals(linuxerr.EAGAIN, err) {
// The pipe represented by fd is empty, but has a writer.
return true, nil
}
@@ -108,6 +108,6 @@ func sleepBetweenNamedPipeOpenChecks(ctx context.Context) error {
return nil
case <-cancel:
ctx.SleepFinish(false)
- return syserror.ErrInterrupted
+ return linuxerr.ErrInterrupted
}
}
diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go
index b0a429d42..5a3ddfc9d 100644
--- a/pkg/sentry/fsimpl/gofer/p9file.go
+++ b/pkg/sentry/fsimpl/gofer/p9file.go
@@ -16,9 +16,9 @@ package gofer
import (
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/p9"
- "gvisor.dev/gvisor/pkg/syserror"
)
// p9file is a wrapper around p9.File that provides methods that are
@@ -59,7 +59,7 @@ func (f p9file) walkGetAttrOne(ctx context.Context, name string) (p9.QID, p9file
if newfile != nil {
p9file{newfile}.close(ctx)
}
- return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, syserror.EIO
+ return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, linuxerr.EIO
}
return qids[0], p9file{newfile}, attrMask, attr, nil
}
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index eed05e369..947dbe05f 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/metric"
@@ -34,7 +35,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -79,17 +79,22 @@ func (fd *regularFileFD) OnClose(ctx context.Context) error {
if !fd.vfsfd.IsWritable() {
return nil
}
- // Skip flushing if there are client-buffered writes, since (as with the
- // VFS1 client) we don't flush buffered writes on close anyway.
d := fd.dentry()
- if d.fs.opts.interop != InteropModeExclusive {
- return nil
- }
- d.dataMu.RLock()
- haveDirtyPages := !d.dirty.IsEmpty()
- d.dataMu.RUnlock()
- if haveDirtyPages {
- return nil
+ if d.fs.opts.interop == InteropModeExclusive {
+ // d may have dirty pages that we won't write back now (and wouldn't
+ // have in VFS1), making a flushf RPC ineffective. If this is the case,
+ // skip the flushf.
+ //
+ // Note that it's also possible to have dirty pages under other interop
+ // modes if forcePageCache is in effect; we conservatively assume that
+ // applications have some way of tolerating this and still want the
+ // flushf.
+ d.dataMu.RLock()
+ haveDirtyPages := !d.dirty.IsEmpty()
+ d.dataMu.RUnlock()
+ if haveDirtyPages {
+ return nil
+ }
}
d.handleMu.RLock()
defer d.handleMu.RUnlock()
@@ -124,14 +129,14 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
}()
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// Check that flags are supported.
//
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, linuxerr.EOPNOTSUPP
}
// Check for reading at EOF before calling into MM (but not under
@@ -194,14 +199,14 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
// offset should be ignored by PWrite.
func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
if offset < 0 {
- return 0, offset, syserror.EINVAL
+ return 0, offset, linuxerr.EINVAL
}
// Check that flags are supported.
//
// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, offset, syserror.EOPNOTSUPP
+ return 0, offset, linuxerr.EOPNOTSUPP
}
d := fd.dentry()
@@ -297,7 +302,7 @@ func (fd *regularFileFD) writeCache(ctx context.Context, d *dentry, offset int64
pgstart := hostarch.PageRoundDown(uint64(offset))
pgend, ok := hostarch.PageRoundUp(uint64(offset + src.NumBytes()))
if !ok {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
mr := memmap.MappableRange{pgstart, pgend}
var freed []memmap.FileRange
@@ -652,20 +657,20 @@ func regularFileSeekLocked(ctx context.Context, d *dentry, fdOffset, offset int6
offset += size
case linux.SEEK_DATA:
if offset > size {
- return 0, syserror.ENXIO
+ return 0, linuxerr.ENXIO
}
// Use offset as specified.
case linux.SEEK_HOLE:
if offset > size {
- return 0, syserror.ENXIO
+ return 0, linuxerr.ENXIO
}
offset = size
}
default:
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
return offset, nil
}
@@ -678,28 +683,28 @@ func (fd *regularFileFD) Sync(ctx context.Context) error {
// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
d := fd.dentry()
- switch d.fs.opts.interop {
- case InteropModeExclusive:
- // Any mapping is fine.
- case InteropModeWritethrough:
- // Shared writable mappings require a host FD, since otherwise we can't
- // synchronously flush memory-mapped writes to the remote file.
- if opts.Private || !opts.MaxPerms.Write {
- break
- }
- fallthrough
- case InteropModeShared:
- // All mappings require a host FD to be coherent with other filesystem
- // users.
- if d.fs.opts.forcePageCache {
- // Whether or not we have a host FD, we're not allowed to use it.
- return syserror.ENODEV
- }
- if atomic.LoadInt32(&d.mmapFD) < 0 {
- return syserror.ENODEV
+ // Force sentry page caching at your own risk.
+ if !d.fs.opts.forcePageCache {
+ switch d.fs.opts.interop {
+ case InteropModeExclusive:
+ // Any mapping is fine.
+ case InteropModeWritethrough:
+ // Shared writable mappings require a host FD, since otherwise we
+ // can't synchronously flush memory-mapped writes to the remote
+ // file.
+ if opts.Private || !opts.MaxPerms.Write {
+ break
+ }
+ fallthrough
+ case InteropModeShared:
+ // All mappings require a host FD to be coherent with other
+ // filesystem users.
+ if atomic.LoadInt32(&d.mmapFD) < 0 {
+ return linuxerr.ENODEV
+ }
+ default:
+ panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop))
}
- default:
- panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop))
}
// After this point, d may be used as a memmap.Mappable.
d.pf.hostFileMapperInitOnce.Do(d.pf.hostFileMapper.Init)
@@ -707,14 +712,8 @@ func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpt
return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts)
}
-func (d *dentry) mayCachePages() bool {
- if d.fs.opts.interop == InteropModeShared {
- return false
- }
- if d.fs.opts.forcePageCache {
- return true
- }
- return atomic.LoadInt32(&d.mmapFD) >= 0
+func (fs *filesystem) mayCachePagesInMemoryFile() bool {
+ return fs.opts.forcePageCache || fs.opts.interop != InteropModeShared
}
// AddMapping implements memmap.Mappable.AddMapping.
@@ -726,7 +725,7 @@ func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar host
for _, r := range mapped {
d.pf.hostFileMapper.IncRefOn(r)
}
- if d.mayCachePages() {
+ if d.fs.mayCachePagesInMemoryFile() {
// d.Evict() will refuse to evict memory-mapped pages, so tell the
// MemoryFile to not bother trying.
mf := d.fs.mfp.MemoryFile()
@@ -745,7 +744,7 @@ func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar h
for _, r := range unmapped {
d.pf.hostFileMapper.DecRefOn(r)
}
- if d.mayCachePages() {
+ if d.fs.mayCachePagesInMemoryFile() {
// Pages that are no longer referenced by any application memory
// mappings are now considered unused; allow MemoryFile to evict them
// when necessary.
diff --git a/pkg/sentry/fsimpl/gofer/revalidate.go b/pkg/sentry/fsimpl/gofer/revalidate.go
index 8f81f0822..226790a11 100644
--- a/pkg/sentry/fsimpl/gofer/revalidate.go
+++ b/pkg/sentry/fsimpl/gofer/revalidate.go
@@ -247,16 +247,16 @@ func (fs *filesystem) revalidateHelper(ctx context.Context, vfsObj *vfs.VirtualF
if found && !d.isSynthetic() {
// First dentry is where the search is starting, just update attributes
// since it cannot be replaced.
- d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr)
+ d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) // +checklocksforce: acquired by lockAllMetadata.
}
- d.metadataMu.Unlock()
+ d.metadataMu.Unlock() // +checklocksforce: see above.
continue
}
// Note that synthetic dentries will always fails the comparison check
// below.
if !found || d.qidPath != stats[i].QID.Path {
- d.metadataMu.Unlock()
+ d.metadataMu.Unlock() // +checklocksforce: see above.
if !found && d.isSynthetic() {
// We have a synthetic file, and no remote file has arisen to replace
// it.
@@ -298,7 +298,7 @@ func (fs *filesystem) revalidateHelper(ctx context.Context, vfsObj *vfs.VirtualF
}
// The file at this path hasn't changed. Just update cached metadata.
- d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr)
+ d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) // +checklocksforce: see above.
d.metadataMu.Unlock()
}
@@ -354,6 +354,7 @@ func (r *revalidateState) add(name string, d *dentry) {
r.dentries = append(r.dentries, d)
}
+// +checklocksignore
func (r *revalidateState) lockAllMetadata() {
for _, d := range r.dentries {
d.metadataMu.Lock()
@@ -372,6 +373,7 @@ func (r *revalidateState) popFront() *dentry {
// reset releases all metadata locks and resets all fields to allow this
// instance to be reused.
+// +checklocksignore
func (r *revalidateState) reset() {
if r.locked {
// Unlock any remaining dentries.
diff --git a/pkg/sentry/fsimpl/gofer/save_restore.go b/pkg/sentry/fsimpl/gofer/save_restore.go
index 83e841a51..8dcbc61ed 100644
--- a/pkg/sentry/fsimpl/gofer/save_restore.go
+++ b/pkg/sentry/fsimpl/gofer/save_restore.go
@@ -21,13 +21,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/refsvfs2"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
type saveRestoreContextID int
@@ -92,7 +92,7 @@ func (fd *specialFileFD) savePipeData(ctx context.Context) error {
fd.buf = append(fd.buf, buf[:n]...)
}
if err != nil {
- if err == io.EOF || err == syserror.EAGAIN {
+ if err == io.EOF || linuxerr.Equals(linuxerr.EAGAIN, err) {
break
}
return err
@@ -158,6 +158,10 @@ func (d *dentryPlatformFile) afterLoad() {
// afterLoad is invoked by stateify.
func (fd *specialFileFD) afterLoad() {
fd.handle.fd = -1
+ if fd.hostFileMapper.IsInited() {
+ // Ensure that we don't call fd.hostFileMapper.Init() again.
+ fd.hostFileMapperInitOnce.Do(func() {})
+ }
}
// CompleteRestore implements
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index c12444b7e..a8d47b65b 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -20,14 +20,17 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/fsmetric"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -41,6 +44,11 @@ import (
type specialFileFD struct {
fileDescription
+ // releaseMu synchronizes the closing of fd.handle with fd.sync(). It's safe
+ // to access fd.handle without locking for operations that require a ref to
+ // be held by the caller, e.g. vfs.FileDescriptionImpl implementations.
+ releaseMu sync.RWMutex `state:"nosave"`
+
// handle is used for file I/O. handle is immutable.
handle handle `state:"nosave"`
@@ -70,6 +78,16 @@ type specialFileFD struct {
bufMu sync.Mutex `state:"nosave"`
haveBuf uint32
buf []byte
+
+ // If handle.fd >= 0, hostFileMapper caches mappings of handle.fd, and
+ // hostFileMapperInitOnce is used to initialize it on first use.
+ hostFileMapperInitOnce sync.Once `state:"nosave"`
+ hostFileMapper fsutil.HostFileMapper
+
+ // If handle.fd >= 0, fileRefs counts references on memmap.File offsets.
+ // fileRefs is protected by fileRefsMu.
+ fileRefsMu sync.Mutex `state:"nosave"`
+ fileRefs fsutil.FrameRefSet
}
func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) {
@@ -116,7 +134,10 @@ func (fd *specialFileFD) Release(ctx context.Context) {
if fd.haveQueue {
fdnotifier.RemoveFD(fd.handle.fd)
}
+ fd.releaseMu.Lock()
fd.handle.close(ctx)
+ fd.releaseMu.Unlock()
+
fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
fs.syncMu.Lock()
delete(fs.specialFileFDs, fd)
@@ -183,14 +204,14 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
}()
if fd.seekable && offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// Check that flags are supported.
//
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, linuxerr.EOPNOTSUPP
}
if d := fd.dentry(); d.cachedMetadataAuthoritative() {
@@ -221,23 +242,13 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
}
}
- // Going through dst.CopyOutFrom() would hold MM locks around file
- // operations of unknown duration. For regularFileFD, doing so is necessary
- // to support mmap due to lock ordering; MM locks precede dentry.dataMu.
- // That doesn't hold here since specialFileFD doesn't client-cache data.
- // Just buffer the read instead.
- buf := make([]byte, dst.NumBytes())
- n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
- if err == syserror.EAGAIN {
- err = syserror.ErrWouldBlock
- }
- if n == 0 {
- return bufN, err
+ rw := getHandleReadWriter(ctx, &fd.handle, offset)
+ n, err := dst.CopyOutFrom(ctx, rw)
+ putHandleReadWriter(rw)
+ if linuxerr.Equals(linuxerr.EAGAIN, err) {
+ err = linuxerr.ErrWouldBlock
}
- if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil {
- return bufN + int64(cp), cperr
- }
- return bufN + int64(n), err
+ return bufN + n, err
}
// Read implements vfs.FileDescriptionImpl.Read.
@@ -263,14 +274,14 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
// offset should be ignored by PWrite.
func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
if fd.seekable && offset < 0 {
- return 0, offset, syserror.EINVAL
+ return 0, offset, linuxerr.EINVAL
}
// Check that flags are supported.
//
// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, offset, syserror.EOPNOTSUPP
+ return 0, offset, linuxerr.EOPNOTSUPP
}
d := fd.dentry()
@@ -308,20 +319,15 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
}
}
- // Do a buffered write. See rationale in PRead.
- buf := make([]byte, src.NumBytes())
- copied, copyErr := src.CopyIn(ctx, buf)
- if copied == 0 && copyErr != nil {
- // Only return the error if we didn't get any data.
- return 0, offset, copyErr
- }
- n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:copied])), uint64(offset))
- if err == syserror.EAGAIN {
- err = syserror.ErrWouldBlock
+ rw := getHandleReadWriter(ctx, &fd.handle, offset)
+ n, err := src.CopyInTo(ctx, rw)
+ putHandleReadWriter(rw)
+ if linuxerr.Equals(linuxerr.EAGAIN, err) {
+ err = linuxerr.ErrWouldBlock
}
// Update offset if the offset is valid.
if offset >= 0 {
- offset += int64(n)
+ offset += n
}
// Update file size for regular files.
if fd.isRegularFile {
@@ -332,10 +338,7 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
atomic.StoreUint64(&d.size, uint64(offset))
}
}
- if err != nil {
- return int64(n), offset, err
- }
- return int64(n), offset, copyErr
+ return int64(n), offset, err
}
// Write implements vfs.FileDescriptionImpl.Write.
@@ -354,7 +357,7 @@ func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts
// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
if !fd.seekable {
- return 0, syserror.ESPIPE
+ return 0, linuxerr.ESPIPE
}
fd.mu.Lock()
defer fd.mu.Unlock()
@@ -372,6 +375,13 @@ func (fd *specialFileFD) Sync(ctx context.Context) error {
}
func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error {
+ // Locks to ensure it didn't race with fd.Release().
+ fd.releaseMu.RLock()
+ defer fd.releaseMu.RUnlock()
+
+ if !fd.handle.isOpen() {
+ return nil
+ }
err := func() error {
// If we have a host FD, fsyncing it is likely to be faster than an fsync
// RPC.
@@ -396,3 +406,85 @@ func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error
}
return nil
}
+
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+func (fd *specialFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+ if fd.handle.fd < 0 || fd.filesystem().opts.forcePageCache {
+ return linuxerr.ENODEV
+ }
+ // After this point, fd may be used as a memmap.Mappable and memmap.File.
+ fd.hostFileMapperInitOnce.Do(fd.hostFileMapper.Init)
+ return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (fd *specialFileFD) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
+ fd.hostFileMapper.IncRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
+ return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+func (fd *specialFileFD) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
+ fd.hostFileMapper.DecRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+func (fd *specialFileFD) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
+ return fd.AddMapping(ctx, ms, dstAR, offset, writable)
+}
+
+// Translate implements memmap.Mappable.Translate.
+func (fd *specialFileFD) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
+ mr := optional
+ if fd.filesystem().opts.limitHostFDTranslation {
+ mr = maxFillRange(required, optional)
+ }
+ return []memmap.Translation{
+ {
+ Source: mr,
+ File: fd,
+ Offset: mr.Start,
+ Perms: hostarch.AnyAccess,
+ },
+ }, nil
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+func (fd *specialFileFD) InvalidateUnsavable(ctx context.Context) error {
+ return nil
+}
+
+// IncRef implements memmap.File.IncRef.
+func (fd *specialFileFD) IncRef(fr memmap.FileRange) {
+ fd.fileRefsMu.Lock()
+ defer fd.fileRefsMu.Unlock()
+ fd.fileRefs.IncRefAndAccount(fr)
+}
+
+// DecRef implements memmap.File.DecRef.
+func (fd *specialFileFD) DecRef(fr memmap.FileRange) {
+ fd.fileRefsMu.Lock()
+ defer fd.fileRefsMu.Unlock()
+ fd.fileRefs.DecRefAndAccount(fr)
+}
+
+// MapInternal implements memmap.File.MapInternal.
+func (fd *specialFileFD) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
+ fd.requireHostFD()
+ return fd.hostFileMapper.MapInternal(fr, int(fd.handle.fd), at.Write)
+}
+
+// FD implements memmap.File.FD.
+func (fd *specialFileFD) FD() int {
+ fd.requireHostFD()
+ return int(fd.handle.fd)
+}
+
+func (fd *specialFileFD) requireHostFD() {
+ if fd.handle.fd < 0 {
+ // This is possible if fd was successfully mmapped before saving, then
+ // was restored without a host FD. This is unrecoverable: without a
+ // host FD, we can't mmap this file post-restore.
+ panic("gofer.specialFileFD can no longer be memory-mapped without a host FD")
+ }
+}
diff --git a/pkg/sentry/fsimpl/gofer/symlink.go b/pkg/sentry/fsimpl/gofer/symlink.go
index 2ec819f86..dbd834c67 100644
--- a/pkg/sentry/fsimpl/gofer/symlink.go
+++ b/pkg/sentry/fsimpl/gofer/symlink.go
@@ -41,7 +41,7 @@ func (d *dentry) readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
d.haveTarget = true
d.target = target
}
- d.dataMu.Unlock()
+ d.dataMu.Unlock() // +checklocksforce: guaranteed locked from above.
}
return target, err
}
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index b94dfeb7f..180a35583 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -45,10 +45,10 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fdnotifier",
"//pkg/fspath",
"//pkg/hostarch",
- "//pkg/iovec",
"//pkg/log",
"//pkg/marshal/primitive",
"//pkg/refs",
@@ -70,7 +70,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserr",
- "//pkg/syserror",
"//pkg/tcpip",
"//pkg/unet",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index a81f550b1..984c6e8ee 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -24,6 +24,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/hostarch"
@@ -36,11 +37,40 @@ import (
unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
+// These are the modes that are stored with virtualOwner.
+const virtualOwnerModes = linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID
+
+// +stateify savable
+type virtualOwner struct {
+ // This field is initialized at creation time and is immutable.
+ enabled bool
+
+ // mu protects the fields below and they can be accessed using atomic memory
+ // operations.
+ mu sync.Mutex `state:"nosave"`
+ uid uint32
+ gid uint32
+ // mode is also stored, otherwise setting the host file to `0000` could remove
+ // access to the file.
+ mode uint32
+}
+
+func (v *virtualOwner) atomicUID() uint32 {
+ return atomic.LoadUint32(&v.uid)
+}
+
+func (v *virtualOwner) atomicGID() uint32 {
+ return atomic.LoadUint32(&v.gid)
+}
+
+func (v *virtualOwner) atomicMode() uint32 {
+ return atomic.LoadUint32(&v.mode)
+}
+
// inode implements kernfs.Inode.
//
// +stateify savable
@@ -97,6 +127,11 @@ type inode struct {
// Event queue for blocking operations.
queue waiter.Queue
+ // virtualOwner caches ownership and permission information to override the
+ // underlying file owner and permission. This is used to allow the unstrusted
+ // application to change these fields without affecting the host.
+ virtualOwner virtualOwner
+
// If haveBuf is non-zero, hostFD represents a pipe, and buf contains data
// read from the pipe from previous calls to inode.beforeSave(). haveBuf
// and buf are protected by bufMu. haveBuf is accessed using atomic memory
@@ -109,12 +144,12 @@ type inode struct {
func newInode(ctx context.Context, fs *filesystem, hostFD int, savable bool, fileType linux.FileMode, isTTY bool) (*inode, error) {
// Determine if hostFD is seekable.
_, err := unix.Seek(hostFD, 0, linux.SEEK_CUR)
- seekable := err != syserror.ESPIPE
+ seekable := !linuxerr.Equals(linuxerr.ESPIPE, err)
// We expect regular files to be seekable, as this is required for them to
// be memory-mappable.
if !seekable && fileType == unix.S_IFREG {
ctx.Infof("host.newInode: host FD %d is a non-seekable regular file", hostFD)
- return nil, syserror.ESPIPE
+ return nil, linuxerr.ESPIPE
}
i := &inode{
@@ -146,7 +181,7 @@ func newInode(ctx context.Context, fs *filesystem, hostFD int, savable bool, fil
type NewFDOptions struct {
// If Savable is true, the host file descriptor may be saved/restored by
// numeric value; the sandbox API requires a corresponding host FD with the
- // same numeric value to be provieded at time of restore.
+ // same numeric value to be provided at time of restore.
Savable bool
// If IsTTY is true, the file descriptor is a TTY.
@@ -156,6 +191,12 @@ type NewFDOptions struct {
// the new file description will inherit flags from hostFD.
HaveFlags bool
Flags uint32
+
+ // VirtualOwner allow the host file to have owner and permissions different
+ // than the underlying host file.
+ VirtualOwner bool
+ UID auth.KUID
+ GID auth.KGID
}
// NewFD returns a vfs.FileDescription representing the given host file
@@ -167,8 +208,8 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
}
// Retrieve metadata.
- var s unix.Stat_t
- if err := unix.Fstat(hostFD, &s); err != nil {
+ var stat unix.Stat_t
+ if err := unix.Fstat(hostFD, &stat); err != nil {
return nil, err
}
@@ -182,11 +223,19 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
flags = uint32(flagsInt)
}
- d := &kernfs.Dentry{}
- i, err := newInode(ctx, fs, hostFD, opts.Savable, linux.FileMode(s.Mode).FileType(), opts.IsTTY)
+ fileType := linux.FileMode(stat.Mode).FileType()
+ i, err := newInode(ctx, fs, hostFD, opts.Savable, fileType, opts.IsTTY)
if err != nil {
return nil, err
}
+ if opts.VirtualOwner {
+ i.virtualOwner.enabled = true
+ i.virtualOwner.uid = uint32(opts.UID)
+ i.virtualOwner.gid = uint32(opts.GID)
+ i.virtualOwner.mode = stat.Mode
+ }
+
+ d := &kernfs.Dentry{}
d.Init(&fs.Filesystem, i)
// i.open will take a reference on d.
@@ -195,15 +244,7 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
// For simplicity, fileDescription.offset is set to 0. Technically, we
// should only set to 0 on files that are not seekable (sockets, pipes,
// etc.), and use the offset from the host fd otherwise when importing.
- return i.open(ctx, d, mnt, flags)
-}
-
-// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
-func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
- return NewFD(ctx, mnt, hostFD, &NewFDOptions{
- Savable: true,
- IsTTY: isTTY,
- })
+ return i.open(ctx, d, mnt, fileType, flags)
}
// filesystemType implements vfs.FilesystemType.
@@ -269,7 +310,7 @@ func (fs *filesystem) MountOptions() string {
// CheckPermissions implements kernfs.Inode.CheckPermissions.
func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
var s unix.Stat_t
- if err := unix.Fstat(i.hostFD, &s); err != nil {
+ if err := i.stat(&s); err != nil {
return err
}
return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid))
@@ -278,7 +319,7 @@ func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, a
// Mode implements kernfs.Inode.Mode.
func (i *inode) Mode() linux.FileMode {
var s unix.Stat_t
- if err := unix.Fstat(i.hostFD, &s); err != nil {
+ if err := i.stat(&s); err != nil {
// Retrieving the mode from the host fd using fstat(2) should not fail.
// If the syscall does not succeed, something is fundamentally wrong.
panic(fmt.Sprintf("failed to retrieve mode from host fd %d: %v", i.hostFD, err))
@@ -289,10 +330,10 @@ func (i *inode) Mode() linux.FileMode {
// Stat implements kernfs.Inode.Stat.
func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
if opts.Mask&linux.STATX__RESERVED != 0 {
- return linux.Statx{}, syserror.EINVAL
+ return linux.Statx{}, linuxerr.EINVAL
}
if opts.Sync&linux.AT_STATX_SYNC_TYPE == linux.AT_STATX_SYNC_TYPE {
- return linux.Statx{}, syserror.EINVAL
+ return linux.Statx{}, linuxerr.EINVAL
}
fs := vfsfs.Impl().(*filesystem)
@@ -301,11 +342,11 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
mask := opts.Mask & linux.STATX_ALL
var s unix.Statx_t
err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(mask), &s)
- if err == syserror.ENOSYS {
+ if linuxerr.Equals(linuxerr.ENOSYS, err) {
// Fallback to fstat(2), if statx(2) is not supported on the host.
//
// TODO(b/151263641): Remove fallback.
- return i.fstat(fs)
+ return i.statxFromStat(fs)
}
if err != nil {
return linux.Statx{}, err
@@ -329,19 +370,35 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
// device numbers.
ls.Mask |= s.Mask & linux.STATX_ALL
if s.Mask&linux.STATX_TYPE != 0 {
- ls.Mode |= s.Mode & linux.S_IFMT
+ if i.virtualOwner.enabled {
+ ls.Mode |= uint16(i.virtualOwner.atomicMode()) & linux.S_IFMT
+ } else {
+ ls.Mode |= s.Mode & linux.S_IFMT
+ }
}
if s.Mask&linux.STATX_MODE != 0 {
- ls.Mode |= s.Mode &^ linux.S_IFMT
+ if i.virtualOwner.enabled {
+ ls.Mode |= uint16(i.virtualOwner.atomicMode()) &^ linux.S_IFMT
+ } else {
+ ls.Mode |= s.Mode &^ linux.S_IFMT
+ }
}
if s.Mask&linux.STATX_NLINK != 0 {
ls.Nlink = s.Nlink
}
if s.Mask&linux.STATX_UID != 0 {
- ls.UID = s.Uid
+ if i.virtualOwner.enabled {
+ ls.UID = i.virtualOwner.atomicUID()
+ } else {
+ ls.UID = s.Uid
+ }
}
if s.Mask&linux.STATX_GID != 0 {
- ls.GID = s.Gid
+ if i.virtualOwner.enabled {
+ ls.GID = i.virtualOwner.atomicGID()
+ } else {
+ ls.GID = s.Gid
+ }
}
if s.Mask&linux.STATX_ATIME != 0 {
ls.Atime = unixToLinuxStatxTimestamp(s.Atime)
@@ -365,7 +422,7 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
return ls, nil
}
-// fstat is a best-effort fallback for inode.Stat() if the host does not
+// statxFromStat is a best-effort fallback for inode.Stat() if the host does not
// support statx(2).
//
// We ignore the mask and sync flags in opts and simply supply
@@ -373,9 +430,9 @@ func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOp
// of a mask or sync flags. fstat(2) does not provide any metadata
// equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so
// those fields remain empty.
-func (i *inode) fstat(fs *filesystem) (linux.Statx, error) {
+func (i *inode) statxFromStat(fs *filesystem) (linux.Statx, error) {
var s unix.Stat_t
- if err := unix.Fstat(i.hostFD, &s); err != nil {
+ if err := i.stat(&s); err != nil {
return linux.Statx{}, err
}
@@ -399,7 +456,21 @@ func (i *inode) fstat(fs *filesystem) (linux.Statx, error) {
}, nil
}
+func (i *inode) stat(stat *unix.Stat_t) error {
+ if err := unix.Fstat(i.hostFD, stat); err != nil {
+ return err
+ }
+ if i.virtualOwner.enabled {
+ stat.Uid = i.virtualOwner.atomicUID()
+ stat.Gid = i.virtualOwner.atomicGID()
+ stat.Mode = i.virtualOwner.atomicMode()
+ }
+ return nil
+}
+
// SetStat implements kernfs.Inode.SetStat.
+//
+// +checklocksignore
func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
s := &opts.Stat
@@ -407,11 +478,22 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
if m == 0 {
return nil
}
- if m&^(linux.STATX_MODE|linux.STATX_SIZE|linux.STATX_ATIME|linux.STATX_MTIME) != 0 {
- return syserror.EPERM
+ supportedModes := uint32(linux.STATX_MODE | linux.STATX_SIZE | linux.STATX_ATIME | linux.STATX_MTIME)
+ if i.virtualOwner.enabled {
+ if m&virtualOwnerModes != 0 {
+ // Take lock if any of the virtual owner fields will be updated.
+ i.virtualOwner.mu.Lock()
+ defer i.virtualOwner.mu.Unlock()
+ }
+
+ supportedModes |= virtualOwnerModes
}
+ if m&^supportedModes != 0 {
+ return linuxerr.EPERM
+ }
+
var hostStat unix.Stat_t
- if err := unix.Fstat(i.hostFD, &hostStat); err != nil {
+ if err := i.stat(&hostStat); err != nil {
return err
}
if err := vfs.CheckSetStat(ctx, creds, &opts, linux.FileMode(hostStat.Mode), auth.KUID(hostStat.Uid), auth.KGID(hostStat.Gid)); err != nil {
@@ -419,13 +501,17 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
}
if m&linux.STATX_MODE != 0 {
- if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil {
- return err
+ if i.virtualOwner.enabled {
+ i.virtualOwner.mode = uint32(opts.Stat.Mode)
+ } else {
+ if err := unix.Fchmod(i.hostFD, uint32(s.Mode)); err != nil {
+ return err
+ }
}
}
if m&linux.STATX_SIZE != 0 {
if hostStat.Mode&linux.S_IFMT != linux.S_IFREG {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
if err := unix.Ftruncate(i.hostFD, int64(s.Size)); err != nil {
return err
@@ -448,6 +534,14 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
return err
}
}
+ if i.virtualOwner.enabled {
+ if m&linux.STATX_UID != 0 {
+ i.virtualOwner.uid = opts.Stat.UID
+ }
+ if m&linux.STATX_GID != 0 {
+ i.virtualOwner.gid = opts.Stat.GID
+ }
+ }
return nil
}
@@ -470,18 +564,17 @@ func (i *inode) DecRef(ctx context.Context) {
func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
// Once created, we cannot re-open a socket fd through /proc/[pid]/fd/.
if i.Mode().FileType() == linux.S_IFSOCK {
- return nil, syserror.ENXIO
+ return nil, linuxerr.ENXIO
}
- return i.open(ctx, d, rp.Mount(), opts.Flags)
-}
-
-func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flags uint32) (*vfs.FileDescription, error) {
- var s unix.Stat_t
- if err := unix.Fstat(i.hostFD, &s); err != nil {
+ var stat unix.Stat_t
+ if err := i.stat(&stat); err != nil {
return nil, err
}
- fileType := s.Mode & linux.FileTypeMask
+ fileType := linux.FileMode(stat.Mode).FileType()
+ return i.open(ctx, d, rp.Mount(), fileType, opts.Flags)
+}
+func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, fileType linux.FileMode, flags uint32) (*vfs.FileDescription, error) {
// Constrain flags to a subset we can handle.
//
// TODO(gvisor.dev/issue/2601): Support O_NONBLOCK by adding RWF_NOWAIT to pread/pwrite calls.
@@ -491,7 +584,7 @@ func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flag
case unix.S_IFSOCK:
if i.isTTY {
log.Warningf("cannot use host socket fd %d as TTY", i.hostFD)
- return nil, syserror.ENOTTY
+ return nil, linuxerr.ENOTTY
}
ep, err := newEndpoint(ctx, i.hostFD, &i.queue)
@@ -529,7 +622,7 @@ func (i *inode) open(ctx context.Context, d *kernfs.Dentry, mnt *vfs.Mount, flag
default:
log.Warningf("cannot import host fd %d with file type %o", i.hostFD, fileType)
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
}
@@ -584,12 +677,12 @@ func (f *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, off
//
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, linuxerr.EOPNOTSUPP
}
i := f.inode
if !i.seekable {
- return 0, syserror.ESPIPE
+ return 0, linuxerr.ESPIPE
}
return readFromHostFD(ctx, i.hostFD, dst, offset, opts.Flags)
@@ -601,7 +694,7 @@ func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts
//
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, linuxerr.EOPNOTSUPP
}
i := f.inode
@@ -618,7 +711,7 @@ func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts
if total != 0 {
err = nil
} else {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
}
return total, err
@@ -660,7 +753,7 @@ func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, off
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (f *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
if !f.inode.seekable {
- return 0, syserror.ESPIPE
+ return 0, linuxerr.ESPIPE
}
return f.writeToHostFD(ctx, src, offset, opts.Flags)
@@ -672,7 +765,7 @@ func (f *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opt
if !i.seekable {
n, err := f.writeToHostFD(ctx, src, -1, opts.Flags)
if isBlockError(err) {
- err = syserror.ErrWouldBlock
+ err = linuxerr.ErrWouldBlock
}
return n, err
}
@@ -700,7 +793,7 @@ func (f *fileDescription) writeToHostFD(ctx context.Context, src usermem.IOSeque
hostFD := f.inode.hostFD
// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
if flags != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, linuxerr.EOPNOTSUPP
}
writer := hostfd.GetReadWriterAt(int32(hostFD), offset, flags)
n, err := src.CopyInTo(ctx, writer)
@@ -721,7 +814,7 @@ func (f *fileDescription) writeToHostFD(ctx context.Context, src usermem.IOSeque
func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (int64, error) {
i := f.inode
if !i.seekable {
- return 0, syserror.ESPIPE
+ return 0, linuxerr.ESPIPE
}
f.offsetMu.Lock()
@@ -730,17 +823,17 @@ func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (i
switch whence {
case linux.SEEK_SET:
if offset < 0 {
- return f.offset, syserror.EINVAL
+ return f.offset, linuxerr.EINVAL
}
f.offset = offset
case linux.SEEK_CUR:
// Check for overflow. Note that underflow cannot occur, since f.offset >= 0.
if offset > math.MaxInt64-f.offset {
- return f.offset, syserror.EOVERFLOW
+ return f.offset, linuxerr.EOVERFLOW
}
if f.offset+offset < 0 {
- return f.offset, syserror.EINVAL
+ return f.offset, linuxerr.EINVAL
}
f.offset += offset
@@ -753,10 +846,10 @@ func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (i
// Check for overflow. Note that underflow cannot occur, since size >= 0.
if offset > math.MaxInt64-size {
- return f.offset, syserror.EOVERFLOW
+ return f.offset, linuxerr.EOVERFLOW
}
if size+offset < 0 {
- return f.offset, syserror.EINVAL
+ return f.offset, linuxerr.EINVAL
}
f.offset = size + offset
@@ -773,7 +866,7 @@ func (f *fileDescription) Seek(_ context.Context, offset int64, whence int32) (i
default:
// Invalid whence.
- return f.offset, syserror.EINVAL
+ return f.offset, linuxerr.EINVAL
}
return f.offset, nil
@@ -790,7 +883,7 @@ func (f *fileDescription) ConfigureMMap(_ context.Context, opts *memmap.MMapOpts
// NOTE(b/38213152): Technically, some obscure char devices can be memory
// mapped, but we only allow regular files.
if f.inode.ftype != unix.S_IFREG {
- return syserror.ENODEV
+ return linuxerr.ENODEV
}
i := f.inode
i.CachedMappable.InitFileMapperOnce()
diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go
index ca85f5601..709d5747d 100644
--- a/pkg/sentry/fsimpl/host/socket.go
+++ b/pkg/sentry/fsimpl/host/socket.go
@@ -21,6 +21,7 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/socket/control"
@@ -28,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/uniqueid"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/unet"
"gvisor.dev/gvisor/pkg/waiter"
@@ -158,9 +158,9 @@ func (c *ConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMess
if n < totalLen && err == nil {
// The host only returns a short write if it would otherwise
// block (and only for stream sockets).
- err = syserror.EAGAIN
+ err = linuxerr.EAGAIN
}
- if n > 0 && err != syserror.EAGAIN {
+ if n > 0 && !linuxerr.Equals(linuxerr.EAGAIN, err) {
// The caller may need to block to send more data, but
// otherwise there isn't anything that can be done about an
// error with a partial write.
diff --git a/pkg/sentry/fsimpl/host/socket_iovec.go b/pkg/sentry/fsimpl/host/socket_iovec.go
index b123a63ee..292b44c43 100644
--- a/pkg/sentry/fsimpl/host/socket_iovec.go
+++ b/pkg/sentry/fsimpl/host/socket_iovec.go
@@ -16,8 +16,8 @@ package host
import (
"golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/iovec"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
+ "gvisor.dev/gvisor/pkg/sentry/hostfd"
)
// copyToMulti copies as many bytes from src to dst as possible.
@@ -64,13 +64,13 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec
if length > maxlen {
if truncate {
stopLen = maxlen
- err = syserror.EAGAIN
+ err = linuxerr.EAGAIN
} else {
- return 0, nil, nil, syserror.EMSGSIZE
+ return 0, nil, nil, linuxerr.EMSGSIZE
}
}
- if iovsRequired > iovec.MaxIovs {
+ if iovsRequired > hostfd.MaxSendRecvMsgIov {
// The kernel will reject our call if we pass this many iovs.
// Use a single intermediate buffer instead.
b := make([]byte, stopLen)
diff --git a/pkg/sentry/fsimpl/host/tty.go b/pkg/sentry/fsimpl/host/tty.go
index 0f9e20a84..04ac73255 100644
--- a/pkg/sentry/fsimpl/host/tty.go
+++ b/pkg/sentry/fsimpl/host/tty.go
@@ -17,13 +17,13 @@ package host
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/unimpl"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -147,7 +147,7 @@ func (t *TTYFileDescription) Write(ctx context.Context, src usermem.IOSequence,
func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
task := kernel.TaskFromContext(ctx)
if task == nil {
- return 0, syserror.ENOTTY
+ return 0, linuxerr.ENOTTY
}
// Ignore arg[0]. This is the real FD:
@@ -188,7 +188,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
pidns := kernel.PIDNamespaceFromContext(ctx)
if pidns == nil {
- return 0, syserror.ENOTTY
+ return 0, linuxerr.ENOTTY
}
t.mu.Lock()
@@ -211,15 +211,15 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
if err := t.checkChange(ctx, linux.SIGTTOU); err != nil {
// drivers/tty/tty_io.c:tiocspgrp() converts -EIO from tty_check_change()
// to -ENOTTY.
- if err == syserror.EIO {
- return 0, syserror.ENOTTY
+ if linuxerr.Equals(linuxerr.EIO, err) {
+ return 0, linuxerr.ENOTTY
}
return 0, err
}
// Check that calling task's process group is in the TTY session.
if task.ThreadGroup().Session() != t.session {
- return 0, syserror.ENOTTY
+ return 0, linuxerr.ENOTTY
}
var pgIDP primitive.Int32
@@ -230,19 +230,19 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
// pgID must be non-negative.
if pgID < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// Process group with pgID must exist in this PID namespace.
pidns := task.PIDNamespace()
pg := pidns.ProcessGroupWithID(pgID)
if pg == nil {
- return 0, syserror.ESRCH
+ return 0, linuxerr.ESRCH
}
// Check that new process group is in the TTY session.
if pg.Session() != t.session {
- return 0, syserror.EPERM
+ return 0, linuxerr.EPERM
}
t.fgProcessGroup = pg
@@ -302,7 +302,7 @@ func (t *TTYFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch
unimpl.EmitUnimplementedEvent(ctx)
fallthrough
default:
- return 0, syserror.ENOTTY
+ return 0, linuxerr.ENOTTY
}
}
@@ -345,7 +345,7 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
// If the signal is SIGTTIN, then we are attempting to read
// from the TTY. Don't send the signal and return EIO.
if sig == linux.SIGTTIN {
- return syserror.EIO
+ return linuxerr.EIO
}
// Otherwise, we are writing or changing terminal state. This is allowed.
@@ -354,7 +354,7 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
// If the process group is an orphan, return EIO.
if pg.IsOrphan() {
- return syserror.EIO
+ return linuxerr.EIO
}
// Otherwise, send the signal to the process group and return ERESTARTSYS.
@@ -367,5 +367,5 @@ func (t *TTYFileDescription) checkChange(ctx context.Context, sig linux.Signal)
//
// Linux ignores the result of kill_pgrp().
_ = pg.SendSignal(kernel.SignalInfoPriv(sig))
- return syserror.ERESTARTSYS
+ return linuxerr.ERESTARTSYS
}
diff --git a/pkg/sentry/fsimpl/host/util.go b/pkg/sentry/fsimpl/host/util.go
index 63b465859..9850f3f41 100644
--- a/pkg/sentry/fsimpl/host/util.go
+++ b/pkg/sentry/fsimpl/host/util.go
@@ -17,7 +17,7 @@ package host
import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
)
func toTimespec(ts linux.StatxTimestamp, omit bool) unix.Timespec {
@@ -42,7 +42,7 @@ func timespecToStatxTimestamp(ts unix.Timespec) linux.StatxTimestamp {
}
// isBlockError checks if an error is EAGAIN or EWOULDBLOCK.
-// If so, they can be transformed into syserror.ErrWouldBlock.
+// If so, they can be transformed into linuxerr.ErrWouldBlock.
func isBlockError(err error) bool {
- return err == syserror.EAGAIN || err == syserror.EWOULDBLOCK
+ return linuxerr.Equals(linuxerr.EAGAIN, err) || linuxerr.Equals(linuxerr.EWOULDBLOCK, err)
}
diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD
index b7d13cced..4b577ea43 100644
--- a/pkg/sentry/fsimpl/kernfs/BUILD
+++ b/pkg/sentry/fsimpl/kernfs/BUILD
@@ -104,6 +104,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/hostarch",
"//pkg/log",
@@ -118,7 +119,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
@@ -135,6 +135,8 @@ go_test(
":kernfs",
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
+ "//pkg/fspath",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
@@ -142,7 +144,6 @@ go_test(
"//pkg/sentry/fsimpl/testutil",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
"@com_github_google_go_cmp//cmp:go_default_library",
],
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index 84b1c3745..9d7526e47 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -19,9 +19,9 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -71,7 +71,7 @@ func (f *DynamicBytesFile) Open(ctx context.Context, rp *vfs.ResolvingPath, d *D
// inode attributes to be changed. Override SetStat() making it call
// f.InodeAttrs to allow it.
func (*DynamicBytesFile) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// DynamicBytesFD implements vfs.FileDescriptionImpl for an FD backed by a
@@ -137,5 +137,5 @@ func (fd *DynamicBytesFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux
// SetStat implements vfs.FileDescriptionImpl.SetStat.
func (fd *DynamicBytesFD) SetStat(context.Context, vfs.SetStatOptions) error {
// DynamicBytesFiles are immutable.
- return syserror.EPERM
+ return linuxerr.EPERM
}
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index e55111af0..7db1473c4 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -19,11 +19,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -98,7 +98,7 @@ func NewGenericDirectoryFD(m *vfs.Mount, d *Dentry, children *OrderedChildren, l
func (fd *GenericDirectoryFD) Init(children *OrderedChildren, locks *vfs.FileLocks, opts *vfs.OpenOptions, fdOpts GenericDirectoryFDOptions) error {
if vfs.AccessTypesForOpenFlags(opts)&vfs.MayWrite != 0 {
// Can't open directories for writing.
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
fd.LockFD.Init(locks)
fd.seekEnd = fdOpts.SeekEnd
@@ -248,10 +248,10 @@ func (fd *GenericDirectoryFD) Seek(ctx context.Context, offset int64, whence int
panic(fmt.Sprintf("Invalid GenericDirectoryFD.seekEnd = %v", fd.seekEnd))
}
default:
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
fd.off = offset
return offset, nil
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 8fac53c60..363ebc466 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -21,11 +21,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// stepExistingLocked resolves rp.Component() in parent directory vfsd.
@@ -39,7 +39,7 @@ import (
// Postcondition: Caller must call fs.processDeferredDecRefs*.
func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, mayFollowSymlinks bool) (*Dentry, error) {
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
// Directory searchable?
if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
@@ -70,7 +70,7 @@ afterSymlink:
return d.parent, nil
}
if len(name) > linux.NAME_MAX {
- return nil, syserror.ENAMETOOLONG
+ return nil, linuxerr.ENAMETOOLONG
}
d.dirMu.Lock()
next, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, d.children[name])
@@ -169,7 +169,7 @@ func (fs *Filesystem) walkExistingLocked(ctx context.Context, rp *vfs.ResolvingP
}
}
if rp.MustBeDir() && !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return d, nil
}
@@ -196,7 +196,7 @@ func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving
}
}
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return d, nil
}
@@ -214,16 +214,16 @@ func checkCreateLocked(ctx context.Context, creds *auth.Credentials, name string
return err
}
if name == "." || name == ".." {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
if len(name) > linux.NAME_MAX {
- return syserror.ENAMETOOLONG
+ return linuxerr.ENAMETOOLONG
}
if _, ok := parent.children[name]; ok {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
if parent.VFSDentry().IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parent.inode.CheckPermissions(ctx, creds, vfs.MayWrite); err != nil {
return err
@@ -237,10 +237,10 @@ func checkCreateLocked(ctx context.Context, creds *auth.Credentials, name string
func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) error {
parent := d.parent
if parent == nil {
- return syserror.EBUSY
+ return linuxerr.EBUSY
}
if parent.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
return err
@@ -317,7 +317,7 @@ func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
if opts.CheckSearchable {
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
@@ -344,7 +344,7 @@ func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa
// LinkAt implements vfs.FilesystemImpl.LinkAt.
func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
if rp.Done() {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
fs.mu.Lock()
defer fs.processDeferredDecRefs(ctx)
@@ -361,10 +361,10 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return err
}
if rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if rp.Mount() != vd.Mount() {
- return syserror.EXDEV
+ return linuxerr.EXDEV
}
if err := rp.Mount().CheckBeginWrite(); err != nil {
return err
@@ -373,7 +373,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
d := vd.Dentry().Impl().(*Dentry)
if d.isDir() {
- return syserror.EPERM
+ return linuxerr.EPERM
}
childI, err := parent.inode.NewLink(ctx, pc, d.inode)
@@ -389,7 +389,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
if rp.Done() {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
fs.mu.Lock()
defer fs.processDeferredDecRefs(ctx)
@@ -411,7 +411,7 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
defer rp.Mount().EndWrite()
childI, err := parent.inode.NewDir(ctx, pc, opts)
if err != nil {
- if !opts.ForSyntheticMountpoint || err == syserror.EEXIST {
+ if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) {
return err
}
childI = newSyntheticDirectory(ctx, rp.Credentials(), opts.Mode)
@@ -425,7 +425,7 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
// MknodAt implements vfs.FilesystemImpl.MknodAt.
func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
if rp.Done() {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
fs.mu.Lock()
defer fs.processDeferredDecRefs(ctx)
@@ -442,7 +442,7 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
if rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := rp.Mount().CheckBeginWrite(); err != nil {
return err
@@ -508,10 +508,10 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
defer unlock()
if rp.Done() {
if rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if mustCreate {
- return nil, syserror.EEXIST
+ return nil, linuxerr.EEXIST
}
if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
@@ -535,18 +535,18 @@ afterTrailingSymlink:
}
// Reject attempts to open directories with O_CREAT.
if rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
pc := rp.Component()
if pc == "." || pc == ".." {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if len(pc) > linux.NAME_MAX {
- return nil, syserror.ENAMETOOLONG
+ return nil, linuxerr.ENAMETOOLONG
}
// Determine whether or not we need to create a file.
child, err := fs.stepExistingLocked(ctx, rp, parent, false /* mayFollowSymlinks */)
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
// Already checked for searchability above; now check for writability.
if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil {
return nil, err
@@ -576,7 +576,7 @@ afterTrailingSymlink:
}
// Open existing file or follow symlink.
if mustCreate {
- return nil, syserror.EEXIST
+ return nil, linuxerr.EEXIST
}
if rp.ShouldFollowSymlink() && child.isSymlink() {
targetVD, targetPathname, err := child.inode.Getlink(ctx, rp.Mount())
@@ -622,7 +622,7 @@ func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
}
if !d.isSymlink() {
fs.mu.RUnlock()
- return "", syserror.EINVAL
+ return "", linuxerr.EINVAL
}
// Inode.Readlink() cannot be called holding fs locks.
@@ -648,13 +648,13 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
// Only RENAME_NOREPLACE is supported.
if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0
mnt := rp.Mount()
if mnt != oldParentVD.Mount() {
- return syserror.EXDEV
+ return linuxerr.EXDEV
}
if err := mnt.CheckBeginWrite(); err != nil {
return err
@@ -680,17 +680,19 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
newName := rp.Component()
if newName == "." || newName == ".." {
if noReplace {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
- return syserror.EBUSY
+ return linuxerr.EBUSY
}
- switch err := checkCreateLocked(ctx, rp.Credentials(), newName, dstDir); err {
- case nil:
+
+ err = checkCreateLocked(ctx, rp.Credentials(), newName, dstDir)
+ switch {
+ case err == nil:
// Ok, continue with rename as replacement.
- case syserror.EEXIST:
+ case linuxerr.Equals(linuxerr.EEXIST, err):
if noReplace {
// Won't overwrite existing node since RENAME_NOREPLACE was requested.
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
dst = dstDir.children[newName]
if dst == nil {
@@ -749,7 +751,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
fs.deferDecRef(replaced)
replaceVFSD = replaced.VFSDentry()
}
- virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD)
+ virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) // +checklocksforce: to may be nil, that's okay.
return nil
}
@@ -771,10 +773,10 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
if !d.isDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
if d.inode.HasChildren() {
- return syserror.ENOTEMPTY
+ return linuxerr.ENOTEMPTY
}
virtfs := rp.VirtualFilesystem()
parentDentry := d.parent
@@ -785,7 +787,7 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
defer mntns.DecRef(ctx)
vfsd := d.VFSDentry()
if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
- return err
+ return err // +checklocksforce: vfsd is not locked.
}
if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil {
@@ -841,7 +843,7 @@ func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu
// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
if rp.Done() {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
fs.mu.Lock()
defer fs.processDeferredDecRefs(ctx)
@@ -858,7 +860,7 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
return err
}
if rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := rp.Mount().CheckBeginWrite(); err != nil {
return err
@@ -892,7 +894,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
if d.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
virtfs := rp.VirtualFilesystem()
parentDentry := d.parent
@@ -927,7 +929,7 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil {
return nil, err
}
- return nil, syserror.ECONNREFUSED
+ return nil, linuxerr.ECONNREFUSED
}
// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
@@ -940,7 +942,7 @@ func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
return nil, err
}
// kernfs currently does not support extended attributes.
- return nil, syserror.ENOTSUP
+ return nil, linuxerr.ENOTSUP
}
// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
@@ -953,7 +955,7 @@ func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
return "", err
}
// kernfs currently does not support extended attributes.
- return "", syserror.ENOTSUP
+ return "", linuxerr.ENOTSUP
}
// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
@@ -966,7 +968,7 @@ func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
return err
}
// kernfs currently does not support extended attributes.
- return syserror.ENOTSUP
+ return linuxerr.ENOTSUP
}
// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
@@ -979,7 +981,7 @@ func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath,
return err
}
// kernfs currently does not support extended attributes.
- return syserror.ENOTSUP
+ return linuxerr.ENOTSUP
}
// PrependPath implements vfs.FilesystemImpl.PrependPath.
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 3d0866ecf..b96dc9ef7 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -20,12 +20,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// InodeNoopRefCount partially implements the Inode interface, specifically the
@@ -61,27 +61,27 @@ type InodeDirectoryNoNewChildren struct{}
// NewFile implements Inode.NewFile.
func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
// NewDir implements Inode.NewDir.
func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
// NewLink implements Inode.NewLink.
func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
// NewSymlink implements Inode.NewSymlink.
func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
// NewNode implements Inode.NewNode.
func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
// InodeNotDirectory partially implements the Inode interface, specifically the
@@ -158,12 +158,12 @@ type InodeNotSymlink struct{}
// Readlink implements Inode.Readlink.
func (InodeNotSymlink) Readlink(context.Context, *vfs.Mount) (string, error) {
- return "", syserror.EINVAL
+ return "", linuxerr.EINVAL
}
// Getlink implements Inode.Getlink.
func (InodeNotSymlink) Getlink(context.Context, *vfs.Mount) (vfs.VirtualDentry, string, error) {
- return vfs.VirtualDentry{}, "", syserror.EINVAL
+ return vfs.VirtualDentry{}, "", linuxerr.EINVAL
}
// InodeAttrs partially implements the Inode interface, specifically the
@@ -233,6 +233,11 @@ func (a *InodeAttrs) Mode() linux.FileMode {
return linux.FileMode(atomic.LoadUint32(&a.mode))
}
+// Links returns the link count.
+func (a *InodeAttrs) Links() uint32 {
+ return atomic.LoadUint32(&a.nlink)
+}
+
// TouchAtime updates a.atime to the current time.
func (a *InodeAttrs) TouchAtime(ctx context.Context, mnt *vfs.Mount) {
if mnt.Flags.NoATime || mnt.ReadOnly() {
@@ -285,10 +290,10 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut
// allowed by kernfs files but does not do anything. If some other behavior is
// needed, the embedder should consider extending SetStat.
if opts.Stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_SIZE) != 0 {
- return syserror.EPERM
+ return linuxerr.EPERM
}
if opts.Stat.Mask&linux.STATX_SIZE != 0 && a.Mode().IsDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if err := vfs.CheckSetStat(ctx, creds, &opts, a.Mode(), auth.KUID(atomic.LoadUint32(&a.uid)), auth.KGID(atomic.LoadUint32(&a.gid))); err != nil {
return err
@@ -474,7 +479,7 @@ func (o *OrderedChildren) Lookup(ctx context.Context, name string) (Inode, error
s, ok := o.set[name]
if !ok {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
s.inode.IncRef() // This ref is passed to the dentry upon creation via Init.
@@ -501,6 +506,30 @@ func (o *OrderedChildren) Insert(name string, child Inode) error {
return o.insert(name, child, false)
}
+// Inserter is like Insert, but obtains the child to insert by calling
+// makeChild. makeChild is only called if the insert will succeed. This allows
+// the caller to atomically check and insert a child without having to
+// clean up the child on failure.
+func (o *OrderedChildren) Inserter(name string, makeChild func() Inode) (Inode, error) {
+ o.mu.Lock()
+ defer o.mu.Unlock()
+ if _, ok := o.set[name]; ok {
+ return nil, linuxerr.EEXIST
+ }
+
+ // Note: We must not fail after we call makeChild().
+
+ child := makeChild()
+ s := &slot{
+ name: name,
+ inode: child,
+ static: false,
+ }
+ o.order.PushBack(s)
+ o.set[name] = s
+ return child, nil
+}
+
// insert inserts child into o.
//
// Precondition: Caller must be holding a ref on child if static is true.
@@ -510,7 +539,7 @@ func (o *OrderedChildren) insert(name string, child Inode, static bool) error {
o.mu.Lock()
defer o.mu.Unlock()
if _, ok := o.set[name]; ok {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
s := &slot{
name: name,
@@ -558,7 +587,7 @@ func (o *OrderedChildren) replaceChildLocked(ctx context.Context, name string, n
func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error {
s, ok := o.set[name]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if s.inode != child {
panic(fmt.Sprintf("Inode doesn't match what kernfs thinks! OrderedChild: %+v, kernfs: %+v", s.inode, child))
@@ -569,7 +598,7 @@ func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error {
// Unlink implements Inode.Unlink.
func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode) error {
if !o.writable {
- return syserror.EPERM
+ return linuxerr.EPERM
}
o.mu.Lock()
defer o.mu.Unlock()
@@ -599,15 +628,15 @@ func (o *OrderedChildren) RmDir(ctx context.Context, name string, child Inode) e
// Postcondition: reference on any replaced dentry transferred to caller.
func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error {
if !o.writable {
- return syserror.EPERM
+ return linuxerr.EPERM
}
dst, ok := dstDir.(interface{}).(*OrderedChildren)
if !ok {
- return syserror.EXDEV
+ return linuxerr.EXDEV
}
if !dst.writable {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// Note: There's a potential deadlock below if concurrent calls to Rename
@@ -653,7 +682,7 @@ type InodeSymlink struct {
// Open implements Inode.Open.
func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- return nil, syserror.ELOOP
+ return nil, linuxerr.ELOOP
}
// StaticDirectory is a standard implementation of a directory with static
@@ -709,7 +738,7 @@ func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *De
// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// DecRef implements Inode.DecRef.
@@ -745,5 +774,5 @@ type InodeNoStatFS struct{}
// StatFS implements Inode.StatFS.
func (*InodeNoStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) {
- return linux.Statfs{}, syserror.ENOSYS
+ return linux.Statfs{}, linuxerr.ENOSYS
}
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 6f699c9cd..544698694 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -52,7 +52,7 @@
// vfs.VirtualFilesystem.mountMu
// vfs.Dentry.mu
// (inode implementation locks, if any)
-// kernfs.Filesystem.droppedDentriesMu
+// kernfs.Filesystem.deferredDecRefsMu
package kernfs
import (
@@ -61,6 +61,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/refsvfs2"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -76,12 +77,12 @@ import (
type Filesystem struct {
vfsfs vfs.Filesystem
- droppedDentriesMu sync.Mutex `state:"nosave"`
+ deferredDecRefsMu sync.Mutex `state:"nosave"`
- // droppedDentries is a list of dentries waiting to be DecRef()ed. This is
+ // deferredDecRefs is a list of dentries waiting to be DecRef()ed. This is
// used to defer dentry destruction until mu can be acquired for
- // writing. Protected by droppedDentriesMu.
- droppedDentries []*Dentry
+ // writing. Protected by deferredDecRefsMu.
+ deferredDecRefs []refsvfs2.RefCounter
// mu synchronizes the lifetime of Dentries on this filesystem. Holding it
// for reading guarantees continued existence of any resolved dentries, but
@@ -131,25 +132,49 @@ type Filesystem struct {
// deferDecRef defers dropping a dentry ref until the next call to
// processDeferredDecRefs{,Locked}. See comment on Filesystem.mu.
// This may be called while Filesystem.mu or Dentry.dirMu is locked.
-func (fs *Filesystem) deferDecRef(d *Dentry) {
- fs.droppedDentriesMu.Lock()
- fs.droppedDentries = append(fs.droppedDentries, d)
- fs.droppedDentriesMu.Unlock()
+func (fs *Filesystem) deferDecRef(d refsvfs2.RefCounter) {
+ fs.deferredDecRefsMu.Lock()
+ fs.deferredDecRefs = append(fs.deferredDecRefs, d)
+ fs.deferredDecRefsMu.Unlock()
+}
+
+// SafeDecRefFD safely DecRef the FileDescription making sure DecRef is deferred
+// in case Filesystem.mu is held. See comment on Filesystem.mu.
+func (fs *Filesystem) SafeDecRefFD(ctx context.Context, fd *vfs.FileDescription) {
+ if d, ok := fd.Dentry().Impl().(*Dentry); ok && d.fs == fs {
+ // Only defer if dentry belongs to this filesystem, since locks cannot cross
+ // filesystems.
+ fs.deferDecRef(fd)
+ return
+ }
+ fd.DecRef(ctx)
+}
+
+// SafeDecRef safely DecRef the virtual dentry making sure DecRef is deferred
+// in case Filesystem.mu is held. See comment on Filesystem.mu.
+func (fs *Filesystem) SafeDecRef(ctx context.Context, vd vfs.VirtualDentry) {
+ if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs {
+ // Only defer if dentry belongs to this filesystem, since locks cannot cross
+ // filesystems.
+ fs.deferDecRef(&vd)
+ return
+ }
+ vd.DecRef(ctx)
}
// processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the
-// droppedDentries list. See comment on Filesystem.mu.
+// deferredDecRefs list. See comment on Filesystem.mu.
//
// Precondition: Filesystem.mu or Dentry.dirMu must NOT be locked.
func (fs *Filesystem) processDeferredDecRefs(ctx context.Context) {
- fs.droppedDentriesMu.Lock()
- for _, d := range fs.droppedDentries {
- // Defer the DecRef call so that we are not holding droppedDentriesMu
+ fs.deferredDecRefsMu.Lock()
+ for _, d := range fs.deferredDecRefs {
+ // Defer the DecRef call so that we are not holding deferredDecRefsMu
// when DecRef is called.
defer d.DecRef(ctx)
}
- fs.droppedDentries = fs.droppedDentries[:0] // Keep slice memory for reuse.
- fs.droppedDentriesMu.Unlock()
+ fs.deferredDecRefs = fs.deferredDecRefs[:0] // Keep slice memory for reuse.
+ fs.deferredDecRefsMu.Unlock()
}
// VFSFilesystem returns the generic vfs filesystem object.
@@ -518,6 +543,63 @@ func (d *Dentry) FSLocalPath() string {
return b.String()
}
+// WalkDentryTree traverses p in the dentry tree for this filesystem. Note that
+// this only traverses the dentry tree and is not a general path traversal. No
+// symlinks and dynamic children are resolved, and no permission checks are
+// performed. The caller is responsible for ensuring the returned Dentry exists
+// for an appropriate lifetime.
+//
+// p is interpreted starting at d, and may be absolute or relative (absolute vs
+// relative paths both refer to the same target here, since p is absolute from
+// d). p may contain "." and "..", but will not allow traversal above d (similar
+// to ".." at the root dentry).
+//
+// This is useful for filesystem internals, where the filesystem may not be
+// mounted yet. For a mounted filesystem, use GetDentryAt.
+func (d *Dentry) WalkDentryTree(ctx context.Context, vfsObj *vfs.VirtualFilesystem, p fspath.Path) (*Dentry, error) {
+ d.fs.mu.RLock()
+ defer d.fs.processDeferredDecRefs(ctx)
+ defer d.fs.mu.RUnlock()
+
+ target := d
+
+ for pit := p.Begin; pit.Ok(); pit = pit.Next() {
+ pc := pit.String()
+
+ switch {
+ case target == nil:
+ return nil, linuxerr.ENOENT
+ case pc == ".":
+ // No-op, consume component and continue.
+ case pc == "..":
+ if target == d {
+ // Don't let .. traverse above the start point of the walk.
+ continue
+ }
+ target = target.parent
+ // Parent doesn't need revalidation since we revalidated it on the
+ // way to the child, and we're still holding fs.mu.
+ default:
+ var err error
+
+ d.dirMu.Lock()
+ target, err = d.fs.revalidateChildLocked(ctx, vfsObj, target, pc, target.children[pc])
+ d.dirMu.Unlock()
+
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ if target == nil {
+ return nil, linuxerr.ENOENT
+ }
+
+ target.IncRef()
+ return target, nil
+}
+
// The Inode interface maps filesystem-level operations that operate on paths to
// equivalent operations on specific filesystem nodes.
//
@@ -643,12 +725,15 @@ type inodeDirectory interface {
// RmDir removes an empty child directory from this directory
// inode. Implementations must update the parent directory's link count,
// if required. Implementations are not responsible for checking that child
- // is a directory, checking for an empty directory.
+ // is a directory, or checking for an empty directory.
RmDir(ctx context.Context, name string, child Inode) error
// Rename is called on the source directory containing an inode being
- // renamed. child should point to the resolved child in the source
- // directory.
+ // renamed. child points to the resolved child in the source directory.
+ // dstDir is guaranteed to be a directory inode.
+ //
+ // On a successful call to Rename, the caller updates the dentry tree to
+ // reflect the name change.
//
// Precondition: Caller must serialize concurrent calls to Rename.
Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index 1cd3137e6..a2aba9321 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -22,12 +22,13 @@ import (
"github.com/google/go-cmp/cmp"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -94,7 +95,7 @@ type attrs struct {
}
func (*attrs) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
type readonlyDir struct {
@@ -196,15 +197,15 @@ func (d *dir) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (k
}
func (*dir) NewLink(context.Context, string, kernfs.Inode) (kernfs.Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
func (*dir) NewSymlink(context.Context, string, string) (kernfs.Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
func (*dir) NewNode(context.Context, string, vfs.MknodOptions) (kernfs.Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
func (fsType) Name() string {
@@ -318,10 +319,10 @@ func TestDirFDReadWrite(t *testing.T) {
defer fd.DecRef(sys.Ctx)
// Read/Write should fail for directory FDs.
- if _, err := fd.Read(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); err != syserror.EISDIR {
+ if _, err := fd.Read(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); !linuxerr.Equals(linuxerr.EISDIR, err) {
t.Fatalf("Read for directory FD failed with unexpected error: %v", err)
}
- if _, err := fd.Write(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); err != syserror.EBADF {
+ if _, err := fd.Write(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); !linuxerr.Equals(linuxerr.EBADF, err) {
t.Fatalf("Write for directory FD failed with unexpected error: %v", err)
}
}
@@ -346,3 +347,63 @@ func TestDirFDIterDirents(t *testing.T) {
"file1": linux.DT_REG,
})
}
+
+func TestDirWalkDentryTree(t *testing.T) {
+ sys := newTestSystem(t, func(ctx context.Context, creds *auth.Credentials, fs *filesystem) kernfs.Inode {
+ return fs.newDir(ctx, creds, 0755, map[string]kernfs.Inode{
+ "dir1": fs.newDir(ctx, creds, 0755, nil),
+ "dir2": fs.newDir(ctx, creds, 0755, map[string]kernfs.Inode{
+ "file1": fs.newFile(ctx, creds, staticFileContent),
+ "dir3": fs.newDir(ctx, creds, 0755, nil),
+ }),
+ })
+ })
+ defer sys.Destroy()
+
+ testWalk := func(from *kernfs.Dentry, getDentryPath, walkPath string, expectedErr error) {
+ var d *kernfs.Dentry
+ if getDentryPath != "" {
+ pop := sys.PathOpAtRoot(getDentryPath)
+ vd := sys.GetDentryOrDie(pop)
+ defer vd.DecRef(sys.Ctx)
+ d = vd.Dentry().Impl().(*kernfs.Dentry)
+ }
+
+ match, err := from.WalkDentryTree(sys.Ctx, sys.VFS, fspath.Parse(walkPath))
+ if err == nil {
+ defer match.DecRef(sys.Ctx)
+ }
+
+ if err != expectedErr {
+ t.Fatalf("WalkDentryTree from %q to %q (with expected error: %v) unexpected error, want: %v, got: %v", from.FSLocalPath(), walkPath, expectedErr, expectedErr, err)
+ }
+ if expectedErr != nil {
+ return
+ }
+
+ if d != match {
+ t.Fatalf("WalkDentryTree from %q to %q (with expected error: %v) found unexpected dentry; want: %v, got: %v", from.FSLocalPath(), walkPath, expectedErr, d, match)
+ }
+ }
+
+ rootD := sys.Root.Dentry().Impl().(*kernfs.Dentry)
+
+ testWalk(rootD, "dir1", "/dir1", nil)
+ testWalk(rootD, "", "/dir-non-existent", linuxerr.ENOENT)
+ testWalk(rootD, "", "/dir1/child-non-existent", linuxerr.ENOENT)
+ testWalk(rootD, "", "/dir2/inner-non-existent/dir3", linuxerr.ENOENT)
+
+ testWalk(rootD, "dir2/dir3", "/dir2/../dir2/dir3", nil)
+ testWalk(rootD, "dir2/dir3", "/dir2/././dir3", nil)
+ testWalk(rootD, "dir2/dir3", "/dir2/././dir3/.././dir3", nil)
+
+ pop := sys.PathOpAtRoot("dir2")
+ dir2VD := sys.GetDentryOrDie(pop)
+ defer dir2VD.DecRef(sys.Ctx)
+ dir2D := dir2VD.Dentry().Impl().(*kernfs.Dentry)
+
+ testWalk(dir2D, "dir2/dir3", "/dir3", nil)
+ testWalk(dir2D, "dir2/dir3", "/../../../dir3", nil)
+ testWalk(dir2D, "dir2/file1", "/file1", nil)
+ testWalk(dir2D, "dir2/file1", "file1", nil)
+}
diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go
index a0736c0d6..4adf76ce6 100644
--- a/pkg/sentry/fsimpl/kernfs/symlink.go
+++ b/pkg/sentry/fsimpl/kernfs/symlink.go
@@ -17,9 +17,9 @@ package kernfs
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// StaticSymlink provides an Inode implementation for symlinks that point to
@@ -62,5 +62,5 @@ func (s *StaticSymlink) Getlink(context.Context, *vfs.Mount) (vfs.VirtualDentry,
// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
func (*StaticSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
index 11694c392..c91d23b56 100644
--- a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
+++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
@@ -19,9 +19,9 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// syntheticDirectory implements kernfs.Inode for a directory created by
@@ -65,13 +65,13 @@ func (dir *syntheticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath,
// NewFile implements Inode.NewFile.
func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
// NewDir implements Inode.NewDir.
func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error) {
if !opts.ForSyntheticMountpoint {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
subdirI := newSyntheticDirectory(ctx, auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask)
if err := dir.OrderedChildren.Insert(name, subdirI); err != nil {
@@ -84,17 +84,17 @@ func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs
// NewLink implements Inode.NewLink.
func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
// NewSymlink implements Inode.NewSymlink.
func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
// NewNode implements Inode.NewNode.
func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
// DecRef implements Inode.DecRef.
diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD
index 5504476c8..d16dfef9b 100644
--- a/pkg/sentry/fsimpl/overlay/BUILD
+++ b/pkg/sentry/fsimpl/overlay/BUILD
@@ -29,6 +29,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/hostarch",
"//pkg/log",
@@ -41,7 +42,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go
index 45aa5a494..520487066 100644
--- a/pkg/sentry/fsimpl/overlay/copy_up.go
+++ b/pkg/sentry/fsimpl/overlay/copy_up.go
@@ -20,12 +20,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
func (d *dentry) isCopiedUp() bool {
@@ -36,6 +36,10 @@ func (d *dentry) isCopiedUp() bool {
//
// Preconditions: filesystem.renameMu must be locked.
func (d *dentry) copyUpLocked(ctx context.Context) error {
+ return d.copyUpMaybeSyntheticMountpointLocked(ctx, false /* forSyntheticMountpoint */)
+}
+
+func (d *dentry) copyUpMaybeSyntheticMountpointLocked(ctx context.Context, forSyntheticMountpoint bool) error {
// Fast path.
if d.isCopiedUp() {
return nil
@@ -51,15 +55,15 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
// Can be copied-up.
default:
// Can't be copied-up.
- return syserror.EPERM
+ return linuxerr.EPERM
}
// Ensure that our parent directory is copied-up.
if d.parent == nil {
// d is a filesystem root with no upper layer.
- return syserror.EROFS
+ return linuxerr.EROFS
}
- if err := d.parent.copyUpLocked(ctx); err != nil {
+ if err := d.parent.copyUpMaybeSyntheticMountpointLocked(ctx, forSyntheticMountpoint); err != nil {
return err
}
@@ -71,7 +75,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
if d.vfsd.IsDead() {
// Raced with deletion of d.
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// Obtain settable timestamps from the lower layer.
@@ -168,7 +172,8 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
case linux.S_IFDIR:
if err := vfsObj.MkdirAt(ctx, d.fs.creds, &newpop, &vfs.MkdirOptions{
- Mode: linux.FileMode(d.mode &^ linux.S_IFMT),
+ Mode: linux.FileMode(d.mode &^ linux.S_IFMT),
+ ForSyntheticMountpoint: forSyntheticMountpoint,
}); err != nil {
return err
}
@@ -271,7 +276,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
if upperStat.Mask&linux.STATX_INO == 0 {
cleanupUndoCopyUp()
- return syserror.EREMOTE
+ return linuxerr.EREMOTE
}
atomic.StoreUint32(&d.devMajor, upperStat.DevMajor)
atomic.StoreUint32(&d.devMinor, upperStat.DevMinor)
@@ -349,7 +354,7 @@ func (d *dentry) copyXattrsLocked(ctx context.Context) error {
lowerXattrs, err := vfsObj.ListXattrAt(ctx, d.fs.creds, lowerPop, 0)
if err != nil {
- if err == syserror.EOPNOTSUPP {
+ if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) {
// There are no guarantees as to the contents of lowerXattrs.
return nil
}
diff --git a/pkg/sentry/fsimpl/overlay/directory.go b/pkg/sentry/fsimpl/overlay/directory.go
index df4492346..ad3cdbb56 100644
--- a/pkg/sentry/fsimpl/overlay/directory.go
+++ b/pkg/sentry/fsimpl/overlay/directory.go
@@ -19,10 +19,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
func (d *dentry) isDir() bool {
@@ -69,7 +69,7 @@ func (d *dentry) collectWhiteoutsForRmdirLocked(ctx context.Context) (map[string
return nil
}
// Non-whiteout file in the directory prevents rmdir.
- return syserror.ENOTEMPTY
+ return linuxerr.ENOTEMPTY
}))
if err != nil {
readdirErr = err
@@ -88,7 +88,7 @@ func (d *dentry) collectWhiteoutsForRmdirLocked(ctx context.Context) (map[string
}
if stat.RdevMajor != 0 || stat.RdevMinor != 0 {
// This file is a real character device, not a whiteout.
- readdirErr = syserror.ENOTEMPTY
+ readdirErr = linuxerr.ENOTEMPTY
return false
}
whiteouts[maybeWhiteoutName] = isUpper
@@ -256,7 +256,7 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in
switch whence {
case linux.SEEK_SET:
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if offset == 0 {
// Ensure that the next call to fd.IterDirents() calls
@@ -268,13 +268,13 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in
case linux.SEEK_CUR:
offset += fd.off
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// Don't clear fd.dirents in this case, even if offset == 0.
fd.off = offset
return fd.off, nil
default:
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
}
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 6b6fa0bd5..3b3dcf836 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -21,13 +21,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// _OVL_XATTR_PREFIX is an extended attribute key prefix to identify overlayfs
@@ -86,7 +86,7 @@ func putDentrySlice(ds *[]*dentry) {
// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
//
-// +checklocks:fs.renameMu
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]*dentry) {
fs.renameMu.RUnlock()
if *dsp == nil {
@@ -112,7 +112,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]*
putDentrySlice(*dsp)
}
-// +checklocks:fs.renameMu
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
if *ds == nil {
fs.renameMu.Unlock()
@@ -137,7 +137,7 @@ func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*de
// * !rp.Done().
func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, lookupLayer, error) {
if !d.isDir() {
- return nil, lookupLayerNone, syserror.ENOTDIR
+ return nil, lookupLayerNone, linuxerr.ENOTDIR
}
if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
return nil, lookupLayerNone, err
@@ -218,7 +218,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
Start: parentVD,
Path: childPath,
}, &vfs.GetDentryOptions{})
- if err == syserror.ENOENT || err == syserror.ENAMETOOLONG {
+ if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) {
// The file doesn't exist on this layer. Proceed to the next one.
return true
}
@@ -245,7 +245,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
return false
}
if stat.Mask&mask != mask {
- lookupErr = syserror.EREMOTE
+ lookupErr = linuxerr.EREMOTE
return false
}
@@ -313,7 +313,7 @@ func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name str
}
if !topLookupLayer.existsInOverlay() {
child.destroyLocked(ctx)
- return nil, topLookupLayer, syserror.ENOENT
+ return nil, topLookupLayer, linuxerr.ENOENT
}
// Device and inode numbers were copied from the topmost layer above. Remap
@@ -352,7 +352,7 @@ func (fs *filesystem) lookupLayerLocked(ctx context.Context, parent *dentry, nam
}, &vfs.StatOptions{
Mask: linux.STATX_TYPE,
})
- if err == syserror.ENOENT || err == syserror.ENAMETOOLONG {
+ if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) {
// The file doesn't exist on this layer. Proceed to the next
// one.
return true
@@ -365,7 +365,7 @@ func (fs *filesystem) lookupLayerLocked(ctx context.Context, parent *dentry, nam
// Linux's overlayfs tends to return EREMOTE in cases where a file
// is unusable for reasons that are not better captured by another
// errno.
- lookupErr = syserror.EREMOTE
+ lookupErr = linuxerr.EREMOTE
return false
}
if isWhiteout(&stat) {
@@ -437,7 +437,7 @@ func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving
d = next
}
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return d, nil
}
@@ -457,18 +457,26 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath,
d = next
}
if rp.MustBeDir() && !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return d, nil
}
+type createType int
+
+const (
+ createNonDirectory createType = iota
+ createDirectory
+ createSyntheticMountpoint
+)
+
// doCreateAt checks that creating a file at rp is permitted, then invokes
// create to do so.
//
// Preconditions:
// * !rp.Done().
// * For the final path component in rp, !rp.ShouldFollowSymlink().
-func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string, haveUpperWhiteout bool) error) error {
+func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, ct createType, create func(parent *dentry, name string, haveUpperWhiteout bool) error) error {
var ds *[]*dentry
fs.renameMu.RLock()
defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
@@ -479,10 +487,10 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
}
name := rp.Component()
if name == "." || name == ".." {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
if parent.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
@@ -494,18 +502,18 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
// Determine if a file already exists at name.
if _, ok := parent.children[name]; ok {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
childLayer, err := fs.lookupLayerLocked(ctx, parent, name)
if err != nil {
return err
}
if childLayer.existsInOverlay() {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
- if !dir && rp.MustBeDir() {
- return syserror.ENOENT
+ if ct == createNonDirectory && rp.MustBeDir() {
+ return linuxerr.ENOENT
}
mnt := rp.Mount()
@@ -518,7 +526,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
}
// Ensure that the parent directory is copied-up so that we can create the
// new file in the upper layer.
- if err := parent.copyUpLocked(ctx); err != nil {
+ if err := parent.copyUpMaybeSyntheticMountpointLocked(ctx, ct == createSyntheticMountpoint); err != nil {
return err
}
@@ -529,7 +537,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
parent.dirents = nil
ev := linux.IN_CREATE
- if dir {
+ if ct != createNonDirectory {
ev |= linux.IN_ISDIR
}
parent.watches.Notify(ctx, name, uint32(ev), 0 /* cookie */, vfs.InodeEvent, false /* unlinked */)
@@ -592,7 +600,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
}
if opts.CheckSearchable {
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
@@ -618,13 +626,13 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa
// LinkAt implements vfs.FilesystemImpl.LinkAt.
func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
- return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
+ return fs.doCreateAt(ctx, rp, createNonDirectory, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
if rp.Mount() != vd.Mount() {
- return syserror.EXDEV
+ return linuxerr.EXDEV
}
old := vd.Dentry().Impl().(*dentry)
if old.isDir() {
- return syserror.EPERM
+ return linuxerr.EPERM
}
if err := old.copyUpLocked(ctx); err != nil {
return err
@@ -671,7 +679,11 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
- return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
+ ct := createDirectory
+ if opts.ForSyntheticMountpoint {
+ ct = createSyntheticMountpoint
+ }
+ return fs.doCreateAt(ctx, rp, ct, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
vfsObj := fs.vfsfs.VirtualFilesystem()
pop := vfs.PathOperation{
Root: parent.upperVD,
@@ -722,10 +734,10 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
// MknodAt implements vfs.FilesystemImpl.MknodAt.
func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
- return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
+ return fs.doCreateAt(ctx, rp, createNonDirectory, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
// Disallow attempts to create whiteouts.
if opts.Mode&linux.S_IFMT == linux.S_IFCHR && opts.DevMajor == 0 && opts.DevMinor == 0 {
- return syserror.EPERM
+ return linuxerr.EPERM
}
vfsObj := fs.vfsfs.VirtualFilesystem()
pop := vfs.PathOperation{
@@ -779,10 +791,10 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
start := rp.Start().Impl().(*dentry)
if rp.Done() {
if mayCreate && rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if mustCreate {
- return nil, syserror.EEXIST
+ return nil, linuxerr.EEXIST
}
if start.isRegularFile() && mayWrite {
if err := start.copyUpLocked(ctx); err != nil {
@@ -806,12 +818,12 @@ afterTrailingSymlink:
}
// Reject attempts to open directories with O_CREAT.
if mayCreate && rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
// Determine whether or not we need to create a file.
parent.dirMu.Lock()
child, topLookupLayer, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
- if err == syserror.ENOENT && mayCreate {
+ if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate {
fd, err := fs.createAndOpenLocked(ctx, rp, parent, &opts, &ds, topLookupLayer == lookupLayerUpperWhiteout)
parent.dirMu.Unlock()
return fd, err
@@ -822,7 +834,7 @@ afterTrailingSymlink:
}
// Open existing child or follow symlink.
if mustCreate {
- return nil, syserror.EEXIST
+ return nil, linuxerr.EEXIST
}
if child.isSymlink() && rp.ShouldFollowSymlink() {
target, err := child.readlink(ctx)
@@ -836,7 +848,7 @@ afterTrailingSymlink:
goto afterTrailingSymlink
}
if rp.MustBeDir() && !child.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
if child.isRegularFile() && mayWrite {
if err := child.copyUpLocked(ctx); err != nil {
@@ -864,14 +876,14 @@ func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *
if ftype == linux.S_IFDIR {
// Can't open directories with O_CREAT.
if opts.Flags&linux.O_CREAT != 0 {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
// Can't open directories writably.
if ats.MayWrite() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if opts.Flags&linux.O_DIRECT != 0 {
- return nil, syserror.EINVAL
+ return nil, linuxerr.EINVAL
}
fd := &directoryFD{}
fd.LockFD.Init(&d.locks)
@@ -918,7 +930,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
return nil, err
}
if parent.vfsd.IsDead() {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
mnt := rp.Mount()
if err := mnt.CheckBeginWrite(); err != nil {
@@ -1027,19 +1039,19 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
newName := rp.Component()
if newName == "." || newName == ".." {
if opts.Flags&linux.RENAME_NOREPLACE != 0 {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
- return syserror.EBUSY
+ return linuxerr.EBUSY
}
mnt := rp.Mount()
if mnt != oldParentVD.Mount() {
- return syserror.EXDEV
+ return linuxerr.EXDEV
}
if err := mnt.CheckBeginWrite(); err != nil {
return err
@@ -1064,7 +1076,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
if renamed.isDir() {
if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
if oldParent != newParent {
if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil {
@@ -1073,7 +1085,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
} else {
if opts.MustBeDir || rp.MustBeDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
}
@@ -1085,7 +1097,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
defer newParent.dirMu.Unlock()
}
if newParent.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
var (
replaced *dentry
@@ -1094,20 +1106,20 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
whiteouts map[string]bool
)
replaced, replacedLayer, err = fs.getChildLocked(ctx, newParent, newName, &ds)
- if err != nil && err != syserror.ENOENT {
+ if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
return err
}
if replaced != nil {
if opts.Flags&linux.RENAME_NOREPLACE != 0 {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
replacedVFSD = &replaced.vfsd
if replaced.isDir() {
if !renamed.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if genericIsAncestorDentry(replaced, renamed) {
- return syserror.ENOTEMPTY
+ return linuxerr.ENOTEMPTY
}
replaced.dirMu.Lock()
defer replaced.dirMu.Unlock()
@@ -1117,7 +1129,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
} else {
if rp.MustBeDir() || renamed.isDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
}
}
@@ -1177,7 +1189,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
Root: replaced.upperVD,
Start: replaced.upperVD,
Path: fspath.Parse(whiteoutName),
- }); err != nil && err != syserror.EEXIST {
+ }); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RenameAt failure: %v", err))
}
}
@@ -1285,10 +1297,10 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
defer rp.Mount().EndWrite()
name := rp.Component()
if name == "." {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
if name == ".." {
- return syserror.ENOTEMPTY
+ return linuxerr.ENOTEMPTY
}
vfsObj := rp.VirtualFilesystem()
mntns := vfs.MountNamespaceFromContext(ctx)
@@ -1309,7 +1321,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
if !child.isDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
if err := parent.mayDelete(rp.Credentials(), child); err != nil {
return err
@@ -1344,7 +1356,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
Root: child.upperVD,
Start: child.upperVD,
Path: fspath.Parse(whiteoutName),
- }); err != nil && err != syserror.EEXIST {
+ }); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) {
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RmdirAt failure: %v", err))
}
}
@@ -1476,7 +1488,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu
// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
- return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
+ return fs.doCreateAt(ctx, rp, createNonDirectory, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
vfsObj := fs.vfsfs.VirtualFilesystem()
pop := vfs.PathOperation{
Root: parent.upperVD,
@@ -1532,10 +1544,10 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
defer rp.Mount().EndWrite()
name := rp.Component()
if name == "." || name == ".." {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if rp.MustBeDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
vfsObj := rp.VirtualFilesystem()
mntns := vfs.MountNamespaceFromContext(ctx)
@@ -1556,7 +1568,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
if child.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if err := parent.mayDelete(rp.Credentials(), child); err != nil {
return err
@@ -1658,7 +1670,7 @@ func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Crede
// Return EOPNOTSUPP when fetching an overlay attribute.
// See fs/overlayfs/super.c:ovl_own_xattr_get().
if isOverlayXattr(opts.Name) {
- return "", syserror.EOPNOTSUPP
+ return "", linuxerr.EOPNOTSUPP
}
// Analogous to fs/overlayfs/super.c:ovl_other_xattr_get().
@@ -1696,7 +1708,7 @@ func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mo
// Return EOPNOTSUPP when setting an overlay attribute.
// See fs/overlayfs/super.c:ovl_own_xattr_set().
if isOverlayXattr(opts.Name) {
- return syserror.EOPNOTSUPP
+ return linuxerr.EOPNOTSUPP
}
// Analogous to fs/overlayfs/super.c:ovl_other_xattr_set().
@@ -1741,7 +1753,7 @@ func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs
// Linux passes the remove request to xattr_handler->set.
// See fs/xattr.c:vfs_removexattr().
if isOverlayXattr(name) {
- return syserror.EOPNOTSUPP
+ return linuxerr.EOPNOTSUPP
}
if err := mnt.CheckBeginWrite(); err != nil {
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index 454c20d4f..46d9f1f1d 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -40,13 +40,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/refsvfs2"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Name is the default filesystem name.
@@ -135,7 +135,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fsopts, ok := fsoptsRaw.(FilesystemOptions)
if fsoptsRaw != nil && !ok {
ctx.Infof("overlay.FilesystemType.GetFilesystem: GetFilesystemOptions.InternalData has type %T, wanted overlay.FilesystemOptions or nil", fsoptsRaw)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
vfsroot := vfs.RootFromContext(ctx)
if vfsroot.Ok() {
@@ -145,7 +145,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if upperPathname, ok := mopts["upperdir"]; ok {
if fsopts.UpperRoot.Ok() {
ctx.Infof("overlay.FilesystemType.GetFilesystem: both upperdir and FilesystemOptions.UpperRoot are specified")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
delete(mopts, "upperdir")
// Linux overlayfs also requires a workdir when upperdir is
@@ -154,7 +154,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
upperPath := fspath.Parse(upperPathname)
if !upperPath.Absolute {
ctx.Infof("overlay.FilesystemType.GetFilesystem: upperdir %q must be absolute", upperPathname)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
upperRoot, err := vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{
Root: vfsroot,
@@ -181,7 +181,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if lowerPathnamesStr, ok := mopts["lowerdir"]; ok {
if len(fsopts.LowerRoots) != 0 {
ctx.Infof("overlay.FilesystemType.GetFilesystem: both lowerdir and FilesystemOptions.LowerRoots are specified")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
delete(mopts, "lowerdir")
lowerPathnames := strings.Split(lowerPathnamesStr, ":")
@@ -189,7 +189,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
lowerPath := fspath.Parse(lowerPathname)
if !lowerPath.Absolute {
ctx.Infof("overlay.FilesystemType.GetFilesystem: lowerdir %q must be absolute", lowerPathname)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
lowerRoot, err := vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{
Root: vfsroot,
@@ -216,21 +216,21 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if len(mopts) != 0 {
ctx.Infof("overlay.FilesystemType.GetFilesystem: unused options: %v", mopts)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
if len(fsopts.LowerRoots) == 0 {
ctx.Infof("overlay.FilesystemType.GetFilesystem: at least one lower layer is required")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
if len(fsopts.LowerRoots) < 2 && !fsopts.UpperRoot.Ok() {
ctx.Infof("overlay.FilesystemType.GetFilesystem: at least two lower layers are required when no upper layer is present")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
const maxLowerLayers = 500 // Linux: fs/overlay/super.c:OVL_MAX_STACK
if len(fsopts.LowerRoots) > maxLowerLayers {
ctx.Infof("overlay.FilesystemType.GetFilesystem: %d lower layers specified, maximum %d", len(fsopts.LowerRoots), maxLowerLayers)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
// Take extra references held by the filesystem.
@@ -277,13 +277,13 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if rootStat.Mask&rootStatMask != rootStatMask {
root.destroyLocked(ctx)
fs.vfsfs.DecRef(ctx)
- return nil, nil, syserror.EREMOTE
+ return nil, nil, linuxerr.EREMOTE
}
if isWhiteout(&rootStat) {
ctx.Infof("overlay.FilesystemType.GetFilesystem: filesystem root is a whiteout")
root.destroyLocked(ctx)
fs.vfsfs.DecRef(ctx)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
root.mode = uint32(rootStat.Mode)
root.uid = rootStat.UID
diff --git a/pkg/sentry/fsimpl/overlay/regular_file.go b/pkg/sentry/fsimpl/overlay/regular_file.go
index 82491a0f8..156ffeaeb 100644
--- a/pkg/sentry/fsimpl/overlay/regular_file.go
+++ b/pkg/sentry/fsimpl/overlay/regular_file.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -26,7 +27,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -415,7 +415,7 @@ func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOp
// Only permit mmap of regular files, since other file types may have
// unpredictable behavior when mmapped (e.g. /dev/zero).
if atomic.LoadUint32(&d.mode)&linux.S_IFMT != linux.S_IFREG {
- return syserror.ENODEV
+ return linuxerr.ENODEV
}
// Get a Mappable for the current top layer.
diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD
index 278ee3c92..a50510031 100644
--- a/pkg/sentry/fsimpl/pipefs/BUILD
+++ b/pkg/sentry/fsimpl/pipefs/BUILD
@@ -9,6 +9,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/hostarch",
"//pkg/sentry/fsimpl/kernfs",
@@ -16,6 +17,5 @@ go_library(
"//pkg/sentry/kernel/pipe",
"//pkg/sentry/kernel/time",
"//pkg/sentry/vfs",
- "//pkg/syserror",
],
)
diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go
index 08aedc2ad..af09195a7 100644
--- a/pkg/sentry/fsimpl/pipefs/pipefs.go
+++ b/pkg/sentry/fsimpl/pipefs/pipefs.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
@@ -28,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// +stateify savable
@@ -152,7 +152,7 @@ func (i *inode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.
if opts.Stat.Mask == 0 {
return nil
}
- return syserror.EPERM
+ return linuxerr.EPERM
}
// Open implements kernfs.Inode.Open.
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
index 2b628bd55..95cfbdc42 100644
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -81,6 +81,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/hostarch",
"//pkg/log",
"//pkg/refs",
@@ -101,7 +102,6 @@ go_library(
"//pkg/sentry/usage",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/tcpip/header",
"//pkg/tcpip/network/ipv4",
"//pkg/usermem",
@@ -119,6 +119,7 @@ go_test(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/sentry/contexttest",
"//pkg/sentry/fsimpl/testutil",
@@ -127,7 +128,6 @@ go_test(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
index ce8f55b1f..f2697c12d 100644
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -21,11 +21,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -76,7 +76,7 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF
maxCachedDentries, err = strconv.ParseUint(str, 10, 64)
if err != nil {
ctx.Warningf("proc.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
}
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index c53cc0122..e04ae6660 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -20,11 +20,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// subtasksInode represents the inode for /proc/[pid]/task/ directory.
@@ -70,15 +70,15 @@ func (fs *filesystem) newSubtasks(ctx context.Context, task *kernel.Task, pidns
func (i *subtasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
tid, err := strconv.ParseUint(name, 10, 32)
if err != nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
subTask := i.pidns.TaskWithID(kernel.ThreadID(tid))
if subTask == nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
if subTask.ThreadGroup() != i.task.ThreadGroup() {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
return i.fs.newTaskInode(ctx, subTask, i.pidns, false, i.cgroupControllers)
}
@@ -87,7 +87,7 @@ func (i *subtasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode,
func (i *subtasksInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
tasks := i.task.ThreadGroup().MemberIDs(i.pidns)
if len(tasks) == 0 {
- return offset, syserror.ENOENT
+ return offset, linuxerr.ENOENT
}
if relOffset >= int64(len(tasks)) {
return offset, nil
@@ -123,7 +123,7 @@ type subtasksFD struct {
func (fd *subtasksFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
if fd.task.ExitState() >= kernel.TaskExitZombie {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
return fd.GenericDirectoryFD.IterDirents(ctx, cb)
}
@@ -131,7 +131,7 @@ func (fd *subtasksFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallbac
// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *subtasksFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
if fd.task.ExitState() >= kernel.TaskExitZombie {
- return 0, syserror.ENOENT
+ return 0, linuxerr.ENOENT
}
return fd.GenericDirectoryFD.Seek(ctx, offset, whence)
}
@@ -139,7 +139,7 @@ func (fd *subtasksFD) Seek(ctx context.Context, offset int64, whence int32) (int
// Stat implements vfs.FileDescriptionImpl.Stat.
func (fd *subtasksFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
if fd.task.ExitState() >= kernel.TaskExitZombie {
- return linux.Statx{}, syserror.ENOENT
+ return linux.Statx{}, linuxerr.ENOENT
}
return fd.GenericDirectoryFD.Stat(ctx, opts)
}
@@ -147,7 +147,7 @@ func (fd *subtasksFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Sta
// SetStat implements vfs.FileDescriptionImpl.SetStat.
func (fd *subtasksFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
if fd.task.ExitState() >= kernel.TaskExitZombie {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
return fd.GenericDirectoryFD.SetStat(ctx, opts)
}
@@ -180,7 +180,7 @@ func (i *subtasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs
// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// DecRef implements kernfs.Inode.DecRef.
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index d05cc1508..f54811edf 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -20,12 +20,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// taskInode represents the inode for /proc/PID/ directory.
@@ -49,7 +49,7 @@ var _ kernfs.Inode = (*taskInode)(nil)
func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, fakeCgroupControllers map[string]string) (kernfs.Inode, error) {
if task.ExitState() == kernel.TaskExitDead {
- return nil, syserror.ESRCH
+ return nil, linuxerr.ESRCH
}
contents := map[string]kernfs.Inode{
@@ -65,8 +65,8 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns
"io": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0400, newIO(task, isThreadGroup)),
"maps": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mapsData{task: task}),
"mem": fs.newMemInode(ctx, task, fs.NextIno(), 0400),
- "mountinfo": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountInfoData{task: task}),
- "mounts": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountsData{task: task}),
+ "mountinfo": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountInfoData{fs: fs, task: task}),
+ "mounts": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountsData{fs: fs, task: task}),
"net": fs.newTaskNetDir(ctx, task),
"ns": fs.newTaskOwnedDir(ctx, task, fs.NextIno(), 0511, map[string]kernfs.Inode{
"net": fs.newNamespaceSymlink(ctx, task, fs.NextIno(), "net"),
@@ -78,7 +78,7 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns
"smaps": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &smapsData{task: task}),
"stat": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
"statm": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statmData{task: task}),
- "status": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statusData{task: task, pidns: pidns}),
+ "status": fs.newStatusInode(ctx, task, pidns, fs.NextIno(), 0444),
"uid_map": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}),
}
if isThreadGroup {
@@ -124,7 +124,7 @@ func (i *taskInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.D
// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// DecRef implements kernfs.Inode.DecRef.
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index 4718fac7a..5c6412fc0 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -22,11 +22,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
func getTaskFD(t *kernel.Task, fd int32) (*vfs.FileDescription, kernel.FDFlags) {
@@ -42,12 +42,12 @@ func getTaskFD(t *kernel.Task, fd int32) (*vfs.FileDescription, kernel.FDFlags)
return file, flags
}
-func taskFDExists(ctx context.Context, t *kernel.Task, fd int32) bool {
+func taskFDExists(ctx context.Context, fs *filesystem, t *kernel.Task, fd int32) bool {
file, _ := getTaskFD(t, fd)
if file == nil {
return false
}
- file.DecRef(ctx)
+ fs.SafeDecRefFD(ctx, file)
return true
}
@@ -142,11 +142,11 @@ func (i *fdDirInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.Ite
func (i *fdDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
fdInt, err := strconv.ParseInt(name, 10, 32)
if err != nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
fd := int32(fdInt)
- if !taskFDExists(ctx, i.task, fd) {
- return nil, syserror.ENOENT
+ if !taskFDExists(ctx, i.fs, i.task, fd) {
+ return nil, linuxerr.ENOENT
}
return i.fs.newFDSymlink(ctx, i.task, fd, i.fs.NextIno()), nil
}
@@ -198,6 +198,7 @@ type fdSymlink struct {
kernfs.InodeNoopRefCount
kernfs.InodeSymlink
+ fs *filesystem
task *kernel.Task
fd int32
}
@@ -206,6 +207,7 @@ var _ kernfs.Inode = (*fdSymlink)(nil)
func (fs *filesystem) newFDSymlink(ctx context.Context, task *kernel.Task, fd int32, ino uint64) kernfs.Inode {
inode := &fdSymlink{
+ fs: fs,
task: task,
fd: fd,
}
@@ -216,11 +218,11 @@ func (fs *filesystem) newFDSymlink(ctx context.Context, task *kernel.Task, fd in
func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
file, _ := getTaskFD(s.task, s.fd)
if file == nil {
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
- defer file.DecRef(ctx)
+ defer s.fs.SafeDecRefFD(ctx, file)
root := vfs.RootFromContext(ctx)
- defer root.DecRef(ctx)
+ defer s.fs.SafeDecRef(ctx, root)
// Note: it's safe to reenter kernfs from Readlink if needed to resolve path.
return s.task.Kernel().VFS().PathnameWithDeleted(ctx, root, file.VirtualDentry())
@@ -229,9 +231,9 @@ func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error)
func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error) {
file, _ := getTaskFD(s.task, s.fd)
if file == nil {
- return vfs.VirtualDentry{}, "", syserror.ENOENT
+ return vfs.VirtualDentry{}, "", linuxerr.ENOENT
}
- defer file.DecRef(ctx)
+ defer s.fs.SafeDecRefFD(ctx, file)
vd := file.VirtualDentry()
vd.IncRef()
return vd, "", nil
@@ -239,7 +241,7 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen
// Valid implements kernfs.Inode.Valid.
func (s *fdSymlink) Valid(ctx context.Context) bool {
- return taskFDExists(ctx, s.task, s.fd)
+ return taskFDExists(ctx, s.fs, s.task, s.fd)
}
// fdInfoDirInode represents the inode for /proc/[pid]/fdinfo directory.
@@ -276,13 +278,14 @@ func (fs *filesystem) newFDInfoDirInode(ctx context.Context, task *kernel.Task)
func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
fdInt, err := strconv.ParseInt(name, 10, 32)
if err != nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
fd := int32(fdInt)
- if !taskFDExists(ctx, i.task, fd) {
- return nil, syserror.ENOENT
+ if !taskFDExists(ctx, i.fs, i.task, fd) {
+ return nil, linuxerr.ENOENT
}
data := &fdInfoData{
+ fs: i.fs,
task: i.task,
fd: fd,
}
@@ -316,6 +319,7 @@ func (i *fdInfoDirInode) DecRef(ctx context.Context) {
type fdInfoData struct {
kernfs.DynamicBytesFile
+ fs *filesystem
task *kernel.Task
fd int32
}
@@ -326,9 +330,9 @@ var _ dynamicInode = (*fdInfoData)(nil)
func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
file, descriptorFlags := getTaskFD(d.task, d.fd)
if file == nil {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
- defer file.DecRef(ctx)
+ defer d.fs.SafeDecRefFD(ctx, file)
// TODO(b/121266871): Include pos, locks, and other data. For now we only
// have flags.
// See https://www.kernel.org/doc/Documentation/filesystems/proc.txt
@@ -339,5 +343,5 @@ func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// Valid implements kernfs.Inode.Valid.
func (d *fdInfoData) Valid(ctx context.Context) bool {
- return taskFDExists(ctx, d.task, d.fd)
+ return taskFDExists(ctx, d.fs, d.task, d.fd)
}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index b294dfd6a..d3f9cf489 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
@@ -32,7 +33,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -40,7 +40,7 @@ import (
// Linux 3.18, the limit is five lines." - user_namespaces(7)
const maxIDMapLines = 5
-// mm gets the kernel task's MemoryManager. No additional reference is taken on
+// getMM gets the kernel task's MemoryManager. No additional reference is taken on
// mm here. This is safe because MemoryManager.destroy is required to leave the
// MemoryManager in a state where it's still usable as a DynamicBytesSource.
func getMM(task *kernel.Task) *mm.MemoryManager {
@@ -70,9 +70,9 @@ func getMMIncRef(task *kernel.Task) (*mm.MemoryManager, error) {
func checkTaskState(t *kernel.Task) error {
switch t.ExitState() {
case kernel.TaskExitZombie:
- return syserror.EACCES
+ return linuxerr.EACCES
case kernel.TaskExitDead:
- return syserror.ESRCH
+ return linuxerr.ESRCH
}
return nil
}
@@ -109,7 +109,7 @@ var _ dynamicInode = (*auxvData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error {
if d.task.ExitState() == kernel.TaskExitDead {
- return syserror.ESRCH
+ return linuxerr.ESRCH
}
m, err := getMMIncRef(d.task)
if err != nil {
@@ -159,7 +159,7 @@ var _ dynamicInode = (*cmdlineData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
if d.task.ExitState() == kernel.TaskExitDead {
- return syserror.ESRCH
+ return linuxerr.ESRCH
}
m, err := getMMIncRef(d.task)
if err != nil {
@@ -227,7 +227,7 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
if int(arEnvv.Length()) > remaining {
end, ok := arEnvv.Start.AddLength(uint64(remaining))
if !ok {
- return syserror.EFAULT
+ return linuxerr.EFAULT
}
arEnvv.End = end
}
@@ -325,7 +325,7 @@ func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset in
// the file ..." - user_namespaces(7)
srclen := src.NumBytes()
if srclen >= hostarch.PageSize || offset != 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
b := make([]byte, srclen)
if _, err := src.CopyIn(ctx, b); err != nil {
@@ -345,7 +345,7 @@ func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset in
}
lines := bytes.SplitN(b, []byte("\n"), maxIDMapLines+1)
if len(lines) > maxIDMapLines {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
entries := make([]auth.IDMapEntry, len(lines))
@@ -353,7 +353,7 @@ func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset in
var e auth.IDMapEntry
_, err := fmt.Sscan(string(l), &e.FirstID, &e.FirstParentID, &e.Length)
if err != nil {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
entries[i] = e
}
@@ -408,7 +408,7 @@ func (f *memInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.De
// Permission to read this file is governed by PTRACE_MODE_ATTACH_FSCREDS
// Since we dont implement setfsuid/setfsgid we can just use PTRACE_MODE_ATTACH
if !kernel.ContextCanTrace(ctx, f.task, true) {
- return nil, syserror.EACCES
+ return nil, linuxerr.EACCES
}
if err := checkTaskState(f.task); err != nil {
return nil, err
@@ -422,7 +422,7 @@ func (f *memInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.De
// SetStat implements kernfs.Inode.SetStat.
func (*memInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
var _ vfs.FileDescriptionImpl = (*memFD)(nil)
@@ -461,10 +461,10 @@ func (fd *memFD) Seek(ctx context.Context, offset int64, whence int32) (int64, e
case linux.SEEK_CUR:
offset += fd.offset
default:
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
fd.offset = offset
return offset, nil
@@ -485,12 +485,12 @@ func (fd *memFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64
n, readErr := m.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
if n > 0 {
if _, err := dst.CopyOut(ctx, buf[:n]); err != nil {
- return 0, syserror.EFAULT
+ return 0, linuxerr.EFAULT
}
return int64(n), nil
}
if readErr != nil {
- return 0, syserror.EIO
+ return 0, linuxerr.EIO
}
return 0, nil
}
@@ -512,7 +512,7 @@ func (fd *memFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, e
// SetStat implements vfs.FileDescriptionImpl.SetStat.
func (fd *memFD) SetStat(context.Context, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// Release implements vfs.FileDescriptionImpl.Release.
@@ -608,12 +608,10 @@ func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.task.StartTime().Sub(s.task.Kernel().Timekeeper().BootTime())))
var vss, rss uint64
- s.task.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- }
- })
+ if mm := getMM(s.task); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
fmt.Fprintf(buf, "%d %d ", vss, rss/hostarch.PageSize)
// rsslim.
@@ -649,63 +647,159 @@ var _ dynamicInode = (*statmData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
var vss, rss uint64
- s.task.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- }
- })
-
+ if mm := getMM(s.task); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize)
return nil
}
-// statusData implements vfs.DynamicBytesSource for /proc/[pid]/status.
+// statusInode implements kernfs.Inode for /proc/[pid]/status.
//
// +stateify savable
-type statusData struct {
- kernfs.DynamicBytesFile
+type statusInode struct {
+ kernfs.InodeAttrs
+ kernfs.InodeNoStatFS
+ kernfs.InodeNoopRefCount
+ kernfs.InodeNotDirectory
+ kernfs.InodeNotSymlink
task *kernel.Task
pidns *kernel.PIDNamespace
+ locks vfs.FileLocks
+}
+
+// statusFD implements vfs.FileDescriptionImpl and vfs.DynamicByteSource for
+// /proc/[pid]/status.
+//
+// +stateify savable
+type statusFD struct {
+ statusFDLowerBase
+ vfs.DynamicBytesFileDescriptionImpl
+ vfs.LockFD
+
+ vfsfd vfs.FileDescription
+
+ inode *statusInode
+ task *kernel.Task
+ pidns *kernel.PIDNamespace
+ userns *auth.UserNamespace // equivalent to struct file::f_cred::user_ns
}
-var _ dynamicInode = (*statusData)(nil)
+// statusFDLowerBase is a dumb hack to ensure that statusFD prefers
+// vfs.DynamicBytesFileDescriptionImpl methods to vfs.FileDescriptinDefaultImpl
+// methods.
+//
+// +stateify savable
+type statusFDLowerBase struct {
+ vfs.FileDescriptionDefaultImpl
+}
+
+func (fs *filesystem) newStatusInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, ino uint64, perm linux.FileMode) kernfs.Inode {
+ // Note: credentials are overridden by taskOwnedInode.
+ inode := &statusInode{
+ task: task,
+ pidns: pidns,
+ }
+ inode.InodeAttrs.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeRegular|perm)
+ return &taskOwnedInode{Inode: inode, owner: task}
+}
+
+// Open implements kernfs.Inode.Open.
+func (s *statusInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ fd := &statusFD{
+ inode: s,
+ task: s.task,
+ pidns: s.pidns,
+ userns: rp.Credentials().UserNamespace,
+ }
+ fd.LockFD.Init(&s.locks)
+ if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
+ fd.SetDataSource(fd)
+ return &fd.vfsfd, nil
+}
+
+// SetStat implements kernfs.Inode.SetStat.
+func (*statusInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
+ return linuxerr.EPERM
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (s *statusFD) Release(ctx context.Context) {
+}
+
+// Stat implements vfs.FileDescriptionImpl.Stat.
+func (s *statusFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+ fs := s.vfsfd.VirtualDentry().Mount().Filesystem()
+ return s.inode.Stat(ctx, fs, opts)
+}
+
+// SetStat implements vfs.FileDescriptionImpl.SetStat.
+func (s *statusFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+ return linuxerr.EPERM
+}
// Generate implements vfs.DynamicBytesSource.Generate.
-func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+func (s *statusFD) Generate(ctx context.Context, buf *bytes.Buffer) error {
fmt.Fprintf(buf, "Name:\t%s\n", s.task.Name())
fmt.Fprintf(buf, "State:\t%s\n", s.task.StateStatus())
fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.task.ThreadGroup()))
fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.task))
+
ppid := kernel.ThreadID(0)
if parent := s.task.Parent(); parent != nil {
ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
}
fmt.Fprintf(buf, "PPid:\t%d\n", ppid)
+
tpid := kernel.ThreadID(0)
if tracer := s.task.Tracer(); tracer != nil {
tpid = s.pidns.IDOfTask(tracer)
}
fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid)
+
+ creds := s.task.Credentials()
+ ruid := creds.RealKUID.In(s.userns).OrOverflow()
+ euid := creds.EffectiveKUID.In(s.userns).OrOverflow()
+ suid := creds.SavedKUID.In(s.userns).OrOverflow()
+ rgid := creds.RealKGID.In(s.userns).OrOverflow()
+ egid := creds.EffectiveKGID.In(s.userns).OrOverflow()
+ sgid := creds.SavedKGID.In(s.userns).OrOverflow()
var fds int
var vss, rss, data uint64
s.task.WithMuLocked(func(t *kernel.Task) {
if fdTable := t.FDTable(); fdTable != nil {
fds = fdTable.CurrentMaxFDs()
}
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- data = mm.VirtualDataSize()
- }
})
+ if mm := getMM(s.task); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ data = mm.VirtualDataSize()
+ }
+ // Filesystem user/group IDs aren't implemented; effective UID/GID are used
+ // instead.
+ fmt.Fprintf(buf, "Uid:\t%d\t%d\t%d\t%d\n", ruid, euid, suid, euid)
+ fmt.Fprintf(buf, "Gid:\t%d\t%d\t%d\t%d\n", rgid, egid, sgid, egid)
fmt.Fprintf(buf, "FDSize:\t%d\n", fds)
+ buf.WriteString("Groups:\t ")
+ // There is a space between each pair of supplemental GIDs, as well as an
+ // unconditional trailing space that some applications actually depend on.
+ var sep string
+ for _, kgid := range creds.ExtraKGIDs {
+ fmt.Fprintf(buf, "%s%d", sep, kgid.In(s.userns).OrOverflow())
+ sep = " "
+ }
+ buf.WriteString(" \n")
+
fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10)
fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10)
fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10)
+
fmt.Fprintf(buf, "Threads:\t%d\n", s.task.ThreadGroup().Count())
- creds := s.task.Credentials()
fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps)
fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
@@ -762,7 +856,7 @@ var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error {
if o.task.ExitState() == kernel.TaskExitDead {
- return syserror.ESRCH
+ return linuxerr.ESRCH
}
fmt.Fprintf(buf, "%d\n", o.task.OOMScoreAdj())
return nil
@@ -784,7 +878,7 @@ func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset
}
if o.task.ExitState() == kernel.TaskExitDead {
- return 0, syserror.ESRCH
+ return 0, linuxerr.ESRCH
}
if err := o.task.SetOOMScoreAdj(v); err != nil {
return 0, err
@@ -802,13 +896,17 @@ type exeSymlink struct {
kernfs.InodeNoopRefCount
kernfs.InodeSymlink
+ fs *filesystem
task *kernel.Task
}
var _ kernfs.Inode = (*exeSymlink)(nil)
func (fs *filesystem) newExeSymlink(ctx context.Context, task *kernel.Task, ino uint64) kernfs.Inode {
- inode := &exeSymlink{task: task}
+ inode := &exeSymlink{
+ fs: fs,
+ task: task,
+ }
inode.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
return inode
}
@@ -819,14 +917,14 @@ func (s *exeSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error)
if err != nil {
return "", err
}
- defer exec.DecRef(ctx)
+ defer s.fs.SafeDecRef(ctx, exec)
root := vfs.RootFromContext(ctx)
if !root.Ok() {
// It could have raced with process deletion.
- return "", syserror.ESRCH
+ return "", linuxerr.ESRCH
}
- defer root.DecRef(ctx)
+ defer s.fs.SafeDecRef(ctx, root)
vfsObj := exec.Mount().Filesystem().VirtualFilesystem()
name, _ := vfsObj.PathnameWithDeleted(ctx, root, exec)
@@ -836,31 +934,23 @@ func (s *exeSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error)
// Getlink implements kernfs.Inode.Getlink.
func (s *exeSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) {
if !kernel.ContextCanTrace(ctx, s.task, false) {
- return vfs.VirtualDentry{}, "", syserror.EACCES
+ return vfs.VirtualDentry{}, "", linuxerr.EACCES
}
if err := checkTaskState(s.task); err != nil {
return vfs.VirtualDentry{}, "", err
}
- var err error
- var exec fsbridge.File
- s.task.WithMuLocked(func(t *kernel.Task) {
- mm := t.MemoryManager()
- if mm == nil {
- err = syserror.EACCES
- return
- }
+ mm := getMM(s.task)
+ if mm == nil {
+ return vfs.VirtualDentry{}, "", linuxerr.EACCES
+ }
- // The MemoryManager may be destroyed, in which case
- // MemoryManager.destroy will simply set the executable to nil
- // (with locks held).
- exec = mm.Executable()
- if exec == nil {
- err = syserror.ESRCH
- }
- })
- if err != nil {
- return vfs.VirtualDentry{}, "", err
+ // The MemoryManager may be destroyed, in which case
+ // MemoryManager.destroy will simply set the executable to nil
+ // (with locks held).
+ exec := mm.Executable()
+ if exec == nil {
+ return vfs.VirtualDentry{}, "", linuxerr.ESRCH
}
defer exec.DecRef(ctx)
@@ -878,13 +968,17 @@ type cwdSymlink struct {
kernfs.InodeNoopRefCount
kernfs.InodeSymlink
+ fs *filesystem
task *kernel.Task
}
var _ kernfs.Inode = (*cwdSymlink)(nil)
func (fs *filesystem) newCwdSymlink(ctx context.Context, task *kernel.Task, ino uint64) kernfs.Inode {
- inode := &cwdSymlink{task: task}
+ inode := &cwdSymlink{
+ fs: fs,
+ task: task,
+ }
inode.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
return inode
}
@@ -895,14 +989,14 @@ func (s *cwdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error)
if err != nil {
return "", err
}
- defer cwd.DecRef(ctx)
+ defer s.fs.SafeDecRef(ctx, cwd)
root := vfs.RootFromContext(ctx)
if !root.Ok() {
// It could have raced with process deletion.
- return "", syserror.ESRCH
+ return "", linuxerr.ESRCH
}
- defer root.DecRef(ctx)
+ defer s.fs.SafeDecRef(ctx, root)
vfsObj := cwd.Mount().Filesystem().VirtualFilesystem()
name, _ := vfsObj.PathnameWithDeleted(ctx, root, cwd)
@@ -912,7 +1006,7 @@ func (s *cwdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error)
// Getlink implements kernfs.Inode.Getlink.
func (s *cwdSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDentry, string, error) {
if !kernel.ContextCanTrace(ctx, s.task, false) {
- return vfs.VirtualDentry{}, "", syserror.EACCES
+ return vfs.VirtualDentry{}, "", linuxerr.EACCES
}
if err := checkTaskState(s.task); err != nil {
return vfs.VirtualDentry{}, "", err
@@ -920,8 +1014,9 @@ func (s *cwdSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDent
cwd := s.task.FSContext().WorkingDirectoryVFS2()
if !cwd.Ok() {
// It could have raced with process deletion.
- return vfs.VirtualDentry{}, "", syserror.ESRCH
+ return vfs.VirtualDentry{}, "", linuxerr.ESRCH
}
+ // The reference is transferred to the caller.
return cwd, "", nil
}
@@ -931,6 +1026,7 @@ func (s *cwdSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDent
type mountInfoData struct {
kernfs.DynamicBytesFile
+ fs *filesystem
task *kernel.Task
}
@@ -951,7 +1047,7 @@ func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// Root has been destroyed. Don't try to read mounts.
return nil
}
- defer rootDir.DecRef(ctx)
+ defer i.fs.SafeDecRef(ctx, rootDir)
i.task.Kernel().VFS().GenerateProcMountInfo(ctx, rootDir, buf)
return nil
}
@@ -962,6 +1058,7 @@ func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
type mountsData struct {
kernfs.DynamicBytesFile
+ fs *filesystem
task *kernel.Task
}
@@ -982,7 +1079,7 @@ func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// Root has been destroyed. Don't try to read mounts.
return nil
}
- defer rootDir.DecRef(ctx)
+ defer i.fs.SafeDecRef(ctx, rootDir)
i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf)
return nil
}
@@ -1123,7 +1220,7 @@ func (d *taskCgroupData) Generate(ctx context.Context, buf *bytes.Buffer) error
// exit this file show a task is in no cgroups, which is incorrect. Instead,
// once a task has left its cgroups, we return an error.
if d.task.ExitState() >= kernel.TaskExitInitiated {
- return syserror.ESRCH
+ return linuxerr.ESRCH
}
d.task.GenerateProcTaskCgroup(buf)
diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go
index 177cb828f..ab47ea5a7 100644
--- a/pkg/sentry/fsimpl/proc/task_net.go
+++ b/pkg/sentry/fsimpl/proc/task_net.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
@@ -33,7 +34,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip/header"
)
@@ -679,7 +679,7 @@ func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error {
continue
}
if err := d.stack.Statistics(stat, line.prefix); err != nil {
- if err == syserror.EOPNOTSUPP {
+ if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) {
log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
} else {
log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index cf905fae4..26d44744b 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -21,11 +21,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -116,12 +116,12 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, err
case threadSelfName:
return i.newThreadSelfSymlink(ctx, root), nil
}
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
task := i.pidns.TaskWithID(kernel.ThreadID(tid))
if task == nil {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers)
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
index 045ed7a2d..4d3a2f7e6 100644
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -28,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// +stateify savable
@@ -53,11 +53,11 @@ func (s *selfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error
t := kernel.TaskFromContext(ctx)
if t == nil {
// Who is reading this link?
- return "", syserror.EINVAL
+ return "", linuxerr.EINVAL
}
tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
if tgid == 0 {
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
return strconv.FormatUint(uint64(tgid), 10), nil
}
@@ -69,7 +69,7 @@ func (s *selfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualD
// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// +stateify savable
@@ -94,12 +94,12 @@ func (s *threadSelfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string,
t := kernel.TaskFromContext(ctx)
if t == nil {
// Who is reading this link?
- return "", syserror.EINVAL
+ return "", linuxerr.EINVAL
}
tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
tid := s.pidns.IDOfTask(t)
if tid == 0 || tgid == 0 {
- return "", syserror.ENOENT
+ return "", linuxerr.ENOENT
}
return fmt.Sprintf("%d/task/%d", tgid, tid), nil
}
@@ -111,7 +111,7 @@ func (s *threadSelfSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.Vi
// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
func (*threadSelfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// dynamicBytesFileSetAttr implements a special file that allows inode
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 2bc98a94f..99f64a9d8 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/inet"
@@ -28,7 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -209,7 +209,7 @@ func (d *tcpSackData) Generate(ctx context.Context, buf *bytes.Buffer) error {
func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
if offset != 0 {
// No need to handle partial writes thus far.
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if src.NumBytes() == 0 {
return 0, nil
@@ -257,7 +257,7 @@ func (d *tcpRecoveryData) Generate(ctx context.Context, buf *bytes.Buffer) error
func (d *tcpRecoveryData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
if offset != 0 {
// No need to handle partial writes thus far.
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if src.NumBytes() == 0 {
return 0, nil
@@ -311,7 +311,7 @@ func (d *tcpMemData) Generate(ctx context.Context, buf *bytes.Buffer) error {
func (d *tcpMemData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
if offset != 0 {
// No need to handle partial writes thus far.
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if src.NumBytes() == 0 {
return 0, nil
@@ -396,7 +396,7 @@ func (ipf *ipForwarding) Generate(ctx context.Context, buf *bytes.Buffer) error
func (ipf *ipForwarding) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
if offset != 0 {
// No need to handle partial writes thus far.
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if src.NumBytes() == 0 {
return 0, nil
@@ -449,7 +449,7 @@ func (pr *portRange) Generate(ctx context.Context, buf *bytes.Buffer) error {
func (pr *portRange) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
if offset != 0 {
// No need to handle partial writes thus far.
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if src.NumBytes() == 0 {
return 0, nil
@@ -467,7 +467,7 @@ func (pr *portRange) Write(ctx context.Context, src usermem.IOSequence, offset i
// Port numbers must be uint16s.
if ports[0] < 0 || ports[1] < 0 || ports[0] > math.MaxUint16 || ports[1] > math.MaxUint16 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if err := pr.stack.SetPortRange(uint16(ports[0]), uint16(ports[1])); err != nil {
diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go
index e534fbca8..14f806c3c 100644
--- a/pkg/sentry/fsimpl/proc/tasks_test.go
+++ b/pkg/sentry/fsimpl/proc/tasks_test.go
@@ -23,13 +23,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -227,7 +227,7 @@ func TestTasks(t *testing.T) {
defer fd.DecRef(s.Ctx)
buf := make([]byte, 1)
bufIOSeq := usermem.BytesIOSequence(buf)
- if _, err := fd.Read(s.Ctx, bufIOSeq, vfs.ReadOptions{}); err != syserror.EISDIR {
+ if _, err := fd.Read(s.Ctx, bufIOSeq, vfs.ReadOptions{}); !linuxerr.Equals(linuxerr.EISDIR, err) {
t.Errorf("wrong error reading directory: %v", err)
}
}
@@ -237,7 +237,7 @@ func TestTasks(t *testing.T) {
s.Creds,
s.PathOpAtRoot("/proc/9999"),
&vfs.OpenOptions{},
- ); err != syserror.ENOENT {
+ ); !linuxerr.Equals(linuxerr.ENOENT, err) {
t.Fatalf("wrong error from vfsfs.OpenAt(/proc/9999): %v", err)
}
}
diff --git a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go
index e039ec45e..7240563d7 100644
--- a/pkg/sentry/fsimpl/proc/yama.go
+++ b/pkg/sentry/fsimpl/proc/yama.go
@@ -21,11 +21,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -56,7 +56,7 @@ func (s *yamaPtraceScope) Generate(ctx context.Context, buf *bytes.Buffer) error
func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
if offset != 0 {
// Ignore partial writes.
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if src.NumBytes() == 0 {
return 0, nil
@@ -73,7 +73,7 @@ func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, off
// We do not support YAMA levels > YAMA_SCOPE_RELATIONAL.
if v < linux.YAMA_SCOPE_DISABLED || v > linux.YAMA_SCOPE_RELATIONAL {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
atomic.StoreInt32(s.level, v)
diff --git a/pkg/sentry/fsimpl/signalfd/BUILD b/pkg/sentry/fsimpl/signalfd/BUILD
index adb610213..403c6f254 100644
--- a/pkg/sentry/fsimpl/signalfd/BUILD
+++ b/pkg/sentry/fsimpl/signalfd/BUILD
@@ -9,10 +9,10 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/sentry/kernel",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/fsimpl/signalfd/signalfd.go b/pkg/sentry/fsimpl/signalfd/signalfd.go
index a7f5928b7..bdb03ef96 100644
--- a/pkg/sentry/fsimpl/signalfd/signalfd.go
+++ b/pkg/sentry/fsimpl/signalfd/signalfd.go
@@ -18,10 +18,10 @@ package signalfd
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -91,7 +91,7 @@ func (sfd *SignalFileDescription) Read(ctx context.Context, dst usermem.IOSequen
info, err := sfd.target.Sigtimedwait(sfd.Mask(), 0)
if err != nil {
// There must be no signal available.
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// Copy out the signal info using the specified format.
diff --git a/pkg/sentry/fsimpl/sockfs/BUILD b/pkg/sentry/fsimpl/sockfs/BUILD
index 9453277b8..9defca936 100644
--- a/pkg/sentry/fsimpl/sockfs/BUILD
+++ b/pkg/sentry/fsimpl/sockfs/BUILD
@@ -9,10 +9,10 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
],
)
diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go
index 735756280..75934ecd0 100644
--- a/pkg/sentry/fsimpl/sockfs/sockfs.go
+++ b/pkg/sentry/fsimpl/sockfs/sockfs.go
@@ -20,11 +20,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// filesystemType implements vfs.FilesystemType.
@@ -102,7 +102,7 @@ type inode struct {
// Open implements kernfs.Inode.Open.
func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- return nil, syserror.ENXIO
+ return nil, linuxerr.ENXIO
}
// StatFS implements kernfs.Inode.StatFS.
diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD
index 09043b572..ab21f028e 100644
--- a/pkg/sentry/fsimpl/sys/BUILD
+++ b/pkg/sentry/fsimpl/sys/BUILD
@@ -26,6 +26,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/context",
"//pkg/coverage",
+ "//pkg/errors/linuxerr",
"//pkg/log",
"//pkg/refs",
"//pkg/refsvfs2",
@@ -35,7 +36,6 @@ go_library(
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go
index b13f141a8..51f0bf3d8 100644
--- a/pkg/sentry/fsimpl/sys/kcov.go
+++ b/pkg/sentry/fsimpl/sys/kcov.go
@@ -17,13 +17,13 @@ package sys
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -85,11 +85,11 @@ func (fd *kcovFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallAr
case linux.KCOV_DISABLE:
if arg != 0 {
// This arg is unused; it should be 0.
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
return 0, fd.kcov.DisableTrace(ctx)
default:
- return 0, syserror.ENOTTY
+ return 0, linuxerr.ENOTTY
}
}
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 14eb10dcd..f322d2747 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -23,12 +23,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/coverage"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
const (
@@ -74,7 +74,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
maxCachedDentries, err = strconv.ParseUint(str, 10, 64)
if err != nil {
ctx.Warningf("sys.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
}
@@ -174,7 +174,7 @@ func (fs *filesystem) newDir(ctx context.Context, creds *auth.Credentials, mode
// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
+ return linuxerr.EPERM
}
// Open implements kernfs.Inode.Open.
diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go
index 97aa20cd1..473b41cff 100644
--- a/pkg/sentry/fsimpl/testutil/kernel.go
+++ b/pkg/sentry/fsimpl/testutil/kernel.go
@@ -80,12 +80,8 @@ func Boot() (*kernel.Kernel, error) {
}
// Create timekeeper.
- tk, err := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange())
- if err != nil {
- return nil, fmt.Errorf("creating timekeeper: %v", err)
- }
+ tk := kernel.NewTimekeeper(k, vdso.ParamPage.FileRange())
tk.SetClocks(time.NewCalibratedClocks())
- k.SetTimekeeper(tk)
creds := auth.NewRootCredentials(auth.NewRootUserNamespace())
@@ -94,6 +90,7 @@ func Boot() (*kernel.Kernel, error) {
if err = k.Init(kernel.InitKernelArgs{
ApplicationCores: uint(runtime.GOMAXPROCS(-1)),
FeatureSet: cpuid.HostFeatureSet(),
+ Timekeeper: tk,
RootUserNamespace: creds.UserNamespace,
Vdso: vdso,
RootUTSNamespace: kernel.NewUTSNamespace("hostname", "domain", creds.UserNamespace),
diff --git a/pkg/sentry/fsimpl/timerfd/BUILD b/pkg/sentry/fsimpl/timerfd/BUILD
index 7ce7dc429..2b83d7d9a 100644
--- a/pkg/sentry/fsimpl/timerfd/BUILD
+++ b/pkg/sentry/fsimpl/timerfd/BUILD
@@ -8,10 +8,10 @@ go_library(
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/hostarch",
"//pkg/sentry/kernel/time",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
"//pkg/waiter",
],
diff --git a/pkg/sentry/fsimpl/timerfd/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go
index cbb8b67c5..68b785791 100644
--- a/pkg/sentry/fsimpl/timerfd/timerfd.go
+++ b/pkg/sentry/fsimpl/timerfd/timerfd.go
@@ -19,10 +19,10 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -69,7 +69,7 @@ func New(ctx context.Context, vfsObj *vfs.VirtualFilesystem, clock ktime.Clock,
func (tfd *TimerFileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
const sizeofUint64 = 8
if dst.NumBytes() < sizeofUint64 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if val := atomic.SwapUint64(&tfd.val, 0); val != 0 {
var buf [sizeofUint64]byte
@@ -81,7 +81,7 @@ func (tfd *TimerFileDescription) Read(ctx context.Context, dst usermem.IOSequenc
}
return sizeofUint64, nil
}
- return 0, syserror.ErrWouldBlock
+ return 0, linuxerr.ErrWouldBlock
}
// Clock returns the timer fd's Clock.
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index e21fddd7f..94486bb63 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -58,6 +58,7 @@ go_library(
"//pkg/abi/linux",
"//pkg/amutex",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/hostarch",
"//pkg/log",
@@ -81,7 +82,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sentry/vfs/memxattr",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
@@ -94,6 +94,7 @@ go_test(
":tmpfs",
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/refs",
"//pkg/sentry/contexttest",
@@ -101,7 +102,6 @@ go_test(
"//pkg/sentry/fs/tmpfs",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
],
)
@@ -118,12 +118,12 @@ go_test(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/sentry/contexttest",
"//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
index 3cc63e732..2c29343c1 100644
--- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
@@ -30,7 +31,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Differences from stat_benchmark:
@@ -68,7 +68,7 @@ func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent
rel = wd
} else {
// Need to extract the given FD.
- return syserror.EBADF
+ return linuxerr.EBADF
}
// Lookup the node.
@@ -146,7 +146,7 @@ func BenchmarkVFS1TmpfsStat(b *testing.B) {
for i := 0; i < b.N; i++ {
err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
if dirPath && !fs.IsDir(d.Inode.StableAttr) {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
uattr, err := d.Inode.UnstableAttr(ctx)
if err != nil {
@@ -341,7 +341,7 @@ func BenchmarkVFS1TmpfsMountStat(b *testing.B) {
for i := 0; i < b.N; i++ {
err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
if dirPath && !fs.IsDir(d.Inode.StableAttr) {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
uattr, err := d.Inode.UnstableAttr(ctx)
if err != nil {
diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go
index e8d256495..c25494c0b 100644
--- a/pkg/sentry/fsimpl/tmpfs/directory.go
+++ b/pkg/sentry/fsimpl/tmpfs/directory.go
@@ -19,10 +19,10 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// +stateify savable
@@ -196,10 +196,10 @@ func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (in
case linux.SEEK_CUR:
offset += fd.off
default:
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index f0f4297ef..e067f136e 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -20,12 +20,12 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/fsmetric"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Sync implements vfs.FilesystemImpl.Sync.
@@ -45,7 +45,7 @@ func (fs *filesystem) Sync(ctx context.Context) error {
func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
dir, ok := d.inode.impl.(*directory)
if !ok {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
@@ -70,11 +70,11 @@ afterSymlink:
return d.parent, nil
}
if len(name) > linux.NAME_MAX {
- return nil, syserror.ENAMETOOLONG
+ return nil, linuxerr.ENAMETOOLONG
}
child, ok := dir.childMap[name]
if !ok {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
return nil, err
@@ -112,7 +112,7 @@ func walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry)
}
dir, ok := d.inode.impl.(*directory)
if !ok {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return dir, nil
}
@@ -132,7 +132,7 @@ func resolveLocked(ctx context.Context, rp *vfs.ResolvingPath) (*dentry, error)
d = next
}
if rp.MustBeDir() && !d.inode.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return d, nil
}
@@ -161,21 +161,21 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
}
name := rp.Component()
if name == "." || name == ".." {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
if len(name) > linux.NAME_MAX {
- return syserror.ENAMETOOLONG
+ return linuxerr.ENAMETOOLONG
}
if _, ok := parentDir.childMap[name]; ok {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
if !dir && rp.MustBeDir() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// tmpfs never calls VFS.InvalidateDentry(), so parentDir.dentry can only
// be dead if it was deleted.
if parentDir.dentry.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
mnt := rp.Mount()
if err := mnt.CheckBeginWrite(); err != nil {
@@ -220,7 +220,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
}
if opts.CheckSearchable {
if !d.inode.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
@@ -246,21 +246,21 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa
func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
return fs.doCreateAt(ctx, rp, false /* dir */, func(parentDir *directory, name string) error {
if rp.Mount() != vd.Mount() {
- return syserror.EXDEV
+ return linuxerr.EXDEV
}
d := vd.Dentry().Impl().(*dentry)
i := d.inode
if i.isDir() {
- return syserror.EPERM
+ return linuxerr.EPERM
}
if err := vfs.MayLink(auth.CredentialsFromContext(ctx), linux.FileMode(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil {
return err
}
if i.nlink == 0 {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if i.nlink == maxLinks {
- return syserror.EMLINK
+ return linuxerr.EMLINK
}
i.incLinksLocked()
i.watches.Notify(ctx, "", linux.IN_ATTRIB, 0, vfs.InodeEvent, false /* unlinked */)
@@ -274,7 +274,7 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return fs.doCreateAt(ctx, rp, true /* dir */, func(parentDir *directory, name string) error {
creds := rp.Credentials()
if parentDir.inode.nlink == maxLinks {
- return syserror.EMLINK
+ return linuxerr.EMLINK
}
parentDir.inode.incLinksLocked() // from child's ".."
childDir := fs.newDirectory(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, parentDir)
@@ -300,7 +300,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
case linux.S_IFSOCK:
childInode = fs.newSocketFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode, opts.Endpoint, parentDir)
default:
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
child := fs.newDentry(childInode)
parentDir.insertChildLocked(child, name)
@@ -312,7 +312,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
if opts.Flags&linux.O_TMPFILE != 0 {
// Not yet supported.
- return nil, syserror.EOPNOTSUPP
+ return nil, linuxerr.EOPNOTSUPP
}
// Handle O_CREAT and !O_CREAT separately, since in the latter case we
@@ -344,10 +344,10 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if rp.Done() {
// Reject attempts to open mount root directory with O_CREAT.
if rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if mustCreate {
- return nil, syserror.EEXIST
+ return nil, linuxerr.EEXIST
}
start.IncRef()
defer start.DecRef(ctx)
@@ -365,14 +365,14 @@ afterTrailingSymlink:
}
// Reject attempts to open directories with O_CREAT.
if rp.MustBeDir() {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
name := rp.Component()
if name == "." || name == ".." {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
if len(name) > linux.NAME_MAX {
- return nil, syserror.ENAMETOOLONG
+ return nil, linuxerr.ENAMETOOLONG
}
// Determine whether or not we need to create a file.
child, ok := parentDir.childMap[name]
@@ -401,7 +401,7 @@ afterTrailingSymlink:
return fd, nil
}
if mustCreate {
- return nil, syserror.EEXIST
+ return nil, linuxerr.EEXIST
}
// Is the file mounted over?
if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
@@ -418,7 +418,7 @@ afterTrailingSymlink:
goto afterTrailingSymlink
}
if rp.MustBeDir() && !child.inode.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
child.IncRef()
defer child.DecRef(ctx)
@@ -456,7 +456,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
case *directory:
// Can't open directories writably.
if ats&vfs.MayWrite != 0 {
- return nil, syserror.EISDIR
+ return nil, linuxerr.EISDIR
}
var fd directoryFD
fd.LockFD.Init(&d.inode.locks)
@@ -466,13 +466,13 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
return &fd.vfsfd, nil
case *symlink:
// Can't open symlinks without O_PATH, which is handled at the VFS layer.
- return nil, syserror.ELOOP
+ return nil, linuxerr.ELOOP
case *namedPipe:
return impl.pipe.Open(ctx, rp.Mount(), &d.vfsd, opts.Flags, &d.inode.locks)
case *deviceFile:
return rp.VirtualFilesystem().OpenDeviceSpecialFile(ctx, rp.Mount(), &d.vfsd, impl.kind, impl.major, impl.minor, opts)
case *socketFile:
- return nil, syserror.ENXIO
+ return nil, linuxerr.ENXIO
default:
panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl))
}
@@ -488,7 +488,7 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
}
symlink, ok := d.inode.impl.(*symlink)
if !ok {
- return "", syserror.EINVAL
+ return "", linuxerr.EINVAL
}
symlink.inode.touchAtime(rp.Mount())
return symlink.target, nil
@@ -506,19 +506,19 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
// TODO(b/145974740): Support other renameat2 flags.
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
newName := rp.Component()
if newName == "." || newName == ".." {
if opts.Flags&linux.RENAME_NOREPLACE != 0 {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
- return syserror.EBUSY
+ return linuxerr.EBUSY
}
mnt := rp.Mount()
if mnt != oldParentVD.Mount() {
- return syserror.EXDEV
+ return linuxerr.EXDEV
}
if err := mnt.CheckBeginWrite(); err != nil {
return err
@@ -531,7 +531,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
renamed, ok := oldParentDir.childMap[oldName]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := oldParentDir.mayDelete(rp.Credentials(), renamed); err != nil {
return err
@@ -541,7 +541,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
// mounted filesystem.
if renamed.inode.isDir() {
if renamed == &newParentDir.dentry || genericIsAncestorDentry(renamed, &newParentDir.dentry) {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
if oldParentDir != newParentDir {
// Writability is needed to change renamed's "..".
@@ -551,7 +551,7 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
}
} else {
if opts.MustBeDir || rp.MustBeDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
}
@@ -561,33 +561,33 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
replaced, ok := newParentDir.childMap[newName]
if ok {
if opts.Flags&linux.RENAME_NOREPLACE != 0 {
- return syserror.EEXIST
+ return linuxerr.EEXIST
}
replacedDir, ok := replaced.inode.impl.(*directory)
if ok {
if !renamed.inode.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if len(replacedDir.childMap) != 0 {
- return syserror.ENOTEMPTY
+ return linuxerr.ENOTEMPTY
}
} else {
if rp.MustBeDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
if renamed.inode.isDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
}
} else {
if renamed.inode.isDir() && newParentDir.inode.nlink == maxLinks {
- return syserror.EMLINK
+ return linuxerr.EMLINK
}
}
// tmpfs never calls VFS.InvalidateDentry(), so newParentDir.dentry can
// only be dead if it was deleted.
if newParentDir.dentry.vfsd.IsDead() {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
// Linux places this check before some of those above; we do it here for
@@ -646,24 +646,24 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
}
name := rp.Component()
if name == "." {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
if name == ".." {
- return syserror.ENOTEMPTY
+ return linuxerr.ENOTEMPTY
}
child, ok := parentDir.childMap[name]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parentDir.mayDelete(rp.Credentials(), child); err != nil {
return err
}
childDir, ok := child.inode.impl.(*directory)
if !ok {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
if len(childDir.childMap) != 0 {
- return syserror.ENOTEMPTY
+ return linuxerr.ENOTEMPTY
}
mnt := rp.Mount()
if err := mnt.CheckBeginWrite(); err != nil {
@@ -753,20 +753,20 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
}
name := rp.Component()
if name == "." || name == ".." {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
child, ok := parentDir.childMap[name]
if !ok {
- return syserror.ENOENT
+ return linuxerr.ENOENT
}
if err := parentDir.mayDelete(rp.Credentials(), child); err != nil {
return err
}
if child.inode.isDir() {
- return syserror.EISDIR
+ return linuxerr.EISDIR
}
if rp.MustBeDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
mnt := rp.Mount()
if err := mnt.CheckBeginWrite(); err != nil {
@@ -806,11 +806,11 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
switch impl := d.inode.impl.(type) {
case *socketFile:
if impl.ep == nil {
- return nil, syserror.ECONNREFUSED
+ return nil, linuxerr.ECONNREFUSED
}
return impl.ep, nil
default:
- return nil, syserror.ECONNREFUSED
+ return nil, linuxerr.ECONNREFUSED
}
}
diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
index 2f856ce36..99afd9817 100644
--- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
@@ -20,11 +20,11 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -114,7 +114,7 @@ func TestNonblockingWriteError(t *testing.T) {
}
openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK}
_, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
- if err != syserror.ENXIO {
+ if !linuxerr.Equals(linuxerr.ENXIO, err) {
t.Fatalf("expected ENXIO, but got error: %v", err)
}
}
@@ -201,7 +201,7 @@ func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) {
readData := make([]byte, 1)
dst := usermem.BytesIOSequence(readData)
bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{})
- if err != syserror.ErrWouldBlock {
+ if err != linuxerr.ErrWouldBlock {
t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err)
}
if bytesRead != 0 {
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index c45bddff6..0f2ac6144 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -22,6 +22,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -33,7 +34,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -185,7 +185,7 @@ func (rf *regularFile) truncateLocked(newSize uint64) (bool, error) {
// Can we grow the file?
if rf.seals&linux.F_SEAL_GROW != 0 {
rf.dataMu.Unlock()
- return false, syserror.EPERM
+ return false, linuxerr.EPERM
}
// We only need to update the file size.
atomic.StoreUint64(&rf.size, newSize)
@@ -196,7 +196,7 @@ func (rf *regularFile) truncateLocked(newSize uint64) (bool, error) {
// We are shrinking the file. First check if this is allowed.
if rf.seals&linux.F_SEAL_SHRINK != 0 {
rf.dataMu.Unlock()
- return false, syserror.EPERM
+ return false, linuxerr.EPERM
}
// Update the file size.
@@ -233,7 +233,7 @@ func (rf *regularFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, a
// Reject writable mapping if F_SEAL_WRITE is set.
if rf.seals&linux.F_SEAL_WRITE != 0 && writable {
- return syserror.EPERM
+ return linuxerr.EPERM
}
rf.mappings.AddMapping(ms, ar, offset, writable)
@@ -366,7 +366,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
fsmetric.TmpfsReads.Increment()
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
// Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since
@@ -374,7 +374,7 @@ func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
//
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, linuxerr.EOPNOTSUPP
}
if dst.NumBytes() == 0 {
@@ -407,7 +407,7 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
// final offset should be ignored by PWrite.
func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
if offset < 0 {
- return 0, offset, syserror.EINVAL
+ return 0, offset, linuxerr.EINVAL
}
// Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since
@@ -415,7 +415,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
//
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 {
- return 0, offset, syserror.EOPNOTSUPP
+ return 0, offset, linuxerr.EOPNOTSUPP
}
srclen := src.NumBytes()
@@ -432,7 +432,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
}
if end := offset + srclen; end < offset {
// Overflow.
- return 0, offset, syserror.EINVAL
+ return 0, offset, linuxerr.EINVAL
}
srclen, err = vfs.CheckLimit(ctx, offset, srclen)
@@ -476,10 +476,10 @@ func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (
case linux.SEEK_END:
offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size))
default:
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
fd.off = offset
return offset, nil
@@ -594,7 +594,7 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64,
// Check if seals prevent either file growth or all writes.
switch {
case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed
- return 0, syserror.EPERM
+ return 0, linuxerr.EPERM
case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed
// When growth is sealed, Linux effectively allows writes which would
// normally grow the file to partially succeed up to the current EOF,
@@ -615,7 +615,7 @@ func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64,
}
if end <= rw.off {
// Truncation would result in no data being written.
- return 0, syserror.EPERM
+ return 0, linuxerr.EPERM
}
}
@@ -684,7 +684,7 @@ exitLoop:
func GetSeals(fd *vfs.FileDescription) (uint32, error) {
f, ok := fd.Impl().(*regularFileFD)
if !ok {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
rf := f.inode().impl.(*regularFile)
rf.dataMu.RLock()
@@ -696,7 +696,7 @@ func GetSeals(fd *vfs.FileDescription) (uint32, error) {
func AddSeals(fd *vfs.FileDescription, val uint32) error {
f, ok := fd.Impl().(*regularFileFD)
if !ok {
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
rf := f.inode().impl.(*regularFile)
rf.mapsMu.Lock()
@@ -706,13 +706,13 @@ func AddSeals(fd *vfs.FileDescription, val uint32) error {
if rf.seals&linux.F_SEAL_SEAL != 0 {
// Seal applied which prevents addition of any new seals.
- return syserror.EPERM
+ return linuxerr.EPERM
}
// F_SEAL_WRITE can only be added if there are no active writable maps.
if rf.seals&linux.F_SEAL_WRITE == 0 && val&linux.F_SEAL_WRITE != 0 {
if rf.writableMappingPages > 0 {
- return syserror.EBUSY
+ return linuxerr.EBUSY
}
}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
index 4393cc13b..cb7711b39 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
@@ -21,10 +21,10 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs/lock"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -146,7 +146,7 @@ func TestLocks(t *testing.T) {
if err := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, nil); err != nil {
t.Fatalf("fd.Impl().LockBSD failed: err = %v", err)
}
- if got, want := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil), syserror.ErrWouldBlock; got != want {
+ if got, want := fd.Impl().LockBSD(ctx, uid2, 0 /* ownerPID */, lock.WriteLock, nil), linuxerr.ErrWouldBlock; got != want {
t.Fatalf("fd.Impl().LockBSD failed: got = %v, want = %v", got, want)
}
if err := fd.Impl().UnlockBSD(ctx, uid1); err != nil {
@@ -165,7 +165,7 @@ func TestLocks(t *testing.T) {
if err := fd.Impl().LockPOSIX(ctx, uid1, 0 /* ownerPID */, lock.WriteLock, lock.LockRange{Start: 0, End: 1}, nil); err != nil {
t.Fatalf("fd.Impl().LockPOSIX failed: err = %v", err)
}
- if got, want := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil), syserror.ErrWouldBlock; got != want {
+ if got, want := fd.Impl().LockPOSIX(ctx, uid2, 0 /* ownerPID */, lock.ReadLock, lock.LockRange{Start: 0, End: 1}, nil), linuxerr.ErrWouldBlock; got != want {
t.Fatalf("fd.Impl().LockPOSIX failed: got = %v, want = %v", got, want)
}
if err := fd.Impl().UnlockPOSIX(ctx, uid1, lock.LockRange{Start: 0, End: 1}); err != nil {
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 6b4367c42..feafb06e4 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -36,6 +36,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/time"
@@ -43,7 +44,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sentry/vfs/memxattr"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Name is the default filesystem name.
@@ -138,7 +138,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
mode, err := strconv.ParseUint(modeStr, 8, 32)
if err != nil {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid mode: %q", modeStr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
rootMode = linux.FileMode(mode & 07777)
}
@@ -149,12 +149,12 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
uid, err := strconv.ParseUint(uidStr, 10, 32)
if err != nil {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid uid: %q", uidStr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
kuid := creds.UserNamespace.MapToKUID(auth.UID(uid))
if !kuid.Ok() {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unmapped uid: %d", uid)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
rootKUID = kuid
}
@@ -165,18 +165,18 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
gid, err := strconv.ParseUint(gidStr, 10, 32)
if err != nil {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: invalid gid: %q", gidStr)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
kgid := creds.UserNamespace.MapToKGID(auth.GID(gid))
if !kgid.Ok() {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unmapped gid: %d", gid)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
rootKGID = kgid
}
if len(mopts) != 0 {
ctx.Warningf("tmpfs.FilesystemType.GetFilesystem: unknown options: %v", mopts)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
devMinor, err := vfsObj.GetAnonBlockDevMinor()
@@ -396,8 +396,8 @@ func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth
}
// Inherit the group and setgid bit as in fs/inode.c:inode_init_owner().
- if parentDir != nil && parentDir.inode.mode&linux.S_ISGID == linux.S_ISGID {
- kgid = auth.KGID(parentDir.inode.gid)
+ if parentDir != nil && atomic.LoadUint32(&parentDir.inode.mode)&linux.S_ISGID == linux.S_ISGID {
+ kgid = auth.KGID(atomic.LoadUint32(&parentDir.inode.gid))
if mode&linux.S_IFDIR == linux.S_IFDIR {
mode |= linux.S_ISGID
}
@@ -527,7 +527,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs.
return nil
}
if stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME|linux.STATX_SIZE) != 0 {
- return syserror.EPERM
+ return linuxerr.EPERM
}
mode := linux.FileMode(atomic.LoadUint32(&i.mode))
if err := vfs.CheckSetStat(ctx, creds, opts, mode, auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil {
@@ -555,9 +555,9 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs.
needsCtimeBump = true
}
case *directory:
- return syserror.EISDIR
+ return linuxerr.EISDIR
default:
- return syserror.EINVAL
+ return linuxerr.EINVAL
}
}
if mask&linux.STATX_UID != 0 {
@@ -730,7 +730,7 @@ func checkXattrName(name string) error {
if strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
return nil
}
- return syserror.EOPNOTSUPP
+ return linuxerr.EOPNOTSUPP
}
func (i *inode) listXattr(creds *auth.Credentials, size uint64) ([]string, error) {
diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD
index d473a922d..c12abdf33 100644
--- a/pkg/sentry/fsimpl/verity/BUILD
+++ b/pkg/sentry/fsimpl/verity/BUILD
@@ -1,10 +1,24 @@
load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
licenses(["notice"])
+go_template_instance(
+ name = "dentry_list",
+ out = "dentry_list.go",
+ package = "verity",
+ prefix = "dentry",
+ template = "//pkg/ilist:generic_list",
+ types = {
+ "Element": "*dentry",
+ "Linker": "*dentry",
+ },
+)
+
go_library(
name = "verity",
srcs = [
+ "dentry_list.go",
"filesystem.go",
"save_restore.go",
"verity.go",
@@ -13,6 +27,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/hostarch",
"//pkg/marshal/primitive",
@@ -27,7 +42,6 @@ go_library(
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/vfs",
"//pkg/sync",
- "//pkg/syserror",
"//pkg/usermem",
],
)
@@ -41,6 +55,7 @@ go_test(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/errors/linuxerr",
"//pkg/fspath",
"//pkg/sentry/arch",
"//pkg/sentry/fsimpl/testutil",
@@ -48,7 +63,6 @@ go_test(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "//pkg/syserror",
"//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
index 3582d14c9..52d47994d 100644
--- a/pkg/sentry/fsimpl/verity/filesystem.go
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -25,13 +25,13 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/merkletree"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -66,38 +66,23 @@ func putDentrySlice(ds *[]*dentry) {
dentrySlicePool.Put(ds)
}
-// renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls
-// dentry.checkDropLocked on all dentries in *ds with fs.renameMu locked for
+// renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls
+// dentry.checkCachingLocked on all dentries in *ds with fs.renameMu locked for
// writing.
//
// ds is a pointer-to-pointer since defer evaluates its arguments immediately,
// but dentry slices are allocated lazily, and it's much easier to say "defer
-// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
-// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
-func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
+// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
+// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
+// +checklocksrelease:fs.renameMu
+func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
fs.renameMu.RUnlock()
if *ds == nil {
return
}
- if len(**ds) != 0 {
- fs.renameMu.Lock()
- for _, d := range **ds {
- d.checkDropLocked(ctx)
- }
- fs.renameMu.Unlock()
- }
- putDentrySlice(*ds)
-}
-
-func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
- if *ds == nil {
- fs.renameMu.Unlock()
- return
- }
for _, d := range **ds {
- d.checkDropLocked(ctx)
+ d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
}
- fs.renameMu.Unlock()
putDentrySlice(*ds)
}
@@ -113,7 +98,7 @@ func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*de
// * !rp.Done().
func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, error) {
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
@@ -163,7 +148,7 @@ afterSymlink:
// verifyChildLocked verifies the hash of child against the already verified
// hash of the parent to ensure the child is expected. verifyChild triggers a
// sentry panic if unexpected modifications to the file system are detected. In
-// ErrorOnViolation mode it returns a syserror instead.
+// ErrorOnViolation mode it returns a linuxerr instead.
//
// Preconditions:
// * fs.renameMu must be locked.
@@ -195,7 +180,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi
// The Merkle tree file for the child should have been created and
// contains the expected xattrs. If the file or the xattr does not
// exist, it indicates unexpected modifications to the file system.
- if err == syserror.ENOENT || err == syserror.ENODATA {
+ if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) {
return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleOffsetInParentXattr, childPath, err))
}
if err != nil {
@@ -218,7 +203,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi
// The parent Merkle tree file should have been created. If it's
// missing, it indicates an unexpected modification to the file system.
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to open parent Merkle file for %s: %v", childPath, err))
}
if err != nil {
@@ -238,7 +223,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi
// The Merkle tree file for the child should have been created and
// contains the expected xattrs. If the file or the xattr does not
// exist, it indicates unexpected modifications to the file system.
- if err == syserror.ENOENT || err == syserror.ENODATA {
+ if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) {
return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleSizeXattr, childPath, err))
}
if err != nil {
@@ -261,7 +246,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi
Root: parent.lowerVD,
Start: parent.lowerVD,
}, &vfs.StatOptions{})
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
return nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get parent stat for %s: %v", childPath, err))
}
if err != nil {
@@ -282,7 +267,7 @@ func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, chi
Mode: uint32(parentStat.Mode),
UID: parentStat.UID,
GID: parentStat.GID,
- Children: parent.childrenNames,
+ Children: parent.childrenList,
HashAlgorithms: fs.alg.toLinuxHashAlg(),
ReadOffset: int64(offset),
ReadSize: int64(merkletree.DigestSize(fs.alg.toLinuxHashAlg())),
@@ -327,7 +312,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry
}, &vfs.OpenOptions{
Flags: linux.O_RDONLY,
})
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
return fs.alertIntegrityViolation(fmt.Sprintf("Failed to open merkle file for %s: %v", childPath, err))
}
if err != nil {
@@ -341,7 +326,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry
Size: sizeOfStringInt32,
})
- if err == syserror.ENODATA {
+ if linuxerr.Equals(linuxerr.ENODATA, err) {
return fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", merkleSizeXattr, childPath, err))
}
if err != nil {
@@ -359,7 +344,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry
Size: sizeOfStringInt32,
})
- if err == syserror.ENODATA {
+ if linuxerr.Equals(linuxerr.ENODATA, err) {
return fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", childrenOffsetXattr, childPath, err))
}
if err != nil {
@@ -375,7 +360,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry
Size: sizeOfStringInt32,
})
- if err == syserror.ENODATA {
+ if linuxerr.Equals(linuxerr.ENODATA, err) {
return fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", childrenSizeXattr, childPath, err))
}
if err != nil {
@@ -403,6 +388,9 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry
var buf bytes.Buffer
d.hashMu.RLock()
+
+ d.generateChildrenList()
+
params := &merkletree.VerifyParams{
Out: &buf,
Tree: &fdReader,
@@ -411,7 +399,7 @@ func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry
Mode: uint32(stat.Mode),
UID: stat.UID,
GID: stat.GID,
- Children: d.childrenNames,
+ Children: d.childrenList,
HashAlgorithms: fs.alg.toLinuxHashAlg(),
ReadOffset: 0,
// Set read size to 0 so only the metadata is verified.
@@ -465,7 +453,7 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s
}
childVD, err := parent.getLowerAt(ctx, vfsObj, name)
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
// The file was previously accessed. If the
// file does not exist now, it indicates an
// unexpected modification to the file system.
@@ -480,7 +468,7 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s
// The Merkle tree file was previous accessed. If it
// does not exist now, it indicates an unexpected
// modification to the file system.
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
return nil, fs.alertIntegrityViolation(fmt.Sprintf("Expected Merkle file for target %s but none found", path))
}
if err != nil {
@@ -541,7 +529,7 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry,
if parent.verityEnabled() {
if _, ok := parent.childrenNames[name]; !ok {
- return nil, syserror.ENOENT
+ return nil, linuxerr.ENOENT
}
}
@@ -551,7 +539,7 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry,
}
childVD, err := parent.getLowerAt(ctx, vfsObj, name)
- if parent.verityEnabled() && err == syserror.ENOENT {
+ if parent.verityEnabled() && linuxerr.Equals(linuxerr.ENOENT, err) {
return nil, fs.alertIntegrityViolation(fmt.Sprintf("file %s expected but not found", parentPath+"/"+name))
}
if err != nil {
@@ -564,7 +552,7 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry,
childMerkleVD, err := parent.getLowerAt(ctx, vfsObj, merklePrefix+name)
if err != nil {
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
if parent.verityEnabled() {
return nil, fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", parentPath+"/"+name))
}
@@ -589,23 +577,6 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry,
}
}
- // Clear the Merkle tree file if they are to be generated at runtime.
- // TODO(b/182315468): Optimize the Merkle tree generate process to
- // allow only updating certain files/directories.
- if fs.allowRuntimeEnable {
- childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
- Root: childMerkleVD,
- Start: childMerkleVD,
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR | linux.O_TRUNC,
- Mode: 0644,
- })
- if err != nil {
- return nil, err
- }
- childMerkleFD.DecRef(ctx)
- }
-
// The dentry needs to be cleaned up if any error occurs. IncRef will be
// called if a verity child dentry is successfully created.
defer childMerkleVD.DecRef(ctx)
@@ -679,7 +650,7 @@ func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving
d = next
}
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return d, nil
}
@@ -699,7 +670,7 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath,
d = next
}
if rp.MustBeDir() && !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
return d, nil
}
@@ -708,11 +679,11 @@ func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath,
func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
// Verity file system is read-only.
if ats&vfs.MayWrite != 0 {
- return syserror.EROFS
+ return linuxerr.EROFS
}
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return err
@@ -724,14 +695,14 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds
func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return nil, err
}
if opts.CheckSearchable {
if !d.isDir() {
- return nil, syserror.ENOTDIR
+ return nil, linuxerr.ENOTDIR
}
if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
@@ -745,7 +716,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
start := rp.Start().Impl().(*dentry)
d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
if err != nil {
@@ -758,31 +729,31 @@ func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPa
// LinkAt implements vfs.FilesystemImpl.LinkAt.
func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// MknodAt implements vfs.FilesystemImpl.MknodAt.
func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// OpenAt implements vfs.FilesystemImpl.OpenAt.
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
// Verity fs is read-only.
if opts.Flags&(linux.O_WRONLY|linux.O_CREAT) != 0 {
- return nil, syserror.EROFS
+ return nil, linuxerr.EROFS
}
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
start := rp.Start().Impl().(*dentry)
if rp.Done() {
@@ -826,7 +797,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
// Users should not open the Merkle tree files. Those are for verity fs
// use only.
if strings.Contains(d.name, merklePrefix) {
- return nil, syserror.EPERM
+ return nil, linuxerr.EPERM
}
ats := vfs.AccessTypesForOpenFlags(opts)
if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
@@ -835,7 +806,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
// Verity fs is read-only.
if ats&vfs.MayWrite != 0 {
- return nil, syserror.EROFS
+ return nil, linuxerr.EROFS
}
// Get the path to the target file. This is only used to provide path
@@ -845,16 +816,23 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
return nil, err
}
+ tmpOpts := *opts
+
+ // Open the lowerFD with O_PATH if a symlink is opened for verity.
+ if tmpOpts.Flags&linux.O_NOFOLLOW != 0 && d.isSymlink() {
+ tmpOpts.Flags |= linux.O_PATH
+ }
+
// Open the file in the underlying file system.
lowerFD, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
Root: d.lowerVD,
Start: d.lowerVD,
- }, opts)
+ }, &tmpOpts)
// The file should exist, as we succeeded in finding its dentry. If it's
// missing, it indicates an unexpected modification to the file system.
if err != nil {
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("File %s expected but not found", path))
}
return nil, err
@@ -877,7 +855,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
// dentry. If it's missing, it indicates an unexpected modification to
// the file system.
if err != nil {
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", path))
}
return nil, err
@@ -887,7 +865,6 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
// be called if a verity FD is successfully created.
defer merkleReader.DecRef(ctx)
- lowerFlags := lowerFD.StatusFlags()
lowerFDOpts := lowerFD.Options()
var merkleWriter *vfs.FileDescription
var parentMerkleWriter *vfs.FileDescription
@@ -902,7 +879,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
Flags: linux.O_WRONLY | linux.O_APPEND,
})
if err != nil {
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", path))
}
return nil, err
@@ -919,7 +896,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
Flags: linux.O_WRONLY | linux.O_APPEND,
})
if err != nil {
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
parentPath, _ := d.fs.vfsfs.VirtualFilesystem().PathnameWithDeleted(ctx, d.fs.rootDentry.lowerVD, d.parent.lowerVD)
return nil, d.fs.alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", parentPath))
}
@@ -940,7 +917,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
isDir: d.isDir(),
}
- if err := fd.vfsfd.Init(fd, lowerFlags, rp.Mount(), &d.vfsd, &lowerFDOpts); err != nil {
+ if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), &d.vfsd, &lowerFDOpts); err != nil {
return nil, err
}
lowerFD.IncRef()
@@ -958,7 +935,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return "", err
@@ -969,26 +946,26 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st
// RenameAt implements vfs.FilesystemImpl.RenameAt.
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// StatAt implements vfs.FilesystemImpl.StatAt.
func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return linux.Statx{}, err
@@ -1021,31 +998,31 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu
// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
if _, err := fs.resolveLocked(ctx, rp, &ds); err != nil {
return nil, err
}
- return nil, syserror.ECONNREFUSED
+ return nil, linuxerr.ECONNREFUSED
}
// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return nil, err
@@ -1061,7 +1038,7 @@ func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
var ds *[]*dentry
fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
d, err := fs.resolveLocked(ctx, rp, &ds)
if err != nil {
return "", err
@@ -1076,13 +1053,13 @@ func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
// Verity file system is read-only.
- return syserror.EROFS
+ return linuxerr.EROFS
}
// PrependPath implements vfs.FilesystemImpl.PrependPath.
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index 969003613..d2526263c 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -23,10 +23,12 @@
// Lock order:
//
// filesystem.renameMu
-// dentry.dirMu
-// fileDescription.mu
-// filesystem.verityMu
-// dentry.hashMu
+// dentry.cachingMu
+// filesystem.cacheMu
+// dentry.dirMu
+// fileDescription.mu
+// filesystem.verityMu
+// dentry.hashMu
//
// Locking dentry.dirMu in multiple dentries requires that parent dentries are
// locked before child dentries, and that filesystem.renameMu is locked to
@@ -39,12 +41,14 @@ import (
"encoding/json"
"fmt"
"math"
+ "sort"
"strconv"
"strings"
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
@@ -58,7 +62,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -95,6 +98,9 @@ const (
// sizeOfStringInt32 is the size for a 32 bit integer stored as string in
// extended attributes. The maximum value of a 32 bit integer has 10 digits.
sizeOfStringInt32 = 10
+
+ // defaultMaxCachedDentries is the default limit of dentry cache.
+ defaultMaxCachedDentries = uint64(1000)
)
var (
@@ -105,9 +111,10 @@ var (
// Mount option names for verityfs.
const (
- moptLowerPath = "lower_path"
- moptRootHash = "root_hash"
- moptRootName = "root_name"
+ moptLowerPath = "lower_path"
+ moptRootHash = "root_hash"
+ moptRootName = "root_name"
+ moptDentryCacheLimit = "dentry_cache_limit"
)
// HashAlgorithm is a type specifying the algorithm used to hash the file
@@ -187,6 +194,17 @@ type filesystem struct {
// dentries.
renameMu sync.RWMutex `state:"nosave"`
+ // cachedDentries contains all dentries with 0 references. (Due to race
+ // conditions, it may also contain dentries with non-zero references.)
+ // cachedDentriesLen is the number of dentries in cachedDentries. These
+ // fields are protected by cacheMu.
+ cacheMu sync.Mutex `state:"nosave"`
+ cachedDentries dentryList
+ cachedDentriesLen uint64
+
+ // maxCachedDentries is the maximum size of filesystem.cachedDentries.
+ maxCachedDentries uint64
+
// verityMu synchronizes enabling verity files, protects files or
// directories from being enabled by different threads simultaneously.
// It also ensures that verity does not access files that are being
@@ -197,6 +215,10 @@ type filesystem struct {
// is for the whole file system to ensure that no more than one file is
// enabled the same time.
verityMu sync.RWMutex `state:"nosave"`
+
+ // released is nonzero once filesystem.Release has been called. It is accessed
+ // with atomic memory operations.
+ released int32
}
// InternalFilesystemOptions may be passed as
@@ -237,7 +259,7 @@ func (FilesystemType) Release(ctx context.Context) {}
// mode, it returns EIO, otherwise it panic.
func (fs *filesystem) alertIntegrityViolation(msg string) error {
if fs.action == ErrorOnViolation {
- return syserror.EIO
+ return linuxerr.EIO
}
panic(msg)
}
@@ -251,7 +273,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
hash, err := hex.DecodeString(encodedRootHash)
if err != nil {
ctx.Warningf("verity.FilesystemType.GetFilesystem: Failed to decode root hash: %v", err)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
rootHash = hash
}
@@ -265,23 +287,33 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
delete(mopts, moptRootName)
rootName = root
}
+ maxCachedDentries := defaultMaxCachedDentries
+ if str, ok := mopts[moptDentryCacheLimit]; ok {
+ delete(mopts, moptDentryCacheLimit)
+ maxCD, err := strconv.ParseUint(str, 10, 64)
+ if err != nil {
+ ctx.Warningf("verity.FilesystemType.GetFilesystem: invalid dentry cache limit: %s=%s", moptDentryCacheLimit, str)
+ return nil, nil, linuxerr.EINVAL
+ }
+ maxCachedDentries = maxCD
+ }
// Check for unparsed options.
if len(mopts) != 0 {
ctx.Warningf("verity.FilesystemType.GetFilesystem: unknown options: %v", mopts)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
// Handle internal options.
iopts, ok := opts.InternalData.(InternalFilesystemOptions)
if len(lowerPathname) == 0 && !ok {
ctx.Warningf("verity.FilesystemType.GetFilesystem: missing verity configs")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
if len(lowerPathname) != 0 {
if ok {
ctx.Warningf("verity.FilesystemType.GetFilesystem: unexpected verity configs with specified lower path")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
iopts = InternalFilesystemOptions{
AllowRuntimeEnable: len(rootHash) == 0,
@@ -300,7 +332,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
lowerPath := fspath.Parse(lowerPathname)
if !lowerPath.Absolute {
ctx.Infof("verity.FilesystemType.GetFilesystem: lower_path %q must be absolute", lowerPathname)
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
var err error
mountedLowerVD, err = vfsObj.GetDentryAt(ctx, creds, &vfs.PathOperation{
@@ -338,12 +370,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
action: iopts.Action,
opts: opts.Data,
allowRuntimeEnable: iopts.AllowRuntimeEnable,
+ maxCachedDentries: maxCachedDentries,
}
fs.vfsfs.Init(vfsObj, &fstype, fs)
// Construct the root dentry.
d := fs.newDentry()
- d.refs = 1
+ // Set the root's reference count to 2. One reference is returned to
+ // the caller, and the other is held by fs to prevent the root from
+ // being "cached" and subsequently evicted.
+ d.refs = 2
lowerVD := vfs.MakeVirtualDentry(lowerMount, lowerMount.Root())
lowerVD.IncRef()
d.lowerVD = lowerVD
@@ -358,7 +394,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// If runtime enable is allowed, the root merkle tree may be absent. We
// should create the tree file.
- if err == syserror.ENOENT && fs.allowRuntimeEnable {
+ if linuxerr.Equals(linuxerr.ENOENT, err) && fs.allowRuntimeEnable {
lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
Root: lowerVD,
Start: lowerVD,
@@ -439,7 +475,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if !d.isDir() {
ctx.Warningf("verity root must be a directory")
- return nil, nil, syserror.EINVAL
+ return nil, nil, linuxerr.EINVAL
}
if !fs.allowRuntimeEnable {
@@ -451,7 +487,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
Name: childrenOffsetXattr,
Size: sizeOfStringInt32,
})
- if err == syserror.ENOENT || err == syserror.ENODATA {
+ if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) {
return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenOffsetXattr, err))
}
if err != nil {
@@ -470,7 +506,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
Name: childrenSizeXattr,
Size: sizeOfStringInt32,
})
- if err == syserror.ENOENT || err == syserror.ENODATA {
+ if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENODATA, err) {
return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenSizeXattr, err))
}
if err != nil {
@@ -487,7 +523,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}, &vfs.OpenOptions{
Flags: linux.O_RDONLY,
})
- if err == syserror.ENOENT {
+ if linuxerr.Equals(linuxerr.ENOENT, err) {
return nil, nil, fs.alertIntegrityViolation(fmt.Sprintf("Failed to open root Merkle file: %v", err))
}
if err != nil {
@@ -508,6 +544,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if err := fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil {
return nil, nil, err
}
+ d.generateChildrenList()
}
d.vfsd.Init(d)
@@ -517,7 +554,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// Release implements vfs.FilesystemImpl.Release.
func (fs *filesystem) Release(ctx context.Context) {
+ atomic.StoreInt32(&fs.released, 1)
fs.lowerMount.DecRef(ctx)
+
+ fs.renameMu.Lock()
+ fs.evictAllCachedDentriesLocked(ctx)
+ fs.renameMu.Unlock()
+
+ // An extra reference was held by the filesystem on the root to prevent
+ // it from being cached/evicted.
+ fs.rootDentry.DecRef(ctx)
}
// MountOptions implements vfs.FilesystemImpl.MountOptions.
@@ -531,6 +577,11 @@ func (fs *filesystem) MountOptions() string {
type dentry struct {
vfsd vfs.Dentry
+ // refs is the reference count. Each dentry holds a reference on its
+ // parent, even if disowned. When refs reaches 0, the dentry may be
+ // added to the cache or destroyed. If refs == -1, the dentry has
+ // already been destroyed. refs is accessed using atomic memory
+ // operations.
refs int64
// fs is the owning filesystem. fs is immutable.
@@ -564,6 +615,11 @@ type dentry struct {
// populated by enableVerity. childrenNames is also protected by dirMu.
childrenNames map[string]struct{}
+ // childrenList is a complete sorted list of childrenNames. This list
+ // is generated when verity is enabled, or the first time the file is
+ // verified in non runtime enable mode.
+ childrenList []string
+
// lowerVD is the VirtualDentry in the underlying file system. It is
// never modified after initialized.
lowerVD vfs.VirtualDentry
@@ -580,13 +636,23 @@ type dentry struct {
// is protected by hashMu.
hashMu sync.RWMutex `state:"nosave"`
hash []byte
+
+ // cachingMu is used to synchronize concurrent dentry caching attempts on
+ // this dentry.
+ cachingMu sync.Mutex `state:"nosave"`
+
+ // If cached is true, dentryEntry links dentry into
+ // filesystem.cachedDentries. cached and dentryEntry are protected by
+ // cachingMu.
+ cached bool
+ dentryEntry
}
// newDentry creates a new dentry representing the given verity file. The
-// dentry initially has no references; it is the caller's responsibility to set
-// the dentry's reference count and/or call dentry.destroy() as appropriate.
-// The dentry is initially invalid in that it contains no underlying dentry;
-// the caller is responsible for setting them.
+// dentry initially has no references, but is not cached; it is the caller's
+// responsibility to set the dentry's reference count and/or call
+// dentry.destroy() as appropriate. The dentry is initially invalid in that it
+// contains no underlying dentry; the caller is responsible for setting them.
func (fs *filesystem) newDentry() *dentry {
d := &dentry{
fs: fs,
@@ -622,42 +688,23 @@ func (d *dentry) TryIncRef() bool {
// DecRef implements vfs.DentryImpl.DecRef.
func (d *dentry) DecRef(ctx context.Context) {
- r := atomic.AddInt64(&d.refs, -1)
- if d.LogRefs() {
- refsvfs2.LogDecRef(d, r)
- }
- if r == 0 {
- d.fs.renameMu.Lock()
- d.checkDropLocked(ctx)
- d.fs.renameMu.Unlock()
- } else if r < 0 {
- panic("verity.dentry.DecRef() called without holding a reference")
+ if d.decRefNoCaching() == 0 {
+ d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
}
}
-func (d *dentry) decRefLocked(ctx context.Context) {
+// decRefNoCaching decrements d's reference count without calling
+// d.checkCachingLocked, even if d's reference count reaches 0; callers are
+// responsible for ensuring that d.checkCachingLocked will be called later.
+func (d *dentry) decRefNoCaching() int64 {
r := atomic.AddInt64(&d.refs, -1)
if d.LogRefs() {
refsvfs2.LogDecRef(d, r)
}
- if r == 0 {
- d.checkDropLocked(ctx)
- } else if r < 0 {
- panic("verity.dentry.decRefLocked() called without holding a reference")
- }
-}
-
-// checkDropLocked should be called after d's reference count becomes 0 or it
-// becomes deleted.
-func (d *dentry) checkDropLocked(ctx context.Context) {
- // Dentries with a positive reference count must be retained. Dentries
- // with a negative reference count have already been destroyed.
- if atomic.LoadInt64(&d.refs) != 0 {
- return
+ if r < 0 {
+ panic("verity.dentry.decRefNoCaching() called without holding a reference")
}
- // Refs is still zero; destroy it.
- d.destroyLocked(ctx)
- return
+ return r
}
// destroyLocked destroys the dentry.
@@ -676,6 +723,12 @@ func (d *dentry) destroyLocked(ctx context.Context) {
panic("verity.dentry.destroyLocked() called with references on the dentry")
}
+ // Drop the reference held by d on its parent without recursively
+ // locking d.fs.renameMu.
+ if d.parent != nil && d.parent.decRefNoCaching() == 0 {
+ d.parent.checkCachingLocked(ctx, true /* renameMuWriteLocked */)
+ }
+
if d.lowerVD.Ok() {
d.lowerVD.DecRef(ctx)
}
@@ -688,7 +741,6 @@ func (d *dentry) destroyLocked(ctx context.Context) {
delete(d.parent.children, d.name)
}
d.parent.dirMu.Unlock()
- d.parent.decRefLocked(ctx)
}
refsvfs2.Unregister(d)
}
@@ -727,6 +779,140 @@ func (d *dentry) OnZeroWatches(context.Context) {
//TODO(b/159261227): Implement OnZeroWatches.
}
+// checkCachingLocked should be called after d's reference count becomes 0 or
+// it becomes disowned.
+//
+// For performance, checkCachingLocked can also be called after d's reference
+// count becomes non-zero, so that d can be removed from the LRU cache. This
+// may help in reducing the size of the cache and hence reduce evictions. Note
+// that this is not necessary for correctness.
+//
+// It may be called on a destroyed dentry. For example,
+// renameMu[R]UnlockAndCheckCaching may call checkCachingLocked multiple times
+// for the same dentry when the dentry is visited more than once in the same
+// operation. One of the calls may destroy the dentry, so subsequent calls will
+// do nothing.
+//
+// Preconditions: d.fs.renameMu must be locked for writing if
+// renameMuWriteLocked is true; it may be temporarily unlocked.
+func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked bool) {
+ d.cachingMu.Lock()
+ refs := atomic.LoadInt64(&d.refs)
+ if refs == -1 {
+ // Dentry has already been destroyed.
+ d.cachingMu.Unlock()
+ return
+ }
+ if refs > 0 {
+ // fs.cachedDentries is permitted to contain dentries with non-zero refs,
+ // which are skipped by fs.evictCachedDentryLocked() upon reaching the end
+ // of the LRU. But it is still beneficial to remove d from the cache as we
+ // are already holding d.cachingMu. Keeping a cleaner cache also reduces
+ // the number of evictions (which is expensive as it acquires fs.renameMu).
+ d.removeFromCacheLocked()
+ d.cachingMu.Unlock()
+ return
+ }
+
+ if atomic.LoadInt32(&d.fs.released) != 0 {
+ d.cachingMu.Unlock()
+ if !renameMuWriteLocked {
+ // Need to lock d.fs.renameMu to access d.parent. Lock it for writing as
+ // needed by d.destroyLocked() later.
+ d.fs.renameMu.Lock()
+ defer d.fs.renameMu.Unlock()
+ }
+ if d.parent != nil {
+ d.parent.dirMu.Lock()
+ delete(d.parent.children, d.name)
+ d.parent.dirMu.Unlock()
+ }
+ d.destroyLocked(ctx) // +checklocksforce: see above.
+ return
+ }
+
+ d.fs.cacheMu.Lock()
+ // If d is already cached, just move it to the front of the LRU.
+ if d.cached {
+ d.fs.cachedDentries.Remove(d)
+ d.fs.cachedDentries.PushFront(d)
+ d.fs.cacheMu.Unlock()
+ d.cachingMu.Unlock()
+ return
+ }
+ // Cache the dentry, then evict the least recently used cached dentry if
+ // the cache becomes over-full.
+ d.fs.cachedDentries.PushFront(d)
+ d.fs.cachedDentriesLen++
+ d.cached = true
+ shouldEvict := d.fs.cachedDentriesLen > d.fs.maxCachedDentries
+ d.fs.cacheMu.Unlock()
+ d.cachingMu.Unlock()
+
+ if shouldEvict {
+ if !renameMuWriteLocked {
+ // Need to lock d.fs.renameMu for writing as needed by
+ // d.evictCachedDentryLocked().
+ d.fs.renameMu.Lock()
+ defer d.fs.renameMu.Unlock()
+ }
+ d.fs.evictCachedDentryLocked(ctx) // +checklocksforce: see above.
+ }
+}
+
+// Preconditions: d.cachingMu must be locked.
+func (d *dentry) removeFromCacheLocked() {
+ if d.cached {
+ d.fs.cacheMu.Lock()
+ d.fs.cachedDentries.Remove(d)
+ d.fs.cachedDentriesLen--
+ d.fs.cacheMu.Unlock()
+ d.cached = false
+ }
+}
+
+// Precondition: fs.renameMu must be locked for writing; it may be temporarily
+// unlocked.
+// +checklocks:fs.renameMu
+func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) {
+ for fs.cachedDentriesLen != 0 {
+ fs.evictCachedDentryLocked(ctx)
+ }
+}
+
+// Preconditions:
+// * fs.renameMu must be locked for writing; it may be temporarily unlocked.
+// +checklocks:fs.renameMu
+func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
+ fs.cacheMu.Lock()
+ victim := fs.cachedDentries.Back()
+ fs.cacheMu.Unlock()
+ if victim == nil {
+ // fs.cachedDentries may have become empty between when it was
+ // checked and when we locked fs.cacheMu.
+ return
+ }
+
+ victim.cachingMu.Lock()
+ victim.removeFromCacheLocked()
+ // victim.refs may have become non-zero from an earlier path resolution
+ // since it was inserted into fs.cachedDentries.
+ if atomic.LoadInt64(&victim.refs) != 0 {
+ victim.cachingMu.Unlock()
+ return
+ }
+ if victim.parent != nil {
+ victim.parent.dirMu.Lock()
+ // Note that victim can't be a mount point (in any mount
+ // namespace), since VFS holds references on mount points.
+ fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, &victim.vfsd)
+ delete(victim.parent.children, victim.name)
+ victim.parent.dirMu.Unlock()
+ }
+ victim.cachingMu.Unlock()
+ victim.destroyLocked(ctx) // +checklocksforce: owned as precondition, victim.fs == fs.
+}
+
func (d *dentry) isSymlink() bool {
return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
}
@@ -749,6 +935,17 @@ func (d *dentry) verityEnabled() bool {
return !d.fs.allowRuntimeEnable || len(d.hash) != 0
}
+// generateChildrenList generates a sorted childrenList from childrenNames, and
+// cache it in d for hashing.
+func (d *dentry) generateChildrenList() {
+ if len(d.childrenList) == 0 && len(d.childrenNames) != 0 {
+ for child := range d.childrenNames {
+ d.childrenList = append(d.childrenList, child)
+ }
+ sort.Strings(d.childrenList)
+ }
+}
+
// getLowerAt returns the dentry in the underlying file system, which is
// represented by filename relative to d.
func (d *dentry) getLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, filename string) (vfs.VirtualDentry, error) {
@@ -857,13 +1054,13 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
// SetStat implements vfs.FileDescriptionImpl.SetStat.
func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
// Verity files are read-only.
- return syserror.EPERM
+ return linuxerr.EPERM
}
// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
if !fd.d.isDir() {
- return syserror.ENOTDIR
+ return linuxerr.ENOTDIR
}
fd.mu.Lock()
defer fd.mu.Unlock()
@@ -921,14 +1118,14 @@ func (fd *fileDescription) Seek(ctx context.Context, offset int64, whence int32)
case linux.SEEK_END:
n = int64(fd.d.size)
default:
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
if offset > math.MaxInt64-n {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
offset += n
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
fd.off = offset
return offset, nil
@@ -962,10 +1159,12 @@ func (fd *fileDescription) generateMerkleLocked(ctx context.Context) ([]byte, ui
return nil, 0, err
}
+ fd.d.generateChildrenList()
+
params := &merkletree.GenerateParams{
TreeReader: &merkleReader,
TreeWriter: &merkleWriter,
- Children: fd.d.childrenNames,
+ Children: fd.d.childrenList,
HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(),
Name: fd.d.name,
Mode: uint32(stat.Mode),
@@ -1007,7 +1206,7 @@ func (fd *fileDescription) generateMerkleLocked(ctx context.Context) ([]byte, ui
default:
// TODO(b/167728857): Investigate whether and how we should
// enable other types of file.
- return nil, 0, syserror.EINVAL
+ return nil, 0, linuxerr.EINVAL
}
hash, err := merkletree.Generate(params)
return hash, uint64(params.Size), err
@@ -1056,7 +1255,7 @@ func (fd *fileDescription) recordChildrenLocked(ctx context.Context) error {
// and stores its hash in its parent directory's Merkle tree.
func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) {
if !fd.d.fs.allowRuntimeEnable {
- return 0, syserror.EPERM
+ return 0, linuxerr.EPERM
}
fd.d.fs.verityMu.Lock()
@@ -1070,6 +1269,21 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) {
return 0, fd.d.fs.alertIntegrityViolation("Unexpected verity fd: missing expected underlying fds")
}
+ // Populate children names here. We cannot rely on the children
+ // dentries to populate parent dentry's children names, because the
+ // parent dentry may be destroyed before users enable verity if its ref
+ // count drops to zero.
+ if fd.d.isDir() {
+ if err := fd.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error {
+ if dirent.Name != "." && dirent.Name != ".." {
+ fd.d.childrenNames[dirent.Name] = struct{}{}
+ }
+ return nil
+ })); err != nil {
+ return 0, err
+ }
+ }
+
hash, dataSize, err := fd.generateMerkleLocked(ctx)
if err != nil {
return 0, err
@@ -1097,9 +1311,6 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) {
}); err != nil {
return 0, err
}
-
- // Add the current child's name to parent's childrenNames.
- fd.d.parent.childrenNames[fd.d.name] = struct{}{}
}
// Record the size of the data being hashed for fd.
@@ -1125,7 +1336,7 @@ func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) {
func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hostarch.Addr) (uintptr, error) {
t := kernel.TaskFromContext(ctx)
if t == nil {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
var metadata linux.DigestMetadata
@@ -1138,7 +1349,7 @@ func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hosta
// enabled, in which case fd.d.hash should be set.
if len(fd.d.hash) == 0 {
if fd.d.fs.allowRuntimeEnable {
- return 0, syserror.ENODATA
+ return 0, linuxerr.ENODATA
}
return 0, fd.d.fs.alertIntegrityViolation("Ioctl measureVerity: no hash found")
}
@@ -1148,7 +1359,7 @@ func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hosta
return 0, err
}
if metadata.DigestSize < uint16(len(fd.d.hash)) {
- return 0, syserror.EOVERFLOW
+ return 0, linuxerr.EOVERFLOW
}
// Populate the output digest size, since DigestSize is both input and
@@ -1178,7 +1389,7 @@ func (fd *fileDescription) verityFlags(ctx context.Context, flags hostarch.Addr)
t := kernel.TaskFromContext(ctx)
if t == nil {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
_, err := primitive.CopyInt32Out(t, flags, f)
return 0, err
@@ -1194,7 +1405,7 @@ func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.
case linux.FS_IOC_GETFLAGS:
return fd.verityFlags(ctx, args[2].Pointer())
default:
- return 0, syserror.ENOSYS
+ return 0, linuxerr.ENOSYS
}
}
@@ -1227,7 +1438,7 @@ func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of
// The Merkle tree file for the child should have been created and
// contains the expected xattrs. If the xattr does not exist, it
// indicates unexpected modifications to the file system.
- if err == syserror.ENODATA {
+ if linuxerr.Equals(linuxerr.ENODATA, err) {
return 0, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err))
}
if err != nil {
@@ -1261,7 +1472,7 @@ func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of
Mode: fd.d.mode,
UID: fd.d.uid,
GID: fd.d.gid,
- Children: fd.d.childrenNames,
+ Children: fd.d.childrenList,
HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(),
ReadOffset: offset,
ReadSize: dst.NumBytes(),
@@ -1277,12 +1488,12 @@ func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EROFS
+ return 0, linuxerr.EROFS
}
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EROFS
+ return 0, linuxerr.EROFS
}
// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
@@ -1298,7 +1509,7 @@ func (fd *fileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapO
// Check if mmap is allowed on the lower filesystem.
if !opts.SentryOwnedContent {
- return syserror.ENODEV
+ return linuxerr.ENODEV
}
return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
}
@@ -1349,7 +1560,7 @@ func (fd *fileDescription) Translate(ctx context.Context, required, optional mem
// The Merkle tree file for the child should have been created and
// contains the expected xattrs. If the xattr does not exist, it
// indicates unexpected modifications to the file system.
- if err == syserror.ENODATA {
+ if linuxerr.Equals(linuxerr.ENODATA, err) {
return nil, fd.d.fs.alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err))
}
if err != nil {
@@ -1433,7 +1644,7 @@ func (r *mmapReadSeeker) ReadAt(p []byte, off int64) (int, error) {
// mapped region.
readOffset := off - int64(r.Offset)
if readOffset < 0 {
- return 0, syserror.EINVAL
+ return 0, linuxerr.EINVAL
}
bs.DropFirst64(uint64(readOffset))
view := bs.TakeFirst64(uint64(len(p)))
diff --git a/pkg/sentry/fsimpl/verity/verity_test.go b/pkg/sentry/fsimpl/verity/verity_test.go
index 5c78a0019..af041bd50 100644
--- a/pkg/sentry/fsimpl/verity/verity_test.go
+++ b/pkg/sentry/fsimpl/verity/verity_test.go
@@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
@@ -31,7 +32,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -476,7 +476,7 @@ func TestOpenNonexistentFile(t *testing.T) {
// Ensure open an unexpected file in the parent directory fails with
// ENOENT rather than verification failure.
- if _, err = openVerityAt(ctx, vfsObj, root, filename+"abc", linux.O_RDONLY, linux.ModeRegular); err != syserror.ENOENT {
+ if _, err = openVerityAt(ctx, vfsObj, root, filename+"abc", linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.ENOENT, err) {
t.Errorf("OpenAt unexpected error: %v", err)
}
}
@@ -767,7 +767,7 @@ func TestOpenDeletedFileFails(t *testing.T) {
}
// Ensure reopening the verity enabled file fails.
- if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); err != syserror.EIO {
+ if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) {
t.Errorf("got OpenAt error: %v, expected EIO", err)
}
})
@@ -829,7 +829,7 @@ func TestOpenRenamedFileFails(t *testing.T) {
}
// Ensure reopening the verity enabled file fails.
- if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); err != syserror.EIO {
+ if _, err = openVerityAt(ctx, vfsObj, root, filename, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) {
t.Errorf("got OpenAt error: %v, expected EIO", err)
}
})
@@ -899,7 +899,7 @@ func TestUnmodifiedSymlinkFileReadSucceeds(t *testing.T) {
t.Fatalf("SymlinkAt: %v", err)
}
- fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_PATH|linux.O_NOFOLLOW, linux.ModeRegular)
+ fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular)
if err != nil {
t.Fatalf("openVerityAt symlink: %v", err)
@@ -1034,7 +1034,7 @@ func TestDeletedSymlinkFileReadFails(t *testing.T) {
t.Fatalf("SymlinkAt: %v", err)
}
- fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_PATH|linux.O_NOFOLLOW, linux.ModeRegular)
+ fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular)
if err != nil {
t.Fatalf("openVerityAt symlink: %v", err)
@@ -1063,14 +1063,14 @@ func TestDeletedSymlinkFileReadFails(t *testing.T) {
Root: root,
Start: root,
Path: fspath.Parse(symlink),
- }); err != syserror.EIO {
+ }); !linuxerr.Equals(linuxerr.EIO, err) {
t.Fatalf("ReadlinkAt succeeded with modified symlink: %v", err)
}
if tc.testWalk {
fileInSymlinkDirectory := symlink + "/verity-test-file"
// Ensure opening the verity enabled file in the symlink directory fails.
- if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); err != syserror.EIO {
+ if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) {
t.Errorf("Open succeeded with modified symlink: %v", err)
}
}
@@ -1136,7 +1136,7 @@ func TestModifiedSymlinkFileReadFails(t *testing.T) {
}
// Open symlink file to get the fd for ioctl in new step.
- fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_PATH|linux.O_NOFOLLOW, linux.ModeRegular)
+ fd, err := openVerityAt(ctx, vfsObj, root, symlink, linux.O_NOFOLLOW, linux.ModeRegular)
if err != nil {
t.Fatalf("OpenAt symlink: %v", err)
}
@@ -1195,14 +1195,14 @@ func TestModifiedSymlinkFileReadFails(t *testing.T) {
Root: root,
Start: root,
Path: fspath.Parse(symlink),
- }); err != syserror.EIO {
+ }); !linuxerr.Equals(linuxerr.EIO, err) {
t.Fatalf("ReadlinkAt succeeded with modified symlink: %v", err)
}
if tc.testWalk {
fileInSymlinkDirectory := symlink + "/verity-test-file"
// Ensure opening the verity enabled file in the symlink directory fails.
- if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); err != syserror.EIO {
+ if _, err := openVerityAt(ctx, vfsObj, root, fileInSymlinkDirectory, linux.O_RDONLY, linux.ModeRegular); !linuxerr.Equals(linuxerr.EIO, err) {
t.Errorf("Open succeeded with modified symlink: %v", err)
}
}