summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl/memfs
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/memfs')
-rw-r--r--pkg/sentry/fsimpl/memfs/BUILD56
-rw-r--r--pkg/sentry/fsimpl/memfs/benchmark_test.go464
-rw-r--r--pkg/sentry/fsimpl/memfs/directory.go187
-rw-r--r--pkg/sentry/fsimpl/memfs/filesystem.go544
-rw-r--r--pkg/sentry/fsimpl/memfs/memfs.go300
-rw-r--r--pkg/sentry/fsimpl/memfs/regular_file.go154
-rw-r--r--pkg/sentry/fsimpl/memfs/symlink.go36
7 files changed, 0 insertions, 1741 deletions
diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD
deleted file mode 100644
index 7e364c5fd..000000000
--- a/pkg/sentry/fsimpl/memfs/BUILD
+++ /dev/null
@@ -1,56 +0,0 @@
-# Bazel build definitions for the memfs package: a generated dentry list, the
-# memfs Go library, and its benchmark test target.
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
-
-package(licenses = ["notice"])
-
-load("//tools/go_generics:defs.bzl", "go_template_instance")
-
-# Instantiates the generic list template as dentry_list.go in package memfs,
-# using *dentry as both the Element and the Linker type.
-go_template_instance(
- name = "dentry_list",
- out = "dentry_list.go",
- package = "memfs",
- prefix = "dentry",
- template = "//pkg/ilist:generic_list",
- types = {
- "Element": "*dentry",
- "Linker": "*dentry",
- },
-)
-
-# The memfs library. dentry_list.go is the output of the dentry_list template
-# instance; the remaining sources are checked in.
-go_library(
- name = "memfs",
- srcs = [
- "dentry_list.go",
- "directory.go",
- "filesystem.go",
- "memfs.go",
- "regular_file.go",
- "symlink.go",
- ],
- importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/memfs",
- deps = [
- "//pkg/abi/linux",
- "//pkg/sentry/context",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/usermem",
- "//pkg/sentry/vfs",
- "//pkg/syserror",
- ],
-)
-
-# Test target built from benchmark_test.go. It depends on both :memfs and the
-# //pkg/sentry/fs tmpfs implementation.
-go_test(
- name = "benchmark_test",
- size = "small",
- srcs = ["benchmark_test.go"],
- deps = [
- ":memfs",
- "//pkg/abi/linux",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
- "//pkg/sentry/fs",
- "//pkg/sentry/fs/tmpfs",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/vfs",
- "//pkg/syserror",
- ],
-)
diff --git a/pkg/sentry/fsimpl/memfs/benchmark_test.go b/pkg/sentry/fsimpl/memfs/benchmark_test.go
deleted file mode 100644
index a94b17db6..000000000
--- a/pkg/sentry/fsimpl/memfs/benchmark_test.go
+++ /dev/null
@@ -1,464 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package benchmark_test
-
-import (
- "fmt"
- "runtime"
- "strings"
- "testing"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/memfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// Differences from stat_benchmark:
-//
-// - Syscall interception, CopyInPath, copyOutStat, and overlayfs overheads are
-// not included.
-//
-// - *MountStat benchmarks use a tmpfs root mount and a tmpfs submount at /tmp.
-// Non-MountStat benchmarks use a tmpfs root mount and no submounts.
-// stat_benchmark uses a varying root mount, a tmpfs submount at /tmp, and a
-// subdirectory /tmp/<top_dir> (assuming TEST_TMPDIR == "/tmp"). Thus
-// stat_benchmark at depth 1 does a comparable amount of work to *MountStat
-// benchmarks at depth 2, and non-MountStat benchmarks at depth 3.
-var depths = []int{1, 2, 3, 8, 64, 100}
-
-const (
- // mountPointName is the name of the directory, created directly under the
- // root, on which the *MountStat benchmarks mount their submount.
- mountPointName = "tmp"
- // filename is the name of the file that each benchmark creates at the
- // bottom of the directory chain and then stats.
- filename = "gvisor_test_temp_0_1557494568"
-)
-
-// fileOpOn looks up path and invokes fn on the resulting Dirent. It is copied
-// from syscalls/linux/sys_file.go, with the dependency on kernel.Task
-// stripped out.
-//
-// Absolute paths are resolved without a relative directory. Relative paths are
-// resolved against wd, and only when dirFD is AT_FDCWD; any other dirFD yields
-// EBADF. When resolve is true the lookup uses FindInode, otherwise FindLink.
-// The reference on the looked-up Dirent is dropped after fn returns, and fn's
-// error is returned to the caller.
-func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error {
-	// base is the directory a relative lookup starts from; it stays nil for
-	// absolute paths.
-	var base *fs.Dirent
-	switch {
-	case len(path) > 0 && path[0] == '/':
-		// Absolute path: no starting directory is needed.
-	case dirFD == linux.AT_FDCWD:
-		base = wd
-	default:
-		// Arbitrary directory FDs are not supported by this stripped copy.
-		return syserror.EBADF
-	}
-
-	// Look up the target.
-	var (
-		target *fs.Dirent
-		err    error
-	)
-	traversalsLeft := uint(linux.MaxSymlinkTraversals)
-	if resolve {
-		target, err = mntns.FindInode(ctx, root, base, path, &traversalsLeft)
-	} else {
-		target, err = mntns.FindLink(ctx, root, base, path, &traversalsLeft)
-	}
-	if err != nil {
-		return err
-	}
-
-	// Run the operation, then release the lookup's reference.
-	opErr := fn(root, target)
-	target.DecRef()
-	return opErr
-}
-
-// BenchmarkVFS1TmpfsStat measures looking up and reading the attributes of a
-// file on a VFS1 tmpfs root mount, with one sub-benchmark per entry in depths.
-// Each sub-benchmark builds a chain of nested directories of the given depth,
-// creates the file at the bottom, and then repeatedly resolves its absolute
-// path via fileOpOn.
-func BenchmarkVFS1TmpfsStat(b *testing.B) {
-	for _, depth := range depths {
-		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
-			ctx := contexttest.Context(b)
-
-			// Create VFS.
-			tmpfsFS, ok := fs.FindFilesystem("tmpfs")
-			if !ok {
-				b.Fatalf("failed to find tmpfs filesystem type")
-			}
-			rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
-			if err != nil {
-				b.Fatalf("failed to create tmpfs root mount: %v", err)
-			}
-			mntns, err := fs.NewMountNamespace(ctx, rootInode)
-			if err != nil {
-				b.Fatalf("failed to create mount namespace: %v", err)
-			}
-			defer mntns.DecRef()
-
-			var filePathBuilder strings.Builder
-			filePathBuilder.WriteByte('/')
-
-			// Create nested directories with given depth.
-			root := mntns.Root()
-			defer root.DecRef()
-			d := root
-			d.IncRef()
-			// d is reassigned in the loop below. Defer a closure so that the
-			// reference released on return is the one held on the final d;
-			// "defer d.DecRef()" would bind the receiver now (root), releasing
-			// root one time too many and leaking the deepest directory.
-			defer func() { d.DecRef() }()
-			for i := depth; i > 0; i-- {
-				name := fmt.Sprintf("%d", i)
-				if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil {
-					b.Fatalf("failed to create directory %q: %v", name, err)
-				}
-				next, err := d.Walk(ctx, root, name)
-				if err != nil {
-					b.Fatalf("failed to walk to directory %q: %v", name, err)
-				}
-				d.DecRef()
-				d = next
-				filePathBuilder.WriteString(name)
-				filePathBuilder.WriteByte('/')
-			}
-
-			// Create the file that will be stat'd.
-			file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644))
-			if err != nil {
-				b.Fatalf("failed to create file %q: %v", filename, err)
-			}
-			file.DecRef()
-			filePathBuilder.WriteString(filename)
-			filePath := filePathBuilder.String()
-
-			dirPath := false
-			// Collect setup garbage before timing starts.
-			runtime.GC()
-			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
-				err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
-					if dirPath && !fs.IsDir(d.Inode.StableAttr) {
-						return syserror.ENOTDIR
-					}
-					uattr, err := d.Inode.UnstableAttr(ctx)
-					if err != nil {
-						return err
-					}
-					// Sanity check: the file was created with mode 0644, so
-					// the owner-execute bit must be clear.
-					if uattr.Perms.User.Execute {
-						b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode())
-					}
-					return nil
-				})
-				if err != nil {
-					b.Fatalf("stat(%q) failed: %v", filePath, err)
-				}
-			}
-		})
-	}
-}
-
-// BenchmarkVFS2MemfsStat measures StatAt on a VFS2 memfs root mount, with one
-// sub-benchmark per entry in depths. Each sub-benchmark builds a chain of
-// nested directories of the given depth, creates the file at the bottom, and
-// then repeatedly stats it by absolute path.
-func BenchmarkVFS2MemfsStat(b *testing.B) {
-	for _, depth := range depths {
-		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
-			ctx := contexttest.Context(b)
-			creds := auth.CredentialsFromContext(ctx)
-
-			// Create VFS.
-			vfsObj := vfs.New()
-			vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{})
-			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.NewFilesystemOptions{})
-			if err != nil {
-				b.Fatalf("failed to create tmpfs root mount: %v", err)
-			}
-
-			var filePathBuilder strings.Builder
-			filePathBuilder.WriteByte('/')
-
-			// Create nested directories with given depth.
-			root := mntns.Root()
-			defer root.DecRef()
-			vd := root
-			vd.IncRef()
-			// vd is reassigned in the loop below. Defer a closure so that the
-			// reference released on return is the one held on the final vd;
-			// "defer vd.DecRef()" would bind the receiver now (root),
-			// releasing root one time too many and leaking the deepest
-			// directory.
-			defer func() { vd.DecRef() }()
-			for i := depth; i > 0; i-- {
-				name := fmt.Sprintf("%d", i)
-				pop := vfs.PathOperation{
-					Root:     root,
-					Start:    vd,
-					Pathname: name,
-				}
-				if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
-					Mode: 0755,
-				}); err != nil {
-					b.Fatalf("failed to create directory %q: %v", name, err)
-				}
-				nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
-				if err != nil {
-					b.Fatalf("failed to walk to directory %q: %v", name, err)
-				}
-				vd.DecRef()
-				vd = nextVD
-				filePathBuilder.WriteString(name)
-				filePathBuilder.WriteByte('/')
-			}
-
-			// Create the file that will be stat'd.
-			fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
-				Root:               root,
-				Start:              vd,
-				Pathname:           filename,
-				FollowFinalSymlink: true,
-			}, &vfs.OpenOptions{
-				Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
-				Mode:  0644,
-			})
-			if err != nil {
-				b.Fatalf("failed to create file %q: %v", filename, err)
-			}
-			defer fd.DecRef()
-			filePathBuilder.WriteString(filename)
-			filePath := filePathBuilder.String()
-
-			// Collect setup garbage before timing starts.
-			runtime.GC()
-			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
-				stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
-					Root:               root,
-					Start:              root,
-					Pathname:           filePath,
-					FollowFinalSymlink: true,
-				}, &vfs.StatOptions{})
-				if err != nil {
-					b.Fatalf("stat(%q) failed: %v", filePath, err)
-				}
-				// Sanity check: with the file-type bits masked off, the mode
-				// must be the 0644 the file was created with.
-				if stat.Mode&^linux.S_IFMT != 0644 {
-					b.Fatalf("got wrong permissions (%0o)", stat.Mode)
-				}
-			}
-		})
-	}
-}
-
-// BenchmarkVFS1TmpfsMountStat is the VFS1 tmpfs stat benchmark with a tmpfs
-// submount: a second tmpfs is mounted at /tmp (mountPointName) and the nested
-// directories and the stat'd file are created beneath it, so every lookup
-// starts at the root and passes through the mount point.
-func BenchmarkVFS1TmpfsMountStat(b *testing.B) {
-	for _, depth := range depths {
-		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
-			ctx := contexttest.Context(b)
-
-			// Create VFS.
-			tmpfsFS, ok := fs.FindFilesystem("tmpfs")
-			if !ok {
-				b.Fatalf("failed to find tmpfs filesystem type")
-			}
-			rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
-			if err != nil {
-				b.Fatalf("failed to create tmpfs root mount: %v", err)
-			}
-			mntns, err := fs.NewMountNamespace(ctx, rootInode)
-			if err != nil {
-				b.Fatalf("failed to create mount namespace: %v", err)
-			}
-			defer mntns.DecRef()
-
-			var filePathBuilder strings.Builder
-			filePathBuilder.WriteByte('/')
-
-			// Create and mount the submount.
-			root := mntns.Root()
-			defer root.DecRef()
-			if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil {
-				b.Fatalf("failed to create mount point: %v", err)
-			}
-			mountPoint, err := root.Walk(ctx, root, mountPointName)
-			if err != nil {
-				b.Fatalf("failed to walk to mount point: %v", err)
-			}
-			defer mountPoint.DecRef()
-			submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
-			if err != nil {
-				b.Fatalf("failed to create tmpfs submount: %v", err)
-			}
-			if err := mntns.Mount(ctx, mountPoint, submountInode); err != nil {
-				b.Fatalf("failed to mount tmpfs submount: %v", err)
-			}
-			filePathBuilder.WriteString(mountPointName)
-			filePathBuilder.WriteByte('/')
-
-			// Create nested directories with given depth.
-			d, err := root.Walk(ctx, root, mountPointName)
-			if err != nil {
-				b.Fatalf("failed to walk to mount root: %v", err)
-			}
-			// d is reassigned in the loop below. Defer a closure so that the
-			// reference released on return is the one held on the final d;
-			// "defer d.DecRef()" would bind the receiver now (the Dirent just
-			// walked to), releasing it twice and leaking the deepest
-			// directory.
-			defer func() { d.DecRef() }()
-			for i := depth; i > 0; i-- {
-				name := fmt.Sprintf("%d", i)
-				if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil {
-					b.Fatalf("failed to create directory %q: %v", name, err)
-				}
-				next, err := d.Walk(ctx, root, name)
-				if err != nil {
-					b.Fatalf("failed to walk to directory %q: %v", name, err)
-				}
-				d.DecRef()
-				d = next
-				filePathBuilder.WriteString(name)
-				filePathBuilder.WriteByte('/')
-			}
-
-			// Create the file that will be stat'd.
-			file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644))
-			if err != nil {
-				b.Fatalf("failed to create file %q: %v", filename, err)
-			}
-			file.DecRef()
-			filePathBuilder.WriteString(filename)
-			filePath := filePathBuilder.String()
-
-			dirPath := false
-			// Collect setup garbage before timing starts.
-			runtime.GC()
-			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
-				err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
-					if dirPath && !fs.IsDir(d.Inode.StableAttr) {
-						return syserror.ENOTDIR
-					}
-					uattr, err := d.Inode.UnstableAttr(ctx)
-					if err != nil {
-						return err
-					}
-					// Sanity check: the file was created with mode 0644, so
-					// the owner-execute bit must be clear.
-					if uattr.Perms.User.Execute {
-						b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode())
-					}
-					return nil
-				})
-				if err != nil {
-					b.Fatalf("stat(%q) failed: %v", filePath, err)
-				}
-			}
-		})
-	}
-}
-
-// BenchmarkVFS2MemfsMountStat is the VFS2 memfs stat benchmark with a memfs
-// submount: a second memfs is mounted at /tmp (mountPointName) and the nested
-// directories and the stat'd file are created beneath it, so every StatAt
-// starts at the root and passes through the mount point.
-func BenchmarkVFS2MemfsMountStat(b *testing.B) {
-	for _, depth := range depths {
-		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
-			ctx := contexttest.Context(b)
-			creds := auth.CredentialsFromContext(ctx)
-
-			// Create VFS.
-			vfsObj := vfs.New()
-			vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{})
-			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.NewFilesystemOptions{})
-			if err != nil {
-				b.Fatalf("failed to create tmpfs root mount: %v", err)
-			}
-
-			var filePathBuilder strings.Builder
-			filePathBuilder.WriteByte('/')
-
-			// Create the mount point.
-			root := mntns.Root()
-			defer root.DecRef()
-			pop := vfs.PathOperation{
-				Root:     root,
-				Start:    root,
-				Pathname: mountPointName,
-			}
-			if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
-				Mode: 0755,
-			}); err != nil {
-				b.Fatalf("failed to create mount point: %v", err)
-			}
-			// Save the mount point for later use.
-			mountPoint, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
-			if err != nil {
-				b.Fatalf("failed to walk to mount point: %v", err)
-			}
-			defer mountPoint.DecRef()
-			// Create and mount the submount.
-			if err := vfsObj.NewMount(ctx, creds, "", &pop, "memfs", &vfs.NewFilesystemOptions{}); err != nil {
-				b.Fatalf("failed to mount tmpfs submount: %v", err)
-			}
-			filePathBuilder.WriteString(mountPointName)
-			filePathBuilder.WriteByte('/')
-
-			// Create nested directories with given depth.
-			vd, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
-			if err != nil {
-				b.Fatalf("failed to walk to mount root: %v", err)
-			}
-			// vd is reassigned in the loop below. Defer a closure so that the
-			// reference released on return is the one held on the final vd;
-			// "defer vd.DecRef()" would bind the receiver now (the dentry just
-			// looked up), releasing it twice and leaking the deepest
-			// directory.
-			defer func() { vd.DecRef() }()
-			for i := depth; i > 0; i-- {
-				name := fmt.Sprintf("%d", i)
-				pop := vfs.PathOperation{
-					Root:     root,
-					Start:    vd,
-					Pathname: name,
-				}
-				if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
-					Mode: 0755,
-				}); err != nil {
-					b.Fatalf("failed to create directory %q: %v", name, err)
-				}
-				nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
-				if err != nil {
-					b.Fatalf("failed to walk to directory %q: %v", name, err)
-				}
-				vd.DecRef()
-				vd = nextVD
-				filePathBuilder.WriteString(name)
-				filePathBuilder.WriteByte('/')
-			}
-
-			// Verify that we didn't create any directories under the mount
-			// point (i.e. they were all created on the submount).
-			firstDirName := fmt.Sprintf("%d", depth)
-			if child := mountPoint.Dentry().Child(firstDirName); child != nil {
-				b.Fatalf("created directory %q under root mount, not submount", firstDirName)
-			}
-
-			// Create the file that will be stat'd.
-			fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
-				Root:               root,
-				Start:              vd,
-				Pathname:           filename,
-				FollowFinalSymlink: true,
-			}, &vfs.OpenOptions{
-				Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
-				Mode:  0644,
-			})
-			if err != nil {
-				b.Fatalf("failed to create file %q: %v", filename, err)
-			}
-			fd.DecRef()
-			filePathBuilder.WriteString(filename)
-			filePath := filePathBuilder.String()
-
-			// Collect setup garbage before timing starts.
-			runtime.GC()
-			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
-				stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
-					Root:               root,
-					Start:              root,
-					Pathname:           filePath,
-					FollowFinalSymlink: true,
-				}, &vfs.StatOptions{})
-				if err != nil {
-					b.Fatalf("stat(%q) failed: %v", filePath, err)
-				}
-				// Sanity check: with the file-type bits masked off, the mode
-				// must be the 0644 the file was created with.
-				if stat.Mode&^linux.S_IFMT != 0644 {
-					b.Fatalf("got wrong permissions (%0o)", stat.Mode)
-				}
-			}
-		})
-	}
-}
diff --git a/pkg/sentry/fsimpl/memfs/directory.go b/pkg/sentry/fsimpl/memfs/directory.go
deleted file mode 100644
index c52dc781c..000000000
--- a/pkg/sentry/fsimpl/memfs/directory.go
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// directory is the inode implementation (inode.impl) for memfs directories.
-type directory struct {
- // inode is the inode state for this directory. newDirectory initializes it
- // with this directory as its impl and hands out a pointer to it.
- inode inode
-
- // childList is a list containing (1) child Dentries and (2) fake Dentries
- // (with inode == nil) that represent the iteration position of
- // directoryFDs. childList is used to support directoryFD.IterDirents()
- // efficiently. childList is protected by filesystem.mu.
- childList dentryList
-}
-
-// newDirectory allocates a directory and returns its inode, initialized for
-// this filesystem with the given credentials and mode.
-func (fs *filesystem) newDirectory(creds *auth.Credentials, mode uint16) *inode {
-	d := new(directory)
-	ino := &d.inode
-	ino.init(d, fs, creds, mode)
-	// A new directory starts with two links: its own "." entry, plus the
-	// entry in its parent directory (or ".." in the case of the root).
-	ino.nlink = 2
-	return ino
-}
-
-func (i *inode) isDir() bool {
- _, ok := i.impl.(*directory)
- return ok
-}
-
-// directoryFD is the file description type used for memfs directories. It
-// records an iteration position that IterDirents and Seek maintain.
-type directoryFD struct {
- fileDescription
- vfs.DirectoryFileDescriptionDefaultImpl
-
- // Protected by filesystem.mu.
- //
- // iter, when non-nil, is a placeholder dentry (one with a nil inode) that
- // IterDirents and Seek link into the directory's childList to mark where
- // this FD's iteration should resume. Release unlinks it.
- iter *dentry
- // off is the current directory offset: 0 is ".", 1 is "..", and larger
- // values count the children after them.
- off int64
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-//
-// If this FD has an iteration placeholder, it is removed from the directory's
-// childList under filesystem.mu and then dropped.
-func (fd *directoryFD) Release() {
-	// Nothing to clean up if no placeholder was ever created.
-	if fd.iter == nil {
-		return
-	}
-	memFS := fd.filesystem()
-	parent := fd.inode().impl.(*directory)
-	memFS.mu.Lock()
-	parent.childList.Remove(fd.iter)
-	memFS.mu.Unlock()
-	fd.iter = nil
-}
-
-// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
-//
-// Entries are passed to cb in order: "." (offset 0), ".." (offset 1), then
-// the directory's children in childList order. Iteration stops as soon as
-// cb.Handle returns false; the rejected entry is not consumed and will be
-// offered again on the next call. The whole call runs with filesystem.mu
-// held for writing, since it relinks fd.iter within childList.
-func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
- fs := fd.filesystem()
- vfsd := fd.vfsfd.VirtualDentry().Dentry()
-
- fs.mu.Lock()
- defer fs.mu.Unlock()
-
- // Offset 0: the directory itself.
- if fd.off == 0 {
- if !cb.Handle(vfs.Dirent{
- Name: ".",
- Type: linux.DT_DIR,
- Ino: vfsd.Impl().(*dentry).inode.ino,
- Off: 0,
- }) {
- return nil
- }
- fd.off++
- }
- // Offset 1: the parent (ParentOrSelf supplies the dentry used for "..").
- if fd.off == 1 {
- parentInode := vfsd.ParentOrSelf().Impl().(*dentry).inode
- if !cb.Handle(vfs.Dirent{
- Name: "..",
- Type: parentInode.direntType(),
- Ino: parentInode.ino,
- Off: 1,
- }) {
- return nil
- }
- fd.off++
- }
-
- dir := vfsd.Impl().(*dentry).inode.impl.(*directory)
- var child *dentry
- if fd.iter == nil {
- // Start iteration at the beginning of dir.
- child = dir.childList.Front()
- fd.iter = &dentry{}
- } else {
- // Continue iteration from where we left off.
- child = fd.iter.Next()
- dir.childList.Remove(fd.iter)
- }
- // At this point fd.iter is not linked into childList; every exit path
- // below relinks it at the position where iteration should resume.
- for child != nil {
- // Skip other directoryFD iterators.
- if child.inode != nil {
- if !cb.Handle(vfs.Dirent{
- Name: child.vfsd.Name(),
- Type: child.inode.direntType(),
- Ino: child.inode.ino,
- Off: fd.off,
- }) {
- // The callback rejected this child: park the iterator just
- // before it so the next call retries the same entry.
- dir.childList.InsertBefore(child, fd.iter)
- return nil
- }
- fd.off++
- }
- child = child.Next()
- }
- // Every child was emitted: park the iterator at the end of the list.
- dir.childList.PushBack(fd.iter)
- return nil
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-//
-// Only SEEK_SET and SEEK_CUR are accepted; any other whence, or a resulting
-// negative offset, yields EINVAL. On success fd.off is set to the new offset
-// and fd.iter is relinked into the directory's childList at the matching
-// position. The new offset is returned.
-func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- fs := fd.filesystem()
- fs.mu.Lock()
- defer fs.mu.Unlock()
-
- switch whence {
- case linux.SEEK_SET:
- // Use offset as given.
- case linux.SEEK_CUR:
- offset += fd.off
- default:
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
-
- // If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't
- // seek even if doing so might reposition the iterator due to concurrent
- // mutation of the directory. Compare fs/libfs.c:dcache_dir_lseek().
- if fd.off == offset {
- return offset, nil
- }
-
- fd.off = offset
- // Compensate for "." and "..".
- // remChildren is the number of real children to skip before the position
- // at which fd.iter is inserted.
- remChildren := int64(0)
- if offset >= 2 {
- remChildren = offset - 2
- }
-
- dir := fd.inode().impl.(*directory)
-
- // Ensure that fd.iter exists and is not linked into dir.childList.
- if fd.iter == nil {
- fd.iter = &dentry{}
- } else {
- dir.childList.Remove(fd.iter)
- }
- // Insert fd.iter before the remChildren'th child, or at the end of the
- // list if remChildren >= number of children.
- child := dir.childList.Front()
- for child != nil {
- // Skip other directoryFD iterators.
- if child.inode != nil {
- if remChildren == 0 {
- dir.childList.InsertBefore(child, fd.iter)
- return offset, nil
- }
- remChildren--
- }
- child = child.Next()
- }
- dir.childList.PushBack(fd.iter)
- return offset, nil
-}
diff --git a/pkg/sentry/fsimpl/memfs/filesystem.go b/pkg/sentry/fsimpl/memfs/filesystem.go
deleted file mode 100644
index f79e2d9c8..000000000
--- a/pkg/sentry/fsimpl/memfs/filesystem.go
+++ /dev/null
@@ -1,544 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
- "fmt"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// stepLocked resolves rp.Component() in parent directory vfsd.
-//
-// stepLocked is loosely analogous to fs/namei.c:walk_component().
-//
-// On success it advances rp past the resolved component and returns the
-// resulting Dentry together with its inode. It returns ENOTDIR if inode is
-// not a directory, ENOENT if the component has no Dentry, and otherwise
-// propagates errors from the permission check, rp.ResolveComponent, or
-// rp.HandleSymlink.
-//
-// Preconditions: filesystem.mu must be locked. !rp.Done(). inode ==
-// vfsd.Impl().(*dentry).inode.
-func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode) (*vfs.Dentry, *inode, error) {
- if !inode.isDir() {
- return nil, nil, syserror.ENOTDIR
- }
- // Walking through a directory requires MayExec on it.
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
- return nil, nil, err
- }
-afterSymlink:
- nextVFSD, err := rp.ResolveComponent(vfsd)
- if err != nil {
- return nil, nil, err
- }
- if nextVFSD == nil {
- // Since the Dentry tree is the sole source of truth for memfs, if it's
- // not in the Dentry tree, it doesn't exist.
- return nil, nil, syserror.ENOENT
- }
- nextInode := nextVFSD.Impl().(*dentry).inode
- if symlink, ok := nextInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
- // TODO: symlink traversals update access time
- // Hand the link target to rp, then resolve again from the same parent
- // directory.
- if err := rp.HandleSymlink(symlink.target); err != nil {
- return nil, nil, err
- }
- goto afterSymlink // don't check the current directory again
- }
- rp.Advance()
- return nextVFSD, nextInode, nil
-}
-
-// walkExistingLocked resolves rp to an existing file.
-//
-// walkExistingLocked is loosely analogous to Linux's
-// fs/namei.c:path_lookupat().
-//
-// Preconditions: filesystem.mu must be locked.
-func walkExistingLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) {
- vfsd := rp.Start()
- inode := vfsd.Impl().(*dentry).inode
- for !rp.Done() {
- var err error
- vfsd, inode, err = stepLocked(rp, vfsd, inode)
- if err != nil {
- return nil, nil, err
- }
- }
- if rp.MustBeDir() && !inode.isDir() {
- return nil, nil, syserror.ENOTDIR
- }
- return vfsd, inode, nil
-}
-
-// walkParentDirLocked resolves all but the last path component of rp to an
-// existing directory. It does not check that the returned directory is
-// searchable by the provider of rp.
-//
-// walkParentDirLocked is loosely analogous to Linux's
-// fs/namei.c:path_parentat().
-//
-// Preconditions: filesystem.mu must be locked. !rp.Done().
-func walkParentDirLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) {
- vfsd := rp.Start()
- inode := vfsd.Impl().(*dentry).inode
- for !rp.Final() {
- var err error
- vfsd, inode, err = stepLocked(rp, vfsd, inode)
- if err != nil {
- return nil, nil, err
- }
- }
- if !inode.isDir() {
- return nil, nil, syserror.ENOTDIR
- }
- return vfsd, inode, nil
-}
-
-// checkCreateLocked verifies that an entry named rp.Component() may be
-// created inside directory parentVFSD and, on success, returns that name.
-//
-// It returns EEXIST if the name is "." or ".." or already resolves to a
-// child, and ENOENT if parentVFSD has been disowned. Errors from the
-// permission check and from rp.ResolveChild are returned unchanged.
-//
-// Preconditions: filesystem.mu must be locked. parentInode ==
-// parentVFSD.Impl().(*dentry).inode. parentInode.isDir() == true.
-func checkCreateLocked(rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode *inode) (string, error) {
-	// Creating an entry needs both MayWrite and MayExec on the parent.
-	if err := parentInode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
-		return "", err
-	}
-	name := rp.Component()
-	switch name {
-	case ".", "..":
-		return "", syserror.EEXIST
-	}
-	existing, err := rp.ResolveChild(parentVFSD, name)
-	switch {
-	case err != nil:
-		return "", err
-	case existing != nil:
-		return "", syserror.EEXIST
-	case parentVFSD.IsDisowned():
-		return "", syserror.ENOENT
-	}
-	return name, nil
-}
-
-// checkDeleteLocked reports whether the file represented by vfsd may be
-// deleted: it returns EBUSY if vfsd has no parent, ENOENT if its parent has
-// been disowned, and nil otherwise.
-func checkDeleteLocked(vfsd *vfs.Dentry) error {
-	parent := vfsd.Parent()
-	switch {
-	case parent == nil:
-		return syserror.EBUSY
-	case parent.IsDisowned():
-		return syserror.ENOENT
-	default:
-		return nil
-	}
-}
-
-// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
-//
-// The path is resolved with filesystem.mu held for reading. When
-// opts.CheckSearchable is set, the result must be a directory (else ENOTDIR)
-// on which the caller has MayExec permission. A reference is taken on the
-// result's inode before its Dentry is returned.
-func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-
-	found, foundInode, err := walkExistingLocked(rp)
-	if err != nil {
-		return nil, err
-	}
-	if opts.CheckSearchable && !foundInode.isDir() {
-		return nil, syserror.ENOTDIR
-	}
-	if opts.CheckSearchable {
-		if err := foundInode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
-			return nil, err
-		}
-	}
-	foundInode.incRef() // vfsd.IncRef(&fs.vfsfs)
-	return found, nil
-}
-
-// LinkAt implements vfs.FilesystemImpl.LinkAt.
-//
-// It creates a new Dentry named by the final component of rp that refers to
-// the same inode as vd. It returns EEXIST if rp has no components left or
-// the name is already taken, EXDEV if rp and vd are on different mounts, and
-// EPERM if vd is a directory.
-func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
- // With no components left there is no new name to create.
- if rp.Done() {
- return syserror.EEXIST
- }
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parentVFSD, parentInode, err := walkParentDirLocked(rp)
- if err != nil {
- return err
- }
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
- if err != nil {
- return err
- }
- // Links cannot cross mounts.
- if rp.Mount() != vd.Mount() {
- return syserror.EXDEV
- }
- // Register as a writer on the mount for the duration of the mutation.
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
- d := vd.Dentry().Impl().(*dentry)
- // Directories may not be linked.
- if d.inode.isDir() {
- return syserror.EPERM
- }
- // The new name is one more link to the existing inode; publish it both in
- // the VFS Dentry tree and in the parent's iteration list.
- d.inode.incLinksLocked()
- child := fs.newDentry(d.inode)
- parentVFSD.InsertChild(&child.vfsd, pc)
- parentInode.impl.(*directory).childList.PushBack(child)
- return nil
-}
-
-// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
-//
-// It creates a new directory with mode opts.Mode, named by the final
-// component of rp. It returns EEXIST if rp has no components left or the
-// name is already taken.
-func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
- // With no components left there is no new name to create.
- if rp.Done() {
- return syserror.EEXIST
- }
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parentVFSD, parentInode, err := walkParentDirLocked(rp)
- if err != nil {
- return err
- }
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
- if err != nil {
- return err
- }
- // Register as a writer on the mount for the duration of the mutation.
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
- // Publish the new directory both in the VFS Dentry tree and in the
- // parent's iteration list.
- child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode))
- parentVFSD.InsertChild(&child.vfsd, pc)
- parentInode.impl.(*directory).childList.PushBack(child)
- parentInode.incLinksLocked() // from child's ".."
- return nil
-}
-
-// MknodAt implements vfs.FilesystemImpl.MknodAt.
-//
-// Node creation itself is not implemented: after running the same lookup,
-// creation and write-access checks as the other create operations, it always
-// fails with EPERM.
-func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
-	// With no components left there is no new name to create.
-	if rp.Done() {
-		return syserror.EEXIST
-	}
-
-	fs.mu.Lock()
-	defer fs.mu.Unlock()
-
-	dirVFSD, dirInode, err := walkParentDirLocked(rp)
-	if err != nil {
-		return err
-	}
-	if _, err := checkCreateLocked(rp, dirVFSD, dirInode); err != nil {
-		return err
-	}
-	if err := rp.Mount().CheckBeginWrite(); err != nil {
-		return err
-	}
-	defer rp.Mount().EndWrite()
-
-	// TODO: actually implement mknod
-	return syserror.EPERM
-}
-
-// OpenAt implements vfs.FilesystemImpl.OpenAt.
-//
-// Without O_CREAT the path is resolved under a read lock and the existing
-// file is opened. With O_CREAT the filesystem is write-locked: the parent
-// directory is resolved, and the final component is either created as a
-// regular file or, if it already exists, opened (EEXIST with O_EXCL).
-// A trailing symlink is followed when rp.ShouldFollowSymlink() allows it.
-func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- // Filter out flags that are not supported by memfs. O_DIRECTORY and
- // O_NOFOLLOW have no effect here (they're handled by VFS by setting
- // appropriate bits in rp), but are returned by
- // FileDescriptionImpl.StatusFlags().
- opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC | linux.O_DIRECTORY | linux.O_NOFOLLOW
-
- // Fast path: nothing can be created, so a read lock suffices.
- if opts.Flags&linux.O_CREAT == 0 {
- fs.mu.RLock()
- defer fs.mu.RUnlock()
- vfsd, inode, err := walkExistingLocked(rp)
- if err != nil {
- return nil, err
- }
- return inode.open(rp, vfsd, opts.Flags, false)
- }
-
- mustCreate := opts.Flags&linux.O_EXCL != 0
- vfsd := rp.Start()
- inode := vfsd.Impl().(*dentry).inode
- fs.mu.Lock()
- defer fs.mu.Unlock()
- // No path components remain: the target is the starting dentry itself,
- // which by definition already exists.
- if rp.Done() {
- if rp.MustBeDir() {
- return nil, syserror.EISDIR
- }
- if mustCreate {
- return nil, syserror.EEXIST
- }
- return inode.open(rp, vfsd, opts.Flags, false)
- }
-afterTrailingSymlink:
- // Walk to the parent directory of the last path component.
- for !rp.Final() {
- var err error
- vfsd, inode, err = stepLocked(rp, vfsd, inode)
- if err != nil {
- return nil, err
- }
- }
- if !inode.isDir() {
- return nil, syserror.ENOTDIR
- }
- // Check for search permission in the parent directory.
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
- return nil, err
- }
- // Reject attempts to open directories with O_CREAT.
- if rp.MustBeDir() {
- return nil, syserror.EISDIR
- }
- pc := rp.Component()
- if pc == "." || pc == ".." {
- return nil, syserror.EISDIR
- }
- // Determine whether or not we need to create a file.
- childVFSD, err := rp.ResolveChild(vfsd, pc)
- if err != nil {
- return nil, err
- }
- if childVFSD == nil {
- // Already checked for searchability above; now check for writability.
- if err := inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
- return nil, err
- }
- // Register as a writer on the mount while the directory is modified.
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return nil, err
- }
- defer rp.Mount().EndWrite()
- // Create and open the child.
- childInode := fs.newRegularFile(rp.Credentials(), opts.Mode)
- child := fs.newDentry(childInode)
- vfsd.InsertChild(&child.vfsd, pc)
- inode.impl.(*directory).childList.PushBack(child)
- // afterCreate is true, so open skips the permission check on the new
- // file.
- return childInode.open(rp, &child.vfsd, opts.Flags, true)
- }
- // Open existing file or follow symlink.
- if mustCreate {
- return nil, syserror.EEXIST
- }
- childInode := childVFSD.Impl().(*dentry).inode
- if symlink, ok := childInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
- // TODO: symlink traversals update access time
- if err := rp.HandleSymlink(symlink.target); err != nil {
- return nil, err
- }
- // rp.Final() may no longer be true since we now need to resolve the
- // symlink target.
- goto afterTrailingSymlink
- }
- return childInode.open(rp, childVFSD, opts.Flags, false)
-}
-
-// open creates a file description for inode i, reached via vfsd, using the
-// given open flags.
-//
-// Unless afterCreate is true, the credentials in rp must permit the access
-// types implied by flags. Regular files and directories produce a
-// regularFileFD or directoryFD respectively; directories opened with write
-// access yield EISDIR and symlinks yield ELOOP. Any other inode
-// implementation causes a panic.
-func (i *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32, afterCreate bool) (*vfs.FileDescription, error) {
- ats := vfs.AccessTypesForOpenFlags(flags)
- if !afterCreate {
- if err := i.checkPermissions(rp.Credentials(), ats, i.isDir()); err != nil {
- return nil, err
- }
- }
- switch impl := i.impl.(type) {
- case *regularFile:
- var fd regularFileFD
- fd.flags = flags
- fd.readable = vfs.MayReadFileWithOpenFlags(flags)
- fd.writable = vfs.MayWriteFileWithOpenFlags(flags)
- if fd.writable {
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return nil, err
- }
- // Mount.EndWrite() is called by regularFileFD.Release().
- }
- fd.vfsfd.Init(&fd, rp.Mount(), vfsd)
- // O_TRUNC: discard the file's contents, keeping data and dataLen in
- // step under the file's mutex.
- if flags&linux.O_TRUNC != 0 {
- impl.mu.Lock()
- impl.data = impl.data[:0]
- atomic.StoreInt64(&impl.dataLen, 0)
- impl.mu.Unlock()
- }
- return &fd.vfsfd, nil
- case *directory:
- // Can't open directories writably.
- if ats&vfs.MayWrite != 0 {
- return nil, syserror.EISDIR
- }
- var fd directoryFD
- fd.vfsfd.Init(&fd, rp.Mount(), vfsd)
- fd.flags = flags
- return &fd.vfsfd, nil
- case *symlink:
- // Can't open symlinks without O_PATH (which is unimplemented).
- return nil, syserror.ELOOP
- default:
- panic(fmt.Sprintf("unknown inode type: %T", i.impl))
- }
-}
-
-// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
-func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
- fs.mu.RLock()
- _, inode, err := walkExistingLocked(rp)
- fs.mu.RUnlock()
- if err != nil {
- return "", err
- }
- symlink, ok := inode.impl.(*symlink)
- if !ok {
- return "", syserror.EINVAL
- }
- return symlink.target, nil
-}
-
-// RenameAt implements vfs.FilesystemImpl.RenameAt.
-func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error {
- if rp.Done() {
- return syserror.ENOENT
- }
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parentVFSD, parentInode, err := walkParentDirLocked(rp)
- if err != nil {
- return err
- }
- _, err = checkCreateLocked(rp, parentVFSD, parentInode)
- if err != nil {
- return err
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
- // TODO: actually implement RenameAt
- return syserror.EPERM
-}
-
-// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
-func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- fs.mu.Lock()
- defer fs.mu.Unlock()
- vfsd, inode, err := walkExistingLocked(rp)
- if err != nil {
- return err
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
- if err := checkDeleteLocked(vfsd); err != nil {
- return err
- }
- if !inode.isDir() {
- return syserror.ENOTDIR
- }
- if vfsd.HasChildren() {
- return syserror.ENOTEMPTY
- }
- if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil {
- return err
- }
- // Remove from parent directory's childList.
- vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry))
- inode.decRef()
- return nil
-}
-
-// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
-func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
- fs.mu.RLock()
- _, _, err := walkExistingLocked(rp)
- fs.mu.RUnlock()
- if err != nil {
- return err
- }
- if opts.Stat.Mask == 0 {
- return nil
- }
- // TODO: implement inode.setStat
- return syserror.EPERM
-}
-
-// StatAt implements vfs.FilesystemImpl.StatAt.
-func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
- fs.mu.RLock()
- _, inode, err := walkExistingLocked(rp)
- fs.mu.RUnlock()
- if err != nil {
- return linux.Statx{}, err
- }
- var stat linux.Statx
- inode.statTo(&stat)
- return stat, nil
-}
-
-// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
-func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
- fs.mu.RLock()
- _, _, err := walkExistingLocked(rp)
- fs.mu.RUnlock()
- if err != nil {
- return linux.Statfs{}, err
- }
- // TODO: actually implement statfs
- return linux.Statfs{}, syserror.ENOSYS
-}
-
-// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
-func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
- if rp.Done() {
- return syserror.EEXIST
- }
- fs.mu.Lock()
- defer fs.mu.Unlock()
- parentVFSD, parentInode, err := walkParentDirLocked(rp)
- if err != nil {
- return err
- }
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
- if err != nil {
- return err
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
- child := fs.newDentry(fs.newSymlink(rp.Credentials(), target))
- parentVFSD.InsertChild(&child.vfsd, pc)
- parentInode.impl.(*directory).childList.PushBack(child)
- return nil
-}
-
-// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
-func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- fs.mu.Lock()
- defer fs.mu.Unlock()
- vfsd, inode, err := walkExistingLocked(rp)
- if err != nil {
- return err
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
- if err := checkDeleteLocked(vfsd); err != nil {
- return err
- }
- if inode.isDir() {
- return syserror.EISDIR
- }
- if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil {
- return err
- }
- // Remove from parent directory's childList.
- vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry))
- inode.decLinksLocked()
- return nil
-}
diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go
deleted file mode 100644
index 45cd42b3e..000000000
--- a/pkg/sentry/fsimpl/memfs/memfs.go
+++ /dev/null
@@ -1,300 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package memfs provides a filesystem implementation that behaves like tmpfs:
-// the Dentry tree is the sole source of truth for the state of the filesystem.
-//
-// memfs is intended primarily to demonstrate filesystem implementation
-// patterns. Real uses cases for an in-memory filesystem should use tmpfs
-// instead.
-//
-// Lock order:
-//
-// filesystem.mu
-// regularFileFD.offMu
-// regularFile.mu
-// inode.mu
-package memfs
-
-import (
- "fmt"
- "sync"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// FilesystemType implements vfs.FilesystemType.
-type FilesystemType struct{}
-
-// filesystem implements vfs.FilesystemImpl.
-type filesystem struct {
- vfsfs vfs.Filesystem
-
- // mu serializes changes to the Dentry tree.
- mu sync.RWMutex
-
- nextInoMinusOne uint64 // accessed using atomic memory operations
-}
-
-// NewFilesystem implements vfs.FilesystemType.NewFilesystem.
-func (fstype FilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts vfs.NewFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
- var fs filesystem
- fs.vfsfs.Init(&fs)
- root := fs.newDentry(fs.newDirectory(creds, 01777))
- return &fs.vfsfs, &root.vfsd, nil
-}
-
-// Release implements vfs.FilesystemImpl.Release.
-func (fs *filesystem) Release() {
-}
-
-// Sync implements vfs.FilesystemImpl.Sync.
-func (fs *filesystem) Sync(ctx context.Context) error {
- // All filesystem state is in-memory.
- return nil
-}
-
-// dentry implements vfs.DentryImpl.
-type dentry struct {
- vfsd vfs.Dentry
-
- // inode is the inode represented by this dentry. Multiple Dentries may
- // share a single non-directory inode (with hard links). inode is
- // immutable.
- inode *inode
-
- // memfs doesn't count references on dentries; because the dentry tree is
- // the sole source of truth, it is by definition always consistent with the
- // state of the filesystem. However, it does count references on inodes,
- // because inode resources are released when all references are dropped.
- // (memfs doesn't really have resources to release, but we implement
- // reference counting because tmpfs regular files will.)
-
- // dentryEntry (ugh) links dentries into their parent directory.childList.
- dentryEntry
-}
-
-func (fs *filesystem) newDentry(inode *inode) *dentry {
- d := &dentry{
- inode: inode,
- }
- d.vfsd.Init(d)
- return d
-}
-
-// IncRef implements vfs.DentryImpl.IncRef.
-func (d *dentry) IncRef(vfsfs *vfs.Filesystem) {
- d.inode.incRef()
-}
-
-// TryIncRef implements vfs.DentryImpl.TryIncRef.
-func (d *dentry) TryIncRef(vfsfs *vfs.Filesystem) bool {
- return d.inode.tryIncRef()
-}
-
-// DecRef implements vfs.DentryImpl.DecRef.
-func (d *dentry) DecRef(vfsfs *vfs.Filesystem) {
- d.inode.decRef()
-}
-
-// inode represents a filesystem object.
-type inode struct {
- // refs is a reference count. refs is accessed using atomic memory
- // operations.
- //
- // A reference is held on all inodes that are reachable in the filesystem
- // tree. For non-directories (which may have multiple hard links), this
- // means that a reference is dropped when nlink reaches 0. For directories,
- // nlink never reaches 0 due to the "." entry; instead,
- // filesystem.RmdirAt() drops the reference.
- refs int64
-
- // Inode metadata; protected by mu and accessed using atomic memory
- // operations unless otherwise specified.
- mu sync.RWMutex
- mode uint32 // excluding file type bits, which are based on impl
- nlink uint32 // protected by filesystem.mu instead of inode.mu
- uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
- gid uint32 // auth.KGID, but ...
- ino uint64 // immutable
-
- impl interface{} // immutable
-}
-
-func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode uint16) {
- i.refs = 1
- i.mode = uint32(mode)
- i.uid = uint32(creds.EffectiveKUID)
- i.gid = uint32(creds.EffectiveKGID)
- i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1)
- // i.nlink initialized by caller
- i.impl = impl
-}
-
-// Preconditions: filesystem.mu must be locked for writing.
-func (i *inode) incLinksLocked() {
- if atomic.AddUint32(&i.nlink, 1) <= 1 {
- panic("memfs.inode.incLinksLocked() called with no existing links")
- }
-}
-
-// Preconditions: filesystem.mu must be locked for writing.
-func (i *inode) decLinksLocked() {
- if nlink := atomic.AddUint32(&i.nlink, ^uint32(0)); nlink == 0 {
- i.decRef()
- } else if nlink == ^uint32(0) { // negative overflow
- panic("memfs.inode.decLinksLocked() called with no existing links")
- }
-}
-
-func (i *inode) incRef() {
- if atomic.AddInt64(&i.refs, 1) <= 1 {
- panic("memfs.inode.incRef() called without holding a reference")
- }
-}
-
-func (i *inode) tryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&i.refs)
- if refs == 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) {
- return true
- }
- }
-}
-
-func (i *inode) decRef() {
- if refs := atomic.AddInt64(&i.refs, -1); refs == 0 {
- // This is unnecessary; it's mostly to simulate what tmpfs would do.
- if regfile, ok := i.impl.(*regularFile); ok {
- regfile.mu.Lock()
- regfile.data = nil
- atomic.StoreInt64(&regfile.dataLen, 0)
- regfile.mu.Unlock()
- }
- } else if refs < 0 {
- panic("memfs.inode.decRef() called without holding a reference")
- }
-}
-
-func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error {
- return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid)))
-}
-
-// Go won't inline this function, and returning linux.Statx (which is quite
-// big) means spending a lot of time in runtime.duffcopy(), so instead it's an
-// output parameter.
-func (i *inode) statTo(stat *linux.Statx) {
- stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
- stat.Blksize = 1 // usermem.PageSize in tmpfs
- stat.Nlink = atomic.LoadUint32(&i.nlink)
- stat.UID = atomic.LoadUint32(&i.uid)
- stat.GID = atomic.LoadUint32(&i.gid)
- stat.Mode = uint16(atomic.LoadUint32(&i.mode))
- stat.Ino = i.ino
- // TODO: device number
- switch impl := i.impl.(type) {
- case *regularFile:
- stat.Mode |= linux.S_IFREG
- stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
- stat.Size = uint64(atomic.LoadInt64(&impl.dataLen))
- // In tmpfs, this will be FileRangeSet.Span() / 512 (but also cached in
- // a uint64 accessed using atomic memory operations to avoid taking
- // locks).
- stat.Blocks = allocatedBlocksForSize(stat.Size)
- case *directory:
- stat.Mode |= linux.S_IFDIR
- case *symlink:
- stat.Mode |= linux.S_IFLNK
- stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
- stat.Size = uint64(len(impl.target))
- stat.Blocks = allocatedBlocksForSize(stat.Size)
- default:
- panic(fmt.Sprintf("unknown inode type: %T", i.impl))
- }
-}
-
-// allocatedBlocksForSize returns the number of 512B blocks needed to
-// accommodate the given size in bytes, as appropriate for struct
-// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block
-// size is independent of the "preferred block size for I/O", struct
-// stat::st_blksize and struct statx::stx_blksize.)
-func allocatedBlocksForSize(size uint64) uint64 {
- return (size + 511) / 512
-}
-
-func (i *inode) direntType() uint8 {
- switch i.impl.(type) {
- case *regularFile:
- return linux.DT_REG
- case *directory:
- return linux.DT_DIR
- case *symlink:
- return linux.DT_LNK
- default:
- panic(fmt.Sprintf("unknown inode type: %T", i.impl))
- }
-}
-
-// fileDescription is embedded by memfs implementations of
-// vfs.FileDescriptionImpl.
-type fileDescription struct {
- vfsfd vfs.FileDescription
- vfs.FileDescriptionDefaultImpl
-
- flags uint32 // status flags; immutable
-}
-
-func (fd *fileDescription) filesystem() *filesystem {
- return fd.vfsfd.VirtualDentry().Mount().Filesystem().Impl().(*filesystem)
-}
-
-func (fd *fileDescription) inode() *inode {
- return fd.vfsfd.VirtualDentry().Dentry().Impl().(*dentry).inode
-}
-
-// StatusFlags implements vfs.FileDescriptionImpl.StatusFlags.
-func (fd *fileDescription) StatusFlags(ctx context.Context) (uint32, error) {
- return fd.flags, nil
-}
-
-// SetStatusFlags implements vfs.FileDescriptionImpl.SetStatusFlags.
-func (fd *fileDescription) SetStatusFlags(ctx context.Context, flags uint32) error {
- // None of the flags settable by fcntl(F_SETFL) are supported, so this is a
- // no-op.
- return nil
-}
-
-// Stat implements vfs.FileDescriptionImpl.Stat.
-func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
- var stat linux.Statx
- fd.inode().statTo(&stat)
- return stat, nil
-}
-
-// SetStat implements vfs.FileDescriptionImpl.SetStat.
-func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
- if opts.Stat.Mask == 0 {
- return nil
- }
- // TODO: implement inode.setStat
- return syserror.EPERM
-}
diff --git a/pkg/sentry/fsimpl/memfs/regular_file.go b/pkg/sentry/fsimpl/memfs/regular_file.go
deleted file mode 100644
index 55f869798..000000000
--- a/pkg/sentry/fsimpl/memfs/regular_file.go
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
- "io"
- "sync"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-type regularFile struct {
- inode inode
-
- mu sync.RWMutex
- data []byte
- // dataLen is len(data), but accessed using atomic memory operations to
- // avoid locking in inode.stat().
- dataLen int64
-}
-
-func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode uint16) *inode {
- file := &regularFile{}
- file.inode.init(file, fs, creds, mode)
- file.inode.nlink = 1 // from parent directory
- return &file.inode
-}
-
-type regularFileFD struct {
- fileDescription
-
- // These are immutable.
- readable bool
- writable bool
-
- // off is the file offset. off is accessed using atomic memory operations.
- // offMu serializes operations that may mutate off.
- off int64
- offMu sync.Mutex
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *regularFileFD) Release() {
- if fd.writable {
- fd.vfsfd.VirtualDentry().Mount().EndWrite()
- }
-}
-
-// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- if !fd.readable {
- return 0, syserror.EINVAL
- }
- f := fd.inode().impl.(*regularFile)
- f.mu.RLock()
- if offset >= int64(len(f.data)) {
- f.mu.RUnlock()
- return 0, io.EOF
- }
- n, err := dst.CopyOut(ctx, f.data[offset:])
- f.mu.RUnlock()
- return int64(n), err
-}
-
-// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- fd.offMu.Lock()
- n, err := fd.PRead(ctx, dst, fd.off, opts)
- fd.off += n
- fd.offMu.Unlock()
- return n, err
-}
-
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- if !fd.writable {
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- srclen := src.NumBytes()
- if srclen == 0 {
- return 0, nil
- }
- f := fd.inode().impl.(*regularFile)
- f.mu.Lock()
- end := offset + srclen
- if end < offset {
- // Overflow.
- f.mu.Unlock()
- return 0, syserror.EFBIG
- }
- if end > f.dataLen {
- f.data = append(f.data, make([]byte, end-f.dataLen)...)
- atomic.StoreInt64(&f.dataLen, end)
- }
- n, err := src.CopyIn(ctx, f.data[offset:end])
- f.mu.Unlock()
- return int64(n), err
-}
-
-// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- fd.offMu.Lock()
- n, err := fd.PWrite(ctx, src, fd.off, opts)
- fd.off += n
- fd.offMu.Unlock()
- return n, err
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- fd.offMu.Lock()
- defer fd.offMu.Unlock()
- switch whence {
- case linux.SEEK_SET:
- // use offset as specified
- case linux.SEEK_CUR:
- offset += fd.off
- case linux.SEEK_END:
- offset += atomic.LoadInt64(&fd.inode().impl.(*regularFile).dataLen)
- default:
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- fd.off = offset
- return offset, nil
-}
-
-// Sync implements vfs.FileDescriptionImpl.Sync.
-func (fd *regularFileFD) Sync(ctx context.Context) error {
- return nil
-}
diff --git a/pkg/sentry/fsimpl/memfs/symlink.go b/pkg/sentry/fsimpl/memfs/symlink.go
deleted file mode 100644
index b2ac2cbeb..000000000
--- a/pkg/sentry/fsimpl/memfs/symlink.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-)
-
-type symlink struct {
- inode inode
- target string // immutable
-}
-
-func (fs *filesystem) newSymlink(creds *auth.Credentials, target string) *inode {
- link := &symlink{
- target: target,
- }
- link.inode.init(link, fs, creds, 0777)
- link.inode.nlink = 1 // from parent directory
- return &link.inode
-}
-
-// O_PATH is unimplemented, so there's no way to get a FileDescription
-// representing a symlink yet.