summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl')
-rw-r--r--pkg/sentry/fsimpl/ext/BUILD18
-rw-r--r--pkg/sentry/fsimpl/ext/benchmark/BUILD6
-rw-r--r--pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go4
-rw-r--r--pkg/sentry/fsimpl/ext/directory.go2
-rw-r--r--pkg/sentry/fsimpl/ext/disklayout/BUILD6
-rw-r--r--pkg/sentry/fsimpl/ext/ext.go2
-rw-r--r--pkg/sentry/fsimpl/ext/ext_test.go6
-rw-r--r--pkg/sentry/fsimpl/ext/file_description.go2
-rw-r--r--pkg/sentry/fsimpl/ext/filesystem.go2
-rw-r--r--pkg/sentry/fsimpl/ext/inode.go8
-rw-r--r--pkg/sentry/fsimpl/ext/regular_file.go6
-rw-r--r--pkg/sentry/fsimpl/ext/symlink.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/BUILD19
-rw-r--r--pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go15
-rw-r--r--pkg/sentry/fsimpl/kernfs/fd_impl_util.go15
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go65
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go48
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs_test.go191
-rw-r--r--pkg/sentry/fsimpl/kernfs/symlink.go23
-rw-r--r--pkg/sentry/fsimpl/proc/BUILD47
-rw-r--r--pkg/sentry/fsimpl/proc/filesystem.go26
-rw-r--r--pkg/sentry/fsimpl/proc/loadavg.go42
-rw-r--r--pkg/sentry/fsimpl/proc/meminfo.go79
-rw-r--r--pkg/sentry/fsimpl/proc/mounts.go33
-rw-r--r--pkg/sentry/fsimpl/proc/net.go338
-rw-r--r--pkg/sentry/fsimpl/proc/stat.go129
-rw-r--r--pkg/sentry/fsimpl/proc/subtasks.go128
-rw-r--r--pkg/sentry/fsimpl/proc/sys.go51
-rw-r--r--pkg/sentry/fsimpl/proc/task.go99
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go319
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go54
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_files.go247
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_net.go784
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys.go143
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys_test.go (renamed from pkg/sentry/fsimpl/proc/net_test.go)6
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_test.go415
-rw-r--r--pkg/sentry/fsimpl/proc/version.go70
-rw-r--r--pkg/sentry/fsimpl/sys/BUILD33
-rw-r--r--pkg/sentry/fsimpl/sys/sys.go124
-rw-r--r--pkg/sentry/fsimpl/sys/sys_test.go90
-rw-r--r--pkg/sentry/fsimpl/testutil/BUILD35
-rw-r--r--pkg/sentry/fsimpl/testutil/kernel.go (renamed from pkg/sentry/fsimpl/proc/boot_test.go)12
-rw-r--r--pkg/sentry/fsimpl/testutil/testutil.go281
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD27
-rw-r--r--pkg/sentry/fsimpl/tmpfs/benchmark_test.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/device_file.go39
-rw-r--r--pkg/sentry/fsimpl/tmpfs/directory.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go72
-rw-r--r--pkg/sentry/fsimpl/tmpfs/named_pipe.go9
-rw-r--r--pkg/sentry/fsimpl/tmpfs/pipe_test.go6
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go55
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file_test.go254
-rw-r--r--pkg/sentry/fsimpl/tmpfs/stat_test.go232
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go136
55 files changed, 3388 insertions, 1479 deletions
diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD
index 903874141..6f78f478f 100644
--- a/pkg/sentry/fsimpl/ext/BUILD
+++ b/pkg/sentry/fsimpl/ext/BUILD
@@ -1,5 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
load("//tools/go_generics:defs.bzl", "go_template_instance")
package(licenses = ["notice"])
@@ -32,26 +31,25 @@ go_library(
"symlink.go",
"utils.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
"//pkg/binary",
+ "//pkg/context",
"//pkg/fd",
"//pkg/fspath",
"//pkg/log",
+ "//pkg/safemem",
"//pkg/sentry/arch",
- "//pkg/sentry/context",
"//pkg/sentry/fs",
"//pkg/sentry/fsimpl/ext/disklayout",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
- "//pkg/sentry/safemem",
"//pkg/sentry/syscalls/linux",
- "//pkg/sentry/usermem",
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserror",
+ "//pkg/usermem",
"//pkg/waiter",
],
)
@@ -71,18 +69,18 @@ go_test(
"//pkg/sentry/fsimpl/ext:assets/tiny.ext3",
"//pkg/sentry/fsimpl/ext:assets/tiny.ext4",
],
- embed = [":ext"],
+ library = ":ext",
deps = [
"//pkg/abi/linux",
"//pkg/binary",
+ "//pkg/context",
"//pkg/fspath",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
+ "//pkg/sentry/contexttest",
"//pkg/sentry/fsimpl/ext/disklayout",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/usermem",
"//pkg/sentry/vfs",
"//pkg/syserror",
+ "//pkg/usermem",
"//runsc/testutil",
"@com_github_google_go-cmp//cmp:go_default_library",
"@com_github_google_go-cmp//cmp/cmpopts:go_default_library",
diff --git a/pkg/sentry/fsimpl/ext/benchmark/BUILD b/pkg/sentry/fsimpl/ext/benchmark/BUILD
index 4fc8296ef..6c5a559fd 100644
--- a/pkg/sentry/fsimpl/ext/benchmark/BUILD
+++ b/pkg/sentry/fsimpl/ext/benchmark/BUILD
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_test")
package(licenses = ["notice"])
@@ -7,9 +7,9 @@ go_test(
size = "small",
srcs = ["benchmark_test.go"],
deps = [
+ "//pkg/context",
"//pkg/fspath",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
+ "//pkg/sentry/contexttest",
"//pkg/sentry/fsimpl/ext",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
index a56b03711..d1436b943 100644
--- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
+++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
@@ -24,9 +24,9 @@ import (
"strings"
"testing"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
diff --git a/pkg/sentry/fsimpl/ext/directory.go b/pkg/sentry/fsimpl/ext/directory.go
index 8944171c8..ebb72b75e 100644
--- a/pkg/sentry/fsimpl/ext/directory.go
+++ b/pkg/sentry/fsimpl/ext/directory.go
@@ -17,8 +17,8 @@ package ext
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
"gvisor.dev/gvisor/pkg/sentry/memmap"
diff --git a/pkg/sentry/fsimpl/ext/disklayout/BUILD b/pkg/sentry/fsimpl/ext/disklayout/BUILD
index fcfaf5c3e..9bd9c76c0 100644
--- a/pkg/sentry/fsimpl/ext/disklayout/BUILD
+++ b/pkg/sentry/fsimpl/ext/disklayout/BUILD
@@ -1,5 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -23,7 +22,6 @@ go_library(
"superblock_old.go",
"test_utils.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
@@ -44,6 +42,6 @@ go_test(
"inode_test.go",
"superblock_test.go",
],
- embed = [":disklayout"],
+ library = ":disklayout",
deps = ["//pkg/sentry/kernel/time"],
)
diff --git a/pkg/sentry/fsimpl/ext/ext.go b/pkg/sentry/fsimpl/ext/ext.go
index 4b7d17dc6..373d23b74 100644
--- a/pkg/sentry/fsimpl/ext/ext.go
+++ b/pkg/sentry/fsimpl/ext/ext.go
@@ -21,9 +21,9 @@ import (
"io"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go
index 6c14a1e2d..05f992826 100644
--- a/pkg/sentry/fsimpl/ext/ext_test.go
+++ b/pkg/sentry/fsimpl/ext/ext_test.go
@@ -25,14 +25,14 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/runsc/testutil"
)
diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go
index 841274daf..92f7da40d 100644
--- a/pkg/sentry/fsimpl/ext/file_description.go
+++ b/pkg/sentry/fsimpl/ext/file_description.go
@@ -16,7 +16,7 @@ package ext
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
)
diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go
index 9afb1a84c..07bf58953 100644
--- a/pkg/sentry/fsimpl/ext/filesystem.go
+++ b/pkg/sentry/fsimpl/ext/filesystem.go
@@ -19,8 +19,8 @@ import (
"io"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go
index 8608805bf..191b39970 100644
--- a/pkg/sentry/fsimpl/ext/inode.go
+++ b/pkg/sentry/fsimpl/ext/inode.go
@@ -157,7 +157,9 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*v
switch in.impl.(type) {
case *regularFile:
var fd regularFileFD
- fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{})
+ if err := fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
return &fd.vfsfd, nil
case *directory:
// Can't open directories writably. This check is not necessary for a read
@@ -166,7 +168,9 @@ func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*v
return nil, syserror.EISDIR
}
var fd directoryFD
- fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{})
+ if err := fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
return &fd.vfsfd, nil
case *symlink:
if flags&linux.O_PATH == 0 {
diff --git a/pkg/sentry/fsimpl/ext/regular_file.go b/pkg/sentry/fsimpl/ext/regular_file.go
index d11153c90..30135ddb0 100644
--- a/pkg/sentry/fsimpl/ext/regular_file.go
+++ b/pkg/sentry/fsimpl/ext/regular_file.go
@@ -18,13 +18,13 @@ import (
"io"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/safemem"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// regularFile represents a regular file's inode. This too follows the
diff --git a/pkg/sentry/fsimpl/ext/symlink.go b/pkg/sentry/fsimpl/ext/symlink.go
index bdf8705c1..1447a4dc1 100644
--- a/pkg/sentry/fsimpl/ext/symlink.go
+++ b/pkg/sentry/fsimpl/ext/symlink.go
@@ -15,11 +15,11 @@
package ext
import (
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// symlink represents a symlink inode.
diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD
index 809178250..e73f1f857 100644
--- a/pkg/sentry/fsimpl/kernfs/BUILD
+++ b/pkg/sentry/fsimpl/kernfs/BUILD
@@ -1,8 +1,7 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
load("//tools/go_generics:defs.bzl", "go_template_instance")
-package(licenses = ["notice"])
+licenses(["notice"])
go_template_instance(
name = "slot_list",
@@ -27,20 +26,19 @@ go_library(
"slot_list.go",
"symlink.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/context",
"//pkg/fspath",
"//pkg/log",
"//pkg/refs",
- "//pkg/sentry/context",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
- "//pkg/sentry/usermem",
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserror",
+ "//pkg/usermem",
],
)
@@ -51,14 +49,13 @@ go_test(
deps = [
":kernfs",
"//pkg/abi/linux",
- "//pkg/fspath",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
+ "//pkg/context",
+ "//pkg/sentry/contexttest",
+ "//pkg/sentry/fsimpl/testutil",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/usermem",
"//pkg/sentry/vfs",
- "//pkg/sync",
"//pkg/syserror",
+ "//pkg/usermem",
"@com_github_google_go-cmp//cmp:go_default_library",
],
)
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index 606ca692d..373f801ff 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -18,11 +18,11 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// DynamicBytesFile implements kernfs.Inode and represents a read-only
@@ -55,7 +55,9 @@ func (f *DynamicBytesFile) Init(creds *auth.Credentials, ino uint64, data vfs.Dy
// Open implements Inode.Open.
func (f *DynamicBytesFile) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
fd := &DynamicBytesFD{}
- fd.Init(rp.Mount(), vfsd, f.data, flags)
+ if err := fd.Init(rp.Mount(), vfsd, f.data, flags); err != nil {
+ return nil, err
+ }
return &fd.vfsfd, nil
}
@@ -80,10 +82,13 @@ type DynamicBytesFD struct {
}
// Init initializes a DynamicBytesFD.
-func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, flags uint32) {
+func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, flags uint32) error {
+ if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil {
+ return err
+ }
fd.inode = d.Impl().(*Dentry).inode
fd.SetDataSource(data)
- fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{})
+ return nil
}
// Seek implements vfs.FileDescriptionImpl.Seek.
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index bcf069b5f..6104751c8 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -16,11 +16,11 @@ package kernfs
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// GenericDirectoryFD implements vfs.FileDescriptionImpl for a generic directory
@@ -43,9 +43,16 @@ type GenericDirectoryFD struct {
}
// Init initializes a GenericDirectoryFD.
-func (fd *GenericDirectoryFD) Init(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, flags uint32) {
+func (fd *GenericDirectoryFD) Init(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, flags uint32) error {
+ if vfs.AccessTypesForOpenFlags(flags)&vfs.MayWrite != 0 {
+ // Can't open directories for writing.
+ return syserror.EISDIR
+ }
+ if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil {
+ return err
+ }
fd.children = children
- fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{})
+ return nil
}
// VFSFileDescription returns a pointer to the vfs.FileDescription representing
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 79759e0fc..9d65d0179 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -20,9 +20,8 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -40,7 +39,7 @@ func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingP
return nil, syserror.ENOTDIR
}
// Directory searchable?
- if err := d.inode.CheckPermissions(rp.Credentials(), vfs.MayExec); err != nil {
+ if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
}
afterSymlink:
@@ -182,8 +181,8 @@ func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving
//
// Preconditions: Filesystem.mu must be locked for at least reading. parentInode
// == parentVFSD.Impl().(*Dentry).Inode. isDir(parentInode) == true.
-func checkCreateLocked(rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode Inode) (string, error) {
- if err := parentInode.CheckPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
+func checkCreateLocked(ctx context.Context, rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode Inode) (string, error) {
+ if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
return "", err
}
pc := rp.Component()
@@ -206,7 +205,7 @@ func checkCreateLocked(rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInod
// checkDeleteLocked checks that the file represented by vfsd may be deleted.
//
// Preconditions: Filesystem.mu must be locked for at least reading.
-func checkDeleteLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry) error {
+func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry) error {
parentVFSD := vfsd.Parent()
if parentVFSD == nil {
return syserror.EBUSY
@@ -214,36 +213,12 @@ func checkDeleteLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry) error {
if parentVFSD.IsDisowned() {
return syserror.ENOENT
}
- if err := parentVFSD.Impl().(*Dentry).inode.CheckPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
+ if err := parentVFSD.Impl().(*Dentry).inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
return err
}
return nil
}
-// checkRenameLocked checks that a rename operation may be performed on the
-// target dentry across the given set of parent directories. The target dentry
-// may be nil.
-//
-// Precondition: isDir(dstInode) == true.
-func checkRenameLocked(creds *auth.Credentials, src, dstDir *vfs.Dentry, dstInode Inode) error {
- srcDir := src.Parent()
- if srcDir == nil {
- return syserror.EBUSY
- }
- if srcDir.IsDisowned() {
- return syserror.ENOENT
- }
- if dstDir.IsDisowned() {
- return syserror.ENOENT
- }
- // Check for creation permissions on dst dir.
- if err := dstInode.CheckPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
- return err
- }
-
- return nil
-}
-
// Release implements vfs.FilesystemImpl.Release.
func (fs *Filesystem) Release() {
}
@@ -269,7 +244,7 @@ func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
if !d.isDir() {
return nil, syserror.ENOTDIR
}
- if err := inode.CheckPermissions(rp.Credentials(), vfs.MayExec); err != nil {
+ if err := inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
}
}
@@ -302,7 +277,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
if err != nil {
return err
}
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
+ pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode)
if err != nil {
return err
}
@@ -339,7 +314,7 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
if err != nil {
return err
}
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
+ pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode)
if err != nil {
return err
}
@@ -367,7 +342,7 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
if err != nil {
return err
}
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
+ pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode)
if err != nil {
return err
}
@@ -401,7 +376,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if err != nil {
return nil, err
}
- if err := inode.CheckPermissions(rp.Credentials(), ats); err != nil {
+ if err := inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
return inode.Open(rp, vfsd, opts.Flags)
@@ -420,7 +395,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if mustCreate {
return nil, syserror.EEXIST
}
- if err := inode.CheckPermissions(rp.Credentials(), ats); err != nil {
+ if err := inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
return inode.Open(rp, vfsd, opts.Flags)
@@ -432,7 +407,7 @@ afterTrailingSymlink:
return nil, err
}
// Check for search permission in the parent directory.
- if err := parentInode.CheckPermissions(rp.Credentials(), vfs.MayExec); err != nil {
+ if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
}
// Reject attempts to open directories with O_CREAT.
@@ -450,7 +425,7 @@ afterTrailingSymlink:
}
if childVFSD == nil {
// Already checked for searchability above; now check for writability.
- if err := parentInode.CheckPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
+ if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil {
return nil, err
}
if err := rp.Mount().CheckBeginWrite(); err != nil {
@@ -485,7 +460,7 @@ afterTrailingSymlink:
goto afterTrailingSymlink
}
}
- if err := childInode.CheckPermissions(rp.Credentials(), ats); err != nil {
+ if err := childInode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
return childInode.Open(rp, childVFSD, opts.Flags)
@@ -545,13 +520,13 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
srcVFSD := &src.vfsd
// Can we remove the src dentry?
- if err := checkDeleteLocked(rp, srcVFSD); err != nil {
+ if err := checkDeleteLocked(ctx, rp, srcVFSD); err != nil {
return err
}
// Can we create the dst dentry?
var dstVFSD *vfs.Dentry
- pc, err := checkCreateLocked(rp, dstDirVFSD, dstDirInode)
+ pc, err := checkCreateLocked(ctx, rp, dstDirVFSD, dstDirInode)
switch err {
case nil:
// Ok, continue with rename as replacement.
@@ -607,7 +582,7 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
defer rp.Mount().EndWrite()
- if err := checkDeleteLocked(rp, vfsd); err != nil {
+ if err := checkDeleteLocked(ctx, rp, vfsd); err != nil {
return err
}
if !vfsd.Impl().(*Dentry).isDir() {
@@ -683,7 +658,7 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
if err != nil {
return err
}
- pc, err := checkCreateLocked(rp, parentVFSD, parentInode)
+ pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode)
if err != nil {
return err
}
@@ -712,7 +687,7 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
defer rp.Mount().EndWrite()
- if err := checkDeleteLocked(rp, vfsd); err != nil {
+ if err := checkDeleteLocked(ctx, rp, vfsd); err != nil {
return err
}
if vfsd.Impl().(*Dentry).isDir() {
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 1d469a0db..adca2313f 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -19,8 +19,8 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/refs"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -262,7 +262,7 @@ func (a *InodeAttrs) SetStat(_ *vfs.Filesystem, opts vfs.SetStatOptions) error {
}
// CheckPermissions implements Inode.CheckPermissions.
-func (a *InodeAttrs) CheckPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
+func (a *InodeAttrs) CheckPermissions(_ context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
mode := a.Mode()
return vfs.GenericCheckPermissions(
creds,
@@ -510,3 +510,65 @@ type InodeSymlink struct {
func (InodeSymlink) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
return nil, syserror.ELOOP
}
+
+// StaticDirectory is a standard implementation of a directory with static
+// contents.
+//
+// +stateify savable
+type StaticDirectory struct {
+	InodeNotSymlink
+	InodeDirectoryNoNewChildren
+	// InodeAttrs holds the metadata set up by Init and provides
+	// CheckPermissions.
+	InodeAttrs
+	InodeNoDynamicLookup
+	// OrderedChildren holds the entries installed by NewStaticDir and is
+	// handed to the directory FDs created by Open.
+	OrderedChildren
+}
+
+// Compile-time check that *StaticDirectory satisfies Inode.
+var _ Inode = (*StaticDirectory)(nil)
+
+// NewStaticDir creates a new static directory and returns its dentry.
+//
+// perm must contain only permission bits; see StaticDirectory.Init. The new
+// inode's link count is increased by the number of links that populating it
+// with children reports.
+func NewStaticDir(creds *auth.Credentials, ino uint64, perm linux.FileMode, children map[string]*Dentry) *Dentry {
+	inode := &StaticDirectory{}
+	inode.Init(creds, ino, perm)
+
+	dentry := &Dentry{}
+	dentry.Init(inode)
+
+	inode.OrderedChildren.Init(OrderedChildrenOptions{})
+	links := inode.OrderedChildren.Populate(dentry, children)
+	inode.IncLinks(links)
+
+	return dentry
+}
+
+// Init initializes StaticDirectory.
+//
+// perm must contain only bits in linux.PermissionsMask; Init panics
+// otherwise. The directory file type bit is added here.
+func (s *StaticDirectory) Init(creds *auth.Credentials, ino uint64, perm linux.FileMode) {
+	if extra := perm &^ linux.PermissionsMask; extra != 0 {
+		// Report the offending bits rather than the valid ones.
+		panic(fmt.Sprintf("Only permission mask must be set: %x", extra))
+	}
+	s.InodeAttrs.Init(creds, ino, linux.ModeDirectory|perm)
+}
+
+// Open implements kernfs.Inode.
+//
+// Errors from GenericDirectoryFD.Init (such as EISDIR when flags request
+// write access) are returned to the caller.
+func (s *StaticDirectory) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
+	fd := &GenericDirectoryFD{}
+	if err := fd.Init(rp.Mount(), vfsd, &s.OrderedChildren, flags); err != nil {
+		return nil, err
+	}
+	return fd.VFSFileDescription(), nil
+}
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index bb12f39a2..79ebea8a5 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -56,8 +56,8 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/refs"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -320,7 +320,7 @@ type inodeMetadata interface {
// CheckPermissions checks that creds may access this inode for the
// requested access type, per the rules of
// fs/namei.c:generic_permission().
- CheckPermissions(creds *auth.Credentials, atx vfs.AccessTypes) error
+ CheckPermissions(ctx context.Context, creds *auth.Credentials, atx vfs.AccessTypes) error
// Mode returns the (struct stat)::st_mode value for this inode. This is
// separated from Stat for performance.
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index 5c9d580e1..ee65cf491 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -17,21 +17,18 @@ package kernfs_test
import (
"bytes"
"fmt"
- "io"
- "runtime"
"testing"
"github.com/google/go-cmp/cmp"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
const defaultMode linux.FileMode = 01777
@@ -41,21 +38,11 @@ const staticFileContent = "This is sample content for a static test file."
// filesystem. See newTestSystem.
type RootDentryFn func(*auth.Credentials, *filesystem) *kernfs.Dentry
-// TestSystem represents the context for a single test.
-type TestSystem struct {
- t *testing.T
- ctx context.Context
- creds *auth.Credentials
- vfs *vfs.VirtualFilesystem
- mns *vfs.MountNamespace
- root vfs.VirtualDentry
-}
-
// newTestSystem sets up a minimal environment for running a test, including an
// instance of a test filesystem. Tests can control the contents of the
// filesystem by providing an appropriate rootFn, which should return a
// pre-populated root dentry.
-func newTestSystem(t *testing.T, rootFn RootDentryFn) *TestSystem {
+func newTestSystem(t *testing.T, rootFn RootDentryFn) *testutil.System {
ctx := contexttest.Context(t)
creds := auth.CredentialsFromContext(ctx)
v := vfs.New()
@@ -66,57 +53,7 @@ func newTestSystem(t *testing.T, rootFn RootDentryFn) *TestSystem {
if err != nil {
t.Fatalf("Failed to create testfs root mount: %v", err)
}
-
- s := &TestSystem{
- t: t,
- ctx: ctx,
- creds: creds,
- vfs: v,
- mns: mns,
- root: mns.Root(),
- }
- runtime.SetFinalizer(s, func(s *TestSystem) { s.root.DecRef() })
- return s
-}
-
-// PathOpAtRoot constructs a vfs.PathOperation for a path from the
-// root of the test filesystem.
-//
-// Precondition: path should be relative path.
-func (s *TestSystem) PathOpAtRoot(path string) vfs.PathOperation {
- return vfs.PathOperation{
- Root: s.root,
- Start: s.root,
- Path: fspath.Parse(path),
- }
-}
-
-// GetDentryOrDie attempts to resolve a dentry referred to by the
-// provided path operation. If unsuccessful, the test fails.
-func (s *TestSystem) GetDentryOrDie(pop vfs.PathOperation) vfs.VirtualDentry {
- vd, err := s.vfs.GetDentryAt(s.ctx, s.creds, &pop, &vfs.GetDentryOptions{})
- if err != nil {
- s.t.Fatalf("GetDentryAt(pop:%+v) failed: %v", pop, err)
- }
- return vd
-}
-
-func (s *TestSystem) ReadToEnd(fd *vfs.FileDescription) (string, error) {
- buf := make([]byte, usermem.PageSize)
- bufIOSeq := usermem.BytesIOSequence(buf)
- opts := vfs.ReadOptions{}
-
- var content bytes.Buffer
- for {
- n, err := fd.Impl().Read(s.ctx, bufIOSeq, opts)
- if n == 0 || err != nil {
- if err == io.EOF {
- err = nil
- }
- return content.String(), err
- }
- content.Write(buf[:n])
- }
+ return testutil.NewSystem(ctx, t, v, mns)
}
type fsType struct {
@@ -178,7 +115,9 @@ func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMod
func (d *readonlyDir) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
fd := &kernfs.GenericDirectoryFD{}
- fd.Init(rp.Mount(), vfsd, &d.OrderedChildren, flags)
+ if err := fd.Init(rp.Mount(), vfsd, &d.OrderedChildren, flags); err != nil {
+ return nil, err
+ }
return fd.VFSFileDescription(), nil
}
@@ -260,6 +199,7 @@ func TestBasic(t *testing.T) {
"file1": fs.newFile(creds, staticFileContent),
})
})
+ defer sys.Destroy()
sys.GetDentryOrDie(sys.PathOpAtRoot("file1")).DecRef()
}
@@ -269,9 +209,10 @@ func TestMkdirGetDentry(t *testing.T) {
"dir1": fs.newDir(creds, 0755, nil),
})
})
+ defer sys.Destroy()
pop := sys.PathOpAtRoot("dir1/a new directory")
- if err := sys.vfs.MkdirAt(sys.ctx, sys.creds, &pop, &vfs.MkdirOptions{Mode: 0755}); err != nil {
+ if err := sys.VFS.MkdirAt(sys.Ctx, sys.Creds, pop, &vfs.MkdirOptions{Mode: 0755}); err != nil {
t.Fatalf("MkdirAt for PathOperation %+v failed: %v", pop, err)
}
sys.GetDentryOrDie(pop).DecRef()
@@ -283,20 +224,23 @@ func TestReadStaticFile(t *testing.T) {
"file1": fs.newFile(creds, staticFileContent),
})
})
+ defer sys.Destroy()
pop := sys.PathOpAtRoot("file1")
- fd, err := sys.vfs.OpenAt(sys.ctx, sys.creds, &pop, &vfs.OpenOptions{})
+ fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, pop, &vfs.OpenOptions{
+ Flags: linux.O_RDONLY,
+ })
if err != nil {
- sys.t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err)
+ t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err)
}
defer fd.DecRef()
content, err := sys.ReadToEnd(fd)
if err != nil {
- sys.t.Fatalf("Read failed: %v", err)
+ t.Fatalf("Read failed: %v", err)
}
if diff := cmp.Diff(staticFileContent, content); diff != "" {
- sys.t.Fatalf("Read returned unexpected data:\n--- want\n+++ got\n%v", diff)
+ t.Fatalf("Read returned unexpected data:\n--- want\n+++ got\n%v", diff)
}
}
@@ -306,83 +250,48 @@ func TestCreateNewFileInStaticDir(t *testing.T) {
"dir1": fs.newDir(creds, 0755, nil),
})
})
+ defer sys.Destroy()
pop := sys.PathOpAtRoot("dir1/newfile")
opts := &vfs.OpenOptions{Flags: linux.O_CREAT | linux.O_EXCL, Mode: defaultMode}
- fd, err := sys.vfs.OpenAt(sys.ctx, sys.creds, &pop, opts)
+ fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, pop, opts)
if err != nil {
- sys.t.Fatalf("OpenAt(pop:%+v, opts:%+v) failed: %v", pop, opts, err)
+ t.Fatalf("OpenAt(pop:%+v, opts:%+v) failed: %v", pop, opts, err)
}
// Close the file. The file should persist.
fd.DecRef()
- fd, err = sys.vfs.OpenAt(sys.ctx, sys.creds, &pop, &vfs.OpenOptions{})
+ fd, err = sys.VFS.OpenAt(sys.Ctx, sys.Creds, pop, &vfs.OpenOptions{
+ Flags: linux.O_RDONLY,
+ })
if err != nil {
- sys.t.Fatalf("OpenAt(pop:%+v) = %+v failed: %v", pop, fd, err)
+ t.Fatalf("OpenAt(pop:%+v) = %+v failed: %v", pop, fd, err)
}
fd.DecRef()
}
-// direntCollector provides an implementation for vfs.IterDirentsCallback for
-// testing. It simply iterates to the end of a given directory FD and collects
-// all dirents emitted by the callback.
-type direntCollector struct {
- mu sync.Mutex
- dirents map[string]vfs.Dirent
-}
-
-// Handle implements vfs.IterDirentsCallback.Handle.
-func (d *direntCollector) Handle(dirent vfs.Dirent) bool {
- d.mu.Lock()
- if d.dirents == nil {
- d.dirents = make(map[string]vfs.Dirent)
- }
- d.dirents[dirent.Name] = dirent
- d.mu.Unlock()
- return true
-}
-
-// count returns the number of dirents currently in the collector.
-func (d *direntCollector) count() int {
- d.mu.Lock()
- defer d.mu.Unlock()
- return len(d.dirents)
-}
-
-// contains checks whether the collector has a dirent with the given name and
-// type.
-func (d *direntCollector) contains(name string, typ uint8) error {
- d.mu.Lock()
- defer d.mu.Unlock()
- dirent, ok := d.dirents[name]
- if !ok {
- return fmt.Errorf("No dirent named %q found", name)
- }
- if dirent.Type != typ {
- return fmt.Errorf("Dirent named %q found, but was expecting type %d, got: %+v", name, typ, dirent)
- }
- return nil
-}
-
func TestDirFDReadWrite(t *testing.T) {
sys := newTestSystem(t, func(creds *auth.Credentials, fs *filesystem) *kernfs.Dentry {
return fs.newReadonlyDir(creds, 0755, nil)
})
+ defer sys.Destroy()
pop := sys.PathOpAtRoot("/")
- fd, err := sys.vfs.OpenAt(sys.ctx, sys.creds, &pop, &vfs.OpenOptions{})
+ fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, pop, &vfs.OpenOptions{
+ Flags: linux.O_RDONLY,
+ })
if err != nil {
- sys.t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err)
+ t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err)
}
defer fd.DecRef()
// Read/Write should fail for directory FDs.
- if _, err := fd.Read(sys.ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); err != syserror.EISDIR {
- sys.t.Fatalf("Read for directory FD failed with unexpected error: %v", err)
+ if _, err := fd.Read(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); err != syserror.EISDIR {
+ t.Fatalf("Read for directory FD failed with unexpected error: %v", err)
}
- if _, err := fd.Write(sys.ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); err != syserror.EISDIR {
- sys.t.Fatalf("Wrire for directory FD failed with unexpected error: %v", err)
+ if _, err := fd.Write(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); err != syserror.EBADF {
+ t.Fatalf("Write for directory FD failed with unexpected error: %v", err)
}
}
@@ -397,30 +306,12 @@ func TestDirFDIterDirents(t *testing.T) {
"file1": fs.newFile(creds, staticFileContent),
})
})
+ defer sys.Destroy()
pop := sys.PathOpAtRoot("/")
- fd, err := sys.vfs.OpenAt(sys.ctx, sys.creds, &pop, &vfs.OpenOptions{})
- if err != nil {
- sys.t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err)
- }
- defer fd.DecRef()
-
- collector := &direntCollector{}
- if err := fd.IterDirents(sys.ctx, collector); err != nil {
- sys.t.Fatalf("IterDirent failed: %v", err)
- }
-
- // Root directory should contain ".", ".." and 3 children:
- if collector.count() != 5 {
- sys.t.Fatalf("IterDirent returned too many dirents")
- }
- for _, dirName := range []string{".", "..", "dir1", "dir2"} {
- if err := collector.contains(dirName, linux.DT_DIR); err != nil {
- sys.t.Fatalf("IterDirent had unexpected results: %v", err)
- }
- }
- if err := collector.contains("file1", linux.DT_REG); err != nil {
- sys.t.Fatalf("IterDirent had unexpected results: %v", err)
- }
-
+ sys.AssertAllDirentTypes(sys.ListDirents(pop), map[string]testutil.DirentType{
+ "dir1": linux.DT_DIR,
+ "dir2": linux.DT_DIR,
+ "file1": linux.DT_REG,
+ })
}
diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go
index 068063f4e..0ee7eb9b7 100644
--- a/pkg/sentry/fsimpl/kernfs/symlink.go
+++ b/pkg/sentry/fsimpl/kernfs/symlink.go
@@ -16,11 +16,13 @@ package kernfs
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
-type staticSymlink struct {
+// StaticSymlink provides an Inode implementation for symlinks that point to
+// an immutable target.
+type StaticSymlink struct {
InodeAttrs
InodeNoopRefCount
InodeSymlink
@@ -28,18 +30,25 @@ type staticSymlink struct {
target string
}
-var _ Inode = (*staticSymlink)(nil)
+var _ Inode = (*StaticSymlink)(nil)
// NewStaticSymlink creates a new symlink file pointing to 'target'.
-func NewStaticSymlink(creds *auth.Credentials, ino uint64, perm linux.FileMode, target string) *Dentry {
- inode := &staticSymlink{target: target}
- inode.Init(creds, ino, linux.ModeSymlink|perm)
+func NewStaticSymlink(creds *auth.Credentials, ino uint64, target string) *Dentry {
+ inode := &StaticSymlink{}
+ inode.Init(creds, ino, target)
d := &Dentry{}
d.Init(inode)
return d
}
-func (s *staticSymlink) Readlink(_ context.Context) (string, error) {
+// Init initializes the instance.
+func (s *StaticSymlink) Init(creds *auth.Credentials, ino uint64, target string) {
+ s.target = target
+ s.InodeAttrs.Init(creds, ino, linux.ModeSymlink|0777)
+}
+
+// Readlink implements Inode.
+func (s *StaticSymlink) Readlink(_ context.Context) (string, error) {
return s.target, nil
}
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
index 1f44b3217..12aac2e6a 100644
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -1,44 +1,40 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
-package(licenses = ["notice"])
+licenses(["notice"])
go_library(
name = "proc",
srcs = [
"filesystem.go",
- "loadavg.go",
- "meminfo.go",
- "mounts.go",
- "net.go",
- "stat.go",
- "sys.go",
+ "subtasks.go",
"task.go",
"task_files.go",
"tasks.go",
"tasks_files.go",
- "version.go",
+ "tasks_net.go",
+ "tasks_sys.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc",
deps = [
"//pkg/abi/linux",
- "//pkg/binary",
+ "//pkg/context",
"//pkg/log",
- "//pkg/sentry/context",
+ "//pkg/safemem",
"//pkg/sentry/fs",
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/time",
"//pkg/sentry/limits",
"//pkg/sentry/mm",
"//pkg/sentry/socket",
"//pkg/sentry/socket/unix",
"//pkg/sentry/socket/unix/transport",
"//pkg/sentry/usage",
- "//pkg/sentry/usermem",
"//pkg/sentry/vfs",
"//pkg/syserror",
+ "//pkg/tcpip/header",
+ "//pkg/usermem",
],
)
@@ -46,32 +42,21 @@ go_test(
name = "proc_test",
size = "small",
srcs = [
- "boot_test.go",
- "net_test.go",
+ "tasks_sys_test.go",
"tasks_test.go",
],
- embed = [":proc"],
+ library = ":proc",
deps = [
"//pkg/abi/linux",
- "//pkg/cpuid",
+ "//pkg/context",
"//pkg/fspath",
- "//pkg/memutil",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
- "//pkg/sentry/fs",
+ "//pkg/sentry/contexttest",
+ "//pkg/sentry/fsimpl/testutil",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/sched",
- "//pkg/sentry/limits",
- "//pkg/sentry/loader",
- "//pkg/sentry/pgalloc",
- "//pkg/sentry/platform",
- "//pkg/sentry/platform/kvm",
- "//pkg/sentry/platform/ptrace",
- "//pkg/sentry/time",
- "//pkg/sentry/usermem",
"//pkg/sentry/vfs",
"//pkg/syserror",
+ "//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
index d09182c77..11477b6a9 100644
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -19,7 +19,7 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -47,7 +47,12 @@ func (ft *procFSType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFile
procfs := &kernfs.Filesystem{}
procfs.VFSFilesystem().Init(vfsObj, procfs)
- _, dentry := newTasksInode(procfs, k, pidns)
+ var data *InternalData
+ if opts.InternalData != nil {
+ data = opts.InternalData.(*InternalData)
+ }
+
+ _, dentry := newTasksInode(procfs, k, pidns, data.Cgroups)
return procfs.VFSFilesystem(), dentry.VFSDentry(), nil
}
@@ -67,3 +72,20 @@ func newDentry(creds *auth.Credentials, ino uint64, perm linux.FileMode, inode d
d.Init(inode)
return d
}
+
+type staticFile struct {
+ kernfs.DynamicBytesFile
+ vfs.StaticData
+}
+
+var _ dynamicInode = (*staticFile)(nil)
+
+func newStaticFile(data string) *staticFile {
+ return &staticFile{StaticData: vfs.StaticData{Data: data}}
+}
+
+// InternalData contains internal data passed in to the procfs mount via
+// vfs.GetFilesystemOptions.InternalData.
+type InternalData struct {
+ Cgroups map[string]string
+}
diff --git a/pkg/sentry/fsimpl/proc/loadavg.go b/pkg/sentry/fsimpl/proc/loadavg.go
deleted file mode 100644
index 5351d86e8..000000000
--- a/pkg/sentry/fsimpl/proc/loadavg.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
-
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
-)
-
-// loadavgData backs /proc/loadavg.
-//
-// +stateify savable
-type loadavgData struct {
- kernfs.DynamicBytesFile
-}
-
-var _ dynamicInode = (*loadavgData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- // TODO(b/62345059): Include real data in fields.
- // Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods.
- // Column 4-5: currently running processes and the total number of processes.
- // Column 6: the last process ID used.
- fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
- return nil
-}
diff --git a/pkg/sentry/fsimpl/proc/meminfo.go b/pkg/sentry/fsimpl/proc/meminfo.go
deleted file mode 100644
index cbdd4f3fc..000000000
--- a/pkg/sentry/fsimpl/proc/meminfo.go
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
-
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
-)
-
-// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo.
-//
-// +stateify savable
-type meminfoData struct {
- kernfs.DynamicBytesFile
-
- // k is the owning Kernel.
- k *kernel.Kernel
-}
-
-var _ dynamicInode = (*meminfoData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- mf := d.k.MemoryFile()
- mf.UpdateUsage()
- snapshot, totalUsage := usage.MemoryAccounting.Copy()
- totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
- anon := snapshot.Anonymous + snapshot.Tmpfs
- file := snapshot.PageCache + snapshot.Mapped
- // We don't actually have active/inactive LRUs, so just make up numbers.
- activeFile := (file / 2) &^ (usermem.PageSize - 1)
- inactiveFile := file - activeFile
-
- fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024)
- memFree := (totalSize - totalUsage) / 1024
- // We use MemFree as MemAvailable because we don't swap.
- // TODO(rahat): When reclaim is implemented the value of MemAvailable
- // should change.
- fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree)
- fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree)
- fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices
- fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024)
- // Emulate a system with no swap, which disables inactivation of anon pages.
- fmt.Fprintf(buf, "SwapCache: 0 kB\n")
- fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024)
- fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024)
- fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024)
- fmt.Fprintf(buf, "Inactive(anon): 0 kB\n")
- fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024)
- fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
- fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263)
- fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263)
- fmt.Fprintf(buf, "SwapTotal: 0 kB\n")
- fmt.Fprintf(buf, "SwapFree: 0 kB\n")
- fmt.Fprintf(buf, "Dirty: 0 kB\n")
- fmt.Fprintf(buf, "Writeback: 0 kB\n")
- fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024)
- fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
- fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024)
- return nil
-}
diff --git a/pkg/sentry/fsimpl/proc/mounts.go b/pkg/sentry/fsimpl/proc/mounts.go
deleted file mode 100644
index 8683cf677..000000000
--- a/pkg/sentry/fsimpl/proc/mounts.go
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import "gvisor.dev/gvisor/pkg/sentry/kernel"
-
-// TODO(gvisor.dev/issue/1195): Implement mountInfoFile and mountsFile.
-
-// mountInfoFile implements vfs.DynamicBytesSource for /proc/[pid]/mountinfo.
-//
-// +stateify savable
-type mountInfoFile struct {
- t *kernel.Task
-}
-
-// mountsFile implements vfs.DynamicBytesSource for /proc/[pid]/mounts.
-//
-// +stateify savable
-type mountsFile struct {
- t *kernel.Task
-}
diff --git a/pkg/sentry/fsimpl/proc/net.go b/pkg/sentry/fsimpl/proc/net.go
deleted file mode 100644
index fd46eebf8..000000000
--- a/pkg/sentry/fsimpl/proc/net.go
+++ /dev/null
@@ -1,338 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/inet"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/socket"
- "gvisor.dev/gvisor/pkg/sentry/socket/unix"
- "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6.
-//
-// +stateify savable
-type ifinet6 struct {
- s inet.Stack
-}
-
-var _ vfs.DynamicBytesSource = (*ifinet6)(nil)
-
-func (n *ifinet6) contents() []string {
- var lines []string
- nics := n.s.Interfaces()
- for id, naddrs := range n.s.InterfaceAddrs() {
- nic, ok := nics[id]
- if !ok {
- // NIC was added after NICNames was called. We'll just
- // ignore it.
- continue
- }
-
- for _, a := range naddrs {
- // IPv6 only.
- if a.Family != linux.AF_INET6 {
- continue
- }
-
- // Fields:
- // IPv6 address displayed in 32 hexadecimal chars without colons
- // Netlink device number (interface index) in hexadecimal (use nic id)
- // Prefix length in hexadecimal
- // Scope value (use 0)
- // Interface flags
- // Device name
- lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name))
- }
- }
- return lines
-}
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error {
- for _, l := range n.contents() {
- buf.WriteString(l)
- }
- return nil
-}
-
-// netDev implements vfs.DynamicBytesSource for /proc/net/dev.
-//
-// +stateify savable
-type netDev struct {
- s inet.Stack
-}
-
-var _ vfs.DynamicBytesSource = (*netDev)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (n *netDev) Generate(ctx context.Context, buf *bytes.Buffer) error {
- interfaces := n.s.Interfaces()
- buf.WriteString("Inter-| Receive | Transmit\n")
- buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n")
-
- for _, i := range interfaces {
- // Implements the same format as
- // net/core/net-procfs.c:dev_seq_printf_stats.
- var stats inet.StatDev
- if err := n.s.Statistics(&stats, i.Name); err != nil {
- log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err)
- continue
- }
- fmt.Fprintf(
- buf,
- "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n",
- i.Name,
- // Received
- stats[0], // bytes
- stats[1], // packets
- stats[2], // errors
- stats[3], // dropped
- stats[4], // fifo
- stats[5], // frame
- stats[6], // compressed
- stats[7], // multicast
- // Transmitted
- stats[8], // bytes
- stats[9], // packets
- stats[10], // errors
- stats[11], // dropped
- stats[12], // fifo
- stats[13], // frame
- stats[14], // compressed
- stats[15], // multicast
- )
- }
-
- return nil
-}
-
-// netUnix implements vfs.DynamicBytesSource for /proc/net/unix.
-//
-// +stateify savable
-type netUnix struct {
- k *kernel.Kernel
-}
-
-var _ vfs.DynamicBytesSource = (*netUnix)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (n *netUnix) Generate(ctx context.Context, buf *bytes.Buffer) error {
- buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n")
- for _, se := range n.k.ListSockets() {
- s := se.Sock.Get()
- if s == nil {
- log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock)
- continue
- }
- sfile := s.(*fs.File)
- if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX {
- s.DecRef()
- // Not a unix socket.
- continue
- }
- sops := sfile.FileOperations.(*unix.SocketOperations)
-
- addr, err := sops.Endpoint().GetLocalAddress()
- if err != nil {
- log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err)
- addr.Addr = "<unknown>"
- }
-
- sockFlags := 0
- if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok {
- if ce.Listening() {
- // For unix domain sockets, linux reports a single flag
- // value if the socket is listening, of __SO_ACCEPTCON.
- sockFlags = linux.SO_ACCEPTCON
- }
- }
-
- // In the socket entry below, the value for the 'Num' field requires
- // some consideration. Linux prints the address to the struct
- // unix_sock representing a socket in the kernel, but may redact the
- // value for unprivileged users depending on the kptr_restrict
- // sysctl.
- //
- // One use for this field is to allow a privileged user to
- // introspect into the kernel memory to determine information about
- // a socket not available through procfs, such as the socket's peer.
- //
- // In gvisor, returning a pointer to our internal structures would
- // be pointless, as it wouldn't match the memory layout for struct
- // unix_sock, making introspection difficult. We could populate a
- // struct unix_sock with the appropriate data, but even that
- // requires consideration for which kernel version to emulate, as
- // the definition of this struct changes over time.
- //
- // For now, we always redact this pointer.
- fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d",
- (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct.
- sfile.ReadRefs()-1, // RefCount, don't count our own ref.
- 0, // Protocol, always 0 for UDS.
- sockFlags, // Flags.
- sops.Endpoint().Type(), // Type.
- sops.State(), // State.
- sfile.InodeID(), // Inode.
- )
-
- // Path
- if len(addr.Addr) != 0 {
- if addr.Addr[0] == 0 {
- // Abstract path.
- fmt.Fprintf(buf, " @%s", string(addr.Addr[1:]))
- } else {
- fmt.Fprintf(buf, " %s", string(addr.Addr))
- }
- }
- fmt.Fprintf(buf, "\n")
-
- s.DecRef()
- }
- return nil
-}
-
-// netTCP implements vfs.DynamicBytesSource for /proc/net/tcp.
-//
-// +stateify savable
-type netTCP struct {
- k *kernel.Kernel
-}
-
-var _ vfs.DynamicBytesSource = (*netTCP)(nil)
-
-func (n *netTCP) Generate(ctx context.Context, buf *bytes.Buffer) error {
- t := kernel.TaskFromContext(ctx)
- buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n")
- for _, se := range n.k.ListSockets() {
- s := se.Sock.Get()
- if s == nil {
- log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock)
- continue
- }
- sfile := s.(*fs.File)
- sops, ok := sfile.FileOperations.(socket.Socket)
- if !ok {
- panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
- }
- if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) {
- s.DecRef()
- // Not tcp4 sockets.
- continue
- }
-
- // Linux's documentation for the fields below can be found at
- // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
- // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
- // Note that the header doesn't contain labels for all the fields.
-
- // Field: sl; entry number.
- fmt.Fprintf(buf, "%4d: ", se.ID)
-
- portBuf := make([]byte, 2)
-
- // Field: local_adddress.
- var localAddr linux.SockAddrInet
- if local, _, err := sops.GetSockName(t); err == nil {
- localAddr = *local.(*linux.SockAddrInet)
- }
- binary.LittleEndian.PutUint16(portBuf, localAddr.Port)
- fmt.Fprintf(buf, "%08X:%04X ",
- binary.LittleEndian.Uint32(localAddr.Addr[:]),
- portBuf)
-
- // Field: rem_address.
- var remoteAddr linux.SockAddrInet
- if remote, _, err := sops.GetPeerName(t); err == nil {
- remoteAddr = *remote.(*linux.SockAddrInet)
- }
- binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port)
- fmt.Fprintf(buf, "%08X:%04X ",
- binary.LittleEndian.Uint32(remoteAddr.Addr[:]),
- portBuf)
-
- // Field: state; socket state.
- fmt.Fprintf(buf, "%02X ", sops.State())
-
- // Field: tx_queue, rx_queue; number of packets in the transmit and
- // receive queue. Unimplemented.
- fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
-
- // Field: tr, tm->when; timer active state and number of jiffies
- // until timer expires. Unimplemented.
- fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
-
- // Field: retrnsmt; number of unrecovered RTO timeouts.
- // Unimplemented.
- fmt.Fprintf(buf, "%08X ", 0)
-
- // Field: uid.
- uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
- if err != nil {
- log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
- fmt.Fprintf(buf, "%5d ", 0)
- } else {
- fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()))
- }
-
- // Field: timeout; number of unanswered 0-window probes.
- // Unimplemented.
- fmt.Fprintf(buf, "%8d ", 0)
-
- // Field: inode.
- fmt.Fprintf(buf, "%8d ", sfile.InodeID())
-
- // Field: refcount. Don't count the ref we obtain while deferencing
- // the weakref to this socket.
- fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1)
-
- // Field: Socket struct address. Redacted due to the same reason as
- // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
- fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
-
- // Field: retransmit timeout. Unimplemented.
- fmt.Fprintf(buf, "%d ", 0)
-
- // Field: predicted tick of soft clock (delayed ACK control data).
- // Unimplemented.
- fmt.Fprintf(buf, "%d ", 0)
-
- // Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
- fmt.Fprintf(buf, "%d ", 0)
-
- // Field: sending congestion window, Unimplemented.
- fmt.Fprintf(buf, "%d ", 0)
-
- // Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
- // Unimplemented, report as large threshold.
- fmt.Fprintf(buf, "%d", -1)
-
- fmt.Fprintf(buf, "\n")
-
- s.DecRef()
- }
-
- return nil
-}
diff --git a/pkg/sentry/fsimpl/proc/stat.go b/pkg/sentry/fsimpl/proc/stat.go
deleted file mode 100644
index 50894a534..000000000
--- a/pkg/sentry/fsimpl/proc/stat.go
+++ /dev/null
@@ -1,129 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
-)
-
-// cpuStats contains the breakdown of CPU time for /proc/stat.
-type cpuStats struct {
- // user is time spent in userspace tasks with non-positive niceness.
- user uint64
-
- // nice is time spent in userspace tasks with positive niceness.
- nice uint64
-
- // system is time spent in non-interrupt kernel context.
- system uint64
-
- // idle is time spent idle.
- idle uint64
-
- // ioWait is time spent waiting for IO.
- ioWait uint64
-
- // irq is time spent in interrupt context.
- irq uint64
-
- // softirq is time spent in software interrupt context.
- softirq uint64
-
- // steal is involuntary wait time.
- steal uint64
-
- // guest is time spent in guests with non-positive niceness.
- guest uint64
-
- // guestNice is time spent in guests with positive niceness.
- guestNice uint64
-}
-
-// String implements fmt.Stringer.
-func (c cpuStats) String() string {
- return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice)
-}
-
-// statData implements vfs.DynamicBytesSource for /proc/stat.
-//
-// +stateify savable
-type statData struct {
- kernfs.DynamicBytesFile
-
- // k is the owning Kernel.
- k *kernel.Kernel
-}
-
-var _ dynamicInode = (*statData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (s *statData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- // TODO(b/37226836): We currently export only zero CPU stats. We could
- // at least provide some aggregate stats.
- var cpu cpuStats
- fmt.Fprintf(buf, "cpu %s\n", cpu)
-
- for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ {
- fmt.Fprintf(buf, "cpu%d %s\n", c, cpu)
- }
-
- // The total number of interrupts is dependent on the CPUs and PCI
- // devices on the system. See arch_probe_nr_irqs.
- //
- // Since we don't report real interrupt stats, just choose an arbitrary
- // value from a representative VM.
- const numInterrupts = 256
-
- // The Kernel doesn't handle real interrupts, so report all zeroes.
- // TODO(b/37226836): We could count page faults as #PF.
- fmt.Fprintf(buf, "intr 0") // total
- for i := 0; i < numInterrupts; i++ {
- fmt.Fprintf(buf, " 0")
- }
- fmt.Fprintf(buf, "\n")
-
- // Total number of context switches.
- // TODO(b/37226836): Count this.
- fmt.Fprintf(buf, "ctxt 0\n")
-
- // CLOCK_REALTIME timestamp from boot, in seconds.
- fmt.Fprintf(buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds())
-
- // Total number of clones.
- // TODO(b/37226836): Count this.
- fmt.Fprintf(buf, "processes 0\n")
-
- // Number of runnable tasks.
- // TODO(b/37226836): Count this.
- fmt.Fprintf(buf, "procs_running 0\n")
-
- // Number of tasks waiting on IO.
- // TODO(b/37226836): Count this.
- fmt.Fprintf(buf, "procs_blocked 0\n")
-
- // Number of each softirq handled.
- fmt.Fprintf(buf, "softirq 0") // total
- for i := 0; i < linux.NumSoftIRQ; i++ {
- fmt.Fprintf(buf, " 0")
- }
- fmt.Fprintf(buf, "\n")
- return nil
-}
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
new file mode 100644
index 000000000..353e37195
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -0,0 +1,128 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "sort"
+ "strconv"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// subtasksInode represents the inode for /proc/[pid]/task/ directory.
+//
+// Its entries are not stored: Lookup and IterDirents compute them from the
+// thread group of task each time they are called.
+//
+// +stateify savable
+type subtasksInode struct {
+ kernfs.InodeNotSymlink
+ kernfs.InodeDirectoryNoNewChildren
+ kernfs.InodeAttrs
+ kernfs.OrderedChildren
+
+ // task is the task whose thread group this directory lists.
+ task *kernel.Task
+ // pidns is the PID namespace used to resolve entry names to tasks and to
+ // obtain the thread IDs that are listed.
+ pidns *kernel.PIDNamespace
+ // inoGen supplies inode numbers for this inode, for reported dirents and
+ // for the task inodes created by Lookup.
+ inoGen InoGenerator
+ // cgroupControllers is handed unchanged to newTaskInode by Lookup.
+ cgroupControllers map[string]string
+}
+
+var _ kernfs.Inode = (*subtasksInode)(nil)
+
+// newSubtasks builds the dentry for the /proc/[pid]/task directory of task.
+// The subtasksInode is initialised as a 0555 directory and wrapped in a
+// taskOwnedInode owned by task before being attached to the returned dentry.
+func newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, inoGen InoGenerator, cgroupControllers map[string]string) *kernfs.Dentry {
+	dir := new(subtasksInode)
+	dir.task = task
+	dir.pidns = pidns
+	dir.inoGen = inoGen
+	dir.cgroupControllers = cgroupControllers
+
+	// Note: credentials are overridden by taskOwnedInode.
+	dir.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
+	dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+
+	d := new(kernfs.Dentry)
+	d.Init(&taskOwnedInode{Inode: dir, owner: task})
+	return d
+}
+
+// Valid implements kernfs.inodeDynamicLookup. It unconditionally reports true.
+func (i *subtasksInode) Valid(ctx context.Context) bool {
+ return true
+}
+
+// Lookup implements kernfs.inodeDynamicLookup.
+//
+// name must parse as a base-10 unsigned 32-bit thread ID that i.pidns resolves
+// to a task belonging to the same thread group as i.task; every other case
+// yields ENOENT. On success a new task inode is created with
+// isThreadGroup=false and its VFS dentry is returned.
+func (i *subtasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
+	id, parseErr := strconv.ParseUint(name, 10, 32)
+	if parseErr != nil {
+		return nil, syserror.ENOENT
+	}
+
+	t := i.pidns.TaskWithID(kernel.ThreadID(id))
+	if t == nil || t.ThreadGroup() != i.task.ThreadGroup() {
+		return nil, syserror.ENOENT
+	}
+
+	return newTaskInode(i.inoGen, t, i.pidns, false, i.cgroupControllers).VFSDentry(), nil
+}
+
+// IterDirents implements kernfs.inodeDynamicLookup.
+//
+// The thread IDs returned by ThreadGroup().MemberIDs(i.pidns) are sorted in
+// ascending order and reported to cb as DT_DIR entries, starting with the
+// entry at index relOffset of the sorted list. offset is the directory offset
+// of the first entry reported; the value returned is the offset following the
+// last entry that cb accepted. ENOENT is returned when MemberIDs yields no
+// IDs.
+func (i *subtasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
+	tasks := i.task.ThreadGroup().MemberIDs(i.pidns)
+	if len(tasks) == 0 {
+		return offset, syserror.ENOENT
+	}
+	// The member list is re-read on every call, so relOffset can point at or
+	// past its end (presumably when threads exited since the offset was
+	// recorded). Slicing with it unchecked would panic; there is simply
+	// nothing left to report.
+	if relOffset >= int64(len(tasks)) {
+		return offset, nil
+	}
+
+	tids := make([]int, 0, len(tasks))
+	for _, tid := range tasks {
+		tids = append(tids, int(tid))
+	}
+
+	sort.Ints(tids)
+	for _, tid := range tids[relOffset:] {
+		dirent := vfs.Dirent{
+			Name: strconv.FormatUint(uint64(tid), 10),
+			Type: linux.DT_DIR,
+			// A new inode number is drawn from inoGen for every entry reported.
+			Ino:     i.inoGen.NextIno(),
+			NextOff: offset + 1,
+		}
+		if !cb.Handle(dirent) {
+			// The callback refused this entry; offset still refers to it so
+			// that it is the first one reported next time.
+			return offset, nil
+		}
+		offset++
+	}
+	return offset, nil
+}
+
+// Open implements kernfs.Inode.
+//
+// It initialises a kernfs.GenericDirectoryFD over this inode's
+// OrderedChildren, using rp.Mount(), vfsd and flags, and returns its VFS file
+// description. The error result is always nil.
+func (i *subtasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
+	dirFD := new(kernfs.GenericDirectoryFD)
+	dirFD.Init(rp.Mount(), vfsd, &i.OrderedChildren, flags)
+	return dirFD.VFSFileDescription(), nil
+}
+
+// Stat implements kernfs.Inode.
+//
+// The result is the embedded InodeAttrs stat with Nlink increased by the
+// current ThreadGroup().Count() of i.task.
+func (i *subtasksInode) Stat(vsfs *vfs.Filesystem) linux.Statx {
+	attrs := i.InodeAttrs.Stat(vsfs)
+	attrs.Nlink = attrs.Nlink + uint32(i.task.ThreadGroup().Count())
+	return attrs
+}
diff --git a/pkg/sentry/fsimpl/proc/sys.go b/pkg/sentry/fsimpl/proc/sys.go
deleted file mode 100644
index b88256e12..000000000
--- a/pkg/sentry/fsimpl/proc/sys.go
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
-
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-// mmapMinAddrData implements vfs.DynamicBytesSource for
-// /proc/sys/vm/mmap_min_addr.
-//
-// +stateify savable
-type mmapMinAddrData struct {
- k *kernel.Kernel
-}
-
-var _ vfs.DynamicBytesSource = (*mmapMinAddrData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress())
- return nil
-}
-
-// +stateify savable
-type overcommitMemory struct{}
-
-var _ vfs.DynamicBytesSource = (*overcommitMemory)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *overcommitMemory) Generate(ctx context.Context, buf *bytes.Buffer) error {
- fmt.Fprintf(buf, "0\n")
- return nil
-}
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index 11a64c777..eb5bc62c0 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -15,8 +15,11 @@
package proc
import (
+ "bytes"
+ "fmt"
+
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -40,33 +43,37 @@ type taskInode struct {
var _ kernfs.Inode = (*taskInode)(nil)
-func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool) *kernfs.Dentry {
+func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) *kernfs.Dentry {
contents := map[string]*kernfs.Dentry{
- //"auxv": newAuxvec(t, msrc),
- //"cmdline": newExecArgInode(t, msrc, cmdlineExecArg),
- //"comm": newComm(t, msrc),
- //"environ": newExecArgInode(t, msrc, environExecArg),
+ "auxv": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &auxvData{task: task}),
+ "cmdline": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}),
+ "comm": newComm(task, inoGen.NextIno(), 0444),
+ "environ": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}),
//"exe": newExe(t, msrc),
//"fd": newFdDir(t, msrc),
//"fdinfo": newFdInfoDir(t, msrc),
- //"gid_map": newGIDMap(t, msrc),
- "io": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, newIO(task, isThreadGroup)),
- "maps": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &mapsData{task: task}),
+ "gid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: true}),
+ "io": newTaskOwnedFile(task, inoGen.NextIno(), 0400, newIO(task, isThreadGroup)),
+ "maps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mapsData{task: task}),
//"mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc),
//"mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc),
- //"ns": newNamespaceDir(t, msrc),
- "smaps": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &smapsData{task: task}),
- "stat": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &taskStatData{t: task, pidns: pidns, tgstats: isThreadGroup}),
- "statm": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &statmData{t: task}),
- "status": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &statusData{t: task, pidns: pidns}),
- //"uid_map": newUIDMap(t, msrc),
+ "ns": newTaskOwnedDir(task, inoGen.NextIno(), 0511, map[string]*kernfs.Dentry{
+ "net": newNamespaceSymlink(task, inoGen.NextIno(), "net"),
+ "pid": newNamespaceSymlink(task, inoGen.NextIno(), "pid"),
+ "user": newNamespaceSymlink(task, inoGen.NextIno(), "user"),
+ }),
+ "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}),
+ "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
+ "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{task: task}),
+ "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{task: task, pidns: pidns}),
+ "uid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: false}),
}
if isThreadGroup {
- //contents["task"] = p.newSubtasks(t, msrc)
+ contents["task"] = newSubtasks(task, pidns, inoGen, cgroupControllers)
+ }
+ if len(cgroupControllers) > 0 {
+ contents["cgroup"] = newTaskOwnedFile(task, inoGen.NextIno(), 0444, newCgroupData(cgroupControllers))
}
- //if len(p.cgroupControllers) > 0 {
- // contents["cgroup"] = newCGroupInode(t, msrc, p.cgroupControllers)
- //}
taskInode := &taskInode{task: task}
// Note: credentials are overridden by taskOwnedInode.
@@ -127,6 +134,23 @@ func newTaskOwnedFile(task *kernel.Task, ino uint64, perm linux.FileMode, inode
return d
}
+// newTaskOwnedDir creates a dentry for a kernfs.StaticDirectory with inode
+// number ino and mode perm, wrapped in a taskOwnedInode owned by task, and
+// populates it with children.
+func newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry {
+	staticDir := new(kernfs.StaticDirectory)
+
+	// Note: credentials are overridden by taskOwnedInode.
+	staticDir.Init(task.Credentials(), ino, perm)
+
+	dentry := new(kernfs.Dentry)
+	dentry.Init(&taskOwnedInode{Inode: staticDir, owner: task})
+
+	// Children are attached after the dentry has been initialised; the link
+	// count is then raised by whatever Populate reports.
+	staticDir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+	staticDir.IncLinks(staticDir.OrderedChildren.Populate(dentry, children))
+
+	return dentry
+}
+
// Stat implements kernfs.Inode.
func (i *taskOwnedInode) Stat(fs *vfs.Filesystem) linux.Statx {
stat := i.Inode.Stat(fs)
@@ -137,7 +161,7 @@ func (i *taskOwnedInode) Stat(fs *vfs.Filesystem) linux.Statx {
}
// CheckPermissions implements kernfs.Inode.
-func (i *taskOwnedInode) CheckPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
+func (i *taskOwnedInode) CheckPermissions(_ context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
mode := i.Mode()
uid, gid := i.getOwner(mode)
return vfs.GenericCheckPermissions(
@@ -188,3 +212,38 @@ func newIO(t *kernel.Task, isThreadGroup bool) *ioData {
}
return &ioData{ioUsage: t}
}
+
+// newNamespaceSymlink creates the dentry for a namespace symlink whose target
+// has the form "<ns>:[<ino>]", wrapped in a taskOwnedInode owned by task.
+func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry {
+	link := new(kernfs.StaticSymlink)
+
+	// A namespace symlink's target carries the namespace name followed by the
+	// inode number of the namespace instance, e.g. user:[123456]. That inode
+	// number is currently faked by reusing the symlink's own inode number.
+	//
+	// Note: credentials are overridden by taskOwnedInode.
+	link.Init(task.Credentials(), ino, fmt.Sprintf("%s:[%d]", ns, ino))
+
+	dentry := new(kernfs.Dentry)
+	dentry.Init(&taskOwnedInode{Inode: link, owner: task})
+	return dentry
+}
+
+// newCgroupData creates an inode that shows cgroup information.
+//
+// From man 7 cgroups: "For each cgroup hierarchy of which the process is a
+// member, there is one entry containing three colon-separated fields:
+// hierarchy-ID:controller-list:cgroup-path". One such line is produced per
+// entry of controllers (controller name -> cgroup path) and the text is
+// wrapped in a static file.
+//
+// NOTE(review): controllers is walked with a map range, whose order Go leaves
+// unspecified, so which controller receives which ID (and the order of the
+// lines) can differ from one call to the next.
+func newCgroupData(controllers map[string]string) dynamicInode {
+	var out bytes.Buffer
+
+	// Hierarchy IDs must be positive integers (cgroup v1) and unique, but
+	// their exact values are irrelevant. Linux lists this file in descending
+	// ID order, so the counter starts at the number of controllers and runs
+	// downwards.
+	id := len(controllers)
+	for name, dir := range controllers {
+		fmt.Fprintf(&out, "%d:%s:%s\n", id, name, dir)
+		id--
+	}
+	return newStaticFile(out.String())
+}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 93f0e1aa8..efd3b3453 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -17,15 +17,20 @@ package proc
import (
"bytes"
"fmt"
+ "io"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// mm gets the kernel task's MemoryManager. No additional reference is taken on
@@ -41,6 +46,256 @@ func getMM(task *kernel.Task) *mm.MemoryManager {
return tmm
}
+// getMMIncRef returns task's MemoryManager after incrementing its users
+// count; the caller must decrement that count once it is done with the
+// MemoryManager.
+//
+// ESRCH is returned if task's exit state is TaskExitDead. io.EOF is returned
+// if the task has no MemoryManager or IncUsers reports failure.
+func getMMIncRef(task *kernel.Task) (*mm.MemoryManager, error) {
+	if task.ExitState() == kernel.TaskExitDead {
+		return nil, syserror.ESRCH
+	}
+
+	var mgr *mm.MemoryManager
+	task.WithMuLocked(func(locked *kernel.Task) {
+		mgr = locked.MemoryManager()
+	})
+
+	switch {
+	case mgr == nil:
+		return nil, io.EOF
+	case !mgr.IncUsers():
+		return nil, io.EOF
+	}
+	return mgr, nil
+}
+
+// bufferWriter appends everything written through WriteFromBlocks to buf.
+type bufferWriter struct {
+	buf *bytes.Buffer
+}
+
+// WriteFromBlocks writes up to srcs.NumBytes() bytes from srcs and returns
+// the number of bytes written. The contract it follows allows a partial write
+// without an error (i.e. (n, nil) where 0 < n < srcs.NumBytes()) but not a
+// full write with an error (i.e. (srcs.NumBytes(), err) where err != nil).
+//
+// This implementation appends every block of srcs to w.buf and always reports
+// srcs.NumBytes() with a nil error.
+func (w *bufferWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+	total := srcs.NumBytes()
+	for rest := srcs; !rest.IsEmpty(); rest = rest.Tail() {
+		w.buf.Write(rest.Head().ToSlice())
+	}
+	return total, nil
+}
+
+// auxvData implements vfs.DynamicBytesSource for /proc/[pid]/auxv.
+//
+// +stateify savable
+type auxvData struct {
+	kernfs.DynamicBytesFile
+
+	// task is the task whose auxiliary vector is exported.
+	task *kernel.Task
+}
+
+var _ dynamicInode = (*auxvData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// Every entry of the MemoryManager's auxv is written as two consecutive 64-bit
+// words (key, then value) in usermem.ByteOrder, followed by one all-zero
+// 16-byte entry acting as the AT_NULL terminator. Errors from getMMIncRef are
+// returned unchanged.
+func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	m, err := getMMIncRef(d.task)
+	if err != nil {
+		return err
+	}
+	defer m.DecUsers(ctx)
+
+	// Space for buffer with AT_NULL (0) terminator at the end.
+	auxv := m.Auxv()
+	buf.Grow((len(auxv) + 1) * 16)
+
+	var entry [16]byte
+	for _, e := range auxv {
+		usermem.ByteOrder.PutUint64(entry[:8], e.Key)
+		usermem.ByteOrder.PutUint64(entry[8:], uint64(e.Value))
+		buf.Write(entry[:])
+	}
+
+	// Write the terminator that the reservation above makes room for; without
+	// it a reader has no end-of-vector marker.
+	// NOTE(review): assumes m.Auxv() does not itself end with an AT_NULL
+	// entry, as the "+ 1" in the Grow call suggests — confirm.
+	var atNull [16]byte
+	buf.Write(atNull[:])
+	return nil
+}
+
+// execArgType enumerates the types of exec arguments that are exposed through
+// proc.
+type execArgType int
+
+const (
+ // cmdlineDataArg selects the argument vector (ArgvStart..ArgvEnd).
+ cmdlineDataArg execArgType = iota
+ // environDataArg selects the environment vector (EnvvStart..EnvvEnd).
+ environDataArg
+)
+
+// cmdlineData implements vfs.DynamicBytesSource for /proc/[pid]/cmdline and,
+// when arg is environDataArg, for /proc/[pid]/environ.
+//
+// +stateify savable
+type cmdlineData struct {
+ kernfs.DynamicBytesFile
+
+ // task is the task whose address space the data is read from.
+ task *kernel.Task
+
+ // arg is the type of exec argument this file contains.
+ arg execArgType
+}
+
+var _ dynamicInode = (*cmdlineData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// Depending on d.arg, the argument vector or the environment vector is copied
+// from the address space of d.task into buf. io.EOF is returned while the
+// selected range is not set up yet (its start or end is zero); errors from
+// getMMIncRef and from the copy are returned unchanged.
+//
+// For cmdlineDataArg, if the last byte copied is not NULL the result is
+// treated as one string: it is cut at the first NULL found in argv or, when
+// argv holds none, extended into the environment vector (one page in total at
+// most) and cut at the first NULL found there. The NULL byte itself is not
+// kept in either case.
+//
+// NOTE(review): the truncation logic indexes buf from its beginning, so this
+// assumes buf is empty on entry — confirm against the caller.
+func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	m, err := getMMIncRef(d.task)
+	if err != nil {
+		return err
+	}
+	defer m.DecUsers(ctx)
+
+	// Figure out the bounds of the exec arg we are trying to read.
+	var ar usermem.AddrRange
+	switch d.arg {
+	case cmdlineDataArg:
+		ar = usermem.AddrRange{
+			Start: m.ArgvStart(),
+			End:   m.ArgvEnd(),
+		}
+	case environDataArg:
+		ar = usermem.AddrRange{
+			Start: m.EnvvStart(),
+			End:   m.EnvvEnd(),
+		}
+	default:
+		panic(fmt.Sprintf("unknown exec arg type %v", d.arg))
+	}
+	if ar.Start == 0 || ar.End == 0 {
+		// Don't attempt to read before the start/end are set up.
+		return io.EOF
+	}
+
+	// N.B. Technically this should be usermem.IOOpts.IgnorePermissions = true
+	// until Linux 4.9 (272ddc8b3735 "proc: don't use FOLL_FORCE for reading
+	// cmdline and environment").
+	writer := &bufferWriter{buf: buf}
+	if n, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(ar), writer, usermem.IOOpts{}); n == 0 || err != nil {
+		// Nothing to copy or something went wrong.
+		return err
+	}
+
+	// Only cmdline needs further work, and only if its final byte is not the
+	// expected NULL terminator.
+	if d.arg != cmdlineDataArg || buf.Bytes()[buf.Len()-1] == 0 {
+		return nil
+	}
+
+	// On Linux, if the NULL byte at the end of the argument vector has been
+	// overwritten, it continues reading the environment vector as part of
+	// the argument vector.
+	if end := bytes.IndexByte(buf.Bytes(), 0); end != -1 {
+		// A NULL character exists somewhere else in argv: keep only the
+		// bytes in front of it.
+		buf.Truncate(end)
+		return nil
+	}
+
+	// There is no NULL terminator in the string, return into envp.
+	arEnvv := usermem.AddrRange{
+		Start: m.EnvvStart(),
+		End:   m.EnvvEnd(),
+	}
+
+	// Upstream limits the returned amount to one page of slop.
+	// https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208
+	// we'll return one page total between argv and envp because of the
+	// above page restrictions.
+	if buf.Len() >= usermem.PageSize {
+		// Returned at least one page already, nothing else to add.
+		return nil
+	}
+	remaining := usermem.PageSize - buf.Len()
+	if int(arEnvv.Length()) > remaining {
+		end, ok := arEnvv.Start.AddLength(uint64(remaining))
+		if !ok {
+			return syserror.EFAULT
+		}
+		arEnvv.End = end
+	}
+
+	// Record where the environment bytes begin inside buf. Using the number of
+	// bytes actually present (rather than the nominal argv length) keeps the
+	// slice expression below in range even if the first copy came up short.
+	envStart := buf.Len()
+	if _, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(arEnvv), writer, usermem.IOOpts{}); err != nil {
+		return err
+	}
+
+	// Cut the result at the first NULL character found in the environment
+	// bytes. IndexByte reports a position relative to envStart, so it has to
+	// be rebased before it can be used as a buffer length.
+	if end := bytes.IndexByte(buf.Bytes()[envStart:], 0); end != -1 {
+		buf.Truncate(envStart + end)
+	}
+	return nil
+}
+
+// commInode is the inode type used for a task's comm file. It embeds
+// DynamicBytesFile and supplies its own CheckPermissions.
+//
+// +stateify savable
+type commInode struct {
+	kernfs.DynamicBytesFile
+
+	task *kernel.Task
+}
+
+// newComm creates the dentry for task's comm file with inode number ino and
+// mode perm; its contents are generated by a commData bound to the same task.
+func newComm(task *kernel.Task, ino uint64, perm linux.FileMode) *kernfs.Dentry {
+	comm := new(commInode)
+	comm.task = task
+	comm.DynamicBytesFile.Init(task.Credentials(), ino, &commData{task: task}, perm)
+
+	dentry := new(kernfs.Dentry)
+	dentry.Init(comm)
+	return dentry
+}
+
+// CheckPermissions grants any non-exec access to callers whose task is in the
+// same thread group as i.task and defers every other decision to the embedded
+// DynamicBytesFile.
+func (i *commInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
+	// This file can always be read or written by members of the same thread
+	// group. See fs/proc/base.c:proc_tid_comm_permission.
+	//
+	// N.B. This check is currently a no-op as we don't yet support writing and
+	// this file is world-readable anyways.
+	if caller := kernel.TaskFromContext(ctx); caller != nil {
+		sameGroup := caller.ThreadGroup() == i.task.ThreadGroup()
+		if sameGroup && !ats.MayExec() {
+			return nil
+		}
+	}
+
+	return i.DynamicBytesFile.CheckPermissions(ctx, creds, ats)
+}
+
+// commData implements vfs.DynamicBytesSource for /proc/[pid]/comm.
+//
+// +stateify savable
+type commData struct {
+	kernfs.DynamicBytesFile
+
+	// task is the task whose name is reported.
+	task *kernel.Task
+}
+
+var _ dynamicInode = (*commData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// It writes the task's name followed by a single newline and never fails.
+func (d *commData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	buf.WriteString(d.task.Name())
+	buf.WriteByte('\n')
+	return nil
+}
+
+// idMapData implements vfs.DynamicBytesSource for /proc/[pid]/{gid_map|uid_map}.
+//
+// +stateify savable
+type idMapData struct {
+	kernfs.DynamicBytesFile
+
+	// task is the task whose user namespace supplies the mappings.
+	task *kernel.Task
+	// gids selects the GID map when true and the UID map when false.
+	gids bool
+}
+
+var _ dynamicInode = (*idMapData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// One line is written per mapping entry, holding FirstID, FirstParentID and
+// Length, each right-aligned in a ten-character column.
+func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	ns := d.task.UserNamespace()
+
+	var mappings []auth.IDMapEntry
+	switch {
+	case d.gids:
+		mappings = ns.GIDMap()
+	default:
+		mappings = ns.UIDMap()
+	}
+
+	for _, m := range mappings {
+		fmt.Fprintf(buf, "%10d %10d %10d\n", m.FirstID, m.FirstParentID, m.Length)
+	}
+	return nil
+}
+
// mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps.
//
// +stateify savable
@@ -83,7 +338,7 @@ func (d *smapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
type taskStatData struct {
kernfs.DynamicBytesFile
- t *kernel.Task
+ task *kernel.Task
// If tgstats is true, accumulate fault stats (not implemented) and CPU
// time across all tasks in t's thread group.
@@ -98,40 +353,40 @@ var _ dynamicInode = (*taskStatData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- fmt.Fprintf(buf, "%d ", s.pidns.IDOfTask(s.t))
- fmt.Fprintf(buf, "(%s) ", s.t.Name())
- fmt.Fprintf(buf, "%c ", s.t.StateStatus()[0])
+ fmt.Fprintf(buf, "%d ", s.pidns.IDOfTask(s.task))
+ fmt.Fprintf(buf, "(%s) ", s.task.Name())
+ fmt.Fprintf(buf, "%c ", s.task.StateStatus()[0])
ppid := kernel.ThreadID(0)
- if parent := s.t.Parent(); parent != nil {
+ if parent := s.task.Parent(); parent != nil {
ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
}
fmt.Fprintf(buf, "%d ", ppid)
- fmt.Fprintf(buf, "%d ", s.pidns.IDOfProcessGroup(s.t.ThreadGroup().ProcessGroup()))
- fmt.Fprintf(buf, "%d ", s.pidns.IDOfSession(s.t.ThreadGroup().Session()))
+ fmt.Fprintf(buf, "%d ", s.pidns.IDOfProcessGroup(s.task.ThreadGroup().ProcessGroup()))
+ fmt.Fprintf(buf, "%d ", s.pidns.IDOfSession(s.task.ThreadGroup().Session()))
fmt.Fprintf(buf, "0 0 " /* tty_nr tpgid */)
fmt.Fprintf(buf, "0 " /* flags */)
fmt.Fprintf(buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */)
var cputime usage.CPUStats
if s.tgstats {
- cputime = s.t.ThreadGroup().CPUStats()
+ cputime = s.task.ThreadGroup().CPUStats()
} else {
- cputime = s.t.CPUStats()
+ cputime = s.task.CPUStats()
}
fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
- cputime = s.t.ThreadGroup().JoinedChildCPUStats()
+ cputime = s.task.ThreadGroup().JoinedChildCPUStats()
fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
- fmt.Fprintf(buf, "%d %d ", s.t.Priority(), s.t.Niceness())
- fmt.Fprintf(buf, "%d ", s.t.ThreadGroup().Count())
+ fmt.Fprintf(buf, "%d %d ", s.task.Priority(), s.task.Niceness())
+ fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Count())
// itrealvalue. Since kernel 2.6.17, this field is no longer
// maintained, and is hard coded as 0.
fmt.Fprintf(buf, "0 ")
// Start time is relative to boot time, expressed in clock ticks.
- fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.t.StartTime().Sub(s.t.Kernel().Timekeeper().BootTime())))
+ fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.task.StartTime().Sub(s.task.Kernel().Timekeeper().BootTime())))
var vss, rss uint64
- s.t.WithMuLocked(func(t *kernel.Task) {
+ s.task.WithMuLocked(func(t *kernel.Task) {
if mm := t.MemoryManager(); mm != nil {
vss = mm.VirtualMemorySize()
rss = mm.ResidentSetSize()
@@ -140,14 +395,14 @@ func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
fmt.Fprintf(buf, "%d %d ", vss, rss/usermem.PageSize)
// rsslim.
- fmt.Fprintf(buf, "%d ", s.t.ThreadGroup().Limits().Get(limits.Rss).Cur)
+ fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Limits().Get(limits.Rss).Cur)
fmt.Fprintf(buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */)
fmt.Fprintf(buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */)
fmt.Fprintf(buf, "0 0 " /* nswap cnswap */)
terminationSignal := linux.Signal(0)
- if s.t == s.t.ThreadGroup().Leader() {
- terminationSignal = s.t.ThreadGroup().TerminationSignal()
+ if s.task == s.task.ThreadGroup().Leader() {
+ terminationSignal = s.task.ThreadGroup().TerminationSignal()
}
fmt.Fprintf(buf, "%d ", terminationSignal)
fmt.Fprintf(buf, "0 0 0 " /* processor rt_priority policy */)
@@ -164,7 +419,7 @@ func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
type statmData struct {
kernfs.DynamicBytesFile
- t *kernel.Task
+ task *kernel.Task
}
var _ dynamicInode = (*statmData)(nil)
@@ -172,7 +427,7 @@ var _ dynamicInode = (*statmData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
var vss, rss uint64
- s.t.WithMuLocked(func(t *kernel.Task) {
+ s.task.WithMuLocked(func(t *kernel.Task) {
if mm := t.MemoryManager(); mm != nil {
vss = mm.VirtualMemorySize()
rss = mm.ResidentSetSize()
@@ -189,7 +444,7 @@ func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
type statusData struct {
kernfs.DynamicBytesFile
- t *kernel.Task
+ task *kernel.Task
pidns *kernel.PIDNamespace
}
@@ -197,23 +452,23 @@ var _ dynamicInode = (*statusData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- fmt.Fprintf(buf, "Name:\t%s\n", s.t.Name())
- fmt.Fprintf(buf, "State:\t%s\n", s.t.StateStatus())
- fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.t.ThreadGroup()))
- fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.t))
+ fmt.Fprintf(buf, "Name:\t%s\n", s.task.Name())
+ fmt.Fprintf(buf, "State:\t%s\n", s.task.StateStatus())
+ fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.task.ThreadGroup()))
+ fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.task))
ppid := kernel.ThreadID(0)
- if parent := s.t.Parent(); parent != nil {
+ if parent := s.task.Parent(); parent != nil {
ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
}
fmt.Fprintf(buf, "PPid:\t%d\n", ppid)
tpid := kernel.ThreadID(0)
- if tracer := s.t.Tracer(); tracer != nil {
+ if tracer := s.task.Tracer(); tracer != nil {
tpid = s.pidns.IDOfTask(tracer)
}
fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid)
var fds int
var vss, rss, data uint64
- s.t.WithMuLocked(func(t *kernel.Task) {
+ s.task.WithMuLocked(func(t *kernel.Task) {
if fdTable := t.FDTable(); fdTable != nil {
fds = fdTable.Size()
}
@@ -227,13 +482,13 @@ func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error {
fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10)
fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10)
fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10)
- fmt.Fprintf(buf, "Threads:\t%d\n", s.t.ThreadGroup().Count())
- creds := s.t.Credentials()
+ fmt.Fprintf(buf, "Threads:\t%d\n", s.task.ThreadGroup().Count())
+ creds := s.task.Credentials()
fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps)
fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
fmt.Fprintf(buf, "CapBnd:\t%016x\n", creds.BoundingCaps)
- fmt.Fprintf(buf, "Seccomp:\t%d\n", s.t.SeccompMode())
+ fmt.Fprintf(buf, "Seccomp:\t%d\n", s.task.SeccompMode())
// We unconditionally report a single NUMA node. See
// pkg/sentry/syscalls/linux/sys_mempolicy.go.
fmt.Fprintf(buf, "Mems_allowed:\t1\n")
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index d8f92d52f..e0cb9c47b 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -15,11 +15,12 @@
package proc
import (
+ "bytes"
"sort"
"strconv"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -28,9 +29,8 @@ import (
)
const (
- defaultPermission = 0444
- selfName = "self"
- threadSelfName = "thread-self"
+ selfName = "self"
+ threadSelfName = "thread-self"
)
// InoGenerator generates unique inode numbers for a given filesystem.
@@ -54,22 +54,28 @@ type tasksInode struct {
// Linux. So handle them outside of OrderedChildren.
selfSymlink *vfs.Dentry
threadSelfSymlink *vfs.Dentry
+
+ // cgroupControllers is a map of controller name to directory in the
+ // cgroup hierarchy. These controllers are immutable and will be listed
+ // in /proc/pid/cgroup if not nil.
+ cgroupControllers map[string]string
}
var _ kernfs.Inode = (*tasksInode)(nil)
-func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace) (*tasksInode, *kernfs.Dentry) {
+func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) (*tasksInode, *kernfs.Dentry) {
root := auth.NewRootCredentials(pidns.UserNamespace())
contents := map[string]*kernfs.Dentry{
- //"cpuinfo": newCPUInfo(ctx, msrc),
- //"filesystems": seqfile.NewSeqFileInode(ctx, &filesystemsData{}, msrc),
- "loadavg": newDentry(root, inoGen.NextIno(), defaultPermission, &loadavgData{}),
- "meminfo": newDentry(root, inoGen.NextIno(), defaultPermission, &meminfoData{k: k}),
- "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), defaultPermission, "self/mounts"),
- "stat": newDentry(root, inoGen.NextIno(), defaultPermission, &statData{k: k}),
- //"uptime": newUptime(ctx, msrc),
- //"version": newVersionData(root, inoGen.NextIno(), k),
- "version": newDentry(root, inoGen.NextIno(), defaultPermission, &versionData{k: k}),
+ "cpuinfo": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(cpuInfoData(k))),
+ //"filesystems": newDentry(root, inoGen.NextIno(), 0444, &filesystemsData{}),
+ "loadavg": newDentry(root, inoGen.NextIno(), 0444, &loadavgData{}),
+ "sys": newSysDir(root, inoGen),
+ "meminfo": newDentry(root, inoGen.NextIno(), 0444, &meminfoData{}),
+ "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/mounts"),
+ "net": newNetDir(root, inoGen, k),
+ "stat": newDentry(root, inoGen.NextIno(), 0444, &statData{}),
+ "uptime": newDentry(root, inoGen.NextIno(), 0444, &uptimeData{}),
+ "version": newDentry(root, inoGen.NextIno(), 0444, &versionData{}),
}
inode := &tasksInode{
@@ -77,6 +83,7 @@ func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNames
inoGen: inoGen,
selfSymlink: newSelfSymlink(root, inoGen.NextIno(), 0444, pidns).VFSDentry(),
threadSelfSymlink: newThreadSelfSymlink(root, inoGen.NextIno(), 0444, pidns).VFSDentry(),
+ cgroupControllers: cgroupControllers,
}
inode.InodeAttrs.Init(root, inoGen.NextIno(), linux.ModeDirectory|0555)
@@ -110,7 +117,7 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro
return nil, syserror.ENOENT
}
- taskDentry := newTaskInode(i.inoGen, task, i.pidns, true)
+ taskDentry := newTaskInode(i.inoGen, task, i.pidns, true, i.cgroupControllers)
return taskDentry.VFSDentry(), nil
}
@@ -216,3 +223,20 @@ func (i *tasksInode) Stat(vsfs *vfs.Filesystem) linux.Statx {
return stat
}
+
+// cpuInfoData returns the concatenation of what k's FeatureSet writes through
+// WriteCPUInfoTo for every CPU index from 0 up to k.ApplicationCores(). It
+// panics if k has no FeatureSet.
+func cpuInfoData(k *kernel.Kernel) string {
+	fs := k.FeatureSet()
+	if fs == nil {
+		// Kernel is always initialized with a FeatureSet.
+		panic("cpuinfo read with nil FeatureSet")
+	}
+
+	cores := k.ApplicationCores()
+	var out bytes.Buffer
+	for cpu := uint(0); cpu < cores; cpu++ {
+		fs.WriteCPUInfoTo(cpu, &out)
+	}
+	return out.String()
+}
+
+// shmData wraps the base-10 text of v (with no trailing newline) in a static
+// file inode.
+func shmData(v uint64) dynamicInode {
+	text := strconv.FormatUint(v, 10)
+	return newStaticFile(text)
+}
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
index 91f30a798..434998910 100644
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -15,15 +15,19 @@
package proc
import (
+ "bytes"
"fmt"
"strconv"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
type selfSymlink struct {
@@ -90,3 +94,244 @@ func (s *threadSelfSymlink) Readlink(ctx context.Context) (string, error) {
}
return fmt.Sprintf("%d/task/%d", tgid, tid), nil
}
+
+// cpuStats contains the breakdown of CPU time for /proc/stat.
+type cpuStats struct {
+ // user is time spent in userspace tasks with non-positive niceness.
+ user uint64
+
+ // nice is time spent in userspace tasks with positive niceness.
+ nice uint64
+
+ // system is time spent in non-interrupt kernel context.
+ system uint64
+
+ // idle is time spent idle.
+ idle uint64
+
+ // ioWait is time spent waiting for IO.
+ ioWait uint64
+
+ // irq is time spent in interrupt context.
+ irq uint64
+
+ // softirq is time spent in software interrupt context.
+ softirq uint64
+
+ // steal is involuntary wait time.
+ steal uint64
+
+ // guest is time spent in guests with non-positive niceness.
+ guest uint64
+
+ // guestNice is time spent in guests with positive niceness.
+ guestNice uint64
+}
+
+// String implements fmt.Stringer. It emits the ten counters as space-separated decimals in field declaration order (user first, guestNice last).
+func (c cpuStats) String() string {
+ return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice)
+}
+
+// statData implements vfs.DynamicBytesSource for /proc/stat.
+//
+// +stateify savable
+type statData struct {
+ kernfs.DynamicBytesFile
+
+ // k is the owning Kernel.
+ k *kernel.Kernel
+}
+
+var _ dynamicInode = (*statData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate. Apart from btime, which is read from the Timekeeper, every value written is a placeholder zero.
+func (s *statData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // TODO(b/37226836): We currently export only zero CPU stats. We could
+ // at least provide some aggregate stats.
+ var cpu cpuStats
+ fmt.Fprintf(buf, "cpu %s\n", cpu)
+
+ for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ {
+ fmt.Fprintf(buf, "cpu%d %s\n", c, cpu)
+ }
+
+ // The total number of interrupts is dependent on the CPUs and PCI
+ // devices on the system. See arch_probe_nr_irqs.
+ //
+ // Since we don't report real interrupt stats, just choose an arbitrary
+ // value from a representative VM.
+ const numInterrupts = 256
+
+ // The Kernel doesn't handle real interrupts, so report all zeroes.
+ // TODO(b/37226836): We could count page faults as #PF.
+ fmt.Fprintf(buf, "intr 0") // total
+ for i := 0; i < numInterrupts; i++ {
+ fmt.Fprintf(buf, " 0")
+ }
+ fmt.Fprintf(buf, "\n")
+
+ // Total number of context switches.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "ctxt 0\n")
+
+ // CLOCK_REALTIME timestamp from boot, in seconds.
+ fmt.Fprintf(buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds())
+
+ // Total number of clones.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "processes 0\n")
+
+ // Number of runnable tasks.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "procs_running 0\n")
+
+ // Number of tasks waiting on IO.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "procs_blocked 0\n")
+
+ // Number of each softirq handled.
+ fmt.Fprintf(buf, "softirq 0") // total
+ for i := 0; i < linux.NumSoftIRQ; i++ {
+ fmt.Fprintf(buf, " 0")
+ }
+ fmt.Fprintf(buf, "\n")
+ return nil
+}
+
+// loadavgData backs /proc/loadavg.
+//
+// +stateify savable
+type loadavgData struct {
+ kernfs.DynamicBytesFile
+}
+
+var _ dynamicInode = (*loadavgData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate. Every field is currently a hard-coded zero.
+func (d *loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // TODO(b/62345059): Include real data in fields.
+ // Column 1-3: CPU and IO utilization of the last 1, 5, and 15 minute periods.
+ // Column 4-5: currently running processes and the total number of processes.
+ // Column 6: the last process ID used.
+ fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
+ return nil
+}
+
+// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo.
+//
+// +stateify savable
+type meminfoData struct {
+ kernfs.DynamicBytesFile
+
+ // k is the owning Kernel.
+ k *kernel.Kernel
+}
+
+var _ dynamicInode = (*meminfoData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate. Values are byte counts divided by 1024; swap, dirty, writeback and mlock figures are fixed at zero.
+func (d *meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ mf := d.k.MemoryFile()
+ mf.UpdateUsage()
+ snapshot, totalUsage := usage.MemoryAccounting.Copy()
+ totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
+ anon := snapshot.Anonymous + snapshot.Tmpfs
+ file := snapshot.PageCache + snapshot.Mapped
+ // We don't actually have active/inactive LRUs, so just make up numbers:
+ // half of file (rounded down to a page boundary) is called active, the rest inactive.
+ activeFile := (file / 2) &^ (usermem.PageSize - 1)
+ inactiveFile := file - activeFile
+
+ fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024)
+ memFree := (totalSize - totalUsage) / 1024 // NOTE(review): assumes usage.TotalMemory never returns less than totalUsage — confirm.
+ // We use MemFree as MemAvailable because we don't swap.
+ // TODO(rahat): When reclaim is implemented the value of MemAvailable
+ // should change.
+ fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree)
+ fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree)
+ fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices
+ fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024)
+ // Emulate a system with no swap, which disables inactivation of anon pages.
+ fmt.Fprintf(buf, "SwapCache: 0 kB\n")
+ fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024)
+ fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024)
+ fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024)
+ fmt.Fprintf(buf, "Inactive(anon): 0 kB\n")
+ fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024)
+ fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
+ fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263)
+ fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263)
+ fmt.Fprintf(buf, "SwapTotal: 0 kB\n")
+ fmt.Fprintf(buf, "SwapFree: 0 kB\n")
+ fmt.Fprintf(buf, "Dirty: 0 kB\n")
+ fmt.Fprintf(buf, "Writeback: 0 kB\n")
+ fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024)
+ fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
+ fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024)
+ return nil
+}
+
+// uptimeData implements vfs.DynamicBytesSource for /proc/uptime.
+//
+// +stateify savable
+type uptimeData struct {
+ kernfs.DynamicBytesFile
+}
+
+var _ dynamicInode = (*uptimeData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate. The Kernel and the current time are both taken from ctx.
+func (*uptimeData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ k := kernel.KernelFromContext(ctx)
+ now := time.NowFromContext(ctx)
+
+ // The first number is seconds since boot. The second number is hard-coded to 0.00; in Linux's /proc/uptime it is the time spent idle.
+ fmt.Fprintf(buf, "%.2f 0.00\n", now.Sub(k.Timekeeper().BootTime()).Seconds())
+ return nil
+}
+
+// versionData implements vfs.DynamicBytesSource for /proc/version.
+//
+// +stateify savable
+type versionData struct {
+ kernfs.DynamicBytesFile
+
+ // k is the owning Kernel. NOTE(review): the "version" entry in newTasksInode is built as &versionData{}, leaving k nil — confirm callers set it.
+ k *kernel.Kernel
+}
+
+var _ dynamicInode = (*versionData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// The Kernel is taken from v.k when it is set and from ctx otherwise, so that
+// a versionData created as a zero-value &versionData{} does not dereference a
+// nil Kernel. It panics if the Kernel has no init task yet.
+func (v *versionData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ k := v.k
+ if k == nil {
+ // The dentry may have been created without an owning Kernel; fall back
+ // to the Kernel of the calling context.
+ k = kernel.KernelFromContext(ctx)
+ }
+ init := k.GlobalInit()
+ if init == nil {
+ // Attempted to read before the init Task is created. This can
+ // only occur during startup, which should never need to read
+ // this file.
+ panic("Attempted to read version before initial Task is available")
+ }
+
+ // /proc/version takes the form:
+ //
+ // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
+ // (COMPILER_VERSION) VERSION"
+ //
+ // where:
+ // - SYSNAME, RELEASE, and VERSION are the same as returned by
+ // sys_utsname
+ // - COMPILE_USER is the user that built the kernel
+ // - COMPILE_HOST is the hostname of the machine on which the kernel
+ // was built
+ // - COMPILER_VERSION is the version reported by the building compiler
+ //
+ // Since we don't really want to expose build information to
+ // applications, those fields are omitted.
+ //
+ // FIXME(mpratt): Using Version from the init task SyscallTable
+ // disregards the different version a task may have (e.g., in a uts
+ // namespace).
+ ver := init.Leader().SyscallTable().Version
+ fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/tasks_net.go b/pkg/sentry/fsimpl/proc/tasks_net.go
new file mode 100644
index 000000000..608fec017
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/tasks_net.go
@@ -0,0 +1,784 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "reflect"
+ "time"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// newNetDir returns the static directory dentry holding the /proc/net files.
+// When k has no network stack the directory is built from a nil contents map.
+// The IPv6 entries are only added when the stack reports IPv6 support.
+func newNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry {
+ var contents map[string]*kernfs.Dentry
+ if stack := k.NetworkStack(); stack != nil {
+ const (
+ arp = "IP address HW type Flags HW address Mask Device\n"
+ netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n"
+ packet = "sk RefCnt Type Proto Iface R Rmem User Inode\n"
+ protocols = "protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"
+ ptype = "Type Device Function\n"
+ upd6 = " sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n" // NOTE(review): the name looks like a typo for "udp6"; it is the header used for the udp6 stub below.
+ )
+ psched := fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond))
+
+ contents = map[string]*kernfs.Dentry{
+ "dev": newDentry(root, inoGen.NextIno(), 0444, &netDevData{stack: stack}),
+ "snmp": newDentry(root, inoGen.NextIno(), 0444, &netSnmpData{stack: stack}),
+
+ // The following files are simple stubs until they are implemented in
+ // netstack, if the file contains a header the stub is just the header
+ // otherwise it is an empty file.
+ "arp": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(arp)),
+ "netlink": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(netlink)),
+ "netstat": newDentry(root, inoGen.NextIno(), 0444, &netStatData{}),
+ "packet": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(packet)),
+ "protocols": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(protocols)),
+
+ // Linux sets psched values to: nsec per usec, psched tick in ns, 1000000,
+ // high res timer ticks per sec (ClockGetres returns 1ns resolution).
+ "psched": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(psched)),
+ "ptype": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(ptype)),
+ "route": newDentry(root, inoGen.NextIno(), 0444, &netRouteData{stack: stack}),
+ "tcp": newDentry(root, inoGen.NextIno(), 0444, &netTCPData{kernel: k}),
+ "udp": newDentry(root, inoGen.NextIno(), 0444, &netUDPData{kernel: k}),
+ "unix": newDentry(root, inoGen.NextIno(), 0444, &netUnixData{kernel: k}),
+ }
+
+ if stack.SupportsIPv6() {
+ contents["if_inet6"] = newDentry(root, inoGen.NextIno(), 0444, &ifinet6{stack: stack})
+ contents["ipv6_route"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(""))
+ contents["tcp6"] = newDentry(root, inoGen.NextIno(), 0444, &netTCP6Data{kernel: k})
+ contents["udp6"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(upd6))
+ }
+ }
+
+ return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, contents)
+}
+
+// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6.
+//
+// +stateify savable
+type ifinet6 struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*ifinet6)(nil)
+
+// contents returns one formatted, newline-terminated line per AF_INET6
+// address on the stack's interfaces. The result is nil when there are none.
+func (n *ifinet6) contents() []string {
+ var lines []string
+ nics := n.stack.Interfaces()
+ for id, naddrs := range n.stack.InterfaceAddrs() {
+ nic, ok := nics[id]
+ if !ok {
+ // NIC was added after Interfaces was called. We'll just ignore it.
+ continue
+ }
+
+ for _, a := range naddrs {
+ // IPv6 only.
+ if a.Family != linux.AF_INET6 {
+ continue
+ }
+
+ // Fields:
+ // IPv6 address displayed in 32 hexadecimal chars without colons
+ // Netlink device number (interface index) in hexadecimal (use nic id)
+ // Prefix length in hexadecimal
+ // Scope value (use 0)
+ // Interface flags
+ // Device name
+ lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name))
+ }
+ }
+ return lines
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ for _, l := range n.contents() {
+ buf.WriteString(l)
+ }
+ return nil
+}
+
+// netDevData implements vfs.DynamicBytesSource for /proc/net/dev.
+//
+// +stateify savable
+type netDevData struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*netDevData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate. Interfaces whose statistics cannot be retrieved are logged and left out of the output.
+func (n *netDevData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ interfaces := n.stack.Interfaces()
+ buf.WriteString("Inter-| Receive | Transmit\n")
+ buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n")
+
+ for _, i := range interfaces {
+ // Implements the same format as
+ // net/core/net-procfs.c:dev_seq_printf_stats.
+ var stats inet.StatDev
+ if err := n.stack.Statistics(&stats, i.Name); err != nil {
+ log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err)
+ continue
+ }
+ fmt.Fprintf(
+ buf,
+ "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n",
+ i.Name,
+ // Received
+ stats[0], // bytes
+ stats[1], // packets
+ stats[2], // errors
+ stats[3], // dropped
+ stats[4], // fifo
+ stats[5], // frame
+ stats[6], // compressed
+ stats[7], // multicast
+ // Transmitted (the last three labels follow the header line written above)
+ stats[8], // bytes
+ stats[9], // packets
+ stats[10], // errors
+ stats[11], // dropped
+ stats[12], // fifo
+ stats[13], // colls
+ stats[14], // carrier
+ stats[15], // compressed
+ )
+ }
+
+ return nil
+}
+
+// netUnixData implements vfs.DynamicBytesSource for /proc/net/unix.
+//
+// +stateify savable
+type netUnixData struct {
+ kernfs.DynamicBytesFile
+
+ kernel *kernel.Kernel
+}
+
+var _ dynamicInode = (*netUnixData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate. It writes the header, then one line per AF_UNIX socket in the kernel's socket table.
+func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n")
+ for _, se := range n.kernel.ListSockets() {
+ s := se.Sock.Get()
+ if s == nil {
+ log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock)
+ continue
+ }
+ sfile := s.(*fs.File)
+ if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX {
+ s.DecRef()
+ // Not a unix socket.
+ continue
+ }
+ sops := sfile.FileOperations.(*unix.SocketOperations)
+
+ addr, err := sops.Endpoint().GetLocalAddress()
+ if err != nil {
+ log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err)
+ addr.Addr = "<unknown>"
+ }
+
+ sockFlags := 0
+ if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok {
+ if ce.Listening() {
+ // For unix domain sockets, linux reports a single flag
+ // value if the socket is listening, of __SO_ACCEPTCON.
+ sockFlags = linux.SO_ACCEPTCON
+ }
+ }
+
+ // In the socket entry below, the value for the 'Num' field requires
+ // some consideration. Linux prints the address to the struct
+ // unix_sock representing a socket in the kernel, but may redact the
+ // value for unprivileged users depending on the kptr_restrict
+ // sysctl.
+ //
+ // One use for this field is to allow a privileged user to
+ // introspect into the kernel memory to determine information about
+ // a socket not available through procfs, such as the socket's peer.
+ //
+ // In gvisor, returning a pointer to our internal structures would
+ // be pointless, as it wouldn't match the memory layout for struct
+ // unix_sock, making introspection difficult. We could populate a
+ // struct unix_sock with the appropriate data, but even that
+ // requires consideration for which kernel version to emulate, as
+ // the definition of this struct changes over time.
+ //
+ // For now, we always redact this pointer.
+ fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d",
+ (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct.
+ sfile.ReadRefs()-1, // RefCount, don't count our own ref.
+ 0, // Protocol, always 0 for UDS.
+ sockFlags, // Flags.
+ sops.Endpoint().Type(), // Type.
+ sops.State(), // State.
+ sfile.InodeID(), // Inode.
+ )
+
+ // Path
+ if len(addr.Addr) != 0 {
+ if addr.Addr[0] == 0 {
+ // Abstract path: shown with '@' in place of the leading NUL byte.
+ fmt.Fprintf(buf, " @%s", string(addr.Addr[1:]))
+ } else {
+ fmt.Fprintf(buf, " %s", string(addr.Addr))
+ }
+ }
+ fmt.Fprintf(buf, "\n")
+
+ s.DecRef()
+ }
+ return nil
+}
+
+// networkToHost16 takes n, a 16-bit value in network (big-endian) byte order,
+// lays it out as two bytes most-significant first, and decodes those bytes
+// with usermem.ByteOrder.
+func networkToHost16(n uint16) uint16 {
+ // n is in network byte order, so is big-endian. The most-significant byte
+ // should be stored in the lower address.
+ //
+ // We manually inline binary.BigEndian.Uint16() because Go does not support
+ // non-primitive consts, so binary.BigEndian is a (mutable) var, so calls to
+ // binary.BigEndian.Uint16() require a read of binary.BigEndian and an
+ // interface method call, defeating inlining.
+ buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)}
+ return usermem.ByteOrder.Uint16(buf[:])
+}
+
+// writeInetAddr writes the address and port held in i to w as upper-case hex
+// in the form "ADDR:PORT " (note the trailing space). A nil i is written as
+// the all-zero address and port. Families other than AF_INET and AF_INET6
+// write nothing. A non-nil i must be the sockaddr type matching family,
+// otherwise the type assertion panics.
+func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
+ switch family {
+ case linux.AF_INET:
+ var a linux.SockAddrInet
+ if i != nil {
+ a = *i.(*linux.SockAddrInet)
+ }
+
+ // linux.SockAddrInet.Port is stored in the network byte order and is
+ // printed like a number in host byte order. Note that all numbers in host
+ // byte order are printed with the most-significant byte first when
+ // formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux.
+ port := networkToHost16(a.Port)
+
+ // linux.SockAddrInet.Addr is stored as a byte slice in big-endian order
+ // (i.e. most-significant byte in index 0). Linux represents this as a
+ // __be32 which is a typedef for an unsigned int, and is printed with
+ // %X. This means that for a little-endian machine, Linux prints the
+ // least-significant byte of the address first. To emulate this, we first
+ // invert the byte order for the address using usermem.ByteOrder.Uint32,
+ // which makes it have the equivalent encoding to a __be32 on a little
+ // endian machine. Note that this operation is a no-op on a big endian
+ // machine. Then similar to Linux, we format it with %X, which will print
+ // the most-significant byte of the __be32 address first, which is now
+ // actually the least-significant byte of the original address in
+ // linux.SockAddrInet.Addr on little endian machines, due to the conversion.
+ addr := usermem.ByteOrder.Uint32(a.Addr[:])
+
+ fmt.Fprintf(w, "%08X:%04X ", addr, port)
+ case linux.AF_INET6:
+ var a linux.SockAddrInet6
+ if i != nil {
+ a = *i.(*linux.SockAddrInet6)
+ }
+
+ // Same conversion as the AF_INET case, applied to each 32-bit word of
+ // the 16-byte address in turn.
+ port := networkToHost16(a.Port)
+ addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4])
+ addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8])
+ addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12])
+ addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16])
+ fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port)
+ }
+}
+
+// commonGenerateTCP appends one line to buf for every SOCK_STREAM socket of
+// the given address family found in k's socket table. The column header is
+// not written here; callers write it first. It always returns nil and panics
+// if the socket table holds a file that is not a socket.
+func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error {
+ // t may be nil here if our caller is not part of a task goroutine. This can
+ // happen for example if we're here for "sentryctl cat". When t is nil,
+ // degrade gracefully and retrieve what we can.
+ t := kernel.TaskFromContext(ctx)
+
+ for _, se := range k.ListSockets() {
+ s := se.Sock.Get()
+ if s == nil {
+ log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID)
+ continue
+ }
+ sfile := s.(*fs.File)
+ sops, ok := sfile.FileOperations.(socket.Socket)
+ if !ok {
+ panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
+ }
+ if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) {
+ s.DecRef()
+ // Not a stream socket of the requested family.
+ continue
+ }
+
+ // Linux's documentation for the fields below can be found at
+ // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
+ // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
+ // Note that the header doesn't contain labels for all the fields.
+
+ // Field: sl; entry number.
+ fmt.Fprintf(buf, "%4d: ", se.ID)
+
+ // Field: local_address. Left as the zero address when t is nil or the lookup fails.
+ var localAddr linux.SockAddr
+ if t != nil {
+ if local, _, err := sops.GetSockName(t); err == nil {
+ localAddr = local
+ }
+ }
+ writeInetAddr(buf, family, localAddr)
+
+ // Field: rem_address. Left as the zero address when t is nil or the lookup fails.
+ var remoteAddr linux.SockAddr
+ if t != nil {
+ if remote, _, err := sops.GetPeerName(t); err == nil {
+ remoteAddr = remote
+ }
+ }
+ writeInetAddr(buf, family, remoteAddr)
+
+ // Field: state; socket state.
+ fmt.Fprintf(buf, "%02X ", sops.State())
+
+ // Field: tx_queue, rx_queue; number of packets in the transmit and
+ // receive queue. Unimplemented.
+ fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
+
+ // Field: tr, tm->when; timer active state and number of jiffies
+ // until timer expires. Unimplemented.
+ fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
+
+ // Field: retrnsmt; number of unrecovered RTO timeouts.
+ // Unimplemented.
+ fmt.Fprintf(buf, "%08X ", 0)
+
+ // Field: uid.
+ uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
+ if err != nil {
+ log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
+ fmt.Fprintf(buf, "%5d ", 0)
+ } else {
+ creds := auth.CredentialsFromContext(ctx)
+ fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow()))
+ }
+
+ // Field: timeout; number of unanswered 0-window probes.
+ // Unimplemented.
+ fmt.Fprintf(buf, "%8d ", 0)
+
+ // Field: inode.
+ fmt.Fprintf(buf, "%8d ", sfile.InodeID())
+
+ // Field: refcount. Don't count the ref we obtain while dereferencing
+ // the weakref to this socket.
+ fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1)
+
+ // Field: Socket struct address. Redacted due to the same reason as
+ // the 'Num' field in /proc/net/unix, see netUnixData.Generate.
+ fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
+
+ // Field: retransmit timeout. Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: predicted tick of soft clock (delayed ACK control data).
+ // Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: sending congestion window, Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
+ // Unimplemented, report as large threshold.
+ fmt.Fprintf(buf, "%d", -1)
+
+ fmt.Fprintf(buf, "\n")
+
+ s.DecRef()
+ }
+
+ return nil
+}
+
+// netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp.
+//
+// +stateify savable
+type netTCPData struct {
+ kernfs.DynamicBytesFile
+
+ kernel *kernel.Kernel
+}
+
+var _ dynamicInode = (*netTCPData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate. It writes the column
+// header, then the AF_INET stream sockets via commonGenerateTCP.
+func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n")
+ return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET)
+}
+
+// netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6.
+//
+// +stateify savable
+type netTCP6Data struct {
+ kernfs.DynamicBytesFile
+
+ kernel *kernel.Kernel
+}
+
+var _ dynamicInode = (*netTCP6Data)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate. It writes the column
+// header, then the AF_INET6 stream sockets via commonGenerateTCP.
+func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n")
+ return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6)
+}
+
+// netUDPData implements vfs.DynamicBytesSource for /proc/net/udp.
+//
+// +stateify savable
+type netUDPData struct {
+ kernfs.DynamicBytesFile
+
+ kernel *kernel.Kernel
+}
+
+var _ dynamicInode = (*netUDPData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate. NOTE(review): unlike netTCPData.Generate, no column header line is written before the entries — confirm this is intended.
+func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // t may be nil here if our caller is not part of a task goroutine. This can
+ // happen for example if we're here for "sentryctl cat". When t is nil,
+ // degrade gracefully and retrieve what we can.
+ t := kernel.TaskFromContext(ctx)
+
+ for _, se := range d.kernel.ListSockets() {
+ s := se.Sock.Get()
+ if s == nil {
+ log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID)
+ continue
+ }
+ sfile := s.(*fs.File)
+ sops, ok := sfile.FileOperations.(socket.Socket)
+ if !ok {
+ panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
+ }
+ if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM {
+ s.DecRef()
+ // Not udp4 socket.
+ continue
+ }
+
+ // For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock().
+
+ // Field: sl; entry number.
+ fmt.Fprintf(buf, "%5d: ", se.ID)
+
+ // Field: local_address. Left as the zero address when t is nil or the lookup fails.
+ var localAddr linux.SockAddrInet
+ if t != nil {
+ if local, _, err := sops.GetSockName(t); err == nil {
+ localAddr = *local.(*linux.SockAddrInet)
+ }
+ }
+ writeInetAddr(buf, linux.AF_INET, &localAddr)
+
+ // Field: rem_address. Left as the zero address when t is nil or the lookup fails.
+ var remoteAddr linux.SockAddrInet
+ if t != nil {
+ if remote, _, err := sops.GetPeerName(t); err == nil {
+ remoteAddr = *remote.(*linux.SockAddrInet)
+ }
+ }
+ writeInetAddr(buf, linux.AF_INET, &remoteAddr)
+
+ // Field: state; socket state.
+ fmt.Fprintf(buf, "%02X ", sops.State())
+
+ // Field: tx_queue, rx_queue; number of packets in the transmit and
+ // receive queue. Unimplemented.
+ fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
+
+ // Field: tr, tm->when. Always 0 for UDP.
+ fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
+
+ // Field: retrnsmt. Always 0 for UDP.
+ fmt.Fprintf(buf, "%08X ", 0)
+
+ // Field: uid.
+ uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
+ if err != nil {
+ log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
+ fmt.Fprintf(buf, "%5d ", 0)
+ } else {
+ creds := auth.CredentialsFromContext(ctx)
+ fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow()))
+ }
+
+ // Field: timeout. Always 0 for UDP.
+ fmt.Fprintf(buf, "%8d ", 0)
+
+ // Field: inode.
+ fmt.Fprintf(buf, "%8d ", sfile.InodeID())
+
+ // Field: ref; reference count on the socket inode. Don't count the ref
+ // we obtain while dereferencing the weakref to this socket.
+ fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1)
+
+ // Field: Socket struct address. Redacted due to the same reason as
+ // the 'Num' field in /proc/net/unix, see netUnixData.Generate.
+ fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
+
+ // Field: drops; number of dropped packets. Unimplemented.
+ fmt.Fprintf(buf, "%d", 0)
+
+ fmt.Fprintf(buf, "\n")
+
+ s.DecRef()
+ }
+ return nil
+}
+
+// netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp.
+//
+// +stateify savable
+type netSnmpData struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*netSnmpData)(nil)
+
+// snmpLine describes one section of /proc/net/snmp.
+type snmpLine struct {
+ prefix string // section name; printed before the colon and passed to inet.Stack.Statistics
+ header string // space-separated column names for the section; may be empty
+}
+
+// snmp lists the sections of /proc/net/snmp in output order. netSnmpData.Generate
+// indexes it in lockstep with its own slice of stat buffers, so the two must
+// keep the same length and ordering.
+var snmp = []snmpLine{
+ {
+ prefix: "Ip",
+ header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates",
+ },
+ {
+ prefix: "Icmp",
+ header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps",
+ },
+ {
+ prefix: "IcmpMsg",
+ },
+ {
+ prefix: "Tcp",
+ header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors",
+ },
+ {
+ prefix: "Udp",
+ header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
+ },
+ {
+ prefix: "UdpLite",
+ header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
+ },
+}
+
+// toSlice returns the full contents of a as a []uint64, using reflection so
+// that it works for arrays of any length. The callers in this file pass a
+// pointer to one of the inet.StatSNMP* values. It panics if the sliced value
+// is not a []uint64.
+func toSlice(a interface{}) []uint64 {
+ v := reflect.Indirect(reflect.ValueOf(a))
+ return v.Slice(0, v.Len()).Interface().([]uint64)
+}
+
+// sprintSlice formats s as space-separated decimal numbers, or returns the
+// empty string when s is empty.
+func sprintSlice(s []uint64) string {
+ if len(s) == 0 {
+ return ""
+ }
+ r := fmt.Sprint(s)
+ return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice.
+}
+
+// Generate implements vfs.DynamicBytesSource.
+//
+// For each entry of snmp it writes a header line ("<prefix>: <header>")
+// followed by a values line ("<prefix>: <values>"). An entry whose stat slot
+// is nil is written as two bare "<prefix>:" lines. When the stack fails to
+// provide a statistic the failure is logged and whatever the stat buffer holds
+// is still written, so the file keeps its shape.
+func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // types must stay in lockstep with snmp: entry i is the buffer for snmp[i].
+ types := []interface{}{
+ &inet.StatSNMPIP{},
+ &inet.StatSNMPICMP{},
+ nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats.
+ &inet.StatSNMPTCP{},
+ &inet.StatSNMPUDP{},
+ &inet.StatSNMPUDPLite{},
+ }
+ for i, stat := range types {
+ line := snmp[i]
+ if stat == nil {
+ fmt.Fprintf(buf, "%s:\n", line.prefix)
+ fmt.Fprintf(buf, "%s:\n", line.prefix)
+ continue
+ }
+ if err := d.stack.Statistics(stat, line.prefix); err != nil {
+ if err == syserror.EOPNOTSUPP {
+ log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
+ } else {
+ log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
+ }
+ }
+
+ fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header)
+
+ // The values line must be written to buf. Formatting it with
+ // fmt.Sprintf and dropping the result left the file with headers only.
+ if line.prefix == "Tcp" {
+ tcp := stat.(*inet.StatSNMPTCP)
+ // "Tcp" needs special processing because MaxConn is signed. RFC 2012.
+ fmt.Fprintf(buf, "%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:]))
+ } else {
+ fmt.Fprintf(buf, "%s: %s\n", line.prefix, sprintSlice(toSlice(stat)))
+ }
+ }
+ return nil
+}
+
+// netRouteData implements vfs.DynamicBytesSource for /proc/net/route.
+//
+// +stateify savable
+type netRouteData struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*netRouteData)(nil)
+
+// Generate implements vfs.DynamicBytesSource. Every line, header included, is left-justified and padded to 127 characters.
+// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show.
+func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "%-127s\n", "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT")
+
+ interfaces := d.stack.Interfaces()
+ for _, rt := range d.stack.RouteTable() {
+ // /proc/net/route only includes ipv4 routes.
+ if rt.Family != linux.AF_INET {
+ continue
+ }
+
+ // /proc/net/route does not include broadcast or multicast routes.
+ if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST {
+ continue
+ }
+
+ iface, ok := interfaces[rt.OutputInterface]
+ if !ok || iface.Name == "lo" { // Skip routes with an unknown output interface and routes via "lo".
+ continue
+ }
+
+ var (
+ gw uint32
+ prefix uint32
+ flags = linux.RTF_UP
+ )
+ if len(rt.GatewayAddr) == header.IPv4AddressSize {
+ flags |= linux.RTF_GATEWAY
+ gw = usermem.ByteOrder.Uint32(rt.GatewayAddr)
+ }
+ if len(rt.DstAddr) == header.IPv4AddressSize {
+ prefix = usermem.ByteOrder.Uint32(rt.DstAddr)
+ }
+ l := fmt.Sprintf(
+ "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d",
+ iface.Name,
+ prefix,
+ gw,
+ flags,
+ 0, // RefCnt.
+ 0, // Use.
+ 0, // Metric.
+ (uint32(1)<<rt.DstLen)-1, // Mask: the low rt.DstLen bits set.
+ 0, // MTU.
+ 0, // Window.
+ 0, // IRTT.
+ )
+ fmt.Fprintf(buf, "%-127s\n", l)
+ }
+ return nil
+}
+
+// netStatData implements vfs.DynamicBytesSource for /proc/net/netstat.
+//
+// +stateify savable
+type netStatData struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*netStatData)(nil)
+
+// Generate implements vfs.DynamicBytesSource. Only the "TcpExt:" header line is written; no values line follows and d.stack is not consulted.
+// See Linux's net/ipv4/proc.c:netstat_seq_show.
+func (d *netStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed " +
+ "EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps " +
+ "LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive " +
+ "PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost " +
+ "ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog " +
+ "TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser " +
+ "TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging " +
+ "TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo " +
+ "TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit " +
+ "TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans " +
+ "TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes " +
+ "TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail " +
+ "TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent " +
+ "TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose " +
+ "TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed " +
+ "TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld " +
+ "TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected " +
+ "TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback " +
+ "TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter " +
+ "TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail " +
+ "TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK " +
+ "TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail " +
+ "TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow " +
+ "TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets " +
+ "TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv " +
+ "TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect " +
+ "TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd " +
+ "TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq " +
+ "TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge " +
+ "TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
new file mode 100644
index 000000000..ad963870b
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -0,0 +1,143 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// newSysDir returns the dentry corresponding to /proc/sys directory.
+//
+// The returned tree is static and contains three subdirectories: "kernel",
+// "vm" and "net" (the latter built by newSysNetDir). Directories are created
+// with mode 0555 and files with mode 0444, all owned by root. Every node
+// requests its inode number from inoGen.NextIno(); the calls happen in the
+// lexical order of the literal below, so reordering entries changes which
+// number each node receives.
+func newSysDir(root *auth.Credentials, inoGen InoGenerator) *kernfs.Dentry {
+ return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "kernel": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "hostname": newDentry(root, inoGen.NextIno(), 0444, &hostnameData{}),
+ "shmall": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMALL)),
+ "shmmax": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMAX)),
+ "shmmni": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMNI)),
+ }),
+ "vm": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ // NOTE(review): mmapMinAddrData is created here with its k field unset;
+ // its Generate method must not rely on k being non-nil.
+ "mmap_min_addr": newDentry(root, inoGen.NextIno(), 0444, &mmapMinAddrData{}),
+ "overcommit_memory": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0\n")),
+ }),
+ "net": newSysNetDir(root, inoGen),
+ })
+}
+
+// newSysNetDir returns the dentry corresponding to /proc/sys/net directory.
+//
+// The returned directory directly contains the "ipv4" and "core"
+// subdirectories. newSysDir installs the result under the name "net", so the
+// contents must not be wrapped in a further "net" level; doing so would expose
+// the files at /proc/sys/net/net/... instead of /proc/sys/net/....
+//
+// All directories are created with mode 0555 and all files with mode 0444,
+// owned by root, with inode numbers taken from inoGen in lexical order.
+func newSysNetDir(root *auth.Credentials, inoGen InoGenerator) *kernfs.Dentry {
+ return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "ipv4": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ // Add tcp_sack.
+ // TODO(gvisor.dev/issue/1195): tcp_sack allows write(2)
+ // "tcp_sack": newTCPSackInode(ctx, msrc, s),
+
+ // The following files are simple stubs until they are implemented in
+ // netstack, most of these files are configuration related. We use the
+ // value closest to the actual netstack behavior or any empty file, all
+ // of these files will have mode 0444 (read-only for all users).
+ "ip_local_port_range": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("16000 65535")),
+ "ip_local_reserved_ports": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
+ "ipfrag_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("30")),
+ "ip_nonlocal_bind": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "ip_no_pmtu_disc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+
+ // tcp_allowed_congestion_control tells the user what they are able to
+ // do as an unprivileged process so we leave it empty.
+ "tcp_allowed_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
+ "tcp_available_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")),
+ "tcp_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")),
+
+ // Many of the following stub files are features netstack doesn't
+ // support. The unsupported features return "0" to indicate they are
+ // disabled.
+ "tcp_base_mss": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1280")),
+ "tcp_dsack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_early_retrans": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_fack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_fastopen": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_fastopen_key": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
+ "tcp_invalid_ratelimit": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_keepalive_intvl": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_keepalive_probes": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_keepalive_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("7200")),
+ "tcp_mtu_probing": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_no_metrics_save": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ "tcp_probe_interval": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_probe_threshold": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_retries1": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")),
+ "tcp_retries2": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("15")),
+ "tcp_rfc1337": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ "tcp_slow_start_after_idle": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ "tcp_synack_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")),
+ "tcp_syn_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")),
+ "tcp_timestamps": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ }),
+ "core": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "default_qdisc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("pfifo_fast")),
+ "message_burst": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("10")),
+ "message_cost": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")),
+ "optmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "rmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
+ "rmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
+ "somaxconn": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("128")),
+ "wmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
+ "wmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
+ }),
+ })
+}
+
+// mmapMinAddrData implements vfs.DynamicBytesSource for
+// /proc/sys/vm/mmap_min_addr.
+//
+// +stateify savable
+type mmapMinAddrData struct {
+ kernfs.DynamicBytesFile
+
+ // k is the kernel whose Platform is queried for the minimum user address
+ // when the file contents are generated.
+ k *kernel.Kernel
+}
+
+// Compile-time check that *mmapMinAddrData satisfies dynamicInode.
+var _ dynamicInode = (*mmapMinAddrData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// It writes the platform's minimum user address to buf as a decimal number
+// followed by a newline, and always returns nil.
+//
+// newSysDir constructs mmapMinAddrData without setting k, so when d.k is nil
+// the kernel is looked up from ctx instead of dereferencing a nil pointer.
+func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ k := d.k
+ if k == nil {
+ k = kernel.KernelFromContext(ctx)
+ }
+ fmt.Fprintf(buf, "%d\n", k.Platform.MinUserAddress())
+ return nil
+}
+
+// hostnameData implements vfs.DynamicBytesSource for /proc/sys/kernel/hostname.
+//
+// It carries no state of its own; the hostname is read from the UTS namespace
+// of the context passed to Generate.
+//
+// +stateify savable
+type hostnameData struct {
+ kernfs.DynamicBytesFile
+}
+
+// Compile-time check that *hostnameData satisfies dynamicInode.
+var _ dynamicInode = (*hostnameData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// The hostname of the UTS namespace associated with ctx is written to buf,
+// terminated by a newline. The returned error is always nil.
+func (*hostnameData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ hostname := kernel.UTSNamespaceFromContext(ctx).HostName()
+ buf.WriteString(hostname + "\n")
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/net_test.go b/pkg/sentry/fsimpl/proc/tasks_sys_test.go
index 20a77a8ca..be54897bb 100644
--- a/pkg/sentry/fsimpl/proc/net_test.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys_test.go
@@ -20,7 +20,7 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/inet"
)
@@ -31,7 +31,7 @@ func newIPv6TestStack() *inet.TestStack {
}
func TestIfinet6NoAddresses(t *testing.T) {
- n := &ifinet6{s: newIPv6TestStack()}
+ n := &ifinet6{stack: newIPv6TestStack()}
var buf bytes.Buffer
n.Generate(contexttest.Context(t), &buf)
if buf.Len() > 0 {
@@ -62,7 +62,7 @@ func TestIfinet6(t *testing.T) {
"101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {},
}
- n := &ifinet6{s: s}
+ n := &ifinet6{stack: s}
contents := n.contents()
if len(contents) != len(want) {
t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want))
diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go
index ca8c87ec2..6fc3524db 100644
--- a/pkg/sentry/fsimpl/proc/tasks_test.go
+++ b/pkg/sentry/fsimpl/proc/tasks_test.go
@@ -22,13 +22,14 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
var (
@@ -43,87 +44,47 @@ var (
proc3 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 3 + 1}
)
-type testIterDirentsCallback struct {
- dirents []vfs.Dirent
-}
-
-func (t *testIterDirentsCallback) Handle(d vfs.Dirent) bool {
- t.dirents = append(t.dirents, d)
- return true
-}
-
-func checkDots(dirs []vfs.Dirent) ([]vfs.Dirent, error) {
- if got := len(dirs); got < 2 {
- return dirs, fmt.Errorf("wrong number of dirents, want at least: 2, got: %d: %v", got, dirs)
- }
- for i, want := range []string{".", ".."} {
- if got := dirs[i].Name; got != want {
- return dirs, fmt.Errorf("wrong name, want: %s, got: %s", want, got)
- }
- if got := dirs[i].Type; got != linux.DT_DIR {
- return dirs, fmt.Errorf("wrong type, want: %d, got: %d", linux.DT_DIR, got)
- }
- }
- return dirs[2:], nil
-}
-
-func checkTasksStaticFiles(gots []vfs.Dirent) ([]vfs.Dirent, error) {
- wants := map[string]vfs.Dirent{
- "loadavg": {Type: linux.DT_REG},
- "meminfo": {Type: linux.DT_REG},
- "mounts": {Type: linux.DT_LNK},
- "self": selfLink,
- "stat": {Type: linux.DT_REG},
- "thread-self": threadSelfLink,
- "version": {Type: linux.DT_REG},
- }
- return checkFiles(gots, wants)
-}
-
-func checkTaskStaticFiles(gots []vfs.Dirent) ([]vfs.Dirent, error) {
- wants := map[string]vfs.Dirent{
- "io": {Type: linux.DT_REG},
- "maps": {Type: linux.DT_REG},
- "smaps": {Type: linux.DT_REG},
- "stat": {Type: linux.DT_REG},
- "statm": {Type: linux.DT_REG},
- "status": {Type: linux.DT_REG},
- }
- return checkFiles(gots, wants)
-}
-
-func checkFiles(gots []vfs.Dirent, wants map[string]vfs.Dirent) ([]vfs.Dirent, error) {
- // Go over all files, when there is a match, the file is removed from both
- // 'gots' and 'wants'. wants is expected to reach 0, as all files must
- // be present. Remaining files in 'gots', is returned to caller to decide
- // whether this is valid or not.
- for i := 0; i < len(gots); i++ {
- got := gots[i]
- want, ok := wants[got.Name]
- if !ok {
- continue
- }
- if want.Type != got.Type {
- return gots, fmt.Errorf("wrong file type, want: %v, got: %v: %+v", want.Type, got.Type, got)
- }
- if want.NextOff != 0 && want.NextOff != got.NextOff {
- return gots, fmt.Errorf("wrong dirent offset, want: %v, got: %v: %+v", want.NextOff, got.NextOff, got)
- }
-
- delete(wants, got.Name)
- gots = append(gots[0:i], gots[i+1:]...)
- i--
- }
- if len(wants) != 0 {
- return gots, fmt.Errorf("not all files were found, missing: %+v", wants)
+var (
+ tasksStaticFiles = map[string]testutil.DirentType{
+ "cpuinfo": linux.DT_REG,
+ "loadavg": linux.DT_REG,
+ "meminfo": linux.DT_REG,
+ "mounts": linux.DT_LNK,
+ "net": linux.DT_DIR,
+ "self": linux.DT_LNK,
+ "stat": linux.DT_REG,
+ "sys": linux.DT_DIR,
+ "thread-self": linux.DT_LNK,
+ "uptime": linux.DT_REG,
+ "version": linux.DT_REG,
+ }
+ tasksStaticFilesNextOffs = map[string]int64{
+ "self": selfLink.NextOff,
+ "thread-self": threadSelfLink.NextOff,
+ }
+ taskStaticFiles = map[string]testutil.DirentType{
+ "auxv": linux.DT_REG,
+ "cgroup": linux.DT_REG,
+ "cmdline": linux.DT_REG,
+ "comm": linux.DT_REG,
+ "environ": linux.DT_REG,
+ "gid_map": linux.DT_REG,
+ "io": linux.DT_REG,
+ "maps": linux.DT_REG,
+ "ns": linux.DT_DIR,
+ "smaps": linux.DT_REG,
+ "stat": linux.DT_REG,
+ "statm": linux.DT_REG,
+ "status": linux.DT_REG,
+ "task": linux.DT_DIR,
+ "uid_map": linux.DT_REG,
}
- return gots, nil
-}
+)
-func setup() (context.Context, *vfs.VirtualFilesystem, vfs.VirtualDentry, error) {
- k, err := boot()
+func setup(t *testing.T) *testutil.System {
+ k, err := testutil.Boot()
if err != nil {
- return nil, nil, vfs.VirtualDentry{}, fmt.Errorf("creating kernel: %v", err)
+ t.Fatalf("Error creating kernel: %v", err)
}
ctx := k.SupervisorContext()
@@ -133,95 +94,70 @@ func setup() (context.Context, *vfs.VirtualFilesystem, vfs.VirtualDentry, error)
vfsObj.MustRegisterFilesystemType("procfs", &procFSType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
})
- mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "procfs", &vfs.GetFilesystemOptions{})
+ fsOpts := vfs.GetFilesystemOptions{
+ InternalData: &InternalData{
+ Cgroups: map[string]string{
+ "cpuset": "/foo/cpuset",
+ "memory": "/foo/memory",
+ },
+ },
+ }
+ mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "procfs", &fsOpts)
if err != nil {
- return nil, nil, vfs.VirtualDentry{}, fmt.Errorf("NewMountNamespace(): %v", err)
+ t.Fatalf("NewMountNamespace(): %v", err)
}
- return ctx, vfsObj, mntns.Root(), nil
+ return testutil.NewSystem(ctx, t, vfsObj, mntns)
}
func TestTasksEmpty(t *testing.T) {
- ctx, vfsObj, root, err := setup()
- if err != nil {
- t.Fatalf("Setup failed: %v", err)
- }
- defer root.DecRef()
-
- fd, err := vfsObj.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse("/")},
- &vfs.OpenOptions{},
- )
- if err != nil {
- t.Fatalf("vfsfs.OpenAt failed: %v", err)
- }
+ s := setup(t)
+ defer s.Destroy()
- cb := testIterDirentsCallback{}
- if err := fd.Impl().IterDirents(ctx, &cb); err != nil {
- t.Fatalf("IterDirents(): %v", err)
- }
- cb.dirents, err = checkDots(cb.dirents)
- if err != nil {
- t.Error(err.Error())
- }
- cb.dirents, err = checkTasksStaticFiles(cb.dirents)
- if err != nil {
- t.Error(err.Error())
- }
- if len(cb.dirents) != 0 {
- t.Errorf("found more files than expected: %+v", cb.dirents)
- }
+ collector := s.ListDirents(s.PathOpAtRoot("/"))
+ s.AssertAllDirentTypes(collector, tasksStaticFiles)
+ s.AssertDirentOffsets(collector, tasksStaticFilesNextOffs)
}
func TestTasks(t *testing.T) {
- ctx, vfsObj, root, err := setup()
- if err != nil {
- t.Fatalf("Setup failed: %v", err)
+ s := setup(t)
+ defer s.Destroy()
+
+ expectedDirents := make(map[string]testutil.DirentType)
+ for n, d := range tasksStaticFiles {
+ expectedDirents[n] = d
}
- defer root.DecRef()
- k := kernel.KernelFromContext(ctx)
+ k := kernel.KernelFromContext(s.Ctx)
var tasks []*kernel.Task
for i := 0; i < 5; i++ {
tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
- task, err := createTask(ctx, fmt.Sprintf("name-%d", i), tc)
+ task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc)
if err != nil {
t.Fatalf("CreateTask(): %v", err)
}
tasks = append(tasks, task)
+ expectedDirents[fmt.Sprintf("%d", i+1)] = linux.DT_DIR
}
- fd, err := vfsObj.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse("/")},
- &vfs.OpenOptions{},
- )
- if err != nil {
- t.Fatalf("vfsfs.OpenAt(/) failed: %v", err)
- }
+ collector := s.ListDirents(s.PathOpAtRoot("/"))
+ s.AssertAllDirentTypes(collector, expectedDirents)
+ s.AssertDirentOffsets(collector, tasksStaticFilesNextOffs)
- cb := testIterDirentsCallback{}
- if err := fd.Impl().IterDirents(ctx, &cb); err != nil {
- t.Fatalf("IterDirents(): %v", err)
- }
- cb.dirents, err = checkDots(cb.dirents)
- if err != nil {
- t.Error(err.Error())
- }
- cb.dirents, err = checkTasksStaticFiles(cb.dirents)
- if err != nil {
- t.Error(err.Error())
- }
lastPid := 0
- for _, d := range cb.dirents {
+ dirents := collector.OrderedDirents()
+ doneSkippingNonTaskDirs := false
+ for _, d := range dirents {
pid, err := strconv.Atoi(d.Name)
if err != nil {
+ if !doneSkippingNonTaskDirs {
+ // We haven't gotten to the task dirs yet.
+ continue
+ }
t.Fatalf("Invalid process directory %q", d.Name)
}
+ doneSkippingNonTaskDirs = true
if lastPid > pid {
- t.Errorf("pids not in order: %v", cb.dirents)
+ t.Errorf("pids not in order: %v", dirents)
}
found := false
for _, t := range tasks {
@@ -238,13 +174,16 @@ func TestTasks(t *testing.T) {
t.Errorf("Wrong dirent offset want: %d got: %d: %+v", want, d.NextOff, d)
}
}
+ if !doneSkippingNonTaskDirs {
+ t.Fatalf("Never found any process directories.")
+ }
// Test lookup.
for _, path := range []string{"/1", "/2"} {
- fd, err := vfsObj.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse(path)},
+ fd, err := s.VFS.OpenAt(
+ s.Ctx,
+ s.Creds,
+ s.PathOpAtRoot(path),
&vfs.OpenOptions{},
)
if err != nil {
@@ -252,15 +191,15 @@ func TestTasks(t *testing.T) {
}
buf := make([]byte, 1)
bufIOSeq := usermem.BytesIOSequence(buf)
- if _, err := fd.Read(ctx, bufIOSeq, vfs.ReadOptions{}); err != syserror.EISDIR {
+ if _, err := fd.Read(s.Ctx, bufIOSeq, vfs.ReadOptions{}); err != syserror.EISDIR {
t.Errorf("wrong error reading directory: %v", err)
}
}
- if _, err := vfsObj.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse("/9999")},
+ if _, err := s.VFS.OpenAt(
+ s.Ctx,
+ s.Creds,
+ s.PathOpAtRoot("/9999"),
&vfs.OpenOptions{},
); err != syserror.ENOENT {
t.Fatalf("wrong error from vfsfs.OpenAt(/9999): %v", err)
@@ -268,16 +207,13 @@ func TestTasks(t *testing.T) {
}
func TestTasksOffset(t *testing.T) {
- ctx, vfsObj, root, err := setup()
- if err != nil {
- t.Fatalf("Setup failed: %v", err)
- }
- defer root.DecRef()
+ s := setup(t)
+ defer s.Destroy()
- k := kernel.KernelFromContext(ctx)
+ k := kernel.KernelFromContext(s.Ctx)
for i := 0; i < 3; i++ {
tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
- if _, err := createTask(ctx, fmt.Sprintf("name-%d", i), tc); err != nil {
+ if _, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc); err != nil {
t.Fatalf("CreateTask(): %v", err)
}
}
@@ -360,134 +296,100 @@ func TestTasksOffset(t *testing.T) {
},
} {
t.Run(tc.name, func(t *testing.T) {
- fd, err := vfsObj.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse("/")},
+ s := s.WithSubtest(t)
+ fd, err := s.VFS.OpenAt(
+ s.Ctx,
+ s.Creds,
+ s.PathOpAtRoot("/"),
&vfs.OpenOptions{},
)
if err != nil {
t.Fatalf("vfsfs.OpenAt(/) failed: %v", err)
}
- if _, err := fd.Impl().Seek(ctx, tc.offset, linux.SEEK_SET); err != nil {
+ if _, err := fd.Seek(s.Ctx, tc.offset, linux.SEEK_SET); err != nil {
t.Fatalf("Seek(%d, SEEK_SET): %v", tc.offset, err)
}
- cb := testIterDirentsCallback{}
- if err := fd.Impl().IterDirents(ctx, &cb); err != nil {
- t.Fatalf("IterDirents(): %v", err)
+ var collector testutil.DirentCollector
+ if err := fd.IterDirents(s.Ctx, &collector); err != nil {
+ t.Fatalf("IterDirent(): %v", err)
}
- if cb.dirents, err = checkFiles(cb.dirents, tc.wants); err != nil {
- t.Error(err.Error())
- }
- if len(cb.dirents) != 0 {
- t.Errorf("found more files than expected: %+v", cb.dirents)
+
+ expectedTypes := make(map[string]testutil.DirentType)
+ expectedOffsets := make(map[string]int64)
+ for name, want := range tc.wants {
+ expectedTypes[name] = want.Type
+ if want.NextOff != 0 {
+ expectedOffsets[name] = want.NextOff
+ }
}
+
+ collector.SkipDotsChecks(true) // We seek()ed past the dots.
+ s.AssertAllDirentTypes(&collector, expectedTypes)
+ s.AssertDirentOffsets(&collector, expectedOffsets)
})
}
}
func TestTask(t *testing.T) {
- ctx, vfsObj, root, err := setup()
- if err != nil {
- t.Fatalf("Setup failed: %v", err)
- }
- defer root.DecRef()
+ s := setup(t)
+ defer s.Destroy()
- k := kernel.KernelFromContext(ctx)
+ k := kernel.KernelFromContext(s.Ctx)
tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
- _, err = createTask(ctx, "name", tc)
+ _, err := testutil.CreateTask(s.Ctx, "name", tc)
if err != nil {
t.Fatalf("CreateTask(): %v", err)
}
- fd, err := vfsObj.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse("/1")},
- &vfs.OpenOptions{},
- )
- if err != nil {
- t.Fatalf("vfsfs.OpenAt(/1) failed: %v", err)
- }
-
- cb := testIterDirentsCallback{}
- if err := fd.Impl().IterDirents(ctx, &cb); err != nil {
- t.Fatalf("IterDirents(): %v", err)
- }
- cb.dirents, err = checkDots(cb.dirents)
- if err != nil {
- t.Error(err.Error())
- }
- cb.dirents, err = checkTaskStaticFiles(cb.dirents)
- if err != nil {
- t.Error(err.Error())
- }
- if len(cb.dirents) != 0 {
- t.Errorf("found more files than expected: %+v", cb.dirents)
- }
+ collector := s.ListDirents(s.PathOpAtRoot("/1"))
+ s.AssertAllDirentTypes(collector, taskStaticFiles)
}
func TestProcSelf(t *testing.T) {
- ctx, vfsObj, root, err := setup()
- if err != nil {
- t.Fatalf("Setup failed: %v", err)
- }
- defer root.DecRef()
+ s := setup(t)
+ defer s.Destroy()
- k := kernel.KernelFromContext(ctx)
+ k := kernel.KernelFromContext(s.Ctx)
tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
- task, err := createTask(ctx, "name", tc)
+ task, err := testutil.CreateTask(s.Ctx, "name", tc)
if err != nil {
t.Fatalf("CreateTask(): %v", err)
}
- fd, err := vfsObj.OpenAt(
- task,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse("/self/"), FollowFinalSymlink: true},
- &vfs.OpenOptions{},
- )
- if err != nil {
- t.Fatalf("vfsfs.OpenAt(/self/) failed: %v", err)
- }
-
- cb := testIterDirentsCallback{}
- if err := fd.Impl().IterDirents(ctx, &cb); err != nil {
- t.Fatalf("IterDirents(): %v", err)
- }
- cb.dirents, err = checkDots(cb.dirents)
- if err != nil {
- t.Error(err.Error())
- }
- cb.dirents, err = checkTaskStaticFiles(cb.dirents)
- if err != nil {
- t.Error(err.Error())
- }
- if len(cb.dirents) != 0 {
- t.Errorf("found more files than expected: %+v", cb.dirents)
- }
+ collector := s.WithTemporaryContext(task).ListDirents(&vfs.PathOperation{
+ Root: s.Root,
+ Start: s.Root,
+ Path: fspath.Parse("/self/"),
+ FollowFinalSymlink: true,
+ })
+ s.AssertAllDirentTypes(collector, taskStaticFiles)
}
-func iterateDir(ctx context.Context, t *testing.T, vfsObj *vfs.VirtualFilesystem, root vfs.VirtualDentry, fd *vfs.FileDescription) {
+func iterateDir(ctx context.Context, t *testing.T, s *testutil.System, fd *vfs.FileDescription) {
t.Logf("Iterating: /proc%s", fd.MappedName(ctx))
- cb := testIterDirentsCallback{}
- if err := fd.Impl().IterDirents(ctx, &cb); err != nil {
+ var collector testutil.DirentCollector
+ if err := fd.IterDirents(ctx, &collector); err != nil {
t.Fatalf("IterDirents(): %v", err)
}
- var err error
- cb.dirents, err = checkDots(cb.dirents)
- if err != nil {
+ if err := collector.Contains(".", linux.DT_DIR); err != nil {
t.Error(err.Error())
}
- for _, d := range cb.dirents {
+ if err := collector.Contains("..", linux.DT_DIR); err != nil {
+ t.Error(err.Error())
+ }
+
+ for _, d := range collector.Dirents() {
+ if d.Name == "." || d.Name == ".." {
+ continue
+ }
childPath := path.Join(fd.MappedName(ctx), d.Name)
if d.Type == linux.DT_LNK {
- link, err := vfsObj.ReadlinkAt(
+ link, err := s.VFS.ReadlinkAt(
ctx,
auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse(childPath)},
+ &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse(childPath)},
)
if err != nil {
t.Errorf("vfsfs.ReadlinkAt(%v) failed: %v", childPath, err)
@@ -498,10 +400,10 @@ func iterateDir(ctx context.Context, t *testing.T, vfsObj *vfs.VirtualFilesystem
}
t.Logf("Opening: /proc%s", childPath)
- child, err := vfsObj.OpenAt(
+ child, err := s.VFS.OpenAt(
ctx,
auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse(childPath)},
+ &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse(childPath)},
&vfs.OpenOptions{},
)
if err != nil {
@@ -517,24 +419,21 @@ func iterateDir(ctx context.Context, t *testing.T, vfsObj *vfs.VirtualFilesystem
}
if d.Type == linux.DT_DIR {
// Found another dir, let's do it again!
- iterateDir(ctx, t, vfsObj, root, child)
+ iterateDir(ctx, t, s, child)
}
}
}
// TestTree iterates all directories and stats every file.
func TestTree(t *testing.T) {
- uberCtx, vfsObj, root, err := setup()
- if err != nil {
- t.Fatalf("Setup failed: %v", err)
- }
- defer root.DecRef()
+ s := setup(t)
+ defer s.Destroy()
- k := kernel.KernelFromContext(uberCtx)
+ k := kernel.KernelFromContext(s.Ctx)
var tasks []*kernel.Task
for i := 0; i < 5; i++ {
tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
- task, err := createTask(uberCtx, fmt.Sprintf("name-%d", i), tc)
+ task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc)
if err != nil {
t.Fatalf("CreateTask(): %v", err)
}
@@ -542,14 +441,14 @@ func TestTree(t *testing.T) {
}
ctx := tasks[0]
- fd, err := vfsObj.OpenAt(
+ fd, err := s.VFS.OpenAt(
ctx,
- auth.CredentialsFromContext(uberCtx),
- &vfs.PathOperation{Root: root, Start: root, Path: fspath.Parse("/")},
+ auth.CredentialsFromContext(s.Ctx),
+ &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse("/")},
&vfs.OpenOptions{},
)
if err != nil {
t.Fatalf("vfsfs.OpenAt(/) failed: %v", err)
}
- iterateDir(ctx, t, vfsObj, root, fd)
+ iterateDir(ctx, t, s, fd)
}
diff --git a/pkg/sentry/fsimpl/proc/version.go b/pkg/sentry/fsimpl/proc/version.go
deleted file mode 100644
index 367f2396b..000000000
--- a/pkg/sentry/fsimpl/proc/version.go
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
-
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
-)
-
-// versionData implements vfs.DynamicBytesSource for /proc/version.
-//
-// +stateify savable
-type versionData struct {
- kernfs.DynamicBytesFile
-
- // k is the owning Kernel.
- k *kernel.Kernel
-}
-
-var _ dynamicInode = (*versionData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (v *versionData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- init := v.k.GlobalInit()
- if init == nil {
- // Attempted to read before the init Task is created. This can
- // only occur during startup, which should never need to read
- // this file.
- panic("Attempted to read version before initial Task is available")
- }
-
- // /proc/version takes the form:
- //
- // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
- // (COMPILER_VERSION) VERSION"
- //
- // where:
- // - SYSNAME, RELEASE, and VERSION are the same as returned by
- // sys_utsname
- // - COMPILE_USER is the user that build the kernel
- // - COMPILE_HOST is the hostname of the machine on which the kernel
- // was built
- // - COMPILER_VERSION is the version reported by the building compiler
- //
- // Since we don't really want to expose build information to
- // applications, those fields are omitted.
- //
- // FIXME(mpratt): Using Version from the init task SyscallTable
- // disregards the different version a task may have (e.g., in a uts
- // namespace).
- ver := init.Leader().SyscallTable().Version
- fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
- return nil
-}
diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD
new file mode 100644
index 000000000..66c0d8bc8
--- /dev/null
+++ b/pkg/sentry/fsimpl/sys/BUILD
@@ -0,0 +1,33 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+licenses(["notice"])
+
+go_library(
+ name = "sys",
+ srcs = [
+ "sys.go",
+ ],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/context",
+ "//pkg/sentry/fsimpl/kernfs",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/vfs",
+ "//pkg/syserror",
+ ],
+)
+
+go_test(
+ name = "sys_test",
+ srcs = ["sys_test.go"],
+ deps = [
+ ":sys",
+ "//pkg/abi/linux",
+ "//pkg/sentry/fsimpl/testutil",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/vfs",
+ "@com_github_google_go-cmp//cmp:go_default_library",
+ ],
+)
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
new file mode 100644
index 000000000..e35d52d17
--- /dev/null
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -0,0 +1,124 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package sys implements sysfs.
+package sys
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// FilesystemType implements vfs.FilesystemType.
+//
+// It is stateless; its GetFilesystem method builds a new sysfs instance on
+// each call.
+type FilesystemType struct{}
+
+// filesystem implements vfs.FilesystemImpl.
+//
+// All behavior comes from the embedded kernfs.Filesystem; this type adds only
+// the sysfs-specific constructors newDir and newCPUFile.
+type filesystem struct {
+ kernfs.Filesystem
+}
+
+// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
+//
+// It builds a static sysfs tree: a set of empty top-level directories plus
+// devices/system/cpu/{online,possible,present}, whose contents are derived
+// from the kernel's ApplicationCores() value. The source and opts arguments
+// are not consulted, and the returned error is always nil.
+//
+// NOTE(review): the kernel obtained from ctx is used without a nil check;
+// presumably callers always supply a context that carries a kernel.
+func (FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+ fs := &filesystem{}
+ fs.Filesystem.Init(vfsObj)
+ k := kernel.KernelFromContext(ctx)
+ maxCPUCores := k.ApplicationCores()
+ defaultSysDirMode := linux.FileMode(0755)
+
+ // Inode numbers are requested from fs as each node is constructed, in the
+ // lexical order of this literal.
+ root := fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{
+ "block": fs.newDir(creds, defaultSysDirMode, nil),
+ "bus": fs.newDir(creds, defaultSysDirMode, nil),
+ "class": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{
+ "power_supply": fs.newDir(creds, defaultSysDirMode, nil),
+ }),
+ "dev": fs.newDir(creds, defaultSysDirMode, nil),
+ "devices": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{
+ "system": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{
+ "cpu": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{
+ // All three files report the same range, "0-<maxCPUCores-1>".
+ "online": fs.newCPUFile(creds, maxCPUCores, linux.FileMode(0444)),
+ "possible": fs.newCPUFile(creds, maxCPUCores, linux.FileMode(0444)),
+ "present": fs.newCPUFile(creds, maxCPUCores, linux.FileMode(0444)),
+ }),
+ }),
+ }),
+ "firmware": fs.newDir(creds, defaultSysDirMode, nil),
+ "fs": fs.newDir(creds, defaultSysDirMode, nil),
+ "kernel": fs.newDir(creds, defaultSysDirMode, nil),
+ "module": fs.newDir(creds, defaultSysDirMode, nil),
+ "power": fs.newDir(creds, defaultSysDirMode, nil),
+ })
+ return fs.VFSFilesystem(), root.VFSDentry(), nil
+}
+
+// dir implements kernfs.Inode.
+//
+// It represents a sysfs directory whose children are fixed at construction
+// time (see newDir). Most of the inode behavior comes from the embedded
+// kernfs helper types; dir itself only supplies SetStat and Open.
+type dir struct {
+ kernfs.InodeAttrs
+ kernfs.InodeNoDynamicLookup
+ kernfs.InodeNotSymlink
+ kernfs.InodeDirectoryNoNewChildren
+
+ // OrderedChildren holds the directory entries populated by newDir.
+ kernfs.OrderedChildren
+ // dentry is the dentry backing this directory; it is initialized with the
+ // dir itself as its inode.
+ dentry kernfs.Dentry
+}
+
+// newDir creates a sysfs directory owned by creds with permission bits mode
+// and the given children, and returns its dentry.
+//
+// contents maps child names to their dentries; it may be nil for an empty
+// directory.
+func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry {
+ d := &dir{}
+ // Honor the caller-supplied permission bits instead of a hard-coded 0755,
+ // which silently ignored the mode parameter.
+ d.InodeAttrs.Init(creds, fs.NextIno(), linux.ModeDirectory|mode)
+ d.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ d.dentry.Init(d)
+
+ // Populate installs the children and its return value is added to this
+ // directory's link count.
+ d.IncLinks(d.OrderedChildren.Populate(&d.dentry, contents))
+
+ return &d.dentry
+}
+
+// SetStat implements kernfs.Inode.SetStat.
+//
+// Attributes of sysfs directories cannot be changed, so every request is
+// rejected with EPERM.
+func (*dir) SetStat(fs *vfs.Filesystem, opts vfs.SetStatOptions) error {
+ return syserror.EPERM
+}
+
+// Open implements kernfs.Inode.Open.
+//
+// It hands back a generic directory file description backed by this
+// directory's ordered children. The returned error is always nil.
+func (d *dir) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
+ dirFD := new(kernfs.GenericDirectoryFD)
+ dirFD.Init(rp.Mount(), vfsd, &d.OrderedChildren, flags)
+ return dirFD.VFSFileDescription(), nil
+}
+
+// cpuFile implements kernfs.Inode.
+//
+// It backs the files under devices/system/cpu and renders a CPU index range
+// derived from maxCores.
+type cpuFile struct {
+ kernfs.DynamicBytesFile
+ // maxCores is the number of cores reported; the generated range ends at
+ // maxCores-1.
+ maxCores uint
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+//
+// The output is the range "0-<maxCores-1>" with no trailing newline. The
+// returned error is always nil.
+func (c *cpuFile) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // maxCores is unsigned, so a value of zero wraps around here.
+ lastCore := c.maxCores - 1
+ fmt.Fprintf(buf, "0-%d", lastCore)
+ return nil
+}
+
+// newCPUFile creates a cpuFile reporting maxCores cores, owned by creds with
+// the given mode, and returns a freshly initialized dentry for it.
+func (fs *filesystem) newCPUFile(creds *auth.Credentials, maxCores uint, mode linux.FileMode) *kernfs.Dentry {
+ file := &cpuFile{maxCores: maxCores}
+ // The file acts as its own data source.
+ file.DynamicBytesFile.Init(creds, fs.NextIno(), file, mode)
+ dentry := new(kernfs.Dentry)
+ dentry.Init(file)
+ return dentry
+}
diff --git a/pkg/sentry/fsimpl/sys/sys_test.go b/pkg/sentry/fsimpl/sys/sys_test.go
new file mode 100644
index 000000000..8b1cf0bd0
--- /dev/null
+++ b/pkg/sentry/fsimpl/sys/sys_test.go
@@ -0,0 +1,90 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sys_test
+
+import (
+ "fmt"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// newTestSystem boots a test kernel, registers the sys filesystem type under
+// the name "sysfs", mounts it as the root of a new mount namespace and wraps
+// the result in a testutil.System. Any failure aborts the test.
+func newTestSystem(t *testing.T) *testutil.System {
+	kern, bootErr := testutil.Boot()
+	if bootErr != nil {
+		t.Fatalf("Failed to create test kernel: %v", bootErr)
+	}
+
+	ctx := kern.SupervisorContext()
+	creds := auth.CredentialsFromContext(ctx)
+
+	vfsObj := vfs.New()
+	regOpts := &vfs.RegisterFilesystemTypeOptions{AllowUserMount: true}
+	vfsObj.MustRegisterFilesystemType("sysfs", sys.FilesystemType{}, regOpts)
+
+	mountNS, mountErr := vfsObj.NewMountNamespace(ctx, creds, "", "sysfs", &vfs.GetFilesystemOptions{})
+	if mountErr != nil {
+		t.Fatalf("Failed to create new mount namespace: %v", mountErr)
+	}
+	return testutil.NewSystem(ctx, t, vfsObj, mountNS)
+}
+
+// TestReadCPUFile checks that the "online", "possible" and "present" files
+// under devices/system/cpu all read back as "0-<ApplicationCores-1>".
+func TestReadCPUFile(t *testing.T) {
+	s := newTestSystem(t)
+	defer s.Destroy()
+	k := kernel.KernelFromContext(s.Ctx)
+	maxCPUCores := k.ApplicationCores()
+
+	expected := fmt.Sprintf("0-%d", maxCPUCores-1)
+
+	for _, fname := range []string{"online", "possible", "present"} {
+		// Each file is handled in its own function so that the deferred
+		// DecRef runs at the end of the iteration rather than accumulating
+		// until the whole test returns.
+		func() {
+			pop := s.PathOpAtRoot(fmt.Sprintf("devices/system/cpu/%s", fname))
+			fd, err := s.VFS.OpenAt(s.Ctx, s.Creds, pop, &vfs.OpenOptions{})
+			if err != nil {
+				t.Fatalf("OpenAt(pop:%+v) = %+v failed: %v", pop, fd, err)
+			}
+			defer fd.DecRef()
+			content, err := s.ReadToEnd(fd)
+			if err != nil {
+				t.Fatalf("Read failed: %v", err)
+			}
+			if diff := cmp.Diff(expected, content); diff != "" {
+				t.Fatalf("Read returned unexpected data:\n--- want\n+++ got\n%v", diff)
+			}
+		}()
+	}
+}
+
+// TestSysRootContainsExpectedEntries lists the root of the mounted filesystem
+// and checks that it holds exactly the expected set of directories.
+func TestSysRootContainsExpectedEntries(t *testing.T) {
+	s := newTestSystem(t)
+	defer s.Destroy()
+
+	wantDirs := map[string]testutil.DirentType{
+		"block":    linux.DT_DIR,
+		"bus":      linux.DT_DIR,
+		"class":    linux.DT_DIR,
+		"dev":      linux.DT_DIR,
+		"devices":  linux.DT_DIR,
+		"firmware": linux.DT_DIR,
+		"fs":       linux.DT_DIR,
+		"kernel":   linux.DT_DIR,
+		"module":   linux.DT_DIR,
+		"power":    linux.DT_DIR,
+	}
+	rootDirents := s.ListDirents(s.PathOpAtRoot("/"))
+	s.AssertAllDirentTypes(rootDirents, wantDirs)
+}
diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD
new file mode 100644
index 000000000..efd5974c4
--- /dev/null
+++ b/pkg/sentry/fsimpl/testutil/BUILD
@@ -0,0 +1,35 @@
+load("//tools:defs.bzl", "go_library")
+
+licenses(["notice"])
+
+go_library(
+ name = "testutil",
+ testonly = 1,
+ srcs = [
+ "kernel.go",
+ "testutil.go",
+ ],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/context",
+ "//pkg/cpuid",
+ "//pkg/fspath",
+ "//pkg/memutil",
+ "//pkg/sentry/fs",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/sched",
+ "//pkg/sentry/limits",
+ "//pkg/sentry/loader",
+ "//pkg/sentry/pgalloc",
+ "//pkg/sentry/platform",
+ "//pkg/sentry/platform/kvm",
+ "//pkg/sentry/platform/ptrace",
+ "//pkg/sentry/time",
+ "//pkg/sentry/vfs",
+ "//pkg/sync",
+ "//pkg/usermem",
+ "@com_github_google_go-cmp//cmp:go_default_library",
+ ],
+)
diff --git a/pkg/sentry/fsimpl/proc/boot_test.go b/pkg/sentry/fsimpl/testutil/kernel.go
index 84a93ee56..89f8c4915 100644
--- a/pkg/sentry/fsimpl/proc/boot_test.go
+++ b/pkg/sentry/fsimpl/testutil/kernel.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package proc
+package testutil
import (
"flag"
@@ -21,9 +21,9 @@ import (
"runtime"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/memutil"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -43,8 +43,8 @@ var (
platformFlag = flag.String("platform", "ptrace", "specify which platform to use")
)
-// boot initializes a new bare bones kernel for test.
-func boot() (*kernel.Kernel, error) {
+// Boot initializes a new bare bones kernel for test.
+func Boot() (*kernel.Kernel, error) {
platformCtr, err := platform.Lookup(*platformFlag)
if err != nil {
return nil, fmt.Errorf("platform not found: %v", err)
@@ -117,8 +117,8 @@ func boot() (*kernel.Kernel, error) {
return k, nil
}
-// createTask creates a new bare bones task for tests.
-func createTask(ctx context.Context, name string, tc *kernel.ThreadGroup) (*kernel.Task, error) {
+// CreateTask creates a new bare bones task for tests.
+func CreateTask(ctx context.Context, name string, tc *kernel.ThreadGroup) (*kernel.Task, error) {
k := kernel.KernelFromContext(ctx)
config := &kernel.TaskConfig{
Kernel: k,
diff --git a/pkg/sentry/fsimpl/testutil/testutil.go b/pkg/sentry/fsimpl/testutil/testutil.go
new file mode 100644
index 000000000..1c98335c1
--- /dev/null
+++ b/pkg/sentry/fsimpl/testutil/testutil.go
@@ -0,0 +1,281 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package testutil provides common test utilities for kernfs-based
+// filesystems.
+package testutil
+
+import (
+ "fmt"
+ "io"
+ "strings"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// System bundles the state shared by the helpers of a single test: the test
+// harness, the context and credentials used for VFS operations, the VFS
+// itself, and the root of the mount namespace under test.
+//
+// Test systems must be explicitly destroyed with System.Destroy.
+type System struct {
+ t *testing.T
+ Ctx context.Context
+ Creds *auth.Credentials
+ VFS *vfs.VirtualFilesystem
+ Root vfs.VirtualDentry
+ mns *vfs.MountNamespace
+}
+
+// NewSystem builds a System around ctx, t, v and mns. Credentials are taken
+// from ctx and Root is obtained from mns.Root().
+//
+// Precondition: Caller must hold a reference on mns, whose ownership
+// is transferred to the new System.
+func NewSystem(ctx context.Context, t *testing.T, v *vfs.VirtualFilesystem, mns *vfs.MountNamespace) *System {
+	return &System{
+		t:     t,
+		Ctx:   ctx,
+		Creds: auth.CredentialsFromContext(ctx),
+		VFS:   v,
+		mns:   mns,
+		Root:  mns.Root(),
+	}
+}
+
+// WithSubtest returns a temporary copy of s that reports through the test
+// harness t and shares every other resource with the original system. It is
+// meant for reusing one system across several subtests, each of which needs
+// its own T. Note that this is safe when test cases run in parallel, as all
+// resources referenced by the system are immutable, or handle interior
+// mutations in a thread-safe manner.
+//
+// The returned system must not outlive the original and should not be destroyed
+// via System.Destroy.
+func (s *System) WithSubtest(t *testing.T) *System {
+	// Shallow-copy every field, then swap in the new harness.
+	sub := *s
+	sub.t = t
+	return &sub
+}
+
+// WithTemporaryContext returns a temporary copy of s that uses ctx as its
+// context and borrows all other resources and references from the original
+// system. The returned temporary system must not outlive the original
+// system, and should not be destroyed via System.Destroy.
+func (s *System) WithTemporaryContext(ctx context.Context) *System {
+	// Shallow-copy every field, then swap in the new context.
+	tmp := *s
+	tmp.Ctx = ctx
+	return &tmp
+}
+
+// Destroy releases the resources associated with a test system.
+func (s *System) Destroy() {
+ s.Root.DecRef()
+ s.mns.DecRef(s.VFS) // Reference on mns passed to NewSystem.
+}
+
+// ReadToEnd reads fd one page at a time until a read returns zero bytes or an
+// error, and returns everything read so far as a string. io.EOF is treated as
+// a normal end of data and reported as a nil error.
+func (s *System) ReadToEnd(fd *vfs.FileDescription) (string, error) {
+	var out strings.Builder
+	page := make([]byte, usermem.PageSize)
+	seq := usermem.BytesIOSequence(page)
+	readOpts := vfs.ReadOptions{}
+
+	n, err := fd.Read(s.Ctx, seq, readOpts)
+	for n != 0 && err == nil {
+		out.Write(page[:n])
+		n, err = fd.Read(s.Ctx, seq, readOpts)
+	}
+	if err == io.EOF {
+		err = nil
+	}
+	return out.String(), err
+}
+
+// PathOpAtRoot returns a PathOperation for path that uses the system's root
+// as both the root and the starting point of the walk.
+func (s *System) PathOpAtRoot(path string) *vfs.PathOperation {
+	pop := new(vfs.PathOperation)
+	pop.Root = s.Root
+	pop.Start = s.Root
+	pop.Path = fspath.Parse(path)
+	return pop
+}
+
+// GetDentryOrDie resolves pop to a dentry via GetDentryAt. A resolution
+// error fails the test immediately.
+func (s *System) GetDentryOrDie(pop *vfs.PathOperation) vfs.VirtualDentry {
+	getOpts := &vfs.GetDentryOptions{}
+	dentry, getErr := s.VFS.GetDentryAt(s.Ctx, s.Creds, pop, getOpts)
+	if getErr != nil {
+		s.t.Fatalf("GetDentryAt(pop:%+v) failed: %v", pop, getErr)
+	}
+	return dentry
+}
+
+// DirentType is an alias for the uint8 values used in linux_dirent64.d_type (e.g. linux.DT_DIR).
+type DirentType = uint8
+
+// ListDirents opens the directory at pop read-only, iterates over all of its
+// dirents and returns the collector that gathered them. Failure to open or to
+// iterate fails the test.
+func (s *System) ListDirents(pop *vfs.PathOperation) *DirentCollector {
+	openOpts := &vfs.OpenOptions{Flags: linux.O_RDONLY}
+	dirFD, openErr := s.VFS.OpenAt(s.Ctx, s.Creds, pop, openOpts)
+	if openErr != nil {
+		s.t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, openErr)
+	}
+	defer dirFD.DecRef()
+
+	result := new(DirentCollector)
+	iterErr := dirFD.IterDirents(s.Ctx, result)
+	if iterErr != nil {
+		s.t.Fatalf("IterDirent failed: %v", iterErr)
+	}
+	return result
+}
+
+// AssertAllDirentTypes verifies that the set of dirents in collector contains
+// exactly the specified set of expected entries. AssertAllDirentTypes respects
+// collector.skipDots, and implicitly checks for "." and ".." accordingly.
+//
+// The expected map is never modified; the implicit entries are added to a
+// private copy, so callers may safely reuse (or pass a nil) map.
+func (s *System) AssertAllDirentTypes(collector *DirentCollector, expected map[string]DirentType) {
+	want := make(map[string]DirentType, len(expected)+2)
+	for name, typ := range expected {
+		want[name] = typ
+	}
+	// Also implicitly check for "." and "..", if enabled.
+	if !collector.skipDots {
+		want["."] = linux.DT_DIR
+		want[".."] = linux.DT_DIR
+	}
+
+	dentryTypes := make(map[string]DirentType)
+	collector.mu.Lock()
+	for _, dirent := range collector.dirents {
+		dentryTypes[dirent.Name] = dirent.Type
+	}
+	collector.mu.Unlock()
+	if diff := cmp.Diff(want, dentryTypes); diff != "" {
+		s.t.Fatalf("IterDirent had unexpected results:\n--- want\n+++ got\n%v", diff)
+	}
+}
+
+// AssertDirentOffsets verifies that collector contains at least the entries
+// specified in expected, with the given NextOff field. Entries specified in
+// expected but missing from collector result in failure. Extra entries in
+// collector are ignored. AssertDirentOffsets respects collector.skipDots, and
+// implicitly checks for "." and ".." accordingly.
+//
+// The expected map is never modified; the implicit entries are added to a
+// private copy, so callers may safely reuse (or pass a nil) map.
+func (s *System) AssertDirentOffsets(collector *DirentCollector, expected map[string]int64) {
+	want := make(map[string]int64, len(expected)+2)
+	for name, off := range expected {
+		want[name] = off
+	}
+	// Also implicitly check for "." and "..", if enabled.
+	if !collector.skipDots {
+		want["."] = 1
+		want[".."] = 2
+	}
+
+	dentryNextOffs := make(map[string]int64)
+	collector.mu.Lock()
+	for _, dirent := range collector.dirents {
+		// Ignore extra entries in dentries that are not in expected.
+		if _, ok := want[dirent.Name]; ok {
+			dentryNextOffs[dirent.Name] = dirent.NextOff
+		}
+	}
+	collector.mu.Unlock()
+	if diff := cmp.Diff(want, dentryNextOffs); diff != "" {
+		s.t.Fatalf("IterDirent had unexpected results:\n--- want\n+++ got\n%v", diff)
+	}
+}
+
+// DirentCollector provides an implementation for vfs.IterDirentsCallback for
+// testing. It records every dirent passed to Handle, both in arrival order
+// (order) and indexed by name (dirents); mu guards both collections.
+type DirentCollector struct {
+ mu sync.Mutex
+ order []*vfs.Dirent
+ dirents map[string]*vfs.Dirent
+ // skipDots, when true, stops the Assert* helpers from implicitly adding
+ // "." and ".." to the set of expected entries.
+ skipDots bool
+}
+
+// SkipDotsChecks sets whether the Assert* functions skip their implicit
+// checks on "." and "..": true disables the checks, false enables them.
+// "." and ".." are still collected if passed to d.Handle, so the caller
+// should only disable the checks when those entries aren't expected.
+func (d *DirentCollector) SkipDotsChecks(value bool) {
+ d.skipDots = value
+}
+
+// Handle implements vfs.IterDirentsCallback.Handle. It records dirent under
+// its name and in arrival order, and always returns true.
+func (d *DirentCollector) Handle(dirent vfs.Dirent) bool {
+	// Both collections share a pointer to this call's copy of the dirent.
+	entry := &dirent
+
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	if d.dirents == nil {
+		d.dirents = make(map[string]*vfs.Dirent)
+	}
+	d.dirents[entry.Name] = entry
+	d.order = append(d.order, entry)
+	return true
+}
+
+// Count reports how many distinct dirent names the collector currently holds.
+func (d *DirentCollector) Count() int {
+	d.mu.Lock()
+	n := len(d.dirents)
+	d.mu.Unlock()
+	return n
+}
+
+// Contains returns nil if the collector holds a dirent called name whose type
+// is typ, and a descriptive error otherwise.
+func (d *DirentCollector) Contains(name string, typ uint8) error {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	switch found, present := d.dirents[name]; {
+	case !present:
+		return fmt.Errorf("No dirent named %q found", name)
+	case found.Type != typ:
+		return fmt.Errorf("Dirent named %q found, but was expecting type %s, got: %+v", name, linux.DirentType.Parse(uint64(typ)), found)
+	default:
+		return nil
+	}
+}
+
+// Dirents returns a copy of the name-to-dirent map built up by this
+// collector. The map is fresh; the dirent pointers are shared.
+func (d *DirentCollector) Dirents() map[string]*vfs.Dirent {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	snapshot := make(map[string]*vfs.Dirent)
+	for name, entry := range d.dirents {
+		snapshot[name] = entry
+	}
+	return snapshot
+}
+
+// OrderedDirents returns a copy of the dirents seen by this collector, in the
+// order in which they were handed to Handle.
+func (d *DirentCollector) OrderedDirents() []*vfs.Dirent {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+
+	// Start from an allocated, empty slice so the result is never nil.
+	ordered := make([]*vfs.Dirent, 0, len(d.order))
+	return append(ordered, d.order...)
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index 82f5c2f41..fb436860c 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -1,8 +1,7 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
load("//tools/go_generics:defs.bzl", "go_template_instance")
-package(licenses = ["notice"])
+licenses(["notice"])
go_template_instance(
name = "dentry_list",
@@ -20,6 +19,7 @@ go_library(
name = "tmpfs",
srcs = [
"dentry_list.go",
+ "device_file.go",
"directory.go",
"filesystem.go",
"named_pipe.go",
@@ -27,28 +27,28 @@ go_library(
"symlink.go",
"tmpfs.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs",
deps = [
"//pkg/abi/linux",
"//pkg/amutex",
+ "//pkg/context",
"//pkg/fspath",
"//pkg/log",
+ "//pkg/safemem",
"//pkg/sentry/arch",
- "//pkg/sentry/context",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/pipe",
+ "//pkg/sentry/kernel/time",
"//pkg/sentry/memmap",
"//pkg/sentry/pgalloc",
"//pkg/sentry/platform",
- "//pkg/sentry/safemem",
"//pkg/sentry/usage",
- "//pkg/sentry/usermem",
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/syserror",
+ "//pkg/usermem",
],
)
@@ -59,10 +59,10 @@ go_test(
deps = [
":tmpfs",
"//pkg/abi/linux",
+ "//pkg/context",
"//pkg/fspath",
"//pkg/refs",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
+ "//pkg/sentry/contexttest",
"//pkg/sentry/fs",
"//pkg/sentry/fs/tmpfs",
"//pkg/sentry/kernel/auth",
@@ -77,17 +77,18 @@ go_test(
srcs = [
"pipe_test.go",
"regular_file_test.go",
+ "stat_test.go",
],
- embed = [":tmpfs"],
+ library = ":tmpfs",
deps = [
"//pkg/abi/linux",
+ "//pkg/context",
"//pkg/fspath",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
+ "//pkg/sentry/contexttest",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/contexttest",
- "//pkg/sentry/usermem",
"//pkg/sentry/vfs",
"//pkg/syserror",
+ "//pkg/usermem",
],
)
diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
index d88c83499..54241c8e8 100644
--- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
@@ -21,10 +21,10 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/refs"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs"
_ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
diff --git a/pkg/sentry/fsimpl/tmpfs/device_file.go b/pkg/sentry/fsimpl/tmpfs/device_file.go
new file mode 100644
index 000000000..84b181b90
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/device_file.go
@@ -0,0 +1,39 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// deviceFile is the inode implementation for device special files. It records
+// the device kind together with its major and minor numbers.
+type deviceFile struct {
+	inode inode
+	kind  vfs.DeviceKind
+
+	major, minor uint32
+}
+
+// newDeviceFile creates a device special file of the given kind and
+// major/minor numbers, and returns its inode with a link count of 1.
+func (fs *filesystem) newDeviceFile(creds *auth.Credentials, mode linux.FileMode, kind vfs.DeviceKind, major, minor uint32) *inode {
+	dev := new(deviceFile)
+	dev.kind, dev.major, dev.minor = kind, major, minor
+
+	dev.inode.init(dev, fs, creds, mode)
+	dev.inode.nlink = 1 // from parent directory
+	return &dev.inode
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go
index 887ca2619..dc0d27cf9 100644
--- a/pkg/sentry/fsimpl/tmpfs/directory.go
+++ b/pkg/sentry/fsimpl/tmpfs/directory.go
@@ -16,7 +16,7 @@ package tmpfs
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 26979729e..5ee9cf1e9 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -19,8 +19,8 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -56,7 +56,8 @@ afterSymlink:
}
next := nextVFSD.Impl().(*dentry)
if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
- // TODO: symlink traversals update access time
+ // TODO(gvisor.dev/issues/1197): Symlink traversals update
+ // access time.
if err := rp.HandleSymlink(symlink.target); err != nil {
return nil, err
}
@@ -227,23 +228,26 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
// MknodAt implements vfs.FilesystemImpl.MknodAt.
func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
+ var childInode *inode
switch opts.Mode.FileType() {
case 0, linux.S_IFREG:
- child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
- parent.vfsd.InsertChild(&child.vfsd, name)
- parent.inode.impl.(*directory).childList.PushBack(child)
- return nil
+ childInode = fs.newRegularFile(rp.Credentials(), opts.Mode)
case linux.S_IFIFO:
- child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode))
- parent.vfsd.InsertChild(&child.vfsd, name)
- parent.inode.impl.(*directory).childList.PushBack(child)
- return nil
- case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK:
+ childInode = fs.newNamedPipe(rp.Credentials(), opts.Mode)
+ case linux.S_IFBLK:
+ childInode = fs.newDeviceFile(rp.Credentials(), opts.Mode, vfs.BlockDevice, opts.DevMajor, opts.DevMinor)
+ case linux.S_IFCHR:
+ childInode = fs.newDeviceFile(rp.Credentials(), opts.Mode, vfs.CharDevice, opts.DevMajor, opts.DevMinor)
+ case linux.S_IFSOCK:
// Not yet supported.
return syserror.EPERM
default:
return syserror.EINVAL
}
+ child := fs.newDentry(childInode)
+ parent.vfsd.InsertChild(&child.vfsd, name)
+ parent.inode.impl.(*directory).childList.PushBack(child)
+ return nil
})
}
@@ -263,7 +267,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if err != nil {
return nil, err
}
- return d.open(ctx, rp, opts.Flags, false /* afterCreate */)
+ return d.open(ctx, rp, &opts, false /* afterCreate */)
}
mustCreate := opts.Flags&linux.O_EXCL != 0
@@ -278,7 +282,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if mustCreate {
return nil, syserror.EEXIST
}
- return start.open(ctx, rp, opts.Flags, false /* afterCreate */)
+ return start.open(ctx, rp, &opts, false /* afterCreate */)
}
afterTrailingSymlink:
parent, err := walkParentDirLocked(rp, start)
@@ -312,7 +316,7 @@ afterTrailingSymlink:
child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
parent.vfsd.InsertChild(&child.vfsd, name)
parent.inode.impl.(*directory).childList.PushBack(child)
- return child.open(ctx, rp, opts.Flags, true)
+ return child.open(ctx, rp, &opts, true)
}
if err != nil {
return nil, err
@@ -326,30 +330,23 @@ afterTrailingSymlink:
if mustCreate {
return nil, syserror.EEXIST
}
- return child.open(ctx, rp, opts.Flags, false)
+ return child.open(ctx, rp, &opts, false)
}
-func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) {
- ats := vfs.AccessTypesForOpenFlags(flags)
+func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, afterCreate bool) (*vfs.FileDescription, error) {
+ ats := vfs.AccessTypesForOpenFlags(opts.Flags)
if !afterCreate {
if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil {
return nil, err
}
}
- mnt := rp.Mount()
switch impl := d.inode.impl.(type) {
case *regularFile:
var fd regularFileFD
- fd.readable = vfs.MayReadFileWithOpenFlags(flags)
- fd.writable = vfs.MayWriteFileWithOpenFlags(flags)
- if fd.writable {
- if err := mnt.CheckBeginWrite(); err != nil {
- return nil, err
- }
- // mnt.EndWrite() is called by regularFileFD.Release().
+ if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
}
- fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
- if flags&linux.O_TRUNC != 0 {
+ if opts.Flags&linux.O_TRUNC != 0 {
impl.mu.Lock()
impl.data.Truncate(0, impl.memFile)
atomic.StoreUint64(&impl.size, 0)
@@ -362,13 +359,17 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32,
return nil, syserror.EISDIR
}
var fd directoryFD
- fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
+ if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
return &fd.vfsfd, nil
case *symlink:
// Can't open symlinks without O_PATH (which is unimplemented).
return nil, syserror.ELOOP
case *namedPipe:
- return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags)
+ return newNamedPipeFD(ctx, impl, rp, &d.vfsd, opts.Flags)
+ case *deviceFile:
+ return rp.VirtualFilesystem().OpenDeviceSpecialFile(ctx, rp.Mount(), &d.vfsd, impl.kind, impl.major, impl.minor, opts)
default:
panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl))
}
@@ -501,7 +502,8 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
oldParent.inode.decLinksLocked()
newParent.inode.incLinksLocked()
}
- // TODO: update timestamps and parent directory sizes
+ // TODO(gvisor.dev/issues/1197): Update timestamps and parent directory
+ // sizes.
vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD)
return nil
}
@@ -555,15 +557,11 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
fs.mu.RLock()
defer fs.mu.RUnlock()
- _, err := resolveLocked(rp)
+ d, err := resolveLocked(rp)
if err != nil {
return err
}
- if opts.Stat.Mask == 0 {
- return nil
- }
- // TODO: implement inode.setStat
- return syserror.EPERM
+ return d.inode.setStat(opts.Stat)
}
// StatAt implements vfs.FilesystemImpl.StatAt.
@@ -587,7 +585,7 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu
if err != nil {
return linux.Statfs{}, err
}
- // TODO: actually implement statfs
+ // TODO(gvisor.dev/issues/1197): Actually implement statfs.
return linux.Statfs{}, syserror.ENOSYS
}
diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go
index 40bde54de..0c57fdca3 100644
--- a/pkg/sentry/fsimpl/tmpfs/named_pipe.go
+++ b/pkg/sentry/fsimpl/tmpfs/named_pipe.go
@@ -16,11 +16,11 @@ package tmpfs
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/usermem"
)
type namedPipe struct {
@@ -50,11 +50,10 @@ type namedPipeFD struct {
func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
var err error
var fd namedPipeFD
- fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, rp, vfsd, &fd.vfsfd, flags)
+ fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, vfsd, &fd.vfsfd, flags)
if err != nil {
return nil, err
}
- mnt := rp.Mount()
- fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{})
+ // Init reports an error (the regular-file and directory open paths check
+ // it), so propagate it instead of returning an FD that failed to initialize.
+ if err = fd.vfsfd.Init(&fd, flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
return &fd.vfsfd, nil
}
diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
index 70b42a6ec..5ee7f2a72 100644
--- a/pkg/sentry/fsimpl/tmpfs/pipe_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
@@ -19,13 +19,13 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
const fileName = "mypipe"
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index f200e767d..e9e6faf67 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -20,17 +20,17 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
- "gvisor.dev/gvisor/pkg/sentry/safemem"
"gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
type regularFile struct {
@@ -63,13 +63,44 @@ func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMod
return &file.inode
}
+// truncate grows or shrinks the file to the given size. It returns true if the
+// file size was updated.
+//
+// Growing fails with EPERM when F_SEAL_GROW is set, and shrinking fails with
+// EPERM when F_SEAL_SHRINK is set. rf.mu is held for the whole operation.
+func (rf *regularFile) truncate(size uint64) (bool, error) {
+	rf.mu.Lock()
+	defer rf.mu.Unlock()
+
+	if size == rf.size {
+		// Nothing to do.
+		return false, nil
+	}
+
+	if size > rf.size {
+		// Growing the file.
+		if rf.seals&linux.F_SEAL_GROW != 0 {
+			// Seal does not allow growth.
+			return false, syserror.EPERM
+		}
+		// Publish the new size with an atomic store, as the O_TRUNC open path
+		// does; presumably size is also read without rf.mu held - confirm.
+		atomic.StoreUint64(&rf.size, size)
+		return true, nil
+	}
+
+	// Shrinking the file
+	if rf.seals&linux.F_SEAL_SHRINK != 0 {
+		// Seal does not allow shrink.
+		return false, syserror.EPERM
+	}
+
+	// TODO(gvisor.dev/issues/1197): Invalidate mappings once we have
+	// mappings.
+
+	rf.data.Truncate(size, rf.memFile)
+	atomic.StoreUint64(&rf.size, size)
+	return true, nil
+}
+
type regularFileFD struct {
fileDescription
- // These are immutable.
- readable bool
- writable bool
-
// off is the file offset. off is accessed using atomic memory operations.
// offMu serializes operations that may mutate off.
off int64
@@ -78,16 +109,11 @@ type regularFileFD struct {
// Release implements vfs.FileDescriptionImpl.Release.
func (fd *regularFileFD) Release() {
- if fd.writable {
- fd.vfsfd.VirtualDentry().Mount().EndWrite()
- }
+ // noop
}
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- if !fd.readable {
- return 0, syserror.EINVAL
- }
if offset < 0 {
return 0, syserror.EINVAL
}
@@ -112,9 +138,6 @@ func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- if !fd.writable {
- return 0, syserror.EINVAL
- }
if offset < 0 {
return 0, syserror.EINVAL
}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
index 3731c5b6f..32552e261 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
@@ -18,21 +18,24 @@ import (
"bytes"
"fmt"
"io"
+ "sync/atomic"
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/usermem"
)
-// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If
-// the returned err is not nil, then cleanup should be called when the FD is no
-// longer needed.
-func newFileFD(ctx context.Context, filename string) (*vfs.FileDescription, func(), error) {
+// nextFileID is used to generate unique file names.
+var nextFileID int64
+
+// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error
+// is nil, then cleanup should be called when the root is no longer needed.
+func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) {
creds := auth.CredentialsFromContext(ctx)
vfsObj := vfs.New()
@@ -41,36 +44,124 @@ func newFileFD(ctx context.Context, filename string) (*vfs.FileDescription, func
})
mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
if err != nil {
- return nil, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err)
+ return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err)
}
root := mntns.Root()
+ return vfsObj, root, func() {
+ root.DecRef()
+ mntns.DecRef(vfsObj)
+ }, nil
+}
+
+// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If
+// the returned err is nil, then cleanup should be called when the FD is no
+// longer needed. If err is not nil, the mount has already been released here
+// and both the FD and cleanup are nil.
+//
+// The file gets a unique generated name and is opened read-write with
+// O_CREAT|O_EXCL; mode is OR'd with linux.ModeRegular to form the creation
+// mode.
+func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
+ creds := auth.CredentialsFromContext(ctx)
+ vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1))
// Create the file that will be written and read.
fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(filename),
- FollowFinalSymlink: true,
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(filename),
}, &vfs.OpenOptions{
Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
- Mode: 0644,
+ Mode: linux.ModeRegular | mode,
})
if err != nil {
- root.DecRef()
- mntns.DecRef(vfsObj)
+ cleanup()
return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err)
}
- return fd, func() {
- root.DecRef()
- mntns.DecRef(vfsObj)
- }, nil
+ return fd, cleanup, nil
+}
+
+// newDirFD mirrors newFileFD for directories: it makes a uniquely named
+// directory with the given permission bits in a fresh tmpfs mount and returns
+// a read-only FD on it together with the mount's cleanup function. On any
+// failure the mount is released here and both the FD and cleanup are nil.
+func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
+	creds := auth.CredentialsFromContext(ctx)
+	vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	name := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1))
+
+	// The directory has to exist before it can be opened.
+	mkdirErr := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(name),
+	}, &vfs.MkdirOptions{
+		Mode: linux.ModeDirectory | mode,
+	})
+	if mkdirErr != nil {
+		cleanup()
+		return nil, nil, fmt.Errorf("failed to create directory %q: %v", name, mkdirErr)
+	}
+
+	// Hand back an FD on the directory that was just made.
+	dirFD, openErr := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(name),
+	}, &vfs.OpenOptions{
+		Flags: linux.O_RDONLY | linux.O_DIRECTORY,
+	})
+	if openErr != nil {
+		cleanup()
+		return nil, nil, fmt.Errorf("failed to open directory %q: %v", name, openErr)
+	}
+
+	return dirFD, cleanup, nil
+}
+
+// newPipeFD mirrors newFileFD for named pipes: it creates a uniquely named
+// FIFO node with the given permission bits in a fresh tmpfs mount and returns
+// a read-write FD on it together with the mount's cleanup function. On any
+// failure the mount is released here and both the FD and cleanup are nil.
+func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
+	creds := auth.CredentialsFromContext(ctx)
+	vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	name := fmt.Sprintf("tmpfs-test-pipe-%d", atomic.AddInt64(&nextFileID, 1))
+
+	// The FIFO node has to exist before it can be opened.
+	mknodErr := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(name),
+	}, &vfs.MknodOptions{
+		Mode: linux.ModeNamedPipe | mode,
+	})
+	if mknodErr != nil {
+		cleanup()
+		return nil, nil, fmt.Errorf("failed to create pipe %q: %v", name, mknodErr)
+	}
+
+	// Hand back an FD on the pipe that was just made.
+	pipeFD, openErr := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(name),
+	}, &vfs.OpenOptions{
+		Flags: linux.O_RDWR,
+	})
+	if openErr != nil {
+		cleanup()
+		return nil, nil, fmt.Errorf("failed to open pipe %q: %v", name, openErr)
+	}
+
+	return pipeFD, cleanup, nil
}
// Test that we can write some data to a file and read it back.
func TestSimpleWriteRead(t *testing.T) {
ctx := contexttest.Context(t)
- fd, cleanup, err := newFileFD(ctx, "simpleReadWrite")
+ fd, cleanup, err := newFileFD(ctx, 0644)
if err != nil {
t.Fatal(err)
}
@@ -116,7 +207,7 @@ func TestSimpleWriteRead(t *testing.T) {
func TestPWrite(t *testing.T) {
ctx := contexttest.Context(t)
- fd, cleanup, err := newFileFD(ctx, "PRead")
+ fd, cleanup, err := newFileFD(ctx, 0644)
if err != nil {
t.Fatal(err)
}
@@ -171,7 +262,7 @@ func TestPWrite(t *testing.T) {
func TestPRead(t *testing.T) {
ctx := contexttest.Context(t)
- fd, cleanup, err := newFileFD(ctx, "PRead")
+ fd, cleanup, err := newFileFD(ctx, 0644)
if err != nil {
t.Fatal(err)
}
@@ -222,3 +313,124 @@ func TestPRead(t *testing.T) {
}
}
}
+
+// TestTruncate checks that resizing a regular file through SetStat with
+// STATX_SIZE updates the reported size and the readable contents, bumps mtime
+// and ctime when the size actually changes, and leaves both timestamps alone
+// when the requested size equals the current size.
+func TestTruncate(t *testing.T) {
+	ctx := contexttest.Context(t)
+	fd, cleanup, err := newFileFD(ctx, 0644)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	// Fill the file with some data.
+	data := bytes.Repeat([]byte("gVisor is awsome"), 100)
+	written, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{})
+	if err != nil {
+		t.Fatalf("fd.Write failed: %v", err)
+	}
+
+	// Size should be same as written.
+	sizeStatOpts := vfs.StatOptions{Mask: linux.STATX_SIZE}
+	stat, err := fd.Stat(ctx, sizeStatOpts)
+	if err != nil {
+		t.Fatalf("fd.Stat failed: %v", err)
+	}
+	if got, want := int64(stat.Size), written; got != want {
+		t.Errorf("fd.Stat got size %d, want %d", got, want)
+	}
+
+	// Truncate down.
+	newSize := uint64(10)
+	if err := fd.SetStat(ctx, vfs.SetStatOptions{
+		Stat: linux.Statx{
+			Mask: linux.STATX_SIZE,
+			Size: newSize,
+		},
+	}); err != nil {
+		t.Errorf("fd.Truncate failed: %v", err)
+	}
+	// Size should be updated.
+	statAfterTruncateDown, err := fd.Stat(ctx, sizeStatOpts)
+	if err != nil {
+		t.Fatalf("fd.Stat failed: %v", err)
+	}
+	if got, want := statAfterTruncateDown.Size, newSize; got != want {
+		t.Errorf("fd.Stat got size %d, want %d", got, want)
+	}
+	// We should only read newSize worth of data.
+	buf := make([]byte, 1000)
+	if n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), 0, vfs.ReadOptions{}); err != nil && err != io.EOF {
+		t.Fatalf("fd.PRead failed: %v", err)
+	} else if uint64(n) != newSize {
+		t.Errorf("fd.PRead got size %d, want %d", n, newSize)
+	}
+	// Mtime and Ctime should be bumped. Both sides of each message are
+	// reported in nanoseconds so the values can be compared directly.
+	if got, prev := statAfterTruncateDown.Mtime.ToNsec(), stat.Mtime.ToNsec(); got <= prev {
+		t.Errorf("fd.Stat got Mtime %v, want > %v", got, prev)
+	}
+	if got, prev := statAfterTruncateDown.Ctime.ToNsec(), stat.Ctime.ToNsec(); got <= prev {
+		t.Errorf("fd.Stat got Ctime %v, want > %v", got, prev)
+	}
+
+	// Truncate up.
+	newSize = 100
+	if err := fd.SetStat(ctx, vfs.SetStatOptions{
+		Stat: linux.Statx{
+			Mask: linux.STATX_SIZE,
+			Size: newSize,
+		},
+	}); err != nil {
+		t.Errorf("fd.Truncate failed: %v", err)
+	}
+	// Size should be updated.
+	statAfterTruncateUp, err := fd.Stat(ctx, sizeStatOpts)
+	if err != nil {
+		t.Fatalf("fd.Stat failed: %v", err)
+	}
+	if got, want := statAfterTruncateUp.Size, newSize; got != want {
+		t.Errorf("fd.Stat got size %d, want %d", got, want)
+	}
+	// We should read newSize worth of data.
+	buf = make([]byte, 1000)
+	if n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), 0, vfs.ReadOptions{}); err != nil && err != io.EOF {
+		t.Fatalf("fd.PRead failed: %v", err)
+	} else if uint64(n) != newSize {
+		t.Errorf("fd.PRead got size %d, want %d", n, newSize)
+	}
+	// Bytes should be null after 10, since we previously truncated to 10.
+	for i := uint64(10); i < newSize; i++ {
+		if buf[i] != 0 {
+			t.Errorf("fd.PRead got byte %d=%x, want 0", i, buf[i])
+			break
+		}
+	}
+	// Mtime and Ctime should be bumped relative to the truncate-down stat;
+	// the failure messages report that same baseline.
+	if got, prev := statAfterTruncateUp.Mtime.ToNsec(), statAfterTruncateDown.Mtime.ToNsec(); got <= prev {
+		t.Errorf("fd.Stat got Mtime %v, want > %v", got, prev)
+	}
+	if got, prev := statAfterTruncateUp.Ctime.ToNsec(), statAfterTruncateDown.Ctime.ToNsec(); got <= prev {
+		t.Errorf("fd.Stat got Ctime %v, want > %v", got, prev)
+	}
+
+	// Truncate to the current size.
+	newSize = statAfterTruncateUp.Size
+	if err := fd.SetStat(ctx, vfs.SetStatOptions{
+		Stat: linux.Statx{
+			Mask: linux.STATX_SIZE,
+			Size: newSize,
+		},
+	}); err != nil {
+		t.Errorf("fd.Truncate failed: %v", err)
+	}
+	statAfterTruncateNoop, err := fd.Stat(ctx, sizeStatOpts)
+	if err != nil {
+		t.Fatalf("fd.Stat failed: %v", err)
+	}
+	// Mtime and Ctime should not be bumped, since operation is a noop.
+	if got, want := statAfterTruncateNoop.Mtime.ToNsec(), statAfterTruncateUp.Mtime.ToNsec(); got != want {
+		t.Errorf("fd.Stat got Mtime %v, want %v", got, want)
+	}
+	if got, want := statAfterTruncateNoop.Ctime.ToNsec(), statAfterTruncateUp.Ctime.ToNsec(); got != want {
+		t.Errorf("fd.Stat got Ctime %v, want %v", got, want)
+	}
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go
new file mode 100644
index 000000000..ebe035dee
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go
@@ -0,0 +1,232 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+ "fmt"
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// TestStatAfterCreate stats a freshly created regular file, directory and
+// named pipe and checks the initial metadata: equal non-zero atime/ctime/mtime,
+// zero btime, zero size, the expected link count, ownership taken from the
+// context credentials, the expected mode bits, and a non-zero inode number.
+func TestStatAfterCreate(t *testing.T) {
+	ctx := contexttest.Context(t)
+	mode := linux.FileMode(0644)
+
+	// create builds a new tmpfs object of the named kind and opens it.
+	create := func(typ string) (*vfs.FileDescription, func(), error) {
+		switch typ {
+		case "file":
+			return newFileFD(ctx, mode)
+		case "dir":
+			return newDirFD(ctx, mode)
+		case "pipe":
+			return newPipeFD(ctx, mode)
+		}
+		panic(fmt.Sprintf("unknown typ %q", typ))
+	}
+
+	// Run with different file types.
+	// TODO(gvisor.dev/issues/1197): Also test symlinks and sockets.
+	cases := []struct {
+		typ      string
+		typeBits uint16 // S_IF* bits expected in the reported mode
+		nlink    uint32 // link count expected right after creation
+	}{
+		{typ: "file", typeBits: linux.S_IFREG, nlink: 1},
+		{typ: "dir", typeBits: linux.S_IFDIR, nlink: 2},
+		{typ: "pipe", typeBits: linux.S_IFIFO, nlink: 1},
+	}
+	for _, tc := range cases {
+		tc := tc
+		t.Run(fmt.Sprintf("type=%q", tc.typ), func(t *testing.T) {
+			fd, cleanup, err := create(tc.typ)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer cleanup()
+
+			got, err := fd.Stat(ctx, vfs.StatOptions{})
+			if err != nil {
+				t.Fatalf("Stat failed: %v", err)
+			}
+
+			// Atime, Ctime, Mtime should all be current time (non-zero).
+			atime := got.Atime.ToNsec()
+			ctime := got.Ctime.ToNsec()
+			mtime := got.Mtime.ToNsec()
+			if !(atime == ctime && ctime == mtime) {
+				t.Errorf("got atime=%d ctime=%d mtime=%d, wanted equal values", atime, ctime, mtime)
+			}
+			if atime == 0 {
+				t.Errorf("got atime=%d, want non-zero", atime)
+			}
+
+			// Btime should be 0, as it is not set by tmpfs.
+			if btime := got.Btime.ToNsec(); btime != 0 {
+				t.Errorf("got btime %d, want 0", btime)
+			}
+
+			// Size should be 0.
+			if got.Size != 0 {
+				t.Errorf("got size %d, want 0", got.Size)
+			}
+
+			// Nlink should be 1 for files, 2 for dirs.
+			if got.Nlink != tc.nlink {
+				t.Errorf("got nlink %d, want %d", got.Nlink, tc.nlink)
+			}
+
+			// UID and GID are set from context creds.
+			creds := auth.CredentialsFromContext(ctx)
+			if wantUID := uint32(creds.EffectiveKUID); got.UID != wantUID {
+				t.Errorf("got uid %d, want %d", got.UID, wantUID)
+			}
+			if wantGID := uint32(creds.EffectiveKGID); got.GID != wantGID {
+				t.Errorf("got gid %d, want %d", got.GID, wantGID)
+			}
+
+			// Mode: the permission bits plus the type bits for this kind.
+			if wantMode := uint16(mode) | tc.typeBits; got.Mode != wantMode {
+				t.Errorf("got mode %x, want %x", got.Mode, wantMode)
+			}
+
+			// Ino.
+			if got.Ino == 0 {
+				t.Errorf("got ino %d, want not 0", got.Ino)
+			}
+		})
+	}
+}
+
+// TestSetStatAtime checks that SetStat on a regular file only changes atime
+// when STATX_ATIME is present in the mask: a request with a zero mask must
+// leave atime untouched, and a request with the mask set must store the
+// supplied value.
+func TestSetStatAtime(t *testing.T) {
+	ctx := contexttest.Context(t)
+	fd, cleanup, err := newFileFD(ctx, 0644)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	allStatOptions := vfs.StatOptions{Mask: linux.STATX_ALL}
+
+	// Get initial stat.
+	initialStat, err := fd.Stat(ctx, allStatOptions)
+	if err != nil {
+		t.Fatalf("Stat failed: %v", err)
+	}
+
+	// Set atime, but without the mask.
+	if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: linux.Statx{
+		Mask:  0,
+		Atime: linux.NsecToStatxTimestamp(100),
+	}}); err != nil {
+		// The error must be passed to Errorf; without it the %v verb has no
+		// operand and the failure reason is lost.
+		t.Errorf("SetStat atime without mask failed: %v", err)
+	}
+	// Atime should be unchanged.
+	if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil {
+		t.Errorf("Stat got error: %v", err)
+	} else if gotStat.Atime != initialStat.Atime {
+		t.Errorf("Stat got atime %d, want %d", gotStat.Atime, initialStat.Atime)
+	}
+
+	// Set atime, this time included in the mask.
+	setStat := linux.Statx{
+		Mask:  linux.STATX_ATIME,
+		Atime: linux.NsecToStatxTimestamp(100),
+	}
+	if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil {
+		t.Errorf("SetStat atime with mask failed: %v", err)
+	}
+	if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil {
+		t.Errorf("Stat got error: %v", err)
+	} else if gotStat.Atime != setStat.Atime {
+		t.Errorf("Stat got atime %d, want %d", gotStat.Atime, setStat.Atime)
+	}
+}
+
+// TestSetStat runs the atime SetStat checks against a regular file, a
+// directory and a named pipe: a request with a zero mask must leave atime
+// untouched, and a request with STATX_ATIME in the mask must store the
+// supplied value.
+func TestSetStat(t *testing.T) {
+	ctx := contexttest.Context(t)
+	mode := linux.FileMode(0644)
+
+	// Run with different file types.
+	// TODO(gvisor.dev/issues/1197): Also test symlinks and sockets.
+	for _, typ := range []string{"file", "dir", "pipe"} {
+		t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) {
+			var (
+				fd      *vfs.FileDescription
+				cleanup func()
+				err     error
+			)
+			switch typ {
+			case "file":
+				fd, cleanup, err = newFileFD(ctx, mode)
+			case "dir":
+				fd, cleanup, err = newDirFD(ctx, mode)
+			case "pipe":
+				fd, cleanup, err = newPipeFD(ctx, mode)
+			default:
+				panic(fmt.Sprintf("unknown typ %q", typ))
+			}
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer cleanup()
+
+			allStatOptions := vfs.StatOptions{Mask: linux.STATX_ALL}
+
+			// Get initial stat.
+			initialStat, err := fd.Stat(ctx, allStatOptions)
+			if err != nil {
+				t.Fatalf("Stat failed: %v", err)
+			}
+
+			// Set atime, but without the mask.
+			if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: linux.Statx{
+				Mask:  0,
+				Atime: linux.NsecToStatxTimestamp(100),
+			}}); err != nil {
+				// The error must be passed to Errorf; without it the %v verb
+				// has no operand and the failure reason is lost.
+				t.Errorf("SetStat atime without mask failed: %v", err)
+			}
+			// Atime should be unchanged.
+			if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil {
+				t.Errorf("Stat got error: %v", err)
+			} else if gotStat.Atime != initialStat.Atime {
+				t.Errorf("Stat got atime %d, want %d", gotStat.Atime, initialStat.Atime)
+			}
+
+			// Set atime, this time included in the mask.
+			setStat := linux.Statx{
+				Mask:  linux.STATX_ATIME,
+				Atime: linux.NsecToStatxTimestamp(100),
+			}
+			if err := fd.SetStat(ctx, vfs.SetStatOptions{Stat: setStat}); err != nil {
+				t.Errorf("SetStat atime with mask failed: %v", err)
+			}
+			if gotStat, err := fd.Stat(ctx, allStatOptions); err != nil {
+				t.Errorf("Stat got error: %v", err)
+			} else if gotStat.Atime != setStat.Atime {
+				t.Errorf("Stat got atime %d, want %d", gotStat.Atime, setStat.Atime)
+			}
+		})
+	}
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 701826f90..88dbd6e35 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -29,8 +29,9 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -47,6 +48,9 @@ type filesystem struct {
// memFile is used to allocate pages for regular files.
memFile *pgalloc.MemoryFile
+ // clock is a realtime clock used to set timestamps in file operations.
+ clock time.Clock
+
// mu serializes changes to the Dentry tree.
mu sync.RWMutex
@@ -59,8 +63,10 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if memFileProvider == nil {
panic("MemoryFileProviderFromContext returned nil")
}
+ clock := time.RealtimeClockFromContext(ctx)
fs := filesystem{
memFile: memFileProvider.MemoryFile(),
+ clock: clock,
}
fs.vfsfs.Init(vfsObj, &fs)
root := fs.newDentry(fs.newDirectory(creds, 01777))
@@ -116,6 +122,9 @@ func (d *dentry) DecRef() {
// inode represents a filesystem object.
type inode struct {
+ // clock is a realtime clock used to set timestamps in file operations.
+ clock time.Clock
+
// refs is a reference count. refs is accessed using atomic memory
// operations.
//
@@ -126,26 +135,41 @@ type inode struct {
// filesystem.RmdirAt() drops the reference.
refs int64
- // Inode metadata; protected by mu and accessed using atomic memory
- // operations unless otherwise specified.
- mu sync.RWMutex
+ // Inode metadata. Writing multiple fields atomically requires holding
+ // mu, otherwise atomic operations can be used.
+ mu sync.Mutex
mode uint32 // excluding file type bits, which are based on impl
nlink uint32 // protected by filesystem.mu instead of inode.mu
uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
gid uint32 // auth.KGID, but ...
ino uint64 // immutable
+ // Linux's tmpfs has no concept of btime.
+ atime int64 // nanoseconds
+ ctime int64 // nanoseconds
+ mtime int64 // nanoseconds
+
+ // Only meaningful for device special files.
+ rdevMajor uint32
+ rdevMinor uint32
+
impl interface{} // immutable
}
const maxLinks = math.MaxUint32
+// init fills in the metadata of a newly created inode: it takes the clock from
+// fs, starts the reference count at 1, records mode and the effective
+// UID/GID from creds, allocates the next inode number from fs, and stamps
+// atime, ctime and mtime with the clock's current time. nlink is not set here
+// and must be initialized by the caller.
func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) {
+ i.clock = fs.clock
i.refs = 1
i.mode = uint32(mode)
i.uid = uint32(creds.EffectiveKUID)
i.gid = uint32(creds.EffectiveKGID)
i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1)
+ // Tmpfs creation sets atime, ctime, and mtime to current time.
+ now := i.clock.Now().Nanoseconds()
+ i.atime = now
+ i.ctime = now
+ i.mtime = now
// i.nlink initialized by caller
i.impl = impl
}
@@ -213,15 +237,24 @@ func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, i
// Go won't inline this function, and returning linux.Statx (which is quite
// big) means spending a lot of time in runtime.duffcopy(), so instead it's an
// output parameter.
+//
+// Note that Linux does not guarantee to return consistent data (in the case of
+// a concurrent modification), so we do not require holding inode.mu.
func (i *inode) statTo(stat *linux.Statx) {
- stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
+ stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK |
+ linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_ATIME |
+ linux.STATX_BTIME | linux.STATX_CTIME | linux.STATX_MTIME
stat.Blksize = 1 // usermem.PageSize in tmpfs
stat.Nlink = atomic.LoadUint32(&i.nlink)
stat.UID = atomic.LoadUint32(&i.uid)
stat.GID = atomic.LoadUint32(&i.gid)
stat.Mode = uint16(atomic.LoadUint32(&i.mode))
stat.Ino = i.ino
- // TODO: device number
+ // Linux's tmpfs has no concept of btime, so zero-value is returned.
+ stat.Atime = linux.NsecToStatxTimestamp(i.atime)
+ stat.Ctime = linux.NsecToStatxTimestamp(i.ctime)
+ stat.Mtime = linux.NsecToStatxTimestamp(i.mtime)
+ // TODO(gvisor.dev/issues/1197): Device number.
switch impl := i.impl.(type) {
case *regularFile:
stat.Mode |= linux.S_IFREG
@@ -240,11 +273,85 @@ func (i *inode) statTo(stat *linux.Statx) {
stat.Blocks = allocatedBlocksForSize(stat.Size)
case *namedPipe:
stat.Mode |= linux.S_IFIFO
+ case *deviceFile:
+ switch impl.kind {
+ case vfs.BlockDevice:
+ stat.Mode |= linux.S_IFBLK
+ case vfs.CharDevice:
+ stat.Mode |= linux.S_IFCHR
+ }
+ stat.RdevMajor = impl.major
+ stat.RdevMinor = impl.minor
default:
panic(fmt.Sprintf("unknown inode type: %T", i.impl))
}
}
+// setStat applies the fields of stat that are selected by stat.Mask to the
+// inode. A zero mask is a no-op.
+//
+// Mode, UID, GID and atime changes request a ctime bump; a size change that
+// actually modifies a regular file requests both an mtime and a ctime bump.
+// An explicitly supplied mtime or ctime takes precedence over the
+// corresponding bump. Requested bumps are applied with the current time from
+// i.clock once all fields have been processed.
+//
+// It returns syserror.EISDIR when STATX_SIZE is set on a directory,
+// syserror.EINVAL when it is set on any other non-regular file, and otherwise
+// whatever error the regular file's truncate reports. Fields processed before
+// a failing size change have already been stored when the error is returned.
+func (i *inode) setStat(stat linux.Statx) error {
+	if stat.Mask == 0 {
+		return nil
+	}
+	i.mu.Lock()
+	// Release the lock on every exit path. The size handling below returns
+	// early on error, and leaving mu held there would block every later
+	// setStat call on this inode.
+	defer i.mu.Unlock()
+
+	var (
+		needsMtimeBump bool
+		needsCtimeBump bool
+	)
+	mask := stat.Mask
+	if mask&linux.STATX_MODE != 0 {
+		atomic.StoreUint32(&i.mode, uint32(stat.Mode))
+		needsCtimeBump = true
+	}
+	if mask&linux.STATX_UID != 0 {
+		atomic.StoreUint32(&i.uid, stat.UID)
+		needsCtimeBump = true
+	}
+	if mask&linux.STATX_GID != 0 {
+		atomic.StoreUint32(&i.gid, stat.GID)
+		needsCtimeBump = true
+	}
+	if mask&linux.STATX_SIZE != 0 {
+		switch impl := i.impl.(type) {
+		case *regularFile:
+			updated, err := impl.truncate(stat.Size)
+			if err != nil {
+				return err
+			}
+			// Only a real size change counts as a modification.
+			if updated {
+				needsMtimeBump = true
+				needsCtimeBump = true
+			}
+		case *directory:
+			return syserror.EISDIR
+		default:
+			return syserror.EINVAL
+		}
+	}
+	if mask&linux.STATX_ATIME != 0 {
+		atomic.StoreInt64(&i.atime, stat.Atime.ToNsecCapped())
+		needsCtimeBump = true
+	}
+	if mask&linux.STATX_MTIME != 0 {
+		atomic.StoreInt64(&i.mtime, stat.Mtime.ToNsecCapped())
+		needsCtimeBump = true
+		// Ignore the mtime bump, since we just set it ourselves.
+		needsMtimeBump = false
+	}
+	if mask&linux.STATX_CTIME != 0 {
+		atomic.StoreInt64(&i.ctime, stat.Ctime.ToNsecCapped())
+		// Ignore the ctime bump, since we just set it ourselves.
+		needsCtimeBump = false
+	}
+	now := i.clock.Now().Nanoseconds()
+	if needsMtimeBump {
+		atomic.StoreInt64(&i.mtime, now)
+	}
+	if needsCtimeBump {
+		atomic.StoreInt64(&i.ctime, now)
+	}
+	return nil
+}
+
+
// allocatedBlocksForSize returns the number of 512B blocks needed to
// accommodate the given size in bytes, as appropriate for struct
// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block
@@ -255,13 +362,22 @@ func allocatedBlocksForSize(size uint64) uint64 {
}
func (i *inode) direntType() uint8 {
- switch i.impl.(type) {
+ switch impl := i.impl.(type) {
case *regularFile:
return linux.DT_REG
case *directory:
return linux.DT_DIR
case *symlink:
return linux.DT_LNK
+ case *deviceFile:
+ switch impl.kind {
+ case vfs.BlockDevice:
+ return linux.DT_BLK
+ case vfs.CharDevice:
+ return linux.DT_CHR
+ default:
+ panic(fmt.Sprintf("unknown vfs.DeviceKind: %v", impl.kind))
+ }
default:
panic(fmt.Sprintf("unknown inode type: %T", i.impl))
}
@@ -291,9 +407,5 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
// SetStat implements vfs.FileDescriptionImpl.SetStat.
+//
+// The request is forwarded unchanged to the setStat method of the inode
+// backing this file description, and that method's error is returned as is.
func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
- if opts.Stat.Mask == 0 {
- return nil
- }
- // TODO: implement inode.setStat
- return syserror.EPERM
+ return fd.inode().setStat(opts.Stat)
}