summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fs
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fs')
-rw-r--r--pkg/sentry/fs/dirent.go133
-rw-r--r--pkg/sentry/fs/file_overlay_test.go84
-rw-r--r--pkg/sentry/fs/host/BUILD4
-rw-r--r--pkg/sentry/fs/host/control.go2
-rw-r--r--pkg/sentry/fs/host/descriptor.go37
-rw-r--r--pkg/sentry/fs/host/descriptor_state.go2
-rw-r--r--pkg/sentry/fs/host/descriptor_test.go4
-rw-r--r--pkg/sentry/fs/host/file.go4
-rw-r--r--pkg/sentry/fs/host/fs.go339
-rw-r--r--pkg/sentry/fs/host/fs_test.go380
-rw-r--r--pkg/sentry/fs/host/host.go59
-rw-r--r--pkg/sentry/fs/host/inode.go141
-rw-r--r--pkg/sentry/fs/host/inode_state.go32
-rw-r--r--pkg/sentry/fs/host/inode_test.go66
-rw-r--r--pkg/sentry/fs/host/ioctl_unsafe.go4
-rw-r--r--pkg/sentry/fs/host/tty.go5
-rw-r--r--pkg/sentry/fs/host/util.go92
-rw-r--r--pkg/sentry/fs/host/util_unsafe.go41
-rw-r--r--pkg/sentry/fs/mounts.go13
-rw-r--r--pkg/sentry/fs/proc/task.go17
20 files changed, 120 insertions, 1339 deletions
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go
index e0b32e1c1..0266a5287 100644
--- a/pkg/sentry/fs/dirent.go
+++ b/pkg/sentry/fs/dirent.go
@@ -17,7 +17,6 @@ package fs
import (
"fmt"
"path"
- "sort"
"sync/atomic"
"syscall"
@@ -121,9 +120,6 @@ type Dirent struct {
// deleted may be set atomically when removed.
deleted int32
- // frozen indicates this entry can't walk to unknown nodes.
- frozen bool
-
// mounted is true if Dirent is a mount point, similar to include/linux/dcache.h:DCACHE_MOUNTED.
mounted bool
@@ -253,8 +249,7 @@ func (d *Dirent) IsNegative() bool {
return d.Inode == nil
}
-// hashChild will hash child into the children list of its new parent d, carrying over
-// any "frozen" state from d.
+// hashChild will hash child into the children list of its new parent d.
//
// Returns (*WeakRef, true) if hashing child caused a Dirent to be unhashed. The caller must
// validate the returned unhashed weak reference. Common cases:
@@ -282,9 +277,6 @@ func (d *Dirent) hashChild(child *Dirent) (*refs.WeakRef, bool) {
d.IncRef()
}
- // Carry over parent's frozen state.
- child.frozen = d.frozen
-
return d.hashChildParentSet(child)
}
@@ -400,38 +392,6 @@ func (d *Dirent) MountRoot() *Dirent {
return mountRoot
}
-// Freeze prevents this dirent from walking to more nodes. Freeze is applied
-// recursively to all children.
-//
-// If this particular Dirent represents a Virtual node, then Walks and Creates
-// may proceed as before.
-//
-// Freeze can only be called before the application starts running, otherwise
-// the root it might be out of sync with the application root if modified by
-// sys_chroot.
-func (d *Dirent) Freeze() {
- d.mu.Lock()
- defer d.mu.Unlock()
- if d.frozen {
- // Already frozen.
- return
- }
- d.frozen = true
-
- // Take a reference when freezing.
- for _, w := range d.children {
- if child := w.Get(); child != nil {
- // NOTE: We would normally drop the reference here. But
- // instead we're hanging on to it.
- ch := child.(*Dirent)
- ch.Freeze()
- }
- }
-
- // Drop all expired weak references.
- d.flush()
-}
-
// descendantOf returns true if the receiver dirent is equal to, or a
// descendant of, the argument dirent.
//
@@ -524,11 +484,6 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl
w.Drop()
}
- // Are we allowed to do the lookup?
- if d.frozen && !d.Inode.IsVirtual() {
- return nil, syscall.ENOENT
- }
-
// Slow path: load the InodeOperations into memory. Since this is a hot path and the lookup may be
// expensive, if possible release the lock and re-acquire it.
if walkMayUnlock {
@@ -659,11 +614,6 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi
return nil, syscall.EEXIST
}
- // Are we frozen?
- if d.frozen && !d.Inode.IsVirtual() {
- return nil, syscall.ENOENT
- }
-
// Try the create. We need to trust the file system to return EEXIST (or something
// that will translate to EEXIST) if name already exists.
file, err := d.Inode.Create(ctx, d, name, flags, perms)
@@ -727,11 +677,6 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c
return syscall.EEXIST
}
- // Are we frozen?
- if d.frozen && !d.Inode.IsVirtual() {
- return syscall.ENOENT
- }
-
// Remove any negative Dirent. We've already asserted above with d.exists
// that the only thing remaining here can be a negative Dirent.
if w, ok := d.children[name]; ok {
@@ -862,49 +807,6 @@ func (d *Dirent) GetDotAttrs(root *Dirent) (DentAttr, DentAttr) {
return dot, dot
}
-// readdirFrozen returns readdir results based solely on the frozen children.
-func (d *Dirent) readdirFrozen(root *Dirent, offset int64, dirCtx *DirCtx) (int64, error) {
- // Collect attrs for "." and "..".
- attrs := make(map[string]DentAttr)
- names := []string{".", ".."}
- attrs["."], attrs[".."] = d.GetDotAttrs(root)
-
- // Get info from all children.
- d.mu.Lock()
- defer d.mu.Unlock()
- for name, w := range d.children {
- if child := w.Get(); child != nil {
- defer child.DecRef()
-
- // Skip negative children.
- if child.(*Dirent).IsNegative() {
- continue
- }
-
- sattr := child.(*Dirent).Inode.StableAttr
- attrs[name] = DentAttr{
- Type: sattr.Type,
- InodeID: sattr.InodeID,
- }
- names = append(names, name)
- }
- }
-
- sort.Strings(names)
-
- if int(offset) >= len(names) {
- return offset, nil
- }
- names = names[int(offset):]
- for _, name := range names {
- if err := dirCtx.DirEmit(name, attrs[name]); err != nil {
- return offset, err
- }
- offset++
- }
- return offset, nil
-}
-
// DirIterator is an open directory containing directory entries that can be read.
type DirIterator interface {
// IterateDir emits directory entries by calling dirCtx.EmitDir, beginning
@@ -964,10 +866,6 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent,
return offset, nil
}
- if d.frozen {
- return d.readdirFrozen(root, offset, dirCtx)
- }
-
// Collect attrs for "." and "..".
dot, dotdot := d.GetDotAttrs(root)
@@ -1068,11 +966,6 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err
return nil, syserror.EINVAL
}
- // Are we frozen?
- if d.parent.frozen && !d.parent.Inode.IsVirtual() {
- return nil, syserror.ENOENT
- }
-
// Dirent that'll replace d.
//
// Note that NewDirent returns with one reference taken; the reference
@@ -1101,11 +994,6 @@ func (d *Dirent) unmount(ctx context.Context, replacement *Dirent) error {
return syserror.ENOENT
}
- // Are we frozen?
- if d.parent.frozen && !d.parent.Inode.IsVirtual() {
- return syserror.ENOENT
- }
-
// Remount our former child in its place.
//
// As replacement used to be our child, it must already have the right
@@ -1135,11 +1023,6 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath
unlock := d.lockDirectory()
defer unlock()
- // Are we frozen?
- if d.frozen && !d.Inode.IsVirtual() {
- return syscall.ENOENT
- }
-
// Try to walk to the node.
child, err := d.walk(ctx, root, name, false /* may unlock */)
if err != nil {
@@ -1201,11 +1084,6 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string)
unlock := d.lockDirectory()
defer unlock()
- // Are we frozen?
- if d.frozen && !d.Inode.IsVirtual() {
- return syscall.ENOENT
- }
-
// Check for dots.
if name == "." {
// Rejected as the last component by rmdir(2).
@@ -1519,15 +1397,6 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string
return err
}
- // Are we frozen?
- // TODO(jamieliu): Is this the right errno?
- if oldParent.frozen && !oldParent.Inode.IsVirtual() {
- return syscall.ENOENT
- }
- if newParent.frozen && !newParent.Inode.IsVirtual() {
- return syscall.ENOENT
- }
-
// Do we have general permission to remove from oldParent and
// create/replace in newParent?
if err := oldParent.Inode.CheckPermission(ctx, PermMask{Write: true, Execute: true}); err != nil {
diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go
index a76d87e3a..1971cc680 100644
--- a/pkg/sentry/fs/file_overlay_test.go
+++ b/pkg/sentry/fs/file_overlay_test.go
@@ -175,90 +175,6 @@ func TestReaddirRevalidation(t *testing.T) {
}
}
-// TestReaddirOverlayFrozen tests that calling Readdir on an overlay file with
-// a frozen dirent tree does not make Readdir calls to the underlying files.
-// This is a regression test for b/114808269.
-func TestReaddirOverlayFrozen(t *testing.T) {
- ctx := contexttest.Context(t)
-
- // Create an overlay with two directories, each with two files.
- upper := newTestRamfsDir(ctx, []dirContent{{name: "upper-file1"}, {name: "upper-file2"}}, nil)
- lower := newTestRamfsDir(ctx, []dirContent{{name: "lower-file1"}, {name: "lower-file2"}}, nil)
- overlayInode := fs.NewTestOverlayDir(ctx, upper, lower, false)
-
- // Set that overlay as the root.
- root := fs.NewDirent(ctx, overlayInode, "root")
- ctx = &rootContext{
- Context: ctx,
- root: root,
- }
-
- // Check that calling Readdir on the root now returns all 4 files (2
- // from each layer in the overlay).
- rootFile, err := root.Inode.GetFile(ctx, root, fs.FileFlags{Read: true})
- if err != nil {
- t.Fatalf("root.Inode.GetFile failed: %v", err)
- }
- defer rootFile.DecRef()
- ser := &fs.CollectEntriesSerializer{}
- if err := rootFile.Readdir(ctx, ser); err != nil {
- t.Fatalf("rootFile.Readdir failed: %v", err)
- }
- if got, want := ser.Order, []string{".", "..", "lower-file1", "lower-file2", "upper-file1", "upper-file2"}; !reflect.DeepEqual(got, want) {
- t.Errorf("Readdir got names %v, want %v", got, want)
- }
-
- // Readdir should have been called on upper and lower.
- upperDir := upper.InodeOperations.(*dir)
- lowerDir := lower.InodeOperations.(*dir)
- if !upperDir.ReaddirCalled {
- t.Errorf("upperDir.ReaddirCalled got %v, want true", upperDir.ReaddirCalled)
- }
- if !lowerDir.ReaddirCalled {
- t.Errorf("lowerDir.ReaddirCalled got %v, want true", lowerDir.ReaddirCalled)
- }
-
- // Reset.
- upperDir.ReaddirCalled = false
- lowerDir.ReaddirCalled = false
-
- // Take references on "upper-file1" and "lower-file1", pinning them in
- // the dirent tree.
- for _, name := range []string{"upper-file1", "lower-file1"} {
- if _, err := root.Walk(ctx, root, name); err != nil {
- t.Fatalf("root.Walk(%q) failed: %v", name, err)
- }
- // Don't drop a reference on the returned dirent so that it
- // will stay in the tree.
- }
-
- // Freeze the dirent tree.
- root.Freeze()
-
- // Seek back to the beginning of the file.
- if _, err := rootFile.Seek(ctx, fs.SeekSet, 0); err != nil {
- t.Fatalf("error seeking to beginning of directory: %v", err)
- }
-
- // Calling Readdir on the root now will return only the pinned
- // children.
- ser = &fs.CollectEntriesSerializer{}
- if err := rootFile.Readdir(ctx, ser); err != nil {
- t.Fatalf("rootFile.Readdir failed: %v", err)
- }
- if got, want := ser.Order, []string{".", "..", "lower-file1", "upper-file1"}; !reflect.DeepEqual(got, want) {
- t.Errorf("Readdir got names %v, want %v", got, want)
- }
-
- // Readdir should NOT have been called on upper or lower.
- if upperDir.ReaddirCalled {
- t.Errorf("upperDir.ReaddirCalled got %v, want false", upperDir.ReaddirCalled)
- }
- if lowerDir.ReaddirCalled {
- t.Errorf("lowerDir.ReaddirCalled got %v, want false", lowerDir.ReaddirCalled)
- }
-}
-
type rootContext struct {
context.Context
root *fs.Dirent
diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD
index 21003ea45..011625c80 100644
--- a/pkg/sentry/fs/host/BUILD
+++ b/pkg/sentry/fs/host/BUILD
@@ -10,7 +10,7 @@ go_library(
"descriptor_state.go",
"device.go",
"file.go",
- "fs.go",
+ "host.go",
"inode.go",
"inode_state.go",
"ioctl_unsafe.go",
@@ -62,14 +62,12 @@ go_test(
size = "small",
srcs = [
"descriptor_test.go",
- "fs_test.go",
"inode_test.go",
"socket_test.go",
"wait_test.go",
],
library = ":host",
deps = [
- "//pkg/context",
"//pkg/fd",
"//pkg/fdnotifier",
"//pkg/sentry/contexttest",
diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go
index 1658979fc..cd84e1337 100644
--- a/pkg/sentry/fs/host/control.go
+++ b/pkg/sentry/fs/host/control.go
@@ -32,6 +32,8 @@ func newSCMRights(fds []int) control.SCMRights {
}
// Files implements control.SCMRights.Files.
+//
+// TODO(gvisor.dev/issue/2017): Port to VFS2.
func (c *scmRights) Files(ctx context.Context, max int) (control.RightsFiles, bool) {
n := max
var trunc bool
diff --git a/pkg/sentry/fs/host/descriptor.go b/pkg/sentry/fs/host/descriptor.go
index 2a4d1b291..cfdce6a74 100644
--- a/pkg/sentry/fs/host/descriptor.go
+++ b/pkg/sentry/fs/host/descriptor.go
@@ -16,7 +16,6 @@ package host
import (
"fmt"
- "path"
"syscall"
"gvisor.dev/gvisor/pkg/fdnotifier"
@@ -28,12 +27,9 @@ import (
//
// +stateify savable
type descriptor struct {
- // donated is true if the host fd was donated by another process.
- donated bool
-
// If origFD >= 0, it is the host fd that this file was originally created
// from, which must be available at time of restore. The FD can be closed
- // after descriptor is created. Only set if donated is true.
+ // after descriptor is created.
origFD int
// wouldBlock is true if value (below) points to a file that can
@@ -41,15 +37,13 @@ type descriptor struct {
wouldBlock bool
// value is the wrapped host fd. It is never saved or restored
- // directly. How it is restored depends on whether it was
- // donated and the fs.MountSource it was originally
- // opened/created from.
+ // directly.
value int `state:"nosave"`
}
// newDescriptor returns a wrapped host file descriptor. On success,
// the descriptor is registered for event notifications with queue.
-func newDescriptor(fd int, donated bool, saveable bool, wouldBlock bool, queue *waiter.Queue) (*descriptor, error) {
+func newDescriptor(fd int, saveable bool, wouldBlock bool, queue *waiter.Queue) (*descriptor, error) {
ownedFD := fd
origFD := -1
if saveable {
@@ -69,7 +63,6 @@ func newDescriptor(fd int, donated bool, saveable bool, wouldBlock bool, queue *
}
}
return &descriptor{
- donated: donated,
origFD: origFD,
wouldBlock: wouldBlock,
value: ownedFD,
@@ -77,25 +70,11 @@ func newDescriptor(fd int, donated bool, saveable bool, wouldBlock bool, queue *
}
// initAfterLoad initializes the value of the descriptor after Load.
-func (d *descriptor) initAfterLoad(mo *superOperations, id uint64, queue *waiter.Queue) error {
- if d.donated {
- var err error
- d.value, err = syscall.Dup(d.origFD)
- if err != nil {
- return fmt.Errorf("failed to dup restored fd %d: %v", d.origFD, err)
- }
- } else {
- name, ok := mo.inodeMappings[id]
- if !ok {
- return fmt.Errorf("failed to find path for inode number %d", id)
- }
- fullpath := path.Join(mo.root, name)
-
- var err error
- d.value, err = open(nil, fullpath)
- if err != nil {
- return fmt.Errorf("failed to open %q: %v", fullpath, err)
- }
+func (d *descriptor) initAfterLoad(id uint64, queue *waiter.Queue) error {
+ var err error
+ d.value, err = syscall.Dup(d.origFD)
+ if err != nil {
+ return fmt.Errorf("failed to dup restored fd %d: %v", d.origFD, err)
}
if d.wouldBlock {
if err := syscall.SetNonblock(d.value, true); err != nil {
diff --git a/pkg/sentry/fs/host/descriptor_state.go b/pkg/sentry/fs/host/descriptor_state.go
index 8167390a9..e880582ab 100644
--- a/pkg/sentry/fs/host/descriptor_state.go
+++ b/pkg/sentry/fs/host/descriptor_state.go
@@ -16,7 +16,7 @@ package host
// beforeSave is invoked by stateify.
func (d *descriptor) beforeSave() {
- if d.donated && d.origFD < 0 {
+ if d.origFD < 0 {
panic("donated file descriptor cannot be saved")
}
}
diff --git a/pkg/sentry/fs/host/descriptor_test.go b/pkg/sentry/fs/host/descriptor_test.go
index 4205981f5..d8e4605b6 100644
--- a/pkg/sentry/fs/host/descriptor_test.go
+++ b/pkg/sentry/fs/host/descriptor_test.go
@@ -47,10 +47,10 @@ func TestDescriptorRelease(t *testing.T) {
// FD ownership is transferred to the descritor.
queue := &waiter.Queue{}
- d, err := newDescriptor(fd, false /* donated*/, tc.saveable, tc.wouldBlock, queue)
+ d, err := newDescriptor(fd, tc.saveable, tc.wouldBlock, queue)
if err != nil {
syscall.Close(fd)
- t.Fatalf("newDescriptor(%d, %t, false, %t, queue) failed, err: %v", fd, tc.saveable, tc.wouldBlock, err)
+ t.Fatalf("newDescriptor(%d, %t, %t, queue) failed, err: %v", fd, tc.saveable, tc.wouldBlock, err)
}
if tc.saveable {
if d.origFD < 0 {
diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go
index e08f56d04..034862694 100644
--- a/pkg/sentry/fs/host/file.go
+++ b/pkg/sentry/fs/host/file.go
@@ -101,8 +101,8 @@ func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner
})
return s, nil
default:
- msrc := newMountSource(ctx, "/", mounter, &Filesystem{}, fs.MountSourceFlags{}, false /* dontTranslateOwnership */)
- inode, err := newInode(ctx, msrc, donated, saveable, true /* donated */)
+ msrc := fs.NewNonCachingMountSource(ctx, &filesystem{}, fs.MountSourceFlags{})
+ inode, err := newInode(ctx, msrc, donated, saveable)
if err != nil {
return nil, err
}
diff --git a/pkg/sentry/fs/host/fs.go b/pkg/sentry/fs/host/fs.go
deleted file mode 100644
index d3e8e3a36..000000000
--- a/pkg/sentry/fs/host/fs.go
+++ /dev/null
@@ -1,339 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package host implements an fs.Filesystem for files backed by host
-// file descriptors.
-package host
-
-import (
- "fmt"
- "path"
- "path/filepath"
- "strconv"
- "strings"
-
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/fs"
-)
-
-// FilesystemName is the name under which Filesystem is registered.
-const FilesystemName = "whitelistfs"
-
-const (
- // whitelistKey is the mount option containing a comma-separated list
- // of host paths to whitelist.
- whitelistKey = "whitelist"
-
- // rootPathKey is the mount option containing the root path of the
- // mount.
- rootPathKey = "root"
-
- // dontTranslateOwnershipKey is the key to superOperations.dontTranslateOwnership.
- dontTranslateOwnershipKey = "dont_translate_ownership"
-)
-
-// maxTraversals determines link traversals in building the whitelist.
-const maxTraversals = 10
-
-// Filesystem is a pseudo file system that is only available during the setup
-// to lock down the configurations. This filesystem should only be mounted at root.
-//
-// Think twice before exposing this to applications.
-//
-// +stateify savable
-type Filesystem struct {
- // whitelist is a set of host paths to whitelist.
- paths []string
-}
-
-var _ fs.Filesystem = (*Filesystem)(nil)
-
-// Name is the identifier of this file system.
-func (*Filesystem) Name() string {
- return FilesystemName
-}
-
-// AllowUserMount prohibits users from using mount(2) with this file system.
-func (*Filesystem) AllowUserMount() bool {
- return false
-}
-
-// AllowUserList allows this filesystem to be listed in /proc/filesystems.
-func (*Filesystem) AllowUserList() bool {
- return true
-}
-
-// Flags returns that there is nothing special about this file system.
-func (*Filesystem) Flags() fs.FilesystemFlags {
- return 0
-}
-
-// Mount returns an fs.Inode exposing the host file system. It is intended to be locked
-// down in PreExec below.
-func (f *Filesystem) Mount(ctx context.Context, _ string, flags fs.MountSourceFlags, data string, _ interface{}) (*fs.Inode, error) {
- // Parse generic comma-separated key=value options.
- options := fs.GenericMountSourceOptions(data)
-
- // Grab the whitelist if one was specified.
- // TODO(edahlgren/mpratt/hzy): require another option "testonly" in order to allow
- // no whitelist.
- if wl, ok := options[whitelistKey]; ok {
- f.paths = strings.Split(wl, "|")
- delete(options, whitelistKey)
- }
-
- // If the rootPath was set, use it. Othewise default to the root of the
- // host fs.
- rootPath := "/"
- if rp, ok := options[rootPathKey]; ok {
- rootPath = rp
- delete(options, rootPathKey)
-
- // We must relativize the whitelisted paths to the new root.
- for i, p := range f.paths {
- rel, err := filepath.Rel(rootPath, p)
- if err != nil {
- return nil, fmt.Errorf("whitelist path %q must be a child of root path %q", p, rootPath)
- }
- f.paths[i] = path.Join("/", rel)
- }
- }
- fd, err := open(nil, rootPath)
- if err != nil {
- return nil, fmt.Errorf("failed to find root: %v", err)
- }
-
- var dontTranslateOwnership bool
- if v, ok := options[dontTranslateOwnershipKey]; ok {
- b, err := strconv.ParseBool(v)
- if err != nil {
- return nil, fmt.Errorf("invalid value for %q: %v", dontTranslateOwnershipKey, err)
- }
- dontTranslateOwnership = b
- delete(options, dontTranslateOwnershipKey)
- }
-
- // Fail if the caller passed us more options than we know about.
- if len(options) > 0 {
- return nil, fmt.Errorf("unsupported mount options: %v", options)
- }
-
- // The mounting EUID/EGID will be cached by this file system. This will
- // be used to assign ownership to files that we own.
- owner := fs.FileOwnerFromContext(ctx)
-
- // Construct the host file system mount and inode.
- msrc := newMountSource(ctx, rootPath, owner, f, flags, dontTranslateOwnership)
- return newInode(ctx, msrc, fd, false /* saveable */, false /* donated */)
-}
-
-// InstallWhitelist locks down the MountNamespace to only the currently installed
-// Dirents and the given paths.
-func (f *Filesystem) InstallWhitelist(ctx context.Context, m *fs.MountNamespace) error {
- return installWhitelist(ctx, m, f.paths)
-}
-
-func installWhitelist(ctx context.Context, m *fs.MountNamespace, paths []string) error {
- if len(paths) == 0 || (len(paths) == 1 && paths[0] == "") {
- // Warning will be logged during filter installation if the empty
- // whitelist matters (allows for host file access).
- return nil
- }
-
- // Done tracks entries already added.
- done := make(map[string]bool)
- root := m.Root()
- defer root.DecRef()
-
- for i := 0; i < len(paths); i++ {
- // Make sure the path is absolute. This is a sanity check.
- if !path.IsAbs(paths[i]) {
- return fmt.Errorf("path %q is not absolute", paths[i])
- }
-
- // We need to add all the intermediate paths, in case one of
- // them is a symlink that needs to be resolved.
- for j := 1; j <= len(paths[i]); j++ {
- if j < len(paths[i]) && paths[i][j] != '/' {
- continue
- }
- current := paths[i][:j]
-
- // Lookup the given component in the tree.
- remainingTraversals := uint(maxTraversals)
- d, err := m.FindLink(ctx, root, nil, current, &remainingTraversals)
- if err != nil {
- log.Warningf("populate failed for %q: %v", current, err)
- continue
- }
-
- // It's critical that this DecRef happens after the
- // freeze below. This ensures that the dentry is in
- // place to be frozen. Otherwise, we freeze without
- // these entries.
- defer d.DecRef()
-
- // Expand the last component if necessary.
- if current == paths[i] {
- // Is it a directory or symlink?
- sattr := d.Inode.StableAttr
- if fs.IsDir(sattr) {
- for name := range childDentAttrs(ctx, d) {
- paths = append(paths, path.Join(current, name))
- }
- }
- if fs.IsSymlink(sattr) {
- // Only expand symlinks once. The
- // folder structure may contain
- // recursive symlinks and we don't want
- // to end up infinitely expanding this
- // symlink. This is safe because this
- // is the last component. If a later
- // path wants to symlink something
- // beneath this symlink that will still
- // be handled by the FindLink above.
- if done[current] {
- continue
- }
-
- s, err := d.Inode.Readlink(ctx)
- if err != nil {
- log.Warningf("readlink failed for %q: %v", current, err)
- continue
- }
- if path.IsAbs(s) {
- paths = append(paths, s)
- } else {
- target := path.Join(path.Dir(current), s)
- paths = append(paths, target)
- }
- }
- }
-
- // Only report this one once even though we may look
- // it up more than once. If we whitelist /a/b,/a then
- // /a will be "done" when it is looked up for /a/b,
- // however we still need to expand all of its contents
- // when whitelisting /a.
- if !done[current] {
- log.Debugf("whitelisted: %s", current)
- }
- done[current] = true
- }
- }
-
- // Freeze the mount tree in place. This prevents any new paths from
- // being opened and any old ones from being removed. If we do provide
- // tmpfs mounts, we'll want to freeze/thaw those separately.
- m.Freeze()
- return nil
-}
-
-func childDentAttrs(ctx context.Context, d *fs.Dirent) map[string]fs.DentAttr {
- dirname, _ := d.FullName(nil /* root */)
- dir, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true})
- if err != nil {
- log.Warningf("failed to open directory %q: %v", dirname, err)
- return nil
- }
- dir.DecRef()
- var stubSerializer fs.CollectEntriesSerializer
- if err := dir.Readdir(ctx, &stubSerializer); err != nil {
- log.Warningf("failed to iterate on host directory %q: %v", dirname, err)
- return nil
- }
- delete(stubSerializer.Entries, ".")
- delete(stubSerializer.Entries, "..")
- return stubSerializer.Entries
-}
-
-// newMountSource constructs a new host fs.MountSource
-// relative to a root path. The root should match the mount point.
-func newMountSource(ctx context.Context, root string, mounter fs.FileOwner, filesystem fs.Filesystem, flags fs.MountSourceFlags, dontTranslateOwnership bool) *fs.MountSource {
- return fs.NewMountSource(ctx, &superOperations{
- root: root,
- inodeMappings: make(map[uint64]string),
- mounter: mounter,
- dontTranslateOwnership: dontTranslateOwnership,
- }, filesystem, flags)
-}
-
-// superOperations implements fs.MountSourceOperations.
-//
-// +stateify savable
-type superOperations struct {
- fs.SimpleMountSourceOperations
-
- // root is the path of the mount point. All inode mappings
- // are relative to this root.
- root string
-
- // inodeMappings contains mappings of fs.Inodes associated
- // with this MountSource to paths under root.
- inodeMappings map[uint64]string
-
- // mounter is the cached EUID/EGID that mounted this file system.
- mounter fs.FileOwner
-
- // dontTranslateOwnership indicates whether to not translate file
- // ownership.
- //
- // By default, files/directories owned by the sandbox uses UID/GID
- // of the mounter. For files/directories that are not owned by the
- // sandbox, file UID/GID is translated to a UID/GID which cannot
- // be mapped in the sandboxed application's user namespace. The
- // UID/GID will look like the nobody UID/GID (65534) but is not
- // strictly owned by the user "nobody".
- //
- // If whitelistfs is a lower filesystem in an overlay, set
- // dont_translate_ownership=true in mount options.
- dontTranslateOwnership bool
-}
-
-var _ fs.MountSourceOperations = (*superOperations)(nil)
-
-// ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings.
-func (m *superOperations) ResetInodeMappings() {
- m.inodeMappings = make(map[uint64]string)
-}
-
-// SaveInodeMapping implements fs.MountSourceOperations.SaveInodeMapping.
-func (m *superOperations) SaveInodeMapping(inode *fs.Inode, path string) {
- // This is very unintuitive. We *CANNOT* trust the inode's StableAttrs,
- // because overlay copyUp may have changed them out from under us.
- // So much for "immutable".
- sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr
- m.inodeMappings[sattr.InodeID] = path
-}
-
-// Keep implements fs.MountSourceOperations.Keep.
-//
-// TODO(b/72455313,b/77596690): It is possible to change the permissions on a
-// host file while it is in the dirent cache (say from RO to RW), but it is not
-// possible to re-open the file with more relaxed permissions, since the host
-// FD is already open and stored in the inode.
-//
-// Using the dirent LRU cache increases the odds that this bug is encountered.
-// Since host file access is relatively fast anyways, we disable the LRU cache
-// for host fs files. Once we can properly deal with permissions changes and
-// re-opening host files, we should revisit whether or not to make use of the
-// LRU cache.
-func (*superOperations) Keep(*fs.Dirent) bool {
- return false
-}
-
-func init() {
- fs.RegisterFilesystem(&Filesystem{})
-}
diff --git a/pkg/sentry/fs/host/fs_test.go b/pkg/sentry/fs/host/fs_test.go
deleted file mode 100644
index 3111d2df9..000000000
--- a/pkg/sentry/fs/host/fs_test.go
+++ /dev/null
@@ -1,380 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package host
-
-import (
- "fmt"
- "io/ioutil"
- "os"
- "path"
- "reflect"
- "sort"
- "testing"
-
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/fs"
-)
-
-// newTestMountNamespace creates a MountNamespace with a ramfs root.
-// It returns the host folder created, which should be removed when done.
-func newTestMountNamespace(t *testing.T) (*fs.MountNamespace, string, error) {
- p, err := ioutil.TempDir("", "root")
- if err != nil {
- return nil, "", err
- }
-
- fd, err := open(nil, p)
- if err != nil {
- os.RemoveAll(p)
- return nil, "", err
- }
- ctx := contexttest.Context(t)
- root, err := newInode(ctx, newMountSource(ctx, p, fs.RootOwner, &Filesystem{}, fs.MountSourceFlags{}, false), fd, false, false)
- if err != nil {
- os.RemoveAll(p)
- return nil, "", err
- }
- mm, err := fs.NewMountNamespace(ctx, root)
- if err != nil {
- os.RemoveAll(p)
- return nil, "", err
- }
- return mm, p, nil
-}
-
-// createTestDirs populates the root with some test files and directories.
-// /a/a1.txt
-// /a/a2.txt
-// /b/b1.txt
-// /b/c/c1.txt
-// /symlinks/normal.txt
-// /symlinks/to_normal.txt -> /symlinks/normal.txt
-// /symlinks/recursive -> /symlinks
-func createTestDirs(ctx context.Context, t *testing.T, m *fs.MountNamespace) error {
- r := m.Root()
- defer r.DecRef()
-
- if err := r.CreateDirectory(ctx, r, "a", fs.FilePermsFromMode(0777)); err != nil {
- return err
- }
-
- a, err := r.Walk(ctx, r, "a")
- if err != nil {
- return err
- }
- defer a.DecRef()
-
- a1, err := a.Create(ctx, r, "a1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666))
- if err != nil {
- return err
- }
- a1.DecRef()
-
- a2, err := a.Create(ctx, r, "a2.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666))
- if err != nil {
- return err
- }
- a2.DecRef()
-
- if err := r.CreateDirectory(ctx, r, "b", fs.FilePermsFromMode(0777)); err != nil {
- return err
- }
-
- b, err := r.Walk(ctx, r, "b")
- if err != nil {
- return err
- }
- defer b.DecRef()
-
- b1, err := b.Create(ctx, r, "b1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666))
- if err != nil {
- return err
- }
- b1.DecRef()
-
- if err := b.CreateDirectory(ctx, r, "c", fs.FilePermsFromMode(0777)); err != nil {
- return err
- }
-
- c, err := b.Walk(ctx, r, "c")
- if err != nil {
- return err
- }
- defer c.DecRef()
-
- c1, err := c.Create(ctx, r, "c1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666))
- if err != nil {
- return err
- }
- c1.DecRef()
-
- if err := r.CreateDirectory(ctx, r, "symlinks", fs.FilePermsFromMode(0777)); err != nil {
- return err
- }
-
- symlinks, err := r.Walk(ctx, r, "symlinks")
- if err != nil {
- return err
- }
- defer symlinks.DecRef()
-
- normal, err := symlinks.Create(ctx, r, "normal.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666))
- if err != nil {
- return err
- }
- normal.DecRef()
-
- if err := symlinks.CreateLink(ctx, r, "/symlinks/normal.txt", "to_normal.txt"); err != nil {
- return err
- }
-
- return symlinks.CreateLink(ctx, r, "/symlinks", "recursive")
-}
-
-// allPaths returns a slice of all paths of entries visible in the rootfs.
-func allPaths(ctx context.Context, t *testing.T, m *fs.MountNamespace, base string) ([]string, error) {
- var paths []string
- root := m.Root()
- defer root.DecRef()
-
- maxTraversals := uint(1)
- d, err := m.FindLink(ctx, root, nil, base, &maxTraversals)
- if err != nil {
- t.Logf("FindLink failed for %q", base)
- return paths, err
- }
- defer d.DecRef()
-
- if fs.IsDir(d.Inode.StableAttr) {
- dir, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true})
- if err != nil {
- return nil, fmt.Errorf("failed to open directory %q: %v", base, err)
- }
- iter, ok := dir.FileOperations.(fs.DirIterator)
- if !ok {
- return nil, fmt.Errorf("cannot directly iterate on host directory %q", base)
- }
- dirCtx := &fs.DirCtx{
- Serializer: noopDentrySerializer{},
- }
- if _, err := fs.DirentReaddir(ctx, d, iter, root, dirCtx, 0); err != nil {
- return nil, err
- }
- for name := range dirCtx.DentAttrs() {
- if name == "." || name == ".." {
- continue
- }
-
- fullName := path.Join(base, name)
- paths = append(paths, fullName)
-
- // Recurse.
- subpaths, err := allPaths(ctx, t, m, fullName)
- if err != nil {
- return paths, err
- }
- paths = append(paths, subpaths...)
- }
- }
-
- return paths, nil
-}
-
-type noopDentrySerializer struct{}
-
-func (noopDentrySerializer) CopyOut(string, fs.DentAttr) error {
- return nil
-}
-func (noopDentrySerializer) Written() int {
- return 4096
-}
-
-// pathsEqual returns true if the two string slices contain the same entries.
-func pathsEqual(got, want []string) bool {
- sort.Strings(got)
- sort.Strings(want)
-
- if len(got) != len(want) {
- return false
- }
-
- for i := range got {
- if got[i] != want[i] {
- return false
- }
- }
-
- return true
-}
-
-func TestWhitelist(t *testing.T) {
- for _, test := range []struct {
- // description of the test.
- desc string
- // paths are the paths to whitelist
- paths []string
- // want are all of the directory entries that should be
- // visible (nothing beyond this set should be visible).
- want []string
- }{
- {
- desc: "root",
- paths: []string{"/"},
- want: []string{"/a", "/a/a1.txt", "/a/a2.txt", "/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt", "/symlinks", "/symlinks/normal.txt", "/symlinks/to_normal.txt", "/symlinks/recursive"},
- },
- {
- desc: "top-level directories",
- paths: []string{"/a", "/b"},
- want: []string{"/a", "/a/a1.txt", "/a/a2.txt", "/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"},
- },
- {
- desc: "nested directories (1/2)",
- paths: []string{"/b", "/b/c"},
- want: []string{"/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"},
- },
- {
- desc: "nested directories (2/2)",
- paths: []string{"/b/c", "/b"},
- want: []string{"/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"},
- },
- {
- desc: "single file",
- paths: []string{"/b/c/c1.txt"},
- want: []string{"/b", "/b/c", "/b/c/c1.txt"},
- },
- {
- desc: "single file and directory",
- paths: []string{"/a/a1.txt", "/b/c"},
- want: []string{"/a", "/a/a1.txt", "/b", "/b/c", "/b/c/c1.txt"},
- },
- {
- desc: "symlink",
- paths: []string{"/symlinks/to_normal.txt"},
- want: []string{"/symlinks", "/symlinks/normal.txt", "/symlinks/to_normal.txt"},
- },
- {
- desc: "recursive symlink",
- paths: []string{"/symlinks/recursive/normal.txt"},
- want: []string{"/symlinks", "/symlinks/normal.txt", "/symlinks/recursive"},
- },
- } {
- t.Run(test.desc, func(t *testing.T) {
- m, p, err := newTestMountNamespace(t)
- if err != nil {
- t.Errorf("Failed to create MountNamespace: %v", err)
- }
- defer os.RemoveAll(p)
-
- ctx := withRoot(contexttest.RootContext(t), m.Root())
- if err := createTestDirs(ctx, t, m); err != nil {
- t.Errorf("Failed to create test dirs: %v", err)
- }
-
- if err := installWhitelist(ctx, m, test.paths); err != nil {
- t.Errorf("installWhitelist(%v) err got %v want nil", test.paths, err)
- }
-
- got, err := allPaths(ctx, t, m, "/")
- if err != nil {
- t.Fatalf("Failed to lookup paths (whitelisted: %v): %v", test.paths, err)
- }
-
- if !pathsEqual(got, test.want) {
- t.Errorf("For paths %v got %v want %v", test.paths, got, test.want)
- }
- })
- }
-}
-
-func TestRootPath(t *testing.T) {
- // Create a temp dir, which will be the root of our mounted fs.
- rootPath, err := ioutil.TempDir(os.TempDir(), "root")
- if err != nil {
- t.Fatalf("TempDir failed: %v", err)
- }
- defer os.RemoveAll(rootPath)
-
- // Create two files inside the new root, one which will be whitelisted
- // and one not.
- whitelisted, err := ioutil.TempFile(rootPath, "white")
- if err != nil {
- t.Fatalf("TempFile failed: %v", err)
- }
- if _, err := ioutil.TempFile(rootPath, "black"); err != nil {
- t.Fatalf("TempFile failed: %v", err)
- }
-
- // Create a mount with a root path and single whitelisted file.
- hostFS := &Filesystem{}
- ctx := contexttest.Context(t)
- data := fmt.Sprintf("%s=%s,%s=%s", rootPathKey, rootPath, whitelistKey, whitelisted.Name())
- inode, err := hostFS.Mount(ctx, "", fs.MountSourceFlags{}, data, nil)
- if err != nil {
- t.Fatalf("Mount failed: %v", err)
- }
- mm, err := fs.NewMountNamespace(ctx, inode)
- if err != nil {
- t.Fatalf("NewMountNamespace failed: %v", err)
- }
- if err := hostFS.InstallWhitelist(ctx, mm); err != nil {
- t.Fatalf("InstallWhitelist failed: %v", err)
- }
-
- // Get the contents of the root directory.
- rootDir := mm.Root()
- rctx := withRoot(ctx, rootDir)
- f, err := rootDir.Inode.GetFile(rctx, rootDir, fs.FileFlags{})
- if err != nil {
- t.Fatalf("GetFile failed: %v", err)
- }
- c := &fs.CollectEntriesSerializer{}
- if err := f.Readdir(rctx, c); err != nil {
- t.Fatalf("Readdir failed: %v", err)
- }
-
- // We should have only our whitelisted file, plus the dots.
- want := []string{path.Base(whitelisted.Name()), ".", ".."}
- got := c.Order
- sort.Strings(want)
- sort.Strings(got)
- if !reflect.DeepEqual(got, want) {
- t.Errorf("Readdir got %v, wanted %v", got, want)
- }
-}
-
-type rootContext struct {
- context.Context
- root *fs.Dirent
-}
-
-// withRoot returns a copy of ctx with the given root.
-func withRoot(ctx context.Context, root *fs.Dirent) context.Context {
- return &rootContext{
- Context: ctx,
- root: root,
- }
-}
-
-// Value implements Context.Value.
-func (rc rootContext) Value(key interface{}) interface{} {
- switch key {
- case fs.CtxRoot:
- rc.root.IncRef()
- return rc.root
- default:
- return rc.Context.Value(key)
- }
-}
diff --git a/pkg/sentry/fs/host/host.go b/pkg/sentry/fs/host/host.go
new file mode 100644
index 000000000..081ba1dd8
--- /dev/null
+++ b/pkg/sentry/fs/host/host.go
@@ -0,0 +1,59 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package host supports file descriptors imported directly.
+package host
+
+import (
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// filesystem is a host filesystem.
+//
+// +stateify savable
+type filesystem struct{}
+
+func init() {
+ fs.RegisterFilesystem(&filesystem{})
+}
+
+// FilesystemName is the name under which the filesystem is registered.
+const FilesystemName = "host"
+
+// Name is the name of the filesystem.
+func (*filesystem) Name() string {
+ return FilesystemName
+}
+
+// Mount returns an error. Mounting hostfs is not allowed.
+func (*filesystem) Mount(ctx context.Context, device string, flags fs.MountSourceFlags, data string, dataObj interface{}) (*fs.Inode, error) {
+ return nil, syserror.EPERM
+}
+
+// AllowUserMount prohibits users from using mount(2) with this file system.
+func (*filesystem) AllowUserMount() bool {
+ return false
+}
+
+// AllowUserList prohibits this filesystem to be listed in /proc/filesystems.
+func (*filesystem) AllowUserList() bool {
+ return false
+}
+
+// Flags returns that there is nothing special about this file system.
+func (*filesystem) Flags() fs.FilesystemFlags {
+ return 0
+}
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 6fa39caab..1da3c0a17 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -17,12 +17,10 @@ package host
import (
"syscall"
- "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/secio"
- "gvisor.dev/gvisor/pkg/sentry/device"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/memmap"
@@ -69,9 +67,6 @@ type inodeOperations struct {
//
// +stateify savable
type inodeFileState struct {
- // Common file system state.
- mops *superOperations `state:"wait"`
-
// descriptor is the backing host FD.
descriptor *descriptor `state:"wait"`
@@ -160,7 +155,7 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err
if err := syscall.Fstat(i.FD(), &s); err != nil {
return fs.UnstableAttr{}, err
}
- return unstableAttr(i.mops, &s), nil
+ return unstableAttr(&s), nil
}
// Allocate implements fsutil.CachedFileObject.Allocate.
@@ -172,7 +167,7 @@ func (i *inodeFileState) Allocate(_ context.Context, offset, length int64) error
var _ fs.InodeOperations = (*inodeOperations)(nil)
// newInode returns a new fs.Inode backed by the host FD.
-func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool, donated bool) (*fs.Inode, error) {
+func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool) (*fs.Inode, error) {
// Retrieve metadata.
var s syscall.Stat_t
err := syscall.Fstat(fd, &s)
@@ -181,24 +176,17 @@ func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool,
}
fileState := &inodeFileState{
- mops: msrc.MountSourceOperations.(*superOperations),
sattr: stableAttr(&s),
}
// Initialize the wrapped host file descriptor.
- fileState.descriptor, err = newDescriptor(
- fd,
- donated,
- saveable,
- wouldBlock(&s),
- &fileState.queue,
- )
+ fileState.descriptor, err = newDescriptor(fd, saveable, wouldBlock(&s), &fileState.queue)
if err != nil {
return nil, err
}
// Build the fs.InodeOperations.
- uattr := unstableAttr(msrc.MountSourceOperations.(*superOperations), &s)
+ uattr := unstableAttr(&s)
iops := &inodeOperations{
fileState: fileState,
cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{
@@ -232,54 +220,23 @@ func (i *inodeOperations) Release(context.Context) {
// Lookup implements fs.InodeOperations.Lookup.
func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) {
- // Get a new FD relative to i at name.
- fd, err := open(i, name)
- if err != nil {
- if err == syserror.ENOENT {
- return nil, syserror.ENOENT
- }
- return nil, err
- }
-
- inode, err := newInode(ctx, dir.MountSource, fd, false /* saveable */, false /* donated */)
- if err != nil {
- return nil, err
- }
-
- // Return the fs.Dirent.
- return fs.NewDirent(ctx, inode, name), nil
+ return nil, syserror.ENOENT
}
// Create implements fs.InodeOperations.Create.
func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) {
- // Create a file relative to i at name.
- //
- // N.B. We always open this file O_RDWR regardless of flags because a
- // future GetFile might want more access. Open allows this regardless
- // of perm.
- fd, err := openAt(i, name, syscall.O_RDWR|syscall.O_CREAT|syscall.O_EXCL, perm.LinuxMode())
- if err != nil {
- return nil, err
- }
-
- inode, err := newInode(ctx, dir.MountSource, fd, false /* saveable */, false /* donated */)
- if err != nil {
- return nil, err
- }
+ return nil, syserror.EPERM
- d := fs.NewDirent(ctx, inode, name)
- defer d.DecRef()
- return inode.GetFile(ctx, d, flags)
}
// CreateDirectory implements fs.InodeOperations.CreateDirectory.
func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
- return syscall.Mkdirat(i.fileState.FD(), name, uint32(perm.LinuxMode()))
+ return syserror.EPERM
}
// CreateLink implements fs.InodeOperations.CreateLink.
func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error {
- return createLink(i.fileState.FD(), oldname, newname)
+ return syserror.EPERM
}
// CreateHardLink implements fs.InodeOperations.CreateHardLink.
@@ -294,25 +251,17 @@ func (*inodeOperations) CreateFifo(context.Context, *fs.Inode, string, fs.FilePe
// Remove implements fs.InodeOperations.Remove.
func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error {
- return unlinkAt(i.fileState.FD(), name, false /* dir */)
+ return syserror.EPERM
}
// RemoveDirectory implements fs.InodeOperations.RemoveDirectory.
func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error {
- return unlinkAt(i.fileState.FD(), name, true /* dir */)
+ return syserror.EPERM
}
// Rename implements fs.InodeOperations.Rename.
func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error {
- op, ok := oldParent.InodeOperations.(*inodeOperations)
- if !ok {
- return syscall.EXDEV
- }
- np, ok := newParent.InodeOperations.(*inodeOperations)
- if !ok {
- return syscall.EXDEV
- }
- return syscall.Renameat(op.fileState.FD(), oldName, np.fileState.FD(), newName)
+ return syserror.EPERM
}
// Bind implements fs.InodeOperations.Bind.
@@ -461,69 +410,7 @@ func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {}
// readdirAll returns all of the directory entries in i.
func (i *inodeOperations) readdirAll(d *dirInfo) (map[string]fs.DentAttr, error) {
- i.readdirMu.Lock()
- defer i.readdirMu.Unlock()
-
- fd := i.fileState.FD()
-
- // syscall.ReadDirent will use getdents, which will seek the file past
- // the last directory entry. To read the directory entries a second
- // time, we need to seek back to the beginning.
- if _, err := syscall.Seek(fd, 0, 0); err != nil {
- if err == syscall.ESPIPE {
- // All directories should be seekable. If this file
- // isn't seekable, it is not a directory and we should
- // return that more sane error.
- err = syscall.ENOTDIR
- }
- return nil, err
- }
-
- names := make([]string, 0, 100)
- for {
- // Refill the buffer if necessary
- if d.bufp >= d.nbuf {
- d.bufp = 0
- // ReadDirent will just do a sys_getdents64 to the kernel.
- n, err := syscall.ReadDirent(fd, d.buf)
- if err != nil {
- return nil, err
- }
- if n == 0 {
- break // EOF
- }
- d.nbuf = n
- }
-
- var nb int
- // Parse the dirent buffer we just get and return the directory names along
- // with the number of bytes consumed in the buffer.
- nb, _, names = syscall.ParseDirent(d.buf[d.bufp:d.nbuf], -1, names)
- d.bufp += nb
- }
-
- entries := make(map[string]fs.DentAttr)
- for _, filename := range names {
- // Lookup the type and host device and inode.
- stat, lerr := fstatat(fd, filename, linux.AT_SYMLINK_NOFOLLOW)
- if lerr == syscall.ENOENT {
- // File disappeared between readdir and lstat.
- // Just treat it as if it didn't exist.
- continue
- }
-
- // There was a serious problem, we should probably report it.
- if lerr != nil {
- return nil, lerr
- }
-
- entries[filename] = fs.DentAttr{
- Type: nodeType(&stat),
- InodeID: hostFileDevice.Map(device.MultiDeviceKey{
- Device: stat.Dev,
- Inode: stat.Ino,
- }),
- }
- }
- return entries, nil
+ // We only support non-directory file descriptors that have been
+ // imported, so just claim that this isn't a directory, even if it is.
+ return nil, syscall.ENOTDIR
}
diff --git a/pkg/sentry/fs/host/inode_state.go b/pkg/sentry/fs/host/inode_state.go
index 299e0e0b0..1adbd4562 100644
--- a/pkg/sentry/fs/host/inode_state.go
+++ b/pkg/sentry/fs/host/inode_state.go
@@ -18,29 +18,14 @@ import (
"fmt"
"syscall"
- "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/device"
"gvisor.dev/gvisor/pkg/sentry/fs"
)
-// beforeSave is invoked by stateify.
-func (i *inodeFileState) beforeSave() {
- if !i.queue.IsEmpty() {
- panic("event queue must be empty")
- }
- if !i.descriptor.donated && i.sattr.Type == fs.RegularFile {
- uattr, err := i.unstableAttr(context.Background())
- if err != nil {
- panic(fs.ErrSaveRejection{fmt.Errorf("failed to get unstable atttribute of %s: %v", i.mops.inodeMappings[i.sattr.InodeID], err)})
- }
- i.savedUAttr = &uattr
- }
-}
-
// afterLoad is invoked by stateify.
func (i *inodeFileState) afterLoad() {
// Initialize the descriptor value.
- if err := i.descriptor.initAfterLoad(i.mops, i.sattr.InodeID, &i.queue); err != nil {
+ if err := i.descriptor.initAfterLoad(i.sattr.InodeID, &i.queue); err != nil {
panic(fmt.Sprintf("failed to load value of descriptor: %v", err))
}
@@ -61,19 +46,4 @@ func (i *inodeFileState) afterLoad() {
// change across save and restore, error out.
panic(fs.ErrCorruption{fmt.Errorf("host %s conflict in host device mappings: %s", key, hostFileDevice)})
}
-
- if !i.descriptor.donated && i.sattr.Type == fs.RegularFile {
- env, ok := fs.CurrentRestoreEnvironment()
- if !ok {
- panic("missing restore environment")
- }
- uattr := unstableAttr(i.mops, &s)
- if env.ValidateFileSize && uattr.Size != i.savedUAttr.Size {
- panic(fs.ErrCorruption{fmt.Errorf("file size has changed for %s: previously %d, now %d", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.Size, uattr.Size)})
- }
- if env.ValidateFileTimestamp && uattr.ModificationTime != i.savedUAttr.ModificationTime {
- panic(fs.ErrCorruption{fmt.Errorf("file modification time has changed for %s: previously %v, now %v", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.ModificationTime, uattr.ModificationTime)})
- }
- i.savedUAttr = nil
- }
}
diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go
index 7221bc825..4c374681c 100644
--- a/pkg/sentry/fs/host/inode_test.go
+++ b/pkg/sentry/fs/host/inode_test.go
@@ -15,9 +15,6 @@
package host
import (
- "io/ioutil"
- "os"
- "path"
"syscall"
"testing"
@@ -25,69 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
)
-// TestMultipleReaddir verifies that multiple Readdir calls return the same
-// thing if they use different dir contexts.
-func TestMultipleReaddir(t *testing.T) {
- p, err := ioutil.TempDir("", "readdir")
- if err != nil {
- t.Fatalf("Failed to create test dir: %v", err)
- }
- defer os.RemoveAll(p)
-
- f, err := os.Create(path.Join(p, "a.txt"))
- if err != nil {
- t.Fatalf("Failed to create a.txt: %v", err)
- }
- f.Close()
-
- f, err = os.Create(path.Join(p, "b.txt"))
- if err != nil {
- t.Fatalf("Failed to create b.txt: %v", err)
- }
- f.Close()
-
- fd, err := open(nil, p)
- if err != nil {
- t.Fatalf("Failed to open %q: %v", p, err)
- }
- ctx := contexttest.Context(t)
- n, err := newInode(ctx, newMountSource(ctx, p, fs.RootOwner, &Filesystem{}, fs.MountSourceFlags{}, false), fd, false, false)
- if err != nil {
- t.Fatalf("Failed to create inode: %v", err)
- }
-
- dirent := fs.NewDirent(ctx, n, "readdir")
- openFile, err := n.GetFile(ctx, dirent, fs.FileFlags{Read: true})
- if err != nil {
- t.Fatalf("Failed to get file: %v", err)
- }
- defer openFile.DecRef()
-
- c1 := &fs.DirCtx{DirCursor: new(string)}
- if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c1, 0); err != nil {
- t.Fatalf("First Readdir failed: %v", err)
- }
-
- c2 := &fs.DirCtx{DirCursor: new(string)}
- if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c2, 0); err != nil {
- t.Errorf("Second Readdir failed: %v", err)
- }
-
- if _, ok := c1.DentAttrs()["a.txt"]; !ok {
- t.Errorf("want a.txt in first Readdir, got %v", c1.DentAttrs())
- }
- if _, ok := c1.DentAttrs()["b.txt"]; !ok {
- t.Errorf("want b.txt in first Readdir, got %v", c1.DentAttrs())
- }
-
- if _, ok := c2.DentAttrs()["a.txt"]; !ok {
- t.Errorf("want a.txt in second Readdir, got %v", c2.DentAttrs())
- }
- if _, ok := c2.DentAttrs()["b.txt"]; !ok {
- t.Errorf("want b.txt in second Readdir, got %v", c2.DentAttrs())
- }
-}
-
// TestCloseFD verifies fds will be closed.
func TestCloseFD(t *testing.T) {
var p [2]int
diff --git a/pkg/sentry/fs/host/ioctl_unsafe.go b/pkg/sentry/fs/host/ioctl_unsafe.go
index 271582e54..150ac8e19 100644
--- a/pkg/sentry/fs/host/ioctl_unsafe.go
+++ b/pkg/sentry/fs/host/ioctl_unsafe.go
@@ -21,6 +21,8 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
)
+// LINT.IfChange
+
func ioctlGetTermios(fd int) (*linux.Termios, error) {
var t linux.Termios
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TCGETS, uintptr(unsafe.Pointer(&t)))
@@ -54,3 +56,5 @@ func ioctlSetWinsize(fd int, w *linux.Winsize) error {
}
return nil
}
+
+// LINT.ThenChange(../../fsimpl/host/ioctl_unsafe.go)
diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go
index 3f218b4a7..cb91355ab 100644
--- a/pkg/sentry/fs/host/tty.go
+++ b/pkg/sentry/fs/host/tty.go
@@ -26,6 +26,8 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
+// LINT.IfChange
+
// TTYFileOperations implements fs.FileOperations for a host file descriptor
// that wraps a TTY FD.
//
@@ -43,6 +45,7 @@ type TTYFileOperations struct {
// connected to this TTY.
fgProcessGroup *kernel.ProcessGroup
+ // termios contains the terminal attributes for this TTY.
termios linux.KernelTermios
}
@@ -357,3 +360,5 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e
_ = pg.SendSignal(kernel.SignalInfoPriv(sig))
return kernel.ERESTARTSYS
}
+
+// LINT.ThenChange(../../fsimpl/host/tty.go)
diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go
index 7c60dc1db..1b0356930 100644
--- a/pkg/sentry/fs/host/util.go
+++ b/pkg/sentry/fs/host/util.go
@@ -16,7 +16,6 @@ package host
import (
"os"
- "path"
"syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -24,49 +23,10 @@ import (
"gvisor.dev/gvisor/pkg/sentry/device"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
"gvisor.dev/gvisor/pkg/syserror"
)
-func open(parent *inodeOperations, name string) (int, error) {
- if parent == nil && !path.IsAbs(name) {
- return -1, syserror.EINVAL
- }
- name = path.Clean(name)
-
- // Don't follow through symlinks.
- flags := syscall.O_NOFOLLOW
-
- if fd, err := openAt(parent, name, flags|syscall.O_RDWR, 0); err == nil {
- return fd, nil
- }
- // Retry as read-only.
- if fd, err := openAt(parent, name, flags|syscall.O_RDONLY, 0); err == nil {
- return fd, nil
- }
-
- // Retry as write-only.
- if fd, err := openAt(parent, name, flags|syscall.O_WRONLY, 0); err == nil {
- return fd, nil
- }
-
- // Retry as a symlink, by including O_PATH as an option.
- fd, err := openAt(parent, name, linux.O_PATH|flags, 0)
- if err == nil {
- return fd, nil
- }
-
- // Everything failed.
- return -1, err
-}
-
-func openAt(parent *inodeOperations, name string, flags int, perm linux.FileMode) (int, error) {
- if parent == nil {
- return syscall.Open(name, flags, uint32(perm))
- }
- return syscall.Openat(parent.fileState.FD(), name, flags, uint32(perm))
-}
-
func nodeType(s *syscall.Stat_t) fs.InodeType {
switch x := (s.Mode & syscall.S_IFMT); x {
case syscall.S_IFLNK:
@@ -107,54 +67,22 @@ func stableAttr(s *syscall.Stat_t) fs.StableAttr {
}
}
-func owner(mo *superOperations, s *syscall.Stat_t) fs.FileOwner {
- // User requested no translation, just return actual owner.
- if mo.dontTranslateOwnership {
- return fs.FileOwner{auth.KUID(s.Uid), auth.KGID(s.Gid)}
- }
-
- // Show only IDs relevant to the sandboxed task. I.e. if we not own the
- // file, no sandboxed task can own the file. In that case, we
- // use OverflowID for UID, implying that the IDs are not mapped in the
- // "root" user namespace.
- //
- // E.g.
- // sandbox's host EUID/EGID is 1/1.
- // some_dir's host UID/GID is 2/1.
- // Task that mounted this fs has virtualized EUID/EGID 5/5.
- //
- // If you executed `ls -n` in the sandboxed task, it would show:
- // drwxwrxwrx [...] 65534 5 [...] some_dir
-
- // Files are owned by OverflowID by default.
- owner := fs.FileOwner{auth.KUID(auth.OverflowUID), auth.KGID(auth.OverflowGID)}
-
- // If we own file on host, let mounting task's initial EUID own
- // the file.
- if s.Uid == hostUID {
- owner.UID = mo.mounter.UID
- }
-
- // If our group matches file's group, make file's group match
- // the mounting task's initial EGID.
- for _, gid := range hostGIDs {
- if s.Gid == gid {
- owner.GID = mo.mounter.GID
- break
- }
+func owner(s *syscall.Stat_t) fs.FileOwner {
+ return fs.FileOwner{
+ UID: auth.KUID(s.Uid),
+ GID: auth.KGID(s.Gid),
}
- return owner
}
-func unstableAttr(mo *superOperations, s *syscall.Stat_t) fs.UnstableAttr {
+func unstableAttr(s *syscall.Stat_t) fs.UnstableAttr {
return fs.UnstableAttr{
Size: s.Size,
Usage: s.Blocks * 512,
Perms: fs.FilePermsFromMode(linux.FileMode(s.Mode)),
- Owner: owner(mo, s),
- AccessTime: time.FromUnix(s.Atim.Sec, s.Atim.Nsec),
- ModificationTime: time.FromUnix(s.Mtim.Sec, s.Mtim.Nsec),
- StatusChangeTime: time.FromUnix(s.Ctim.Sec, s.Ctim.Nsec),
+ Owner: owner(s),
+ AccessTime: ktime.FromUnix(s.Atim.Sec, s.Atim.Nsec),
+ ModificationTime: ktime.FromUnix(s.Mtim.Sec, s.Mtim.Nsec),
+ StatusChangeTime: ktime.FromUnix(s.Ctim.Sec, s.Ctim.Nsec),
Links: uint64(s.Nlink),
}
}
diff --git a/pkg/sentry/fs/host/util_unsafe.go b/pkg/sentry/fs/host/util_unsafe.go
index 3ab36b088..23bd35d64 100644
--- a/pkg/sentry/fs/host/util_unsafe.go
+++ b/pkg/sentry/fs/host/util_unsafe.go
@@ -26,26 +26,6 @@ import (
// NulByte is a single NUL byte. It is passed to readlinkat as an empty string.
var NulByte byte = '\x00'
-func createLink(fd int, name string, linkName string) error {
- namePtr, err := syscall.BytePtrFromString(name)
- if err != nil {
- return err
- }
- linkNamePtr, err := syscall.BytePtrFromString(linkName)
- if err != nil {
- return err
- }
- _, _, errno := syscall.Syscall(
- syscall.SYS_SYMLINKAT,
- uintptr(unsafe.Pointer(namePtr)),
- uintptr(fd),
- uintptr(unsafe.Pointer(linkNamePtr)))
- if errno != 0 {
- return errno
- }
- return nil
-}
-
func readLink(fd int) (string, error) {
// Buffer sizing copied from os.Readlink.
for l := 128; ; l *= 2 {
@@ -66,27 +46,6 @@ func readLink(fd int) (string, error) {
}
}
-func unlinkAt(fd int, name string, dir bool) error {
- namePtr, err := syscall.BytePtrFromString(name)
- if err != nil {
- return err
- }
- var flags uintptr
- if dir {
- flags = linux.AT_REMOVEDIR
- }
- _, _, errno := syscall.Syscall(
- syscall.SYS_UNLINKAT,
- uintptr(fd),
- uintptr(unsafe.Pointer(namePtr)),
- flags,
- )
- if errno != 0 {
- return errno
- }
- return nil
-}
-
func timespecFromTimestamp(t ktime.Time, omit, setSysTime bool) syscall.Timespec {
if omit {
return syscall.Timespec{0, linux.UTIME_OMIT}
diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go
index c7981f66e..b414ddaee 100644
--- a/pkg/sentry/fs/mounts.go
+++ b/pkg/sentry/fs/mounts.go
@@ -273,19 +273,6 @@ func (mns *MountNamespace) DecRef() {
mns.DecRefWithDestructor(mns.destroy)
}
-// Freeze freezes the entire mount tree.
-func (mns *MountNamespace) Freeze() {
- mns.mu.Lock()
- defer mns.mu.Unlock()
-
- // We only want to freeze Dirents with active references, not Dirents referenced
- // by a mount's MountSource.
- mns.flushMountSourceRefsLocked()
-
- // Freeze the entire shebang.
- mns.root.Freeze()
-}
-
// withMountLocked prevents further walks to `node`, because `node` is about to
// be a mount point.
func (mns *MountNamespace) withMountLocked(node *Dirent, fn func() error) error {
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
index 03cc788c8..d6c5dd2c1 100644
--- a/pkg/sentry/fs/proc/task.go
+++ b/pkg/sentry/fs/proc/task.go
@@ -853,15 +853,15 @@ func (o *oomScoreAdj) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.F
// Read implements fs.FileOperations.Read.
func (f *oomScoreAdjFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
- if offset != 0 {
- return 0, io.EOF
+ if f.t.ExitState() == kernel.TaskExitDead {
+ return 0, syserror.ESRCH
}
- adj, err := f.t.OOMScoreAdj()
- if err != nil {
- return 0, err
+ var buf bytes.Buffer
+ fmt.Fprintf(&buf, "%d\n", f.t.OOMScoreAdj())
+ if offset >= int64(buf.Len()) {
+ return 0, io.EOF
}
- adjBytes := []byte(strconv.FormatInt(int64(adj), 10) + "\n")
- n, err := dst.CopyOut(ctx, adjBytes)
+ n, err := dst.CopyOut(ctx, buf.Bytes()[offset:])
return int64(n), err
}
@@ -880,6 +880,9 @@ func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOS
return 0, err
}
+ if f.t.ExitState() == kernel.TaskExitDead {
+ return 0, syserror.ESRCH
+ }
if err := f.t.SetOOMScoreAdj(v); err != nil {
return 0, err
}