diff options
Diffstat (limited to 'pkg/sentry/fs')
-rw-r--r-- | pkg/sentry/fs/dirent.go | 133 | ||||
-rw-r--r-- | pkg/sentry/fs/file_overlay_test.go | 84 | ||||
-rw-r--r-- | pkg/sentry/fs/host/BUILD | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/host/control.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/host/descriptor.go | 37 | ||||
-rw-r--r-- | pkg/sentry/fs/host/descriptor_state.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fs/host/descriptor_test.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/host/file.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/host/fs.go | 339 | ||||
-rw-r--r-- | pkg/sentry/fs/host/fs_test.go | 380 | ||||
-rw-r--r-- | pkg/sentry/fs/host/host.go | 59 | ||||
-rw-r--r-- | pkg/sentry/fs/host/inode.go | 141 | ||||
-rw-r--r-- | pkg/sentry/fs/host/inode_state.go | 32 | ||||
-rw-r--r-- | pkg/sentry/fs/host/inode_test.go | 66 | ||||
-rw-r--r-- | pkg/sentry/fs/host/ioctl_unsafe.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fs/host/tty.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fs/host/util.go | 92 | ||||
-rw-r--r-- | pkg/sentry/fs/host/util_unsafe.go | 41 | ||||
-rw-r--r-- | pkg/sentry/fs/mounts.go | 13 | ||||
-rw-r--r-- | pkg/sentry/fs/proc/task.go | 17 |
20 files changed, 120 insertions, 1339 deletions
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index e0b32e1c1..0266a5287 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -17,7 +17,6 @@ package fs import ( "fmt" "path" - "sort" "sync/atomic" "syscall" @@ -121,9 +120,6 @@ type Dirent struct { // deleted may be set atomically when removed. deleted int32 - // frozen indicates this entry can't walk to unknown nodes. - frozen bool - // mounted is true if Dirent is a mount point, similar to include/linux/dcache.h:DCACHE_MOUNTED. mounted bool @@ -253,8 +249,7 @@ func (d *Dirent) IsNegative() bool { return d.Inode == nil } -// hashChild will hash child into the children list of its new parent d, carrying over -// any "frozen" state from d. +// hashChild will hash child into the children list of its new parent d. // // Returns (*WeakRef, true) if hashing child caused a Dirent to be unhashed. The caller must // validate the returned unhashed weak reference. Common cases: @@ -282,9 +277,6 @@ func (d *Dirent) hashChild(child *Dirent) (*refs.WeakRef, bool) { d.IncRef() } - // Carry over parent's frozen state. - child.frozen = d.frozen - return d.hashChildParentSet(child) } @@ -400,38 +392,6 @@ func (d *Dirent) MountRoot() *Dirent { return mountRoot } -// Freeze prevents this dirent from walking to more nodes. Freeze is applied -// recursively to all children. -// -// If this particular Dirent represents a Virtual node, then Walks and Creates -// may proceed as before. -// -// Freeze can only be called before the application starts running, otherwise -// the root it might be out of sync with the application root if modified by -// sys_chroot. -func (d *Dirent) Freeze() { - d.mu.Lock() - defer d.mu.Unlock() - if d.frozen { - // Already frozen. - return - } - d.frozen = true - - // Take a reference when freezing. - for _, w := range d.children { - if child := w.Get(); child != nil { - // NOTE: We would normally drop the reference here. But - // instead we're hanging on to it. - ch := child.(*Dirent) - ch.Freeze() - } - } - - // Drop all expired weak references. - d.flush() -} - // descendantOf returns true if the receiver dirent is equal to, or a // descendant of, the argument dirent. // @@ -524,11 +484,6 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl w.Drop() } - // Are we allowed to do the lookup? - if d.frozen && !d.Inode.IsVirtual() { - return nil, syscall.ENOENT - } - // Slow path: load the InodeOperations into memory. Since this is a hot path and the lookup may be // expensive, if possible release the lock and re-acquire it. if walkMayUnlock { @@ -659,11 +614,6 @@ func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags Fi return nil, syscall.EEXIST } - // Are we frozen? - if d.frozen && !d.Inode.IsVirtual() { - return nil, syscall.ENOENT - } - // Try the create. We need to trust the file system to return EEXIST (or something // that will translate to EEXIST) if name already exists. file, err := d.Inode.Create(ctx, d, name, flags, perms) @@ -727,11 +677,6 @@ func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, c return syscall.EEXIST } - // Are we frozen? - if d.frozen && !d.Inode.IsVirtual() { - return syscall.ENOENT - } - // Remove any negative Dirent. We've already asserted above with d.exists // that the only thing remaining here can be a negative Dirent. if w, ok := d.children[name]; ok { @@ -862,49 +807,6 @@ func (d *Dirent) GetDotAttrs(root *Dirent) (DentAttr, DentAttr) { return dot, dot } -// readdirFrozen returns readdir results based solely on the frozen children. -func (d *Dirent) readdirFrozen(root *Dirent, offset int64, dirCtx *DirCtx) (int64, error) { - // Collect attrs for "." and "..". - attrs := make(map[string]DentAttr) - names := []string{".", ".."} - attrs["."], attrs[".."] = d.GetDotAttrs(root) - - // Get info from all children. - d.mu.Lock() - defer d.mu.Unlock() - for name, w := range d.children { - if child := w.Get(); child != nil { - defer child.DecRef() - - // Skip negative children. - if child.(*Dirent).IsNegative() { - continue - } - - sattr := child.(*Dirent).Inode.StableAttr - attrs[name] = DentAttr{ - Type: sattr.Type, - InodeID: sattr.InodeID, - } - names = append(names, name) - } - } - - sort.Strings(names) - - if int(offset) >= len(names) { - return offset, nil - } - names = names[int(offset):] - for _, name := range names { - if err := dirCtx.DirEmit(name, attrs[name]); err != nil { - return offset, err - } - offset++ - } - return offset, nil -} - // DirIterator is an open directory containing directory entries that can be read. type DirIterator interface { // IterateDir emits directory entries by calling dirCtx.EmitDir, beginning @@ -964,10 +866,6 @@ func direntReaddir(ctx context.Context, d *Dirent, it DirIterator, root *Dirent, return offset, nil } - if d.frozen { - return d.readdirFrozen(root, offset, dirCtx) - } - // Collect attrs for "." and "..". dot, dotdot := d.GetDotAttrs(root) @@ -1068,11 +966,6 @@ func (d *Dirent) mount(ctx context.Context, inode *Inode) (newChild *Dirent, err return nil, syserror.EINVAL } - // Are we frozen? - if d.parent.frozen && !d.parent.Inode.IsVirtual() { - return nil, syserror.ENOENT - } - // Dirent that'll replace d. // // Note that NewDirent returns with one reference taken; the reference @@ -1101,11 +994,6 @@ func (d *Dirent) unmount(ctx context.Context, replacement *Dirent) error { return syserror.ENOENT } - // Are we frozen? - if d.parent.frozen && !d.parent.Inode.IsVirtual() { - return syserror.ENOENT - } - // Remount our former child in its place. // // As replacement used to be our child, it must already have the right @@ -1135,11 +1023,6 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath unlock := d.lockDirectory() defer unlock() - // Are we frozen? - if d.frozen && !d.Inode.IsVirtual() { - return syscall.ENOENT - } - // Try to walk to the node. child, err := d.walk(ctx, root, name, false /* may unlock */) if err != nil { @@ -1201,11 +1084,6 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string) unlock := d.lockDirectory() defer unlock() - // Are we frozen? - if d.frozen && !d.Inode.IsVirtual() { - return syscall.ENOENT - } - // Check for dots. if name == "." { // Rejected as the last component by rmdir(2). @@ -1519,15 +1397,6 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string return err } - // Are we frozen? - // TODO(jamieliu): Is this the right errno? - if oldParent.frozen && !oldParent.Inode.IsVirtual() { - return syscall.ENOENT - } - if newParent.frozen && !newParent.Inode.IsVirtual() { - return syscall.ENOENT - } - // Do we have general permission to remove from oldParent and // create/replace in newParent? if err := oldParent.Inode.CheckPermission(ctx, PermMask{Write: true, Execute: true}); err != nil { diff --git a/pkg/sentry/fs/file_overlay_test.go b/pkg/sentry/fs/file_overlay_test.go index a76d87e3a..1971cc680 100644 --- a/pkg/sentry/fs/file_overlay_test.go +++ b/pkg/sentry/fs/file_overlay_test.go @@ -175,90 +175,6 @@ func TestReaddirRevalidation(t *testing.T) { } } -// TestReaddirOverlayFrozen tests that calling Readdir on an overlay file with -// a frozen dirent tree does not make Readdir calls to the underlying files. -// This is a regression test for b/114808269. -func TestReaddirOverlayFrozen(t *testing.T) { - ctx := contexttest.Context(t) - - // Create an overlay with two directories, each with two files. - upper := newTestRamfsDir(ctx, []dirContent{{name: "upper-file1"}, {name: "upper-file2"}}, nil) - lower := newTestRamfsDir(ctx, []dirContent{{name: "lower-file1"}, {name: "lower-file2"}}, nil) - overlayInode := fs.NewTestOverlayDir(ctx, upper, lower, false) - - // Set that overlay as the root. - root := fs.NewDirent(ctx, overlayInode, "root") - ctx = &rootContext{ - Context: ctx, - root: root, - } - - // Check that calling Readdir on the root now returns all 4 files (2 - // from each layer in the overlay). - rootFile, err := root.Inode.GetFile(ctx, root, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("root.Inode.GetFile failed: %v", err) - } - defer rootFile.DecRef() - ser := &fs.CollectEntriesSerializer{} - if err := rootFile.Readdir(ctx, ser); err != nil { - t.Fatalf("rootFile.Readdir failed: %v", err) - } - if got, want := ser.Order, []string{".", "..", "lower-file1", "lower-file2", "upper-file1", "upper-file2"}; !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } - - // Readdir should have been called on upper and lower. - upperDir := upper.InodeOperations.(*dir) - lowerDir := lower.InodeOperations.(*dir) - if !upperDir.ReaddirCalled { - t.Errorf("upperDir.ReaddirCalled got %v, want true", upperDir.ReaddirCalled) - } - if !lowerDir.ReaddirCalled { - t.Errorf("lowerDir.ReaddirCalled got %v, want true", lowerDir.ReaddirCalled) - } - - // Reset. - upperDir.ReaddirCalled = false - lowerDir.ReaddirCalled = false - - // Take references on "upper-file1" and "lower-file1", pinning them in - // the dirent tree. - for _, name := range []string{"upper-file1", "lower-file1"} { - if _, err := root.Walk(ctx, root, name); err != nil { - t.Fatalf("root.Walk(%q) failed: %v", name, err) - } - // Don't drop a reference on the returned dirent so that it - // will stay in the tree. - } - - // Freeze the dirent tree. - root.Freeze() - - // Seek back to the beginning of the file. - if _, err := rootFile.Seek(ctx, fs.SeekSet, 0); err != nil { - t.Fatalf("error seeking to beginning of directory: %v", err) - } - - // Calling Readdir on the root now will return only the pinned - // children. - ser = &fs.CollectEntriesSerializer{} - if err := rootFile.Readdir(ctx, ser); err != nil { - t.Fatalf("rootFile.Readdir failed: %v", err) - } - if got, want := ser.Order, []string{".", "..", "lower-file1", "upper-file1"}; !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got names %v, want %v", got, want) - } - - // Readdir should NOT have been called on upper or lower. - if upperDir.ReaddirCalled { - t.Errorf("upperDir.ReaddirCalled got %v, want false", upperDir.ReaddirCalled) - } - if lowerDir.ReaddirCalled { - t.Errorf("lowerDir.ReaddirCalled got %v, want false", lowerDir.ReaddirCalled) - } -} - type rootContext struct { context.Context root *fs.Dirent diff --git a/pkg/sentry/fs/host/BUILD b/pkg/sentry/fs/host/BUILD index 21003ea45..011625c80 100644 --- a/pkg/sentry/fs/host/BUILD +++ b/pkg/sentry/fs/host/BUILD @@ -10,7 +10,7 @@ go_library( "descriptor_state.go", "device.go", "file.go", - "fs.go", + "host.go", "inode.go", "inode_state.go", "ioctl_unsafe.go", @@ -62,14 +62,12 @@ go_test( size = "small", srcs = [ "descriptor_test.go", - "fs_test.go", "inode_test.go", "socket_test.go", "wait_test.go", ], library = ":host", deps = [ - "//pkg/context", "//pkg/fd", "//pkg/fdnotifier", "//pkg/sentry/contexttest", diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go index 1658979fc..cd84e1337 100644 --- a/pkg/sentry/fs/host/control.go +++ b/pkg/sentry/fs/host/control.go @@ -32,6 +32,8 @@ func newSCMRights(fds []int) control.SCMRights { } // Files implements control.SCMRights.Files. +// +// TODO(gvisor.dev/issue/2017): Port to VFS2. func (c *scmRights) Files(ctx context.Context, max int) (control.RightsFiles, bool) { n := max var trunc bool diff --git a/pkg/sentry/fs/host/descriptor.go b/pkg/sentry/fs/host/descriptor.go index 2a4d1b291..cfdce6a74 100644 --- a/pkg/sentry/fs/host/descriptor.go +++ b/pkg/sentry/fs/host/descriptor.go @@ -16,7 +16,6 @@ package host import ( "fmt" - "path" "syscall" "gvisor.dev/gvisor/pkg/fdnotifier" @@ -28,12 +27,9 @@ import ( // // +stateify savable type descriptor struct { - // donated is true if the host fd was donated by another process. - donated bool - // If origFD >= 0, it is the host fd that this file was originally created // from, which must be available at time of restore. The FD can be closed - // after descriptor is created. Only set if donated is true. + // after descriptor is created. origFD int // wouldBlock is true if value (below) points to a file that can @@ -41,15 +37,13 @@ type descriptor struct { wouldBlock bool // value is the wrapped host fd. It is never saved or restored - // directly. How it is restored depends on whether it was - // donated and the fs.MountSource it was originally - // opened/created from. + // directly. value int `state:"nosave"` } // newDescriptor returns a wrapped host file descriptor. On success, // the descriptor is registered for event notifications with queue. -func newDescriptor(fd int, donated bool, saveable bool, wouldBlock bool, queue *waiter.Queue) (*descriptor, error) { +func newDescriptor(fd int, saveable bool, wouldBlock bool, queue *waiter.Queue) (*descriptor, error) { ownedFD := fd origFD := -1 if saveable { @@ -69,7 +63,6 @@ func newDescriptor(fd int, donated bool, saveable bool, wouldBlock bool, queue * } } return &descriptor{ - donated: donated, origFD: origFD, wouldBlock: wouldBlock, value: ownedFD, @@ -77,25 +70,11 @@ func newDescriptor(fd int, donated bool, saveable bool, wouldBlock bool, queue * } // initAfterLoad initializes the value of the descriptor after Load. -func (d *descriptor) initAfterLoad(mo *superOperations, id uint64, queue *waiter.Queue) error { - if d.donated { - var err error - d.value, err = syscall.Dup(d.origFD) - if err != nil { - return fmt.Errorf("failed to dup restored fd %d: %v", d.origFD, err) - } - } else { - name, ok := mo.inodeMappings[id] - if !ok { - return fmt.Errorf("failed to find path for inode number %d", id) - } - fullpath := path.Join(mo.root, name) - - var err error - d.value, err = open(nil, fullpath) - if err != nil { - return fmt.Errorf("failed to open %q: %v", fullpath, err) - } +func (d *descriptor) initAfterLoad(id uint64, queue *waiter.Queue) error { + var err error + d.value, err = syscall.Dup(d.origFD) + if err != nil { + return fmt.Errorf("failed to dup restored fd %d: %v", d.origFD, err) } if d.wouldBlock { if err := syscall.SetNonblock(d.value, true); err != nil { diff --git a/pkg/sentry/fs/host/descriptor_state.go b/pkg/sentry/fs/host/descriptor_state.go index 8167390a9..e880582ab 100644 --- a/pkg/sentry/fs/host/descriptor_state.go +++ b/pkg/sentry/fs/host/descriptor_state.go @@ -16,7 +16,7 @@ package host // beforeSave is invoked by stateify. func (d *descriptor) beforeSave() { - if d.donated && d.origFD < 0 { + if d.origFD < 0 { panic("donated file descriptor cannot be saved") } } diff --git a/pkg/sentry/fs/host/descriptor_test.go b/pkg/sentry/fs/host/descriptor_test.go index 4205981f5..d8e4605b6 100644 --- a/pkg/sentry/fs/host/descriptor_test.go +++ b/pkg/sentry/fs/host/descriptor_test.go @@ -47,10 +47,10 @@ func TestDescriptorRelease(t *testing.T) { // FD ownership is transferred to the descritor. queue := &waiter.Queue{} - d, err := newDescriptor(fd, false /* donated*/, tc.saveable, tc.wouldBlock, queue) + d, err := newDescriptor(fd, tc.saveable, tc.wouldBlock, queue) if err != nil { syscall.Close(fd) - t.Fatalf("newDescriptor(%d, %t, false, %t, queue) failed, err: %v", fd, tc.saveable, tc.wouldBlock, err) + t.Fatalf("newDescriptor(%d, %t, %t, queue) failed, err: %v", fd, tc.saveable, tc.wouldBlock, err) } if tc.saveable { if d.origFD < 0 { diff --git a/pkg/sentry/fs/host/file.go b/pkg/sentry/fs/host/file.go index e08f56d04..034862694 100644 --- a/pkg/sentry/fs/host/file.go +++ b/pkg/sentry/fs/host/file.go @@ -101,8 +101,8 @@ func newFileFromDonatedFD(ctx context.Context, donated int, mounter fs.FileOwner }) return s, nil default: - msrc := newMountSource(ctx, "/", mounter, &Filesystem{}, fs.MountSourceFlags{}, false /* dontTranslateOwnership */) - inode, err := newInode(ctx, msrc, donated, saveable, true /* donated */) + msrc := fs.NewNonCachingMountSource(ctx, &filesystem{}, fs.MountSourceFlags{}) + inode, err := newInode(ctx, msrc, donated, saveable) if err != nil { return nil, err } diff --git a/pkg/sentry/fs/host/fs.go b/pkg/sentry/fs/host/fs.go deleted file mode 100644 index d3e8e3a36..000000000 --- a/pkg/sentry/fs/host/fs.go +++ /dev/null @@ -1,339 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package host implements an fs.Filesystem for files backed by host -// file descriptors. -package host - -import ( - "fmt" - "path" - "path/filepath" - "strconv" - "strings" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// FilesystemName is the name under which Filesystem is registered. -const FilesystemName = "whitelistfs" - -const ( - // whitelistKey is the mount option containing a comma-separated list - // of host paths to whitelist. - whitelistKey = "whitelist" - - // rootPathKey is the mount option containing the root path of the - // mount. - rootPathKey = "root" - - // dontTranslateOwnershipKey is the key to superOperations.dontTranslateOwnership. - dontTranslateOwnershipKey = "dont_translate_ownership" -) - -// maxTraversals determines link traversals in building the whitelist. -const maxTraversals = 10 - -// Filesystem is a pseudo file system that is only available during the setup -// to lock down the configurations. This filesystem should only be mounted at root. -// -// Think twice before exposing this to applications. -// -// +stateify savable -type Filesystem struct { - // whitelist is a set of host paths to whitelist. - paths []string -} - -var _ fs.Filesystem = (*Filesystem)(nil) - -// Name is the identifier of this file system. -func (*Filesystem) Name() string { - return FilesystemName -} - -// AllowUserMount prohibits users from using mount(2) with this file system. -func (*Filesystem) AllowUserMount() bool { - return false -} - -// AllowUserList allows this filesystem to be listed in /proc/filesystems. -func (*Filesystem) AllowUserList() bool { - return true -} - -// Flags returns that there is nothing special about this file system. -func (*Filesystem) Flags() fs.FilesystemFlags { - return 0 -} - -// Mount returns an fs.Inode exposing the host file system. It is intended to be locked -// down in PreExec below. -func (f *Filesystem) Mount(ctx context.Context, _ string, flags fs.MountSourceFlags, data string, _ interface{}) (*fs.Inode, error) { - // Parse generic comma-separated key=value options. - options := fs.GenericMountSourceOptions(data) - - // Grab the whitelist if one was specified. - // TODO(edahlgren/mpratt/hzy): require another option "testonly" in order to allow - // no whitelist. - if wl, ok := options[whitelistKey]; ok { - f.paths = strings.Split(wl, "|") - delete(options, whitelistKey) - } - - // If the rootPath was set, use it. Othewise default to the root of the - // host fs. - rootPath := "/" - if rp, ok := options[rootPathKey]; ok { - rootPath = rp - delete(options, rootPathKey) - - // We must relativize the whitelisted paths to the new root. - for i, p := range f.paths { - rel, err := filepath.Rel(rootPath, p) - if err != nil { - return nil, fmt.Errorf("whitelist path %q must be a child of root path %q", p, rootPath) - } - f.paths[i] = path.Join("/", rel) - } - } - fd, err := open(nil, rootPath) - if err != nil { - return nil, fmt.Errorf("failed to find root: %v", err) - } - - var dontTranslateOwnership bool - if v, ok := options[dontTranslateOwnershipKey]; ok { - b, err := strconv.ParseBool(v) - if err != nil { - return nil, fmt.Errorf("invalid value for %q: %v", dontTranslateOwnershipKey, err) - } - dontTranslateOwnership = b - delete(options, dontTranslateOwnershipKey) - } - - // Fail if the caller passed us more options than we know about. - if len(options) > 0 { - return nil, fmt.Errorf("unsupported mount options: %v", options) - } - - // The mounting EUID/EGID will be cached by this file system. This will - // be used to assign ownership to files that we own. - owner := fs.FileOwnerFromContext(ctx) - - // Construct the host file system mount and inode. - msrc := newMountSource(ctx, rootPath, owner, f, flags, dontTranslateOwnership) - return newInode(ctx, msrc, fd, false /* saveable */, false /* donated */) -} - -// InstallWhitelist locks down the MountNamespace to only the currently installed -// Dirents and the given paths. -func (f *Filesystem) InstallWhitelist(ctx context.Context, m *fs.MountNamespace) error { - return installWhitelist(ctx, m, f.paths) -} - -func installWhitelist(ctx context.Context, m *fs.MountNamespace, paths []string) error { - if len(paths) == 0 || (len(paths) == 1 && paths[0] == "") { - // Warning will be logged during filter installation if the empty - // whitelist matters (allows for host file access). - return nil - } - - // Done tracks entries already added. - done := make(map[string]bool) - root := m.Root() - defer root.DecRef() - - for i := 0; i < len(paths); i++ { - // Make sure the path is absolute. This is a sanity check. - if !path.IsAbs(paths[i]) { - return fmt.Errorf("path %q is not absolute", paths[i]) - } - - // We need to add all the intermediate paths, in case one of - // them is a symlink that needs to be resolved. - for j := 1; j <= len(paths[i]); j++ { - if j < len(paths[i]) && paths[i][j] != '/' { - continue - } - current := paths[i][:j] - - // Lookup the given component in the tree. - remainingTraversals := uint(maxTraversals) - d, err := m.FindLink(ctx, root, nil, current, &remainingTraversals) - if err != nil { - log.Warningf("populate failed for %q: %v", current, err) - continue - } - - // It's critical that this DecRef happens after the - // freeze below. This ensures that the dentry is in - // place to be frozen. Otherwise, we freeze without - // these entries. - defer d.DecRef() - - // Expand the last component if necessary. - if current == paths[i] { - // Is it a directory or symlink? - sattr := d.Inode.StableAttr - if fs.IsDir(sattr) { - for name := range childDentAttrs(ctx, d) { - paths = append(paths, path.Join(current, name)) - } - } - if fs.IsSymlink(sattr) { - // Only expand symlinks once. The - // folder structure may contain - // recursive symlinks and we don't want - // to end up infinitely expanding this - // symlink. This is safe because this - // is the last component. If a later - // path wants to symlink something - // beneath this symlink that will still - // be handled by the FindLink above. - if done[current] { - continue - } - - s, err := d.Inode.Readlink(ctx) - if err != nil { - log.Warningf("readlink failed for %q: %v", current, err) - continue - } - if path.IsAbs(s) { - paths = append(paths, s) - } else { - target := path.Join(path.Dir(current), s) - paths = append(paths, target) - } - } - } - - // Only report this one once even though we may look - // it up more than once. If we whitelist /a/b,/a then - // /a will be "done" when it is looked up for /a/b, - // however we still need to expand all of its contents - // when whitelisting /a. - if !done[current] { - log.Debugf("whitelisted: %s", current) - } - done[current] = true - } - } - - // Freeze the mount tree in place. This prevents any new paths from - // being opened and any old ones from being removed. If we do provide - // tmpfs mounts, we'll want to freeze/thaw those separately. - m.Freeze() - return nil -} - -func childDentAttrs(ctx context.Context, d *fs.Dirent) map[string]fs.DentAttr { - dirname, _ := d.FullName(nil /* root */) - dir, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) - if err != nil { - log.Warningf("failed to open directory %q: %v", dirname, err) - return nil - } - dir.DecRef() - var stubSerializer fs.CollectEntriesSerializer - if err := dir.Readdir(ctx, &stubSerializer); err != nil { - log.Warningf("failed to iterate on host directory %q: %v", dirname, err) - return nil - } - delete(stubSerializer.Entries, ".") - delete(stubSerializer.Entries, "..") - return stubSerializer.Entries -} - -// newMountSource constructs a new host fs.MountSource -// relative to a root path. The root should match the mount point. -func newMountSource(ctx context.Context, root string, mounter fs.FileOwner, filesystem fs.Filesystem, flags fs.MountSourceFlags, dontTranslateOwnership bool) *fs.MountSource { - return fs.NewMountSource(ctx, &superOperations{ - root: root, - inodeMappings: make(map[uint64]string), - mounter: mounter, - dontTranslateOwnership: dontTranslateOwnership, - }, filesystem, flags) -} - -// superOperations implements fs.MountSourceOperations. -// -// +stateify savable -type superOperations struct { - fs.SimpleMountSourceOperations - - // root is the path of the mount point. All inode mappings - // are relative to this root. - root string - - // inodeMappings contains mappings of fs.Inodes associated - // with this MountSource to paths under root. - inodeMappings map[uint64]string - - // mounter is the cached EUID/EGID that mounted this file system. - mounter fs.FileOwner - - // dontTranslateOwnership indicates whether to not translate file - // ownership. - // - // By default, files/directories owned by the sandbox uses UID/GID - // of the mounter. For files/directories that are not owned by the - // sandbox, file UID/GID is translated to a UID/GID which cannot - // be mapped in the sandboxed application's user namespace. The - // UID/GID will look like the nobody UID/GID (65534) but is not - // strictly owned by the user "nobody". - // - // If whitelistfs is a lower filesystem in an overlay, set - // dont_translate_ownership=true in mount options. - dontTranslateOwnership bool -} - -var _ fs.MountSourceOperations = (*superOperations)(nil) - -// ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings. -func (m *superOperations) ResetInodeMappings() { - m.inodeMappings = make(map[uint64]string) -} - -// SaveInodeMapping implements fs.MountSourceOperations.SaveInodeMapping. -func (m *superOperations) SaveInodeMapping(inode *fs.Inode, path string) { - // This is very unintuitive. We *CANNOT* trust the inode's StableAttrs, - // because overlay copyUp may have changed them out from under us. - // So much for "immutable". - sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr - m.inodeMappings[sattr.InodeID] = path -} - -// Keep implements fs.MountSourceOperations.Keep. -// -// TODO(b/72455313,b/77596690): It is possible to change the permissions on a -// host file while it is in the dirent cache (say from RO to RW), but it is not -// possible to re-open the file with more relaxed permissions, since the host -// FD is already open and stored in the inode. -// -// Using the dirent LRU cache increases the odds that this bug is encountered. -// Since host file access is relatively fast anyways, we disable the LRU cache -// for host fs files. Once we can properly deal with permissions changes and -// re-opening host files, we should revisit whether or not to make use of the -// LRU cache. -func (*superOperations) Keep(*fs.Dirent) bool { - return false -} - -func init() { - fs.RegisterFilesystem(&Filesystem{}) -} diff --git a/pkg/sentry/fs/host/fs_test.go b/pkg/sentry/fs/host/fs_test.go deleted file mode 100644 index 3111d2df9..000000000 --- a/pkg/sentry/fs/host/fs_test.go +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package host - -import ( - "fmt" - "io/ioutil" - "os" - "path" - "reflect" - "sort" - "testing" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fs" -) - -// newTestMountNamespace creates a MountNamespace with a ramfs root. -// It returns the host folder created, which should be removed when done. -func newTestMountNamespace(t *testing.T) (*fs.MountNamespace, string, error) { - p, err := ioutil.TempDir("", "root") - if err != nil { - return nil, "", err - } - - fd, err := open(nil, p) - if err != nil { - os.RemoveAll(p) - return nil, "", err - } - ctx := contexttest.Context(t) - root, err := newInode(ctx, newMountSource(ctx, p, fs.RootOwner, &Filesystem{}, fs.MountSourceFlags{}, false), fd, false, false) - if err != nil { - os.RemoveAll(p) - return nil, "", err - } - mm, err := fs.NewMountNamespace(ctx, root) - if err != nil { - os.RemoveAll(p) - return nil, "", err - } - return mm, p, nil -} - -// createTestDirs populates the root with some test files and directories. -// /a/a1.txt -// /a/a2.txt -// /b/b1.txt -// /b/c/c1.txt -// /symlinks/normal.txt -// /symlinks/to_normal.txt -> /symlinks/normal.txt -// /symlinks/recursive -> /symlinks -func createTestDirs(ctx context.Context, t *testing.T, m *fs.MountNamespace) error { - r := m.Root() - defer r.DecRef() - - if err := r.CreateDirectory(ctx, r, "a", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - a, err := r.Walk(ctx, r, "a") - if err != nil { - return err - } - defer a.DecRef() - - a1, err := a.Create(ctx, r, "a1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - a1.DecRef() - - a2, err := a.Create(ctx, r, "a2.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - a2.DecRef() - - if err := r.CreateDirectory(ctx, r, "b", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - b, err := r.Walk(ctx, r, "b") - if err != nil { - return err - } - defer b.DecRef() - - b1, err := b.Create(ctx, r, "b1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - b1.DecRef() - - if err := b.CreateDirectory(ctx, r, "c", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - c, err := b.Walk(ctx, r, "c") - if err != nil { - return err - } - defer c.DecRef() - - c1, err := c.Create(ctx, r, "c1.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - c1.DecRef() - - if err := r.CreateDirectory(ctx, r, "symlinks", fs.FilePermsFromMode(0777)); err != nil { - return err - } - - symlinks, err := r.Walk(ctx, r, "symlinks") - if err != nil { - return err - } - defer symlinks.DecRef() - - normal, err := symlinks.Create(ctx, r, "normal.txt", fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0666)) - if err != nil { - return err - } - normal.DecRef() - - if err := symlinks.CreateLink(ctx, r, "/symlinks/normal.txt", "to_normal.txt"); err != nil { - return err - } - - return symlinks.CreateLink(ctx, r, "/symlinks", "recursive") -} - -// allPaths returns a slice of all paths of entries visible in the rootfs. -func allPaths(ctx context.Context, t *testing.T, m *fs.MountNamespace, base string) ([]string, error) { - var paths []string - root := m.Root() - defer root.DecRef() - - maxTraversals := uint(1) - d, err := m.FindLink(ctx, root, nil, base, &maxTraversals) - if err != nil { - t.Logf("FindLink failed for %q", base) - return paths, err - } - defer d.DecRef() - - if fs.IsDir(d.Inode.StableAttr) { - dir, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) - if err != nil { - return nil, fmt.Errorf("failed to open directory %q: %v", base, err) - } - iter, ok := dir.FileOperations.(fs.DirIterator) - if !ok { - return nil, fmt.Errorf("cannot directly iterate on host directory %q", base) - } - dirCtx := &fs.DirCtx{ - Serializer: noopDentrySerializer{}, - } - if _, err := fs.DirentReaddir(ctx, d, iter, root, dirCtx, 0); err != nil { - return nil, err - } - for name := range dirCtx.DentAttrs() { - if name == "." || name == ".." { - continue - } - - fullName := path.Join(base, name) - paths = append(paths, fullName) - - // Recurse. - subpaths, err := allPaths(ctx, t, m, fullName) - if err != nil { - return paths, err - } - paths = append(paths, subpaths...) - } - } - - return paths, nil -} - -type noopDentrySerializer struct{} - -func (noopDentrySerializer) CopyOut(string, fs.DentAttr) error { - return nil -} -func (noopDentrySerializer) Written() int { - return 4096 -} - -// pathsEqual returns true if the two string slices contain the same entries. -func pathsEqual(got, want []string) bool { - sort.Strings(got) - sort.Strings(want) - - if len(got) != len(want) { - return false - } - - for i := range got { - if got[i] != want[i] { - return false - } - } - - return true -} - -func TestWhitelist(t *testing.T) { - for _, test := range []struct { - // description of the test. - desc string - // paths are the paths to whitelist - paths []string - // want are all of the directory entries that should be - // visible (nothing beyond this set should be visible). - want []string - }{ - { - desc: "root", - paths: []string{"/"}, - want: []string{"/a", "/a/a1.txt", "/a/a2.txt", "/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt", "/symlinks", "/symlinks/normal.txt", "/symlinks/to_normal.txt", "/symlinks/recursive"}, - }, - { - desc: "top-level directories", - paths: []string{"/a", "/b"}, - want: []string{"/a", "/a/a1.txt", "/a/a2.txt", "/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "nested directories (1/2)", - paths: []string{"/b", "/b/c"}, - want: []string{"/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "nested directories (2/2)", - paths: []string{"/b/c", "/b"}, - want: []string{"/b", "/b/b1.txt", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "single file", - paths: []string{"/b/c/c1.txt"}, - want: []string{"/b", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "single file and directory", - paths: []string{"/a/a1.txt", "/b/c"}, - want: []string{"/a", "/a/a1.txt", "/b", "/b/c", "/b/c/c1.txt"}, - }, - { - desc: "symlink", - paths: []string{"/symlinks/to_normal.txt"}, - want: []string{"/symlinks", "/symlinks/normal.txt", "/symlinks/to_normal.txt"}, - }, - { - desc: "recursive symlink", - paths: []string{"/symlinks/recursive/normal.txt"}, - want: []string{"/symlinks", "/symlinks/normal.txt", "/symlinks/recursive"}, - }, - } { - t.Run(test.desc, func(t *testing.T) { - m, p, err := newTestMountNamespace(t) - if err != nil { - t.Errorf("Failed to create MountNamespace: %v", err) - } - defer os.RemoveAll(p) - - ctx := withRoot(contexttest.RootContext(t), m.Root()) - if err := createTestDirs(ctx, t, m); err != nil { - t.Errorf("Failed to create test dirs: %v", err) - } - - if err := installWhitelist(ctx, m, test.paths); err != nil { - t.Errorf("installWhitelist(%v) err got %v want nil", test.paths, err) - } - - got, err := allPaths(ctx, t, m, "/") - if err != nil { - t.Fatalf("Failed to lookup paths (whitelisted: %v): %v", test.paths, err) - } - - if !pathsEqual(got, test.want) { - t.Errorf("For paths %v got %v want %v", test.paths, got, test.want) - } - }) - } -} - -func TestRootPath(t *testing.T) { - // Create a temp dir, which will be the root of our mounted fs. - rootPath, err := ioutil.TempDir(os.TempDir(), "root") - if err != nil { - t.Fatalf("TempDir failed: %v", err) - } - defer os.RemoveAll(rootPath) - - // Create two files inside the new root, one which will be whitelisted - // and one not. - whitelisted, err := ioutil.TempFile(rootPath, "white") - if err != nil { - t.Fatalf("TempFile failed: %v", err) - } - if _, err := ioutil.TempFile(rootPath, "black"); err != nil { - t.Fatalf("TempFile failed: %v", err) - } - - // Create a mount with a root path and single whitelisted file. - hostFS := &Filesystem{} - ctx := contexttest.Context(t) - data := fmt.Sprintf("%s=%s,%s=%s", rootPathKey, rootPath, whitelistKey, whitelisted.Name()) - inode, err := hostFS.Mount(ctx, "", fs.MountSourceFlags{}, data, nil) - if err != nil { - t.Fatalf("Mount failed: %v", err) - } - mm, err := fs.NewMountNamespace(ctx, inode) - if err != nil { - t.Fatalf("NewMountNamespace failed: %v", err) - } - if err := hostFS.InstallWhitelist(ctx, mm); err != nil { - t.Fatalf("InstallWhitelist failed: %v", err) - } - - // Get the contents of the root directory. - rootDir := mm.Root() - rctx := withRoot(ctx, rootDir) - f, err := rootDir.Inode.GetFile(rctx, rootDir, fs.FileFlags{}) - if err != nil { - t.Fatalf("GetFile failed: %v", err) - } - c := &fs.CollectEntriesSerializer{} - if err := f.Readdir(rctx, c); err != nil { - t.Fatalf("Readdir failed: %v", err) - } - - // We should have only our whitelisted file, plus the dots. - want := []string{path.Base(whitelisted.Name()), ".", ".."} - got := c.Order - sort.Strings(want) - sort.Strings(got) - if !reflect.DeepEqual(got, want) { - t.Errorf("Readdir got %v, wanted %v", got, want) - } -} - -type rootContext struct { - context.Context - root *fs.Dirent -} - -// withRoot returns a copy of ctx with the given root. -func withRoot(ctx context.Context, root *fs.Dirent) context.Context { - return &rootContext{ - Context: ctx, - root: root, - } -} - -// Value implements Context.Value. -func (rc rootContext) Value(key interface{}) interface{} { - switch key { - case fs.CtxRoot: - rc.root.IncRef() - return rc.root - default: - return rc.Context.Value(key) - } -} diff --git a/pkg/sentry/fs/host/host.go b/pkg/sentry/fs/host/host.go new file mode 100644 index 000000000..081ba1dd8 --- /dev/null +++ b/pkg/sentry/fs/host/host.go @@ -0,0 +1,59 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package host supports file descriptors imported directly. +package host + +import ( + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/syserror" +) + +// filesystem is a host filesystem. +// +// +stateify savable +type filesystem struct{} + +func init() { + fs.RegisterFilesystem(&filesystem{}) +} + +// FilesystemName is the name under which the filesystem is registered. +const FilesystemName = "host" + +// Name is the name of the filesystem. +func (*filesystem) Name() string { + return FilesystemName +} + +// Mount returns an error. Mounting hostfs is not allowed. +func (*filesystem) Mount(ctx context.Context, device string, flags fs.MountSourceFlags, data string, dataObj interface{}) (*fs.Inode, error) { + return nil, syserror.EPERM +} + +// AllowUserMount prohibits users from using mount(2) with this file system. +func (*filesystem) AllowUserMount() bool { + return false +} + +// AllowUserList prohibits this filesystem to be listed in /proc/filesystems. +func (*filesystem) AllowUserList() bool { + return false +} + +// Flags returns that there is nothing special about this file system. +func (*filesystem) Flags() fs.FilesystemFlags { + return 0 +} diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go index 6fa39caab..1da3c0a17 100644 --- a/pkg/sentry/fs/host/inode.go +++ b/pkg/sentry/fs/host/inode.go @@ -17,12 +17,10 @@ package host import ( "syscall" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/secio" - "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -69,9 +67,6 @@ type inodeOperations struct { // // +stateify savable type inodeFileState struct { - // Common file system state. - mops *superOperations `state:"wait"` - // descriptor is the backing host FD. descriptor *descriptor `state:"wait"` @@ -160,7 +155,7 @@ func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, err if err := syscall.Fstat(i.FD(), &s); err != nil { return fs.UnstableAttr{}, err } - return unstableAttr(i.mops, &s), nil + return unstableAttr(&s), nil } // Allocate implements fsutil.CachedFileObject.Allocate. @@ -172,7 +167,7 @@ func (i *inodeFileState) Allocate(_ context.Context, offset, length int64) error var _ fs.InodeOperations = (*inodeOperations)(nil) // newInode returns a new fs.Inode backed by the host FD. -func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool, donated bool) (*fs.Inode, error) { +func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool) (*fs.Inode, error) { // Retrieve metadata. var s syscall.Stat_t err := syscall.Fstat(fd, &s) @@ -181,24 +176,17 @@ func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool, } fileState := &inodeFileState{ - mops: msrc.MountSourceOperations.(*superOperations), sattr: stableAttr(&s), } // Initialize the wrapped host file descriptor. - fileState.descriptor, err = newDescriptor( - fd, - donated, - saveable, - wouldBlock(&s), - &fileState.queue, - ) + fileState.descriptor, err = newDescriptor(fd, saveable, wouldBlock(&s), &fileState.queue) if err != nil { return nil, err } // Build the fs.InodeOperations. - uattr := unstableAttr(msrc.MountSourceOperations.(*superOperations), &s) + uattr := unstableAttr(&s) iops := &inodeOperations{ fileState: fileState, cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{ @@ -232,54 +220,23 @@ func (i *inodeOperations) Release(context.Context) { // Lookup implements fs.InodeOperations.Lookup. func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { - // Get a new FD relative to i at name. - fd, err := open(i, name) - if err != nil { - if err == syserror.ENOENT { - return nil, syserror.ENOENT - } - return nil, err - } - - inode, err := newInode(ctx, dir.MountSource, fd, false /* saveable */, false /* donated */) - if err != nil { - return nil, err - } - - // Return the fs.Dirent. - return fs.NewDirent(ctx, inode, name), nil + return nil, syserror.ENOENT } // Create implements fs.InodeOperations.Create. func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) { - // Create a file relative to i at name. - // - // N.B. We always open this file O_RDWR regardless of flags because a - // future GetFile might want more access. Open allows this regardless - // of perm. - fd, err := openAt(i, name, syscall.O_RDWR|syscall.O_CREAT|syscall.O_EXCL, perm.LinuxMode()) - if err != nil { - return nil, err - } - - inode, err := newInode(ctx, dir.MountSource, fd, false /* saveable */, false /* donated */) - if err != nil { - return nil, err - } + return nil, syserror.EPERM - d := fs.NewDirent(ctx, inode, name) - defer d.DecRef() - return inode.GetFile(ctx, d, flags) } // CreateDirectory implements fs.InodeOperations.CreateDirectory. func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { - return syscall.Mkdirat(i.fileState.FD(), name, uint32(perm.LinuxMode())) + return syserror.EPERM } // CreateLink implements fs.InodeOperations.CreateLink. func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error { - return createLink(i.fileState.FD(), oldname, newname) + return syserror.EPERM } // CreateHardLink implements fs.InodeOperations.CreateHardLink. @@ -294,25 +251,17 @@ func (*inodeOperations) CreateFifo(context.Context, *fs.Inode, string, fs.FilePe // Remove implements fs.InodeOperations.Remove. func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error { - return unlinkAt(i.fileState.FD(), name, false /* dir */) + return syserror.EPERM } // RemoveDirectory implements fs.InodeOperations.RemoveDirectory. func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error { - return unlinkAt(i.fileState.FD(), name, true /* dir */) + return syserror.EPERM } // Rename implements fs.InodeOperations.Rename. func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { - op, ok := oldParent.InodeOperations.(*inodeOperations) - if !ok { - return syscall.EXDEV - } - np, ok := newParent.InodeOperations.(*inodeOperations) - if !ok { - return syscall.EXDEV - } - return syscall.Renameat(op.fileState.FD(), oldName, np.fileState.FD(), newName) + return syserror.EPERM } // Bind implements fs.InodeOperations.Bind. @@ -461,69 +410,7 @@ func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {} // readdirAll returns all of the directory entries in i. func (i *inodeOperations) readdirAll(d *dirInfo) (map[string]fs.DentAttr, error) { - i.readdirMu.Lock() - defer i.readdirMu.Unlock() - - fd := i.fileState.FD() - - // syscall.ReadDirent will use getdents, which will seek the file past - // the last directory entry. To read the directory entries a second - // time, we need to seek back to the beginning. - if _, err := syscall.Seek(fd, 0, 0); err != nil { - if err == syscall.ESPIPE { - // All directories should be seekable. If this file - // isn't seekable, it is not a directory and we should - // return that more sane error. - err = syscall.ENOTDIR - } - return nil, err - } - - names := make([]string, 0, 100) - for { - // Refill the buffer if necessary - if d.bufp >= d.nbuf { - d.bufp = 0 - // ReadDirent will just do a sys_getdents64 to the kernel. - n, err := syscall.ReadDirent(fd, d.buf) - if err != nil { - return nil, err - } - if n == 0 { - break // EOF - } - d.nbuf = n - } - - var nb int - // Parse the dirent buffer we just get and return the directory names along - // with the number of bytes consumed in the buffer. - nb, _, names = syscall.ParseDirent(d.buf[d.bufp:d.nbuf], -1, names) - d.bufp += nb - } - - entries := make(map[string]fs.DentAttr) - for _, filename := range names { - // Lookup the type and host device and inode. - stat, lerr := fstatat(fd, filename, linux.AT_SYMLINK_NOFOLLOW) - if lerr == syscall.ENOENT { - // File disappeared between readdir and lstat. - // Just treat it as if it didn't exist. - continue - } - - // There was a serious problem, we should probably report it. - if lerr != nil { - return nil, lerr - } - - entries[filename] = fs.DentAttr{ - Type: nodeType(&stat), - InodeID: hostFileDevice.Map(device.MultiDeviceKey{ - Device: stat.Dev, - Inode: stat.Ino, - }), - } - } - return entries, nil + // We only support non-directory file descriptors that have been + // imported, so just claim that this isn't a directory, even if it is. + return nil, syscall.ENOTDIR } diff --git a/pkg/sentry/fs/host/inode_state.go b/pkg/sentry/fs/host/inode_state.go index 299e0e0b0..1adbd4562 100644 --- a/pkg/sentry/fs/host/inode_state.go +++ b/pkg/sentry/fs/host/inode_state.go @@ -18,29 +18,14 @@ import ( "fmt" "syscall" - "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" ) -// beforeSave is invoked by stateify. -func (i *inodeFileState) beforeSave() { - if !i.queue.IsEmpty() { - panic("event queue must be empty") - } - if !i.descriptor.donated && i.sattr.Type == fs.RegularFile { - uattr, err := i.unstableAttr(context.Background()) - if err != nil { - panic(fs.ErrSaveRejection{fmt.Errorf("failed to get unstable atttribute of %s: %v", i.mops.inodeMappings[i.sattr.InodeID], err)}) - } - i.savedUAttr = &uattr - } -} - // afterLoad is invoked by stateify. func (i *inodeFileState) afterLoad() { // Initialize the descriptor value. - if err := i.descriptor.initAfterLoad(i.mops, i.sattr.InodeID, &i.queue); err != nil { + if err := i.descriptor.initAfterLoad(i.sattr.InodeID, &i.queue); err != nil { panic(fmt.Sprintf("failed to load value of descriptor: %v", err)) } @@ -61,19 +46,4 @@ func (i *inodeFileState) afterLoad() { // change across save and restore, error out. panic(fs.ErrCorruption{fmt.Errorf("host %s conflict in host device mappings: %s", key, hostFileDevice)}) } - - if !i.descriptor.donated && i.sattr.Type == fs.RegularFile { - env, ok := fs.CurrentRestoreEnvironment() - if !ok { - panic("missing restore environment") - } - uattr := unstableAttr(i.mops, &s) - if env.ValidateFileSize && uattr.Size != i.savedUAttr.Size { - panic(fs.ErrCorruption{fmt.Errorf("file size has changed for %s: previously %d, now %d", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.Size, uattr.Size)}) - } - if env.ValidateFileTimestamp && uattr.ModificationTime != i.savedUAttr.ModificationTime { - panic(fs.ErrCorruption{fmt.Errorf("file modification time has changed for %s: previously %v, now %v", i.mops.inodeMappings[i.sattr.InodeID], i.savedUAttr.ModificationTime, uattr.ModificationTime)}) - } - i.savedUAttr = nil - } } diff --git a/pkg/sentry/fs/host/inode_test.go b/pkg/sentry/fs/host/inode_test.go index 7221bc825..4c374681c 100644 --- a/pkg/sentry/fs/host/inode_test.go +++ b/pkg/sentry/fs/host/inode_test.go @@ -15,9 +15,6 @@ package host import ( - "io/ioutil" - "os" - "path" "syscall" "testing" @@ -25,69 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" ) -// TestMultipleReaddir verifies that multiple Readdir calls return the same -// thing if they use different dir contexts. -func TestMultipleReaddir(t *testing.T) { - p, err := ioutil.TempDir("", "readdir") - if err != nil { - t.Fatalf("Failed to create test dir: %v", err) - } - defer os.RemoveAll(p) - - f, err := os.Create(path.Join(p, "a.txt")) - if err != nil { - t.Fatalf("Failed to create a.txt: %v", err) - } - f.Close() - - f, err = os.Create(path.Join(p, "b.txt")) - if err != nil { - t.Fatalf("Failed to create b.txt: %v", err) - } - f.Close() - - fd, err := open(nil, p) - if err != nil { - t.Fatalf("Failed to open %q: %v", p, err) - } - ctx := contexttest.Context(t) - n, err := newInode(ctx, newMountSource(ctx, p, fs.RootOwner, &Filesystem{}, fs.MountSourceFlags{}, false), fd, false, false) - if err != nil { - t.Fatalf("Failed to create inode: %v", err) - } - - dirent := fs.NewDirent(ctx, n, "readdir") - openFile, err := n.GetFile(ctx, dirent, fs.FileFlags{Read: true}) - if err != nil { - t.Fatalf("Failed to get file: %v", err) - } - defer openFile.DecRef() - - c1 := &fs.DirCtx{DirCursor: new(string)} - if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c1, 0); err != nil { - t.Fatalf("First Readdir failed: %v", err) - } - - c2 := &fs.DirCtx{DirCursor: new(string)} - if _, err := openFile.FileOperations.(*fileOperations).IterateDir(ctx, dirent, c2, 0); err != nil { - t.Errorf("Second Readdir failed: %v", err) - } - - if _, ok := c1.DentAttrs()["a.txt"]; !ok { - t.Errorf("want a.txt in first Readdir, got %v", c1.DentAttrs()) - } - if _, ok := c1.DentAttrs()["b.txt"]; !ok { - t.Errorf("want b.txt in first Readdir, got %v", c1.DentAttrs()) - } - - if _, ok := c2.DentAttrs()["a.txt"]; !ok { - t.Errorf("want a.txt in second Readdir, got %v", c2.DentAttrs()) - } - if _, ok := c2.DentAttrs()["b.txt"]; !ok { - t.Errorf("want b.txt in second Readdir, got %v", c2.DentAttrs()) - } -} - // TestCloseFD verifies fds will be closed. func TestCloseFD(t *testing.T) { var p [2]int diff --git a/pkg/sentry/fs/host/ioctl_unsafe.go b/pkg/sentry/fs/host/ioctl_unsafe.go index 271582e54..150ac8e19 100644 --- a/pkg/sentry/fs/host/ioctl_unsafe.go +++ b/pkg/sentry/fs/host/ioctl_unsafe.go @@ -21,6 +21,8 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" ) +// LINT.IfChange + func ioctlGetTermios(fd int) (*linux.Termios, error) { var t linux.Termios _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), linux.TCGETS, uintptr(unsafe.Pointer(&t))) @@ -54,3 +56,5 @@ func ioctlSetWinsize(fd int, w *linux.Winsize) error { } return nil } + +// LINT.ThenChange(../../fsimpl/host/ioctl_unsafe.go) diff --git a/pkg/sentry/fs/host/tty.go b/pkg/sentry/fs/host/tty.go index 3f218b4a7..cb91355ab 100644 --- a/pkg/sentry/fs/host/tty.go +++ b/pkg/sentry/fs/host/tty.go @@ -26,6 +26,8 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) +// LINT.IfChange + // TTYFileOperations implements fs.FileOperations for a host file descriptor // that wraps a TTY FD. // @@ -43,6 +45,7 @@ type TTYFileOperations struct { // connected to this TTY. fgProcessGroup *kernel.ProcessGroup + // termios contains the terminal attributes for this TTY. termios linux.KernelTermios } @@ -357,3 +360,5 @@ func (t *TTYFileOperations) checkChange(ctx context.Context, sig linux.Signal) e _ = pg.SendSignal(kernel.SignalInfoPriv(sig)) return kernel.ERESTARTSYS } + +// LINT.ThenChange(../../fsimpl/host/tty.go) diff --git a/pkg/sentry/fs/host/util.go b/pkg/sentry/fs/host/util.go index 7c60dc1db..1b0356930 100644 --- a/pkg/sentry/fs/host/util.go +++ b/pkg/sentry/fs/host/util.go @@ -16,7 +16,6 @@ package host import ( "os" - "path" "syscall" "gvisor.dev/gvisor/pkg/abi/linux" @@ -24,49 +23,10 @@ import ( "gvisor.dev/gvisor/pkg/sentry/device" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/time" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/syserror" ) -func open(parent *inodeOperations, name string) (int, error) { - if parent == nil && !path.IsAbs(name) { - return -1, syserror.EINVAL - } - name = path.Clean(name) - - // Don't follow through symlinks. - flags := syscall.O_NOFOLLOW - - if fd, err := openAt(parent, name, flags|syscall.O_RDWR, 0); err == nil { - return fd, nil - } - // Retry as read-only. - if fd, err := openAt(parent, name, flags|syscall.O_RDONLY, 0); err == nil { - return fd, nil - } - - // Retry as write-only. - if fd, err := openAt(parent, name, flags|syscall.O_WRONLY, 0); err == nil { - return fd, nil - } - - // Retry as a symlink, by including O_PATH as an option. - fd, err := openAt(parent, name, linux.O_PATH|flags, 0) - if err == nil { - return fd, nil - } - - // Everything failed. - return -1, err -} - -func openAt(parent *inodeOperations, name string, flags int, perm linux.FileMode) (int, error) { - if parent == nil { - return syscall.Open(name, flags, uint32(perm)) - } - return syscall.Openat(parent.fileState.FD(), name, flags, uint32(perm)) -} - func nodeType(s *syscall.Stat_t) fs.InodeType { switch x := (s.Mode & syscall.S_IFMT); x { case syscall.S_IFLNK: @@ -107,54 +67,22 @@ func stableAttr(s *syscall.Stat_t) fs.StableAttr { } } -func owner(mo *superOperations, s *syscall.Stat_t) fs.FileOwner { - // User requested no translation, just return actual owner. - if mo.dontTranslateOwnership { - return fs.FileOwner{auth.KUID(s.Uid), auth.KGID(s.Gid)} - } - - // Show only IDs relevant to the sandboxed task. I.e. if we not own the - // file, no sandboxed task can own the file. In that case, we - // use OverflowID for UID, implying that the IDs are not mapped in the - // "root" user namespace. - // - // E.g. - // sandbox's host EUID/EGID is 1/1. - // some_dir's host UID/GID is 2/1. - // Task that mounted this fs has virtualized EUID/EGID 5/5. - // - // If you executed `ls -n` in the sandboxed task, it would show: - // drwxwrxwrx [...] 65534 5 [...] some_dir - - // Files are owned by OverflowID by default. - owner := fs.FileOwner{auth.KUID(auth.OverflowUID), auth.KGID(auth.OverflowGID)} - - // If we own file on host, let mounting task's initial EUID own - // the file. - if s.Uid == hostUID { - owner.UID = mo.mounter.UID - } - - // If our group matches file's group, make file's group match - // the mounting task's initial EGID. - for _, gid := range hostGIDs { - if s.Gid == gid { - owner.GID = mo.mounter.GID - break - } +func owner(s *syscall.Stat_t) fs.FileOwner { + return fs.FileOwner{ + UID: auth.KUID(s.Uid), + GID: auth.KGID(s.Gid), } - return owner } -func unstableAttr(mo *superOperations, s *syscall.Stat_t) fs.UnstableAttr { +func unstableAttr(s *syscall.Stat_t) fs.UnstableAttr { return fs.UnstableAttr{ Size: s.Size, Usage: s.Blocks * 512, Perms: fs.FilePermsFromMode(linux.FileMode(s.Mode)), - Owner: owner(mo, s), - AccessTime: time.FromUnix(s.Atim.Sec, s.Atim.Nsec), - ModificationTime: time.FromUnix(s.Mtim.Sec, s.Mtim.Nsec), - StatusChangeTime: time.FromUnix(s.Ctim.Sec, s.Ctim.Nsec), + Owner: owner(s), + AccessTime: ktime.FromUnix(s.Atim.Sec, s.Atim.Nsec), + ModificationTime: ktime.FromUnix(s.Mtim.Sec, s.Mtim.Nsec), + StatusChangeTime: ktime.FromUnix(s.Ctim.Sec, s.Ctim.Nsec), Links: uint64(s.Nlink), } } diff --git a/pkg/sentry/fs/host/util_unsafe.go b/pkg/sentry/fs/host/util_unsafe.go index 3ab36b088..23bd35d64 100644 --- a/pkg/sentry/fs/host/util_unsafe.go +++ b/pkg/sentry/fs/host/util_unsafe.go @@ -26,26 +26,6 @@ import ( // NulByte is a single NUL byte. It is passed to readlinkat as an empty string. var NulByte byte = '\x00' -func createLink(fd int, name string, linkName string) error { - namePtr, err := syscall.BytePtrFromString(name) - if err != nil { - return err - } - linkNamePtr, err := syscall.BytePtrFromString(linkName) - if err != nil { - return err - } - _, _, errno := syscall.Syscall( - syscall.SYS_SYMLINKAT, - uintptr(unsafe.Pointer(namePtr)), - uintptr(fd), - uintptr(unsafe.Pointer(linkNamePtr))) - if errno != 0 { - return errno - } - return nil -} - func readLink(fd int) (string, error) { // Buffer sizing copied from os.Readlink. for l := 128; ; l *= 2 { @@ -66,27 +46,6 @@ func readLink(fd int) (string, error) { } } -func unlinkAt(fd int, name string, dir bool) error { - namePtr, err := syscall.BytePtrFromString(name) - if err != nil { - return err - } - var flags uintptr - if dir { - flags = linux.AT_REMOVEDIR - } - _, _, errno := syscall.Syscall( - syscall.SYS_UNLINKAT, - uintptr(fd), - uintptr(unsafe.Pointer(namePtr)), - flags, - ) - if errno != 0 { - return errno - } - return nil -} - func timespecFromTimestamp(t ktime.Time, omit, setSysTime bool) syscall.Timespec { if omit { return syscall.Timespec{0, linux.UTIME_OMIT} diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index c7981f66e..b414ddaee 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -273,19 +273,6 @@ func (mns *MountNamespace) DecRef() { mns.DecRefWithDestructor(mns.destroy) } -// Freeze freezes the entire mount tree. -func (mns *MountNamespace) Freeze() { - mns.mu.Lock() - defer mns.mu.Unlock() - - // We only want to freeze Dirents with active references, not Dirents referenced - // by a mount's MountSource. - mns.flushMountSourceRefsLocked() - - // Freeze the entire shebang. - mns.root.Freeze() -} - // withMountLocked prevents further walks to `node`, because `node` is about to // be a mount point. func (mns *MountNamespace) withMountLocked(node *Dirent, fn func() error) error { diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go index 03cc788c8..d6c5dd2c1 100644 --- a/pkg/sentry/fs/proc/task.go +++ b/pkg/sentry/fs/proc/task.go @@ -853,15 +853,15 @@ func (o *oomScoreAdj) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.F // Read implements fs.FileOperations.Read. func (f *oomScoreAdjFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { - if offset != 0 { - return 0, io.EOF + if f.t.ExitState() == kernel.TaskExitDead { + return 0, syserror.ESRCH } - adj, err := f.t.OOMScoreAdj() - if err != nil { - return 0, err + var buf bytes.Buffer + fmt.Fprintf(&buf, "%d\n", f.t.OOMScoreAdj()) + if offset >= int64(buf.Len()) { + return 0, io.EOF } - adjBytes := []byte(strconv.FormatInt(int64(adj), 10) + "\n") - n, err := dst.CopyOut(ctx, adjBytes) + n, err := dst.CopyOut(ctx, buf.Bytes()[offset:]) return int64(n), err } @@ -880,6 +880,9 @@ func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOS return 0, err } + if f.t.ExitState() == kernel.TaskExitDead { + return 0, syserror.ESRCH + } if err := f.t.SetOOMScoreAdj(v); err != nil { return 0, err } |