summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fs/host/fs.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fs/host/fs.go')
-rw-r--r--pkg/sentry/fs/host/fs.go327
1 files changed, 327 insertions, 0 deletions
diff --git a/pkg/sentry/fs/host/fs.go b/pkg/sentry/fs/host/fs.go
new file mode 100644
index 000000000..ffd55a5ab
--- /dev/null
+++ b/pkg/sentry/fs/host/fs.go
@@ -0,0 +1,327 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package host implements an fs.Filesystem for files backed by host
+// file descriptors.
+package host
+
+import (
+ "fmt"
+ "path"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+)
+
+// FilesystemName is the name under which Filesystem is registered.
+const FilesystemName = "whitelistfs"
+
+const (
+ // whitelistKey is the mount option containing a comma-separated list
+ // of host paths to whitelist.
+ whitelistKey = "whitelist"
+
+ // rootPathKey is the mount option containing the root path of the
+ // mount.
+ rootPathKey = "root"
+
+ // dontTranslateOwnershipKey is the key to superOperations.dontTranslateOwnership.
+ dontTranslateOwnershipKey = "dont_translate_ownership"
+)
+
+// maxTraversals determines link traversals in building the whitelist.
+const maxTraversals = 10
+
+// Filesystem is a pseudo file system that is only available during the setup
+// to lock down the configurations. This filesystem should only be mounted at root.
+//
+// Think twice before exposing this to applications.
+type Filesystem struct {
+ // whitelist is a set of host paths to whitelist.
+ paths []string
+}
+
+// Name is the identifier of this file system.
+func (*Filesystem) Name() string {
+ return FilesystemName
+}
+
+// AllowUserMount prohibits users from using mount(2) with this file system.
+func (*Filesystem) AllowUserMount() bool {
+ return false
+}
+
+// Flags returns that there is nothing special about this file system.
+func (*Filesystem) Flags() fs.FilesystemFlags {
+ return 0
+}
+
+// Mount returns an fs.Inode exposing the host file system. It is intended to be locked
+// down in PreExec below.
+func (f *Filesystem) Mount(ctx context.Context, _ string, flags fs.MountSourceFlags, data string) (*fs.Inode, error) {
+ // Parse generic comma-separated key=value options.
+ options := fs.GenericMountSourceOptions(data)
+
+ // Grab the whitelist if one was specified.
+ // TODO: require another option "testonly" in order to allow
+ // no whitelist.
+ if wl, ok := options[whitelistKey]; ok {
+ f.paths = strings.Split(wl, "|")
+ delete(options, whitelistKey)
+ }
+
+ // If the rootPath was set, use it. Othewise default to the root of the
+ // host fs.
+ rootPath := "/"
+ if rp, ok := options[rootPathKey]; ok {
+ rootPath = rp
+ delete(options, rootPathKey)
+
+ // We must relativize the whitelisted paths to the new root.
+ for i, p := range f.paths {
+ rel, err := filepath.Rel(rootPath, p)
+ if err != nil {
+ return nil, fmt.Errorf("whitelist path %q must be a child of root path %q", p, rootPath)
+ }
+ f.paths[i] = path.Join("/", rel)
+ }
+ }
+ fd, err := open(nil, rootPath)
+ if err != nil {
+ return nil, fmt.Errorf("failed to find root: %v", err)
+ }
+
+ var dontTranslateOwnership bool
+ if v, ok := options[dontTranslateOwnershipKey]; ok {
+ b, err := strconv.ParseBool(v)
+ if err != nil {
+ return nil, fmt.Errorf("invalid value for %q: %v", dontTranslateOwnershipKey, err)
+ }
+ dontTranslateOwnership = b
+ delete(options, dontTranslateOwnershipKey)
+ }
+
+ // Fail if the caller passed us more options than we know about.
+ if len(options) > 0 {
+ return nil, fmt.Errorf("unsupported mount options: %v", options)
+ }
+
+ // The mounting EUID/EGID will be cached by this file system. This will
+ // be used to assign ownership to files that we own.
+ owner := fs.FileOwnerFromContext(ctx)
+
+ // Construct the host file system mount and inode.
+ msrc := newMountSource(ctx, rootPath, owner, f, flags, dontTranslateOwnership)
+ return newInode(ctx, msrc, fd, false /* saveable */, false /* donated */)
+}
+
+// InstallWhitelist locks down the MountNamespace to only the currently installed
+// Dirents and the given paths.
+func (f *Filesystem) InstallWhitelist(ctx context.Context, m *fs.MountNamespace) error {
+ return installWhitelist(ctx, m, f.paths)
+}
+
+func installWhitelist(ctx context.Context, m *fs.MountNamespace, paths []string) error {
+ if len(paths) == 0 || (len(paths) == 1 && paths[0] == "") {
+ // Warning will be logged during filter installation if the empty
+ // whitelist matters (allows for host file access).
+ return nil
+ }
+
+ // Done tracks entries already added.
+ done := make(map[string]bool)
+ root := m.Root()
+ defer root.DecRef()
+
+ for i := 0; i < len(paths); i++ {
+ // Make sure the path is absolute. This is a sanity check.
+ if !path.IsAbs(paths[i]) {
+ return fmt.Errorf("path %q is not absolute", paths[i])
+ }
+
+ // We need to add all the intermediate paths, in case one of
+ // them is a symlink that needs to be resolved.
+ for j := 1; j <= len(paths[i]); j++ {
+ if j < len(paths[i]) && paths[i][j] != '/' {
+ continue
+ }
+ current := paths[i][:j]
+
+ // Lookup the given component in the tree.
+ d, err := m.FindLink(ctx, root, nil, current, maxTraversals)
+ if err != nil {
+ log.Warningf("populate failed for %q: %v", current, err)
+ continue
+ }
+
+ // It's critical that this DecRef happens after the
+ // freeze below. This ensures that the dentry is in
+ // place to be frozen. Otherwise, we freeze without
+ // these entries.
+ defer d.DecRef()
+
+ // Expand the last component if necessary.
+ if current == paths[i] {
+ // Is it a directory or symlink?
+ sattr := d.Inode.StableAttr
+ if fs.IsDir(sattr) {
+ for name := range childDentAttrs(ctx, d) {
+ paths = append(paths, path.Join(current, name))
+ }
+ }
+ if fs.IsSymlink(sattr) {
+ // Only expand symlinks once. The
+ // folder structure may contain
+ // recursive symlinks and we don't want
+ // to end up infinitely expanding this
+ // symlink. This is safe because this
+ // is the last component. If a later
+ // path wants to symlink something
+ // beneath this symlink that will still
+ // be handled by the FindLink above.
+ if done[current] {
+ continue
+ }
+
+ s, err := d.Inode.Readlink(ctx)
+ if err != nil {
+ log.Warningf("readlink failed for %q: %v", current, err)
+ continue
+ }
+ if path.IsAbs(s) {
+ paths = append(paths, s)
+ } else {
+ target := path.Join(path.Dir(current), s)
+ paths = append(paths, target)
+ }
+ }
+ }
+
+ // Only report this one once even though we may look
+ // it up more than once. If we whitelist /a/b,/a then
+ // /a will be "done" when it is looked up for /a/b,
+ // however we still need to expand all of its contents
+ // when whitelisting /a.
+ if !done[current] {
+ log.Debugf("whitelisted: %s", current)
+ }
+ done[current] = true
+ }
+ }
+
+ // Freeze the mount tree in place. This prevents any new paths from
+ // being opened and any old ones from being removed. If we do provide
+ // tmpfs mounts, we'll want to freeze/thaw those separately.
+ m.Freeze()
+ return nil
+}
+
+func childDentAttrs(ctx context.Context, d *fs.Dirent) map[string]fs.DentAttr {
+ dirname, _ := d.FullName(nil /* root */)
+ dir, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true})
+ if err != nil {
+ log.Warningf("failed to open directory %q: %v", dirname, err)
+ return nil
+ }
+ dir.DecRef()
+ var stubSerializer fs.CollectEntriesSerializer
+ if err := dir.Readdir(ctx, &stubSerializer); err != nil {
+ log.Warningf("failed to iterate on host directory %q: %v", dirname, err)
+ return nil
+ }
+ delete(stubSerializer.Entries, ".")
+ delete(stubSerializer.Entries, "..")
+ return stubSerializer.Entries
+}
+
+// newMountSource constructs a new host fs.MountSource
+// relative to a root path. The root should match the mount point.
+func newMountSource(ctx context.Context, root string, mounter fs.FileOwner, filesystem fs.Filesystem, flags fs.MountSourceFlags, dontTranslateOwnership bool) *fs.MountSource {
+ return fs.NewMountSource(&superOperations{
+ root: root,
+ inodeMappings: make(map[uint64]string),
+ mounter: mounter,
+ dontTranslateOwnership: dontTranslateOwnership,
+ }, filesystem, flags)
+}
+
+// superOperations implements fs.MountSourceOperations.
+type superOperations struct {
+ fs.SimpleMountSourceOperations `state:"nosave"`
+
+ // root is the path of the mount point. All inode mappings
+ // are relative to this root.
+ root string
+
+ // inodeMappings contains mappings of fs.Inodes associated
+ // with this MountSource to paths under root.
+ inodeMappings map[uint64]string
+
+ // mounter is the cached EUID/EGID that mounted this file system.
+ mounter fs.FileOwner
+
+ // dontTranslateOwnership indicates whether to not translate file
+ // ownership.
+ //
+ // By default, files/directories owned by the sandbox uses UID/GID
+ // of the mounter. For files/directories that are not owned by the
+ // sandbox, file UID/GID is translated to a UID/GID which cannot
+ // be mapped in the sandboxed application's user namespace. The
+ // UID/GID will look like the nobody UID/GID (65534) but is not
+ // strictly owned by the user "nobody".
+ //
+ // If whitelistfs is a lower filesystem in an overlay, set
+ // dont_translate_ownership=true in mount options.
+ dontTranslateOwnership bool
+}
+
+var _ fs.MountSourceOperations = (*superOperations)(nil)
+
+// ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings.
+func (m *superOperations) ResetInodeMappings() {
+ m.inodeMappings = make(map[uint64]string)
+}
+
+// SaveInodeMapping implements fs.MountSourceOperations.SaveInodeMapping.
+func (m *superOperations) SaveInodeMapping(inode *fs.Inode, path string) {
+ // This is very unintuitive. We *CANNOT* trust the inode's StableAttrs,
+ // because overlay copyUp may have changed them out from under us.
+ // So much for "immutable".
+ sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr
+ m.inodeMappings[sattr.InodeID] = path
+}
+
+// Keep implements fs.MountSourceOperations.Keep.
+//
+// TODO: It is possible to change the permissions on a
+// host file while it is in the dirent cache (say from RO to RW), but it is not
+// possible to re-open the file with more relaxed permissions, since the host
+// FD is already open and stored in the inode.
+//
+// Using the dirent LRU cache increases the odds that this bug is encountered.
+// Since host file access is relatively fast anyways, we disable the LRU cache
+// for host fs files. Once we can properly deal with permissions changes and
+// re-opening host files, we should revisit whether or not to make use of the
+// LRU cache.
+func (*superOperations) Keep(*fs.Dirent) bool {
+ return false
+}
+
+func init() {
+ fs.RegisterFilesystem(&Filesystem{})
+}