// Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package host implements an fs.Filesystem for files backed by host // file descriptors. package host import ( "fmt" "path" "path/filepath" "strconv" "strings" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" ) // FilesystemName is the name under which Filesystem is registered. const FilesystemName = "whitelistfs" const ( // whitelistKey is the mount option containing a comma-separated list // of host paths to whitelist. whitelistKey = "whitelist" // rootPathKey is the mount option containing the root path of the // mount. rootPathKey = "root" // dontTranslateOwnershipKey is the key to superOperations.dontTranslateOwnership. dontTranslateOwnershipKey = "dont_translate_ownership" ) // maxTraversals determines link traversals in building the whitelist. const maxTraversals = 10 // Filesystem is a pseudo file system that is only available during the setup // to lock down the configurations. This filesystem should only be mounted at root. // // Think twice before exposing this to applications. // // +stateify savable type Filesystem struct { // whitelist is a set of host paths to whitelist. paths []string } var _ fs.Filesystem = (*Filesystem)(nil) // Name is the identifier of this file system. func (*Filesystem) Name() string { return FilesystemName } // AllowUserMount prohibits users from using mount(2) with this file system. func (*Filesystem) AllowUserMount() bool { return false } // AllowUserList allows this filesystem to be listed in /proc/filesystems. func (*Filesystem) AllowUserList() bool { return true } // Flags returns that there is nothing special about this file system. func (*Filesystem) Flags() fs.FilesystemFlags { return 0 } // Mount returns an fs.Inode exposing the host file system. It is intended to be locked // down in PreExec below. func (f *Filesystem) Mount(ctx context.Context, _ string, flags fs.MountSourceFlags, data string, _ interface{}) (*fs.Inode, error) { // Parse generic comma-separated key=value options. options := fs.GenericMountSourceOptions(data) // Grab the whitelist if one was specified. // TODO(edahlgren/mpratt/hzy): require another option "testonly" in order to allow // no whitelist. if wl, ok := options[whitelistKey]; ok { f.paths = strings.Split(wl, "|") delete(options, whitelistKey) } // If the rootPath was set, use it. Othewise default to the root of the // host fs. rootPath := "/" if rp, ok := options[rootPathKey]; ok { rootPath = rp delete(options, rootPathKey) // We must relativize the whitelisted paths to the new root. for i, p := range f.paths { rel, err := filepath.Rel(rootPath, p) if err != nil { return nil, fmt.Errorf("whitelist path %q must be a child of root path %q", p, rootPath) } f.paths[i] = path.Join("/", rel) } } fd, err := open(nil, rootPath) if err != nil { return nil, fmt.Errorf("failed to find root: %v", err) } var dontTranslateOwnership bool if v, ok := options[dontTranslateOwnershipKey]; ok { b, err := strconv.ParseBool(v) if err != nil { return nil, fmt.Errorf("invalid value for %q: %v", dontTranslateOwnershipKey, err) } dontTranslateOwnership = b delete(options, dontTranslateOwnershipKey) } // Fail if the caller passed us more options than we know about. if len(options) > 0 { return nil, fmt.Errorf("unsupported mount options: %v", options) } // The mounting EUID/EGID will be cached by this file system. This will // be used to assign ownership to files that we own. owner := fs.FileOwnerFromContext(ctx) // Construct the host file system mount and inode. msrc := newMountSource(ctx, rootPath, owner, f, flags, dontTranslateOwnership) return newInode(ctx, msrc, fd, false /* saveable */, false /* donated */) } // InstallWhitelist locks down the MountNamespace to only the currently installed // Dirents and the given paths. func (f *Filesystem) InstallWhitelist(ctx context.Context, m *fs.MountNamespace) error { return installWhitelist(ctx, m, f.paths) } func installWhitelist(ctx context.Context, m *fs.MountNamespace, paths []string) error { if len(paths) == 0 || (len(paths) == 1 && paths[0] == "") { // Warning will be logged during filter installation if the empty // whitelist matters (allows for host file access). return nil } // Done tracks entries already added. done := make(map[string]bool) root := m.Root() defer root.DecRef() for i := 0; i < len(paths); i++ { // Make sure the path is absolute. This is a sanity check. if !path.IsAbs(paths[i]) { return fmt.Errorf("path %q is not absolute", paths[i]) } // We need to add all the intermediate paths, in case one of // them is a symlink that needs to be resolved. for j := 1; j <= len(paths[i]); j++ { if j < len(paths[i]) && paths[i][j] != '/' { continue } current := paths[i][:j] // Lookup the given component in the tree. remainingTraversals := uint(maxTraversals) d, err := m.FindLink(ctx, root, nil, current, &remainingTraversals) if err != nil { log.Warningf("populate failed for %q: %v", current, err) continue } // It's critical that this DecRef happens after the // freeze below. This ensures that the dentry is in // place to be frozen. Otherwise, we freeze without // these entries. defer d.DecRef() // Expand the last component if necessary. if current == paths[i] { // Is it a directory or symlink? sattr := d.Inode.StableAttr if fs.IsDir(sattr) { for name := range childDentAttrs(ctx, d) { paths = append(paths, path.Join(current, name)) } } if fs.IsSymlink(sattr) { // Only expand symlinks once. The // folder structure may contain // recursive symlinks and we don't want // to end up infinitely expanding this // symlink. This is safe because this // is the last component. If a later // path wants to symlink something // beneath this symlink that will still // be handled by the FindLink above. if done[current] { continue } s, err := d.Inode.Readlink(ctx) if err != nil { log.Warningf("readlink failed for %q: %v", current, err) continue } if path.IsAbs(s) { paths = append(paths, s) } else { target := path.Join(path.Dir(current), s) paths = append(paths, target) } } } // Only report this one once even though we may look // it up more than once. If we whitelist /a/b,/a then // /a will be "done" when it is looked up for /a/b, // however we still need to expand all of its contents // when whitelisting /a. if !done[current] { log.Debugf("whitelisted: %s", current) } done[current] = true } } // Freeze the mount tree in place. This prevents any new paths from // being opened and any old ones from being removed. If we do provide // tmpfs mounts, we'll want to freeze/thaw those separately. m.Freeze() return nil } func childDentAttrs(ctx context.Context, d *fs.Dirent) map[string]fs.DentAttr { dirname, _ := d.FullName(nil /* root */) dir, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) if err != nil { log.Warningf("failed to open directory %q: %v", dirname, err) return nil } dir.DecRef() var stubSerializer fs.CollectEntriesSerializer if err := dir.Readdir(ctx, &stubSerializer); err != nil { log.Warningf("failed to iterate on host directory %q: %v", dirname, err) return nil } delete(stubSerializer.Entries, ".") delete(stubSerializer.Entries, "..") return stubSerializer.Entries } // newMountSource constructs a new host fs.MountSource // relative to a root path. The root should match the mount point. func newMountSource(ctx context.Context, root string, mounter fs.FileOwner, filesystem fs.Filesystem, flags fs.MountSourceFlags, dontTranslateOwnership bool) *fs.MountSource { return fs.NewMountSource(ctx, &superOperations{ root: root, inodeMappings: make(map[uint64]string), mounter: mounter, dontTranslateOwnership: dontTranslateOwnership, }, filesystem, flags) } // superOperations implements fs.MountSourceOperations. // // +stateify savable type superOperations struct { fs.SimpleMountSourceOperations // root is the path of the mount point. All inode mappings // are relative to this root. root string // inodeMappings contains mappings of fs.Inodes associated // with this MountSource to paths under root. inodeMappings map[uint64]string // mounter is the cached EUID/EGID that mounted this file system. mounter fs.FileOwner // dontTranslateOwnership indicates whether to not translate file // ownership. // // By default, files/directories owned by the sandbox uses UID/GID // of the mounter. For files/directories that are not owned by the // sandbox, file UID/GID is translated to a UID/GID which cannot // be mapped in the sandboxed application's user namespace. The // UID/GID will look like the nobody UID/GID (65534) but is not // strictly owned by the user "nobody". // // If whitelistfs is a lower filesystem in an overlay, set // dont_translate_ownership=true in mount options. dontTranslateOwnership bool } var _ fs.MountSourceOperations = (*superOperations)(nil) // ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings. func (m *superOperations) ResetInodeMappings() { m.inodeMappings = make(map[uint64]string) } // SaveInodeMapping implements fs.MountSourceOperations.SaveInodeMapping. func (m *superOperations) SaveInodeMapping(inode *fs.Inode, path string) { // This is very unintuitive. We *CANNOT* trust the inode's StableAttrs, // because overlay copyUp may have changed them out from under us. // So much for "immutable". sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr m.inodeMappings[sattr.InodeID] = path } // Keep implements fs.MountSourceOperations.Keep. // // TODO(b/72455313,b/77596690): It is possible to change the permissions on a // host file while it is in the dirent cache (say from RO to RW), but it is not // possible to re-open the file with more relaxed permissions, since the host // FD is already open and stored in the inode. // // Using the dirent LRU cache increases the odds that this bug is encountered. // Since host file access is relatively fast anyways, we disable the LRU cache // for host fs files. Once we can properly deal with permissions changes and // re-opening host files, we should revisit whether or not to make use of the // LRU cache. func (*superOperations) Keep(*fs.Dirent) bool { return false } func init() { fs.RegisterFilesystem(&Filesystem{}) }