summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl/verity/verity.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/verity/verity.go')
-rw-r--r--pkg/sentry/fsimpl/verity/verity.go1267
1 files changed, 0 insertions, 1267 deletions
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
deleted file mode 100644
index 0d9b0ee2c..000000000
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ /dev/null
@@ -1,1267 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package verity provides a filesystem implementation that is a wrapper of
-// another file system.
-// The verity file system provides integrity check for the underlying file
-// system by providing verification for path traversals and each read.
-// The verity file system is read-only, except for one case: when
-// allowRuntimeEnable is true, additional Merkle files can be generated using
-// the FS_IOC_ENABLE_VERITY ioctl.
-//
-// Lock order:
-//
-// filesystem.renameMu
-// dentry.dirMu
-// fileDescription.mu
-// filesystem.verityMu
-// dentry.hashMu
-//
-// Locking dentry.dirMu in multiple dentries requires that parent dentries are
-// locked before child dentries, and that filesystem.renameMu is locked to
-// stabilize this relationship.
-package verity
-
-import (
- "encoding/json"
- "fmt"
- "math"
- "strconv"
- "strings"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/marshal/primitive"
- "gvisor.dev/gvisor/pkg/merkletree"
- "gvisor.dev/gvisor/pkg/refsvfs2"
- "gvisor.dev/gvisor/pkg/sentry/arch"
- fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-const (
- // Name is the default filesystem name.
- Name = "verity"
-
- // merklePrefix is the prefix of the Merkle tree files. For example, the Merkle
- // tree file for "/foo" is "/.merkle.verity.foo".
- merklePrefix = ".merkle.verity."
-
- // merkleRootPrefix is the prefix of the Merkle tree root file. This
- // needs to be different from merklePrefix to avoid name collision.
- merkleRootPrefix = ".merkleroot.verity."
-
- // merkleOffsetInParentXattr is the extended attribute name specifying the
- // offset of the child hash in its parent's Merkle tree.
- merkleOffsetInParentXattr = "user.merkle.offset"
-
- // merkleSizeXattr is the extended attribute name specifying the size of data
- // hashed by the corresponding Merkle tree. For a regular file, this is the
- // file size. For a directory, this is the size of all its children's hashes.
- merkleSizeXattr = "user.merkle.size"
-
- // childrenOffsetXattr is the extended attribute name specifying the
- // names of the offset of the serialized children names in the Merkle
- // tree file.
- childrenOffsetXattr = "user.merkle.childrenOffset"
-
- // childrenSizeXattr is the extended attribute name specifying the size
- // of the serialized children names.
- childrenSizeXattr = "user.merkle.childrenSize"
-
- // sizeOfStringInt32 is the size for a 32 bit integer stored as string in
- // extended attributes. The maximum value of a 32 bit integer has 10 digits.
- sizeOfStringInt32 = 10
-)
-
-var (
- // action specifies the action towards detected violation.
- action ViolationAction
-
- // verityMu synchronizes concurrent operations that enable verity and perform
- // verification checks.
- verityMu sync.RWMutex
-)
-
-// HashAlgorithm is a type specifying the algorithm used to hash the file
-// content.
-type HashAlgorithm int
-
-// ViolationAction is a type specifying the action when an integrity violation
-// is detected.
-type ViolationAction int
-
-const (
- // PanicOnViolation terminates the sentry on detected violation.
- PanicOnViolation ViolationAction = 0
- // ErrorOnViolation returns an error from the violating system call on
- // detected violation.
- ErrorOnViolation = 1
-)
-
-// Currently supported hashing algorithms include SHA256 and SHA512.
-const (
- SHA256 HashAlgorithm = iota
- SHA512
-)
-
-func (alg HashAlgorithm) toLinuxHashAlg() int {
- switch alg {
- case SHA256:
- return linux.FS_VERITY_HASH_ALG_SHA256
- case SHA512:
- return linux.FS_VERITY_HASH_ALG_SHA512
- default:
- return 0
- }
-}
-
-// FilesystemType implements vfs.FilesystemType.
-//
-// +stateify savable
-type FilesystemType struct{}
-
-// filesystem implements vfs.FilesystemImpl.
-//
-// +stateify savable
-type filesystem struct {
- vfsfs vfs.Filesystem
-
- // creds is a copy of the filesystem's creator's credentials, which are
- // used for accesses to the underlying file system. creds is immutable.
- creds *auth.Credentials
-
- // allowRuntimeEnable is true if using ioctl with FS_IOC_ENABLE_VERITY
- // to build Merkle trees in the verity file system is allowed. If this
- // is false, no new Merkle trees can be built, and only the files that
- // had Merkle trees before startup (e.g. from a host filesystem mounted
- // with gofer fs) can be verified.
- allowRuntimeEnable bool
-
- // lowerMount is the underlying file system mount.
- lowerMount *vfs.Mount
-
- // rootDentry is the mount root Dentry for this file system, which
- // stores the root hash of the whole file system in bytes.
- rootDentry *dentry
-
- // alg is the algorithms used to hash the files in the verity file
- // system.
- alg HashAlgorithm
-
- // renameMu synchronizes renaming with non-renaming operations in order
- // to ensure consistent lock ordering between dentry.dirMu in different
- // dentries.
- renameMu sync.RWMutex `state:"nosave"`
-
- // verityMu synchronizes enabling verity files, protects files or
- // directories from being enabled by different threads simultaneously.
- // It also ensures that verity does not access files that are being
- // enabled.
- //
- // Also, the directory Merkle trees depends on the generated trees of
- // its children. So they shouldn't be enabled the same time. This lock
- // is for the whole file system to ensure that no more than one file is
- // enabled the same time.
- verityMu sync.RWMutex `state:"nosave"`
-}
-
-// InternalFilesystemOptions may be passed as
-// vfs.GetFilesystemOptions.InternalData to FilesystemType.GetFilesystem.
-//
-// +stateify savable
-type InternalFilesystemOptions struct {
- // RootMerkleFileName is the name of the verity root Merkle tree file.
- RootMerkleFileName string
-
- // LowerName is the name of the filesystem wrapped by verity fs.
- LowerName string
-
- // Alg is the algorithms used to hash the files in the verity file
- // system.
- Alg HashAlgorithm
-
- // RootHash is the root hash of the overall verity file system.
- RootHash []byte
-
- // AllowRuntimeEnable specifies whether the verity file system allows
- // enabling verification for files (i.e. building Merkle trees) during
- // runtime.
- AllowRuntimeEnable bool
-
- // LowerGetFSOptions is the file system option for the lower layer file
- // system wrapped by verity file system.
- LowerGetFSOptions vfs.GetFilesystemOptions
-
- // Action specifies the action on an integrity violation.
- Action ViolationAction
-}
-
-// Name implements vfs.FilesystemType.Name.
-func (FilesystemType) Name() string {
- return Name
-}
-
-// Release implements vfs.FilesystemType.Release.
-func (FilesystemType) Release(ctx context.Context) {}
-
-// alertIntegrityViolation alerts a violation of integrity, which usually means
-// unexpected modification to the file system is detected. In ErrorOnViolation
-// mode, it returns EIO, otherwise it panic.
-func alertIntegrityViolation(msg string) error {
- if action == ErrorOnViolation {
- return syserror.EIO
- }
- panic(msg)
-}
-
-// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
-func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
- iopts, ok := opts.InternalData.(InternalFilesystemOptions)
- if !ok {
- ctx.Warningf("verity.FilesystemType.GetFilesystem: missing verity configs")
- return nil, nil, syserror.EINVAL
- }
- action = iopts.Action
-
- // Mount the lower file system. The lower file system is wrapped inside
- // verity, and should not be exposed or connected.
- mopts := &vfs.MountOptions{
- GetFilesystemOptions: iopts.LowerGetFSOptions,
- InternalMount: true,
- }
- mnt, err := vfsObj.MountDisconnected(ctx, creds, "", iopts.LowerName, mopts)
- if err != nil {
- return nil, nil, err
- }
-
- fs := &filesystem{
- creds: creds.Fork(),
- alg: iopts.Alg,
- lowerMount: mnt,
- allowRuntimeEnable: iopts.AllowRuntimeEnable,
- }
- fs.vfsfs.Init(vfsObj, &fstype, fs)
-
- // Construct the root dentry.
- d := fs.newDentry()
- d.refs = 1
- lowerVD := vfs.MakeVirtualDentry(mnt, mnt.Root())
- lowerVD.IncRef()
- d.lowerVD = lowerVD
-
- rootMerkleName := merkleRootPrefix + iopts.RootMerkleFileName
-
- lowerMerkleVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
- Root: lowerVD,
- Start: lowerVD,
- Path: fspath.Parse(rootMerkleName),
- }, &vfs.GetDentryOptions{})
-
- // If runtime enable is allowed, the root merkle tree may be absent. We
- // should create the tree file.
- if err == syserror.ENOENT && fs.allowRuntimeEnable {
- lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
- Root: lowerVD,
- Start: lowerVD,
- Path: fspath.Parse(rootMerkleName),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR | linux.O_CREAT,
- Mode: 0644,
- })
- if err != nil {
- fs.vfsfs.DecRef(ctx)
- d.DecRef(ctx)
- return nil, nil, err
- }
- lowerMerkleFD.DecRef(ctx)
- lowerMerkleVD, err = vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
- Root: lowerVD,
- Start: lowerVD,
- Path: fspath.Parse(rootMerkleName),
- }, &vfs.GetDentryOptions{})
- if err != nil {
- fs.vfsfs.DecRef(ctx)
- d.DecRef(ctx)
- return nil, nil, err
- }
- } else if err != nil {
- // Failed to get dentry for the root Merkle file. This
- // indicates an unexpected modification that removed/renamed
- // the root Merkle file, or it's never generated.
- fs.vfsfs.DecRef(ctx)
- d.DecRef(ctx)
- return nil, nil, alertIntegrityViolation("Failed to find root Merkle file")
- }
-
- // Clear the Merkle tree file if they are to be generated at runtime.
- // TODO(b/182315468): Optimize the Merkle tree generate process to
- // allow only updating certain files/directories.
- if fs.allowRuntimeEnable {
- lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
- Root: lowerMerkleVD,
- Start: lowerMerkleVD,
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR | linux.O_TRUNC,
- Mode: 0644,
- })
- if err != nil {
- return nil, nil, err
- }
- lowerMerkleFD.DecRef(ctx)
- }
-
- d.lowerMerkleVD = lowerMerkleVD
-
- // Get metadata from the underlying file system.
- const statMask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID
- stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
- Root: lowerVD,
- Start: lowerVD,
- }, &vfs.StatOptions{
- Mask: statMask,
- })
- if err != nil {
- fs.vfsfs.DecRef(ctx)
- d.DecRef(ctx)
- return nil, nil, err
- }
-
- d.mode = uint32(stat.Mode)
- d.uid = stat.UID
- d.gid = stat.GID
- d.hash = make([]byte, len(iopts.RootHash))
- d.childrenNames = make(map[string]struct{})
-
- if !d.isDir() {
- ctx.Warningf("verity root must be a directory")
- return nil, nil, syserror.EINVAL
- }
-
- if !fs.allowRuntimeEnable {
- // Get children names from the underlying file system.
- offString, err := vfsObj.GetXattrAt(ctx, creds, &vfs.PathOperation{
- Root: lowerMerkleVD,
- Start: lowerMerkleVD,
- }, &vfs.GetXattrOptions{
- Name: childrenOffsetXattr,
- Size: sizeOfStringInt32,
- })
- if err == syserror.ENOENT || err == syserror.ENODATA {
- return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenOffsetXattr, err))
- }
- if err != nil {
- return nil, nil, err
- }
-
- off, err := strconv.Atoi(offString)
- if err != nil {
- return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenOffsetXattr, err))
- }
-
- sizeString, err := vfsObj.GetXattrAt(ctx, creds, &vfs.PathOperation{
- Root: lowerMerkleVD,
- Start: lowerMerkleVD,
- }, &vfs.GetXattrOptions{
- Name: childrenSizeXattr,
- Size: sizeOfStringInt32,
- })
- if err == syserror.ENOENT || err == syserror.ENODATA {
- return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenSizeXattr, err))
- }
- if err != nil {
- return nil, nil, err
- }
- size, err := strconv.Atoi(sizeString)
- if err != nil {
- return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenSizeXattr, err))
- }
-
- lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
- Root: lowerMerkleVD,
- Start: lowerMerkleVD,
- }, &vfs.OpenOptions{
- Flags: linux.O_RDONLY,
- })
- if err == syserror.ENOENT {
- return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to open root Merkle file: %v", err))
- }
- if err != nil {
- return nil, nil, err
- }
-
- childrenNames := make([]byte, size)
- if _, err := lowerMerkleFD.PRead(ctx, usermem.BytesIOSequence(childrenNames), int64(off), vfs.ReadOptions{}); err != nil {
- return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to read root children map: %v", err))
- }
-
- if err := json.Unmarshal(childrenNames, &d.childrenNames); err != nil {
- return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to deserialize childrenNames: %v", err))
- }
-
- if err := fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil {
- return nil, nil, err
- }
- }
-
- d.hashMu.Lock()
- copy(d.hash, iopts.RootHash)
- d.hashMu.Unlock()
- d.vfsd.Init(d)
-
- fs.rootDentry = d
-
- return &fs.vfsfs, &d.vfsd, nil
-}
-
-// Release implements vfs.FilesystemImpl.Release.
-func (fs *filesystem) Release(ctx context.Context) {
- fs.lowerMount.DecRef(ctx)
-}
-
-// MountOptions implements vfs.FilesystemImpl.MountOptions.
-func (fs *filesystem) MountOptions() string {
- return ""
-}
-
-// dentry implements vfs.DentryImpl.
-//
-// +stateify savable
-type dentry struct {
- vfsd vfs.Dentry
-
- refs int64
-
- // fs is the owning filesystem. fs is immutable.
- fs *filesystem
-
- // mode, uid, gid and size are the file mode, owner, group, and size of
- // the file in the underlying file system. They are set when a dentry
- // is initialized, and never modified.
- mode uint32
- uid uint32
- gid uint32
- size uint32
-
- // parent is the dentry corresponding to this dentry's parent directory.
- // name is this dentry's name in parent. If this dentry is a filesystem
- // root, parent is nil and name is the empty string. parent and name are
- // protected by fs.renameMu.
- parent *dentry
- name string
-
- // If this dentry represents a directory, children maps the names of
- // children for which dentries have been instantiated to those dentries,
- // and dirents (if not nil) is a cache of dirents as returned by
- // directoryFDs representing this directory. children is protected by
- // dirMu.
- dirMu sync.Mutex `state:"nosave"`
- children map[string]*dentry
-
- // childrenNames stores the name of all children of the dentry. This is
- // used by verity to check whether a child is expected. This is only
- // populated by enableVerity. childrenNames is also protected by dirMu.
- childrenNames map[string]struct{}
-
- // lowerVD is the VirtualDentry in the underlying file system. It is
- // never modified after initialized.
- lowerVD vfs.VirtualDentry
-
- // lowerMerkleVD is the VirtualDentry of the corresponding Merkle tree
- // in the underlying file system. It is never modified after
- // initialized.
- lowerMerkleVD vfs.VirtualDentry
-
- // symlinkTarget is the target path of a symlink file in the underlying filesystem.
- symlinkTarget string
-
- // hash is the calculated hash for the current file or directory. hash
- // is protected by hashMu.
- hashMu sync.RWMutex `state:"nosave"`
- hash []byte
-}
-
-// newDentry creates a new dentry representing the given verity file. The
-// dentry initially has no references; it is the caller's responsibility to set
-// the dentry's reference count and/or call dentry.destroy() as appropriate.
-// The dentry is initially invalid in that it contains no underlying dentry;
-// the caller is responsible for setting them.
-func (fs *filesystem) newDentry() *dentry {
- d := &dentry{
- fs: fs,
- }
- d.vfsd.Init(d)
- refsvfs2.Register(d)
- return d
-}
-
-// IncRef implements vfs.DentryImpl.IncRef.
-func (d *dentry) IncRef() {
- r := atomic.AddInt64(&d.refs, 1)
- if d.LogRefs() {
- refsvfs2.LogIncRef(d, r)
- }
-}
-
-// TryIncRef implements vfs.DentryImpl.TryIncRef.
-func (d *dentry) TryIncRef() bool {
- for {
- r := atomic.LoadInt64(&d.refs)
- if r <= 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&d.refs, r, r+1) {
- if d.LogRefs() {
- refsvfs2.LogTryIncRef(d, r+1)
- }
- return true
- }
- }
-}
-
-// DecRef implements vfs.DentryImpl.DecRef.
-func (d *dentry) DecRef(ctx context.Context) {
- r := atomic.AddInt64(&d.refs, -1)
- if d.LogRefs() {
- refsvfs2.LogDecRef(d, r)
- }
- if r == 0 {
- d.fs.renameMu.Lock()
- d.checkDropLocked(ctx)
- d.fs.renameMu.Unlock()
- } else if r < 0 {
- panic("verity.dentry.DecRef() called without holding a reference")
- }
-}
-
-func (d *dentry) decRefLocked(ctx context.Context) {
- r := atomic.AddInt64(&d.refs, -1)
- if d.LogRefs() {
- refsvfs2.LogDecRef(d, r)
- }
- if r == 0 {
- d.checkDropLocked(ctx)
- } else if r < 0 {
- panic("verity.dentry.decRefLocked() called without holding a reference")
- }
-}
-
-// checkDropLocked should be called after d's reference count becomes 0 or it
-// becomes deleted.
-func (d *dentry) checkDropLocked(ctx context.Context) {
- // Dentries with a positive reference count must be retained. Dentries
- // with a negative reference count have already been destroyed.
- if atomic.LoadInt64(&d.refs) != 0 {
- return
- }
- // Refs is still zero; destroy it.
- d.destroyLocked(ctx)
- return
-}
-
-// destroyLocked destroys the dentry.
-//
-// Preconditions:
-// * d.fs.renameMu must be locked for writing.
-// * d.refs == 0.
-func (d *dentry) destroyLocked(ctx context.Context) {
- switch atomic.LoadInt64(&d.refs) {
- case 0:
- // Mark the dentry destroyed.
- atomic.StoreInt64(&d.refs, -1)
- case -1:
- panic("verity.dentry.destroyLocked() called on already destroyed dentry")
- default:
- panic("verity.dentry.destroyLocked() called with references on the dentry")
- }
-
- if d.lowerVD.Ok() {
- d.lowerVD.DecRef(ctx)
- }
- if d.lowerMerkleVD.Ok() {
- d.lowerMerkleVD.DecRef(ctx)
- }
- if d.parent != nil {
- d.parent.dirMu.Lock()
- if !d.vfsd.IsDead() {
- delete(d.parent.children, d.name)
- }
- d.parent.dirMu.Unlock()
- d.parent.decRefLocked(ctx)
- }
- refsvfs2.Unregister(d)
-}
-
-// RefType implements refsvfs2.CheckedObject.Type.
-func (d *dentry) RefType() string {
- return "verity.dentry"
-}
-
-// LeakMessage implements refsvfs2.CheckedObject.LeakMessage.
-func (d *dentry) LeakMessage() string {
- return fmt.Sprintf("[verity.dentry %p] reference count of %d instead of -1", d, atomic.LoadInt64(&d.refs))
-}
-
-// LogRefs implements refsvfs2.CheckedObject.LogRefs.
-//
-// This should only be set to true for debugging purposes, as it can generate an
-// extremely large amount of output and drastically degrade performance.
-func (d *dentry) LogRefs() bool {
- return false
-}
-
-// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
-func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) {
- //TODO(b/159261227): Implement InotifyWithParent.
-}
-
-// Watches implements vfs.DentryImpl.Watches.
-func (d *dentry) Watches() *vfs.Watches {
- //TODO(b/159261227): Implement Watches.
- return nil
-}
-
-// OnZeroWatches implements vfs.DentryImpl.OnZeroWatches.
-func (d *dentry) OnZeroWatches(context.Context) {
- //TODO(b/159261227): Implement OnZeroWatches.
-}
-
-func (d *dentry) isSymlink() bool {
- return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
-}
-
-func (d *dentry) isDir() bool {
- return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFDIR
-}
-
-func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
- return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid)))
-}
-
-// verityEnabled checks whether the file is enabled with verity features. It
-// should always be true if runtime enable is not allowed. In runtime enable
-// mode, it returns true if the target has been enabled with
-// ioctl(FS_IOC_ENABLE_VERITY).
-func (d *dentry) verityEnabled() bool {
- d.hashMu.RLock()
- defer d.hashMu.RUnlock()
- return !d.fs.allowRuntimeEnable || len(d.hash) != 0
-}
-
-// getLowerAt returns the dentry in the underlying file system, which is
-// represented by filename relative to d.
-func (d *dentry) getLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, filename string) (vfs.VirtualDentry, error) {
- return vfsObj.GetDentryAt(ctx, d.fs.creds, &vfs.PathOperation{
- Root: d.lowerVD,
- Start: d.lowerVD,
- Path: fspath.Parse(filename),
- }, &vfs.GetDentryOptions{})
-}
-
-func (d *dentry) readlink(ctx context.Context) (string, error) {
- vfsObj := d.fs.vfsfs.VirtualFilesystem()
- if d.verityEnabled() {
- stat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{
- Root: d.lowerVD,
- Start: d.lowerVD,
- }, &vfs.StatOptions{})
- if err != nil {
- return "", err
- }
- d.dirMu.Lock()
- defer d.dirMu.Unlock()
- if err := d.fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil {
- return "", err
- }
- return d.symlinkTarget, nil
- }
-
- return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
- Root: d.lowerVD,
- Start: d.lowerVD,
- })
-}
-
-// FileDescription implements vfs.FileDescriptionImpl for verity fds.
-// FileDescription is a wrapper of the underlying lowerFD, with support to build
-// Merkle trees through the Linux fs-verity API to verify contents read from
-// lowerFD.
-//
-// +stateify savable
-type fileDescription struct {
- vfsfd vfs.FileDescription
- vfs.FileDescriptionDefaultImpl
-
- // d is the corresponding dentry to the fileDescription.
- d *dentry
-
- // isDir specifies whehter the fileDescription points to a directory.
- isDir bool
-
- // lowerFD is the FileDescription corresponding to the file in the
- // underlying file system.
- lowerFD *vfs.FileDescription
-
- // merkleReader is the read-only FileDescription corresponding to the
- // Merkle tree file in the underlying file system.
- merkleReader *vfs.FileDescription
-
- // merkleWriter is the FileDescription corresponding to the Merkle tree
- // file in the underlying file system for writing. This should only be
- // used when allowRuntimeEnable is set to true.
- merkleWriter *vfs.FileDescription
-
- // parentMerkleWriter is the FileDescription of the Merkle tree for the
- // directory that contains the current file/directory. This is only used
- // if allowRuntimeEnable is set to true.
- parentMerkleWriter *vfs.FileDescription
-
- // off is the file offset. off is protected by mu.
- mu sync.Mutex `state:"nosave"`
- off int64
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *fileDescription) Release(ctx context.Context) {
- fd.lowerFD.DecRef(ctx)
- fd.merkleReader.DecRef(ctx)
- if fd.merkleWriter != nil {
- fd.merkleWriter.DecRef(ctx)
- }
- if fd.parentMerkleWriter != nil {
- fd.parentMerkleWriter.DecRef(ctx)
- }
-}
-
-// Stat implements vfs.FileDescriptionImpl.Stat.
-func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
- // TODO(b/162788573): Add integrity check for metadata.
- stat, err := fd.lowerFD.Stat(ctx, opts)
- if err != nil {
- return linux.Statx{}, err
- }
- fd.d.dirMu.Lock()
- if fd.d.verityEnabled() {
- if err := fd.d.fs.verifyStatAndChildrenLocked(ctx, fd.d, stat); err != nil {
- return linux.Statx{}, err
- }
- }
- fd.d.dirMu.Unlock()
- return stat, nil
-}
-
-// SetStat implements vfs.FileDescriptionImpl.SetStat.
-func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
- // Verity files are read-only.
- return syserror.EPERM
-}
-
-// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
-func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
- if !fd.d.isDir() {
- return syserror.ENOTDIR
- }
- fd.mu.Lock()
- defer fd.mu.Unlock()
-
- var ds []vfs.Dirent
- err := fd.lowerFD.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error {
- // Do not include the Merkle tree files.
- if strings.Contains(dirent.Name, merklePrefix) || strings.Contains(dirent.Name, merkleRootPrefix) {
- return nil
- }
- if fd.d.verityEnabled() {
- // Verify that the child is expected.
- if dirent.Name != "." && dirent.Name != ".." {
- if _, ok := fd.d.childrenNames[dirent.Name]; !ok {
- return alertIntegrityViolation(fmt.Sprintf("Unexpected children %s", dirent.Name))
- }
- }
- }
- ds = append(ds, dirent)
- return nil
- }))
-
- if err != nil {
- return err
- }
-
- // The result should contain all children plus "." and "..".
- if fd.d.verityEnabled() && len(ds) != len(fd.d.childrenNames)+2 {
- return alertIntegrityViolation(fmt.Sprintf("Unexpected children number %d", len(ds)))
- }
-
- for fd.off < int64(len(ds)) {
- if err := cb.Handle(ds[fd.off]); err != nil {
- return err
- }
- fd.off++
- }
- return nil
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *fileDescription) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- fd.mu.Lock()
- defer fd.mu.Unlock()
- n := int64(0)
- switch whence {
- case linux.SEEK_SET:
- // use offset as specified
- case linux.SEEK_CUR:
- n = fd.off
- case linux.SEEK_END:
- n = int64(fd.d.size)
- default:
- return 0, syserror.EINVAL
- }
- if offset > math.MaxInt64-n {
- return 0, syserror.EINVAL
- }
- offset += n
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- fd.off = offset
- return offset, nil
-}
-
-// generateMerkleLocked generates a Merkle tree file for fd. If fd points to a
-// file /foo/bar, a Merkle tree file /foo/.merkle.verity.bar is generated. The
-// hash of the generated Merkle tree and the data size is returned. If fd
-// points to a regular file, the data is the content of the file. If fd points
-// to a directory, the data is all hashes of its children, written to the Merkle
-// tree file. If fd represents a symlink, the data is empty and nothing is written
-// to the Merkle tree file.
-//
-// Preconditions: fd.d.fs.verityMu must be locked.
-func (fd *fileDescription) generateMerkleLocked(ctx context.Context) ([]byte, uint64, error) {
- fdReader := FileReadWriteSeeker{
- FD: fd.lowerFD,
- Ctx: ctx,
- }
- merkleReader := FileReadWriteSeeker{
- FD: fd.merkleReader,
- Ctx: ctx,
- }
- merkleWriter := FileReadWriteSeeker{
- FD: fd.merkleWriter,
- Ctx: ctx,
- }
-
- stat, err := fd.lowerFD.Stat(ctx, vfs.StatOptions{})
- if err != nil {
- return nil, 0, err
- }
-
- params := &merkletree.GenerateParams{
- TreeReader: &merkleReader,
- TreeWriter: &merkleWriter,
- Children: fd.d.childrenNames,
- //TODO(b/156980949): Support passing other hash algorithms.
- HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(),
- Name: fd.d.name,
- Mode: uint32(stat.Mode),
- UID: stat.UID,
- GID: stat.GID,
- }
-
- switch atomic.LoadUint32(&fd.d.mode) & linux.S_IFMT {
- case linux.S_IFREG:
- // For a regular file, generate a Merkle tree based on its
- // content.
- params.File = &fdReader
- params.Size = int64(stat.Size)
- params.DataAndTreeInSameFile = false
- case linux.S_IFDIR:
- // For a directory, generate a Merkle tree based on the hashes
- // of its children that has already been written to the Merkle
- // tree file.
- merkleStat, err := fd.merkleReader.Stat(ctx, vfs.StatOptions{})
- if err != nil {
- return nil, 0, err
- }
-
- params.Size = int64(merkleStat.Size)
- params.File = &merkleReader
- params.DataAndTreeInSameFile = true
- case linux.S_IFLNK:
- // For a symlink, generate a Merkle tree file but do not write the root hash
- // of the target file content to it. Return a hash of a VerityDescriptor object
- // which includes the symlink target name.
- target, err := fd.d.readlink(ctx)
- if err != nil {
- return nil, 0, err
- }
-
- params.Size = int64(stat.Size)
- params.DataAndTreeInSameFile = false
- params.SymlinkTarget = target
- default:
- // TODO(b/167728857): Investigate whether and how we should
- // enable other types of file.
- return nil, 0, syserror.EINVAL
- }
- hash, err := merkletree.Generate(params)
- return hash, uint64(params.Size), err
-}
-
-// recordChildrenLocked writes the names of fd's children into the
-// corresponding Merkle tree file, and saves the offset/size of the map into
-// xattrs.
-//
-// Preconditions:
-// * fd.d.fs.verityMu must be locked.
-// * fd.d.isDir() == true.
-func (fd *fileDescription) recordChildrenLocked(ctx context.Context) error {
- // Record the children names in the Merkle tree file.
- childrenNames, err := json.Marshal(fd.d.childrenNames)
- if err != nil {
- return err
- }
-
- stat, err := fd.merkleWriter.Stat(ctx, vfs.StatOptions{})
- if err != nil {
- return err
- }
-
- if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
- Name: childrenOffsetXattr,
- Value: strconv.Itoa(int(stat.Size)),
- }); err != nil {
- return err
- }
- if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
- Name: childrenSizeXattr,
- Value: strconv.Itoa(len(childrenNames)),
- }); err != nil {
- return err
- }
-
- if _, err = fd.merkleWriter.Write(ctx, usermem.BytesIOSequence(childrenNames), vfs.WriteOptions{}); err != nil {
- return err
- }
-
- return nil
-}
-
-// enableVerity enables verity features on fd by generating a Merkle tree file
-// and stores its hash in its parent directory's Merkle tree.
-func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) {
- if !fd.d.fs.allowRuntimeEnable {
- return 0, syserror.EPERM
- }
-
- fd.d.fs.verityMu.Lock()
- defer fd.d.fs.verityMu.Unlock()
-
- // In allowRuntimeEnable mode, the underlying fd and read/write fd for
- // the Merkle tree file should have all been initialized. For any file
- // or directory other than the root, the parent Merkle tree file should
- // have also been initialized.
- if fd.lowerFD == nil || fd.merkleReader == nil || fd.merkleWriter == nil || (fd.parentMerkleWriter == nil && fd.d != fd.d.fs.rootDentry) {
- return 0, alertIntegrityViolation("Unexpected verity fd: missing expected underlying fds")
- }
-
- hash, dataSize, err := fd.generateMerkleLocked(ctx)
- if err != nil {
- return 0, err
- }
-
- if fd.parentMerkleWriter != nil {
- stat, err := fd.parentMerkleWriter.Stat(ctx, vfs.StatOptions{})
- if err != nil {
- return 0, err
- }
-
- // Write the hash of fd to the parent directory's Merkle tree
- // file, as it should be part of the parent Merkle tree data.
- // parentMerkleWriter is open with O_APPEND, so it should write
- // directly to the end of the file.
- if _, err = fd.parentMerkleWriter.Write(ctx, usermem.BytesIOSequence(hash), vfs.WriteOptions{}); err != nil {
- return 0, err
- }
-
- // Record the offset of the hash of fd in parent directory's
- // Merkle tree file.
- if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
- Name: merkleOffsetInParentXattr,
- Value: strconv.Itoa(int(stat.Size)),
- }); err != nil {
- return 0, err
- }
-
- // Add the current child's name to parent's childrenNames.
- fd.d.parent.childrenNames[fd.d.name] = struct{}{}
- }
-
- // Record the size of the data being hashed for fd.
- if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
- Name: merkleSizeXattr,
- Value: strconv.Itoa(int(dataSize)),
- }); err != nil {
- return 0, err
- }
-
- if fd.d.isDir() {
- if err := fd.recordChildrenLocked(ctx); err != nil {
- return 0, err
- }
- }
- fd.d.hashMu.Lock()
- fd.d.hash = hash
- fd.d.hashMu.Unlock()
- return 0, nil
-}
-
-// measureVerity returns the hash of fd, saved in verityDigest.
-func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest usermem.Addr) (uintptr, error) {
- t := kernel.TaskFromContext(ctx)
- if t == nil {
- return 0, syserror.EINVAL
- }
- var metadata linux.DigestMetadata
-
- fd.d.hashMu.RLock()
- defer fd.d.hashMu.RUnlock()
-
- // If allowRuntimeEnable is true, an empty fd.d.hash indicates that
- // verity is not enabled for the file. If allowRuntimeEnable is false,
- // this is an integrity violation because all files should have verity
- // enabled, in which case fd.d.hash should be set.
- if len(fd.d.hash) == 0 {
- if fd.d.fs.allowRuntimeEnable {
- return 0, syserror.ENODATA
- }
- return 0, alertIntegrityViolation("Ioctl measureVerity: no hash found")
- }
-
- // The first part of VerityDigest is the metadata.
- if _, err := metadata.CopyIn(t, verityDigest); err != nil {
- return 0, err
- }
- if metadata.DigestSize < uint16(len(fd.d.hash)) {
- return 0, syserror.EOVERFLOW
- }
-
- // Populate the output digest size, since DigestSize is both input and
- // output.
- metadata.DigestSize = uint16(len(fd.d.hash))
-
- // First copy the metadata.
- if _, err := metadata.CopyOut(t, verityDigest); err != nil {
- return 0, err
- }
-
- // Now copy the root hash bytes to the memory after metadata.
- _, err := t.CopyOutBytes(usermem.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.hash)
- return 0, err
-}
-
-func (fd *fileDescription) verityFlags(ctx context.Context, flags usermem.Addr) (uintptr, error) {
- f := int32(0)
-
- fd.d.hashMu.RLock()
- // All enabled files should store a hash. This flag is not settable via
- // FS_IOC_SETFLAGS.
- if len(fd.d.hash) != 0 {
- f |= linux.FS_VERITY_FL
- }
- fd.d.hashMu.RUnlock()
-
- t := kernel.TaskFromContext(ctx)
- if t == nil {
- return 0, syserror.EINVAL
- }
- _, err := primitive.CopyInt32Out(t, flags, f)
- return 0, err
-}
-
-// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
-func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
- switch cmd := args[1].Uint(); cmd {
- case linux.FS_IOC_ENABLE_VERITY:
- return fd.enableVerity(ctx)
- case linux.FS_IOC_MEASURE_VERITY:
- return fd.measureVerity(ctx, args[2].Pointer())
- case linux.FS_IOC_GETFLAGS:
- return fd.verityFlags(ctx, args[2].Pointer())
- default:
- // TODO(b/169682228): Investigate which ioctl commands should
- // be allowed.
- return 0, syserror.ENOSYS
- }
-}
-
-// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- // Implement Read with PRead by setting offset.
- fd.mu.Lock()
- n, err := fd.PRead(ctx, dst, fd.off, opts)
- fd.off += n
- fd.mu.Unlock()
- return n, err
-}
-
-// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- // No need to verify if the file is not enabled yet in
- // allowRuntimeEnable mode.
- if !fd.d.verityEnabled() {
- return fd.lowerFD.PRead(ctx, dst, offset, opts)
- }
-
- fd.d.fs.verityMu.RLock()
- defer fd.d.fs.verityMu.RUnlock()
- // dataSize is the size of the whole file.
- dataSize, err := fd.merkleReader.GetXattr(ctx, &vfs.GetXattrOptions{
- Name: merkleSizeXattr,
- Size: sizeOfStringInt32,
- })
-
- // The Merkle tree file for the child should have been created and
- // contains the expected xattrs. If the xattr does not exist, it
- // indicates unexpected modifications to the file system.
- if err == syserror.ENODATA {
- return 0, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err))
- }
- if err != nil {
- return 0, err
- }
-
- // The dataSize xattr should be an integer. If it's not, it indicates
- // unexpected modifications to the file system.
- size, err := strconv.Atoi(dataSize)
- if err != nil {
- return 0, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", merkleSizeXattr, err))
- }
-
- dataReader := FileReadWriteSeeker{
- FD: fd.lowerFD,
- Ctx: ctx,
- }
-
- merkleReader := FileReadWriteSeeker{
- FD: fd.merkleReader,
- Ctx: ctx,
- }
-
- fd.d.hashMu.RLock()
- n, err := merkletree.Verify(&merkletree.VerifyParams{
- Out: dst.Writer(ctx),
- File: &dataReader,
- Tree: &merkleReader,
- Size: int64(size),
- Name: fd.d.name,
- Mode: fd.d.mode,
- UID: fd.d.uid,
- GID: fd.d.gid,
- Children: fd.d.childrenNames,
- //TODO(b/156980949): Support passing other hash algorithms.
- HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(),
- ReadOffset: offset,
- ReadSize: dst.NumBytes(),
- Expected: fd.d.hash,
- DataAndTreeInSameFile: false,
- })
- fd.d.hashMu.RUnlock()
- if err != nil {
- return 0, alertIntegrityViolation(fmt.Sprintf("Verification failed: %v", err))
- }
- return n, err
-}
-
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EROFS
-}
-
-// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.EROFS
-}
-
-// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
-func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
- return fd.lowerFD.LockBSD(ctx, ownerPID, t, block)
-}
-
-// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD.
-func (fd *fileDescription) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
- return fd.lowerFD.UnlockBSD(ctx)
-}
-
-// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
-func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
- return fd.lowerFD.LockPOSIX(ctx, uid, ownerPID, t, r, block)
-}
-
-// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
-func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
- return fd.lowerFD.UnlockPOSIX(ctx, uid, r)
-}
-
-// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX.
-func (fd *fileDescription) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
- return fd.lowerFD.TestPOSIX(ctx, uid, t, r)
-}
-
-// FileReadWriteSeeker is a helper struct to pass a vfs.FileDescription as
-// io.Reader/io.Writer/io.ReadSeeker/io.ReaderAt/io.WriterAt/etc.
-type FileReadWriteSeeker struct {
- FD *vfs.FileDescription
- Ctx context.Context
- ROpts vfs.ReadOptions
- WOpts vfs.WriteOptions
-}
-
-// ReadAt implements io.ReaderAt.ReadAt.
-func (f *FileReadWriteSeeker) ReadAt(p []byte, off int64) (int, error) {
- dst := usermem.BytesIOSequence(p)
- n, err := f.FD.PRead(f.Ctx, dst, off, f.ROpts)
- return int(n), err
-}
-
-// Read implements io.ReadWriteSeeker.Read.
-func (f *FileReadWriteSeeker) Read(p []byte) (int, error) {
- dst := usermem.BytesIOSequence(p)
- n, err := f.FD.Read(f.Ctx, dst, f.ROpts)
- return int(n), err
-}
-
-// Seek implements io.ReadWriteSeeker.Seek.
-func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) {
- return f.FD.Seek(f.Ctx, offset, int32(whence))
-}
-
-// WriteAt implements io.WriterAt.WriteAt.
-func (f *FileReadWriteSeeker) WriteAt(p []byte, off int64) (int, error) {
- dst := usermem.BytesIOSequence(p)
- n, err := f.FD.PWrite(f.Ctx, dst, off, f.WOpts)
- return int(n), err
-}
-
-// Write implements io.ReadWriteSeeker.Write.
-func (f *FileReadWriteSeeker) Write(p []byte) (int, error) {
- buf := usermem.BytesIOSequence(p)
- n, err := f.FD.Write(f.Ctx, buf, f.WOpts)
- return int(n), err
-}