diff options
Diffstat (limited to 'pkg/sentry/fsimpl/verity/verity.go')
-rw-r--r-- | pkg/sentry/fsimpl/verity/verity.go | 1267 |
1 files changed, 0 insertions, 1267 deletions
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go deleted file mode 100644 index 0d9b0ee2c..000000000 --- a/pkg/sentry/fsimpl/verity/verity.go +++ /dev/null @@ -1,1267 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package verity provides a filesystem implementation that is a wrapper of -// another file system. -// The verity file system provides integrity check for the underlying file -// system by providing verification for path traversals and each read. -// The verity file system is read-only, except for one case: when -// allowRuntimeEnable is true, additional Merkle files can be generated using -// the FS_IOC_ENABLE_VERITY ioctl. -// -// Lock order: -// -// filesystem.renameMu -// dentry.dirMu -// fileDescription.mu -// filesystem.verityMu -// dentry.hashMu -// -// Locking dentry.dirMu in multiple dentries requires that parent dentries are -// locked before child dentries, and that filesystem.renameMu is locked to -// stabilize this relationship. -package verity - -import ( - "encoding/json" - "fmt" - "math" - "strconv" - "strings" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/marshal/primitive" - "gvisor.dev/gvisor/pkg/merkletree" - "gvisor.dev/gvisor/pkg/refsvfs2" - "gvisor.dev/gvisor/pkg/sentry/arch" - fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -const ( - // Name is the default filesystem name. - Name = "verity" - - // merklePrefix is the prefix of the Merkle tree files. For example, the Merkle - // tree file for "/foo" is "/.merkle.verity.foo". - merklePrefix = ".merkle.verity." - - // merkleRootPrefix is the prefix of the Merkle tree root file. This - // needs to be different from merklePrefix to avoid name collision. - merkleRootPrefix = ".merkleroot.verity." - - // merkleOffsetInParentXattr is the extended attribute name specifying the - // offset of the child hash in its parent's Merkle tree. - merkleOffsetInParentXattr = "user.merkle.offset" - - // merkleSizeXattr is the extended attribute name specifying the size of data - // hashed by the corresponding Merkle tree. For a regular file, this is the - // file size. For a directory, this is the size of all its children's hashes. - merkleSizeXattr = "user.merkle.size" - - // childrenOffsetXattr is the extended attribute name specifying the - // names of the offset of the serialized children names in the Merkle - // tree file. - childrenOffsetXattr = "user.merkle.childrenOffset" - - // childrenSizeXattr is the extended attribute name specifying the size - // of the serialized children names. - childrenSizeXattr = "user.merkle.childrenSize" - - // sizeOfStringInt32 is the size for a 32 bit integer stored as string in - // extended attributes. The maximum value of a 32 bit integer has 10 digits. - sizeOfStringInt32 = 10 -) - -var ( - // action specifies the action towards detected violation. - action ViolationAction - - // verityMu synchronizes concurrent operations that enable verity and perform - // verification checks. - verityMu sync.RWMutex -) - -// HashAlgorithm is a type specifying the algorithm used to hash the file -// content. -type HashAlgorithm int - -// ViolationAction is a type specifying the action when an integrity violation -// is detected. -type ViolationAction int - -const ( - // PanicOnViolation terminates the sentry on detected violation. - PanicOnViolation ViolationAction = 0 - // ErrorOnViolation returns an error from the violating system call on - // detected violation. - ErrorOnViolation = 1 -) - -// Currently supported hashing algorithms include SHA256 and SHA512. -const ( - SHA256 HashAlgorithm = iota - SHA512 -) - -func (alg HashAlgorithm) toLinuxHashAlg() int { - switch alg { - case SHA256: - return linux.FS_VERITY_HASH_ALG_SHA256 - case SHA512: - return linux.FS_VERITY_HASH_ALG_SHA512 - default: - return 0 - } -} - -// FilesystemType implements vfs.FilesystemType. -// -// +stateify savable -type FilesystemType struct{} - -// filesystem implements vfs.FilesystemImpl. -// -// +stateify savable -type filesystem struct { - vfsfs vfs.Filesystem - - // creds is a copy of the filesystem's creator's credentials, which are - // used for accesses to the underlying file system. creds is immutable. - creds *auth.Credentials - - // allowRuntimeEnable is true if using ioctl with FS_IOC_ENABLE_VERITY - // to build Merkle trees in the verity file system is allowed. If this - // is false, no new Merkle trees can be built, and only the files that - // had Merkle trees before startup (e.g. from a host filesystem mounted - // with gofer fs) can be verified. - allowRuntimeEnable bool - - // lowerMount is the underlying file system mount. - lowerMount *vfs.Mount - - // rootDentry is the mount root Dentry for this file system, which - // stores the root hash of the whole file system in bytes. - rootDentry *dentry - - // alg is the algorithms used to hash the files in the verity file - // system. - alg HashAlgorithm - - // renameMu synchronizes renaming with non-renaming operations in order - // to ensure consistent lock ordering between dentry.dirMu in different - // dentries. - renameMu sync.RWMutex `state:"nosave"` - - // verityMu synchronizes enabling verity files, protects files or - // directories from being enabled by different threads simultaneously. - // It also ensures that verity does not access files that are being - // enabled. - // - // Also, the directory Merkle trees depends on the generated trees of - // its children. So they shouldn't be enabled the same time. This lock - // is for the whole file system to ensure that no more than one file is - // enabled the same time. - verityMu sync.RWMutex `state:"nosave"` -} - -// InternalFilesystemOptions may be passed as -// vfs.GetFilesystemOptions.InternalData to FilesystemType.GetFilesystem. -// -// +stateify savable -type InternalFilesystemOptions struct { - // RootMerkleFileName is the name of the verity root Merkle tree file. - RootMerkleFileName string - - // LowerName is the name of the filesystem wrapped by verity fs. - LowerName string - - // Alg is the algorithms used to hash the files in the verity file - // system. - Alg HashAlgorithm - - // RootHash is the root hash of the overall verity file system. - RootHash []byte - - // AllowRuntimeEnable specifies whether the verity file system allows - // enabling verification for files (i.e. building Merkle trees) during - // runtime. - AllowRuntimeEnable bool - - // LowerGetFSOptions is the file system option for the lower layer file - // system wrapped by verity file system. - LowerGetFSOptions vfs.GetFilesystemOptions - - // Action specifies the action on an integrity violation. - Action ViolationAction -} - -// Name implements vfs.FilesystemType.Name. -func (FilesystemType) Name() string { - return Name -} - -// Release implements vfs.FilesystemType.Release. -func (FilesystemType) Release(ctx context.Context) {} - -// alertIntegrityViolation alerts a violation of integrity, which usually means -// unexpected modification to the file system is detected. In ErrorOnViolation -// mode, it returns EIO, otherwise it panic. -func alertIntegrityViolation(msg string) error { - if action == ErrorOnViolation { - return syserror.EIO - } - panic(msg) -} - -// GetFilesystem implements vfs.FilesystemType.GetFilesystem. -func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - iopts, ok := opts.InternalData.(InternalFilesystemOptions) - if !ok { - ctx.Warningf("verity.FilesystemType.GetFilesystem: missing verity configs") - return nil, nil, syserror.EINVAL - } - action = iopts.Action - - // Mount the lower file system. The lower file system is wrapped inside - // verity, and should not be exposed or connected. - mopts := &vfs.MountOptions{ - GetFilesystemOptions: iopts.LowerGetFSOptions, - InternalMount: true, - } - mnt, err := vfsObj.MountDisconnected(ctx, creds, "", iopts.LowerName, mopts) - if err != nil { - return nil, nil, err - } - - fs := &filesystem{ - creds: creds.Fork(), - alg: iopts.Alg, - lowerMount: mnt, - allowRuntimeEnable: iopts.AllowRuntimeEnable, - } - fs.vfsfs.Init(vfsObj, &fstype, fs) - - // Construct the root dentry. - d := fs.newDentry() - d.refs = 1 - lowerVD := vfs.MakeVirtualDentry(mnt, mnt.Root()) - lowerVD.IncRef() - d.lowerVD = lowerVD - - rootMerkleName := merkleRootPrefix + iopts.RootMerkleFileName - - lowerMerkleVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{ - Root: lowerVD, - Start: lowerVD, - Path: fspath.Parse(rootMerkleName), - }, &vfs.GetDentryOptions{}) - - // If runtime enable is allowed, the root merkle tree may be absent. We - // should create the tree file. - if err == syserror.ENOENT && fs.allowRuntimeEnable { - lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ - Root: lowerVD, - Start: lowerVD, - Path: fspath.Parse(rootMerkleName), - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT, - Mode: 0644, - }) - if err != nil { - fs.vfsfs.DecRef(ctx) - d.DecRef(ctx) - return nil, nil, err - } - lowerMerkleFD.DecRef(ctx) - lowerMerkleVD, err = vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{ - Root: lowerVD, - Start: lowerVD, - Path: fspath.Parse(rootMerkleName), - }, &vfs.GetDentryOptions{}) - if err != nil { - fs.vfsfs.DecRef(ctx) - d.DecRef(ctx) - return nil, nil, err - } - } else if err != nil { - // Failed to get dentry for the root Merkle file. This - // indicates an unexpected modification that removed/renamed - // the root Merkle file, or it's never generated. - fs.vfsfs.DecRef(ctx) - d.DecRef(ctx) - return nil, nil, alertIntegrityViolation("Failed to find root Merkle file") - } - - // Clear the Merkle tree file if they are to be generated at runtime. - // TODO(b/182315468): Optimize the Merkle tree generate process to - // allow only updating certain files/directories. - if fs.allowRuntimeEnable { - lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ - Root: lowerMerkleVD, - Start: lowerMerkleVD, - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_TRUNC, - Mode: 0644, - }) - if err != nil { - return nil, nil, err - } - lowerMerkleFD.DecRef(ctx) - } - - d.lowerMerkleVD = lowerMerkleVD - - // Get metadata from the underlying file system. - const statMask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID - stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ - Root: lowerVD, - Start: lowerVD, - }, &vfs.StatOptions{ - Mask: statMask, - }) - if err != nil { - fs.vfsfs.DecRef(ctx) - d.DecRef(ctx) - return nil, nil, err - } - - d.mode = uint32(stat.Mode) - d.uid = stat.UID - d.gid = stat.GID - d.hash = make([]byte, len(iopts.RootHash)) - d.childrenNames = make(map[string]struct{}) - - if !d.isDir() { - ctx.Warningf("verity root must be a directory") - return nil, nil, syserror.EINVAL - } - - if !fs.allowRuntimeEnable { - // Get children names from the underlying file system. - offString, err := vfsObj.GetXattrAt(ctx, creds, &vfs.PathOperation{ - Root: lowerMerkleVD, - Start: lowerMerkleVD, - }, &vfs.GetXattrOptions{ - Name: childrenOffsetXattr, - Size: sizeOfStringInt32, - }) - if err == syserror.ENOENT || err == syserror.ENODATA { - return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenOffsetXattr, err)) - } - if err != nil { - return nil, nil, err - } - - off, err := strconv.Atoi(offString) - if err != nil { - return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenOffsetXattr, err)) - } - - sizeString, err := vfsObj.GetXattrAt(ctx, creds, &vfs.PathOperation{ - Root: lowerMerkleVD, - Start: lowerMerkleVD, - }, &vfs.GetXattrOptions{ - Name: childrenSizeXattr, - Size: sizeOfStringInt32, - }) - if err == syserror.ENOENT || err == syserror.ENODATA { - return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenSizeXattr, err)) - } - if err != nil { - return nil, nil, err - } - size, err := strconv.Atoi(sizeString) - if err != nil { - return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenSizeXattr, err)) - } - - lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ - Root: lowerMerkleVD, - Start: lowerMerkleVD, - }, &vfs.OpenOptions{ - Flags: linux.O_RDONLY, - }) - if err == syserror.ENOENT { - return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to open root Merkle file: %v", err)) - } - if err != nil { - return nil, nil, err - } - - childrenNames := make([]byte, size) - if _, err := lowerMerkleFD.PRead(ctx, usermem.BytesIOSequence(childrenNames), int64(off), vfs.ReadOptions{}); err != nil { - return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to read root children map: %v", err)) - } - - if err := json.Unmarshal(childrenNames, &d.childrenNames); err != nil { - return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to deserialize childrenNames: %v", err)) - } - - if err := fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil { - return nil, nil, err - } - } - - d.hashMu.Lock() - copy(d.hash, iopts.RootHash) - d.hashMu.Unlock() - d.vfsd.Init(d) - - fs.rootDentry = d - - return &fs.vfsfs, &d.vfsd, nil -} - -// Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release(ctx context.Context) { - fs.lowerMount.DecRef(ctx) -} - -// MountOptions implements vfs.FilesystemImpl.MountOptions. -func (fs *filesystem) MountOptions() string { - return "" -} - -// dentry implements vfs.DentryImpl. -// -// +stateify savable -type dentry struct { - vfsd vfs.Dentry - - refs int64 - - // fs is the owning filesystem. fs is immutable. - fs *filesystem - - // mode, uid, gid and size are the file mode, owner, group, and size of - // the file in the underlying file system. They are set when a dentry - // is initialized, and never modified. - mode uint32 - uid uint32 - gid uint32 - size uint32 - - // parent is the dentry corresponding to this dentry's parent directory. - // name is this dentry's name in parent. If this dentry is a filesystem - // root, parent is nil and name is the empty string. parent and name are - // protected by fs.renameMu. - parent *dentry - name string - - // If this dentry represents a directory, children maps the names of - // children for which dentries have been instantiated to those dentries, - // and dirents (if not nil) is a cache of dirents as returned by - // directoryFDs representing this directory. children is protected by - // dirMu. - dirMu sync.Mutex `state:"nosave"` - children map[string]*dentry - - // childrenNames stores the name of all children of the dentry. This is - // used by verity to check whether a child is expected. This is only - // populated by enableVerity. childrenNames is also protected by dirMu. - childrenNames map[string]struct{} - - // lowerVD is the VirtualDentry in the underlying file system. It is - // never modified after initialized. - lowerVD vfs.VirtualDentry - - // lowerMerkleVD is the VirtualDentry of the corresponding Merkle tree - // in the underlying file system. It is never modified after - // initialized. - lowerMerkleVD vfs.VirtualDentry - - // symlinkTarget is the target path of a symlink file in the underlying filesystem. - symlinkTarget string - - // hash is the calculated hash for the current file or directory. hash - // is protected by hashMu. - hashMu sync.RWMutex `state:"nosave"` - hash []byte -} - -// newDentry creates a new dentry representing the given verity file. The -// dentry initially has no references; it is the caller's responsibility to set -// the dentry's reference count and/or call dentry.destroy() as appropriate. -// The dentry is initially invalid in that it contains no underlying dentry; -// the caller is responsible for setting them. -func (fs *filesystem) newDentry() *dentry { - d := &dentry{ - fs: fs, - } - d.vfsd.Init(d) - refsvfs2.Register(d) - return d -} - -// IncRef implements vfs.DentryImpl.IncRef. -func (d *dentry) IncRef() { - r := atomic.AddInt64(&d.refs, 1) - if d.LogRefs() { - refsvfs2.LogIncRef(d, r) - } -} - -// TryIncRef implements vfs.DentryImpl.TryIncRef. -func (d *dentry) TryIncRef() bool { - for { - r := atomic.LoadInt64(&d.refs) - if r <= 0 { - return false - } - if atomic.CompareAndSwapInt64(&d.refs, r, r+1) { - if d.LogRefs() { - refsvfs2.LogTryIncRef(d, r+1) - } - return true - } - } -} - -// DecRef implements vfs.DentryImpl.DecRef. -func (d *dentry) DecRef(ctx context.Context) { - r := atomic.AddInt64(&d.refs, -1) - if d.LogRefs() { - refsvfs2.LogDecRef(d, r) - } - if r == 0 { - d.fs.renameMu.Lock() - d.checkDropLocked(ctx) - d.fs.renameMu.Unlock() - } else if r < 0 { - panic("verity.dentry.DecRef() called without holding a reference") - } -} - -func (d *dentry) decRefLocked(ctx context.Context) { - r := atomic.AddInt64(&d.refs, -1) - if d.LogRefs() { - refsvfs2.LogDecRef(d, r) - } - if r == 0 { - d.checkDropLocked(ctx) - } else if r < 0 { - panic("verity.dentry.decRefLocked() called without holding a reference") - } -} - -// checkDropLocked should be called after d's reference count becomes 0 or it -// becomes deleted. -func (d *dentry) checkDropLocked(ctx context.Context) { - // Dentries with a positive reference count must be retained. Dentries - // with a negative reference count have already been destroyed. - if atomic.LoadInt64(&d.refs) != 0 { - return - } - // Refs is still zero; destroy it. - d.destroyLocked(ctx) - return -} - -// destroyLocked destroys the dentry. -// -// Preconditions: -// * d.fs.renameMu must be locked for writing. -// * d.refs == 0. -func (d *dentry) destroyLocked(ctx context.Context) { - switch atomic.LoadInt64(&d.refs) { - case 0: - // Mark the dentry destroyed. - atomic.StoreInt64(&d.refs, -1) - case -1: - panic("verity.dentry.destroyLocked() called on already destroyed dentry") - default: - panic("verity.dentry.destroyLocked() called with references on the dentry") - } - - if d.lowerVD.Ok() { - d.lowerVD.DecRef(ctx) - } - if d.lowerMerkleVD.Ok() { - d.lowerMerkleVD.DecRef(ctx) - } - if d.parent != nil { - d.parent.dirMu.Lock() - if !d.vfsd.IsDead() { - delete(d.parent.children, d.name) - } - d.parent.dirMu.Unlock() - d.parent.decRefLocked(ctx) - } - refsvfs2.Unregister(d) -} - -// RefType implements refsvfs2.CheckedObject.Type. -func (d *dentry) RefType() string { - return "verity.dentry" -} - -// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. -func (d *dentry) LeakMessage() string { - return fmt.Sprintf("[verity.dentry %p] reference count of %d instead of -1", d, atomic.LoadInt64(&d.refs)) -} - -// LogRefs implements refsvfs2.CheckedObject.LogRefs. -// -// This should only be set to true for debugging purposes, as it can generate an -// extremely large amount of output and drastically degrade performance. -func (d *dentry) LogRefs() bool { - return false -} - -// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. -func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) { - //TODO(b/159261227): Implement InotifyWithParent. -} - -// Watches implements vfs.DentryImpl.Watches. -func (d *dentry) Watches() *vfs.Watches { - //TODO(b/159261227): Implement Watches. - return nil -} - -// OnZeroWatches implements vfs.DentryImpl.OnZeroWatches. -func (d *dentry) OnZeroWatches(context.Context) { - //TODO(b/159261227): Implement OnZeroWatches. -} - -func (d *dentry) isSymlink() bool { - return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK -} - -func (d *dentry) isDir() bool { - return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFDIR -} - -func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error { - return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))) -} - -// verityEnabled checks whether the file is enabled with verity features. It -// should always be true if runtime enable is not allowed. In runtime enable -// mode, it returns true if the target has been enabled with -// ioctl(FS_IOC_ENABLE_VERITY). -func (d *dentry) verityEnabled() bool { - d.hashMu.RLock() - defer d.hashMu.RUnlock() - return !d.fs.allowRuntimeEnable || len(d.hash) != 0 -} - -// getLowerAt returns the dentry in the underlying file system, which is -// represented by filename relative to d. -func (d *dentry) getLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, filename string) (vfs.VirtualDentry, error) { - return vfsObj.GetDentryAt(ctx, d.fs.creds, &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - Path: fspath.Parse(filename), - }, &vfs.GetDentryOptions{}) -} - -func (d *dentry) readlink(ctx context.Context) (string, error) { - vfsObj := d.fs.vfsfs.VirtualFilesystem() - if d.verityEnabled() { - stat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - }, &vfs.StatOptions{}) - if err != nil { - return "", err - } - d.dirMu.Lock() - defer d.dirMu.Unlock() - if err := d.fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil { - return "", err - } - return d.symlinkTarget, nil - } - - return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{ - Root: d.lowerVD, - Start: d.lowerVD, - }) -} - -// FileDescription implements vfs.FileDescriptionImpl for verity fds. -// FileDescription is a wrapper of the underlying lowerFD, with support to build -// Merkle trees through the Linux fs-verity API to verify contents read from -// lowerFD. -// -// +stateify savable -type fileDescription struct { - vfsfd vfs.FileDescription - vfs.FileDescriptionDefaultImpl - - // d is the corresponding dentry to the fileDescription. - d *dentry - - // isDir specifies whehter the fileDescription points to a directory. - isDir bool - - // lowerFD is the FileDescription corresponding to the file in the - // underlying file system. - lowerFD *vfs.FileDescription - - // merkleReader is the read-only FileDescription corresponding to the - // Merkle tree file in the underlying file system. - merkleReader *vfs.FileDescription - - // merkleWriter is the FileDescription corresponding to the Merkle tree - // file in the underlying file system for writing. This should only be - // used when allowRuntimeEnable is set to true. - merkleWriter *vfs.FileDescription - - // parentMerkleWriter is the FileDescription of the Merkle tree for the - // directory that contains the current file/directory. This is only used - // if allowRuntimeEnable is set to true. - parentMerkleWriter *vfs.FileDescription - - // off is the file offset. off is protected by mu. - mu sync.Mutex `state:"nosave"` - off int64 -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *fileDescription) Release(ctx context.Context) { - fd.lowerFD.DecRef(ctx) - fd.merkleReader.DecRef(ctx) - if fd.merkleWriter != nil { - fd.merkleWriter.DecRef(ctx) - } - if fd.parentMerkleWriter != nil { - fd.parentMerkleWriter.DecRef(ctx) - } -} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - // TODO(b/162788573): Add integrity check for metadata. - stat, err := fd.lowerFD.Stat(ctx, opts) - if err != nil { - return linux.Statx{}, err - } - fd.d.dirMu.Lock() - if fd.d.verityEnabled() { - if err := fd.d.fs.verifyStatAndChildrenLocked(ctx, fd.d, stat); err != nil { - return linux.Statx{}, err - } - } - fd.d.dirMu.Unlock() - return stat, nil -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - // Verity files are read-only. - return syserror.EPERM -} - -// IterDirents implements vfs.FileDescriptionImpl.IterDirents. -func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - if !fd.d.isDir() { - return syserror.ENOTDIR - } - fd.mu.Lock() - defer fd.mu.Unlock() - - var ds []vfs.Dirent - err := fd.lowerFD.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error { - // Do not include the Merkle tree files. - if strings.Contains(dirent.Name, merklePrefix) || strings.Contains(dirent.Name, merkleRootPrefix) { - return nil - } - if fd.d.verityEnabled() { - // Verify that the child is expected. - if dirent.Name != "." && dirent.Name != ".." { - if _, ok := fd.d.childrenNames[dirent.Name]; !ok { - return alertIntegrityViolation(fmt.Sprintf("Unexpected children %s", dirent.Name)) - } - } - } - ds = append(ds, dirent) - return nil - })) - - if err != nil { - return err - } - - // The result should contain all children plus "." and "..". - if fd.d.verityEnabled() && len(ds) != len(fd.d.childrenNames)+2 { - return alertIntegrityViolation(fmt.Sprintf("Unexpected children number %d", len(ds))) - } - - for fd.off < int64(len(ds)) { - if err := cb.Handle(ds[fd.off]); err != nil { - return err - } - fd.off++ - } - return nil -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *fileDescription) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fd.mu.Lock() - defer fd.mu.Unlock() - n := int64(0) - switch whence { - case linux.SEEK_SET: - // use offset as specified - case linux.SEEK_CUR: - n = fd.off - case linux.SEEK_END: - n = int64(fd.d.size) - default: - return 0, syserror.EINVAL - } - if offset > math.MaxInt64-n { - return 0, syserror.EINVAL - } - offset += n - if offset < 0 { - return 0, syserror.EINVAL - } - fd.off = offset - return offset, nil -} - -// generateMerkleLocked generates a Merkle tree file for fd. If fd points to a -// file /foo/bar, a Merkle tree file /foo/.merkle.verity.bar is generated. The -// hash of the generated Merkle tree and the data size is returned. If fd -// points to a regular file, the data is the content of the file. If fd points -// to a directory, the data is all hashes of its children, written to the Merkle -// tree file. If fd represents a symlink, the data is empty and nothing is written -// to the Merkle tree file. -// -// Preconditions: fd.d.fs.verityMu must be locked. -func (fd *fileDescription) generateMerkleLocked(ctx context.Context) ([]byte, uint64, error) { - fdReader := FileReadWriteSeeker{ - FD: fd.lowerFD, - Ctx: ctx, - } - merkleReader := FileReadWriteSeeker{ - FD: fd.merkleReader, - Ctx: ctx, - } - merkleWriter := FileReadWriteSeeker{ - FD: fd.merkleWriter, - Ctx: ctx, - } - - stat, err := fd.lowerFD.Stat(ctx, vfs.StatOptions{}) - if err != nil { - return nil, 0, err - } - - params := &merkletree.GenerateParams{ - TreeReader: &merkleReader, - TreeWriter: &merkleWriter, - Children: fd.d.childrenNames, - //TODO(b/156980949): Support passing other hash algorithms. - HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(), - Name: fd.d.name, - Mode: uint32(stat.Mode), - UID: stat.UID, - GID: stat.GID, - } - - switch atomic.LoadUint32(&fd.d.mode) & linux.S_IFMT { - case linux.S_IFREG: - // For a regular file, generate a Merkle tree based on its - // content. - params.File = &fdReader - params.Size = int64(stat.Size) - params.DataAndTreeInSameFile = false - case linux.S_IFDIR: - // For a directory, generate a Merkle tree based on the hashes - // of its children that has already been written to the Merkle - // tree file. - merkleStat, err := fd.merkleReader.Stat(ctx, vfs.StatOptions{}) - if err != nil { - return nil, 0, err - } - - params.Size = int64(merkleStat.Size) - params.File = &merkleReader - params.DataAndTreeInSameFile = true - case linux.S_IFLNK: - // For a symlink, generate a Merkle tree file but do not write the root hash - // of the target file content to it. Return a hash of a VerityDescriptor object - // which includes the symlink target name. - target, err := fd.d.readlink(ctx) - if err != nil { - return nil, 0, err - } - - params.Size = int64(stat.Size) - params.DataAndTreeInSameFile = false - params.SymlinkTarget = target - default: - // TODO(b/167728857): Investigate whether and how we should - // enable other types of file. - return nil, 0, syserror.EINVAL - } - hash, err := merkletree.Generate(params) - return hash, uint64(params.Size), err -} - -// recordChildrenLocked writes the names of fd's children into the -// corresponding Merkle tree file, and saves the offset/size of the map into -// xattrs. -// -// Preconditions: -// * fd.d.fs.verityMu must be locked. -// * fd.d.isDir() == true. -func (fd *fileDescription) recordChildrenLocked(ctx context.Context) error { - // Record the children names in the Merkle tree file. - childrenNames, err := json.Marshal(fd.d.childrenNames) - if err != nil { - return err - } - - stat, err := fd.merkleWriter.Stat(ctx, vfs.StatOptions{}) - if err != nil { - return err - } - - if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ - Name: childrenOffsetXattr, - Value: strconv.Itoa(int(stat.Size)), - }); err != nil { - return err - } - if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ - Name: childrenSizeXattr, - Value: strconv.Itoa(len(childrenNames)), - }); err != nil { - return err - } - - if _, err = fd.merkleWriter.Write(ctx, usermem.BytesIOSequence(childrenNames), vfs.WriteOptions{}); err != nil { - return err - } - - return nil -} - -// enableVerity enables verity features on fd by generating a Merkle tree file -// and stores its hash in its parent directory's Merkle tree. -func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) { - if !fd.d.fs.allowRuntimeEnable { - return 0, syserror.EPERM - } - - fd.d.fs.verityMu.Lock() - defer fd.d.fs.verityMu.Unlock() - - // In allowRuntimeEnable mode, the underlying fd and read/write fd for - // the Merkle tree file should have all been initialized. For any file - // or directory other than the root, the parent Merkle tree file should - // have also been initialized. - if fd.lowerFD == nil || fd.merkleReader == nil || fd.merkleWriter == nil || (fd.parentMerkleWriter == nil && fd.d != fd.d.fs.rootDentry) { - return 0, alertIntegrityViolation("Unexpected verity fd: missing expected underlying fds") - } - - hash, dataSize, err := fd.generateMerkleLocked(ctx) - if err != nil { - return 0, err - } - - if fd.parentMerkleWriter != nil { - stat, err := fd.parentMerkleWriter.Stat(ctx, vfs.StatOptions{}) - if err != nil { - return 0, err - } - - // Write the hash of fd to the parent directory's Merkle tree - // file, as it should be part of the parent Merkle tree data. - // parentMerkleWriter is open with O_APPEND, so it should write - // directly to the end of the file. - if _, err = fd.parentMerkleWriter.Write(ctx, usermem.BytesIOSequence(hash), vfs.WriteOptions{}); err != nil { - return 0, err - } - - // Record the offset of the hash of fd in parent directory's - // Merkle tree file. - if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ - Name: merkleOffsetInParentXattr, - Value: strconv.Itoa(int(stat.Size)), - }); err != nil { - return 0, err - } - - // Add the current child's name to parent's childrenNames. - fd.d.parent.childrenNames[fd.d.name] = struct{}{} - } - - // Record the size of the data being hashed for fd. - if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ - Name: merkleSizeXattr, - Value: strconv.Itoa(int(dataSize)), - }); err != nil { - return 0, err - } - - if fd.d.isDir() { - if err := fd.recordChildrenLocked(ctx); err != nil { - return 0, err - } - } - fd.d.hashMu.Lock() - fd.d.hash = hash - fd.d.hashMu.Unlock() - return 0, nil -} - -// measureVerity returns the hash of fd, saved in verityDigest. -func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest usermem.Addr) (uintptr, error) { - t := kernel.TaskFromContext(ctx) - if t == nil { - return 0, syserror.EINVAL - } - var metadata linux.DigestMetadata - - fd.d.hashMu.RLock() - defer fd.d.hashMu.RUnlock() - - // If allowRuntimeEnable is true, an empty fd.d.hash indicates that - // verity is not enabled for the file. If allowRuntimeEnable is false, - // this is an integrity violation because all files should have verity - // enabled, in which case fd.d.hash should be set. - if len(fd.d.hash) == 0 { - if fd.d.fs.allowRuntimeEnable { - return 0, syserror.ENODATA - } - return 0, alertIntegrityViolation("Ioctl measureVerity: no hash found") - } - - // The first part of VerityDigest is the metadata. - if _, err := metadata.CopyIn(t, verityDigest); err != nil { - return 0, err - } - if metadata.DigestSize < uint16(len(fd.d.hash)) { - return 0, syserror.EOVERFLOW - } - - // Populate the output digest size, since DigestSize is both input and - // output. - metadata.DigestSize = uint16(len(fd.d.hash)) - - // First copy the metadata. - if _, err := metadata.CopyOut(t, verityDigest); err != nil { - return 0, err - } - - // Now copy the root hash bytes to the memory after metadata. - _, err := t.CopyOutBytes(usermem.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.hash) - return 0, err -} - -func (fd *fileDescription) verityFlags(ctx context.Context, flags usermem.Addr) (uintptr, error) { - f := int32(0) - - fd.d.hashMu.RLock() - // All enabled files should store a hash. This flag is not settable via - // FS_IOC_SETFLAGS. - if len(fd.d.hash) != 0 { - f |= linux.FS_VERITY_FL - } - fd.d.hashMu.RUnlock() - - t := kernel.TaskFromContext(ctx) - if t == nil { - return 0, syserror.EINVAL - } - _, err := primitive.CopyInt32Out(t, flags, f) - return 0, err -} - -// Ioctl implements vfs.FileDescriptionImpl.Ioctl. -func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { - switch cmd := args[1].Uint(); cmd { - case linux.FS_IOC_ENABLE_VERITY: - return fd.enableVerity(ctx) - case linux.FS_IOC_MEASURE_VERITY: - return fd.measureVerity(ctx, args[2].Pointer()) - case linux.FS_IOC_GETFLAGS: - return fd.verityFlags(ctx, args[2].Pointer()) - default: - // TODO(b/169682228): Investigate which ioctl commands should - // be allowed. - return 0, syserror.ENOSYS - } -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - // Implement Read with PRead by setting offset. - fd.mu.Lock() - n, err := fd.PRead(ctx, dst, fd.off, opts) - fd.off += n - fd.mu.Unlock() - return n, err -} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - // No need to verify if the file is not enabled yet in - // allowRuntimeEnable mode. - if !fd.d.verityEnabled() { - return fd.lowerFD.PRead(ctx, dst, offset, opts) - } - - fd.d.fs.verityMu.RLock() - defer fd.d.fs.verityMu.RUnlock() - // dataSize is the size of the whole file. - dataSize, err := fd.merkleReader.GetXattr(ctx, &vfs.GetXattrOptions{ - Name: merkleSizeXattr, - Size: sizeOfStringInt32, - }) - - // The Merkle tree file for the child should have been created and - // contains the expected xattrs. If the xattr does not exist, it - // indicates unexpected modifications to the file system. - if err == syserror.ENODATA { - return 0, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err)) - } - if err != nil { - return 0, err - } - - // The dataSize xattr should be an integer. If it's not, it indicates - // unexpected modifications to the file system. - size, err := strconv.Atoi(dataSize) - if err != nil { - return 0, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", merkleSizeXattr, err)) - } - - dataReader := FileReadWriteSeeker{ - FD: fd.lowerFD, - Ctx: ctx, - } - - merkleReader := FileReadWriteSeeker{ - FD: fd.merkleReader, - Ctx: ctx, - } - - fd.d.hashMu.RLock() - n, err := merkletree.Verify(&merkletree.VerifyParams{ - Out: dst.Writer(ctx), - File: &dataReader, - Tree: &merkleReader, - Size: int64(size), - Name: fd.d.name, - Mode: fd.d.mode, - UID: fd.d.uid, - GID: fd.d.gid, - Children: fd.d.childrenNames, - //TODO(b/156980949): Support passing other hash algorithms. - HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(), - ReadOffset: offset, - ReadSize: dst.NumBytes(), - Expected: fd.d.hash, - DataAndTreeInSameFile: false, - }) - fd.d.hashMu.RUnlock() - if err != nil { - return 0, alertIntegrityViolation(fmt.Sprintf("Verification failed: %v", err)) - } - return n, err -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EROFS -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return 0, syserror.EROFS -} - -// LockBSD implements vfs.FileDescriptionImpl.LockBSD. -func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error { - return fd.lowerFD.LockBSD(ctx, ownerPID, t, block) -} - -// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD. -func (fd *fileDescription) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error { - return fd.lowerFD.UnlockBSD(ctx) -} - -// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. -func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error { - return fd.lowerFD.LockPOSIX(ctx, uid, ownerPID, t, r, block) -} - -// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX. -func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error { - return fd.lowerFD.UnlockPOSIX(ctx, uid, r) -} - -// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX. -func (fd *fileDescription) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) { - return fd.lowerFD.TestPOSIX(ctx, uid, t, r) -} - -// FileReadWriteSeeker is a helper struct to pass a vfs.FileDescription as -// io.Reader/io.Writer/io.ReadSeeker/io.ReaderAt/io.WriterAt/etc. -type FileReadWriteSeeker struct { - FD *vfs.FileDescription - Ctx context.Context - ROpts vfs.ReadOptions - WOpts vfs.WriteOptions -} - -// ReadAt implements io.ReaderAt.ReadAt. -func (f *FileReadWriteSeeker) ReadAt(p []byte, off int64) (int, error) { - dst := usermem.BytesIOSequence(p) - n, err := f.FD.PRead(f.Ctx, dst, off, f.ROpts) - return int(n), err -} - -// Read implements io.ReadWriteSeeker.Read. -func (f *FileReadWriteSeeker) Read(p []byte) (int, error) { - dst := usermem.BytesIOSequence(p) - n, err := f.FD.Read(f.Ctx, dst, f.ROpts) - return int(n), err -} - -// Seek implements io.ReadWriteSeeker.Seek. -func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) { - return f.FD.Seek(f.Ctx, offset, int32(whence)) -} - -// WriteAt implements io.WriterAt.WriteAt. -func (f *FileReadWriteSeeker) WriteAt(p []byte, off int64) (int, error) { - dst := usermem.BytesIOSequence(p) - n, err := f.FD.PWrite(f.Ctx, dst, off, f.WOpts) - return int(n), err -} - -// Write implements io.ReadWriteSeeker.Write. -func (f *FileReadWriteSeeker) Write(p []byte) (int, error) { - buf := usermem.BytesIOSequence(p) - n, err := f.FD.Write(f.Ctx, buf, f.WOpts) - return int(n), err -} |