summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2021-04-03 02:39:12 +0000
committergVisor bot <gvisor-bot@google.com>2021-04-03 02:39:12 +0000
commit851ddb10033a81d9f5c4a1d685c39bbc9e2cd029 (patch)
treeba3588c875cd677b5414abcb7cc0cf011264c79b /pkg/sentry
parentd70f6e164f7eb42d93ed2b5e4a15bc49a6fc5a0e (diff)
parent491b106d62ba97cafb63252bf7d5bdd4749d417a (diff)
Merge release-20210322.0-36-g491b106d6 (automated)
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/fsimpl/verity/filesystem.go1109
-rw-r--r--pkg/sentry/fsimpl/verity/save_restore.go27
-rw-r--r--pkg/sentry/fsimpl/verity/verity.go1402
-rw-r--r--pkg/sentry/fsimpl/verity/verity_state_autogen.go237
4 files changed, 2775 insertions, 0 deletions
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
new file mode 100644
index 000000000..6cb1a23e0
--- /dev/null
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -0,0 +1,1109 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package verity
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "io"
+ "strconv"
+ "strings"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/merkletree"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// Sync implements vfs.FilesystemImpl.Sync.
+func (fs *filesystem) Sync(ctx context.Context) error {
+ // All files should be read-only.
+ return nil
+}
+
+var dentrySlicePool = sync.Pool{
+ New: func() interface{} {
+ ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity
+ return &ds
+ },
+}
+
+func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry {
+ if ds == nil {
+ ds = dentrySlicePool.Get().(*[]*dentry)
+ }
+ *ds = append(*ds, d)
+ return ds
+}
+
+// Preconditions: ds != nil.
+func putDentrySlice(ds *[]*dentry) {
+ // Allow dentries to be GC'd.
+ for i := range *ds {
+ (*ds)[i] = nil
+ }
+ *ds = (*ds)[:0]
+ dentrySlicePool.Put(ds)
+}
+
+// renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls
+// dentry.checkDropLocked on all dentries in *ds with fs.renameMu locked for
+// writing.
+//
+// ds is a pointer-to-pointer since defer evaluates its arguments immediately,
+// but dentry slices are allocated lazily, and it's much easier to say "defer
+// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
+// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
+func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
+ fs.renameMu.RUnlock()
+ if *ds == nil {
+ return
+ }
+ if len(**ds) != 0 {
+ fs.renameMu.Lock()
+ for _, d := range **ds {
+ d.checkDropLocked(ctx)
+ }
+ fs.renameMu.Unlock()
+ }
+ putDentrySlice(*ds)
+}
+
+func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
+ if *ds == nil {
+ fs.renameMu.Unlock()
+ return
+ }
+ for _, d := range **ds {
+ d.checkDropLocked(ctx)
+ }
+ fs.renameMu.Unlock()
+ putDentrySlice(*ds)
+}
+
+// stepLocked resolves rp.Component() to an existing file, starting from the
+// given directory.
+//
+// Dentries which may have a reference count of zero, and which therefore
+// should be dropped once traversal is complete, are appended to ds.
+//
+// Preconditions:
+// * fs.renameMu must be locked.
+// * d.dirMu must be locked.
+// * !rp.Done().
+func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, error) {
+ if !d.isDir() {
+ return nil, syserror.ENOTDIR
+ }
+
+ if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
+ return nil, err
+ }
+
+afterSymlink:
+ name := rp.Component()
+ if name == "." {
+ rp.Advance()
+ return d, nil
+ }
+ if name == ".." {
+ if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
+ return nil, err
+ } else if isRoot || d.parent == nil {
+ rp.Advance()
+ return d, nil
+ }
+ if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
+ return nil, err
+ }
+ rp.Advance()
+ return d.parent, nil
+ }
+ child, err := fs.getChildLocked(ctx, d, name, ds)
+ if err != nil {
+ return nil, err
+ }
+ if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
+ return nil, err
+ }
+ if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
+ target, err := child.readlink(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if err := rp.HandleSymlink(target); err != nil {
+ return nil, err
+ }
+ goto afterSymlink // don't check the current directory again
+ }
+ rp.Advance()
+ return child, nil
+}
+
+// verifyChildLocked verifies the hash of child against the already verified
+// hash of the parent to ensure the child is expected. verifyChild triggers a
+// sentry panic if unexpected modifications to the file system are detected. In
+// ErrorOnViolation mode it returns a syserror instead.
+//
+// Preconditions:
+// * fs.renameMu must be locked.
+// * d.dirMu must be locked.
+//
+// TODO(b/166474175): Investigate all possible errors returned in this
+// function, and make sure we differentiate all errors that indicate unexpected
+// modifications to the file system from the ones that are not harmful.
+func (fs *filesystem) verifyChildLocked(ctx context.Context, parent *dentry, child *dentry) (*dentry, error) {
+ vfsObj := fs.vfsfs.VirtualFilesystem()
+
+ // Get the path to the child dentry. This is only used to provide path
+ // information in failure case.
+ childPath, err := vfsObj.PathnameWithDeleted(ctx, child.fs.rootDentry.lowerVD, child.lowerVD)
+ if err != nil {
+ return nil, err
+ }
+
+ fs.verityMu.RLock()
+ defer fs.verityMu.RUnlock()
+ // Read the offset of the child from the extended attributes of the
+ // corresponding Merkle tree file.
+ // This is the offset of the hash for child in its parent's Merkle tree
+ // file.
+ off, err := vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: child.lowerMerkleVD,
+ Start: child.lowerMerkleVD,
+ }, &vfs.GetXattrOptions{
+ Name: merkleOffsetInParentXattr,
+ Size: sizeOfStringInt32,
+ })
+
+ // The Merkle tree file for the child should have been created and
+ // contains the expected xattrs. If the file or the xattr does not
+ // exist, it indicates unexpected modifications to the file system.
+ if err == syserror.ENOENT || err == syserror.ENODATA {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleOffsetInParentXattr, childPath, err))
+ }
+ if err != nil {
+ return nil, err
+ }
+ // The offset xattr should be an integer. If it's not, it indicates
+ // unexpected modifications to the file system.
+ offset, err := strconv.Atoi(off)
+ if err != nil {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s for %s to int: %v", merkleOffsetInParentXattr, childPath, err))
+ }
+
+ // Open parent Merkle tree file to read and verify child's hash.
+ parentMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: parent.lowerMerkleVD,
+ Start: parent.lowerMerkleVD,
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDONLY,
+ })
+
+ // The parent Merkle tree file should have been created. If it's
+ // missing, it indicates an unexpected modification to the file system.
+ if err == syserror.ENOENT {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Failed to open parent Merkle file for %s: %v", childPath, err))
+ }
+ if err != nil {
+ return nil, err
+ }
+
+ // dataSize is the size of raw data for the Merkle tree. For a file,
+ // dataSize is the size of the whole file. For a directory, dataSize is
+ // the size of all its children's hashes.
+ dataSize, err := parentMerkleFD.GetXattr(ctx, &vfs.GetXattrOptions{
+ Name: merkleSizeXattr,
+ Size: sizeOfStringInt32,
+ })
+
+ // The Merkle tree file for the child should have been created and
+ // contains the expected xattrs. If the file or the xattr does not
+ // exist, it indicates unexpected modifications to the file system.
+ if err == syserror.ENOENT || err == syserror.ENODATA {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for %s: %v", merkleSizeXattr, childPath, err))
+ }
+ if err != nil {
+ return nil, err
+ }
+
+ // The dataSize xattr should be an integer. If it's not, it indicates
+ // unexpected modifications to the file system.
+ parentSize, err := strconv.Atoi(dataSize)
+ if err != nil {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s for %s to int: %v", merkleSizeXattr, childPath, err))
+ }
+
+ fdReader := FileReadWriteSeeker{
+ FD: parentMerkleFD,
+ Ctx: ctx,
+ }
+
+ parentStat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: parent.lowerVD,
+ Start: parent.lowerVD,
+ }, &vfs.StatOptions{})
+ if err == syserror.ENOENT {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Failed to get parent stat for %s: %v", childPath, err))
+ }
+ if err != nil {
+ return nil, err
+ }
+
+ // Since we are verifying against a directory Merkle tree, buf should
+ // contain the hash of the children in the parent Merkle tree when
+ // Verify returns with success.
+ var buf bytes.Buffer
+ parent.hashMu.RLock()
+ _, err = merkletree.Verify(&merkletree.VerifyParams{
+ Out: &buf,
+ File: &fdReader,
+ Tree: &fdReader,
+ Size: int64(parentSize),
+ Name: parent.name,
+ Mode: uint32(parentStat.Mode),
+ UID: parentStat.UID,
+ GID: parentStat.GID,
+ Children: parent.childrenNames,
+ //TODO(b/156980949): Support passing other hash algorithms.
+ HashAlgorithms: fs.alg.toLinuxHashAlg(),
+ ReadOffset: int64(offset),
+ ReadSize: int64(merkletree.DigestSize(fs.alg.toLinuxHashAlg())),
+ Expected: parent.hash,
+ DataAndTreeInSameFile: true,
+ })
+ parent.hashMu.RUnlock()
+ if err != nil && err != io.EOF {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Verification for %s failed: %v", childPath, err))
+ }
+
+ // Cache child hash when it's verified the first time.
+ child.hashMu.Lock()
+ if len(child.hash) == 0 {
+ child.hash = buf.Bytes()
+ }
+ child.hashMu.Unlock()
+ return child, nil
+}
+
+// verifyStatAndChildrenLocked verifies the stat and children names against the
+// verified hash. The mode/uid/gid and childrenNames of the file is cached
+// after verified.
+//
+// Preconditions: d.dirMu must be locked.
+func (fs *filesystem) verifyStatAndChildrenLocked(ctx context.Context, d *dentry, stat linux.Statx) error {
+ vfsObj := fs.vfsfs.VirtualFilesystem()
+
+ // Get the path to the child dentry. This is only used to provide path
+ // information in failure case.
+ childPath, err := vfsObj.PathnameWithDeleted(ctx, d.fs.rootDentry.lowerVD, d.lowerVD)
+ if err != nil {
+ return err
+ }
+
+ fs.verityMu.RLock()
+ defer fs.verityMu.RUnlock()
+
+ fd, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: d.lowerMerkleVD,
+ Start: d.lowerMerkleVD,
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDONLY,
+ })
+ if err == syserror.ENOENT {
+ return alertIntegrityViolation(fmt.Sprintf("Failed to open merkle file for %s: %v", childPath, err))
+ }
+ if err != nil {
+ return err
+ }
+
+ merkleSize, err := fd.GetXattr(ctx, &vfs.GetXattrOptions{
+ Name: merkleSizeXattr,
+ Size: sizeOfStringInt32,
+ })
+
+ if err == syserror.ENODATA {
+ return alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", merkleSizeXattr, childPath, err))
+ }
+ if err != nil {
+ return err
+ }
+
+ size, err := strconv.Atoi(merkleSize)
+ if err != nil {
+ return alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s for %s to int: %v", merkleSizeXattr, childPath, err))
+ }
+
+ if d.isDir() && len(d.childrenNames) == 0 {
+ childrenOffString, err := fd.GetXattr(ctx, &vfs.GetXattrOptions{
+ Name: childrenOffsetXattr,
+ Size: sizeOfStringInt32,
+ })
+
+ if err == syserror.ENODATA {
+ return alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", childrenOffsetXattr, childPath, err))
+ }
+ if err != nil {
+ return err
+ }
+ childrenOffset, err := strconv.Atoi(childrenOffString)
+ if err != nil {
+ return alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenOffsetXattr, err))
+ }
+
+ childrenSizeString, err := fd.GetXattr(ctx, &vfs.GetXattrOptions{
+ Name: childrenSizeXattr,
+ Size: sizeOfStringInt32,
+ })
+
+ if err == syserror.ENODATA {
+ return alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s for merkle file of %s: %v", childrenSizeXattr, childPath, err))
+ }
+ if err != nil {
+ return err
+ }
+ childrenSize, err := strconv.Atoi(childrenSizeString)
+ if err != nil {
+ return alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenSizeXattr, err))
+ }
+
+ childrenNames := make([]byte, childrenSize)
+ if _, err := fd.PRead(ctx, usermem.BytesIOSequence(childrenNames), int64(childrenOffset), vfs.ReadOptions{}); err != nil {
+ return alertIntegrityViolation(fmt.Sprintf("Failed to read children map for %s: %v", childPath, err))
+ }
+
+ if err := json.Unmarshal(childrenNames, &d.childrenNames); err != nil {
+ return alertIntegrityViolation(fmt.Sprintf("Failed to deserialize childrenNames of %s: %v", childPath, err))
+ }
+ }
+
+ fdReader := FileReadWriteSeeker{
+ FD: fd,
+ Ctx: ctx,
+ }
+
+ var buf bytes.Buffer
+ d.hashMu.RLock()
+ params := &merkletree.VerifyParams{
+ Out: &buf,
+ Tree: &fdReader,
+ Size: int64(size),
+ Name: d.name,
+ Mode: uint32(stat.Mode),
+ UID: stat.UID,
+ GID: stat.GID,
+ Children: d.childrenNames,
+ //TODO(b/156980949): Support passing other hash algorithms.
+ HashAlgorithms: fs.alg.toLinuxHashAlg(),
+ ReadOffset: 0,
+ // Set read size to 0 so only the metadata is verified.
+ ReadSize: 0,
+ Expected: d.hash,
+ DataAndTreeInSameFile: false,
+ }
+ d.hashMu.RUnlock()
+ if atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFDIR {
+ params.DataAndTreeInSameFile = true
+ }
+
+ if d.isSymlink() {
+ target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: d.lowerVD,
+ Start: d.lowerVD,
+ })
+ if err != nil {
+ return err
+ }
+ params.SymlinkTarget = target
+ }
+
+ if _, err := merkletree.Verify(params); err != nil && err != io.EOF {
+ return alertIntegrityViolation(fmt.Sprintf("Verification stat for %s failed: %v", childPath, err))
+ }
+ d.mode = uint32(stat.Mode)
+ d.uid = stat.UID
+ d.gid = stat.GID
+ d.size = uint32(size)
+ d.symlinkTarget = params.SymlinkTarget
+ return nil
+}
+
+// Preconditions:
+// * fs.renameMu must be locked.
+// * parent.dirMu must be locked.
+func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
+ if child, ok := parent.children[name]; ok {
+ // If verity is enabled on child, we should check again whether
+ // the file and the corresponding Merkle tree are as expected,
+ // in order to catch deletion/renaming after the last time it's
+ // accessed.
+ if child.verityEnabled() {
+ vfsObj := fs.vfsfs.VirtualFilesystem()
+ // Get the path to the child dentry. This is only used
+ // to provide path information in failure case.
+ path, err := vfsObj.PathnameWithDeleted(ctx, child.fs.rootDentry.lowerVD, child.lowerVD)
+ if err != nil {
+ return nil, err
+ }
+
+ childVD, err := parent.getLowerAt(ctx, vfsObj, name)
+ if err == syserror.ENOENT {
+ // The file was previously accessed. If the
+ // file does not exist now, it indicates an
+ // unexpected modification to the file system.
+ return nil, alertIntegrityViolation(fmt.Sprintf("Target file %s is expected but missing", path))
+ }
+ if err != nil {
+ return nil, err
+ }
+ defer childVD.DecRef(ctx)
+
+ childMerkleVD, err := parent.getLowerAt(ctx, vfsObj, merklePrefix+name)
+ // The Merkle tree file was previous accessed. If it
+ // does not exist now, it indicates an unexpected
+ // modification to the file system.
+ if err == syserror.ENOENT {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Expected Merkle file for target %s but none found", path))
+ }
+ if err != nil {
+ return nil, err
+ }
+
+ defer childMerkleVD.DecRef(ctx)
+ }
+
+ // If enabling verification on files/directories is not allowed
+ // during runtime, all cached children are already verified. If
+ // runtime enable is allowed and the parent directory is
+ // enabled, we should verify the child hash here because it may
+ // be cached before enabled.
+ if fs.allowRuntimeEnable {
+ if parent.verityEnabled() {
+ if _, err := fs.verifyChildLocked(ctx, parent, child); err != nil {
+ return nil, err
+ }
+ }
+ if child.verityEnabled() {
+ vfsObj := fs.vfsfs.VirtualFilesystem()
+ mask := uint32(linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID)
+ stat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: child.lowerVD,
+ Start: child.lowerVD,
+ }, &vfs.StatOptions{
+ Mask: mask,
+ })
+ if err != nil {
+ return nil, err
+ }
+ if err := fs.verifyStatAndChildrenLocked(ctx, child, stat); err != nil {
+ return nil, err
+ }
+ }
+ }
+ return child, nil
+ }
+ child, err := fs.lookupAndVerifyLocked(ctx, parent, name)
+ if err != nil {
+ return nil, err
+ }
+ if parent.children == nil {
+ parent.children = make(map[string]*dentry)
+ }
+ parent.children[name] = child
+ // child's refcount is initially 0, so it may be dropped after traversal.
+ *ds = appendDentry(*ds, child)
+ return child, nil
+}
+
+// Preconditions:
+// * fs.renameMu must be locked.
+// * parent.dirMu must be locked.
+func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, name string) (*dentry, error) {
+ vfsObj := fs.vfsfs.VirtualFilesystem()
+
+ if parent.verityEnabled() {
+ if _, ok := parent.childrenNames[name]; !ok {
+ return nil, syserror.ENOENT
+ }
+ }
+
+ parentPath, err := vfsObj.PathnameWithDeleted(ctx, parent.fs.rootDentry.lowerVD, parent.lowerVD)
+ if err != nil {
+ return nil, err
+ }
+
+ childVD, err := parent.getLowerAt(ctx, vfsObj, name)
+ if err == syserror.ENOENT {
+ return nil, alertIntegrityViolation(fmt.Sprintf("file %s expected but not found", parentPath+"/"+name))
+ }
+ if err != nil {
+ return nil, err
+ }
+
+ // The dentry needs to be cleaned up if any error occurs. IncRef will be
+ // called if a verity child dentry is successfully created.
+ defer childVD.DecRef(ctx)
+
+ childMerkleVD, err := parent.getLowerAt(ctx, vfsObj, merklePrefix+name)
+ if err == syserror.ENOENT {
+ if !fs.allowRuntimeEnable {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", parentPath+"/"+name))
+ }
+ childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: parent.lowerVD,
+ Start: parent.lowerVD,
+ Path: fspath.Parse(merklePrefix + name),
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDWR | linux.O_CREAT,
+ Mode: 0644,
+ })
+ if err != nil {
+ return nil, err
+ }
+ childMerkleFD.DecRef(ctx)
+ childMerkleVD, err = parent.getLowerAt(ctx, vfsObj, merklePrefix+name)
+ if err != nil {
+ return nil, err
+ }
+ }
+ if err != nil && err != syserror.ENOENT {
+ return nil, err
+ }
+
+ // Clear the Merkle tree file if they are to be generated at runtime.
+ // TODO(b/182315468): Optimize the Merkle tree generate process to
+ // allow only updating certain files/directories.
+ if fs.allowRuntimeEnable {
+ childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: childMerkleVD,
+ Start: childMerkleVD,
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDWR | linux.O_TRUNC,
+ Mode: 0644,
+ })
+ if err != nil {
+ return nil, err
+ }
+ childMerkleFD.DecRef(ctx)
+ }
+
+ // The dentry needs to be cleaned up if any error occurs. IncRef will be
+ // called if a verity child dentry is successfully created.
+ defer childMerkleVD.DecRef(ctx)
+
+ mask := uint32(linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID)
+ stat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: childVD,
+ Start: childVD,
+ }, &vfs.StatOptions{
+ Mask: mask,
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ child := fs.newDentry()
+ child.lowerVD = childVD
+ child.lowerMerkleVD = childMerkleVD
+
+ // Increase the reference for both childVD and childMerkleVD as they are
+ // held by child. If this function fails and the child is destroyed, the
+ // references will be decreased in destroyLocked.
+ childVD.IncRef()
+ childMerkleVD.IncRef()
+
+ parent.IncRef()
+ child.parent = parent
+ child.name = name
+
+ child.mode = uint32(stat.Mode)
+ child.uid = stat.UID
+ child.gid = stat.GID
+ child.childrenNames = make(map[string]struct{})
+
+ // Verify child hash. This should always be performed unless in
+ // allowRuntimeEnable mode and the parent directory hasn't been enabled
+ // yet.
+ if parent.verityEnabled() {
+ if _, err := fs.verifyChildLocked(ctx, parent, child); err != nil {
+ child.destroyLocked(ctx)
+ return nil, err
+ }
+ }
+ if child.verityEnabled() {
+ if err := fs.verifyStatAndChildrenLocked(ctx, child, stat); err != nil {
+ child.destroyLocked(ctx)
+ return nil, err
+ }
+ }
+
+ return child, nil
+}
+
+// walkParentDirLocked resolves all but the last path component of rp to an
+// existing directory, starting from the given directory (which is usually
+// rp.Start().Impl().(*dentry)). It does not check that the returned directory
+// is searchable by the provider of rp.
+//
+// Preconditions:
+// * fs.renameMu must be locked.
+// * !rp.Done().
+func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
+ for !rp.Final() {
+ d.dirMu.Lock()
+ next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
+ d.dirMu.Unlock()
+ if err != nil {
+ return nil, err
+ }
+ d = next
+ }
+ if !d.isDir() {
+ return nil, syserror.ENOTDIR
+ }
+ return d, nil
+}
+
+// resolveLocked resolves rp to an existing file.
+//
+// Preconditions: fs.renameMu must be locked.
+func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
+ d := rp.Start().Impl().(*dentry)
+ for !rp.Done() {
+ d.dirMu.Lock()
+ next, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
+ d.dirMu.Unlock()
+ if err != nil {
+ return nil, err
+ }
+ d = next
+ }
+ if rp.MustBeDir() && !d.isDir() {
+ return nil, syserror.ENOTDIR
+ }
+ return d, nil
+}
+
+// AccessAt implements vfs.Filesystem.Impl.AccessAt.
+func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
+ // Verity file system is read-only.
+ if ats&vfs.MayWrite != 0 {
+ return syserror.EROFS
+ }
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return err
+ }
+ return d.checkPermissions(creds, ats)
+}
+
+// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
+func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return nil, err
+ }
+ if opts.CheckSearchable {
+ if !d.isDir() {
+ return nil, syserror.ENOTDIR
+ }
+ if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
+ return nil, err
+ }
+ }
+ d.IncRef()
+ return &d.vfsd, nil
+}
+
+// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
+func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ start := rp.Start().Impl().(*dentry)
+ d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
+ if err != nil {
+ return nil, err
+ }
+ d.IncRef()
+ return &d.vfsd, nil
+}
+
+// LinkAt implements vfs.FilesystemImpl.LinkAt.
+func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
+func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// MknodAt implements vfs.FilesystemImpl.MknodAt.
+func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// OpenAt implements vfs.FilesystemImpl.OpenAt.
+func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ // Verity fs is read-only.
+ if opts.Flags&(linux.O_WRONLY|linux.O_CREAT) != 0 {
+ return nil, syserror.EROFS
+ }
+
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+
+ start := rp.Start().Impl().(*dentry)
+ if rp.Done() {
+ return start.openLocked(ctx, rp, &opts)
+ }
+
+afterTrailingSymlink:
+ parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
+ if err != nil {
+ return nil, err
+ }
+
+ // Check for search permission in the parent directory.
+ if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
+ return nil, err
+ }
+
+ // Open existing child or follow symlink.
+ parent.dirMu.Lock()
+ child, err := fs.stepLocked(ctx, rp, parent, false /*mayFollowSymlinks*/, &ds)
+ parent.dirMu.Unlock()
+ if err != nil {
+ return nil, err
+ }
+ if child.isSymlink() && rp.ShouldFollowSymlink() {
+ target, err := child.readlink(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if err := rp.HandleSymlink(target); err != nil {
+ return nil, err
+ }
+ start = parent
+ goto afterTrailingSymlink
+ }
+ return child.openLocked(ctx, rp, &opts)
+}
+
+// Preconditions: fs.renameMu must be locked.
+func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+ // Users should not open the Merkle tree files. Those are for verity fs
+ // use only.
+ if strings.Contains(d.name, merklePrefix) {
+ return nil, syserror.EPERM
+ }
+ ats := vfs.AccessTypesForOpenFlags(opts)
+ if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
+ return nil, err
+ }
+
+ // Verity fs is read-only.
+ if ats&vfs.MayWrite != 0 {
+ return nil, syserror.EROFS
+ }
+
+ // Get the path to the target file. This is only used to provide path
+ // information in failure case.
+ path, err := d.fs.vfsfs.VirtualFilesystem().PathnameWithDeleted(ctx, d.fs.rootDentry.lowerVD, d.lowerVD)
+ if err != nil {
+ return nil, err
+ }
+
+ // Open the file in the underlying file system.
+ lowerFD, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: d.lowerVD,
+ Start: d.lowerVD,
+ }, opts)
+
+ // The file should exist, as we succeeded in finding its dentry. If it's
+ // missing, it indicates an unexpected modification to the file system.
+ if err != nil {
+ if err == syserror.ENOENT {
+ return nil, alertIntegrityViolation(fmt.Sprintf("File %s expected but not found", path))
+ }
+ return nil, err
+ }
+
+ // lowerFD needs to be cleaned up if any error occurs. IncRef will be
+ // called if a verity FD is successfully created.
+ defer lowerFD.DecRef(ctx)
+
+ // Open the Merkle tree file corresponding to the current file/directory
+ // to be used later for verifying Read/Walk.
+ merkleReader, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: d.lowerMerkleVD,
+ Start: d.lowerMerkleVD,
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDONLY,
+ })
+
+ // The Merkle tree file should exist, as we succeeded in finding its
+ // dentry. If it's missing, it indicates an unexpected modification to
+ // the file system.
+ if err != nil {
+ if err == syserror.ENOENT {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", path))
+ }
+ return nil, err
+ }
+
+ // merkleReader needs to be cleaned up if any error occurs. IncRef will
+ // be called if a verity FD is successfully created.
+ defer merkleReader.DecRef(ctx)
+
+ lowerFlags := lowerFD.StatusFlags()
+ lowerFDOpts := lowerFD.Options()
+ var merkleWriter *vfs.FileDescription
+ var parentMerkleWriter *vfs.FileDescription
+
+ // Only open the Merkle tree files for write if in allowRuntimeEnable
+ // mode.
+ if d.fs.allowRuntimeEnable {
+ merkleWriter, err = rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: d.lowerMerkleVD,
+ Start: d.lowerMerkleVD,
+ }, &vfs.OpenOptions{
+ Flags: linux.O_WRONLY | linux.O_APPEND,
+ })
+ if err != nil {
+ if err == syserror.ENOENT {
+ return nil, alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", path))
+ }
+ return nil, err
+ }
+ // merkleWriter is cleaned up if any error occurs. IncRef will
+ // be called if a verity FD is created successfully.
+ defer merkleWriter.DecRef(ctx)
+
+ if d.parent != nil {
+ parentMerkleWriter, err = rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: d.parent.lowerMerkleVD,
+ Start: d.parent.lowerMerkleVD,
+ }, &vfs.OpenOptions{
+ Flags: linux.O_WRONLY | linux.O_APPEND,
+ })
+ if err != nil {
+ if err == syserror.ENOENT {
+ parentPath, _ := d.fs.vfsfs.VirtualFilesystem().PathnameWithDeleted(ctx, d.fs.rootDentry.lowerVD, d.parent.lowerVD)
+ return nil, alertIntegrityViolation(fmt.Sprintf("Merkle file for %s expected but not found", parentPath))
+ }
+ return nil, err
+ }
+ // parentMerkleWriter is cleaned up if any error occurs. IncRef
+ // will be called if a verity FD is created successfully.
+ defer parentMerkleWriter.DecRef(ctx)
+ }
+ }
+
+ fd := &fileDescription{
+ d: d,
+ lowerFD: lowerFD,
+ merkleReader: merkleReader,
+ merkleWriter: merkleWriter,
+ parentMerkleWriter: parentMerkleWriter,
+ isDir: d.isDir(),
+ }
+
+ if err := fd.vfsfd.Init(fd, lowerFlags, rp.Mount(), &d.vfsd, &lowerFDOpts); err != nil {
+ return nil, err
+ }
+ lowerFD.IncRef()
+ merkleReader.IncRef()
+ if merkleWriter != nil {
+ merkleWriter.IncRef()
+ }
+ if parentMerkleWriter != nil {
+ parentMerkleWriter.IncRef()
+ }
+ return &fd.vfsfd, err
+}
+
+// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
+func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return "", err
+ }
+ return d.readlink(ctx)
+}
+
+// RenameAt implements vfs.FilesystemImpl.RenameAt.
+func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
+func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
+func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// StatAt implements vfs.FilesystemImpl.StatAt.
+// TODO(b/170157489): Investigate whether stats other than Mode/UID/GID should
+// be verified.
+func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return linux.Statx{}, err
+ }
+
+ var stat linux.Statx
+ stat, err = fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: d.lowerVD,
+ Start: d.lowerVD,
+ }, &opts)
+ if err != nil {
+ return linux.Statx{}, err
+ }
+ d.dirMu.Lock()
+ if d.verityEnabled() {
+ if err := fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil {
+ return linux.Statx{}, err
+ }
+ }
+ d.dirMu.Unlock()
+ return stat, nil
+}
+
+// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
+func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
+ // TODO(b/159261227): Implement StatFSAt.
+ return linux.Statfs{}, nil
+}
+
+// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
+func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
+func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
+func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ if _, err := fs.resolveLocked(ctx, rp, &ds); err != nil {
+ return nil, err
+ }
+ return nil, syserror.ECONNREFUSED
+}
+
+// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
+func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return nil, err
+ }
+ lowerVD := d.lowerVD
+ return fs.vfsfs.VirtualFilesystem().ListXattrAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: lowerVD,
+ Start: lowerVD,
+ }, size)
+}
+
+// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
+func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
+ var ds *[]*dentry
+ fs.renameMu.RLock()
+ defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+ d, err := fs.resolveLocked(ctx, rp, &ds)
+ if err != nil {
+ return "", err
+ }
+ lowerVD := d.lowerVD
+ return fs.vfsfs.VirtualFilesystem().GetXattrAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: lowerVD,
+ Start: lowerVD,
+ }, &opts)
+}
+
+// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
+func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
+func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+ // Verity file system is read-only.
+ return syserror.EROFS
+}
+
+// PrependPath implements vfs.FilesystemImpl.PrependPath.
+func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
+ fs.renameMu.RLock()
+ defer fs.renameMu.RUnlock()
+ mnt := vd.Mount()
+ d := vd.Dentry().Impl().(*dentry)
+ for {
+ if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() {
+ return vfs.PrependPathAtVFSRootError{}
+ }
+ if &d.vfsd == mnt.Root() {
+ return nil
+ }
+ if d.parent == nil {
+ return vfs.PrependPathAtNonMountRootError{}
+ }
+ b.PrependComponent(d.name)
+ d = d.parent
+ }
+}
diff --git a/pkg/sentry/fsimpl/verity/save_restore.go b/pkg/sentry/fsimpl/verity/save_restore.go
new file mode 100644
index 000000000..46b064342
--- /dev/null
+++ b/pkg/sentry/fsimpl/verity/save_restore.go
@@ -0,0 +1,27 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package verity
+
+import (
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/refsvfs2"
+)
+
+func (d *dentry) afterLoad() {
+ if atomic.LoadInt64(&d.refs) != -1 {
+ refsvfs2.Register(d)
+ }
+}
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
new file mode 100644
index 000000000..2f97324a8
--- /dev/null
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -0,0 +1,1402 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package verity provides a filesystem implementation that is a wrapper of
+// another file system.
+// The verity file system provides integrity check for the underlying file
+// system by providing verification for path traversals and each read.
+// The verity file system is read-only, except for one case: when
+// allowRuntimeEnable is true, additional Merkle files can be generated using
+// the FS_IOC_ENABLE_VERITY ioctl.
+//
+// Lock order:
+//
+// filesystem.renameMu
+// dentry.dirMu
+// fileDescription.mu
+// filesystem.verityMu
+// dentry.hashMu
+//
+// Locking dentry.dirMu in multiple dentries requires that parent dentries are
+// locked before child dentries, and that filesystem.renameMu is locked to
+// stabilize this relationship.
+package verity
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "math"
+ "strconv"
+ "strings"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/hostarch"
+ "gvisor.dev/gvisor/pkg/marshal/primitive"
+ "gvisor.dev/gvisor/pkg/merkletree"
+ "gvisor.dev/gvisor/pkg/refsvfs2"
+ "gvisor.dev/gvisor/pkg/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ fslock "gvisor.dev/gvisor/pkg/sentry/fs/lock"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+const (
+ // Name is the default filesystem name.
+ Name = "verity"
+
+ // merklePrefix is the prefix of the Merkle tree files. For example, the Merkle
+ // tree file for "/foo" is "/.merkle.verity.foo".
+ merklePrefix = ".merkle.verity."
+
+ // merkleRootPrefix is the prefix of the Merkle tree root file. This
+ // needs to be different from merklePrefix to avoid name collision.
+ merkleRootPrefix = ".merkleroot.verity."
+
+ // merkleOffsetInParentXattr is the extended attribute name specifying the
+ // offset of the child hash in its parent's Merkle tree.
+ merkleOffsetInParentXattr = "user.merkle.offset"
+
+ // merkleSizeXattr is the extended attribute name specifying the size of data
+ // hashed by the corresponding Merkle tree. For a regular file, this is the
+ // file size. For a directory, this is the size of all its children's hashes.
+ merkleSizeXattr = "user.merkle.size"
+
+ // childrenOffsetXattr is the extended attribute name specifying the
+ // names of the offset of the serialized children names in the Merkle
+ // tree file.
+ childrenOffsetXattr = "user.merkle.childrenOffset"
+
+ // childrenSizeXattr is the extended attribute name specifying the size
+ // of the serialized children names.
+ childrenSizeXattr = "user.merkle.childrenSize"
+
+ // sizeOfStringInt32 is the size for a 32 bit integer stored as string in
+ // extended attributes. The maximum value of a 32 bit integer has 10 digits.
+ sizeOfStringInt32 = 10
+)
+
+var (
+ // action specifies the action towards detected violation.
+ action ViolationAction
+
+ // verityMu synchronizes concurrent operations that enable verity and perform
+ // verification checks.
+ verityMu sync.RWMutex
+)
+
+// HashAlgorithm is a type specifying the algorithm used to hash the file
+// content.
+type HashAlgorithm int
+
+// ViolationAction is a type specifying the action when an integrity violation
+// is detected.
+type ViolationAction int
+
+const (
+ // PanicOnViolation terminates the sentry on detected violation.
+ PanicOnViolation ViolationAction = 0
+ // ErrorOnViolation returns an error from the violating system call on
+ // detected violation.
+ ErrorOnViolation = 1
+)
+
+// Currently supported hashing algorithms include SHA256 and SHA512.
+const (
+ SHA256 HashAlgorithm = iota
+ SHA512
+)
+
+func (alg HashAlgorithm) toLinuxHashAlg() int {
+ switch alg {
+ case SHA256:
+ return linux.FS_VERITY_HASH_ALG_SHA256
+ case SHA512:
+ return linux.FS_VERITY_HASH_ALG_SHA512
+ default:
+ return 0
+ }
+}
+
+// FilesystemType implements vfs.FilesystemType.
+//
+// +stateify savable
+type FilesystemType struct{}
+
+// filesystem implements vfs.FilesystemImpl.
+//
+// +stateify savable
+type filesystem struct {
+ vfsfs vfs.Filesystem
+
+ // creds is a copy of the filesystem's creator's credentials, which are
+ // used for accesses to the underlying file system. creds is immutable.
+ creds *auth.Credentials
+
+ // allowRuntimeEnable is true if using ioctl with FS_IOC_ENABLE_VERITY
+ // to build Merkle trees in the verity file system is allowed. If this
+ // is false, no new Merkle trees can be built, and only the files that
+ // had Merkle trees before startup (e.g. from a host filesystem mounted
+ // with gofer fs) can be verified.
+ allowRuntimeEnable bool
+
+ // lowerMount is the underlying file system mount.
+ lowerMount *vfs.Mount
+
+ // rootDentry is the mount root Dentry for this file system, which
+ // stores the root hash of the whole file system in bytes.
+ rootDentry *dentry
+
+ // alg is the algorithms used to hash the files in the verity file
+ // system.
+ alg HashAlgorithm
+
+ // renameMu synchronizes renaming with non-renaming operations in order
+ // to ensure consistent lock ordering between dentry.dirMu in different
+ // dentries.
+ renameMu sync.RWMutex `state:"nosave"`
+
+ // verityMu synchronizes enabling verity files, protects files or
+ // directories from being enabled by different threads simultaneously.
+ // It also ensures that verity does not access files that are being
+ // enabled.
+ //
+ // Also, the directory Merkle trees depends on the generated trees of
+ // its children. So they shouldn't be enabled the same time. This lock
+ // is for the whole file system to ensure that no more than one file is
+ // enabled the same time.
+ verityMu sync.RWMutex `state:"nosave"`
+}
+
+// InternalFilesystemOptions may be passed as
+// vfs.GetFilesystemOptions.InternalData to FilesystemType.GetFilesystem.
+//
+// +stateify savable
+type InternalFilesystemOptions struct {
+ // RootMerkleFileName is the name of the verity root Merkle tree file.
+ RootMerkleFileName string
+
+ // LowerName is the name of the filesystem wrapped by verity fs.
+ LowerName string
+
+ // Alg is the algorithms used to hash the files in the verity file
+ // system.
+ Alg HashAlgorithm
+
+ // RootHash is the root hash of the overall verity file system.
+ RootHash []byte
+
+ // AllowRuntimeEnable specifies whether the verity file system allows
+ // enabling verification for files (i.e. building Merkle trees) during
+ // runtime.
+ AllowRuntimeEnable bool
+
+ // LowerGetFSOptions is the file system option for the lower layer file
+ // system wrapped by verity file system.
+ LowerGetFSOptions vfs.GetFilesystemOptions
+
+ // Action specifies the action on an integrity violation.
+ Action ViolationAction
+}
+
+// Name implements vfs.FilesystemType.Name.
+func (FilesystemType) Name() string {
+ return Name
+}
+
+// Release implements vfs.FilesystemType.Release.
+func (FilesystemType) Release(ctx context.Context) {}
+
+// alertIntegrityViolation alerts a violation of integrity, which usually means
+// unexpected modification to the file system is detected. In ErrorOnViolation
+// mode, it returns EIO, otherwise it panic.
+func alertIntegrityViolation(msg string) error {
+ if action == ErrorOnViolation {
+ return syserror.EIO
+ }
+ panic(msg)
+}
+
+// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
+func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+ iopts, ok := opts.InternalData.(InternalFilesystemOptions)
+ if !ok {
+ ctx.Warningf("verity.FilesystemType.GetFilesystem: missing verity configs")
+ return nil, nil, syserror.EINVAL
+ }
+ action = iopts.Action
+
+ // Mount the lower file system. The lower file system is wrapped inside
+ // verity, and should not be exposed or connected.
+ mopts := &vfs.MountOptions{
+ GetFilesystemOptions: iopts.LowerGetFSOptions,
+ InternalMount: true,
+ }
+ mnt, err := vfsObj.MountDisconnected(ctx, creds, "", iopts.LowerName, mopts)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ fs := &filesystem{
+ creds: creds.Fork(),
+ alg: iopts.Alg,
+ lowerMount: mnt,
+ allowRuntimeEnable: iopts.AllowRuntimeEnable,
+ }
+ fs.vfsfs.Init(vfsObj, &fstype, fs)
+
+ // Construct the root dentry.
+ d := fs.newDentry()
+ d.refs = 1
+ lowerVD := vfs.MakeVirtualDentry(mnt, mnt.Root())
+ lowerVD.IncRef()
+ d.lowerVD = lowerVD
+
+ rootMerkleName := merkleRootPrefix + iopts.RootMerkleFileName
+
+ lowerMerkleVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: lowerVD,
+ Start: lowerVD,
+ Path: fspath.Parse(rootMerkleName),
+ }, &vfs.GetDentryOptions{})
+
+ // If runtime enable is allowed, the root merkle tree may be absent. We
+ // should create the tree file.
+ if err == syserror.ENOENT && fs.allowRuntimeEnable {
+ lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: lowerVD,
+ Start: lowerVD,
+ Path: fspath.Parse(rootMerkleName),
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDWR | linux.O_CREAT,
+ Mode: 0644,
+ })
+ if err != nil {
+ fs.vfsfs.DecRef(ctx)
+ d.DecRef(ctx)
+ return nil, nil, err
+ }
+ lowerMerkleFD.DecRef(ctx)
+ lowerMerkleVD, err = vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: lowerVD,
+ Start: lowerVD,
+ Path: fspath.Parse(rootMerkleName),
+ }, &vfs.GetDentryOptions{})
+ if err != nil {
+ fs.vfsfs.DecRef(ctx)
+ d.DecRef(ctx)
+ return nil, nil, err
+ }
+ } else if err != nil {
+ // Failed to get dentry for the root Merkle file. This
+ // indicates an unexpected modification that removed/renamed
+ // the root Merkle file, or it's never generated.
+ fs.vfsfs.DecRef(ctx)
+ d.DecRef(ctx)
+ return nil, nil, alertIntegrityViolation("Failed to find root Merkle file")
+ }
+
+ // Clear the Merkle tree file if they are to be generated at runtime.
+ // TODO(b/182315468): Optimize the Merkle tree generate process to
+ // allow only updating certain files/directories.
+ if fs.allowRuntimeEnable {
+ lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: lowerMerkleVD,
+ Start: lowerMerkleVD,
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDWR | linux.O_TRUNC,
+ Mode: 0644,
+ })
+ if err != nil {
+ return nil, nil, err
+ }
+ lowerMerkleFD.DecRef(ctx)
+ }
+
+ d.lowerMerkleVD = lowerMerkleVD
+
+ // Get metadata from the underlying file system.
+ const statMask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID
+ stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
+ Root: lowerVD,
+ Start: lowerVD,
+ }, &vfs.StatOptions{
+ Mask: statMask,
+ })
+ if err != nil {
+ fs.vfsfs.DecRef(ctx)
+ d.DecRef(ctx)
+ return nil, nil, err
+ }
+
+ d.mode = uint32(stat.Mode)
+ d.uid = stat.UID
+ d.gid = stat.GID
+ d.hash = make([]byte, len(iopts.RootHash))
+ d.childrenNames = make(map[string]struct{})
+
+ if !d.isDir() {
+ ctx.Warningf("verity root must be a directory")
+ return nil, nil, syserror.EINVAL
+ }
+
+ if !fs.allowRuntimeEnable {
+ // Get children names from the underlying file system.
+ offString, err := vfsObj.GetXattrAt(ctx, creds, &vfs.PathOperation{
+ Root: lowerMerkleVD,
+ Start: lowerMerkleVD,
+ }, &vfs.GetXattrOptions{
+ Name: childrenOffsetXattr,
+ Size: sizeOfStringInt32,
+ })
+ if err == syserror.ENOENT || err == syserror.ENODATA {
+ return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenOffsetXattr, err))
+ }
+ if err != nil {
+ return nil, nil, err
+ }
+
+ off, err := strconv.Atoi(offString)
+ if err != nil {
+ return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenOffsetXattr, err))
+ }
+
+ sizeString, err := vfsObj.GetXattrAt(ctx, creds, &vfs.PathOperation{
+ Root: lowerMerkleVD,
+ Start: lowerMerkleVD,
+ }, &vfs.GetXattrOptions{
+ Name: childrenSizeXattr,
+ Size: sizeOfStringInt32,
+ })
+ if err == syserror.ENOENT || err == syserror.ENODATA {
+ return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", childrenSizeXattr, err))
+ }
+ if err != nil {
+ return nil, nil, err
+ }
+ size, err := strconv.Atoi(sizeString)
+ if err != nil {
+ return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", childrenSizeXattr, err))
+ }
+
+ lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{
+ Root: lowerMerkleVD,
+ Start: lowerMerkleVD,
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDONLY,
+ })
+ if err == syserror.ENOENT {
+ return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to open root Merkle file: %v", err))
+ }
+ if err != nil {
+ return nil, nil, err
+ }
+
+ childrenNames := make([]byte, size)
+ if _, err := lowerMerkleFD.PRead(ctx, usermem.BytesIOSequence(childrenNames), int64(off), vfs.ReadOptions{}); err != nil {
+ return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to read root children map: %v", err))
+ }
+
+ if err := json.Unmarshal(childrenNames, &d.childrenNames); err != nil {
+ return nil, nil, alertIntegrityViolation(fmt.Sprintf("Failed to deserialize childrenNames: %v", err))
+ }
+
+ if err := fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil {
+ return nil, nil, err
+ }
+ }
+
+ d.hashMu.Lock()
+ copy(d.hash, iopts.RootHash)
+ d.hashMu.Unlock()
+ d.vfsd.Init(d)
+
+ fs.rootDentry = d
+
+ return &fs.vfsfs, &d.vfsd, nil
+}
+
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release(ctx context.Context) {
+ fs.lowerMount.DecRef(ctx)
+}
+
+// MountOptions implements vfs.FilesystemImpl.MountOptions.
+func (fs *filesystem) MountOptions() string {
+ return ""
+}
+
+// dentry implements vfs.DentryImpl.
+//
+// +stateify savable
+type dentry struct {
+ vfsd vfs.Dentry
+
+ refs int64
+
+ // fs is the owning filesystem. fs is immutable.
+ fs *filesystem
+
+ // mode, uid, gid and size are the file mode, owner, group, and size of
+ // the file in the underlying file system. They are set when a dentry
+ // is initialized, and never modified.
+ mode uint32
+ uid uint32
+ gid uint32
+ size uint32
+
+ // parent is the dentry corresponding to this dentry's parent directory.
+ // name is this dentry's name in parent. If this dentry is a filesystem
+ // root, parent is nil and name is the empty string. parent and name are
+ // protected by fs.renameMu.
+ parent *dentry
+ name string
+
+ // If this dentry represents a directory, children maps the names of
+ // children for which dentries have been instantiated to those dentries,
+ // and dirents (if not nil) is a cache of dirents as returned by
+ // directoryFDs representing this directory. children is protected by
+ // dirMu.
+ dirMu sync.Mutex `state:"nosave"`
+ children map[string]*dentry
+
+ // childrenNames stores the name of all children of the dentry. This is
+ // used by verity to check whether a child is expected. This is only
+ // populated by enableVerity. childrenNames is also protected by dirMu.
+ childrenNames map[string]struct{}
+
+ // lowerVD is the VirtualDentry in the underlying file system. It is
+ // never modified after initialized.
+ lowerVD vfs.VirtualDentry
+
+ // lowerMerkleVD is the VirtualDentry of the corresponding Merkle tree
+ // in the underlying file system. It is never modified after
+ // initialized.
+ lowerMerkleVD vfs.VirtualDentry
+
+ // symlinkTarget is the target path of a symlink file in the underlying filesystem.
+ symlinkTarget string
+
+ // hash is the calculated hash for the current file or directory. hash
+ // is protected by hashMu.
+ hashMu sync.RWMutex `state:"nosave"`
+ hash []byte
+}
+
+// newDentry creates a new dentry representing the given verity file. The
+// dentry initially has no references; it is the caller's responsibility to set
+// the dentry's reference count and/or call dentry.destroy() as appropriate.
+// The dentry is initially invalid in that it contains no underlying dentry;
+// the caller is responsible for setting them.
+func (fs *filesystem) newDentry() *dentry {
+ d := &dentry{
+ fs: fs,
+ }
+ d.vfsd.Init(d)
+ refsvfs2.Register(d)
+ return d
+}
+
+// IncRef implements vfs.DentryImpl.IncRef.
+func (d *dentry) IncRef() {
+ r := atomic.AddInt64(&d.refs, 1)
+ if d.LogRefs() {
+ refsvfs2.LogIncRef(d, r)
+ }
+}
+
+// TryIncRef implements vfs.DentryImpl.TryIncRef.
+func (d *dentry) TryIncRef() bool {
+ for {
+ r := atomic.LoadInt64(&d.refs)
+ if r <= 0 {
+ return false
+ }
+ if atomic.CompareAndSwapInt64(&d.refs, r, r+1) {
+ if d.LogRefs() {
+ refsvfs2.LogTryIncRef(d, r+1)
+ }
+ return true
+ }
+ }
+}
+
+// DecRef implements vfs.DentryImpl.DecRef.
+func (d *dentry) DecRef(ctx context.Context) {
+ r := atomic.AddInt64(&d.refs, -1)
+ if d.LogRefs() {
+ refsvfs2.LogDecRef(d, r)
+ }
+ if r == 0 {
+ d.fs.renameMu.Lock()
+ d.checkDropLocked(ctx)
+ d.fs.renameMu.Unlock()
+ } else if r < 0 {
+ panic("verity.dentry.DecRef() called without holding a reference")
+ }
+}
+
+func (d *dentry) decRefLocked(ctx context.Context) {
+ r := atomic.AddInt64(&d.refs, -1)
+ if d.LogRefs() {
+ refsvfs2.LogDecRef(d, r)
+ }
+ if r == 0 {
+ d.checkDropLocked(ctx)
+ } else if r < 0 {
+ panic("verity.dentry.decRefLocked() called without holding a reference")
+ }
+}
+
+// checkDropLocked should be called after d's reference count becomes 0 or it
+// becomes deleted.
+func (d *dentry) checkDropLocked(ctx context.Context) {
+ // Dentries with a positive reference count must be retained. Dentries
+ // with a negative reference count have already been destroyed.
+ if atomic.LoadInt64(&d.refs) != 0 {
+ return
+ }
+ // Refs is still zero; destroy it.
+ d.destroyLocked(ctx)
+ return
+}
+
+// destroyLocked destroys the dentry.
+//
+// Preconditions:
+// * d.fs.renameMu must be locked for writing.
+// * d.refs == 0.
+func (d *dentry) destroyLocked(ctx context.Context) {
+ switch atomic.LoadInt64(&d.refs) {
+ case 0:
+ // Mark the dentry destroyed.
+ atomic.StoreInt64(&d.refs, -1)
+ case -1:
+ panic("verity.dentry.destroyLocked() called on already destroyed dentry")
+ default:
+ panic("verity.dentry.destroyLocked() called with references on the dentry")
+ }
+
+ if d.lowerVD.Ok() {
+ d.lowerVD.DecRef(ctx)
+ }
+ if d.lowerMerkleVD.Ok() {
+ d.lowerMerkleVD.DecRef(ctx)
+ }
+ if d.parent != nil {
+ d.parent.dirMu.Lock()
+ if !d.vfsd.IsDead() {
+ delete(d.parent.children, d.name)
+ }
+ d.parent.dirMu.Unlock()
+ d.parent.decRefLocked(ctx)
+ }
+ refsvfs2.Unregister(d)
+}
+
+// RefType implements refsvfs2.CheckedObject.Type.
+func (d *dentry) RefType() string {
+ return "verity.dentry"
+}
+
+// LeakMessage implements refsvfs2.CheckedObject.LeakMessage.
+func (d *dentry) LeakMessage() string {
+ return fmt.Sprintf("[verity.dentry %p] reference count of %d instead of -1", d, atomic.LoadInt64(&d.refs))
+}
+
+// LogRefs implements refsvfs2.CheckedObject.LogRefs.
+//
+// This should only be set to true for debugging purposes, as it can generate an
+// extremely large amount of output and drastically degrade performance.
+func (d *dentry) LogRefs() bool {
+ return false
+}
+
+// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
+func (d *dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) {
+ //TODO(b/159261227): Implement InotifyWithParent.
+}
+
+// Watches implements vfs.DentryImpl.Watches.
+func (d *dentry) Watches() *vfs.Watches {
+ //TODO(b/159261227): Implement Watches.
+ return nil
+}
+
+// OnZeroWatches implements vfs.DentryImpl.OnZeroWatches.
+func (d *dentry) OnZeroWatches(context.Context) {
+ //TODO(b/159261227): Implement OnZeroWatches.
+}
+
+func (d *dentry) isSymlink() bool {
+ return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
+}
+
+func (d *dentry) isDir() bool {
+ return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFDIR
+}
+
+func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
+ return vfs.GenericCheckPermissions(creds, ats, linux.FileMode(atomic.LoadUint32(&d.mode)), auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid)))
+}
+
+// verityEnabled checks whether the file is enabled with verity features. It
+// should always be true if runtime enable is not allowed. In runtime enable
+// mode, it returns true if the target has been enabled with
+// ioctl(FS_IOC_ENABLE_VERITY).
+func (d *dentry) verityEnabled() bool {
+ d.hashMu.RLock()
+ defer d.hashMu.RUnlock()
+ return !d.fs.allowRuntimeEnable || len(d.hash) != 0
+}
+
+// getLowerAt returns the dentry in the underlying file system, which is
+// represented by filename relative to d.
+func (d *dentry) getLowerAt(ctx context.Context, vfsObj *vfs.VirtualFilesystem, filename string) (vfs.VirtualDentry, error) {
+ return vfsObj.GetDentryAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: d.lowerVD,
+ Start: d.lowerVD,
+ Path: fspath.Parse(filename),
+ }, &vfs.GetDentryOptions{})
+}
+
+func (d *dentry) readlink(ctx context.Context) (string, error) {
+ vfsObj := d.fs.vfsfs.VirtualFilesystem()
+ if d.verityEnabled() {
+ stat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: d.lowerVD,
+ Start: d.lowerVD,
+ }, &vfs.StatOptions{})
+ if err != nil {
+ return "", err
+ }
+ d.dirMu.Lock()
+ defer d.dirMu.Unlock()
+ if err := d.fs.verifyStatAndChildrenLocked(ctx, d, stat); err != nil {
+ return "", err
+ }
+ return d.symlinkTarget, nil
+ }
+
+ return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
+ Root: d.lowerVD,
+ Start: d.lowerVD,
+ })
+}
+
+// FileDescription implements vfs.FileDescriptionImpl for verity fds.
+// FileDescription is a wrapper of the underlying lowerFD, with support to build
+// Merkle trees through the Linux fs-verity API to verify contents read from
+// lowerFD.
+//
+// +stateify savable
+type fileDescription struct {
+ vfsfd vfs.FileDescription
+ vfs.FileDescriptionDefaultImpl
+
+ // d is the corresponding dentry to the fileDescription.
+ d *dentry
+
+ // isDir specifies whehter the fileDescription points to a directory.
+ isDir bool
+
+ // lowerFD is the FileDescription corresponding to the file in the
+ // underlying file system.
+ lowerFD *vfs.FileDescription
+
+ // lowerMappable is the memmap.Mappable corresponding to this file in the
+ // underlying file system.
+ lowerMappable memmap.Mappable
+
+ // merkleReader is the read-only FileDescription corresponding to the
+ // Merkle tree file in the underlying file system.
+ merkleReader *vfs.FileDescription
+
+ // merkleWriter is the FileDescription corresponding to the Merkle tree
+ // file in the underlying file system for writing. This should only be
+ // used when allowRuntimeEnable is set to true.
+ merkleWriter *vfs.FileDescription
+
+ // parentMerkleWriter is the FileDescription of the Merkle tree for the
+ // directory that contains the current file/directory. This is only used
+ // if allowRuntimeEnable is set to true.
+ parentMerkleWriter *vfs.FileDescription
+
+ // off is the file offset. off is protected by mu.
+ mu sync.Mutex `state:"nosave"`
+ off int64
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *fileDescription) Release(ctx context.Context) {
+ fd.lowerFD.DecRef(ctx)
+ fd.merkleReader.DecRef(ctx)
+ if fd.merkleWriter != nil {
+ fd.merkleWriter.DecRef(ctx)
+ }
+ if fd.parentMerkleWriter != nil {
+ fd.parentMerkleWriter.DecRef(ctx)
+ }
+}
+
+// Stat implements vfs.FileDescriptionImpl.Stat.
+func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+ // TODO(b/162788573): Add integrity check for metadata.
+ stat, err := fd.lowerFD.Stat(ctx, opts)
+ if err != nil {
+ return linux.Statx{}, err
+ }
+ fd.d.dirMu.Lock()
+ if fd.d.verityEnabled() {
+ if err := fd.d.fs.verifyStatAndChildrenLocked(ctx, fd.d, stat); err != nil {
+ return linux.Statx{}, err
+ }
+ }
+ fd.d.dirMu.Unlock()
+ return stat, nil
+}
+
+// SetStat implements vfs.FileDescriptionImpl.SetStat.
+func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+ // Verity files are read-only.
+ return syserror.EPERM
+}
+
+// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
+func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
+ if !fd.d.isDir() {
+ return syserror.ENOTDIR
+ }
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+
+ var ds []vfs.Dirent
+ err := fd.lowerFD.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error {
+ // Do not include the Merkle tree files.
+ if strings.Contains(dirent.Name, merklePrefix) || strings.Contains(dirent.Name, merkleRootPrefix) {
+ return nil
+ }
+ if fd.d.verityEnabled() {
+ // Verify that the child is expected.
+ if dirent.Name != "." && dirent.Name != ".." {
+ if _, ok := fd.d.childrenNames[dirent.Name]; !ok {
+ return alertIntegrityViolation(fmt.Sprintf("Unexpected children %s", dirent.Name))
+ }
+ }
+ }
+ ds = append(ds, dirent)
+ return nil
+ }))
+
+ if err != nil {
+ return err
+ }
+
+ // The result should contain all children plus "." and "..".
+ if fd.d.verityEnabled() && len(ds) != len(fd.d.childrenNames)+2 {
+ return alertIntegrityViolation(fmt.Sprintf("Unexpected children number %d", len(ds)))
+ }
+
+ for fd.off < int64(len(ds)) {
+ if err := cb.Handle(ds[fd.off]); err != nil {
+ return err
+ }
+ fd.off++
+ }
+ return nil
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek.
+func (fd *fileDescription) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ n := int64(0)
+ switch whence {
+ case linux.SEEK_SET:
+ // use offset as specified
+ case linux.SEEK_CUR:
+ n = fd.off
+ case linux.SEEK_END:
+ n = int64(fd.d.size)
+ default:
+ return 0, syserror.EINVAL
+ }
+ if offset > math.MaxInt64-n {
+ return 0, syserror.EINVAL
+ }
+ offset += n
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ fd.off = offset
+ return offset, nil
+}
+
+// generateMerkleLocked generates a Merkle tree file for fd. If fd points to a
+// file /foo/bar, a Merkle tree file /foo/.merkle.verity.bar is generated. The
+// hash of the generated Merkle tree and the data size is returned. If fd
+// points to a regular file, the data is the content of the file. If fd points
+// to a directory, the data is all hashes of its children, written to the Merkle
+// tree file. If fd represents a symlink, the data is empty and nothing is written
+// to the Merkle tree file.
+//
+// Preconditions: fd.d.fs.verityMu must be locked.
+func (fd *fileDescription) generateMerkleLocked(ctx context.Context) ([]byte, uint64, error) {
+ fdReader := FileReadWriteSeeker{
+ FD: fd.lowerFD,
+ Ctx: ctx,
+ }
+ merkleReader := FileReadWriteSeeker{
+ FD: fd.merkleReader,
+ Ctx: ctx,
+ }
+ merkleWriter := FileReadWriteSeeker{
+ FD: fd.merkleWriter,
+ Ctx: ctx,
+ }
+
+ stat, err := fd.lowerFD.Stat(ctx, vfs.StatOptions{})
+ if err != nil {
+ return nil, 0, err
+ }
+
+ params := &merkletree.GenerateParams{
+ TreeReader: &merkleReader,
+ TreeWriter: &merkleWriter,
+ Children: fd.d.childrenNames,
+ //TODO(b/156980949): Support passing other hash algorithms.
+ HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(),
+ Name: fd.d.name,
+ Mode: uint32(stat.Mode),
+ UID: stat.UID,
+ GID: stat.GID,
+ }
+
+ switch atomic.LoadUint32(&fd.d.mode) & linux.S_IFMT {
+ case linux.S_IFREG:
+ // For a regular file, generate a Merkle tree based on its
+ // content.
+ params.File = &fdReader
+ params.Size = int64(stat.Size)
+ params.DataAndTreeInSameFile = false
+ case linux.S_IFDIR:
+ // For a directory, generate a Merkle tree based on the hashes
+ // of its children that has already been written to the Merkle
+ // tree file.
+ merkleStat, err := fd.merkleReader.Stat(ctx, vfs.StatOptions{})
+ if err != nil {
+ return nil, 0, err
+ }
+
+ params.Size = int64(merkleStat.Size)
+ params.File = &merkleReader
+ params.DataAndTreeInSameFile = true
+ case linux.S_IFLNK:
+ // For a symlink, generate a Merkle tree file but do not write the root hash
+ // of the target file content to it. Return a hash of a VerityDescriptor object
+ // which includes the symlink target name.
+ target, err := fd.d.readlink(ctx)
+ if err != nil {
+ return nil, 0, err
+ }
+
+ params.Size = int64(stat.Size)
+ params.DataAndTreeInSameFile = false
+ params.SymlinkTarget = target
+ default:
+ // TODO(b/167728857): Investigate whether and how we should
+ // enable other types of file.
+ return nil, 0, syserror.EINVAL
+ }
+ hash, err := merkletree.Generate(params)
+ return hash, uint64(params.Size), err
+}
+
+// recordChildrenLocked writes the names of fd's children into the
+// corresponding Merkle tree file, and saves the offset/size of the map into
+// xattrs.
+//
+// Preconditions:
+// * fd.d.fs.verityMu must be locked.
+// * fd.d.isDir() == true.
+func (fd *fileDescription) recordChildrenLocked(ctx context.Context) error {
+ // Record the children names in the Merkle tree file.
+ childrenNames, err := json.Marshal(fd.d.childrenNames)
+ if err != nil {
+ return err
+ }
+
+ stat, err := fd.merkleWriter.Stat(ctx, vfs.StatOptions{})
+ if err != nil {
+ return err
+ }
+
+ if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
+ Name: childrenOffsetXattr,
+ Value: strconv.Itoa(int(stat.Size)),
+ }); err != nil {
+ return err
+ }
+ if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
+ Name: childrenSizeXattr,
+ Value: strconv.Itoa(len(childrenNames)),
+ }); err != nil {
+ return err
+ }
+
+ if _, err = fd.merkleWriter.Write(ctx, usermem.BytesIOSequence(childrenNames), vfs.WriteOptions{}); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// enableVerity enables verity features on fd by generating a Merkle tree file
+// and stores its hash in its parent directory's Merkle tree.
+func (fd *fileDescription) enableVerity(ctx context.Context) (uintptr, error) {
+ if !fd.d.fs.allowRuntimeEnable {
+ return 0, syserror.EPERM
+ }
+
+ fd.d.fs.verityMu.Lock()
+ defer fd.d.fs.verityMu.Unlock()
+
+ // In allowRuntimeEnable mode, the underlying fd and read/write fd for
+ // the Merkle tree file should have all been initialized. For any file
+ // or directory other than the root, the parent Merkle tree file should
+ // have also been initialized.
+ if fd.lowerFD == nil || fd.merkleReader == nil || fd.merkleWriter == nil || (fd.parentMerkleWriter == nil && fd.d != fd.d.fs.rootDentry) {
+ return 0, alertIntegrityViolation("Unexpected verity fd: missing expected underlying fds")
+ }
+
+ hash, dataSize, err := fd.generateMerkleLocked(ctx)
+ if err != nil {
+ return 0, err
+ }
+
+ if fd.parentMerkleWriter != nil {
+ stat, err := fd.parentMerkleWriter.Stat(ctx, vfs.StatOptions{})
+ if err != nil {
+ return 0, err
+ }
+
+ // Write the hash of fd to the parent directory's Merkle tree
+ // file, as it should be part of the parent Merkle tree data.
+ // parentMerkleWriter is open with O_APPEND, so it should write
+ // directly to the end of the file.
+ if _, err = fd.parentMerkleWriter.Write(ctx, usermem.BytesIOSequence(hash), vfs.WriteOptions{}); err != nil {
+ return 0, err
+ }
+
+ // Record the offset of the hash of fd in parent directory's
+ // Merkle tree file.
+ if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
+ Name: merkleOffsetInParentXattr,
+ Value: strconv.Itoa(int(stat.Size)),
+ }); err != nil {
+ return 0, err
+ }
+
+ // Add the current child's name to parent's childrenNames.
+ fd.d.parent.childrenNames[fd.d.name] = struct{}{}
+ }
+
+ // Record the size of the data being hashed for fd.
+ if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{
+ Name: merkleSizeXattr,
+ Value: strconv.Itoa(int(dataSize)),
+ }); err != nil {
+ return 0, err
+ }
+
+ if fd.d.isDir() {
+ if err := fd.recordChildrenLocked(ctx); err != nil {
+ return 0, err
+ }
+ }
+ fd.d.hashMu.Lock()
+ fd.d.hash = hash
+ fd.d.hashMu.Unlock()
+ return 0, nil
+}
+
+// measureVerity returns the hash of fd, saved in verityDigest.
+func (fd *fileDescription) measureVerity(ctx context.Context, verityDigest hostarch.Addr) (uintptr, error) {
+ t := kernel.TaskFromContext(ctx)
+ if t == nil {
+ return 0, syserror.EINVAL
+ }
+ var metadata linux.DigestMetadata
+
+ fd.d.hashMu.RLock()
+ defer fd.d.hashMu.RUnlock()
+
+ // If allowRuntimeEnable is true, an empty fd.d.hash indicates that
+ // verity is not enabled for the file. If allowRuntimeEnable is false,
+ // this is an integrity violation because all files should have verity
+ // enabled, in which case fd.d.hash should be set.
+ if len(fd.d.hash) == 0 {
+ if fd.d.fs.allowRuntimeEnable {
+ return 0, syserror.ENODATA
+ }
+ return 0, alertIntegrityViolation("Ioctl measureVerity: no hash found")
+ }
+
+ // The first part of VerityDigest is the metadata.
+ if _, err := metadata.CopyIn(t, verityDigest); err != nil {
+ return 0, err
+ }
+ if metadata.DigestSize < uint16(len(fd.d.hash)) {
+ return 0, syserror.EOVERFLOW
+ }
+
+ // Populate the output digest size, since DigestSize is both input and
+ // output.
+ metadata.DigestSize = uint16(len(fd.d.hash))
+
+ // First copy the metadata.
+ if _, err := metadata.CopyOut(t, verityDigest); err != nil {
+ return 0, err
+ }
+
+ // Now copy the root hash bytes to the memory after metadata.
+ _, err := t.CopyOutBytes(hostarch.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.hash)
+ return 0, err
+}
+
+func (fd *fileDescription) verityFlags(ctx context.Context, flags hostarch.Addr) (uintptr, error) {
+ f := int32(0)
+
+ fd.d.hashMu.RLock()
+ // All enabled files should store a hash. This flag is not settable via
+ // FS_IOC_SETFLAGS.
+ if len(fd.d.hash) != 0 {
+ f |= linux.FS_VERITY_FL
+ }
+ fd.d.hashMu.RUnlock()
+
+ t := kernel.TaskFromContext(ctx)
+ if t == nil {
+ return 0, syserror.EINVAL
+ }
+ _, err := primitive.CopyInt32Out(t, flags, f)
+ return 0, err
+}
+
+// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
+func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ switch cmd := args[1].Uint(); cmd {
+ case linux.FS_IOC_ENABLE_VERITY:
+ return fd.enableVerity(ctx)
+ case linux.FS_IOC_MEASURE_VERITY:
+ return fd.measureVerity(ctx, args[2].Pointer())
+ case linux.FS_IOC_GETFLAGS:
+ return fd.verityFlags(ctx, args[2].Pointer())
+ default:
+ // TODO(b/169682228): Investigate which ioctl commands should
+ // be allowed.
+ return 0, syserror.ENOSYS
+ }
+}
+
+// Read implements vfs.FileDescriptionImpl.Read.
+func (fd *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ // Implement Read with PRead by setting offset.
+ fd.mu.Lock()
+ n, err := fd.PRead(ctx, dst, fd.off, opts)
+ fd.off += n
+ fd.mu.Unlock()
+ return n, err
+}
+
+// PRead implements vfs.FileDescriptionImpl.PRead.
+func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+ // No need to verify if the file is not enabled yet in
+ // allowRuntimeEnable mode.
+ if !fd.d.verityEnabled() {
+ return fd.lowerFD.PRead(ctx, dst, offset, opts)
+ }
+
+ fd.d.fs.verityMu.RLock()
+ defer fd.d.fs.verityMu.RUnlock()
+ // dataSize is the size of the whole file.
+ dataSize, err := fd.merkleReader.GetXattr(ctx, &vfs.GetXattrOptions{
+ Name: merkleSizeXattr,
+ Size: sizeOfStringInt32,
+ })
+
+ // The Merkle tree file for the child should have been created and
+ // contains the expected xattrs. If the xattr does not exist, it
+ // indicates unexpected modifications to the file system.
+ if err == syserror.ENODATA {
+ return 0, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err))
+ }
+ if err != nil {
+ return 0, err
+ }
+
+ // The dataSize xattr should be an integer. If it's not, it indicates
+ // unexpected modifications to the file system.
+ size, err := strconv.Atoi(dataSize)
+ if err != nil {
+ return 0, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", merkleSizeXattr, err))
+ }
+
+ dataReader := FileReadWriteSeeker{
+ FD: fd.lowerFD,
+ Ctx: ctx,
+ }
+
+ merkleReader := FileReadWriteSeeker{
+ FD: fd.merkleReader,
+ Ctx: ctx,
+ }
+
+ fd.d.hashMu.RLock()
+ n, err := merkletree.Verify(&merkletree.VerifyParams{
+ Out: dst.Writer(ctx),
+ File: &dataReader,
+ Tree: &merkleReader,
+ Size: int64(size),
+ Name: fd.d.name,
+ Mode: fd.d.mode,
+ UID: fd.d.uid,
+ GID: fd.d.gid,
+ Children: fd.d.childrenNames,
+ //TODO(b/156980949): Support passing other hash algorithms.
+ HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(),
+ ReadOffset: offset,
+ ReadSize: dst.NumBytes(),
+ Expected: fd.d.hash,
+ DataAndTreeInSameFile: false,
+ })
+ fd.d.hashMu.RUnlock()
+ if err != nil {
+ return 0, alertIntegrityViolation(fmt.Sprintf("Verification failed: %v", err))
+ }
+ return n, err
+}
+
+// PWrite implements vfs.FileDescriptionImpl.PWrite.
+func (fd *fileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+ return 0, syserror.EROFS
+}
+
+// Write implements vfs.FileDescriptionImpl.Write.
+func (fd *fileDescription) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+ return 0, syserror.EROFS
+}
+
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+func (fd *fileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+ if err := fd.lowerFD.ConfigureMMap(ctx, opts); err != nil {
+ return err
+ }
+ fd.lowerMappable = opts.Mappable
+ if opts.MappingIdentity != nil {
+ opts.MappingIdentity.DecRef(ctx)
+ opts.MappingIdentity = nil
+ }
+
+ // Check if mmap is allowed on the lower filesystem.
+ if !opts.SentryOwnedContent {
+ return syserror.ENODEV
+ }
+ return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
+}
+
+// LockBSD implements vfs.FileDescriptionImpl.LockBSD.
+func (fd *fileDescription) LockBSD(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, block fslock.Blocker) error {
+ return fd.lowerFD.LockBSD(ctx, ownerPID, t, block)
+}
+
+// UnlockBSD implements vfs.FileDescriptionImpl.UnlockBSD.
+func (fd *fileDescription) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
+ return fd.lowerFD.UnlockBSD(ctx)
+}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
+func (fd *fileDescription) LockPOSIX(ctx context.Context, uid fslock.UniqueID, ownerPID int32, t fslock.LockType, r fslock.LockRange, block fslock.Blocker) error {
+ return fd.lowerFD.LockPOSIX(ctx, uid, ownerPID, t, r, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX.
+func (fd *fileDescription) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, r fslock.LockRange) error {
+ return fd.lowerFD.UnlockPOSIX(ctx, uid, r)
+}
+
+// TestPOSIX implements vfs.FileDescriptionImpl.TestPOSIX.
+func (fd *fileDescription) TestPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, r fslock.LockRange) (linux.Flock, error) {
+ return fd.lowerFD.TestPOSIX(ctx, uid, t, r)
+}
+
+// Translate implements memmap.Mappable.Translate.
+func (fd *fileDescription) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
+ ts, err := fd.lowerMappable.Translate(ctx, required, optional, at)
+ if err != nil {
+ return ts, err
+ }
+
+ // dataSize is the size of the whole file.
+ dataSize, err := fd.merkleReader.GetXattr(ctx, &vfs.GetXattrOptions{
+ Name: merkleSizeXattr,
+ Size: sizeOfStringInt32,
+ })
+
+ // The Merkle tree file for the child should have been created and
+ // contains the expected xattrs. If the xattr does not exist, it
+ // indicates unexpected modifications to the file system.
+ if err == syserror.ENODATA {
+ return ts, alertIntegrityViolation(fmt.Sprintf("Failed to get xattr %s: %v", merkleSizeXattr, err))
+ }
+ if err != nil {
+ return ts, err
+ }
+
+ // The dataSize xattr should be an integer. If it's not, it indicates
+ // unexpected modifications to the file system.
+ size, err := strconv.Atoi(dataSize)
+ if err != nil {
+ return ts, alertIntegrityViolation(fmt.Sprintf("Failed to convert xattr %s to int: %v", merkleSizeXattr, err))
+ }
+
+ merkleReader := FileReadWriteSeeker{
+ FD: fd.merkleReader,
+ Ctx: ctx,
+ }
+
+ for _, t := range ts {
+ // Content integrity relies on sentry owning the backing data. MapInternal is guaranteed
+ // to fetch sentry owned memory because we disallow verity mmaps otherwise.
+ ims, err := t.File.MapInternal(memmap.FileRange{t.Offset, t.Offset + t.Source.Length()}, hostarch.Read)
+ if err != nil {
+ return nil, err
+ }
+ dataReader := mmapReadSeeker{ims, t.Source.Start}
+ var buf bytes.Buffer
+ _, err = merkletree.Verify(&merkletree.VerifyParams{
+ Out: &buf,
+ File: &dataReader,
+ Tree: &merkleReader,
+ Size: int64(size),
+ Name: fd.d.name,
+ Mode: fd.d.mode,
+ UID: fd.d.uid,
+ GID: fd.d.gid,
+ HashAlgorithms: fd.d.fs.alg.toLinuxHashAlg(),
+ ReadOffset: int64(t.Source.Start),
+ ReadSize: int64(t.Source.Length()),
+ Expected: fd.d.hash,
+ DataAndTreeInSameFile: false,
+ })
+ if err != nil {
+ return ts, alertIntegrityViolation(fmt.Sprintf("Verification failed: %v", err))
+ }
+ }
+ return ts, err
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (fd *fileDescription) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
+ return fd.lowerMappable.AddMapping(ctx, ms, ar, offset, writable)
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+func (fd *fileDescription) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
+ fd.lowerMappable.RemoveMapping(ctx, ms, ar, offset, writable)
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+func (fd *fileDescription) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
+ return fd.lowerMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable)
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+func (fd *fileDescription) InvalidateUnsavable(context.Context) error {
+ return nil
+}
+
+// mmapReadSeeker is a helper struct used by fileDescription.Translate to pass
+// a safemem.BlockSeq pointing to the mapped region as io.ReaderAt.
+type mmapReadSeeker struct {
+ safemem.BlockSeq
+ Offset uint64
+}
+
+// ReadAt implements io.ReaderAt.ReadAt. off is the offset into the mapped file.
+func (r *mmapReadSeeker) ReadAt(p []byte, off int64) (int, error) {
+ bs := r.BlockSeq
+ // Adjust the offset into the mapped file to get the offset into the internally
+ // mapped region.
+ readOffset := off - int64(r.Offset)
+ if readOffset < 0 {
+ return 0, syserror.EINVAL
+ }
+ bs.DropFirst64(uint64(readOffset))
+ view := bs.TakeFirst64(uint64(len(p)))
+ dst := safemem.BlockSeqOf(safemem.BlockFromSafeSlice(p))
+ n, err := safemem.CopySeq(dst, view)
+ return int(n), err
+}
+
+// FileReadWriteSeeker is a helper struct to pass a vfs.FileDescription as
+// io.Reader/io.Writer/io.ReadSeeker/io.ReaderAt/io.WriterAt/etc.
+type FileReadWriteSeeker struct {
+ FD *vfs.FileDescription
+ Ctx context.Context
+ ROpts vfs.ReadOptions
+ WOpts vfs.WriteOptions
+}
+
+// ReadAt implements io.ReaderAt.ReadAt.
+func (f *FileReadWriteSeeker) ReadAt(p []byte, off int64) (int, error) {
+ dst := usermem.BytesIOSequence(p)
+ n, err := f.FD.PRead(f.Ctx, dst, off, f.ROpts)
+ return int(n), err
+}
+
+// Read implements io.ReadWriteSeeker.Read.
+func (f *FileReadWriteSeeker) Read(p []byte) (int, error) {
+ dst := usermem.BytesIOSequence(p)
+ n, err := f.FD.Read(f.Ctx, dst, f.ROpts)
+ return int(n), err
+}
+
+// Seek implements io.ReadWriteSeeker.Seek.
+func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) {
+ return f.FD.Seek(f.Ctx, offset, int32(whence))
+}
+
+// WriteAt implements io.WriterAt.WriteAt.
+func (f *FileReadWriteSeeker) WriteAt(p []byte, off int64) (int, error) {
+ dst := usermem.BytesIOSequence(p)
+ n, err := f.FD.PWrite(f.Ctx, dst, off, f.WOpts)
+ return int(n), err
+}
+
+// Write implements io.ReadWriteSeeker.Write.
+func (f *FileReadWriteSeeker) Write(p []byte) (int, error) {
+ buf := usermem.BytesIOSequence(p)
+ n, err := f.FD.Write(f.Ctx, buf, f.WOpts)
+ return int(n), err
+}
diff --git a/pkg/sentry/fsimpl/verity/verity_state_autogen.go b/pkg/sentry/fsimpl/verity/verity_state_autogen.go
new file mode 100644
index 000000000..47bea46c9
--- /dev/null
+++ b/pkg/sentry/fsimpl/verity/verity_state_autogen.go
@@ -0,0 +1,237 @@
+// automatically generated by stateify.
+
+package verity
+
+import (
+ "gvisor.dev/gvisor/pkg/state"
+)
+
+func (fstype *FilesystemType) StateTypeName() string {
+ return "pkg/sentry/fsimpl/verity.FilesystemType"
+}
+
+func (fstype *FilesystemType) StateFields() []string {
+ return []string{}
+}
+
+func (fstype *FilesystemType) beforeSave() {}
+
+// +checklocksignore
+func (fstype *FilesystemType) StateSave(stateSinkObject state.Sink) {
+ fstype.beforeSave()
+}
+
+func (fstype *FilesystemType) afterLoad() {}
+
+// +checklocksignore
+func (fstype *FilesystemType) StateLoad(stateSourceObject state.Source) {
+}
+
+func (fs *filesystem) StateTypeName() string {
+ return "pkg/sentry/fsimpl/verity.filesystem"
+}
+
+func (fs *filesystem) StateFields() []string {
+ return []string{
+ "vfsfs",
+ "creds",
+ "allowRuntimeEnable",
+ "lowerMount",
+ "rootDentry",
+ "alg",
+ }
+}
+
+func (fs *filesystem) beforeSave() {}
+
+// +checklocksignore
+func (fs *filesystem) StateSave(stateSinkObject state.Sink) {
+ fs.beforeSave()
+ stateSinkObject.Save(0, &fs.vfsfs)
+ stateSinkObject.Save(1, &fs.creds)
+ stateSinkObject.Save(2, &fs.allowRuntimeEnable)
+ stateSinkObject.Save(3, &fs.lowerMount)
+ stateSinkObject.Save(4, &fs.rootDentry)
+ stateSinkObject.Save(5, &fs.alg)
+}
+
+func (fs *filesystem) afterLoad() {}
+
+// +checklocksignore
+func (fs *filesystem) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &fs.vfsfs)
+ stateSourceObject.Load(1, &fs.creds)
+ stateSourceObject.Load(2, &fs.allowRuntimeEnable)
+ stateSourceObject.Load(3, &fs.lowerMount)
+ stateSourceObject.Load(4, &fs.rootDentry)
+ stateSourceObject.Load(5, &fs.alg)
+}
+
+func (i *InternalFilesystemOptions) StateTypeName() string {
+ return "pkg/sentry/fsimpl/verity.InternalFilesystemOptions"
+}
+
+func (i *InternalFilesystemOptions) StateFields() []string {
+ return []string{
+ "RootMerkleFileName",
+ "LowerName",
+ "Alg",
+ "RootHash",
+ "AllowRuntimeEnable",
+ "LowerGetFSOptions",
+ "Action",
+ }
+}
+
+func (i *InternalFilesystemOptions) beforeSave() {}
+
+// +checklocksignore
+func (i *InternalFilesystemOptions) StateSave(stateSinkObject state.Sink) {
+ i.beforeSave()
+ stateSinkObject.Save(0, &i.RootMerkleFileName)
+ stateSinkObject.Save(1, &i.LowerName)
+ stateSinkObject.Save(2, &i.Alg)
+ stateSinkObject.Save(3, &i.RootHash)
+ stateSinkObject.Save(4, &i.AllowRuntimeEnable)
+ stateSinkObject.Save(5, &i.LowerGetFSOptions)
+ stateSinkObject.Save(6, &i.Action)
+}
+
+func (i *InternalFilesystemOptions) afterLoad() {}
+
+// +checklocksignore
+func (i *InternalFilesystemOptions) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &i.RootMerkleFileName)
+ stateSourceObject.Load(1, &i.LowerName)
+ stateSourceObject.Load(2, &i.Alg)
+ stateSourceObject.Load(3, &i.RootHash)
+ stateSourceObject.Load(4, &i.AllowRuntimeEnable)
+ stateSourceObject.Load(5, &i.LowerGetFSOptions)
+ stateSourceObject.Load(6, &i.Action)
+}
+
+func (d *dentry) StateTypeName() string {
+ return "pkg/sentry/fsimpl/verity.dentry"
+}
+
+func (d *dentry) StateFields() []string {
+ return []string{
+ "vfsd",
+ "refs",
+ "fs",
+ "mode",
+ "uid",
+ "gid",
+ "size",
+ "parent",
+ "name",
+ "children",
+ "childrenNames",
+ "lowerVD",
+ "lowerMerkleVD",
+ "symlinkTarget",
+ "hash",
+ }
+}
+
+func (d *dentry) beforeSave() {}
+
+// +checklocksignore
+func (d *dentry) StateSave(stateSinkObject state.Sink) {
+ d.beforeSave()
+ stateSinkObject.Save(0, &d.vfsd)
+ stateSinkObject.Save(1, &d.refs)
+ stateSinkObject.Save(2, &d.fs)
+ stateSinkObject.Save(3, &d.mode)
+ stateSinkObject.Save(4, &d.uid)
+ stateSinkObject.Save(5, &d.gid)
+ stateSinkObject.Save(6, &d.size)
+ stateSinkObject.Save(7, &d.parent)
+ stateSinkObject.Save(8, &d.name)
+ stateSinkObject.Save(9, &d.children)
+ stateSinkObject.Save(10, &d.childrenNames)
+ stateSinkObject.Save(11, &d.lowerVD)
+ stateSinkObject.Save(12, &d.lowerMerkleVD)
+ stateSinkObject.Save(13, &d.symlinkTarget)
+ stateSinkObject.Save(14, &d.hash)
+}
+
+// +checklocksignore
+func (d *dentry) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &d.vfsd)
+ stateSourceObject.Load(1, &d.refs)
+ stateSourceObject.Load(2, &d.fs)
+ stateSourceObject.Load(3, &d.mode)
+ stateSourceObject.Load(4, &d.uid)
+ stateSourceObject.Load(5, &d.gid)
+ stateSourceObject.Load(6, &d.size)
+ stateSourceObject.Load(7, &d.parent)
+ stateSourceObject.Load(8, &d.name)
+ stateSourceObject.Load(9, &d.children)
+ stateSourceObject.Load(10, &d.childrenNames)
+ stateSourceObject.Load(11, &d.lowerVD)
+ stateSourceObject.Load(12, &d.lowerMerkleVD)
+ stateSourceObject.Load(13, &d.symlinkTarget)
+ stateSourceObject.Load(14, &d.hash)
+ stateSourceObject.AfterLoad(d.afterLoad)
+}
+
+func (fd *fileDescription) StateTypeName() string {
+ return "pkg/sentry/fsimpl/verity.fileDescription"
+}
+
+func (fd *fileDescription) StateFields() []string {
+ return []string{
+ "vfsfd",
+ "FileDescriptionDefaultImpl",
+ "d",
+ "isDir",
+ "lowerFD",
+ "lowerMappable",
+ "merkleReader",
+ "merkleWriter",
+ "parentMerkleWriter",
+ "off",
+ }
+}
+
+func (fd *fileDescription) beforeSave() {}
+
+// +checklocksignore
+func (fd *fileDescription) StateSave(stateSinkObject state.Sink) {
+ fd.beforeSave()
+ stateSinkObject.Save(0, &fd.vfsfd)
+ stateSinkObject.Save(1, &fd.FileDescriptionDefaultImpl)
+ stateSinkObject.Save(2, &fd.d)
+ stateSinkObject.Save(3, &fd.isDir)
+ stateSinkObject.Save(4, &fd.lowerFD)
+ stateSinkObject.Save(5, &fd.lowerMappable)
+ stateSinkObject.Save(6, &fd.merkleReader)
+ stateSinkObject.Save(7, &fd.merkleWriter)
+ stateSinkObject.Save(8, &fd.parentMerkleWriter)
+ stateSinkObject.Save(9, &fd.off)
+}
+
+func (fd *fileDescription) afterLoad() {}
+
+// +checklocksignore
+func (fd *fileDescription) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &fd.vfsfd)
+ stateSourceObject.Load(1, &fd.FileDescriptionDefaultImpl)
+ stateSourceObject.Load(2, &fd.d)
+ stateSourceObject.Load(3, &fd.isDir)
+ stateSourceObject.Load(4, &fd.lowerFD)
+ stateSourceObject.Load(5, &fd.lowerMappable)
+ stateSourceObject.Load(6, &fd.merkleReader)
+ stateSourceObject.Load(7, &fd.merkleWriter)
+ stateSourceObject.Load(8, &fd.parentMerkleWriter)
+ stateSourceObject.Load(9, &fd.off)
+}
+
+func init() {
+ state.Register((*FilesystemType)(nil))
+ state.Register((*filesystem)(nil))
+ state.Register((*InternalFilesystemOptions)(nil))
+ state.Register((*dentry)(nil))
+ state.Register((*fileDescription)(nil))
+}