diff options
author | Chong Cai <chongc@google.com> | 2020-10-12 17:28:20 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2020-10-12 17:30:14 -0700 |
commit | ef90fe173380a8d769c699aec08737ef56f43c3e (patch) | |
tree | 5ef76cba87b92bb950427976b5b7cea158ef2418 /pkg/sentry | |
parent | e7bbe70f79aa9308c2eb54b057ee5779b22f478e (diff) |
Change Merkle tree library to use ReaderAt
Merkle tree library was originally using Read/Seek to access data and
tree, since the parameters are io.ReadSeeker. This could cause race
conditions if multiple threads accesses the same fd to read. Here we
change to use ReaderAt, and implement it with PRead to make it thread
safe.
PiperOrigin-RevId: 336779260
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/fsimpl/verity/filesystem.go | 45 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/verity/verity.go | 76 | ||||
-rw-r--r-- | pkg/sentry/vfs/file_description.go | 24 |
3 files changed, 79 insertions, 66 deletions
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go index 7779271a9..34e2c9d7c 100644 --- a/pkg/sentry/fsimpl/verity/filesystem.go +++ b/pkg/sentry/fsimpl/verity/filesystem.go @@ -156,11 +156,10 @@ afterSymlink: return child, nil } -// verifyChild verifies the root hash of child against the already verified -// root hash of the parent to ensure the child is expected. verifyChild -// triggers a sentry panic if unexpected modifications to the file system are -// detected. In noCrashOnVerificationFailure mode it returns a syserror -// instead. +// verifyChild verifies the hash of child against the already verified hash of +// the parent to ensure the child is expected. verifyChild triggers a sentry +// panic if unexpected modifications to the file system are detected. In +// noCrashOnVerificationFailure mode it returns a syserror instead. // Preconditions: fs.renameMu must be locked. d.dirMu must be locked. // TODO(b/166474175): Investigate all possible errors returned in this // function, and make sure we differentiate all errors that indicate unexpected @@ -179,8 +178,8 @@ func (fs *filesystem) verifyChild(ctx context.Context, parent *dentry, child *de defer verityMu.RUnlock() // Read the offset of the child from the extended attributes of the // corresponding Merkle tree file. - // This is the offset of the root hash for child in its parent's Merkle - // tree file. + // This is the offset of the hash for child in its parent's Merkle tree + // file. off, err := vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{ Root: child.lowerMerkleVD, Start: child.lowerMerkleVD, @@ -205,7 +204,7 @@ func (fs *filesystem) verifyChild(ctx context.Context, parent *dentry, child *de return nil, alertIntegrityViolation(err, fmt.Sprintf("Failed to convert xattr %s for %s to int: %v", merkleOffsetInParentXattr, childPath, err)) } - // Open parent Merkle tree file to read and verify child's root hash. + // Open parent Merkle tree file to read and verify child's hash. parentMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ Root: parent.lowerMerkleVD, Start: parent.lowerMerkleVD, @@ -224,7 +223,7 @@ func (fs *filesystem) verifyChild(ctx context.Context, parent *dentry, child *de // dataSize is the size of raw data for the Merkle tree. For a file, // dataSize is the size of the whole file. For a directory, dataSize is - // the size of all its children's root hashes. + // the size of all its children's hashes. dataSize, err := parentMerkleFD.GetXattr(ctx, &vfs.GetXattrOptions{ Name: merkleSizeXattr, Size: sizeOfStringInt32, @@ -264,7 +263,7 @@ func (fs *filesystem) verifyChild(ctx context.Context, parent *dentry, child *de } // Since we are verifying against a directory Merkle tree, buf should - // contain the root hash of the children in the parent Merkle tree when + // contain the hash of the children in the parent Merkle tree when // Verify returns with success. var buf bytes.Buffer if _, err := merkletree.Verify(&merkletree.VerifyParams{ @@ -278,21 +277,21 @@ func (fs *filesystem) verifyChild(ctx context.Context, parent *dentry, child *de GID: parentStat.GID, ReadOffset: int64(offset), ReadSize: int64(merkletree.DigestSize()), - ExpectedRoot: parent.rootHash, + Expected: parent.hash, DataAndTreeInSameFile: true, }); err != nil && err != io.EOF { return nil, alertIntegrityViolation(syserror.EIO, fmt.Sprintf("Verification for %s failed: %v", childPath, err)) } - // Cache child root hash when it's verified the first time. - if len(child.rootHash) == 0 { - child.rootHash = buf.Bytes() + // Cache child hash when it's verified the first time. + if len(child.hash) == 0 { + child.hash = buf.Bytes() } return child, nil } -// verifyStat verifies the stat against the verified root hash. The mode/uid/gid -// of the file is cached after verified. +// verifyStat verifies the stat against the verified hash. The mode/uid/gid of +// the file is cached after verified. func (fs *filesystem) verifyStat(ctx context.Context, d *dentry, stat linux.Statx) error { vfsObj := fs.vfsfs.VirtualFilesystem() @@ -353,7 +352,7 @@ func (fs *filesystem) verifyStat(ctx context.Context, d *dentry, stat linux.Stat ReadOffset: 0, // Set read size to 0 so only the metadata is verified. ReadSize: 0, - ExpectedRoot: d.rootHash, + Expected: d.hash, DataAndTreeInSameFile: false, } if atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFDIR { @@ -375,8 +374,8 @@ func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name s // If enabling verification on files/directories is not allowed // during runtime, all cached children are already verified. If // runtime enable is allowed and the parent directory is - // enabled, we should verify the child root hash here because - // it may be cached before enabled. + // enabled, we should verify the child hash here because it may + // be cached before enabled. if fs.allowRuntimeEnable { if isEnabled(parent) { if _, err := fs.verifyChild(ctx, parent, child); err != nil { @@ -481,9 +480,9 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, // file open and ready to use. // This may cause empty and unused Merkle tree files in // allowRuntimeEnable mode, if they are never enabled. This - // does not affect verification, as we rely on cached root hash - // to decide whether to perform verification, not the existence - // of the Merkle tree file. Also, those Merkle tree files are + // does not affect verification, as we rely on cached hash to + // decide whether to perform verification, not the existence of + // the Merkle tree file. Also, those Merkle tree files are // always hidden and cannot be accessed by verity fs users. if fs.allowRuntimeEnable { childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ @@ -551,7 +550,7 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, child.uid = stat.UID child.gid = stat.GID - // Verify child root hash. This should always be performed unless in + // Verify child hash. This should always be performed unless in // allowRuntimeEnable mode and the parent directory hasn't been enabled // yet. if isEnabled(parent) { diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go index 3eb972237..4a6708633 100644 --- a/pkg/sentry/fsimpl/verity/verity.go +++ b/pkg/sentry/fsimpl/verity/verity.go @@ -49,12 +49,12 @@ const Name = "verity" const merklePrefix = ".merkle.verity." // merkleoffsetInParentXattr is the extended attribute name specifying the -// offset of child root hash in its parent's Merkle tree. +// offset of child hash in its parent's Merkle tree. const merkleOffsetInParentXattr = "user.merkle.offset" // merkleSizeXattr is the extended attribute name specifying the size of data // hashed by the corresponding Merkle tree. For a file, it's the size of the -// whole file. For a directory, it's the size of all its children's root hashes. +// whole file. For a directory, it's the size of all its children's hashes. const merkleSizeXattr = "user.merkle.size" // sizeOfStringInt32 is the size for a 32 bit integer stored as string in @@ -147,7 +147,7 @@ func (FilesystemType) Name() string { // mode, it returns true if the target has been enabled with // ioctl(FS_IOC_ENABLE_VERITY). func isEnabled(d *dentry) bool { - return !d.fs.allowRuntimeEnable || len(d.rootHash) != 0 + return !d.fs.allowRuntimeEnable || len(d.hash) != 0 } // alertIntegrityViolation alerts a violation of integrity, which usually means @@ -256,7 +256,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt d.mode = uint32(stat.Mode) d.uid = stat.UID d.gid = stat.GID - d.rootHash = make([]byte, len(iopts.RootHash)) + d.hash = make([]byte, len(iopts.RootHash)) if !fs.allowRuntimeEnable { if err := fs.verifyStat(ctx, d, stat); err != nil { @@ -264,7 +264,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt } } - copy(d.rootHash, iopts.RootHash) + copy(d.hash, iopts.RootHash) d.vfsd.Init(d) fs.rootDentry = d @@ -316,8 +316,8 @@ type dentry struct { // in the underlying file system. lowerMerkleVD vfs.VirtualDentry - // rootHash is the rootHash for the current file or directory. - rootHash []byte + // hash is the calculated hash for the current file or directory. + hash []byte } // newDentry creates a new dentry representing the given verity file. The @@ -516,11 +516,11 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) } // generateMerkle generates a Merkle tree file for fd. If fd points to a file -// /foo/bar, a Merkle tree file /foo/.merkle.verity.bar is generated. The root -// hash of the generated Merkle tree and the data size is returned. -// If fd points to a regular file, the data is the content of the file. If fd -// points to a directory, the data is all root hahes of its children, written -// to the Merkle tree file. +// /foo/bar, a Merkle tree file /foo/.merkle.verity.bar is generated. The hash +// of the generated Merkle tree and the data size is returned. If fd points to +// a regular file, the data is the content of the file. If fd points to a +// directory, the data is all hahes of its children, written to the Merkle tree +// file. func (fd *fileDescription) generateMerkle(ctx context.Context) ([]byte, uint64, error) { fdReader := vfs.FileReadWriteSeeker{ FD: fd.lowerFD, @@ -557,9 +557,9 @@ func (fd *fileDescription) generateMerkle(ctx context.Context) ([]byte, uint64, params.GID = stat.GID params.DataAndTreeInSameFile = false case linux.S_IFDIR: - // For a directory, generate a Merkle tree based on the root - // hashes of its children that has already been written to the - // Merkle tree file. + // For a directory, generate a Merkle tree based on the hashes + // of its children that has already been written to the Merkle + // tree file. merkleStat, err := fd.merkleReader.Stat(ctx, vfs.StatOptions{}) if err != nil { return nil, 0, err @@ -583,12 +583,12 @@ func (fd *fileDescription) generateMerkle(ctx context.Context) ([]byte, uint64, // enable other types of file. return nil, 0, syserror.EINVAL } - rootHash, err := merkletree.Generate(params) - return rootHash, uint64(params.Size), err + hash, err := merkletree.Generate(params) + return hash, uint64(params.Size), err } // enableVerity enables verity features on fd by generating a Merkle tree file -// and stores its root hash in its parent directory's Merkle tree. +// and stores its hash in its parent directory's Merkle tree. func (fd *fileDescription) enableVerity(ctx context.Context, uio usermem.IO) (uintptr, error) { if !fd.d.fs.allowRuntimeEnable { return 0, syserror.EPERM @@ -607,7 +607,7 @@ func (fd *fileDescription) enableVerity(ctx context.Context, uio usermem.IO) (ui return 0, alertIntegrityViolation(syserror.EIO, "Unexpected verity fd: missing expected underlying fds") } - rootHash, dataSize, err := fd.generateMerkle(ctx) + hash, dataSize, err := fd.generateMerkle(ctx) if err != nil { return 0, err } @@ -618,15 +618,15 @@ func (fd *fileDescription) enableVerity(ctx context.Context, uio usermem.IO) (ui return 0, err } - // Write the root hash of fd to the parent directory's Merkle - // tree file, as it should be part of the parent Merkle tree - // data. parentMerkleWriter is open with O_APPEND, so it - // should write directly to the end of the file. - if _, err = fd.parentMerkleWriter.Write(ctx, usermem.BytesIOSequence(rootHash), vfs.WriteOptions{}); err != nil { + // Write the hash of fd to the parent directory's Merkle tree + // file, as it should be part of the parent Merkle tree data. + // parentMerkleWriter is open with O_APPEND, so it should write + // directly to the end of the file. + if _, err = fd.parentMerkleWriter.Write(ctx, usermem.BytesIOSequence(hash), vfs.WriteOptions{}); err != nil { return 0, err } - // Record the offset of the root hash of fd in parent directory's + // Record the offset of the hash of fd in parent directory's // Merkle tree file. if err := fd.merkleWriter.SetXattr(ctx, &vfs.SetXattrOptions{ Name: merkleOffsetInParentXattr, @@ -643,37 +643,37 @@ func (fd *fileDescription) enableVerity(ctx context.Context, uio usermem.IO) (ui }); err != nil { return 0, err } - fd.d.rootHash = append(fd.d.rootHash, rootHash...) + fd.d.hash = append(fd.d.hash, hash...) return 0, nil } -// measureVerity returns the root hash of fd, saved in args[2]. +// measureVerity returns the hash of fd, saved in verityDigest. func (fd *fileDescription) measureVerity(ctx context.Context, uio usermem.IO, verityDigest usermem.Addr) (uintptr, error) { t := kernel.TaskFromContext(ctx) var metadata linux.DigestMetadata - // If allowRuntimeEnable is true, an empty fd.d.rootHash indicates that + // If allowRuntimeEnable is true, an empty fd.d.hash indicates that // verity is not enabled for the file. If allowRuntimeEnable is false, // this is an integrity violation because all files should have verity - // enabled, in which case fd.d.rootHash should be set. - if len(fd.d.rootHash) == 0 { + // enabled, in which case fd.d.hash should be set. + if len(fd.d.hash) == 0 { if fd.d.fs.allowRuntimeEnable { return 0, syserror.ENODATA } - return 0, alertIntegrityViolation(syserror.ENODATA, "Ioctl measureVerity: no root hash found") + return 0, alertIntegrityViolation(syserror.ENODATA, "Ioctl measureVerity: no hash found") } // The first part of VerityDigest is the metadata. if _, err := metadata.CopyIn(t, verityDigest); err != nil { return 0, err } - if metadata.DigestSize < uint16(len(fd.d.rootHash)) { + if metadata.DigestSize < uint16(len(fd.d.hash)) { return 0, syserror.EOVERFLOW } // Populate the output digest size, since DigestSize is both input and // output. - metadata.DigestSize = uint16(len(fd.d.rootHash)) + metadata.DigestSize = uint16(len(fd.d.hash)) // First copy the metadata. if _, err := metadata.CopyOut(t, verityDigest); err != nil { @@ -681,16 +681,16 @@ func (fd *fileDescription) measureVerity(ctx context.Context, uio usermem.IO, ve } // Now copy the root hash bytes to the memory after metadata. - _, err := t.CopyOutBytes(usermem.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.rootHash) + _, err := t.CopyOutBytes(usermem.Addr(uintptr(verityDigest)+linux.SizeOfDigestMetadata), fd.d.hash) return 0, err } func (fd *fileDescription) verityFlags(ctx context.Context, uio usermem.IO, flags usermem.Addr) (uintptr, error) { f := int32(0) - // All enabled files should store a root hash. This flag is not settable - // via FS_IOC_SETFLAGS. - if len(fd.d.rootHash) != 0 { + // All enabled files should store a hash. This flag is not settable via + // FS_IOC_SETFLAGS. + if len(fd.d.hash) != 0 { f |= linux.FS_VERITY_FL } @@ -767,7 +767,7 @@ func (fd *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of GID: fd.d.gid, ReadOffset: offset, ReadSize: dst.NumBytes(), - ExpectedRoot: fd.d.rootHash, + Expected: fd.d.hash, DataAndTreeInSameFile: false, }) if err != nil { diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index 1eba0270f..183957ad8 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -827,7 +827,7 @@ func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) FileAsyn } // FileReadWriteSeeker is a helper struct to pass a FileDescription as -// io.Reader/io.Writer/io.ReadSeeker/etc. +// io.Reader/io.Writer/io.ReadSeeker/io.ReaderAt/io.WriterAt/etc. type FileReadWriteSeeker struct { FD *FileDescription Ctx context.Context @@ -835,11 +835,18 @@ type FileReadWriteSeeker struct { WOpts WriteOptions } +// ReadAt implements io.ReaderAt.ReadAt. +func (f *FileReadWriteSeeker) ReadAt(p []byte, off int64) (int, error) { + dst := usermem.BytesIOSequence(p) + n, err := f.FD.PRead(f.Ctx, dst, off, f.ROpts) + return int(n), err +} + // Read implements io.ReadWriteSeeker.Read. func (f *FileReadWriteSeeker) Read(p []byte) (int, error) { dst := usermem.BytesIOSequence(p) - ret, err := f.FD.Read(f.Ctx, dst, f.ROpts) - return int(ret), err + n, err := f.FD.Read(f.Ctx, dst, f.ROpts) + return int(n), err } // Seek implements io.ReadWriteSeeker.Seek. @@ -847,9 +854,16 @@ func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) { return f.FD.Seek(f.Ctx, offset, int32(whence)) } +// WriteAt implements io.WriterAt.WriteAt. +func (f *FileReadWriteSeeker) WriteAt(p []byte, off int64) (int, error) { + dst := usermem.BytesIOSequence(p) + n, err := f.FD.PWrite(f.Ctx, dst, off, f.WOpts) + return int(n), err +} + // Write implements io.ReadWriteSeeker.Write. func (f *FileReadWriteSeeker) Write(p []byte) (int, error) { buf := usermem.BytesIOSequence(p) - ret, err := f.FD.Write(f.Ctx, buf, f.WOpts) - return int(ret), err + n, err := f.FD.Write(f.Ctx, buf, f.WOpts) + return int(n), err } |