diff options
Diffstat (limited to 'pkg/merkletree/merkletree.go')
-rw-r--r-- | pkg/merkletree/merkletree.go | 534 |
1 files changed, 0 insertions, 534 deletions
diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go deleted file mode 100644 index 961bd4dcf..000000000 --- a/pkg/merkletree/merkletree.go +++ /dev/null @@ -1,534 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package merkletree implements Merkle tree generating and verification. -package merkletree - -import ( - "bytes" - "crypto/sha256" - "crypto/sha512" - "encoding/gob" - "fmt" - "io" - - "gvisor.dev/gvisor/pkg/abi/linux" - - "gvisor.dev/gvisor/pkg/hostarch" -) - -const ( - // sha256DigestSize specifies the digest size of a SHA256 hash. - sha256DigestSize = 32 - // sha512DigestSize specifies the digest size of a SHA512 hash. - sha512DigestSize = 64 -) - -// DigestSize returns the size (in bytes) of a digest. -// TODO(b/156980949): Allow config SHA384. -func DigestSize(hashAlgorithm int) int { - switch hashAlgorithm { - case linux.FS_VERITY_HASH_ALG_SHA256: - return sha256DigestSize - case linux.FS_VERITY_HASH_ALG_SHA512: - return sha512DigestSize - default: - return -1 - } -} - -// Layout defines the scale of a Merkle tree. -type Layout struct { - // blockSize is the size of a data block to be hashed. - blockSize int64 - // digestSize is the size of a generated hash. - digestSize int64 - // levelOffset contains the offset of the beginning of each level in - // bytes. The number of levels in the tree is the length of the slice. - // The leaf nodes (level 0) contain hashes of blocks of the input data. - // Each level N contains hashes of the blocks in level N-1. The highest - // level is the root hash. - levelOffset []int64 -} - -// InitLayout initializes and returns a new Layout object describing the structure -// of a tree. dataSize specifies the size of input data in bytes. -func InitLayout(dataSize int64, hashAlgorithms int, dataAndTreeInSameFile bool) (Layout, error) { - layout := Layout{ - blockSize: hostarch.PageSize, - } - - // TODO(b/156980949): Allow config SHA384. - switch hashAlgorithms { - case linux.FS_VERITY_HASH_ALG_SHA256: - layout.digestSize = sha256DigestSize - case linux.FS_VERITY_HASH_ALG_SHA512: - layout.digestSize = sha512DigestSize - default: - return Layout{}, fmt.Errorf("unexpected hash algorithms") - } - - // treeStart is the offset (in bytes) of the first level of the tree in - // the file. If data and tree are in different files, treeStart should - // be zero. If data is in the same file as the tree, treeStart points - // to the block after the last data block (which may be zero-padded). - var treeStart int64 - if dataAndTreeInSameFile { - treeStart = dataSize - if dataSize%layout.blockSize != 0 { - treeStart += layout.blockSize - dataSize%layout.blockSize - } - } - - numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize - level := 0 - offset := int64(0) - - // Calculate the number of levels in the Merkle tree and the beginning - // offset of each level. Level 0 consists of the leaf nodes that - // contain the hashes of the data blocks, while level numLevels - 1 is - // the root. - for numBlocks > 1 { - layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize) - // Round numBlocks up to fill up a block. - numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock() - offset += numBlocks / layout.hashesPerBlock() - numBlocks = numBlocks / layout.hashesPerBlock() - level++ - } - layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize) - - return layout, nil -} - -// hashesPerBlock() returns the number of digests in each block. For example, -// if blockSize is 4096 bytes, and digestSize is 32 bytes, there will be 128 -// hashesPerBlock. Therefore 128 hashes in one level will be combined in one -// hash in the level above. -func (layout Layout) hashesPerBlock() int64 { - return layout.blockSize / layout.digestSize -} - -// numLevels returns the total number of levels in the Merkle tree. -func (layout Layout) numLevels() int { - return len(layout.levelOffset) -} - -// rootLevel returns the level of the root hash. -func (layout Layout) rootLevel() int { - return layout.numLevels() - 1 -} - -// digestOffset finds the offset of a digest from the beginning of the tree. -// The target digest is at level of the tree, with index from the beginning of -// the current level. -func (layout Layout) digestOffset(level int, index int64) int64 { - return layout.levelOffset[level] + index*layout.digestSize -} - -// blockOffset finds the offset of a block from the beginning of the tree. The -// target block is at level of the tree, with index from the beginning of the -// current level. -func (layout Layout) blockOffset(level int, index int64) int64 { - return layout.levelOffset[level] + index*layout.blockSize -} - -// VerityDescriptor is a struct that is serialized and hashed to get a file's -// root hash, which contains the root hash of the raw content and the file's -// meatadata. -type VerityDescriptor struct { - Name string - FileSize int64 - Mode uint32 - UID uint32 - GID uint32 - Children map[string]struct{} - SymlinkTarget string - RootHash []byte -} - -func (d *VerityDescriptor) String() string { - b := new(bytes.Buffer) - e := gob.NewEncoder(b) - e.Encode(d.Children) - return fmt.Sprintf("Name: %s, Size: %d, Mode: %d, UID: %d, GID: %d, Children: %v, Symlink: %s, RootHash: %v", d.Name, d.FileSize, d.Mode, d.UID, d.GID, b.Bytes(), d.SymlinkTarget, d.RootHash) -} - -// verify generates a hash from d, and compares it with expected. -func (d *VerityDescriptor) verify(expected []byte, hashAlgorithms int) error { - h, err := hashData([]byte(d.String()), hashAlgorithms) - if err != nil { - return err - } - if !bytes.Equal(h[:], expected) { - return fmt.Errorf("unexpected root hash") - } - return nil - -} - -// hashData hashes data and returns the result hash based on the hash -// algorithms. -func hashData(data []byte, hashAlgorithms int) ([]byte, error) { - var digest []byte - switch hashAlgorithms { - case linux.FS_VERITY_HASH_ALG_SHA256: - digestArray := sha256.Sum256(data) - digest = digestArray[:] - case linux.FS_VERITY_HASH_ALG_SHA512: - digestArray := sha512.Sum512(data) - digest = digestArray[:] - default: - return nil, fmt.Errorf("unexpected hash algorithms") - } - return digest, nil -} - -// GenerateParams contains the parameters used to generate a Merkle tree for a -// given file. -type GenerateParams struct { - // File is a reader of the file to be hashed. - File io.ReaderAt - // Size is the size of the file. - Size int64 - // Name is the name of the target file. - Name string - // Mode is the mode of the target file. - Mode uint32 - // UID is the user ID of the target file. - UID uint32 - // GID is the group ID of the target file. - GID uint32 - // Children is a map of children names for a directory. It should be - // empty for a regular file. - Children map[string]struct{} - // SymlinkTarget is the target path of a symlink file, or "" if the file is not a symlink. - SymlinkTarget string - // HashAlgorithms is the algorithms used to hash data. - HashAlgorithms int - // TreeReader is a reader for the Merkle tree. - TreeReader io.ReaderAt - // TreeWriter is a writer for the Merkle tree. - TreeWriter io.Writer - // DataAndTreeInSameFile is true if data and Merkle tree are in the same - // file, or false if Merkle tree is a separate file from data. - DataAndTreeInSameFile bool -} - -// Generate constructs a Merkle tree for the contents of params.File. The -// output is written to params.TreeWriter. -// -// Generate returns a hash of a VerityDescriptor, which contains the file -// metadata and the hash from file content. -func Generate(params *GenerateParams) ([]byte, error) { - descriptor := VerityDescriptor{ - FileSize: params.Size, - Name: params.Name, - Mode: params.Mode, - UID: params.UID, - GID: params.GID, - SymlinkTarget: params.SymlinkTarget, - } - - // If file is a symlink do not generate root hash for file content. - if params.SymlinkTarget != "" { - return hashData([]byte(descriptor.String()), params.HashAlgorithms) - } - - layout, err := InitLayout(params.Size, params.HashAlgorithms, params.DataAndTreeInSameFile) - if err != nil { - return nil, err - } - - numBlocks := (params.Size + layout.blockSize - 1) / layout.blockSize - - // If the data is in the same file as the tree, zero pad the last data - // block. - bytesInLastBlock := params.Size % layout.blockSize - if params.DataAndTreeInSameFile && bytesInLastBlock != 0 { - zeroBuf := make([]byte, layout.blockSize-bytesInLastBlock) - if _, err := params.TreeWriter.Write(zeroBuf); err != nil { - return nil, err - } - } - - var root []byte - for level := 0; level < layout.numLevels(); level++ { - for i := int64(0); i < numBlocks; i++ { - buf := make([]byte, layout.blockSize) - var ( - n int - err error - ) - if level == 0 { - // Read data block from the target file since level 0 includes hashes - // of blocks in the input data. - n, err = params.File.ReadAt(buf, i*layout.blockSize) - } else { - // Read data block from the tree file since levels higher than 0 are - // hashing the lower level hashes. - n, err = params.TreeReader.ReadAt(buf, layout.blockOffset(level-1, i)) - } - - // err is populated as long as the bytes read is smaller than the buffer - // size. This could be the case if we are reading the last block, and - // break in that case. If this is the last block, the end of the block - // will be zero-padded. - if n == 0 && err == io.EOF { - break - } else if err != nil && err != io.EOF { - return nil, err - } - // Hash the bytes in buf. - digest, err := hashData(buf, params.HashAlgorithms) - if err != nil { - return nil, err - } - - if level == layout.rootLevel() { - root = digest - } - - // Write the generated hash to the end of the tree file. - if _, err = params.TreeWriter.Write(digest[:]); err != nil { - return nil, err - } - } - // If the generated digests do not round up to a block, zero-padding the - // remaining of the last block. But no need to do so for root. - if level != layout.rootLevel() && numBlocks%layout.hashesPerBlock() != 0 { - zeroBuf := make([]byte, layout.blockSize-(numBlocks%layout.hashesPerBlock())*layout.digestSize) - if _, err := params.TreeWriter.Write(zeroBuf[:]); err != nil { - return nil, err - } - } - numBlocks = (numBlocks + layout.hashesPerBlock() - 1) / layout.hashesPerBlock() - } - descriptor.RootHash = root - return hashData([]byte(descriptor.String()), params.HashAlgorithms) -} - -// VerifyParams contains the params used to verify a portion of a file against -// a Merkle tree. -type VerifyParams struct { - // Out will be filled with verified data. - Out io.Writer - // File is a handler on the file to be verified. - File io.ReaderAt - // tree is a handler on the Merkle tree used to verify file. - Tree io.ReaderAt - // Size is the size of the file. - Size int64 - // Name is the name of the target file. - Name string - // Mode is the mode of the target file. - Mode uint32 - // UID is the user ID of the target file. - UID uint32 - // GID is the group ID of the target file. - GID uint32 - // Children is a map of children names for a directory. It should be - // empty for a regular file. - Children map[string]struct{} - // SymlinkTarget is the target path of a symlink file, or "" if the file is not a symlink. - SymlinkTarget string - // HashAlgorithms is the algorithms used to hash data. - HashAlgorithms int - // ReadOffset is the offset of the data range to be verified. - ReadOffset int64 - // ReadSize is the size of the data range to be verified. - ReadSize int64 - // Expected is a trusted hash for the file. It is compared with the - // calculated root hash to verify the content. - Expected []byte - // DataAndTreeInSameFile is true if data and Merkle tree are in the same - // file, or false if Merkle tree is a separate file from data. - DataAndTreeInSameFile bool -} - -// verifyMetadata verifies the metadata by hashing a descriptor that contains -// the metadata and compare the generated hash with expected. -// -// For verifyMetadata, params.data is not needed. It only accesses params.tree -// for the raw root hash. -func verifyMetadata(params *VerifyParams, layout *Layout) error { - var root []byte - // Only read the root hash if we expect that the file is not a symlink and its - // Merkle tree file is non-empty. - if params.Size != 0 && params.SymlinkTarget == "" { - root = make([]byte, layout.digestSize) - if _, err := params.Tree.ReadAt(root, layout.blockOffset(layout.rootLevel(), 0 /* index */)); err != nil { - return fmt.Errorf("failed to read root hash: %w", err) - } - } - descriptor := VerityDescriptor{ - Name: params.Name, - FileSize: params.Size, - Mode: params.Mode, - UID: params.UID, - GID: params.GID, - Children: params.Children, - SymlinkTarget: params.SymlinkTarget, - RootHash: root, - } - return descriptor.verify(params.Expected, params.HashAlgorithms) -} - -// Verify verifies the content read from data with offset. The content is -// verified against tree. If content spans across multiple blocks, each block is -// verified. Verification fails if the hash of the data does not match the tree -// at any level, or if the final root hash does not match expected. -// Once the data is verified, it will be written using params.Out. -// -// Verify checks for both target file content and metadata. If readSize is 0, -// only metadata is checked. -func Verify(params *VerifyParams) (int64, error) { - if params.ReadSize < 0 { - return 0, fmt.Errorf("unexpected read size: %d", params.ReadSize) - } - layout, err := InitLayout(int64(params.Size), params.HashAlgorithms, params.DataAndTreeInSameFile) - if err != nil { - return 0, err - } - if params.ReadSize == 0 { - return 0, verifyMetadata(params, &layout) - } - - // Calculate the index of blocks that includes the target range in input - // data. - firstDataBlock := params.ReadOffset / layout.blockSize - lastDataBlock := (params.ReadOffset + params.ReadSize - 1) / layout.blockSize - - buf := make([]byte, layout.blockSize) - var readErr error - total := int64(0) - for i := firstDataBlock; i <= lastDataBlock; i++ { - // Read a block that includes all or part of target range in - // input data. - bytesRead, err := params.File.ReadAt(buf, i*layout.blockSize) - readErr = err - // If at the end of input data and all previous blocks are - // verified, return the verified input data and EOF. - if readErr == io.EOF && bytesRead == 0 { - break - } - if readErr != nil && readErr != io.EOF { - return 0, fmt.Errorf("read from data failed: %w", err) - } - // If this is the end of file, zero the remaining bytes in buf, - // otherwise they are still from the previous block. - // TODO(b/162908070): Investigate possible issues with zero - // padding the data. - if bytesRead < len(buf) { - for j := bytesRead; j < len(buf); j++ { - buf[j] = 0 - } - } - descriptor := VerityDescriptor{ - Name: params.Name, - FileSize: params.Size, - Mode: params.Mode, - UID: params.UID, - GID: params.GID, - SymlinkTarget: params.SymlinkTarget, - Children: params.Children, - } - if err := verifyBlock(params.Tree, &descriptor, &layout, buf, i, params.HashAlgorithms, params.Expected); err != nil { - return 0, err - } - - // startOff is the beginning of the read range within the - // current data block. Note that for all blocks other than the - // first, startOff should be 0. - startOff := int64(0) - if i == firstDataBlock { - startOff = params.ReadOffset % layout.blockSize - } - // endOff is the end of the read range within the current data - // block. Note that for all blocks other than the last, endOff - // should be the block size. - endOff := layout.blockSize - if i == lastDataBlock { - endOff = (params.ReadOffset+params.ReadSize-1)%layout.blockSize + 1 - } - // If the provided size exceeds the end of input data, we should - // only copy the parts in buf that's part of input data. - if startOff > int64(bytesRead) { - startOff = int64(bytesRead) - } - if endOff > int64(bytesRead) { - endOff = int64(bytesRead) - } - n, err := params.Out.Write(buf[startOff:endOff]) - if err != nil { - return total, err - } - total += int64(n) - - } - return total, readErr -} - -// verifyBlock verifies a block against tree. index is the number of block in -// original data. The block is verified through each level of the tree. It -// fails if the calculated hash from block is different from any level of -// hashes stored in tree. And the final root hash is compared with -// expected. -func verifyBlock(tree io.ReaderAt, descriptor *VerityDescriptor, layout *Layout, dataBlock []byte, blockIndex int64, hashAlgorithms int, expected []byte) error { - if len(dataBlock) != int(layout.blockSize) { - return fmt.Errorf("incorrect block size") - } - - expectedDigest := make([]byte, layout.digestSize) - treeBlock := make([]byte, layout.blockSize) - var digest []byte - for level := 0; level < layout.numLevels(); level++ { - // Calculate hash. - if level == 0 { - h, err := hashData(dataBlock, hashAlgorithms) - if err != nil { - return err - } - digest = h - } else { - // Read a block in previous level that contains the - // hash we just generated, and generate a next level - // hash from it. - if _, err := tree.ReadAt(treeBlock, layout.blockOffset(level-1, blockIndex)); err != nil { - return err - } - h, err := hashData(treeBlock, hashAlgorithms) - if err != nil { - return err - } - digest = h - } - - // Read the digest for the current block and store in - // expectedDigest. - if _, err := tree.ReadAt(expectedDigest, layout.digestOffset(level, blockIndex)); err != nil { - return err - } - - if !bytes.Equal(digest, expectedDigest) { - return fmt.Errorf("verification failed") - } - blockIndex = blockIndex / layout.hashesPerBlock() - } - - // Verification for the tree succeeded. Now hash the descriptor with - // the root hash and compare it with expected. - descriptor.RootHash = digest - return descriptor.verify(expected, hashAlgorithms) -} |