Diffstat (limited to 'pkg/merkletree/merkletree.go')
-rw-r--r-- | pkg/merkletree/merkletree.go | 274
1 file changed, 199 insertions, 75 deletions
diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go
index 955c9c473..d8227b8bd 100644
--- a/pkg/merkletree/merkletree.go
+++ b/pkg/merkletree/merkletree.go
@@ -29,13 +29,19 @@ const (
 	sha256DigestSize = 32
 )
 
+// DigestSize returns the size (in bytes) of a digest.
+// TODO(b/156980949): Allow config other hash methods (SHA384/SHA512).
+func DigestSize() int {
+	return sha256DigestSize
+}
+
 // Layout defines the scale of a Merkle tree.
 type Layout struct {
 	// blockSize is the size of a data block to be hashed.
 	blockSize int64
 	// digestSize is the size of a generated hash.
 	digestSize int64
-	// levelOffset contains the offset of the begnning of each level in
+	// levelOffset contains the offset of the beginning of each level in
 	// bytes. The number of levels in the tree is the length of the slice.
 	// The leaf nodes (level 0) contain hashes of blocks of the input data.
 	// Each level N contains hashes of the blocks in level N-1. The highest
@@ -45,12 +51,25 @@ type Layout struct {
 
 // InitLayout initializes and returns a new Layout object describing the structure
 // of a tree. dataSize specifies the size of input data in bytes.
-func InitLayout(dataSize int64) Layout {
+func InitLayout(dataSize int64, dataAndTreeInSameFile bool) Layout {
 	layout := Layout{
 		blockSize: usermem.PageSize,
 		// TODO(b/156980949): Allow config other hash methods (SHA384/SHA512).
 		digestSize: sha256DigestSize,
 	}
+
+	// treeStart is the offset (in bytes) of the first level of the tree in
+	// the file. If data and tree are in different files, treeStart should
+	// be zero. If data is in the same file as the tree, treeStart points
+	// to the block after the last data block (which may be zero-padded).
+	var treeStart int64
+	if dataAndTreeInSameFile {
+		treeStart = dataSize
+		if dataSize%layout.blockSize != 0 {
+			treeStart += layout.blockSize - dataSize%layout.blockSize
+		}
+	}
+
 	numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize
 	level := 0
 	offset := int64(0)
@@ -60,14 +79,15 @@ func InitLayout(dataSize int64) Layout {
 	// contain the hashes of the data blocks, while level numLevels - 1 is
 	// the root.
 	for numBlocks > 1 {
-		layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize)
+		layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize)
 		// Round numBlocks up to fill up a block.
 		numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock()
 		offset += numBlocks / layout.hashesPerBlock()
 		numBlocks = numBlocks / layout.hashesPerBlock()
 		level++
 	}
-	layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize)
+	layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize)
+
 	return layout
 }
 
@@ -103,14 +123,72 @@ func (layout Layout) blockOffset(level int, index int64) int64 {
 	return layout.levelOffset[level] + index*layout.blockSize
 }
 
-// Generate constructs a Merkle tree for the contents of data. The output is
-// written to treeWriter. The treeReader should be able to read the tree after
-// it has been written. That is, treeWriter and treeReader should point to the
-// same underlying data but have separate cursors.
-func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter io.Writer) ([]byte, error) {
-	layout := InitLayout(dataSize)
+// VerityDescriptor is a struct that is serialized and hashed to get a file's
+// root hash, which contains the root hash of the raw content and the file's
+// metadata.
+type VerityDescriptor struct {
+	Name     string
+	Mode     uint32
+	UID      uint32
+	GID      uint32
+	RootHash []byte
+}
 
-	numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize
+func (d *VerityDescriptor) String() string {
+	return fmt.Sprintf("Name: %s, Mode: %d, UID: %d, GID: %d, RootHash: %v", d.Name, d.Mode, d.UID, d.GID, d.RootHash)
+}
+
+// verify generates a hash from d, and compares it with expected.
+func (d *VerityDescriptor) verify(expected []byte) error {
+	h := sha256.Sum256([]byte(d.String()))
+	if !bytes.Equal(h[:], expected) {
+		return fmt.Errorf("unexpected root hash")
+	}
+	return nil
+}
+
+// GenerateParams contains the parameters used to generate a Merkle tree.
+type GenerateParams struct {
+	// File is a reader of the file to be hashed.
+	File io.ReaderAt
+	// Size is the size of the file.
+	Size int64
+	// Name is the name of the target file.
+	Name string
+	// Mode is the mode of the target file.
+	Mode uint32
+	// UID is the user ID of the target file.
+	UID uint32
+	// GID is the group ID of the target file.
+	GID uint32
+	// TreeReader is a reader for the Merkle tree.
+	TreeReader io.ReaderAt
+	// TreeWriter is a writer for the Merkle tree.
+	TreeWriter io.Writer
+	// DataAndTreeInSameFile is true if data and Merkle tree are in the same
+	// file, or false if Merkle tree is a separate file from data.
+	DataAndTreeInSameFile bool
+}
+
+// Generate constructs a Merkle tree for the contents of params.File. The
+// output is written to params.TreeWriter.
+//
+// Generate returns a hash of a VerityDescriptor, which contains the file
+// metadata and the hash from file content.
+func Generate(params *GenerateParams) ([]byte, error) {
+	layout := InitLayout(params.Size, params.DataAndTreeInSameFile)
+
+	numBlocks := (params.Size + layout.blockSize - 1) / layout.blockSize
+
+	// If the data is in the same file as the tree, zero pad the last data
+	// block.
+	bytesInLastBlock := params.Size % layout.blockSize
+	if params.DataAndTreeInSameFile && bytesInLastBlock != 0 {
+		zeroBuf := make([]byte, layout.blockSize-bytesInLastBlock)
+		if _, err := params.TreeWriter.Write(zeroBuf); err != nil {
+			return nil, err
+		}
+	}
 
 	var root []byte
 	for level := 0; level < layout.numLevels(); level++ {
@@ -123,11 +201,11 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i
 		if level == 0 {
 			// Read data block from the target file since level 0 includes hashes
 			// of blocks in the input data.
-			n, err = data.Read(buf)
+			n, err = params.File.ReadAt(buf, i*layout.blockSize)
 		} else {
 			// Read data block from the tree file since levels higher than 0 are
 			// hashing the lower level hashes.
-			n, err = treeReader.Read(buf)
+			n, err = params.TreeReader.ReadAt(buf, layout.blockOffset(level-1, i))
 		}
 
 		// err is populated as long as the bytes read is smaller than the buffer
@@ -147,7 +225,7 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i
 		}
 
 		// Write the generated hash to the end of the tree file.
-		if _, err = treeWriter.Write(digest[:]); err != nil {
+		if _, err = params.TreeWriter.Write(digest[:]); err != nil {
 			return nil, err
 		}
 	}
@@ -155,61 +233,111 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i
 		// remaining of the last block. But no need to do so for root.
 		if level != layout.rootLevel() && numBlocks%layout.hashesPerBlock() != 0 {
 			zeroBuf := make([]byte, layout.blockSize-(numBlocks%layout.hashesPerBlock())*layout.digestSize)
-			if _, err := treeWriter.Write(zeroBuf[:]); err != nil {
+			if _, err := params.TreeWriter.Write(zeroBuf[:]); err != nil {
 				return nil, err
 			}
 		}
 		numBlocks = (numBlocks + layout.hashesPerBlock() - 1) / layout.hashesPerBlock()
 	}
-	return root, nil
+	descriptor := VerityDescriptor{
+		Name:     params.Name,
+		Mode:     params.Mode,
+		UID:      params.UID,
+		GID:      params.GID,
+		RootHash: root,
+	}
+	ret := sha256.Sum256([]byte(descriptor.String()))
+	return ret[:], nil
+}
+
+// VerifyParams contains the params used to verify a portion of a file against
+// a Merkle tree.
+type VerifyParams struct {
+	// Out will be filled with verified data.
+	Out io.Writer
+	// File is a handler on the file to be verified.
+	File io.ReaderAt
+	// Tree is a handler on the Merkle tree used to verify file.
+	Tree io.ReaderAt
+	// Size is the size of the file.
+	Size int64
+	// Name is the name of the target file.
+	Name string
+	// Mode is the mode of the target file.
+	Mode uint32
+	// UID is the user ID of the target file.
+	UID uint32
+	// GID is the group ID of the target file.
+	GID uint32
+	// ReadOffset is the offset of the data range to be verified.
+	ReadOffset int64
+	// ReadSize is the size of the data range to be verified.
+	ReadSize int64
+	// Expected is a trusted hash for the file. It is compared with the
+	// calculated root hash to verify the content.
+	Expected []byte
+	// DataAndTreeInSameFile is true if data and Merkle tree are in the same
+	// file, or false if Merkle tree is a separate file from data.
+	DataAndTreeInSameFile bool
+}
+
+// verifyMetadata verifies the metadata by hashing a descriptor that contains
+// the metadata and compares the generated hash with expected.
+//
+// For verifyMetadata, params.File is not needed. It only accesses params.Tree
+// for the raw root hash.
+func verifyMetadata(params *VerifyParams, layout *Layout) error {
+	root := make([]byte, layout.digestSize)
+	if _, err := params.Tree.ReadAt(root, layout.blockOffset(layout.rootLevel(), 0 /* index */)); err != nil {
+		return fmt.Errorf("failed to read root hash: %w", err)
+	}
+	descriptor := VerityDescriptor{
+		Name:     params.Name,
+		Mode:     params.Mode,
+		UID:      params.UID,
+		GID:      params.GID,
+		RootHash: root,
+	}
+	return descriptor.verify(params.Expected)
 }
 
 // Verify verifies the content read from data with offset. The content is
 // verified against tree. If content spans across multiple blocks, each block is
 // verified. Verification fails if the hash of the data does not match the tree
-// at any level, or if the final root hash does not match expectedRoot.
-// Once the data is verified, it will be written using w.
-// Verify will modify the cursor for data, but always restores it to its
-// original position upon exit. The cursor for tree is modified and not
-// restored.
-func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte) error {
-	if readSize <= 0 {
-		return fmt.Errorf("Unexpected read size: %d", readSize)
+// at any level, or if the final root hash does not match expected.
+// Once the data is verified, it will be written using params.Out.
+//
+// Verify checks for both target file content and metadata. If readSize is 0,
+// only metadata is checked.
+func Verify(params *VerifyParams) (int64, error) {
+	if params.ReadSize < 0 {
+		return 0, fmt.Errorf("unexpected read size: %d", params.ReadSize)
+	}
+	layout := InitLayout(int64(params.Size), params.DataAndTreeInSameFile)
+	if params.ReadSize == 0 {
+		return 0, verifyMetadata(params, &layout)
 	}
-	layout := InitLayout(int64(dataSize))
 
 	// Calculate the index of blocks that includes the target range in input
 	// data.
-	firstDataBlock := readOffset / layout.blockSize
-	lastDataBlock := (readOffset + readSize - 1) / layout.blockSize
-
-	// Store the current offset, so we can set it back once verification
-	// finishes.
-	origOffset, err := data.Seek(0, io.SeekCurrent)
-	if err != nil {
-		return fmt.Errorf("Find current data offset failed: %v", err)
-	}
-	defer data.Seek(origOffset, io.SeekStart)
-
-	// Move to the first block that contains target data.
-	if _, err := data.Seek(firstDataBlock*layout.blockSize, io.SeekStart); err != nil {
-		return fmt.Errorf("Seek to datablock start failed: %v", err)
-	}
+	firstDataBlock := params.ReadOffset / layout.blockSize
+	lastDataBlock := (params.ReadOffset + params.ReadSize - 1) / layout.blockSize
 
 	buf := make([]byte, layout.blockSize)
 	var readErr error
-	bytesRead := 0
+	total := int64(0)
 	for i := firstDataBlock; i <= lastDataBlock; i++ {
 		// Read a block that includes all or part of target range in
 		// input data.
-		bytesRead, readErr = data.Read(buf)
+		bytesRead, err := params.File.ReadAt(buf, i*layout.blockSize)
+		readErr = err
 		// If at the end of input data and all previous blocks are
 		// verified, return the verified input data and EOF.
 		if readErr == io.EOF && bytesRead == 0 {
 			break
 		}
 		if readErr != nil && readErr != io.EOF {
-			return fmt.Errorf("Read from data failed: %v", err)
+			return 0, fmt.Errorf("read from data failed: %w", err)
 		}
 		// If this is the end of file, zero the remaining bytes in buf,
 		// otherwise they are still from the previous block.
@@ -220,22 +348,29 @@ func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset in
 				buf[j] = 0
 			}
 		}
-		if err := verifyBlock(tree, layout, buf, i, expectedRoot); err != nil {
-			return err
+		descriptor := VerityDescriptor{
+			Name: params.Name,
+			Mode: params.Mode,
+			UID:  params.UID,
+			GID:  params.GID,
 		}
+		if err := verifyBlock(params.Tree, &descriptor, &layout, buf, i, params.Expected); err != nil {
+			return 0, err
+		}
+
 		// startOff is the beginning of the read range within the
 		// current data block. Note that for all blocks other than the
 		// first, startOff should be 0.
 		startOff := int64(0)
 		if i == firstDataBlock {
-			startOff = readOffset % layout.blockSize
+			startOff = params.ReadOffset % layout.blockSize
 		}
 		// endOff is the end of the read range within the current data
 		// block. Note that for all blocks other than the last, endOff
 		// should be the block size.
 		endOff := layout.blockSize
 		if i == lastDataBlock {
-			endOff = (readOffset+readSize-1)%layout.blockSize + 1
+			endOff = (params.ReadOffset+params.ReadSize-1)%layout.blockSize + 1
 		}
 		// If the provided size exceeds the end of input data, we should
 		// only copy the parts in buf that's part of input data.
@@ -245,19 +380,22 @@ func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset in
 		if endOff > int64(bytesRead) {
 			endOff = int64(bytesRead)
 		}
-		w.Write(buf[startOff:endOff])
+		n, err := params.Out.Write(buf[startOff:endOff])
+		if err != nil {
+			return total, err
+		}
+		total += int64(n)
 	}
-	return readErr
+	return total, readErr
 }
 
 // verifyBlock verifies a block against tree. index is the number of block in
 // original data. The block is verified through each level of the tree. It
 // fails if the calculated hash from block is different from any level of
 // hashes stored in tree. And the final root hash is compared with
-// expectedRoot. verifyBlock modifies the cursor for tree. Users needs to
-// maintain the cursor if intended.
-func verifyBlock(tree io.ReadSeeker, layout Layout, dataBlock []byte, blockIndex int64, expectedRoot []byte) error {
+// expected.
+func verifyBlock(tree io.ReaderAt, descriptor *VerityDescriptor, layout *Layout, dataBlock []byte, blockIndex int64, expected []byte) error {
 	if len(dataBlock) != int(layout.blockSize) {
 		return fmt.Errorf("incorrect block size")
 	}
 
@@ -274,41 +412,27 @@ func verifyBlock(tree io.ReadSeeker, layout Layout, dataBlock []byte, blockIndex
 			// Read a block in previous level that contains the
 			// hash we just generated, and generate a next level
 			// hash from it.
-			if _, err := tree.Seek(layout.blockOffset(level-1, blockIndex), io.SeekStart); err != nil {
-				return err
-			}
-			if _, err := tree.Read(treeBlock); err != nil {
+			if _, err := tree.ReadAt(treeBlock, layout.blockOffset(level-1, blockIndex)); err != nil {
 				return err
 			}
 			digestArray := sha256.Sum256(treeBlock)
 			digest = digestArray[:]
 		}
 
-		// Move to stored hash for the current block, read the digest
-		// and store in expectedDigest.
-		if _, err := tree.Seek(layout.digestOffset(level, blockIndex), io.SeekStart); err != nil {
-			return err
-		}
-		if _, err := tree.Read(expectedDigest); err != nil {
+		// Read the digest for the current block and store in
+		// expectedDigest.
+		if _, err := tree.ReadAt(expectedDigest, layout.digestOffset(level, blockIndex)); err != nil {
 			return err
 		}
 
 		if !bytes.Equal(digest, expectedDigest) {
-			return fmt.Errorf("Verification failed")
-		}
-
-		// If this is the root layer, no need to generate next level
-		// hash.
-		if level == layout.rootLevel() {
-			break
+			return fmt.Errorf("verification failed")
 		}
 		blockIndex = blockIndex / layout.hashesPerBlock()
 	}
 
-	// Verification for the tree succeeded. Now compare the root hash in the
-	// tree with expectedRoot.
-	if !bytes.Equal(digest[:], expectedRoot) {
-		return fmt.Errorf("Verification failed")
-	}
-	return nil
+	// Verification for the tree succeeded. Now hash the descriptor with
+	// the root hash and compare it with expected.
+	descriptor.RootHash = digest
+	return descriptor.verify(expected)
 }
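
For readers unfamiliar with the new interface, the following is a minimal usage sketch (not part of the patch) showing how the params-based Generate/Verify API introduced by this change fits together. The gvisor.dev/gvisor/pkg/merkletree import path, the temporary-file tree storage, and the example file metadata are assumptions for illustration only.

package main

import (
	"bytes"
	"fmt"
	"io"
	"os"

	"gvisor.dev/gvisor/pkg/merkletree"
)

func main() {
	// Arbitrary file content for the example.
	data := bytes.Repeat([]byte{'a'}, 64*1024)

	// Keep the Merkle tree in a separate temporary file, so
	// DataAndTreeInSameFile is false. An *os.File satisfies both the
	// io.Writer (TreeWriter) and io.ReaderAt (TreeReader/Tree) requirements.
	treeFile, err := os.CreateTemp("", "merkletree")
	if err != nil {
		panic(err)
	}
	defer os.Remove(treeFile.Name())
	defer treeFile.Close()

	// Generate returns a hash of the serialized VerityDescriptor (file
	// metadata plus the tree's root hash), not the raw root hash itself.
	expected, err := merkletree.Generate(&merkletree.GenerateParams{
		File:                  bytes.NewReader(data),
		Size:                  int64(len(data)),
		Name:                  "foo",
		Mode:                  0644,
		UID:                   0,
		GID:                   0,
		TreeReader:            treeFile,
		TreeWriter:            treeFile,
		DataAndTreeInSameFile: false,
	})
	if err != nil {
		panic(err)
	}

	// Verify a sub-range of the data against the tree. The same metadata
	// must be supplied, since it is folded into the expected hash.
	// ReadSize == 0 would verify the metadata only.
	var out bytes.Buffer
	n, err := merkletree.Verify(&merkletree.VerifyParams{
		Out:                   &out,
		File:                  bytes.NewReader(data),
		Tree:                  treeFile,
		Size:                  int64(len(data)),
		Name:                  "foo",
		Mode:                  0644,
		UID:                   0,
		GID:                   0,
		ReadOffset:            4096,
		ReadSize:              8192,
		Expected:              expected,
		DataAndTreeInSameFile: false,
	})
	if err != nil && err != io.EOF {
		panic(err)
	}
	fmt.Printf("verified %d bytes\n", n)
}

Because File, Tree, and TreeReader are now io.ReaderAt rather than io.ReadSeeker, callers no longer need to save and restore cursors; every read is addressed by an explicit offset, and Verify reports how many verified bytes were written to Out (the returned read error may be io.EOF at the end of the file).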