From 1b938ba72c79c6e650c7406fcf26e6fdbc219fc6 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Mon, 17 Aug 2020 15:53:58 -0700 Subject: Add Verify in merkle tree library Verify checks input data against the merkle tree, and compares the root hash with expectation. PiperOrigin-RevId: 327116711 --- pkg/merkletree/merkletree.go | 259 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 219 insertions(+), 40 deletions(-) (limited to 'pkg/merkletree/merkletree.go') diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go index 906f67943..955c9c473 100644 --- a/pkg/merkletree/merkletree.go +++ b/pkg/merkletree/merkletree.go @@ -16,7 +16,9 @@ package merkletree import ( + "bytes" "crypto/sha256" + "fmt" "io" "gvisor.dev/gvisor/pkg/usermem" @@ -27,50 +29,78 @@ const ( sha256DigestSize = 32 ) -// Size defines the scale of a Merkle tree. -type Size struct { +// Layout defines the scale of a Merkle tree. +type Layout struct { // blockSize is the size of a data block to be hashed. blockSize int64 // digestSize is the size of a generated hash. digestSize int64 - // hashesPerBlock is the number of hashes in a block. For example, if - // blockSize is 4096 bytes, and digestSize is 32 bytes, there will be 128 - // hashesPerBlock. Therefore 128 hashes in a lower level will be put into a - // block and generate a single hash in an upper level. - hashesPerBlock int64 - // levelStart is the start block index of each level. The number of levels in - // the tree is the length of the slice. The leafs (level 0) are hashes of - // blocks in the input data. The levels above are hashes of lower level - // hashes. The highest level is the root hash. - levelStart []int64 + // levelOffset contains the offset of the begnning of each level in + // bytes. The number of levels in the tree is the length of the slice. + // The leaf nodes (level 0) contain hashes of blocks of the input data. + // Each level N contains hashes of the blocks in level N-1. The highest + // level is the root hash. + levelOffset []int64 } -// MakeSize initializes and returns a new Size object describing the structure -// of a tree. dataSize specifies the number of the file system size in bytes. -func MakeSize(dataSize int64) Size { - size := Size{ +// InitLayout initializes and returns a new Layout object describing the structure +// of a tree. dataSize specifies the size of input data in bytes. +func InitLayout(dataSize int64) Layout { + layout := Layout{ blockSize: usermem.PageSize, // TODO(b/156980949): Allow config other hash methods (SHA384/SHA512). - digestSize: sha256DigestSize, - hashesPerBlock: usermem.PageSize / sha256DigestSize, + digestSize: sha256DigestSize, } - numBlocks := (dataSize + size.blockSize - 1) / size.blockSize - level := int64(0) + numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize + level := 0 offset := int64(0) - // Calcuate the number of levels in the Merkle tree and the beginning offset - // of each level. Level 0 is the level directly above the data blocks, while - // level NumLevels - 1 is the root. + // Calculate the number of levels in the Merkle tree and the beginning + // offset of each level. Level 0 consists of the leaf nodes that + // contain the hashes of the data blocks, while level numLevels - 1 is + // the root. for numBlocks > 1 { - size.levelStart = append(size.levelStart, offset) + layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize) // Round numBlocks up to fill up a block. - numBlocks += (size.hashesPerBlock - numBlocks%size.hashesPerBlock) % size.hashesPerBlock - offset += numBlocks / size.hashesPerBlock - numBlocks = numBlocks / size.hashesPerBlock + numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock() + offset += numBlocks / layout.hashesPerBlock() + numBlocks = numBlocks / layout.hashesPerBlock() level++ } - size.levelStart = append(size.levelStart, offset) - return size + layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize) + return layout +} + +// hashesPerBlock() returns the number of digests in each block. For example, +// if blockSize is 4096 bytes, and digestSize is 32 bytes, there will be 128 +// hashesPerBlock. Therefore 128 hashes in one level will be combined in one +// hash in the level above. +func (layout Layout) hashesPerBlock() int64 { + return layout.blockSize / layout.digestSize +} + +// numLevels returns the total number of levels in the Merkle tree. +func (layout Layout) numLevels() int { + return len(layout.levelOffset) +} + +// rootLevel returns the level of the root hash. +func (layout Layout) rootLevel() int { + return layout.numLevels() - 1 +} + +// digestOffset finds the offset of a digest from the beginning of the tree. +// The target digest is at level of the tree, with index from the beginning of +// the current level. +func (layout Layout) digestOffset(level int, index int64) int64 { + return layout.levelOffset[level] + index*layout.digestSize +} + +// blockOffset finds the offset of a block from the beginning of the tree. The +// target block is at level of the tree, with index from the beginning of the +// current level. +func (layout Layout) blockOffset(level int, index int64) int64 { + return layout.levelOffset[level] + index*layout.blockSize } // Generate constructs a Merkle tree for the contents of data. The output is @@ -78,21 +108,21 @@ func MakeSize(dataSize int64) Size { // it has been written. That is, treeWriter and treeReader should point to the // same underlying data but have separate cursors. func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter io.Writer) ([]byte, error) { - size := MakeSize(dataSize) + layout := InitLayout(dataSize) - numBlocks := (dataSize + size.blockSize - 1) / size.blockSize + numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize var root []byte - for level := 0; level < len(size.levelStart); level++ { + for level := 0; level < layout.numLevels(); level++ { for i := int64(0); i < numBlocks; i++ { - buf := make([]byte, size.blockSize) + buf := make([]byte, layout.blockSize) var ( n int err error ) if level == 0 { - // Read data block from the target file since level 0 is directly above - // the raw data block. + // Read data block from the target file since level 0 includes hashes + // of blocks in the input data. n, err = data.Read(buf) } else { // Read data block from the tree file since levels higher than 0 are @@ -112,7 +142,7 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i // Hash the bytes in buf. digest := sha256.Sum256(buf) - if level == len(size.levelStart)-1 { + if level == layout.rootLevel() { root = digest[:] } @@ -121,15 +151,164 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i return nil, err } } - // If the genereated digests do not round up to a block, zero-padding the + // If the generated digests do not round up to a block, zero-padding the // remaining of the last block. But no need to do so for root. - if level != len(size.levelStart)-1 && numBlocks%size.hashesPerBlock != 0 { - zeroBuf := make([]byte, size.blockSize-(numBlocks%size.hashesPerBlock)*size.digestSize) + if level != layout.rootLevel() && numBlocks%layout.hashesPerBlock() != 0 { + zeroBuf := make([]byte, layout.blockSize-(numBlocks%layout.hashesPerBlock())*layout.digestSize) if _, err := treeWriter.Write(zeroBuf[:]); err != nil { return nil, err } } - numBlocks = (numBlocks + size.hashesPerBlock - 1) / size.hashesPerBlock + numBlocks = (numBlocks + layout.hashesPerBlock() - 1) / layout.hashesPerBlock() } return root, nil } + +// Verify verifies the content read from data with offset. The content is +// verified against tree. If content spans across multiple blocks, each block is +// verified. Verification fails if the hash of the data does not match the tree +// at any level, or if the final root hash does not match expectedRoot. +// Once the data is verified, it will be written using w. +// Verify will modify the cursor for data, but always restores it to its +// original position upon exit. The cursor for tree is modified and not +// restored. +func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte) error { + if readSize <= 0 { + return fmt.Errorf("Unexpected read size: %d", readSize) + } + layout := InitLayout(int64(dataSize)) + + // Calculate the index of blocks that includes the target range in input + // data. + firstDataBlock := readOffset / layout.blockSize + lastDataBlock := (readOffset + readSize - 1) / layout.blockSize + + // Store the current offset, so we can set it back once verification + // finishes. + origOffset, err := data.Seek(0, io.SeekCurrent) + if err != nil { + return fmt.Errorf("Find current data offset failed: %v", err) + } + defer data.Seek(origOffset, io.SeekStart) + + // Move to the first block that contains target data. + if _, err := data.Seek(firstDataBlock*layout.blockSize, io.SeekStart); err != nil { + return fmt.Errorf("Seek to datablock start failed: %v", err) + } + + buf := make([]byte, layout.blockSize) + var readErr error + bytesRead := 0 + for i := firstDataBlock; i <= lastDataBlock; i++ { + // Read a block that includes all or part of target range in + // input data. + bytesRead, readErr = data.Read(buf) + // If at the end of input data and all previous blocks are + // verified, return the verified input data and EOF. + if readErr == io.EOF && bytesRead == 0 { + break + } + if readErr != nil && readErr != io.EOF { + return fmt.Errorf("Read from data failed: %v", err) + } + // If this is the end of file, zero the remaining bytes in buf, + // otherwise they are still from the previous block. + // TODO(b/162908070): Investigate possible issues with zero + // padding the data. + if bytesRead < len(buf) { + for j := bytesRead; j < len(buf); j++ { + buf[j] = 0 + } + } + if err := verifyBlock(tree, layout, buf, i, expectedRoot); err != nil { + return err + } + // startOff is the beginning of the read range within the + // current data block. Note that for all blocks other than the + // first, startOff should be 0. + startOff := int64(0) + if i == firstDataBlock { + startOff = readOffset % layout.blockSize + } + // endOff is the end of the read range within the current data + // block. Note that for all blocks other than the last, endOff + // should be the block size. + endOff := layout.blockSize + if i == lastDataBlock { + endOff = (readOffset+readSize-1)%layout.blockSize + 1 + } + // If the provided size exceeds the end of input data, we should + // only copy the parts in buf that's part of input data. + if startOff > int64(bytesRead) { + startOff = int64(bytesRead) + } + if endOff > int64(bytesRead) { + endOff = int64(bytesRead) + } + w.Write(buf[startOff:endOff]) + + } + return readErr +} + +// verifyBlock verifies a block against tree. index is the number of block in +// original data. The block is verified through each level of the tree. It +// fails if the calculated hash from block is different from any level of +// hashes stored in tree. And the final root hash is compared with +// expectedRoot. verifyBlock modifies the cursor for tree. Users needs to +// maintain the cursor if intended. +func verifyBlock(tree io.ReadSeeker, layout Layout, dataBlock []byte, blockIndex int64, expectedRoot []byte) error { + if len(dataBlock) != int(layout.blockSize) { + return fmt.Errorf("incorrect block size") + } + + expectedDigest := make([]byte, layout.digestSize) + treeBlock := make([]byte, layout.blockSize) + var digest []byte + for level := 0; level < layout.numLevels(); level++ { + // Calculate hash. + if level == 0 { + digestArray := sha256.Sum256(dataBlock) + digest = digestArray[:] + } else { + // Read a block in previous level that contains the + // hash we just generated, and generate a next level + // hash from it. + if _, err := tree.Seek(layout.blockOffset(level-1, blockIndex), io.SeekStart); err != nil { + return err + } + if _, err := tree.Read(treeBlock); err != nil { + return err + } + digestArray := sha256.Sum256(treeBlock) + digest = digestArray[:] + } + + // Move to stored hash for the current block, read the digest + // and store in expectedDigest. + if _, err := tree.Seek(layout.digestOffset(level, blockIndex), io.SeekStart); err != nil { + return err + } + if _, err := tree.Read(expectedDigest); err != nil { + return err + } + + if !bytes.Equal(digest, expectedDigest) { + return fmt.Errorf("Verification failed") + } + + // If this is the root layer, no need to generate next level + // hash. + if level == layout.rootLevel() { + break + } + blockIndex = blockIndex / layout.hashesPerBlock() + } + + // Verification for the tree succeeded. Now compare the root hash in the + // tree with expectedRoot. + if !bytes.Equal(digest[:], expectedRoot) { + return fmt.Errorf("Verification failed") + } + return nil +} -- cgit v1.2.3