Diffstat (limited to 'pkg/merkletree/merkletree.go')
-rw-r--r--  pkg/merkletree/merkletree.go  274
1 file changed, 199 insertions, 75 deletions
diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go
index 955c9c473..d8227b8bd 100644
--- a/pkg/merkletree/merkletree.go
+++ b/pkg/merkletree/merkletree.go
@@ -29,13 +29,19 @@ const (
sha256DigestSize = 32
)
+// DigestSize returns the size (in bytes) of a digest.
+// TODO(b/156980949): Allow configuring other hash methods (SHA384/SHA512).
+func DigestSize() int {
+ return sha256DigestSize
+}
+
// Layout defines the scale of a Merkle tree.
type Layout struct {
// blockSize is the size of a data block to be hashed.
blockSize int64
// digestSize is the size of a generated hash.
digestSize int64
- // levelOffset contains the offset of the begnning of each level in
+ // levelOffset contains the offset of the beginning of each level in
// bytes. The number of levels in the tree is the length of the slice.
// The leaf nodes (level 0) contain hashes of blocks of the input data.
// Each level N contains hashes of the blocks in level N-1. The highest
@@ -45,12 +51,25 @@ type Layout struct {
// InitLayout initializes and returns a new Layout object describing the structure
// of a tree. dataSize specifies the size of input data in bytes.
-func InitLayout(dataSize int64) Layout {
+func InitLayout(dataSize int64, dataAndTreeInSameFile bool) Layout {
layout := Layout{
blockSize: usermem.PageSize,
// TODO(b/156980949): Allow configuring other hash methods (SHA384/SHA512).
digestSize: sha256DigestSize,
}
+
+ // treeStart is the offset (in bytes) of the first level of the tree in
+ // the file. If data and tree are in different files, treeStart should
+ // be zero. If data is in the same file as the tree, treeStart points
+ // to the block after the last data block (which may be zero-padded).
+ var treeStart int64
+ if dataAndTreeInSameFile {
+ treeStart = dataSize
+ if dataSize%layout.blockSize != 0 {
+ treeStart += layout.blockSize - dataSize%layout.blockSize
+ }
+ }
+
numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize
level := 0
offset := int64(0)
@@ -60,14 +79,15 @@ func InitLayout(dataSize int64) Layout {
// contain the hashes of the data blocks, while level numLevels - 1 is
// the root.
for numBlocks > 1 {
- layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize)
+ layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize)
// Round numBlocks up to fill up a block.
numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock()
offset += numBlocks / layout.hashesPerBlock()
numBlocks = numBlocks / layout.hashesPerBlock()
level++
}
- layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize)
+ layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize)
+
return layout
}
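To make the new treeStart plumbing concrete, here is a minimal standalone sketch that mirrors the level-offset computation above for an assumed 1 MiB input; the 4 KiB block size and 32-byte digest size match this package's constants.

package main

import "fmt"

func main() {
	const blockSize, digestSize int64 = 4096, 32
	hashesPerBlock := blockSize / digestSize // 128 digests fit in one tree block

	dataSize := int64(1 << 20) // 1 MiB of input data = 256 data blocks
	dataAndTreeInSameFile := true

	// treeStart is 0 when the tree lives in its own file; otherwise it is
	// the end of the zero-padded data.
	var treeStart int64
	if dataAndTreeInSameFile {
		treeStart = dataSize
		if dataSize%blockSize != 0 {
			treeStart += blockSize - dataSize%blockSize
		}
	}

	numBlocks := (dataSize + blockSize - 1) / blockSize
	offset := int64(0)
	for level := 0; numBlocks > 1; level++ {
		fmt.Printf("level %d starts at byte %d\n", level, treeStart+offset*blockSize)
		// Round up so a partially filled hash block still occupies a whole block.
		numBlocks += (hashesPerBlock - numBlocks%hashesPerBlock) % hashesPerBlock
		offset += numBlocks / hashesPerBlock
		numBlocks /= hashesPerBlock
	}
	fmt.Printf("root level starts at byte %d\n", treeStart+offset*blockSize)
}

With a separate tree file this prints offsets starting at byte 0; with data and tree in one file every level shifts up by the padded data size, which is exactly what the treeStart term adds.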
@@ -103,14 +123,72 @@ func (layout Layout) blockOffset(level int, index int64) int64 {
return layout.levelOffset[level] + index*layout.blockSize
}
-// Generate constructs a Merkle tree for the contents of data. The output is
-// written to treeWriter. The treeReader should be able to read the tree after
-// it has been written. That is, treeWriter and treeReader should point to the
-// same underlying data but have separate cursors.
-func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter io.Writer) ([]byte, error) {
- layout := InitLayout(dataSize)
+// VerityDescriptor is a struct that is serialized and hashed to get a file's
+// root hash. It contains the root hash of the raw content and the file's
+// metadata.
+type VerityDescriptor struct {
+ Name string
+ Mode uint32
+ UID uint32
+ GID uint32
+ RootHash []byte
+}
- numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize
+func (d *VerityDescriptor) String() string {
+ return fmt.Sprintf("Name: %s, Mode: %d, UID: %d, GID: %d, RootHash: %v", d.Name, d.Mode, d.UID, d.GID, d.RootHash)
+}
+
+// verify generates a hash from d, and compares it with expected.
+func (d *VerityDescriptor) verify(expected []byte) error {
+ h := sha256.Sum256([]byte(d.String()))
+ if !bytes.Equal(h[:], expected) {
+ return fmt.Errorf("unexpected root hash")
+ }
+ return nil
+}
+
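As a quick illustration of the descriptor scheme, here is a sketch that reduces assumed metadata and a placeholder content hash to a root hash using the same String()-then-SHA-256 form (values are illustrative):

package main

import (
	"crypto/sha256"
	"fmt"
)

func main() {
	contentHash := make([]byte, 32) // stand-in for the Merkle root of the raw content
	// Mirrors VerityDescriptor.String() above.
	s := fmt.Sprintf("Name: %s, Mode: %d, UID: %d, GID: %d, RootHash: %v",
		"foo", 0644, 1000, 1000, contentHash)
	rootHash := sha256.Sum256([]byte(s))
	fmt.Printf("root hash: %x\n", rootHash)
}

Any change to the name, mode, owner, or content hash changes the serialized string and therefore the root hash, which is what lets verify() catch metadata tampering as well as content tampering.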
+// GenerateParams contains the parameters used to generate a Merkle tree.
+type GenerateParams struct {
+ // File is a reader of the file to be hashed.
+ File io.ReaderAt
+ // Size is the size of the file.
+ Size int64
+ // Name is the name of the target file.
+ Name string
+ // Mode is the mode of the target file.
+ Mode uint32
+ // UID is the user ID of the target file.
+ UID uint32
+ // GID is the group ID of the target file.
+ GID uint32
+ // TreeReader is a reader for the Merkle tree.
+ TreeReader io.ReaderAt
+ // TreeWriter is a writer for the Merkle tree.
+ TreeWriter io.Writer
+ // DataAndTreeInSameFile is true if data and Merkle tree are in the same
+ // file, or false if Merkle tree is a separate file from data.
+ DataAndTreeInSameFile bool
+}
+
+// Generate constructs a Merkle tree for the contents of params.File. The
+// output is written to params.TreeWriter.
+//
+// Generate returns a hash of a VerityDescriptor, which contains the file
+// metadata and the hash of the file content.
+func Generate(params *GenerateParams) ([]byte, error) {
+ layout := InitLayout(params.Size, params.DataAndTreeInSameFile)
+
+ numBlocks := (params.Size + layout.blockSize - 1) / layout.blockSize
+
+ // If the data is in the same file as the tree, zero pad the last data
+ // block.
+ bytesInLastBlock := params.Size % layout.blockSize
+ if params.DataAndTreeInSameFile && bytesInLastBlock != 0 {
+ zeroBuf := make([]byte, layout.blockSize-bytesInLastBlock)
+ if _, err := params.TreeWriter.Write(zeroBuf); err != nil {
+ return nil, err
+ }
+ }
var root []byte
for level := 0; level < layout.numLevels(); level++ {
@@ -123,11 +201,11 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i
if level == 0 {
// Read data block from the target file since level 0 includes hashes
// of blocks in the input data.
- n, err = data.Read(buf)
+ n, err = params.File.ReadAt(buf, i*layout.blockSize)
} else {
// Read data block from the tree file since levels higher than 0 are
// hashing the lower level hashes.
- n, err = treeReader.Read(buf)
+ n, err = params.TreeReader.ReadAt(buf, layout.blockOffset(level-1, i))
}
// err is populated as long as the bytes read is smaller than the buffer
@@ -147,7 +225,7 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i
}
// Write the generated hash to the end of the tree file.
- if _, err = treeWriter.Write(digest[:]); err != nil {
+ if _, err = params.TreeWriter.Write(digest[:]); err != nil {
return nil, err
}
}
@@ -155,61 +233,111 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i
// remaining of the last block. But no need to do so for root.
if level != layout.rootLevel() && numBlocks%layout.hashesPerBlock() != 0 {
zeroBuf := make([]byte, layout.blockSize-(numBlocks%layout.hashesPerBlock())*layout.digestSize)
- if _, err := treeWriter.Write(zeroBuf[:]); err != nil {
+ if _, err := params.TreeWriter.Write(zeroBuf[:]); err != nil {
return nil, err
}
}
numBlocks = (numBlocks + layout.hashesPerBlock() - 1) / layout.hashesPerBlock()
}
- return root, nil
+ descriptor := VerityDescriptor{
+ Name: params.Name,
+ Mode: params.Mode,
+ UID: params.UID,
+ GID: params.GID,
+ RootHash: root,
+ }
+ ret := sha256.Sum256([]byte(descriptor.String()))
+ return ret[:], nil
+}
+
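A sketch of how a caller might drive the new Generate entry point, assuming the gvisor.dev/gvisor/pkg/merkletree import path and a temporary file that backs both TreeReader and TreeWriter (names and values are illustrative, not part of this change):

package main

import (
	"bytes"
	"fmt"
	"os"

	"gvisor.dev/gvisor/pkg/merkletree"
)

func main() {
	data := bytes.Repeat([]byte{'a'}, 2<<20) // 2 MiB of input

	// The same file backs TreeReader and TreeWriter: Generate appends each
	// level with TreeWriter and reads the previous level back with TreeReader.
	treeFile, err := os.CreateTemp("", "merkletree")
	if err != nil {
		panic(err)
	}
	defer os.Remove(treeFile.Name())

	hash, err := merkletree.Generate(&merkletree.GenerateParams{
		File:                  bytes.NewReader(data),
		Size:                  int64(len(data)),
		Name:                  "foo",
		Mode:                  0644,
		UID:                   1000,
		GID:                   1000,
		TreeReader:            treeFile,
		TreeWriter:            treeFile,
		DataAndTreeInSameFile: false,
	})
	if err != nil {
		panic(err)
	}
	fmt.Printf("descriptor hash: %x\n", hash)
}

The returned hash is the descriptor hash, not the raw tree root, so the same bytes passed later as VerifyParams.Expected must come with the same Name/Mode/UID/GID.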
+// VerifyParams contains the parameters used to verify a portion of a file
+// against a Merkle tree.
+type VerifyParams struct {
+ // Out will be filled with verified data.
+ Out io.Writer
+ // File is a handle on the file to be verified.
+ File io.ReaderAt
+ // Tree is a handle on the Merkle tree used to verify file.
+ Tree io.ReaderAt
+ // Size is the size of the file.
+ Size int64
+ // Name is the name of the target file.
+ Name string
+ // Mode is the mode of the target file.
+ Mode uint32
+ // UID is the user ID of the target file.
+ UID uint32
+ // GID is the group ID of the target file.
+ GID uint32
+ // ReadOffset is the offset of the data range to be verified.
+ ReadOffset int64
+ // ReadSize is the size of the data range to be verified.
+ ReadSize int64
+ // Expected is a trusted hash for the file. It is compared with the
+ // calculated root hash to verify the content.
+ Expected []byte
+ // DataAndTreeInSameFile is true if data and Merkle tree are in the same
+ // file, or false if Merkle tree is a separate file from data.
+ DataAndTreeInSameFile bool
+}
+
+// verifyMetadata verifies the metadata by hashing a descriptor that contains
+// the metadata and comparing the generated hash with expected.
+//
+// verifyMetadata does not need params.File. It only accesses params.Tree for
+// the raw root hash.
+func verifyMetadata(params *VerifyParams, layout *Layout) error {
+ root := make([]byte, layout.digestSize)
+ if _, err := params.Tree.ReadAt(root, layout.blockOffset(layout.rootLevel(), 0 /* index */)); err != nil {
+ return fmt.Errorf("failed to read root hash: %w", err)
+ }
+ descriptor := VerityDescriptor{
+ Name: params.Name,
+ Mode: params.Mode,
+ UID: params.UID,
+ GID: params.GID,
+ RootHash: root,
+ }
+ return descriptor.verify(params.Expected)
}
// Verify verifies the content read from data with offset. The content is
// verified against tree. If content spans across multiple blocks, each block is
// verified. Verification fails if the hash of the data does not match the tree
-// at any level, or if the final root hash does not match expectedRoot.
-// Once the data is verified, it will be written using w.
-// Verify will modify the cursor for data, but always restores it to its
-// original position upon exit. The cursor for tree is modified and not
-// restored.
-func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte) error {
- if readSize <= 0 {
- return fmt.Errorf("Unexpected read size: %d", readSize)
+// at any level, or if the final root hash does not match expected.
+// Once the data is verified, it will be written using params.Out.
+//
+// Verify checks both the target file content and the metadata. If
+// params.ReadSize is 0, only the metadata is checked.
+func Verify(params *VerifyParams) (int64, error) {
+ if params.ReadSize < 0 {
+ return 0, fmt.Errorf("unexpected read size: %d", params.ReadSize)
+ }
+ layout := InitLayout(int64(params.Size), params.DataAndTreeInSameFile)
+ if params.ReadSize == 0 {
+ return 0, verifyMetadata(params, &layout)
}
- layout := InitLayout(int64(dataSize))
// Calculate the index of blocks that includes the target range in input
// data.
- firstDataBlock := readOffset / layout.blockSize
- lastDataBlock := (readOffset + readSize - 1) / layout.blockSize
-
- // Store the current offset, so we can set it back once verification
- // finishes.
- origOffset, err := data.Seek(0, io.SeekCurrent)
- if err != nil {
- return fmt.Errorf("Find current data offset failed: %v", err)
- }
- defer data.Seek(origOffset, io.SeekStart)
-
- // Move to the first block that contains target data.
- if _, err := data.Seek(firstDataBlock*layout.blockSize, io.SeekStart); err != nil {
- return fmt.Errorf("Seek to datablock start failed: %v", err)
- }
+ firstDataBlock := params.ReadOffset / layout.blockSize
+ lastDataBlock := (params.ReadOffset + params.ReadSize - 1) / layout.blockSize
buf := make([]byte, layout.blockSize)
var readErr error
- bytesRead := 0
+ total := int64(0)
for i := firstDataBlock; i <= lastDataBlock; i++ {
// Read a block that includes all or part of target range in
// input data.
- bytesRead, readErr = data.Read(buf)
+ bytesRead, err := params.File.ReadAt(buf, i*layout.blockSize)
+ readErr = err
// If at the end of input data and all previous blocks are
// verified, return the verified input data and EOF.
if readErr == io.EOF && bytesRead == 0 {
break
}
if readErr != nil && readErr != io.EOF {
- return fmt.Errorf("Read from data failed: %v", err)
+ return 0, fmt.Errorf("read from data failed: %w", err)
}
// If this is the end of file, zero the remaining bytes in buf,
// otherwise they are still from the previous block.
@@ -220,22 +348,29 @@ func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset in
buf[j] = 0
}
}
- if err := verifyBlock(tree, layout, buf, i, expectedRoot); err != nil {
- return err
+ descriptor := VerityDescriptor{
+ Name: params.Name,
+ Mode: params.Mode,
+ UID: params.UID,
+ GID: params.GID,
}
+ if err := verifyBlock(params.Tree, &descriptor, &layout, buf, i, params.Expected); err != nil {
+ return 0, err
+ }
+
// startOff is the beginning of the read range within the
// current data block. Note that for all blocks other than the
// first, startOff should be 0.
startOff := int64(0)
if i == firstDataBlock {
- startOff = readOffset % layout.blockSize
+ startOff = params.ReadOffset % layout.blockSize
}
// endOff is the end of the read range within the current data
// block. Note that for all blocks other than the last, endOff
// should be the block size.
endOff := layout.blockSize
if i == lastDataBlock {
- endOff = (readOffset+readSize-1)%layout.blockSize + 1
+ endOff = (params.ReadOffset+params.ReadSize-1)%layout.blockSize + 1
}
// If the provided size exceeds the end of input data, we should
// only copy the parts in buf that's part of input data.
@@ -245,19 +380,22 @@ func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset in
if endOff > int64(bytesRead) {
endOff = int64(bytesRead)
}
- w.Write(buf[startOff:endOff])
+ n, err := params.Out.Write(buf[startOff:endOff])
+ if err != nil {
+ return total, err
+ }
+ total += int64(n)
}
- return readErr
+ return total, readErr
}
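A matching sketch for the new Verify entry point, assuming data, treeFile, and expectedHash come from a Generate call like the sketch above (same imports; illustrative values):

func verifyRange(data []byte, treeFile *os.File, expectedHash []byte) error {
	var out bytes.Buffer
	n, err := merkletree.Verify(&merkletree.VerifyParams{
		Out:  &out,
		File: bytes.NewReader(data),
		Tree: treeFile,
		Size: int64(len(data)),
		// The metadata must match what was passed to Generate, or the
		// descriptor hash will not match Expected.
		Name:                  "foo",
		Mode:                  0644,
		UID:                   1000,
		GID:                   1000,
		ReadOffset:            4096,
		ReadSize:              4096,
		Expected:              expectedHash,
		DataAndTreeInSameFile: false,
	})
	if err != nil {
		return err
	}
	fmt.Printf("verified %d bytes\n", n) // out now holds the verified range
	return nil
}

Passing ReadSize 0 skips the content walk and only checks the metadata against Expected, which is how a caller can verify attributes without reading any data.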
// verifyBlock verifies a block against tree. blockIndex is the index of the
// block in original data. The block is verified through each level of the tree. It
// fails if the calculated hash from block is different from any level of
// hashes stored in tree. And the final root hash is compared with
-// expectedRoot. verifyBlock modifies the cursor for tree. Users needs to
-// maintain the cursor if intended.
-func verifyBlock(tree io.ReadSeeker, layout Layout, dataBlock []byte, blockIndex int64, expectedRoot []byte) error {
+// expected.
+func verifyBlock(tree io.ReaderAt, descriptor *VerityDescriptor, layout *Layout, dataBlock []byte, blockIndex int64, expected []byte) error {
if len(dataBlock) != int(layout.blockSize) {
return fmt.Errorf("incorrect block size")
}
@@ -274,41 +412,27 @@ func verifyBlock(tree io.ReadSeeker, layout Layout, dataBlock []byte, blockIndex
// Read a block in previous level that contains the
// hash we just generated, and generate a next level
// hash from it.
- if _, err := tree.Seek(layout.blockOffset(level-1, blockIndex), io.SeekStart); err != nil {
- return err
- }
- if _, err := tree.Read(treeBlock); err != nil {
+ if _, err := tree.ReadAt(treeBlock, layout.blockOffset(level-1, blockIndex)); err != nil {
return err
}
digestArray := sha256.Sum256(treeBlock)
digest = digestArray[:]
}
- // Move to stored hash for the current block, read the digest
- // and store in expectedDigest.
- if _, err := tree.Seek(layout.digestOffset(level, blockIndex), io.SeekStart); err != nil {
- return err
- }
- if _, err := tree.Read(expectedDigest); err != nil {
+ // Read the digest for the current block and store in
+ // expectedDigest.
+ if _, err := tree.ReadAt(expectedDigest, layout.digestOffset(level, blockIndex)); err != nil {
return err
}
if !bytes.Equal(digest, expectedDigest) {
- return fmt.Errorf("Verification failed")
- }
-
- // If this is the root layer, no need to generate next level
- // hash.
- if level == layout.rootLevel() {
- break
+ return fmt.Errorf("verification failed")
}
blockIndex = blockIndex / layout.hashesPerBlock()
}
- // Verification for the tree succeeded. Now compare the root hash in the
- // tree with expectedRoot.
- if !bytes.Equal(digest[:], expectedRoot) {
- return fmt.Errorf("Verification failed")
- }
- return nil
+ // Verification for the tree succeeded. Now hash the descriptor with
+ // the root hash and compare it with expected.
+ descriptor.RootHash = digest
+ return descriptor.verify(expected)
}