From ee041b60bf87d78cf076c88b1fd92be5ec928374 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Mon, 24 Aug 2020 16:32:26 -0700 Subject: Add check for same source in merkle tree lib If the data is in the same Reader as the merkle tree, we should verify from the first layer in the tree, instead of from the beginning. PiperOrigin-RevId: 328230988 --- pkg/merkletree/merkletree.go | 61 +++++++-- pkg/merkletree/merkletree_test.go | 251 +++++++++++++++++++++++--------------- 2 files changed, 205 insertions(+), 107 deletions(-) diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go index 955c9c473..1a0477c6a 100644 --- a/pkg/merkletree/merkletree.go +++ b/pkg/merkletree/merkletree.go @@ -45,12 +45,25 @@ type Layout struct { // InitLayout initializes and returns a new Layout object describing the structure // of a tree. dataSize specifies the size of input data in bytes. -func InitLayout(dataSize int64) Layout { +func InitLayout(dataSize int64, dataAndTreeInSameFile bool) Layout { layout := Layout{ blockSize: usermem.PageSize, // TODO(b/156980949): Allow config other hash methods (SHA384/SHA512). digestSize: sha256DigestSize, } + + // treeStart is the offset (in bytes) of the first level of the tree in + // the file. If data and tree are in different files, treeStart should + // be zero. If data is in the same file as the tree, treeStart points + // to the block after the last data block (which may be zero-padded). + var treeStart int64 + if dataAndTreeInSameFile { + treeStart = dataSize + if dataSize%layout.blockSize != 0 { + treeStart += layout.blockSize - dataSize%layout.blockSize + } + } + numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize level := 0 offset := int64(0) @@ -60,14 +73,15 @@ func InitLayout(dataSize int64) Layout { // contain the hashes of the data blocks, while level numLevels - 1 is // the root. for numBlocks > 1 { - layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize) + layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize) // Round numBlocks up to fill up a block. numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock() offset += numBlocks / layout.hashesPerBlock() numBlocks = numBlocks / layout.hashesPerBlock() level++ } - layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize) + layout.levelOffset = append(layout.levelOffset, treeStart+offset*layout.blockSize) + return layout } @@ -107,11 +121,44 @@ func (layout Layout) blockOffset(level int, index int64) int64 { // written to treeWriter. The treeReader should be able to read the tree after // it has been written. That is, treeWriter and treeReader should point to the // same underlying data but have separate cursors. -func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter io.Writer) ([]byte, error) { - layout := InitLayout(dataSize) +// Generate will modify the cursor for data, but always restores it to its +// original position upon exit. The cursor for tree is modified and not +// restored. +func Generate(data io.ReadSeeker, dataSize int64, treeReader io.ReadSeeker, treeWriter io.WriteSeeker, dataAndTreeInSameFile bool) ([]byte, error) { + layout := InitLayout(dataSize, dataAndTreeInSameFile) numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize + // If the data is in the same file as the tree, zero pad the last data + // block. + bytesInLastBlock := dataSize % layout.blockSize + if dataAndTreeInSameFile && bytesInLastBlock != 0 { + zeroBuf := make([]byte, layout.blockSize-bytesInLastBlock) + if _, err := treeWriter.Seek(0, io.SeekEnd); err != nil && err != io.EOF { + return nil, err + } + if _, err := treeWriter.Write(zeroBuf); err != nil { + return nil, err + } + } + + // Store the current offset, so we can set it back once verification + // finishes. + origOffset, err := data.Seek(0, io.SeekCurrent) + if err != nil { + return nil, err + } + defer data.Seek(origOffset, io.SeekStart) + + // Read from the beginning of both data and treeReader. + if _, err := data.Seek(0, io.SeekStart); err != nil && err != io.EOF { + return nil, err + } + + if _, err := treeReader.Seek(0, io.SeekStart); err != nil && err != io.EOF { + return nil, err + } + var root []byte for level := 0; level < layout.numLevels(); level++ { for i := int64(0); i < numBlocks; i++ { @@ -172,11 +219,11 @@ func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter i // Verify will modify the cursor for data, but always restores it to its // original position upon exit. The cursor for tree is modified and not // restored. -func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte) error { +func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte, dataAndTreeInSameFile bool) error { if readSize <= 0 { return fmt.Errorf("Unexpected read size: %d", readSize) } - layout := InitLayout(int64(dataSize)) + layout := InitLayout(int64(dataSize), dataAndTreeInSameFile) // Calculate the index of blocks that includes the target range in input // data. diff --git a/pkg/merkletree/merkletree_test.go b/pkg/merkletree/merkletree_test.go index 911f61df9..ad50ba5f6 100644 --- a/pkg/merkletree/merkletree_test.go +++ b/pkg/merkletree/merkletree_test.go @@ -27,80 +27,58 @@ import ( func TestLayout(t *testing.T) { testCases := []struct { - dataSize int64 - expectedLevelOffset []int64 + dataSize int64 + dataAndTreeInSameFile bool + expectedLevelOffset []int64 }{ { - dataSize: 100, - expectedLevelOffset: []int64{0}, + dataSize: 100, + dataAndTreeInSameFile: false, + expectedLevelOffset: []int64{0}, }, { - dataSize: 1000000, - expectedLevelOffset: []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize}, + dataSize: 100, + dataAndTreeInSameFile: true, + expectedLevelOffset: []int64{usermem.PageSize}, }, { - dataSize: 4096 * int64(usermem.PageSize), - expectedLevelOffset: []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize}, + dataSize: 1000000, + dataAndTreeInSameFile: false, + expectedLevelOffset: []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize}, }, - } - - for _, tc := range testCases { - t.Run(fmt.Sprintf("%d", tc.dataSize), func(t *testing.T) { - p := InitLayout(tc.dataSize) - if p.blockSize != int64(usermem.PageSize) { - t.Errorf("got blockSize %d, want %d", p.blockSize, usermem.PageSize) - } - if p.digestSize != sha256DigestSize { - t.Errorf("got digestSize %d, want %d", p.digestSize, sha256DigestSize) - } - if p.numLevels() != len(tc.expectedLevelOffset) { - t.Errorf("got levels %d, want %d", p.numLevels(), len(tc.expectedLevelOffset)) - } - for i := 0; i < p.numLevels() && i < len(tc.expectedLevelOffset); i++ { - if p.levelOffset[i] != tc.expectedLevelOffset[i] { - t.Errorf("got levelStart[%d] %d, want %d", i, p.levelOffset[i], tc.expectedLevelOffset[i]) - } - } - }) - } -} - -func TestGenerate(t *testing.T) { - // The input data has size dataSize. It starts with the data in startWith, - // and all other bytes are zeroes. - testCases := []struct { - data []byte - expectedRoot []byte - }{ { - data: bytes.Repeat([]byte{0}, usermem.PageSize), - expectedRoot: []byte{173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167}, - }, - { - data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1), - expectedRoot: []byte{62, 93, 40, 92, 161, 241, 30, 223, 202, 99, 39, 2, 132, 113, 240, 139, 117, 99, 79, 243, 54, 18, 100, 184, 141, 121, 238, 46, 149, 202, 203, 132}, + dataSize: 1000000, + dataAndTreeInSameFile: true, + expectedLevelOffset: []int64{245 * usermem.PageSize, 247 * usermem.PageSize, 248 * usermem.PageSize}, }, { - data: []byte{'a'}, - expectedRoot: []byte{52, 75, 204, 142, 172, 129, 37, 14, 145, 137, 103, 203, 11, 162, 209, 205, 30, 169, 213, 72, 20, 28, 243, 24, 242, 2, 92, 43, 169, 59, 110, 210}, + dataSize: 4096 * int64(usermem.PageSize), + dataAndTreeInSameFile: false, + expectedLevelOffset: []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize}, }, { - data: bytes.Repeat([]byte{'a'}, usermem.PageSize), - expectedRoot: []byte{201, 62, 238, 45, 13, 176, 47, 16, 172, 199, 70, 13, 149, 118, 225, 34, 220, 248, 205, 83, 196, 191, 141, 252, 174, 27, 62, 116, 235, 207, 255, 90}, + dataSize: 4096 * int64(usermem.PageSize), + dataAndTreeInSameFile: true, + expectedLevelOffset: []int64{4096 * usermem.PageSize, 4128 * usermem.PageSize, 4129 * usermem.PageSize}, }, } for _, tc := range testCases { - t.Run(fmt.Sprintf("%d:%v", len(tc.data), tc.data[0]), func(t *testing.T) { - var tree bytes.Buffer - - root, err := Generate(bytes.NewBuffer(tc.data), int64(len(tc.data)), &tree, &tree) - if err != nil { - t.Fatalf("Generate failed: %v", err) + t.Run(fmt.Sprintf("%d", tc.dataSize), func(t *testing.T) { + l := InitLayout(tc.dataSize, tc.dataAndTreeInSameFile) + if l.blockSize != int64(usermem.PageSize) { + t.Errorf("got blockSize %d, want %d", l.blockSize, usermem.PageSize) } - - if !bytes.Equal(root, tc.expectedRoot) { - t.Errorf("Unexpected root") + if l.digestSize != sha256DigestSize { + t.Errorf("got digestSize %d, want %d", l.digestSize, sha256DigestSize) + } + if l.numLevels() != len(tc.expectedLevelOffset) { + t.Errorf("got levels %d, want %d", l.numLevels(), len(tc.expectedLevelOffset)) + } + for i := 0; i < l.numLevels() && i < len(tc.expectedLevelOffset); i++ { + if l.levelOffset[i] != tc.expectedLevelOffset[i] { + t.Errorf("got levelStart[%d] %d, want %d", i, l.levelOffset[i], tc.expectedLevelOffset[i]) + } } }) } @@ -151,6 +129,57 @@ func (brw *bytesReadWriter) Seek(offset int64, whence int) (int64, error) { return off, nil } +func TestGenerate(t *testing.T) { + // The input data has size dataSize. It starts with the data in startWith, + // and all other bytes are zeroes. + testCases := []struct { + data []byte + expectedRoot []byte + }{ + { + data: bytes.Repeat([]byte{0}, usermem.PageSize), + expectedRoot: []byte{173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167}, + }, + { + data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1), + expectedRoot: []byte{62, 93, 40, 92, 161, 241, 30, 223, 202, 99, 39, 2, 132, 113, 240, 139, 117, 99, 79, 243, 54, 18, 100, 184, 141, 121, 238, 46, 149, 202, 203, 132}, + }, + { + data: []byte{'a'}, + expectedRoot: []byte{52, 75, 204, 142, 172, 129, 37, 14, 145, 137, 103, 203, 11, 162, 209, 205, 30, 169, 213, 72, 20, 28, 243, 24, 242, 2, 92, 43, 169, 59, 110, 210}, + }, + { + data: bytes.Repeat([]byte{'a'}, usermem.PageSize), + expectedRoot: []byte{201, 62, 238, 45, 13, 176, 47, 16, 172, 199, 70, 13, 149, 118, 225, 34, 220, 248, 205, 83, 196, 191, 141, 252, 174, 27, 62, 116, 235, 207, 255, 90}, + }, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("%d:%v", len(tc.data), tc.data[0]), func(t *testing.T) { + for _, dataAndTreeInSameFile := range []bool{false, true} { + var tree bytesReadWriter + var root []byte + var err error + if dataAndTreeInSameFile { + tree.Write(tc.data) + root, err = Generate(&tree, int64(len(tc.data)), &tree, &tree, dataAndTreeInSameFile) + } else { + root, err = Generate(&bytesReadWriter{ + bytes: tc.data, + }, int64(len(tc.data)), &tree, &tree, dataAndTreeInSameFile) + } + if err != nil { + t.Fatalf("got err: %v, want nil", err) + } + + if !bytes.Equal(root, tc.expectedRoot) { + t.Errorf("got root: %v, want %v", root, tc.expectedRoot) + } + } + }) + } +} + func TestVerify(t *testing.T) { // The input data has size dataSize. The portion to be verified ranges from // verifyStart with verifySize. A bit is flipped in outOfRangeByteIndex to @@ -284,26 +313,37 @@ func TestVerify(t *testing.T) { data := make([]byte, tc.dataSize) // Generate random bytes in data. rand.Read(data) - var tree bytesReadWriter - - root, err := Generate(bytes.NewBuffer(data), int64(tc.dataSize), &tree, &tree) - if err != nil { - t.Fatalf("Generate failed: %v", err) - } - // Flip a bit in data and checks Verify results. - var buf bytes.Buffer - data[tc.modifyByte] ^= 1 - if tc.shouldSucceed { - if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err != nil && err != io.EOF { - t.Errorf("Verification failed when expected to succeed: %v", err) + for _, dataAndTreeInSameFile := range []bool{false, true} { + var tree bytesReadWriter + var root []byte + var err error + if dataAndTreeInSameFile { + tree.Write(data) + root, err = Generate(&tree, int64(len(data)), &tree, &tree, dataAndTreeInSameFile) + } else { + root, err = Generate(&bytesReadWriter{ + bytes: data, + }, int64(tc.dataSize), &tree, &tree, false /* dataAndTreeInSameFile */) } - if int64(buf.Len()) != tc.verifySize || !bytes.Equal(data[tc.verifyStart:tc.verifyStart+tc.verifySize], buf.Bytes()) { - t.Errorf("Incorrect output from Verify") + if err != nil { + t.Fatalf("Generate failed: %v", err) } - } else { - if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err == nil { - t.Errorf("Verification succeeded when expected to fail") + + // Flip a bit in data and checks Verify results. + var buf bytes.Buffer + data[tc.modifyByte] ^= 1 + if tc.shouldSucceed { + if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root, dataAndTreeInSameFile); err != nil && err != io.EOF { + t.Errorf("Verification failed when expected to succeed: %v", err) + } + if int64(buf.Len()) != tc.verifySize || !bytes.Equal(data[tc.verifyStart:tc.verifyStart+tc.verifySize], buf.Bytes()) { + t.Errorf("Incorrect output from Verify") + } + } else { + if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root, dataAndTreeInSameFile); err == nil { + t.Errorf("Verification succeeded when expected to fail") + } } } }) @@ -318,36 +358,47 @@ func TestVerifyRandom(t *testing.T) { data := make([]byte, dataSize) // Generate random bytes in data. rand.Read(data) - var tree bytesReadWriter - root, err := Generate(bytes.NewBuffer(data), int64(dataSize), &tree, &tree) - if err != nil { - t.Fatalf("Generate failed: %v", err) - } + for _, dataAndTreeInSameFile := range []bool{false, true} { + var tree bytesReadWriter + var root []byte + var err error + if dataAndTreeInSameFile { + tree.Write(data) + root, err = Generate(&tree, int64(len(data)), &tree, &tree, dataAndTreeInSameFile) + } else { + root, err = Generate(&bytesReadWriter{ + bytes: data, + }, int64(dataSize), &tree, &tree, dataAndTreeInSameFile) + } + if err != nil { + t.Fatalf("Generate failed: %v", err) + } - // Pick a random portion of data. - start := rand.Int63n(dataSize - 1) - size := rand.Int63n(dataSize) + 1 + // Pick a random portion of data. + start := rand.Int63n(dataSize - 1) + size := rand.Int63n(dataSize) + 1 - var buf bytes.Buffer - // Checks that the random portion of data from the original data is - // verified successfully. - if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err != nil && err != io.EOF { - t.Errorf("Verification failed for correct data: %v", err) - } - if size > dataSize-start { - size = dataSize - start - } - if int64(buf.Len()) != size || !bytes.Equal(data[start:start+size], buf.Bytes()) { - t.Errorf("Incorrect output from Verify") - } + var buf bytes.Buffer + // Checks that the random portion of data from the original data is + // verified successfully. + if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root, dataAndTreeInSameFile); err != nil && err != io.EOF { + t.Errorf("Verification failed for correct data: %v", err) + } + if size > dataSize-start { + size = dataSize - start + } + if int64(buf.Len()) != size || !bytes.Equal(data[start:start+size], buf.Bytes()) { + t.Errorf("Incorrect output from Verify") + } - buf.Reset() - // Flip a random bit in randPortion, and check that verification fails. - randBytePos := rand.Int63n(size) - data[start+randBytePos] ^= 1 + buf.Reset() + // Flip a random bit in randPortion, and check that verification fails. + randBytePos := rand.Int63n(size) + data[start+randBytePos] ^= 1 - if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err == nil { - t.Errorf("Verification succeeded for modified data") + if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root, dataAndTreeInSameFile); err == nil { + t.Errorf("Verification succeeded for modified data") + } } } -- cgit v1.2.3