diff options
author | Ian Lewis <ianmlewis@gmail.com> | 2020-08-17 21:44:31 -0400 |
---|---|---|
committer | Ian Lewis <ianmlewis@gmail.com> | 2020-08-17 21:44:31 -0400 |
commit | ac324f646ee3cb7955b0b45a7453aeb9671cbdf1 (patch) | |
tree | 0cbc5018e8807421d701d190dc20525726c7ca76 /pkg/merkletree | |
parent | 352ae1022ce19de28fc72e034cc469872ad79d06 (diff) | |
parent | 6d0c5803d557d453f15ac6f683697eeb46dab680 (diff) |
Merge branch 'master' into ip-forwarding
- Merges aleksej-paschenko's with HEAD
- Adds vfs2 support for ip_forward
Diffstat (limited to 'pkg/merkletree')
-rw-r--r-- | pkg/merkletree/BUILD | 17 | ||||
-rw-r--r-- | pkg/merkletree/merkletree.go | 314 | ||||
-rw-r--r-- | pkg/merkletree/merkletree_test.go | 353 |
3 files changed, 684 insertions, 0 deletions
diff --git a/pkg/merkletree/BUILD b/pkg/merkletree/BUILD new file mode 100644 index 000000000..a8fcb2e19 --- /dev/null +++ b/pkg/merkletree/BUILD @@ -0,0 +1,17 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "merkletree", + srcs = ["merkletree.go"], + visibility = ["//pkg/sentry:internal"], + deps = ["//pkg/usermem"], +) + +go_test( + name = "merkletree_test", + srcs = ["merkletree_test.go"], + library = ":merkletree", + deps = ["//pkg/usermem"], +) diff --git a/pkg/merkletree/merkletree.go b/pkg/merkletree/merkletree.go new file mode 100644 index 000000000..955c9c473 --- /dev/null +++ b/pkg/merkletree/merkletree.go @@ -0,0 +1,314 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package merkletree implements Merkle tree generating and verification. +package merkletree + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io" + + "gvisor.dev/gvisor/pkg/usermem" +) + +const ( + // sha256DigestSize specifies the digest size of a SHA256 hash. + sha256DigestSize = 32 +) + +// Layout defines the scale of a Merkle tree. +type Layout struct { + // blockSize is the size of a data block to be hashed. + blockSize int64 + // digestSize is the size of a generated hash. + digestSize int64 + // levelOffset contains the offset of the begnning of each level in + // bytes. The number of levels in the tree is the length of the slice. + // The leaf nodes (level 0) contain hashes of blocks of the input data. + // Each level N contains hashes of the blocks in level N-1. The highest + // level is the root hash. + levelOffset []int64 +} + +// InitLayout initializes and returns a new Layout object describing the structure +// of a tree. dataSize specifies the size of input data in bytes. +func InitLayout(dataSize int64) Layout { + layout := Layout{ + blockSize: usermem.PageSize, + // TODO(b/156980949): Allow config other hash methods (SHA384/SHA512). + digestSize: sha256DigestSize, + } + numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize + level := 0 + offset := int64(0) + + // Calculate the number of levels in the Merkle tree and the beginning + // offset of each level. Level 0 consists of the leaf nodes that + // contain the hashes of the data blocks, while level numLevels - 1 is + // the root. + for numBlocks > 1 { + layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize) + // Round numBlocks up to fill up a block. + numBlocks += (layout.hashesPerBlock() - numBlocks%layout.hashesPerBlock()) % layout.hashesPerBlock() + offset += numBlocks / layout.hashesPerBlock() + numBlocks = numBlocks / layout.hashesPerBlock() + level++ + } + layout.levelOffset = append(layout.levelOffset, offset*layout.blockSize) + return layout +} + +// hashesPerBlock() returns the number of digests in each block. For example, +// if blockSize is 4096 bytes, and digestSize is 32 bytes, there will be 128 +// hashesPerBlock. Therefore 128 hashes in one level will be combined in one +// hash in the level above. +func (layout Layout) hashesPerBlock() int64 { + return layout.blockSize / layout.digestSize +} + +// numLevels returns the total number of levels in the Merkle tree. +func (layout Layout) numLevels() int { + return len(layout.levelOffset) +} + +// rootLevel returns the level of the root hash. +func (layout Layout) rootLevel() int { + return layout.numLevels() - 1 +} + +// digestOffset finds the offset of a digest from the beginning of the tree. +// The target digest is at level of the tree, with index from the beginning of +// the current level. +func (layout Layout) digestOffset(level int, index int64) int64 { + return layout.levelOffset[level] + index*layout.digestSize +} + +// blockOffset finds the offset of a block from the beginning of the tree. The +// target block is at level of the tree, with index from the beginning of the +// current level. +func (layout Layout) blockOffset(level int, index int64) int64 { + return layout.levelOffset[level] + index*layout.blockSize +} + +// Generate constructs a Merkle tree for the contents of data. The output is +// written to treeWriter. The treeReader should be able to read the tree after +// it has been written. That is, treeWriter and treeReader should point to the +// same underlying data but have separate cursors. +func Generate(data io.Reader, dataSize int64, treeReader io.Reader, treeWriter io.Writer) ([]byte, error) { + layout := InitLayout(dataSize) + + numBlocks := (dataSize + layout.blockSize - 1) / layout.blockSize + + var root []byte + for level := 0; level < layout.numLevels(); level++ { + for i := int64(0); i < numBlocks; i++ { + buf := make([]byte, layout.blockSize) + var ( + n int + err error + ) + if level == 0 { + // Read data block from the target file since level 0 includes hashes + // of blocks in the input data. + n, err = data.Read(buf) + } else { + // Read data block from the tree file since levels higher than 0 are + // hashing the lower level hashes. + n, err = treeReader.Read(buf) + } + + // err is populated as long as the bytes read is smaller than the buffer + // size. This could be the case if we are reading the last block, and + // break in that case. If this is the last block, the end of the block + // will be zero-padded. + if n == 0 && err == io.EOF { + break + } else if err != nil && err != io.EOF { + return nil, err + } + // Hash the bytes in buf. + digest := sha256.Sum256(buf) + + if level == layout.rootLevel() { + root = digest[:] + } + + // Write the generated hash to the end of the tree file. + if _, err = treeWriter.Write(digest[:]); err != nil { + return nil, err + } + } + // If the generated digests do not round up to a block, zero-padding the + // remaining of the last block. But no need to do so for root. + if level != layout.rootLevel() && numBlocks%layout.hashesPerBlock() != 0 { + zeroBuf := make([]byte, layout.blockSize-(numBlocks%layout.hashesPerBlock())*layout.digestSize) + if _, err := treeWriter.Write(zeroBuf[:]); err != nil { + return nil, err + } + } + numBlocks = (numBlocks + layout.hashesPerBlock() - 1) / layout.hashesPerBlock() + } + return root, nil +} + +// Verify verifies the content read from data with offset. The content is +// verified against tree. If content spans across multiple blocks, each block is +// verified. Verification fails if the hash of the data does not match the tree +// at any level, or if the final root hash does not match expectedRoot. +// Once the data is verified, it will be written using w. +// Verify will modify the cursor for data, but always restores it to its +// original position upon exit. The cursor for tree is modified and not +// restored. +func Verify(w io.Writer, data, tree io.ReadSeeker, dataSize int64, readOffset int64, readSize int64, expectedRoot []byte) error { + if readSize <= 0 { + return fmt.Errorf("Unexpected read size: %d", readSize) + } + layout := InitLayout(int64(dataSize)) + + // Calculate the index of blocks that includes the target range in input + // data. + firstDataBlock := readOffset / layout.blockSize + lastDataBlock := (readOffset + readSize - 1) / layout.blockSize + + // Store the current offset, so we can set it back once verification + // finishes. + origOffset, err := data.Seek(0, io.SeekCurrent) + if err != nil { + return fmt.Errorf("Find current data offset failed: %v", err) + } + defer data.Seek(origOffset, io.SeekStart) + + // Move to the first block that contains target data. + if _, err := data.Seek(firstDataBlock*layout.blockSize, io.SeekStart); err != nil { + return fmt.Errorf("Seek to datablock start failed: %v", err) + } + + buf := make([]byte, layout.blockSize) + var readErr error + bytesRead := 0 + for i := firstDataBlock; i <= lastDataBlock; i++ { + // Read a block that includes all or part of target range in + // input data. + bytesRead, readErr = data.Read(buf) + // If at the end of input data and all previous blocks are + // verified, return the verified input data and EOF. + if readErr == io.EOF && bytesRead == 0 { + break + } + if readErr != nil && readErr != io.EOF { + return fmt.Errorf("Read from data failed: %v", err) + } + // If this is the end of file, zero the remaining bytes in buf, + // otherwise they are still from the previous block. + // TODO(b/162908070): Investigate possible issues with zero + // padding the data. + if bytesRead < len(buf) { + for j := bytesRead; j < len(buf); j++ { + buf[j] = 0 + } + } + if err := verifyBlock(tree, layout, buf, i, expectedRoot); err != nil { + return err + } + // startOff is the beginning of the read range within the + // current data block. Note that for all blocks other than the + // first, startOff should be 0. + startOff := int64(0) + if i == firstDataBlock { + startOff = readOffset % layout.blockSize + } + // endOff is the end of the read range within the current data + // block. Note that for all blocks other than the last, endOff + // should be the block size. + endOff := layout.blockSize + if i == lastDataBlock { + endOff = (readOffset+readSize-1)%layout.blockSize + 1 + } + // If the provided size exceeds the end of input data, we should + // only copy the parts in buf that's part of input data. + if startOff > int64(bytesRead) { + startOff = int64(bytesRead) + } + if endOff > int64(bytesRead) { + endOff = int64(bytesRead) + } + w.Write(buf[startOff:endOff]) + + } + return readErr +} + +// verifyBlock verifies a block against tree. index is the number of block in +// original data. The block is verified through each level of the tree. It +// fails if the calculated hash from block is different from any level of +// hashes stored in tree. And the final root hash is compared with +// expectedRoot. verifyBlock modifies the cursor for tree. Users needs to +// maintain the cursor if intended. +func verifyBlock(tree io.ReadSeeker, layout Layout, dataBlock []byte, blockIndex int64, expectedRoot []byte) error { + if len(dataBlock) != int(layout.blockSize) { + return fmt.Errorf("incorrect block size") + } + + expectedDigest := make([]byte, layout.digestSize) + treeBlock := make([]byte, layout.blockSize) + var digest []byte + for level := 0; level < layout.numLevels(); level++ { + // Calculate hash. + if level == 0 { + digestArray := sha256.Sum256(dataBlock) + digest = digestArray[:] + } else { + // Read a block in previous level that contains the + // hash we just generated, and generate a next level + // hash from it. + if _, err := tree.Seek(layout.blockOffset(level-1, blockIndex), io.SeekStart); err != nil { + return err + } + if _, err := tree.Read(treeBlock); err != nil { + return err + } + digestArray := sha256.Sum256(treeBlock) + digest = digestArray[:] + } + + // Move to stored hash for the current block, read the digest + // and store in expectedDigest. + if _, err := tree.Seek(layout.digestOffset(level, blockIndex), io.SeekStart); err != nil { + return err + } + if _, err := tree.Read(expectedDigest); err != nil { + return err + } + + if !bytes.Equal(digest, expectedDigest) { + return fmt.Errorf("Verification failed") + } + + // If this is the root layer, no need to generate next level + // hash. + if level == layout.rootLevel() { + break + } + blockIndex = blockIndex / layout.hashesPerBlock() + } + + // Verification for the tree succeeded. Now compare the root hash in the + // tree with expectedRoot. + if !bytes.Equal(digest[:], expectedRoot) { + return fmt.Errorf("Verification failed") + } + return nil +} diff --git a/pkg/merkletree/merkletree_test.go b/pkg/merkletree/merkletree_test.go new file mode 100644 index 000000000..911f61df9 --- /dev/null +++ b/pkg/merkletree/merkletree_test.go @@ -0,0 +1,353 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package merkletree + +import ( + "bytes" + "fmt" + "io" + "math/rand" + "testing" + "time" + + "gvisor.dev/gvisor/pkg/usermem" +) + +func TestLayout(t *testing.T) { + testCases := []struct { + dataSize int64 + expectedLevelOffset []int64 + }{ + { + dataSize: 100, + expectedLevelOffset: []int64{0}, + }, + { + dataSize: 1000000, + expectedLevelOffset: []int64{0, 2 * usermem.PageSize, 3 * usermem.PageSize}, + }, + { + dataSize: 4096 * int64(usermem.PageSize), + expectedLevelOffset: []int64{0, 32 * usermem.PageSize, 33 * usermem.PageSize}, + }, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("%d", tc.dataSize), func(t *testing.T) { + p := InitLayout(tc.dataSize) + if p.blockSize != int64(usermem.PageSize) { + t.Errorf("got blockSize %d, want %d", p.blockSize, usermem.PageSize) + } + if p.digestSize != sha256DigestSize { + t.Errorf("got digestSize %d, want %d", p.digestSize, sha256DigestSize) + } + if p.numLevels() != len(tc.expectedLevelOffset) { + t.Errorf("got levels %d, want %d", p.numLevels(), len(tc.expectedLevelOffset)) + } + for i := 0; i < p.numLevels() && i < len(tc.expectedLevelOffset); i++ { + if p.levelOffset[i] != tc.expectedLevelOffset[i] { + t.Errorf("got levelStart[%d] %d, want %d", i, p.levelOffset[i], tc.expectedLevelOffset[i]) + } + } + }) + } +} + +func TestGenerate(t *testing.T) { + // The input data has size dataSize. It starts with the data in startWith, + // and all other bytes are zeroes. + testCases := []struct { + data []byte + expectedRoot []byte + }{ + { + data: bytes.Repeat([]byte{0}, usermem.PageSize), + expectedRoot: []byte{173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255, 124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167}, + }, + { + data: bytes.Repeat([]byte{0}, 128*usermem.PageSize+1), + expectedRoot: []byte{62, 93, 40, 92, 161, 241, 30, 223, 202, 99, 39, 2, 132, 113, 240, 139, 117, 99, 79, 243, 54, 18, 100, 184, 141, 121, 238, 46, 149, 202, 203, 132}, + }, + { + data: []byte{'a'}, + expectedRoot: []byte{52, 75, 204, 142, 172, 129, 37, 14, 145, 137, 103, 203, 11, 162, 209, 205, 30, 169, 213, 72, 20, 28, 243, 24, 242, 2, 92, 43, 169, 59, 110, 210}, + }, + { + data: bytes.Repeat([]byte{'a'}, usermem.PageSize), + expectedRoot: []byte{201, 62, 238, 45, 13, 176, 47, 16, 172, 199, 70, 13, 149, 118, 225, 34, 220, 248, 205, 83, 196, 191, 141, 252, 174, 27, 62, 116, 235, 207, 255, 90}, + }, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("%d:%v", len(tc.data), tc.data[0]), func(t *testing.T) { + var tree bytes.Buffer + + root, err := Generate(bytes.NewBuffer(tc.data), int64(len(tc.data)), &tree, &tree) + if err != nil { + t.Fatalf("Generate failed: %v", err) + } + + if !bytes.Equal(root, tc.expectedRoot) { + t.Errorf("Unexpected root") + } + }) + } +} + +// bytesReadWriter is used to read from/write to/seek in a byte array. Unlike +// bytes.Buffer, it keeps the whole buffer during read so that it can be reused. +type bytesReadWriter struct { + // bytes contains the underlying byte array. + bytes []byte + // readPos is the currently location for Read. Write always appends to + // the end of the array. + readPos int +} + +func (brw *bytesReadWriter) Write(p []byte) (int, error) { + brw.bytes = append(brw.bytes, p...) + return len(p), nil +} + +func (brw *bytesReadWriter) Read(p []byte) (int, error) { + if brw.readPos >= len(brw.bytes) { + return 0, io.EOF + } + bytesRead := copy(p, brw.bytes[brw.readPos:]) + brw.readPos += bytesRead + if bytesRead < len(p) { + return bytesRead, io.EOF + } + return bytesRead, nil +} + +func (brw *bytesReadWriter) Seek(offset int64, whence int) (int64, error) { + off := offset + if whence == io.SeekCurrent { + off += int64(brw.readPos) + } + if whence == io.SeekEnd { + off += int64(len(brw.bytes)) + } + if off < 0 { + panic("seek with negative offset") + } + if off >= int64(len(brw.bytes)) { + return 0, io.EOF + } + brw.readPos = int(off) + return off, nil +} + +func TestVerify(t *testing.T) { + // The input data has size dataSize. The portion to be verified ranges from + // verifyStart with verifySize. A bit is flipped in outOfRangeByteIndex to + // confirm that modifications outside the verification range does not cause + // issue. And a bit is flipped in modifyByte to confirm that + // modifications in the verification range is caught during verification. + testCases := []struct { + dataSize int64 + verifyStart int64 + verifySize int64 + // A byte in input data is modified during the test. If the + // modified byte falls in verification range, Verify should + // fail, otherwise Verify should still succeed. + modifyByte int64 + shouldSucceed bool + }{ + // Verify range start outside the data range should fail. + { + dataSize: usermem.PageSize, + verifyStart: usermem.PageSize, + verifySize: 1, + modifyByte: 0, + shouldSucceed: false, + }, + // Verifying range is valid if it starts inside data and ends + // outside data range, in that case start to the end of data is + // verified. + { + dataSize: usermem.PageSize, + verifyStart: 0, + verifySize: 2 * usermem.PageSize, + modifyByte: 0, + shouldSucceed: false, + }, + // Invalid verify range (negative size) should fail. + { + dataSize: usermem.PageSize, + verifyStart: 1, + verifySize: -1, + modifyByte: 0, + shouldSucceed: false, + }, + // Invalid verify range (0 size) should fail. + { + dataSize: usermem.PageSize, + verifyStart: 0, + verifySize: 0, + modifyByte: 0, + shouldSucceed: false, + }, + // The test cases below use a block-aligned verify range. + // Modifying a byte in the verified range should cause verify + // to fail. + { + dataSize: 8 * usermem.PageSize, + verifyStart: 4 * usermem.PageSize, + verifySize: usermem.PageSize, + modifyByte: 4 * usermem.PageSize, + shouldSucceed: false, + }, + // Modifying a byte before the verified range should not cause + // verify to fail. + { + dataSize: 8 * usermem.PageSize, + verifyStart: 4 * usermem.PageSize, + verifySize: usermem.PageSize, + modifyByte: 4*usermem.PageSize - 1, + shouldSucceed: true, + }, + // Modifying a byte after the verified range should not cause + // verify to fail. + { + dataSize: 8 * usermem.PageSize, + verifyStart: 4 * usermem.PageSize, + verifySize: usermem.PageSize, + modifyByte: 5 * usermem.PageSize, + shouldSucceed: true, + }, + // The tests below use a non-block-aligned verify range. + // Modifying a byte at strat of verify range should cause + // verify to fail. + { + dataSize: 8 * usermem.PageSize, + verifyStart: 4*usermem.PageSize + 123, + verifySize: 2 * usermem.PageSize, + modifyByte: 4*usermem.PageSize + 123, + shouldSucceed: false, + }, + // Modifying a byte at the end of verify range should cause + // verify to fail. + { + dataSize: 8 * usermem.PageSize, + verifyStart: 4*usermem.PageSize + 123, + verifySize: 2 * usermem.PageSize, + modifyByte: 6*usermem.PageSize + 123, + shouldSucceed: false, + }, + // Modifying a byte in the middle verified block should cause + // verify to fail. + { + dataSize: 8 * usermem.PageSize, + verifyStart: 4*usermem.PageSize + 123, + verifySize: 2 * usermem.PageSize, + modifyByte: 5*usermem.PageSize + 123, + shouldSucceed: false, + }, + // Modifying a byte in the first block in the verified range + // should cause verify to fail, even the modified bit itself is + // out of verify range. + { + dataSize: 8 * usermem.PageSize, + verifyStart: 4*usermem.PageSize + 123, + verifySize: 2 * usermem.PageSize, + modifyByte: 4*usermem.PageSize + 122, + shouldSucceed: false, + }, + // Modifying a byte in the last block in the verified range + // should cause verify to fail, even the modified bit itself is + // out of verify range. + { + dataSize: 8 * usermem.PageSize, + verifyStart: 4*usermem.PageSize + 123, + verifySize: 2 * usermem.PageSize, + modifyByte: 6*usermem.PageSize + 124, + shouldSucceed: false, + }, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("%d", tc.modifyByte), func(t *testing.T) { + data := make([]byte, tc.dataSize) + // Generate random bytes in data. + rand.Read(data) + var tree bytesReadWriter + + root, err := Generate(bytes.NewBuffer(data), int64(tc.dataSize), &tree, &tree) + if err != nil { + t.Fatalf("Generate failed: %v", err) + } + + // Flip a bit in data and checks Verify results. + var buf bytes.Buffer + data[tc.modifyByte] ^= 1 + if tc.shouldSucceed { + if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err != nil && err != io.EOF { + t.Errorf("Verification failed when expected to succeed: %v", err) + } + if int64(buf.Len()) != tc.verifySize || !bytes.Equal(data[tc.verifyStart:tc.verifyStart+tc.verifySize], buf.Bytes()) { + t.Errorf("Incorrect output from Verify") + } + } else { + if err := Verify(&buf, bytes.NewReader(data), &tree, tc.dataSize, tc.verifyStart, tc.verifySize, root); err == nil { + t.Errorf("Verification succeeded when expected to fail") + } + } + }) + } +} + +func TestVerifyRandom(t *testing.T) { + rand.Seed(time.Now().UnixNano()) + // Use a random dataSize. Minimum size 2 so that we can pick a random + // portion from it. + dataSize := rand.Int63n(200*usermem.PageSize) + 2 + data := make([]byte, dataSize) + // Generate random bytes in data. + rand.Read(data) + var tree bytesReadWriter + + root, err := Generate(bytes.NewBuffer(data), int64(dataSize), &tree, &tree) + if err != nil { + t.Fatalf("Generate failed: %v", err) + } + + // Pick a random portion of data. + start := rand.Int63n(dataSize - 1) + size := rand.Int63n(dataSize) + 1 + + var buf bytes.Buffer + // Checks that the random portion of data from the original data is + // verified successfully. + if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err != nil && err != io.EOF { + t.Errorf("Verification failed for correct data: %v", err) + } + if size > dataSize-start { + size = dataSize - start + } + if int64(buf.Len()) != size || !bytes.Equal(data[start:start+size], buf.Bytes()) { + t.Errorf("Incorrect output from Verify") + } + + buf.Reset() + // Flip a random bit in randPortion, and check that verification fails. + randBytePos := rand.Int63n(size) + data[start+randBytePos] ^= 1 + + if err := Verify(&buf, bytes.NewReader(data), &tree, dataSize, start, size, root); err == nil { + t.Errorf("Verification succeeded for modified data") + } +} |