3 files changed, 456 insertions, 0 deletions
diff --git a/pkg/hashio/BUILD b/pkg/hashio/BUILD
new file mode 100644
index 000000000..aaa58b58f
--- /dev/null
+++ b/pkg/hashio/BUILD
@@ -0,0 +1,19 @@
+package(licenses = ["notice"])  # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "hashio",
+    srcs = [
+        "hashio.go",
+    ],
+    importpath = "gvisor.googlesource.com/gvisor/pkg/hashio",
+    visibility = ["//:sandbox"],
+)
+
+go_test(
+    name = "hashio_test",
+    size = "small",
+    srcs = ["hashio_test.go"],
+    embed = [":hashio"],
+)
diff --git a/pkg/hashio/hashio.go b/pkg/hashio/hashio.go
new file mode 100644
index 000000000..d97948850
--- /dev/null
+++ b/pkg/hashio/hashio.go
@@ -0,0 +1,295 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+Package hashio provides hash-verified I/O streams.
+
+The I/O stream format is defined as follows.
+
+/-----------------------------------------\
+|                 payload                 |
++-----------------------------------------+
+|                  hash                   |
++-----------------------------------------+
+|                 payload                 |
++-----------------------------------------+
+|                  hash                   |
++-----------------------------------------+
+|                 ......                  |
+\-----------------------------------------/
+
+Payload bytes written to / read from the stream are automatically split
+into segments, each followed by a hash. All data read out must have already
+passed hash verification. Hence the client code can safely do any kind of
+(stream) processing of these data.
+*/
+package hashio
+
+import (
+	"crypto/hmac"
+	"errors"
+	"hash"
+	"io"
+	"sync"
+)
+
+// SegmentSize is the unit we split payload data and insert hash at.
+const SegmentSize = 8 * 1024
+
+// ErrHashMismatch is returned if the ErrHashMismatch does not match.
+var ErrHashMismatch = errors.New("hash mismatch")
+
+// writer computes hashs during writes.
+type writer struct {
+	mu      sync.Mutex
+	w       io.Writer
+	h       hash.Hash
+	written int
+	closed  bool
+	hashv   []byte
+}
+
+// NewWriter creates a hash-verified IO stream writer.
+func NewWriter(w io.Writer, h hash.Hash) io.WriteCloser {
+	return &writer{
+		w:     w,
+		h:     h,
+		hashv: make([]byte, h.Size()),
+	}
+}
+
+// Write writes the given data.
+func (w *writer) Write(p []byte) (int, error) {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+
+	// Did we already close?
+	if w.closed {
+		return 0, io.ErrUnexpectedEOF
+	}
+
+	for done := 0; done < len(p); {
+		// Slice the data at segment boundary.
+		left := SegmentSize - w.written
+		if left > len(p[done:]) {
+			left = len(p[done:])
+		}
+
+		// Write the rest of the segment and write to hash writer the
+		// same number of bytes. Hash.Write may never return an error.
+		n, err := w.w.Write(p[done : done+left])
+		w.h.Write(p[done : done+left])
+		w.written += n
+		done += n
+
+		// And only check the actual write errors here.
+		if n == 0 && err != nil {
+			return done, err
+		}
+
+		// Write hash if starting a new segment.
+		if w.written == SegmentSize {
+			if err := w.closeSegment(); err != nil {
+				return done, err
+			}
+		}
+	}
+
+	return len(p), nil
+}
+
+// closeSegment closes the current segment and writes out its hash.
+func (w *writer) closeSegment() error {
+	// Serialize and write the current segment's hash.
+	hashv := w.h.Sum(w.hashv[:0])
+	for done := 0; done < len(hashv); {
+		n, err := w.w.Write(hashv[done:])
+		done += n
+		if n == 0 && err != nil {
+			return err
+		}
+	}
+	w.written = 0 // reset counter.
+	return nil
+}
+
+// Close writes the final hash to the stream and closes the underlying Writer.
+func (w *writer) Close() error {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+
+	// Did we already close?
+	if w.closed {
+		return io.ErrUnexpectedEOF
+	}
+
+	// Always mark as closed, regardless of errors.
+	w.closed = true
+
+	// Write the final segment.
+	if err := w.closeSegment(); err != nil {
+		return err
+	}
+
+	// Call the underlying closer.
+	if c, ok := w.w.(io.Closer); ok {
+		return c.Close()
+	}
+	return nil
+}
+
+// reader computes and verifies hashs during reads.
+type reader struct {
+	mu sync.Mutex
+	r  io.Reader
+	h  hash.Hash
+
+	// data is remaining verified but unused payload data. This is
+	// populated on short reads and may be consumed without any
+	// verification.
+	data [SegmentSize]byte
+
+	// index is the index into data above.
+	index int
+
+	// available is the amount of valid data above.
+	available int
+
+	// hashv is the read hash for the current segment.
+	hashv []byte
+
+	// computev is the computed hash for the current segment.
+	computev []byte
+}
+
+// NewReader creates a hash-verified IO stream reader.
+func NewReader(r io.Reader, h hash.Hash) io.Reader {
+	return &reader{
+		r:        r,
+		h:        h,
+		hashv:    make([]byte, h.Size()),
+		computev: make([]byte, h.Size()),
+	}
+}
+
+// readSegment reads a segment and hash vector.
+//
+// Precondition: datav must have length SegmentSize.
+func (r *reader) readSegment(datav []byte) (data []byte, err error) {
+	// Make two reads: the first is the segment, the second is the hash
+	// which needs verification. We may need to adjust the resulting slices
+	// in the case of short reads.
+	for done := 0; done < SegmentSize; {
+		n, err := r.r.Read(datav[done:])
+		done += n
+		if n == 0 && err == io.EOF {
+			if done == 0 {
+				// No data at all.
+				return nil, io.EOF
+			} else if done < len(r.hashv) {
+				// Not enough for a hash.
+				return nil, ErrHashMismatch
+			}
+			// Truncate the data and copy to the hash.
+			copy(r.hashv, datav[done-len(r.hashv):])
+			datav = datav[:done-len(r.hashv)]
+			return datav, nil
+		} else if n == 0 && err != nil {
+			return nil, err
+		}
+	}
+	for done := 0; done < len(r.hashv); {
+		n, err := r.r.Read(r.hashv[done:])
+		done += n
+		if n == 0 && err == io.EOF {
+			// Copy over from the data.
+			missing := len(r.hashv) - done
+			copy(r.hashv[missing:], r.hashv[:done])
+			copy(r.hashv[:missing], datav[len(datav)-missing:])
+			datav = datav[:len(datav)-missing]
+			return datav, nil
+		} else if n == 0 && err != nil {
+			return nil, err
+		}
+	}
+	return datav, nil
+}
+
+// verifyHash verifies the given hash.
+//
+// The passed hash will be returned to the pool.
+func (r *reader) verifyHash(datav []byte) error {
+	for done := 0; done < len(datav); {
+		n, _ := r.h.Write(datav[done:])
+		done += n
+	}
+	computev := r.h.Sum(r.computev[:0])
+	if !hmac.Equal(r.hashv, computev) {
+		return ErrHashMismatch
+	}
+	return nil
+}
+
+// Read reads the data.
+func (r *reader) Read(p []byte) (int, error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	for done := 0; done < len(p); {
+		// Check for pending data.
+		if r.index < r.available {
+			n := copy(p[done:], r.data[r.index:r.available])
+			done += n
+			r.index += n
+			continue
+		}
+
+		// Prepare the next read.
+		var (
+			datav  []byte
+			inline bool
+		)
+
+		// We need to read a new segment. Can we read directly?
+		if len(p[done:]) >= SegmentSize {
+			datav = p[done : done+SegmentSize]
+			inline = true
+		} else {
+			datav = r.data[:]
+			inline = false
+		}
+
+		// Read the next segments.
+		datav, err := r.readSegment(datav)
+		if err != nil && err != io.EOF {
+			return 0, err
+		} else if err == io.EOF {
+			return done, io.EOF
+		}
+		if err := r.verifyHash(datav); err != nil {
+			return done, err
+		}
+
+		if inline {
+			// Move the cursor.
+			done += len(datav)
+		} else {
+			// Reset index & available.
+			r.index = 0
+			r.available = len(datav)
+		}
+	}
+
+	return len(p), nil
+}
diff --git a/pkg/hashio/hashio_test.go b/pkg/hashio/hashio_test.go
new file mode 100644
index 000000000..41dbdf860
--- /dev/null
+++ b/pkg/hashio/hashio_test.go
@@ -0,0 +1,142 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hashio
+
+import (
+	"bytes"
+	"crypto/hmac"
+	"crypto/sha256"
+	"fmt"
+	"io"
+	"math/rand"
+	"testing"
+)
+
+var testKey = []byte("01234567890123456789012345678901")
+
+func runTest(c []byte, fn func(enc *bytes.Buffer), iters int) error {
+	// Encoding happens via a buffer.
+	var (
+		enc bytes.Buffer
+		dec bytes.Buffer
+	)
+
+	for i := 0; i < iters; i++ {
+		enc.Reset()
+		w := NewWriter(&enc, hmac.New(sha256.New, testKey))
+		if _, err := io.Copy(w, bytes.NewBuffer(c)); err != nil {
+			return err
+		}
+		if err := w.Close(); err != nil {
+			return err
+		}
+	}
+
+	fn(&enc)
+
+	for i := 0; i < iters; i++ {
+		dec.Reset()
+		r := NewReader(bytes.NewReader(enc.Bytes()), hmac.New(sha256.New, testKey))
+		if _, err := io.Copy(&dec, r); err != nil {
+			return err
+		}
+	}
+
+	// Check that the data matches; this should never fail.
+	if !bytes.Equal(c, dec.Bytes()) {
+		panic(fmt.Sprintf("data didn't match: got %v, expected %v", dec.Bytes(), c))
+	}
+
+	return nil
+}
+
+func TestTable(t *testing.T) {
+	cases := [][]byte{
+		// Various data sizes.
+		nil,
+		[]byte(""),
+		[]byte("_"),
+		[]byte("0"),
+		[]byte("01"),
+		[]byte("012"),
+		[]byte("0123"),
+		[]byte("01234"),
+		[]byte("012356"),
+		[]byte("0123567"),
+		[]byte("01235678"),
+
+		// Make sure we have one longer than the hash length.
+		[]byte("012356asdjflkasjlk3jlk23j4lkjaso0d789f0aujw3lkjlkxsdf78asdful2kj3ljka78"),
+
+		// Make sure we have one longer than the segment size.
+		make([]byte, 3*SegmentSize),
+		make([]byte, 3*SegmentSize-1),
+		make([]byte, 3*SegmentSize+1),
+		make([]byte, 3*SegmentSize-32),
+		make([]byte, 3*SegmentSize+32),
+		make([]byte, 30*SegmentSize),
+	}
+
+	for _, c := range cases {
+		for _, flip := range []bool{false, true} {
+			if len(c) == 0 && flip == true {
+				continue
+			}
+
+			// Log the case.
+			t.Logf("case: len=%d flip=%v", len(c), flip)
+
+			if err := runTest(c, func(enc *bytes.Buffer) {
+				if flip {
+					corrupted := rand.Intn(enc.Len())
+					enc.Bytes()[corrupted]++
+				}
+			}, 1); err != nil {
+				if !flip || err != ErrHashMismatch {
+					t.Errorf("error during read: got %v, expected nil", err)
+				}
+				continue
+			} else if flip {
+				t.Errorf("failed to detect ErrHashMismatch on corrupted data!")
+				continue
+			}
+		}
+	}
+}
+
+const benchBytes = 10 * 1024 * 1024 // 10 MB.
+
+func BenchmarkWrite(b *testing.B) {
+	b.StopTimer()
+	x := make([]byte, benchBytes)
+	b.SetBytes(benchBytes)
+	b.StartTimer()
+	if err := runTest(x, func(enc *bytes.Buffer) {
+		b.StopTimer()
+	}, b.N); err != nil {
+		b.Errorf("benchmark failed: %v", err)
+	}
+}
+
+func BenchmarkRead(b *testing.B) {
+	b.StopTimer()
+	x := make([]byte, benchBytes)
+	b.SetBytes(benchBytes)
+	if err := runTest(x, func(enc *bytes.Buffer) {
+		b.StartTimer()
+	}, b.N); err != nil {
+		b.Errorf("benchmark failed: %v", err)
+	}
+}