summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAyush Ranjan <ayushranjan@google.com>2019-07-29 19:16:07 -0700
committergVisor bot <gvisor-bot@google.com>2019-07-29 19:17:27 -0700
commitddf25e3331a18a74d0e01d74fee7f82963fe778c (patch)
treecdf7054d670703d929b01761012823adc69e90a0
parentb765eb45894ea426d2c6d167b6ceb662db6ff4d2 (diff)
ext: extent reader implementation.
PiperOrigin-RevId: 260629559
-rw-r--r--pkg/sentry/fs/ext/BUILD1
-rw-r--r--pkg/sentry/fs/ext/extent_file.go118
-rw-r--r--pkg/sentry/fs/ext/extent_test.go165
-rw-r--r--pkg/sentry/fs/ext/inline_file.go3
-rw-r--r--pkg/sentry/fs/ext/utils.go17
5 files changed, 270 insertions, 34 deletions
diff --git a/pkg/sentry/fs/ext/BUILD b/pkg/sentry/fs/ext/BUILD
index 60f6debaf..a89dc5166 100644
--- a/pkg/sentry/fs/ext/BUILD
+++ b/pkg/sentry/fs/ext/BUILD
@@ -42,6 +42,7 @@ go_library(
"//pkg/sentry/fs/ext/disklayout",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/pipe",
+ "//pkg/sentry/safemem",
"//pkg/sentry/usermem",
"//pkg/sentry/vfs",
"//pkg/syserror",
diff --git a/pkg/sentry/fs/ext/extent_file.go b/pkg/sentry/fs/ext/extent_file.go
index 86583881d..882300d96 100644
--- a/pkg/sentry/fs/ext/extent_file.go
+++ b/pkg/sentry/fs/ext/extent_file.go
@@ -16,6 +16,7 @@ package ext
import (
"io"
+ "sort"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
@@ -36,7 +37,12 @@ var _ fileReader = (*extentFile)(nil)
// Read implements fileReader.getFileReader.
func (f *extentFile) getFileReader(dev io.ReadSeeker, blkSize uint64, offset uint64) io.Reader {
- panic("unimplemented")
+ return &extentReader{
+ dev: dev,
+ file: f,
+ fileOff: offset,
+ blkSize: blkSize,
+ }
}
// newExtentFile is the extent file constructor. It reads the entire extent
@@ -145,3 +151,113 @@ func buildExtTreeFromDisk(dev io.ReadSeeker, entry disklayout.ExtentEntry, blkSi
return &disklayout.ExtentNode{header, entries}, nil
}
+
+// extentReader implements io.Reader which can traverse the extent tree and
+// read file data. This is not thread safe.
+type extentReader struct {
+ dev io.ReadSeeker
+ file *extentFile
+ fileOff uint64 // Represents the current file offset being read from.
+ blkSize uint64
+}
+
+// Compiles only if inlineReader implements io.Reader.
+var _ io.Reader = (*extentReader)(nil)
+
+// Read implements io.Reader.Read.
+func (r *extentReader) Read(dst []byte) (int, error) {
+ if len(dst) == 0 {
+ return 0, nil
+ }
+
+ if r.fileOff >= r.file.regFile.inode.diskInode.Size() {
+ return 0, io.EOF
+ }
+
+ return r.read(&r.file.root, dst)
+}
+
+// read is a helper which traverses the extent tree and reads data.
+func (r *extentReader) read(node *disklayout.ExtentNode, dst []byte) (int, error) {
+ // Perform a binary search for the node covering bytes starting at r.fileOff.
+ // A highly fragmented filesystem can have upto 340 entries and so linear
+ // search should be avoided. Finds the first entry which does not cover the
+ // file block we want and subtracts 1 to get the desired index.
+ fileBlk := r.fileBlock()
+ n := len(node.Entries)
+ found := sort.Search(n, func(i int) bool {
+ return node.Entries[i].Entry.FileBlock() > fileBlk
+ }) - 1
+
+ // We should be in this recursive step only if the data we want exists under
+ // the current node.
+ if found < 0 {
+ panic("searching for a file block in an extent entry which does not cover it")
+ }
+
+ read := 0
+ toRead := len(dst)
+ var curR int
+ var err error
+ for i := found; i < n && read < toRead; i++ {
+ if node.Header.Height == 0 {
+ curR, err = r.readFromExtent(node.Entries[i].Entry.(*disklayout.Extent), dst[read:])
+ } else {
+ curR, err = r.read(node.Entries[i].Node, dst[read:])
+ }
+
+ read += curR
+ if err != nil {
+ return read, err
+ }
+ }
+
+ return read, nil
+}
+
+// readFromExtent reads file data from the extent. It takes advantage of the
+// sequential nature of extents and reads file data from multiple blocks in one
+// call. Also updates the file offset.
+//
+// A non-nil error indicates that this is a partial read and there is probably
+// more to read from this extent. The caller should propagate the error upward
+// and not move to the next extent in the tree.
+//
+// A subsequent call to extentReader.Read should continue reading from where we
+// left off as expected.
+func (r *extentReader) readFromExtent(ex *disklayout.Extent, dst []byte) (int, error) {
+ curFileBlk := r.fileBlock()
+ exFirstFileBlk := ex.FileBlock()
+ exLastFileBlk := exFirstFileBlk + uint32(ex.Length) // This is exclusive.
+
+ // We should be in this recursive step only if the data we want exists under
+ // the current extent.
+ if curFileBlk < exFirstFileBlk || exLastFileBlk <= curFileBlk {
+ panic("searching for a file block in an extent which does not cover it")
+ }
+
+ curPhyBlk := uint64(curFileBlk-exFirstFileBlk) + ex.PhysicalBlock()
+ readStart := curPhyBlk*r.blkSize + r.fileBlockOff()
+
+ endPhyBlk := ex.PhysicalBlock() + uint64(ex.Length)
+ extentEnd := endPhyBlk * r.blkSize // This is exclusive.
+
+ toRead := int(extentEnd - readStart)
+ if len(dst) < toRead {
+ toRead = len(dst)
+ }
+
+ n, err := readFull(r.dev, int64(readStart), dst[:toRead])
+ r.fileOff += uint64(n)
+ return n, err
+}
+
+// fileBlock returns the file block number we are currently reading.
+func (r *extentReader) fileBlock() uint32 {
+ return uint32(r.fileOff / r.blkSize)
+}
+
+// fileBlockOff returns the current offset within the current file block.
+func (r *extentReader) fileBlockOff() uint64 {
+ return r.fileOff % r.blkSize
+}
diff --git a/pkg/sentry/fs/ext/extent_test.go b/pkg/sentry/fs/ext/extent_test.go
index 01251d0a7..fb7921add 100644
--- a/pkg/sentry/fs/ext/extent_test.go
+++ b/pkg/sentry/fs/ext/extent_test.go
@@ -16,6 +16,8 @@ package ext
import (
"bytes"
+ "io"
+ "math/rand"
"testing"
"github.com/google/go-cmp/cmp"
@@ -24,9 +26,14 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
)
-// TestExtentTree tests the extent tree building logic.
+const (
+ // mockExtentBlkSize is the mock block size used for testing.
+ // No block has more than 1 header + 4 entries.
+ mockExtentBlkSize = uint64(64)
+)
+
+// The tree described below looks like:
//
-// Test tree:
// 0.{Head}[Idx][Idx]
// / \
// / \
@@ -44,18 +51,8 @@ import (
//
// Please note that ext4 might not construct extent trees looking like this.
// This is purely for testing the tree traversal logic.
-func TestExtentTree(t *testing.T) {
- blkSize := uint64(64) // No block has more than 1 header + 4 entries.
- mockDisk := make([]byte, blkSize*10)
- mockExtentFile := extentFile{
- regFile: regularFile{
- inode: inode{
- diskInode: &disklayout.InodeNew{},
- },
- },
- }
-
- node3 := &disklayout.ExtentNode{
+var (
+ node3 = &disklayout.ExtentNode{
Header: disklayout.ExtentHeader{
Magic: disklayout.ExtentMagic,
NumEntries: 1,
@@ -74,7 +71,7 @@ func TestExtentTree(t *testing.T) {
},
}
- node2 := &disklayout.ExtentNode{
+ node2 = &disklayout.ExtentNode{
Header: disklayout.ExtentHeader{
Magic: disklayout.ExtentMagic,
NumEntries: 1,
@@ -92,7 +89,7 @@ func TestExtentTree(t *testing.T) {
},
}
- node1 := &disklayout.ExtentNode{
+ node1 = &disklayout.ExtentNode{
Header: disklayout.ExtentHeader{
Magic: disklayout.ExtentMagic,
NumEntries: 2,
@@ -119,7 +116,7 @@ func TestExtentTree(t *testing.T) {
},
}
- node0 := &disklayout.ExtentNode{
+ node0 = &disklayout.ExtentNode{
Header: disklayout.ExtentHeader{
Magic: disklayout.ExtentMagic,
NumEntries: 2,
@@ -143,22 +140,95 @@ func TestExtentTree(t *testing.T) {
},
},
}
+)
- writeTree(&mockExtentFile.regFile.inode, mockDisk, node0, blkSize)
+// TestExtentReader tests extentReader functionality. We should be able to use
+// the file reader like any other io.Reader.
+func TestExtentReader(t *testing.T) {
+ type extentReaderTest struct {
+ name string
+ from func(uint64) uint64
+ to func(uint64) uint64
+ }
- r := bytes.NewReader(mockDisk)
- if err := mockExtentFile.buildExtTree(r, blkSize); err != nil {
- t.Fatalf("inode.buildExtTree failed: %v", err)
+ tests := []extentReaderTest{
+ {
+ name: "read first half",
+ from: beginning,
+ to: middle,
+ },
+ {
+ name: "read entire file",
+ from: beginning,
+ to: end,
+ },
+ {
+ name: "read second half",
+ from: middle,
+ to: end,
+ },
}
+ dev, mockExtentFile, want := extentTreeSetUp(t, node0)
+ size := mockExtentFile.regFile.inode.diskInode.Size()
+
+ for _, test := range tests {
+ from := test.from(size)
+ to := test.to(size)
+ fileReader := mockExtentFile.getFileReader(dev, mockExtentBlkSize, from)
+
+ got := make([]byte, to-from)
+ if _, err := io.ReadFull(fileReader, got); err != nil {
+ t.Errorf("file read failed: %v", err)
+ }
+
+ if diff := cmp.Diff(got, want[from:to]); diff != "" {
+ t.Errorf("file data mismatch (-want +got):\n%s", diff)
+ }
+ }
+}
+
+// TestBuildExtentTree tests the extent tree building logic.
+func TestBuildExtentTree(t *testing.T) {
+ _, mockExtentFile, _ := extentTreeSetUp(t, node0)
+
opt := cmpopts.IgnoreUnexported(disklayout.ExtentIdx{}, disklayout.ExtentHeader{})
if diff := cmp.Diff(&mockExtentFile.root, node0, opt); diff != "" {
t.Errorf("extent tree mismatch (-want +got):\n%s", diff)
}
}
-// writeTree writes the tree represented by `root` to the inode and disk passed.
-func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, blkSize uint64) {
+// extentTreeSetUp writes the passed extent tree to a mock disk as an extent
+// tree. It also constucts a mock extent file with the same tree built in it.
+// It also writes random data file data and returns it.
+func extentTreeSetUp(t *testing.T, root *disklayout.ExtentNode) (io.ReadSeeker, *extentFile, []byte) {
+ t.Helper()
+
+ mockDisk := make([]byte, mockExtentBlkSize*10)
+ mockExtentFile := &extentFile{
+ regFile: regularFile{
+ inode: inode{
+ diskInode: &disklayout.InodeNew{
+ InodeOld: disklayout.InodeOld{
+ SizeLo: uint32(mockExtentBlkSize) * getNumPhyBlks(root),
+ },
+ },
+ },
+ },
+ }
+
+ fileData := writeTree(&mockExtentFile.regFile.inode, mockDisk, node0, mockExtentBlkSize)
+
+ r := bytes.NewReader(mockDisk)
+ if err := mockExtentFile.buildExtTree(r, mockExtentBlkSize); err != nil {
+ t.Fatalf("inode.buildExtTree failed: %v", err)
+ }
+ return r, mockExtentFile, fileData
+}
+
+// writeTree writes the tree represented by `root` to the inode and disk. It
+// also writes random file data on disk.
+func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, mockExtentBlkSize uint64) []byte {
rootData := binary.Marshal(nil, binary.LittleEndian, root.Header)
for _, ep := range root.Entries {
rootData = binary.Marshal(rootData, binary.LittleEndian, ep.Entry)
@@ -166,26 +236,57 @@ func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, blkSize uint
copy(in.diskInode.Data(), rootData)
- if root.Header.Height > 0 {
- for _, ep := range root.Entries {
- writeTreeToDisk(disk, ep, blkSize)
+ var fileData []byte
+ for _, ep := range root.Entries {
+ if root.Header.Height == 0 {
+ fileData = append(fileData, writeRandomFileData(disk, ep.Entry.(*disklayout.Extent))...)
+ } else {
+ fileData = append(fileData, writeTreeToDisk(disk, ep)...)
}
}
+ return fileData
}
// writeTreeToDisk is the recursive step for writeTree which writes the tree
-// on the disk only.
-func writeTreeToDisk(disk []byte, curNode disklayout.ExtentEntryPair, blkSize uint64) {
+// on the disk only. Also writes random file data on disk.
+func writeTreeToDisk(disk []byte, curNode disklayout.ExtentEntryPair) []byte {
nodeData := binary.Marshal(nil, binary.LittleEndian, curNode.Node.Header)
for _, ep := range curNode.Node.Entries {
nodeData = binary.Marshal(nodeData, binary.LittleEndian, ep.Entry)
}
- copy(disk[curNode.Entry.PhysicalBlock()*blkSize:], nodeData)
+ copy(disk[curNode.Entry.PhysicalBlock()*mockExtentBlkSize:], nodeData)
+
+ var fileData []byte
+ for _, ep := range curNode.Node.Entries {
+ if curNode.Node.Header.Height == 0 {
+ fileData = append(fileData, writeRandomFileData(disk, ep.Entry.(*disklayout.Extent))...)
+ } else {
+ fileData = append(fileData, writeTreeToDisk(disk, ep)...)
+ }
+ }
+ return fileData
+}
+
+// writeRandomFileData writes random bytes to the blocks on disk that the
+// passed extent points to.
+func writeRandomFileData(disk []byte, ex *disklayout.Extent) []byte {
+ phyExStartBlk := ex.PhysicalBlock()
+ phyExStartOff := phyExStartBlk * mockExtentBlkSize
+ phyExEndOff := phyExStartOff + uint64(ex.Length)*mockExtentBlkSize
+ rand.Read(disk[phyExStartOff:phyExEndOff])
+ return disk[phyExStartOff:phyExEndOff]
+}
- if curNode.Node.Header.Height > 0 {
- for _, ep := range curNode.Node.Entries {
- writeTreeToDisk(disk, ep, blkSize)
+// getNumPhyBlks returns the number of physical blocks covered under the node.
+func getNumPhyBlks(node *disklayout.ExtentNode) uint32 {
+ var res uint32
+ for _, ep := range node.Entries {
+ if node.Header.Height == 0 {
+ res += uint32(ep.Entry.(*disklayout.Extent).Length)
+ } else {
+ res += getNumPhyBlks(ep.Node)
}
}
+ return res
}
diff --git a/pkg/sentry/fs/ext/inline_file.go b/pkg/sentry/fs/ext/inline_file.go
index dd93ee2e1..8f1395567 100644
--- a/pkg/sentry/fs/ext/inline_file.go
+++ b/pkg/sentry/fs/ext/inline_file.go
@@ -40,7 +40,8 @@ func newInlineFile(regFile regularFile) *inlineFile {
return file
}
-// inlineReader implements io.Reader which can read the underlying data.
+// inlineReader implements io.Reader which can read the underlying data. This
+// is not thread safe.
type inlineReader struct {
offset uint64
data []byte
diff --git a/pkg/sentry/fs/ext/utils.go b/pkg/sentry/fs/ext/utils.go
index 8790c7778..7c33919e0 100644
--- a/pkg/sentry/fs/ext/utils.go
+++ b/pkg/sentry/fs/ext/utils.go
@@ -41,6 +41,23 @@ func readFromDisk(dev io.ReadSeeker, abOff int64, v interface{}) error {
return nil
}
+// readFull is a wrapper around io.ReadFull which enforces the absolute offset
+// parameter so that we can ensure that we do not perform incorrect reads from
+// stale previously used offsets.
+//
+// Precondition: Must hold the mutex of the filesystem containing dev.
+func readFull(dev io.ReadSeeker, abOff int64, dst []byte) (int, error) {
+ if _, err := dev.Seek(abOff, io.SeekStart); err != nil {
+ return 0, syserror.EIO
+ }
+
+ n, err := io.ReadFull(dev, dst)
+ if err != nil {
+ err = syserror.EIO
+ }
+ return n, err
+}
+
// readSuperBlock reads the SuperBlock from block group 0 in the underlying
// device. There are three versions of the superblock. This function identifies
// and returns the correct version.