From ddf25e3331a18a74d0e01d74fee7f82963fe778c Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Mon, 29 Jul 2019 19:16:07 -0700 Subject: ext: extent reader implementation. PiperOrigin-RevId: 260629559 --- pkg/sentry/fs/ext/BUILD | 1 + pkg/sentry/fs/ext/extent_file.go | 118 +++++++++++++++++++++++++++- pkg/sentry/fs/ext/extent_test.go | 165 +++++++++++++++++++++++++++++++-------- pkg/sentry/fs/ext/inline_file.go | 3 +- pkg/sentry/fs/ext/utils.go | 17 ++++ 5 files changed, 270 insertions(+), 34 deletions(-) (limited to 'pkg/sentry') diff --git a/pkg/sentry/fs/ext/BUILD b/pkg/sentry/fs/ext/BUILD index 60f6debaf..a89dc5166 100644 --- a/pkg/sentry/fs/ext/BUILD +++ b/pkg/sentry/fs/ext/BUILD @@ -42,6 +42,7 @@ go_library( "//pkg/sentry/fs/ext/disklayout", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/pipe", + "//pkg/sentry/safemem", "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/syserror", diff --git a/pkg/sentry/fs/ext/extent_file.go b/pkg/sentry/fs/ext/extent_file.go index 86583881d..882300d96 100644 --- a/pkg/sentry/fs/ext/extent_file.go +++ b/pkg/sentry/fs/ext/extent_file.go @@ -16,6 +16,7 @@ package ext import ( "io" + "sort" "gvisor.dev/gvisor/pkg/binary" "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" @@ -36,7 +37,12 @@ var _ fileReader = (*extentFile)(nil) // Read implements fileReader.getFileReader. func (f *extentFile) getFileReader(dev io.ReadSeeker, blkSize uint64, offset uint64) io.Reader { - panic("unimplemented") + return &extentReader{ + dev: dev, + file: f, + fileOff: offset, + blkSize: blkSize, + } } // newExtentFile is the extent file constructor. It reads the entire extent @@ -145,3 +151,113 @@ func buildExtTreeFromDisk(dev io.ReadSeeker, entry disklayout.ExtentEntry, blkSi return &disklayout.ExtentNode{header, entries}, nil } + +// extentReader implements io.Reader which can traverse the extent tree and +// read file data. This is not thread safe. +type extentReader struct { + dev io.ReadSeeker + file *extentFile + fileOff uint64 // Represents the current file offset being read from. + blkSize uint64 +} + +// Compiles only if inlineReader implements io.Reader. +var _ io.Reader = (*extentReader)(nil) + +// Read implements io.Reader.Read. +func (r *extentReader) Read(dst []byte) (int, error) { + if len(dst) == 0 { + return 0, nil + } + + if r.fileOff >= r.file.regFile.inode.diskInode.Size() { + return 0, io.EOF + } + + return r.read(&r.file.root, dst) +} + +// read is a helper which traverses the extent tree and reads data. +func (r *extentReader) read(node *disklayout.ExtentNode, dst []byte) (int, error) { + // Perform a binary search for the node covering bytes starting at r.fileOff. + // A highly fragmented filesystem can have upto 340 entries and so linear + // search should be avoided. Finds the first entry which does not cover the + // file block we want and subtracts 1 to get the desired index. + fileBlk := r.fileBlock() + n := len(node.Entries) + found := sort.Search(n, func(i int) bool { + return node.Entries[i].Entry.FileBlock() > fileBlk + }) - 1 + + // We should be in this recursive step only if the data we want exists under + // the current node. + if found < 0 { + panic("searching for a file block in an extent entry which does not cover it") + } + + read := 0 + toRead := len(dst) + var curR int + var err error + for i := found; i < n && read < toRead; i++ { + if node.Header.Height == 0 { + curR, err = r.readFromExtent(node.Entries[i].Entry.(*disklayout.Extent), dst[read:]) + } else { + curR, err = r.read(node.Entries[i].Node, dst[read:]) + } + + read += curR + if err != nil { + return read, err + } + } + + return read, nil +} + +// readFromExtent reads file data from the extent. It takes advantage of the +// sequential nature of extents and reads file data from multiple blocks in one +// call. Also updates the file offset. +// +// A non-nil error indicates that this is a partial read and there is probably +// more to read from this extent. The caller should propagate the error upward +// and not move to the next extent in the tree. +// +// A subsequent call to extentReader.Read should continue reading from where we +// left off as expected. +func (r *extentReader) readFromExtent(ex *disklayout.Extent, dst []byte) (int, error) { + curFileBlk := r.fileBlock() + exFirstFileBlk := ex.FileBlock() + exLastFileBlk := exFirstFileBlk + uint32(ex.Length) // This is exclusive. + + // We should be in this recursive step only if the data we want exists under + // the current extent. + if curFileBlk < exFirstFileBlk || exLastFileBlk <= curFileBlk { + panic("searching for a file block in an extent which does not cover it") + } + + curPhyBlk := uint64(curFileBlk-exFirstFileBlk) + ex.PhysicalBlock() + readStart := curPhyBlk*r.blkSize + r.fileBlockOff() + + endPhyBlk := ex.PhysicalBlock() + uint64(ex.Length) + extentEnd := endPhyBlk * r.blkSize // This is exclusive. + + toRead := int(extentEnd - readStart) + if len(dst) < toRead { + toRead = len(dst) + } + + n, err := readFull(r.dev, int64(readStart), dst[:toRead]) + r.fileOff += uint64(n) + return n, err +} + +// fileBlock returns the file block number we are currently reading. +func (r *extentReader) fileBlock() uint32 { + return uint32(r.fileOff / r.blkSize) +} + +// fileBlockOff returns the current offset within the current file block. +func (r *extentReader) fileBlockOff() uint64 { + return r.fileOff % r.blkSize +} diff --git a/pkg/sentry/fs/ext/extent_test.go b/pkg/sentry/fs/ext/extent_test.go index 01251d0a7..fb7921add 100644 --- a/pkg/sentry/fs/ext/extent_test.go +++ b/pkg/sentry/fs/ext/extent_test.go @@ -16,6 +16,8 @@ package ext import ( "bytes" + "io" + "math/rand" "testing" "github.com/google/go-cmp/cmp" @@ -24,9 +26,14 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" ) -// TestExtentTree tests the extent tree building logic. +const ( + // mockExtentBlkSize is the mock block size used for testing. + // No block has more than 1 header + 4 entries. + mockExtentBlkSize = uint64(64) +) + +// The tree described below looks like: // -// Test tree: // 0.{Head}[Idx][Idx] // / \ // / \ @@ -44,18 +51,8 @@ import ( // // Please note that ext4 might not construct extent trees looking like this. // This is purely for testing the tree traversal logic. -func TestExtentTree(t *testing.T) { - blkSize := uint64(64) // No block has more than 1 header + 4 entries. - mockDisk := make([]byte, blkSize*10) - mockExtentFile := extentFile{ - regFile: regularFile{ - inode: inode{ - diskInode: &disklayout.InodeNew{}, - }, - }, - } - - node3 := &disklayout.ExtentNode{ +var ( + node3 = &disklayout.ExtentNode{ Header: disklayout.ExtentHeader{ Magic: disklayout.ExtentMagic, NumEntries: 1, @@ -74,7 +71,7 @@ func TestExtentTree(t *testing.T) { }, } - node2 := &disklayout.ExtentNode{ + node2 = &disklayout.ExtentNode{ Header: disklayout.ExtentHeader{ Magic: disklayout.ExtentMagic, NumEntries: 1, @@ -92,7 +89,7 @@ func TestExtentTree(t *testing.T) { }, } - node1 := &disklayout.ExtentNode{ + node1 = &disklayout.ExtentNode{ Header: disklayout.ExtentHeader{ Magic: disklayout.ExtentMagic, NumEntries: 2, @@ -119,7 +116,7 @@ func TestExtentTree(t *testing.T) { }, } - node0 := &disklayout.ExtentNode{ + node0 = &disklayout.ExtentNode{ Header: disklayout.ExtentHeader{ Magic: disklayout.ExtentMagic, NumEntries: 2, @@ -143,22 +140,95 @@ func TestExtentTree(t *testing.T) { }, }, } +) - writeTree(&mockExtentFile.regFile.inode, mockDisk, node0, blkSize) +// TestExtentReader tests extentReader functionality. We should be able to use +// the file reader like any other io.Reader. +func TestExtentReader(t *testing.T) { + type extentReaderTest struct { + name string + from func(uint64) uint64 + to func(uint64) uint64 + } - r := bytes.NewReader(mockDisk) - if err := mockExtentFile.buildExtTree(r, blkSize); err != nil { - t.Fatalf("inode.buildExtTree failed: %v", err) + tests := []extentReaderTest{ + { + name: "read first half", + from: beginning, + to: middle, + }, + { + name: "read entire file", + from: beginning, + to: end, + }, + { + name: "read second half", + from: middle, + to: end, + }, } + dev, mockExtentFile, want := extentTreeSetUp(t, node0) + size := mockExtentFile.regFile.inode.diskInode.Size() + + for _, test := range tests { + from := test.from(size) + to := test.to(size) + fileReader := mockExtentFile.getFileReader(dev, mockExtentBlkSize, from) + + got := make([]byte, to-from) + if _, err := io.ReadFull(fileReader, got); err != nil { + t.Errorf("file read failed: %v", err) + } + + if diff := cmp.Diff(got, want[from:to]); diff != "" { + t.Errorf("file data mismatch (-want +got):\n%s", diff) + } + } +} + +// TestBuildExtentTree tests the extent tree building logic. +func TestBuildExtentTree(t *testing.T) { + _, mockExtentFile, _ := extentTreeSetUp(t, node0) + opt := cmpopts.IgnoreUnexported(disklayout.ExtentIdx{}, disklayout.ExtentHeader{}) if diff := cmp.Diff(&mockExtentFile.root, node0, opt); diff != "" { t.Errorf("extent tree mismatch (-want +got):\n%s", diff) } } -// writeTree writes the tree represented by `root` to the inode and disk passed. -func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, blkSize uint64) { +// extentTreeSetUp writes the passed extent tree to a mock disk as an extent +// tree. It also constucts a mock extent file with the same tree built in it. +// It also writes random data file data and returns it. +func extentTreeSetUp(t *testing.T, root *disklayout.ExtentNode) (io.ReadSeeker, *extentFile, []byte) { + t.Helper() + + mockDisk := make([]byte, mockExtentBlkSize*10) + mockExtentFile := &extentFile{ + regFile: regularFile{ + inode: inode{ + diskInode: &disklayout.InodeNew{ + InodeOld: disklayout.InodeOld{ + SizeLo: uint32(mockExtentBlkSize) * getNumPhyBlks(root), + }, + }, + }, + }, + } + + fileData := writeTree(&mockExtentFile.regFile.inode, mockDisk, node0, mockExtentBlkSize) + + r := bytes.NewReader(mockDisk) + if err := mockExtentFile.buildExtTree(r, mockExtentBlkSize); err != nil { + t.Fatalf("inode.buildExtTree failed: %v", err) + } + return r, mockExtentFile, fileData +} + +// writeTree writes the tree represented by `root` to the inode and disk. It +// also writes random file data on disk. +func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, mockExtentBlkSize uint64) []byte { rootData := binary.Marshal(nil, binary.LittleEndian, root.Header) for _, ep := range root.Entries { rootData = binary.Marshal(rootData, binary.LittleEndian, ep.Entry) @@ -166,26 +236,57 @@ func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, blkSize uint copy(in.diskInode.Data(), rootData) - if root.Header.Height > 0 { - for _, ep := range root.Entries { - writeTreeToDisk(disk, ep, blkSize) + var fileData []byte + for _, ep := range root.Entries { + if root.Header.Height == 0 { + fileData = append(fileData, writeRandomFileData(disk, ep.Entry.(*disklayout.Extent))...) + } else { + fileData = append(fileData, writeTreeToDisk(disk, ep)...) } } + return fileData } // writeTreeToDisk is the recursive step for writeTree which writes the tree -// on the disk only. -func writeTreeToDisk(disk []byte, curNode disklayout.ExtentEntryPair, blkSize uint64) { +// on the disk only. Also writes random file data on disk. +func writeTreeToDisk(disk []byte, curNode disklayout.ExtentEntryPair) []byte { nodeData := binary.Marshal(nil, binary.LittleEndian, curNode.Node.Header) for _, ep := range curNode.Node.Entries { nodeData = binary.Marshal(nodeData, binary.LittleEndian, ep.Entry) } - copy(disk[curNode.Entry.PhysicalBlock()*blkSize:], nodeData) + copy(disk[curNode.Entry.PhysicalBlock()*mockExtentBlkSize:], nodeData) + + var fileData []byte + for _, ep := range curNode.Node.Entries { + if curNode.Node.Header.Height == 0 { + fileData = append(fileData, writeRandomFileData(disk, ep.Entry.(*disklayout.Extent))...) + } else { + fileData = append(fileData, writeTreeToDisk(disk, ep)...) + } + } + return fileData +} + +// writeRandomFileData writes random bytes to the blocks on disk that the +// passed extent points to. +func writeRandomFileData(disk []byte, ex *disklayout.Extent) []byte { + phyExStartBlk := ex.PhysicalBlock() + phyExStartOff := phyExStartBlk * mockExtentBlkSize + phyExEndOff := phyExStartOff + uint64(ex.Length)*mockExtentBlkSize + rand.Read(disk[phyExStartOff:phyExEndOff]) + return disk[phyExStartOff:phyExEndOff] +} - if curNode.Node.Header.Height > 0 { - for _, ep := range curNode.Node.Entries { - writeTreeToDisk(disk, ep, blkSize) +// getNumPhyBlks returns the number of physical blocks covered under the node. +func getNumPhyBlks(node *disklayout.ExtentNode) uint32 { + var res uint32 + for _, ep := range node.Entries { + if node.Header.Height == 0 { + res += uint32(ep.Entry.(*disklayout.Extent).Length) + } else { + res += getNumPhyBlks(ep.Node) } } + return res } diff --git a/pkg/sentry/fs/ext/inline_file.go b/pkg/sentry/fs/ext/inline_file.go index dd93ee2e1..8f1395567 100644 --- a/pkg/sentry/fs/ext/inline_file.go +++ b/pkg/sentry/fs/ext/inline_file.go @@ -40,7 +40,8 @@ func newInlineFile(regFile regularFile) *inlineFile { return file } -// inlineReader implements io.Reader which can read the underlying data. +// inlineReader implements io.Reader which can read the underlying data. This +// is not thread safe. type inlineReader struct { offset uint64 data []byte diff --git a/pkg/sentry/fs/ext/utils.go b/pkg/sentry/fs/ext/utils.go index 8790c7778..7c33919e0 100644 --- a/pkg/sentry/fs/ext/utils.go +++ b/pkg/sentry/fs/ext/utils.go @@ -41,6 +41,23 @@ func readFromDisk(dev io.ReadSeeker, abOff int64, v interface{}) error { return nil } +// readFull is a wrapper around io.ReadFull which enforces the absolute offset +// parameter so that we can ensure that we do not perform incorrect reads from +// stale previously used offsets. +// +// Precondition: Must hold the mutex of the filesystem containing dev. +func readFull(dev io.ReadSeeker, abOff int64, dst []byte) (int, error) { + if _, err := dev.Seek(abOff, io.SeekStart); err != nil { + return 0, syserror.EIO + } + + n, err := io.ReadFull(dev, dst) + if err != nil { + err = syserror.EIO + } + return n, err +} + // readSuperBlock reads the SuperBlock from block group 0 in the underlying // device. There are three versions of the superblock. This function identifies // and returns the correct version. -- cgit v1.2.3