summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl/ext/extent_file.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/ext/extent_file.go')
-rw-r--r--pkg/sentry/fsimpl/ext/extent_file.go237
1 files changed, 237 insertions, 0 deletions
diff --git a/pkg/sentry/fsimpl/ext/extent_file.go b/pkg/sentry/fsimpl/ext/extent_file.go
new file mode 100644
index 000000000..38b68a2d3
--- /dev/null
+++ b/pkg/sentry/fsimpl/ext/extent_file.go
@@ -0,0 +1,237 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ext
+
+import (
+ "io"
+ "sort"
+
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// extentFile is a type of regular file which uses extents to store file data.
+type extentFile struct {
+ regFile regularFile
+
+ // root is the root extent node. This lives in the 60 byte diskInode.Data().
+ // Immutable.
+ root disklayout.ExtentNode
+}
+
+// Compiles only if extentFile implements io.ReaderAt.
+var _ io.ReaderAt = (*extentFile)(nil)
+
+// newExtentFile is the extent file constructor. It reads the entire extent
+// tree into memory.
+// TODO(b/134676337): Build extent tree on demand to reduce memory usage.
+func newExtentFile(regFile regularFile) (*extentFile, error) {
+ file := &extentFile{regFile: regFile}
+ file.regFile.impl = file
+ err := file.buildExtTree()
+ if err != nil {
+ return nil, err
+ }
+ return file, nil
+}
+
+// buildExtTree builds the extent tree by reading it from disk by doing
+// running a simple DFS. It first reads the root node from the inode struct in
+// memory. Then it recursively builds the rest of the tree by reading it off
+// disk.
+//
+// Precondition: inode flag InExtents must be set.
+func (f *extentFile) buildExtTree() error {
+ rootNodeData := f.regFile.inode.diskInode.Data()
+
+ binary.Unmarshal(rootNodeData[:disklayout.ExtentStructsSize], binary.LittleEndian, &f.root.Header)
+
+ // Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries.
+ if f.root.Header.NumEntries > 4 {
+ // read(2) specifies that EINVAL should be returned if the file is unsuitable
+ // for reading.
+ return syserror.EINVAL
+ }
+
+ f.root.Entries = make([]disklayout.ExtentEntryPair, f.root.Header.NumEntries)
+ for i, off := uint16(0), disklayout.ExtentStructsSize; i < f.root.Header.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize {
+ var curEntry disklayout.ExtentEntry
+ if f.root.Header.Height == 0 {
+ // Leaf node.
+ curEntry = &disklayout.Extent{}
+ } else {
+ // Internal node.
+ curEntry = &disklayout.ExtentIdx{}
+ }
+ binary.Unmarshal(rootNodeData[off:off+disklayout.ExtentStructsSize], binary.LittleEndian, curEntry)
+ f.root.Entries[i].Entry = curEntry
+ }
+
+ // If this node is internal, perform DFS.
+ if f.root.Header.Height > 0 {
+ for i := uint16(0); i < f.root.Header.NumEntries; i++ {
+ var err error
+ if f.root.Entries[i].Node, err = f.buildExtTreeFromDisk(f.root.Entries[i].Entry); err != nil {
+ return err
+ }
+ }
+ }
+
+ return nil
+}
+
+// buildExtTreeFromDisk reads the extent tree nodes from disk and recursively
+// builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to
+// by the ExtentEntry.
+func (f *extentFile) buildExtTreeFromDisk(entry disklayout.ExtentEntry) (*disklayout.ExtentNode, error) {
+ var header disklayout.ExtentHeader
+ off := entry.PhysicalBlock() * f.regFile.inode.blkSize
+ err := readFromDisk(f.regFile.inode.dev, int64(off), &header)
+ if err != nil {
+ return nil, err
+ }
+
+ entries := make([]disklayout.ExtentEntryPair, header.NumEntries)
+ for i, off := uint16(0), off+disklayout.ExtentStructsSize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize {
+ var curEntry disklayout.ExtentEntry
+ if header.Height == 0 {
+ // Leaf node.
+ curEntry = &disklayout.Extent{}
+ } else {
+ // Internal node.
+ curEntry = &disklayout.ExtentIdx{}
+ }
+
+ err := readFromDisk(f.regFile.inode.dev, int64(off), curEntry)
+ if err != nil {
+ return nil, err
+ }
+ entries[i].Entry = curEntry
+ }
+
+ // If this node is internal, perform DFS.
+ if header.Height > 0 {
+ for i := uint16(0); i < header.NumEntries; i++ {
+ var err error
+ entries[i].Node, err = f.buildExtTreeFromDisk(entries[i].Entry)
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ return &disklayout.ExtentNode{header, entries}, nil
+}
+
+// ReadAt implements io.ReaderAt.ReadAt.
+func (f *extentFile) ReadAt(dst []byte, off int64) (int, error) {
+ if len(dst) == 0 {
+ return 0, nil
+ }
+
+ if off < 0 {
+ return 0, syserror.EINVAL
+ }
+
+ if uint64(off) >= f.regFile.inode.diskInode.Size() {
+ return 0, io.EOF
+ }
+
+ n, err := f.read(&f.root, uint64(off), dst)
+ if n < len(dst) && err == nil {
+ err = io.EOF
+ }
+ return n, err
+}
+
+// read is the recursive step of extentFile.ReadAt which traverses the extent
+// tree from the node passed and reads file data.
+func (f *extentFile) read(node *disklayout.ExtentNode, off uint64, dst []byte) (int, error) {
+ // Perform a binary search for the node covering bytes starting at r.fileOff.
+ // A highly fragmented filesystem can have upto 340 entries and so linear
+ // search should be avoided. Finds the first entry which does not cover the
+ // file block we want and subtracts 1 to get the desired index.
+ fileBlk := uint32(off / f.regFile.inode.blkSize)
+ n := len(node.Entries)
+ found := sort.Search(n, func(i int) bool {
+ return node.Entries[i].Entry.FileBlock() > fileBlk
+ }) - 1
+
+ // We should be in this recursive step only if the data we want exists under
+ // the current node.
+ if found < 0 {
+ panic("searching for a file block in an extent entry which does not cover it")
+ }
+
+ read := 0
+ toRead := len(dst)
+ var curR int
+ var err error
+ for i := found; i < n && read < toRead; i++ {
+ if node.Header.Height == 0 {
+ curR, err = f.readFromExtent(node.Entries[i].Entry.(*disklayout.Extent), off, dst[read:])
+ } else {
+ curR, err = f.read(node.Entries[i].Node, off, dst[read:])
+ }
+
+ read += curR
+ off += uint64(curR)
+ if err != nil {
+ return read, err
+ }
+ }
+
+ return read, nil
+}
+
+// readFromExtent reads file data from the extent. It takes advantage of the
+// sequential nature of extents and reads file data from multiple blocks in one
+// call.
+//
+// A non-nil error indicates that this is a partial read and there is probably
+// more to read from this extent. The caller should propagate the error upward
+// and not move to the next extent in the tree.
+//
+// A subsequent call to extentReader.Read should continue reading from where we
+// left off as expected.
+func (f *extentFile) readFromExtent(ex *disklayout.Extent, off uint64, dst []byte) (int, error) {
+ curFileBlk := uint32(off / f.regFile.inode.blkSize)
+ exFirstFileBlk := ex.FileBlock()
+ exLastFileBlk := exFirstFileBlk + uint32(ex.Length) // This is exclusive.
+
+ // We should be in this recursive step only if the data we want exists under
+ // the current extent.
+ if curFileBlk < exFirstFileBlk || exLastFileBlk <= curFileBlk {
+ panic("searching for a file block in an extent which does not cover it")
+ }
+
+ curPhyBlk := uint64(curFileBlk-exFirstFileBlk) + ex.PhysicalBlock()
+ readStart := curPhyBlk*f.regFile.inode.blkSize + (off % f.regFile.inode.blkSize)
+
+ endPhyBlk := ex.PhysicalBlock() + uint64(ex.Length)
+ extentEnd := endPhyBlk * f.regFile.inode.blkSize // This is exclusive.
+
+ toRead := int(extentEnd - readStart)
+ if len(dst) < toRead {
+ toRead = len(dst)
+ }
+
+ n, _ := f.regFile.inode.dev.ReadAt(dst[:toRead], int64(readStart))
+ if n < toRead {
+ return n, syserror.EIO
+ }
+ return n, nil
+}