summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAyush Ranjan <ayushranjan@google.com>2019-07-23 15:50:35 -0700
committergVisor bot <gvisor-bot@google.com>2019-07-23 15:51:50 -0700
commitbd7708956f0c9e00e88eff9b35b22eea75e71f5f (patch)
tree08d110a027a067f202d672d524d09546c68dd6fc
parent04cbb13ce9b151cf906f42e3f18ce3a875f01f63 (diff)
ext: Added extent tree building logic.
PiperOrigin-RevId: 259628657
-rw-r--r--pkg/sentry/fs/ext/BUILD18
-rw-r--r--pkg/sentry/fs/ext/dentry.go23
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode.go11
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode_old.go6
-rw-r--r--pkg/sentry/fs/ext/ext.go10
-rw-r--r--pkg/sentry/fs/ext/extent_test.go184
-rw-r--r--pkg/sentry/fs/ext/inode.go154
-rw-r--r--pkg/sentry/fs/ext/utils.go6
8 files changed, 398 insertions, 14 deletions
diff --git a/pkg/sentry/fs/ext/BUILD b/pkg/sentry/fs/ext/BUILD
index b386f31c8..3ba278e08 100644
--- a/pkg/sentry/fs/ext/BUILD
+++ b/pkg/sentry/fs/ext/BUILD
@@ -1,18 +1,34 @@
package(licenses = ["notice"])
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
go_library(
name = "ext",
srcs = [
+ "dentry.go",
"ext.go",
+ "inode.go",
"utils.go",
],
importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/binary",
"//pkg/sentry/fs/ext/disklayout",
"//pkg/syserror",
],
)
+
+go_test(
+ name = "ext_test",
+ size = "small",
+ srcs = ["extent_test.go"],
+ embed = [":ext"],
+ deps = [
+ "//pkg/binary",
+ "//pkg/sentry/fs/ext/disklayout",
+ "@com_github_google_go-cmp//cmp:go_default_library",
+ "@com_github_google_go-cmp//cmp/cmpopts:go_default_library",
+ ],
+)
diff --git a/pkg/sentry/fs/ext/dentry.go b/pkg/sentry/fs/ext/dentry.go
new file mode 100644
index 000000000..71cd217df
--- /dev/null
+++ b/pkg/sentry/fs/ext/dentry.go
@@ -0,0 +1,23 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ext
+
+// dentry implements vfs.DentryImpl.
+type dentry struct {
+ // inode is the inode represented by this dentry. Multiple Dentries may
+ // share a single non-directory Inode (with hard links). inode is
+ // immutable.
+ inode *inode
+}
diff --git a/pkg/sentry/fs/ext/disklayout/inode.go b/pkg/sentry/fs/ext/disklayout/inode.go
index b48001910..9ab9a4988 100644
--- a/pkg/sentry/fs/ext/disklayout/inode.go
+++ b/pkg/sentry/fs/ext/disklayout/inode.go
@@ -107,16 +107,17 @@ type Inode interface {
// Flags returns InodeFlags which represents the inode flags.
Flags() InodeFlags
- // Blocks returns the underlying inode.i_block array. This field is special
- // and is used to store various kinds of things depending on the filesystem
- // version and inode type.
+ // Data returns the underlying inode.i_block array as a slice so it's
+ // modifiable. This field is special and is used to store various kinds of
+ // things depending on the filesystem version and inode type. The underlying
+ // field name in Linux is a little misleading.
// - In ext2/ext3, it contains the block map.
- // - In ext4, it contains the extent tree.
+ // - In ext4, it contains the extent tree root node.
// - For inline files, it contains the file contents.
// - For symlinks, it contains the link path (if it fits here).
//
// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#the-contents-of-inode-i-block.
- Blocks() [60]byte
+ Data() []byte
}
// Inode flags. This is not comprehensive and flags which were not used in
diff --git a/pkg/sentry/fs/ext/disklayout/inode_old.go b/pkg/sentry/fs/ext/disklayout/inode_old.go
index dc4c9d8e4..7d7cc9143 100644
--- a/pkg/sentry/fs/ext/disklayout/inode_old.go
+++ b/pkg/sentry/fs/ext/disklayout/inode_old.go
@@ -47,7 +47,7 @@ type InodeOld struct {
BlocksCountLo uint32
FlagsRaw uint32
VersionLo uint32 // This is OS dependent.
- BlocksRaw [60]byte
+ DataRaw [60]byte
Generation uint32
FileACLLo uint32
SizeHi uint32
@@ -113,5 +113,5 @@ func (in *InodeOld) LinksCount() uint16 { return in.LinksCountRaw }
// Flags implements Inode.Flags.
func (in *InodeOld) Flags() InodeFlags { return InodeFlagsFromInt(in.FlagsRaw) }
-// Blocks implements Inode.Blocks.
-func (in *InodeOld) Blocks() [60]byte { return in.BlocksRaw }
+// Data implements Inode.Data.
+func (in *InodeOld) Data() []byte { return in.DataRaw[:] }
diff --git a/pkg/sentry/fs/ext/ext.go b/pkg/sentry/fs/ext/ext.go
index 9d33be08f..8bc591c8b 100644
--- a/pkg/sentry/fs/ext/ext.go
+++ b/pkg/sentry/fs/ext/ext.go
@@ -24,8 +24,8 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
)
-// Filesystem implements vfs.FilesystemImpl.
-type Filesystem struct {
+// filesystem implements vfs.FilesystemImpl.
+type filesystem struct {
// dev is the ReadSeeker for the underlying fs device and is protected by mu.
dev io.ReadSeeker
@@ -50,9 +50,9 @@ type Filesystem struct {
bgs []disklayout.BlockGroup
}
-// newFilesystem is the Filesystem constructor.
-func newFilesystem(dev io.ReadSeeker) (*Filesystem, error) {
- fs := Filesystem{dev: dev}
+// newFilesystem is the filesystem constructor.
+func newFilesystem(dev io.ReadSeeker) (*filesystem, error) {
+ fs := filesystem{dev: dev}
var err error
fs.sb, err = readSuperBlock(dev)
diff --git a/pkg/sentry/fs/ext/extent_test.go b/pkg/sentry/fs/ext/extent_test.go
new file mode 100644
index 000000000..c8c52f836
--- /dev/null
+++ b/pkg/sentry/fs/ext/extent_test.go
@@ -0,0 +1,184 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ext
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
+)
+
+// TestExtentTree tests the extent tree building logic.
+//
+// Test tree:
+// 0.{Head}[Idx][Idx]
+// / \
+// / \
+// 1.{Head}[Ext][Ext] 2.{Head}[Idx]
+// / | \
+// [Phy] [Phy, Phy] 3.{Head}[Ext]
+// |
+// [Phy, Phy, Phy]
+//
+// Legend:
+// - Head = ExtentHeader
+// - Idx = ExtentIdx
+// - Ext = Extent
+// - Phy = Physical Block
+//
+// Please note that ext4 might not construct extent trees looking like this.
+// This is purely for testing the tree traversal logic.
+func TestExtentTree(t *testing.T) {
+ blkSize := uint64(64) // No block has more than 1 header + 4 entries.
+ mockDisk := make([]byte, blkSize*10)
+ mockInode := &inode{diskInode: &disklayout.InodeNew{}}
+
+ node3 := &disklayout.ExtentNode{
+ Header: disklayout.ExtentHeader{
+ Magic: disklayout.ExtentMagic,
+ NumEntries: 1,
+ MaxEntries: 4,
+ Height: 0,
+ },
+ Entries: []disklayout.ExtentEntryPair{
+ {
+ Entry: &disklayout.Extent{
+ FirstFileBlock: 3,
+ Length: 3,
+ StartBlockLo: 6,
+ },
+ Node: nil,
+ },
+ },
+ }
+
+ node2 := &disklayout.ExtentNode{
+ Header: disklayout.ExtentHeader{
+ Magic: disklayout.ExtentMagic,
+ NumEntries: 1,
+ MaxEntries: 4,
+ Height: 1,
+ },
+ Entries: []disklayout.ExtentEntryPair{
+ {
+ Entry: &disklayout.ExtentIdx{
+ FirstFileBlock: 3,
+ ChildBlockLo: 2,
+ },
+ Node: node3,
+ },
+ },
+ }
+
+ node1 := &disklayout.ExtentNode{
+ Header: disklayout.ExtentHeader{
+ Magic: disklayout.ExtentMagic,
+ NumEntries: 2,
+ MaxEntries: 4,
+ Height: 0,
+ },
+ Entries: []disklayout.ExtentEntryPair{
+ {
+ Entry: &disklayout.Extent{
+ FirstFileBlock: 0,
+ Length: 1,
+ StartBlockLo: 3,
+ },
+ Node: nil,
+ },
+ {
+ Entry: &disklayout.Extent{
+ FirstFileBlock: 1,
+ Length: 2,
+ StartBlockLo: 4,
+ },
+ Node: nil,
+ },
+ },
+ }
+
+ node0 := &disklayout.ExtentNode{
+ Header: disklayout.ExtentHeader{
+ Magic: disklayout.ExtentMagic,
+ NumEntries: 2,
+ MaxEntries: 4,
+ Height: 2,
+ },
+ Entries: []disklayout.ExtentEntryPair{
+ {
+ Entry: &disklayout.ExtentIdx{
+ FirstFileBlock: 0,
+ ChildBlockLo: 0,
+ },
+ Node: node1,
+ },
+ {
+ Entry: &disklayout.ExtentIdx{
+ FirstFileBlock: 3,
+ ChildBlockLo: 1,
+ },
+ Node: node2,
+ },
+ },
+ }
+
+ writeTree(mockInode, mockDisk, node0, blkSize)
+
+ r := bytes.NewReader(mockDisk)
+ if err := mockInode.buildExtTree(r, blkSize); err != nil {
+ t.Fatalf("inode.buildExtTree failed: %v", err)
+ }
+
+ if diff := cmp.Diff(&mockInode.root, node0, cmpopts.IgnoreUnexported(disklayout.ExtentNode{})); diff != "" {
+ t.Errorf("extent tree mismatch (-want +got):\n%s", diff)
+ }
+}
+
+// writeTree writes the tree represented by `root` to the inode and disk passed.
+func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, blkSize uint64) {
+ rootData := binary.Marshal(nil, binary.LittleEndian, root.Header)
+ for _, ep := range root.Entries {
+ rootData = binary.Marshal(rootData, binary.LittleEndian, ep.Entry)
+ }
+
+ copy(in.diskInode.Data(), rootData)
+
+ if root.Header.Height > 0 {
+ for _, ep := range root.Entries {
+ writeTreeToDisk(disk, ep, blkSize)
+ }
+ }
+}
+
+// writeTreeToDisk is the recursive step for writeTree which writes the tree
+// on the disk only.
+func writeTreeToDisk(disk []byte, curNode disklayout.ExtentEntryPair, blkSize uint64) {
+ nodeData := binary.Marshal(nil, binary.LittleEndian, curNode.Node.Header)
+ for _, ep := range curNode.Node.Entries {
+ nodeData = binary.Marshal(nodeData, binary.LittleEndian, ep.Entry)
+ }
+
+ copy(disk[curNode.Entry.PhysicalBlock()*blkSize:], nodeData)
+
+ if curNode.Node.Header.Height > 0 {
+ for _, ep := range curNode.Node.Entries {
+ writeTreeToDisk(disk, ep, blkSize)
+ }
+ }
+}
diff --git a/pkg/sentry/fs/ext/inode.go b/pkg/sentry/fs/ext/inode.go
new file mode 100644
index 000000000..5bf9dbfa3
--- /dev/null
+++ b/pkg/sentry/fs/ext/inode.go
@@ -0,0 +1,154 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ext
+
+import (
+ "io"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// inode represents an ext inode.
+type inode struct {
+ // refs is a reference count. refs is accessed using atomic memory operations.
+ refs int64
+
+ // diskInode gives us access to the inode struct on disk. Immutable.
+ diskInode disklayout.Inode
+
+ // root is the root extent node. This lives in the 60 byte diskInode.Blocks().
+ // Immutable.
+ root disklayout.ExtentNode
+}
+
+// incRef increments the inode ref count.
+func (in *inode) incRef() {
+ atomic.AddInt64(&in.refs, 1)
+}
+
+// tryIncRef tries to increment the ref count. Returns true if successful.
+func (in *inode) tryIncRef() bool {
+ for {
+ refs := atomic.LoadInt64(&in.refs)
+ if refs == 0 {
+ return false
+ }
+ if atomic.CompareAndSwapInt64(&in.refs, refs, refs+1) {
+ return true
+ }
+ }
+}
+
+// decRef decrements the inode ref count.
+func (in *inode) decRef() {
+ if refs := atomic.AddInt64(&in.refs, -1); refs < 0 {
+ panic("ext.inode.decRef() called without holding a reference")
+ }
+}
+
+// buildExtTree builds the extent tree by reading it from disk by doing
+// running a simple DFS. It first reads the root node from the inode struct in
+// memory. Then it recursively builds the rest of the tree by reading it off
+// disk.
+//
+// Preconditions:
+// - Must have mutual exclusion on device fd.
+// - Inode flag InExtents must be set.
+func (in *inode) buildExtTree(dev io.ReadSeeker, blkSize uint64) error {
+ rootNodeData := in.diskInode.Data()
+
+ var rootHeader disklayout.ExtentHeader
+ binary.Unmarshal(rootNodeData[:disklayout.ExtentStructsSize], binary.LittleEndian, &rootHeader)
+
+ // Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries.
+ if rootHeader.NumEntries > 4 {
+ // read(2) specifies that EINVAL should be returned if the file is unsuitable
+ // for reading.
+ return syserror.EINVAL
+ }
+
+ rootEntries := make([]disklayout.ExtentEntryPair, rootHeader.NumEntries)
+ for i, off := uint16(0), disklayout.ExtentStructsSize; i < rootHeader.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize {
+ var curEntry disklayout.ExtentEntry
+ if rootHeader.Height == 0 {
+ // Leaf node.
+ curEntry = &disklayout.Extent{}
+ } else {
+ // Internal node.
+ curEntry = &disklayout.ExtentIdx{}
+ }
+ binary.Unmarshal(rootNodeData[off:off+disklayout.ExtentStructsSize], binary.LittleEndian, curEntry)
+ rootEntries[i].Entry = curEntry
+ }
+
+ // If this node is internal, perform DFS.
+ if rootHeader.Height > 0 {
+ for i := uint16(0); i < rootHeader.NumEntries; i++ {
+ var err error
+ if rootEntries[i].Node, err = buildExtTreeFromDisk(dev, rootEntries[i].Entry, blkSize); err != nil {
+ return err
+ }
+ }
+ }
+
+ in.root = disklayout.ExtentNode{rootHeader, rootEntries}
+ return nil
+}
+
+// buildExtTreeFromDisk reads the extent tree nodes from disk and recursively
+// builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to
+// by the ExtentEntry.
+//
+// Preconditions: Must have mutual exclusion on device fd.
+func buildExtTreeFromDisk(dev io.ReadSeeker, entry disklayout.ExtentEntry, blkSize uint64) (*disklayout.ExtentNode, error) {
+ var header disklayout.ExtentHeader
+ off := entry.PhysicalBlock() * blkSize
+ if err := readFromDisk(dev, int64(off), &header); err != nil {
+ return nil, err
+ }
+
+ entries := make([]disklayout.ExtentEntryPair, header.NumEntries)
+ for i, off := uint16(0), off+disklayout.ExtentStructsSize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize {
+ var curEntry disklayout.ExtentEntry
+ if header.Height == 0 {
+ // Leaf node.
+ curEntry = &disklayout.Extent{}
+ } else {
+ // Internal node.
+ curEntry = &disklayout.ExtentIdx{}
+ }
+
+ if err := readFromDisk(dev, int64(off), curEntry); err != nil {
+ return nil, err
+ }
+ entries[i].Entry = curEntry
+ }
+
+ // If this node is internal, perform DFS.
+ if header.Height > 0 {
+ for i := uint16(0); i < header.NumEntries; i++ {
+ var err error
+ entries[i].Node, err = buildExtTreeFromDisk(dev, entries[i].Entry, blkSize)
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
+
+ return &disklayout.ExtentNode{header, entries}, nil
+}
diff --git a/pkg/sentry/fs/ext/utils.go b/pkg/sentry/fs/ext/utils.go
index 861ef9a73..71e46b2c4 100644
--- a/pkg/sentry/fs/ext/utils.go
+++ b/pkg/sentry/fs/ext/utils.go
@@ -27,6 +27,8 @@ import (
//
// All disk reads should use this helper so we avoid reading from stale
// previously used offsets. This function forces the offset parameter.
+//
+// Precondition: Must have mutual exclusion on device fd.
func readFromDisk(dev io.ReadSeeker, abOff int64, v interface{}) error {
if _, err := dev.Seek(abOff, io.SeekStart); err != nil {
return syserror.EIO
@@ -42,6 +44,8 @@ func readFromDisk(dev io.ReadSeeker, abOff int64, v interface{}) error {
// readSuperBlock reads the SuperBlock from block group 0 in the underlying
// device. There are three versions of the superblock. This function identifies
// and returns the correct version.
+//
+// Precondition: Must have mutual exclusion on device fd.
func readSuperBlock(dev io.ReadSeeker) (disklayout.SuperBlock, error) {
var sb disklayout.SuperBlock = &disklayout.SuperBlockOld{}
if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil {
@@ -82,6 +86,8 @@ func blockGroupsCount(sb disklayout.SuperBlock) uint64 {
// readBlockGroups reads the block group descriptor table from block group 0 in
// the underlying device.
+//
+// Precondition: Must have mutual exclusion on device fd.
func readBlockGroups(dev io.ReadSeeker, sb disklayout.SuperBlock) ([]disklayout.BlockGroup, error) {
bgCount := blockGroupsCount(sb)
bgdSize := uint64(sb.BgDescSize())