summary refs log tree commit diff homepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- pkg/sentry/fs/ext/disklayout/inode_new.go 2
-rw-r--r-- pkg/sentry/fs/ext/disklayout/inode_old.go 6
-rw-r--r-- pkg/sentry/fs/ext/disklayout/inode_test.go 2
-rw-r--r-- pkg/sentry/fs/ext/disklayout/superblock_old.go 2
-rw-r--r-- pkg/sentry/fs/ext/ext.go 48
-rw-r--r-- pkg/sentry/fs/ext/extent_test.go 2
-rw-r--r-- pkg/sentry/fs/ext/inode.go 71
-rw-r--r-- pkg/sentry/fs/ext/utils.go 6
8 files changed, 110 insertions, 29 deletions
diff --git a/pkg/sentry/fs/ext/disklayout/inode_new.go b/pkg/sentry/fs/ext/disklayout/inode_new.go
index 4f5348372..8f9f574ce 100644
--- a/pkg/sentry/fs/ext/disklayout/inode_new.go
+++ b/pkg/sentry/fs/ext/disklayout/inode_new.go
@@ -62,7 +62,7 @@ func (in *InodeNew) Size() uint64 {
// InodeSize implements Inode.InodeSize.
func (in *InodeNew) InodeSize() uint16 {
- return oldInodeSize + in.ExtraInodeSize
+ return OldInodeSize + in.ExtraInodeSize
}
// ChangeTime implements Inode.ChangeTime.
diff --git a/pkg/sentry/fs/ext/disklayout/inode_old.go b/pkg/sentry/fs/ext/disklayout/inode_old.go
index 7d7cc9143..db25b11b6 100644
--- a/pkg/sentry/fs/ext/disklayout/inode_old.go
+++ b/pkg/sentry/fs/ext/disklayout/inode_old.go
@@ -21,8 +21,8 @@ import (
)
const (
- // oldInodeSize is the inode size in ext2/ext3.
- oldInodeSize = 128
+ // OldInodeSize is the inode size in ext2/ext3.
+ OldInodeSize = 128
)
// InodeOld implements Inode interface. It emulates ext2/ext3 inode struct.
@@ -85,7 +85,7 @@ func (in *InodeOld) Size() uint64 {
}
// InodeSize implements Inode.InodeSize.
-func (in *InodeOld) InodeSize() uint16 { return oldInodeSize }
+func (in *InodeOld) InodeSize() uint16 { return OldInodeSize }
// AccessTime implements Inode.AccessTime.
func (in *InodeOld) AccessTime() time.Time {
diff --git a/pkg/sentry/fs/ext/disklayout/inode_test.go b/pkg/sentry/fs/ext/disklayout/inode_test.go
index 9cae9e4f0..dd03ee50e 100644
--- a/pkg/sentry/fs/ext/disklayout/inode_test.go
+++ b/pkg/sentry/fs/ext/disklayout/inode_test.go
@@ -24,7 +24,7 @@ import (
// TestInodeSize tests that the inode structs are of the correct size.
func TestInodeSize(t *testing.T) {
- assertSize(t, InodeOld{}, oldInodeSize)
+ assertSize(t, InodeOld{}, OldInodeSize)
// This was updated from 156 bytes to 160 bytes in Oct 2015.
assertSize(t, InodeNew{}, 160)
diff --git a/pkg/sentry/fs/ext/disklayout/superblock_old.go b/pkg/sentry/fs/ext/disklayout/superblock_old.go
index aada8b550..5a64aaaa1 100644
--- a/pkg/sentry/fs/ext/disklayout/superblock_old.go
+++ b/pkg/sentry/fs/ext/disklayout/superblock_old.go
@@ -81,7 +81,7 @@ func (sb *SuperBlockOld) ClusterSize() uint64 { return 1 << (10 + sb.LogClusterS
func (sb *SuperBlockOld) ClustersPerGroup() uint32 { return sb.ClustersPerGroupRaw }
// InodeSize implements SuperBlock.InodeSize.
-func (sb *SuperBlockOld) InodeSize() uint16 { return oldInodeSize }
+func (sb *SuperBlockOld) InodeSize() uint16 { return OldInodeSize }
// InodesPerGroup implements SuperBlock.InodesPerGroup.
func (sb *SuperBlockOld) InodesPerGroup() uint32 { return sb.InodesPerGroupRaw }
diff --git a/pkg/sentry/fs/ext/ext.go b/pkg/sentry/fs/ext/ext.go
index 8bc591c8b..7f4287b01 100644
--- a/pkg/sentry/fs/ext/ext.go
+++ b/pkg/sentry/fs/ext/ext.go
@@ -26,21 +26,29 @@ import (
// filesystem implements vfs.FilesystemImpl.
type filesystem struct {
- // dev is the ReadSeeker for the underlying fs device and is protected by mu.
- dev io.ReadSeeker
+ // mu serializes changes to the Dentry tree and the usage of the read seeker.
+ mu sync.Mutex
- // mu synchronizes the usage of dev. The ext filesystems take locality into
- // condsideration, i.e. data blocks of a file will tend to be placed close
- // together. On a spinning disk, locality reduces the amount of movement of
- // the head hence speeding up IO operations. On an SSD there are no moving
- // parts but locality increases the size of each transer request. Hence,
- // having mutual exclusion on the read seeker while reading a file *should*
- // help in achieving the intended performance gains.
+ // dev is the ReadSeeker for the underlying fs device. It is protected by mu.
+ //
+ // The ext filesystems aim to maximize locality, i.e. place all the data
+ // blocks of a file close together. On a spinning disk, locality reduces the
+ // amount of movement of the head hence speeding up IO operations. On an SSD
+ // there are no moving parts but locality increases the size of each transfer
+ // request. Hence, having mutual exclusion on the read seeker while reading a
+ // file *should* help in achieving the intended performance gains.
//
// Note: This synchronization was not coupled with the ReadSeeker itself
// because we want to synchronize across read/seek operations for the
// performance gains mentioned above. This helps enforce one-file-at-a-time IO.
- mu sync.Mutex
+ dev io.ReadSeeker
+
+ // inodeCache maps absolute inode numbers to the corresponding Inode struct.
+ // Inodes should be removed from this once their reference count hits 0.
+ //
+ // Protected by mu because every addition and removal from this corresponds to
+ // a change in the dentry tree.
+ inodeCache map[uint32]*inode
// sb represents the filesystem superblock. Immutable after initialization.
sb disklayout.SuperBlock
@@ -52,7 +60,7 @@ type filesystem struct {
// newFilesystem is the filesystem constructor.
func newFilesystem(dev io.ReadSeeker) (*filesystem, error) {
- fs := filesystem{dev: dev}
+ fs := filesystem{dev: dev, inodeCache: make(map[uint32]*inode)}
var err error
fs.sb, err = readSuperBlock(dev)
@@ -73,3 +81,21 @@ func newFilesystem(dev io.ReadSeeker) (*filesystem, error) {
return &fs, nil
}
+
+// getOrCreateInode gets the inode corresponding to the inode number passed in.
+// It creates a new one with the given inode number if one does not exist.
+//
+// Preconditions: must be holding fs.mu.
+func (fs *filesystem) getOrCreateInode(inodeNum uint32) (*inode, error) {
+ if in, ok := fs.inodeCache[inodeNum]; ok {
+ return in, nil
+ }
+
+ in, err := newInode(fs.dev, fs.sb, fs.bgs, inodeNum)
+ if err != nil {
+ return nil, err
+ }
+
+ fs.inodeCache[inodeNum] = in
+ return in, nil
+}
diff --git a/pkg/sentry/fs/ext/extent_test.go b/pkg/sentry/fs/ext/extent_test.go
index 9e55187e5..b3f342c8e 100644
--- a/pkg/sentry/fs/ext/extent_test.go
+++ b/pkg/sentry/fs/ext/extent_test.go
@@ -146,7 +146,7 @@ func TestExtentTree(t *testing.T) {
}
opt := cmpopts.IgnoreUnexported(disklayout.ExtentIdx{}, disklayout.ExtentHeader{})
- if diff := cmp.Diff(&mockInode.root, node0, opt); diff != "" {
+ if diff := cmp.Diff(mockInode.root, node0, opt); diff != "" {
t.Errorf("extent tree mismatch (-want +got):\n%s", diff)
}
}
diff --git a/pkg/sentry/fs/ext/inode.go b/pkg/sentry/fs/ext/inode.go
index 5bf9dbfa3..df1ea0bda 100644
--- a/pkg/sentry/fs/ext/inode.go
+++ b/pkg/sentry/fs/ext/inode.go
@@ -28,12 +28,16 @@ type inode struct {
// refs is a reference count. refs is accessed using atomic memory operations.
refs int64
+ // inodeNum is the inode number of this inode on disk. This is used to
+ // identify inodes within the ext filesystem.
+ inodeNum uint32
+
// diskInode gives us access to the inode struct on disk. Immutable.
diskInode disklayout.Inode
// root is the root extent node. This lives in the 60 byte diskInode.Blocks().
- // Immutable.
- root disklayout.ExtentNode
+ // Immutable. Nil if the inode does not use extents.
+ root *disklayout.ExtentNode
}
// incRef increments the inode ref count.
@@ -54,20 +58,71 @@ func (in *inode) tryIncRef() bool {
}
}
-// decRef decrements the inode ref count.
-func (in *inode) decRef() {
- if refs := atomic.AddInt64(&in.refs, -1); refs < 0 {
+// decRef decrements the inode ref count and releases the inode resources if
+// the ref count hits 0.
+//
+// Preconditions: Must have locked fs.mu.
+func (in *inode) decRef(fs *filesystem) {
+ if refs := atomic.AddInt64(&in.refs, -1); refs == 0 {
+ delete(fs.inodeCache, in.inodeNum)
+ } else if refs < 0 {
panic("ext.inode.decRef() called without holding a reference")
}
}
+// newInode is the inode constructor. Reads the inode off disk. Identifies
+// inodes based on the absolute inode number on disk.
+//
+// Preconditions: Must hold the mutex of the filesystem containing dev.
+func newInode(dev io.ReadSeeker, sb disklayout.SuperBlock, bgs []disklayout.BlockGroup, inodeNum uint32) (*inode, error) {
+ if inodeNum == 0 {
+ panic("inode number 0 on ext filesystems is not possible")
+ }
+
+ in := &inode{refs: 1, inodeNum: inodeNum}
+ inodeRecordSize := sb.InodeSize()
+ if inodeRecordSize == disklayout.OldInodeSize {
+ in.diskInode = &disklayout.InodeOld{}
+ } else {
+ in.diskInode = &disklayout.InodeNew{}
+ }
+
+ // Calculate where the inode is actually placed.
+ inodesPerGrp := sb.InodesPerGroup()
+ blkSize := sb.BlockSize()
+ inodeTableOff := bgs[getBGNum(inodeNum, inodesPerGrp)].InodeTable() * blkSize
+ inodeOff := inodeTableOff + uint64(uint32(inodeRecordSize)*getBGOff(inodeNum, inodesPerGrp))
+
+ // Read it from disk and figure out which type of inode this is.
+ if err := readFromDisk(dev, int64(inodeOff), in.diskInode); err != nil {
+ return nil, err
+ }
+
+ if in.diskInode.Flags().Extents {
+ in.buildExtTree(dev, blkSize)
+ }
+
+ return in, nil
+}
+
+// getBGNum returns the block group number that a given inode belongs to.
+func getBGNum(inodeNum uint32, inodesPerGrp uint32) uint32 {
+ return (inodeNum - 1) / inodesPerGrp
+}
+
+// getBGOff returns the offset at which the given inode lives in the block
+// group's inode table, i.e. the index of the inode in the inode table.
+func getBGOff(inodeNum uint32, inodesPerGrp uint32) uint32 {
+ return (inodeNum - 1) % inodesPerGrp
+}
+
// buildExtTree builds the extent tree by reading it from disk using a
// simple DFS. It first reads the root node from the inode struct in
// memory. Then it recursively builds the rest of the tree by reading it off
// disk.
//
// Preconditions:
-// - Must have mutual exclusion on device fd.
+// - Must hold the mutex of the filesystem containing dev.
// - Inode flag InExtents must be set.
func (in *inode) buildExtTree(dev io.ReadSeeker, blkSize uint64) error {
rootNodeData := in.diskInode.Data()
@@ -106,7 +161,7 @@ func (in *inode) buildExtTree(dev io.ReadSeeker, blkSize uint64) error {
}
}
- in.root = disklayout.ExtentNode{rootHeader, rootEntries}
+ in.root = &disklayout.ExtentNode{rootHeader, rootEntries}
return nil
}
@@ -114,7 +169,7 @@ func (in *inode) buildExtTree(dev io.ReadSeeker, blkSize uint64) error {
// builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to
// by the ExtentEntry.
//
-// Preconditions: Must have mutual exclusion on device fd.
+// Preconditions: Must hold the mutex of the filesystem containing dev.
func buildExtTreeFromDisk(dev io.ReadSeeker, entry disklayout.ExtentEntry, blkSize uint64) (*disklayout.ExtentNode, error) {
var header disklayout.ExtentHeader
off := entry.PhysicalBlock() * blkSize
diff --git a/pkg/sentry/fs/ext/utils.go b/pkg/sentry/fs/ext/utils.go
index 71e46b2c4..3472c5fa8 100644
--- a/pkg/sentry/fs/ext/utils.go
+++ b/pkg/sentry/fs/ext/utils.go
@@ -28,7 +28,7 @@ import (
// All disk reads should use this helper so we avoid reading from stale
// previously used offsets. This function forces the offset parameter.
//
-// Precondition: Must have mutual exclusion on device fd.
+// Precondition: Must hold the mutex of the filesystem containing dev.
func readFromDisk(dev io.ReadSeeker, abOff int64, v interface{}) error {
if _, err := dev.Seek(abOff, io.SeekStart); err != nil {
return syserror.EIO
@@ -45,7 +45,7 @@ func readFromDisk(dev io.ReadSeeker, abOff int64, v interface{}) error {
// device. There are three versions of the superblock. This function identifies
// and returns the correct version.
//
-// Precondition: Must have mutual exclusion on device fd.
+// Precondition: Must hold the mutex of the filesystem containing dev.
func readSuperBlock(dev io.ReadSeeker) (disklayout.SuperBlock, error) {
var sb disklayout.SuperBlock = &disklayout.SuperBlockOld{}
if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil {
@@ -87,7 +87,7 @@ func blockGroupsCount(sb disklayout.SuperBlock) uint64 {
// readBlockGroups reads the block group descriptor table from block group 0 in
// the underlying device.
//
-// Precondition: Must have mutual exclusion on device fd.
+// Precondition: Must hold the mutex of the filesystem containing dev.
func readBlockGroups(dev io.ReadSeeker, sb disklayout.SuperBlock) ([]disklayout.BlockGroup, error) {
bgCount := blockGroupsCount(sb)
bgdSize := uint64(sb.BgDescSize())