From b765eb45894ea426d2c6d167b6ceb662db6ff4d2 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Mon, 29 Jul 2019 18:32:45 -0700 Subject: ext: inode implementations. PiperOrigin-RevId: 260624470 --- pkg/sentry/fs/ext/BUILD | 24 ++++++ pkg/sentry/fs/ext/block_map_file.go | 61 ++++++++++++++ pkg/sentry/fs/ext/dentry.go | 2 + pkg/sentry/fs/ext/directory.go | 36 +++++++++ pkg/sentry/fs/ext/ext.go | 2 +- pkg/sentry/fs/ext/extent_file.go | 147 ++++++++++++++++++++++++++++++++++ pkg/sentry/fs/ext/extent_test.go | 14 +++- pkg/sentry/fs/ext/filesystem.go | 4 +- pkg/sentry/fs/ext/inline_file.go | 65 +++++++++++++++ pkg/sentry/fs/ext/inode.go | 154 ++++++++++++------------------------ pkg/sentry/fs/ext/named_pipe.go | 40 ++++++++++ pkg/sentry/fs/ext/regular_file.go | 90 +++++++++++++++++++++ pkg/sentry/fs/ext/symlink.go | 59 ++++++++++++++ pkg/sentry/fs/ext/utils.go | 7 +- 14 files changed, 587 insertions(+), 118 deletions(-) create mode 100644 pkg/sentry/fs/ext/block_map_file.go create mode 100644 pkg/sentry/fs/ext/directory.go create mode 100644 pkg/sentry/fs/ext/extent_file.go create mode 100644 pkg/sentry/fs/ext/inline_file.go create mode 100644 pkg/sentry/fs/ext/named_pipe.go create mode 100644 pkg/sentry/fs/ext/regular_file.go create mode 100644 pkg/sentry/fs/ext/symlink.go (limited to 'pkg') diff --git a/pkg/sentry/fs/ext/BUILD b/pkg/sentry/fs/ext/BUILD index 2c15875f5..60f6debaf 100644 --- a/pkg/sentry/fs/ext/BUILD +++ b/pkg/sentry/fs/ext/BUILD @@ -1,14 +1,35 @@ package(licenses = ["notice"]) load("//tools/go_stateify:defs.bzl", "go_library", "go_test") +load("//tools/go_generics:defs.bzl", "go_template_instance") + +go_template_instance( + name = "dentry_list", + out = "dentry_list.go", + package = "ext", + prefix = "dentry", + template = "//pkg/ilist:generic_list", + types = { + "Element": "*dentry", + "Linker": "*dentry", + }, +) go_library( name = "ext", srcs = [ + "block_map_file.go", "dentry.go", + "dentry_list.go", + "directory.go", "ext.go", + "extent_file.go", "filesystem.go", + "inline_file.go", "inode.go", + "named_pipe.go", + "regular_file.go", + "symlink.go", "utils.go", ], importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext", @@ -17,8 +38,11 @@ go_library( "//pkg/abi/linux", "//pkg/binary", "//pkg/sentry/context", + "//pkg/sentry/fs", "//pkg/sentry/fs/ext/disklayout", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/pipe", + "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/syserror", ], diff --git a/pkg/sentry/fs/ext/block_map_file.go b/pkg/sentry/fs/ext/block_map_file.go new file mode 100644 index 000000000..eb0b35e36 --- /dev/null +++ b/pkg/sentry/fs/ext/block_map_file.go @@ -0,0 +1,61 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ext + +import ( + "io" + + "gvisor.dev/gvisor/pkg/binary" +) + +// blockMapFile is a type of regular file which uses direct/indirect block +// addressing to store file data. This was deprecated in ext4. +type blockMapFile struct { + regFile regularFile + + // fileToPhysBlks maps the file block numbers to the physical block numbers. + // the physical block number for the (i)th file block is stored in the (i)th + // index. This is initialized (at max) with the first 12 entries. The rest + // have to be read in from disk when required. + fileToPhysBlks []uint32 +} + +// Compiles only if blockMapFile implements fileReader. +var _ fileReader = (*blockMapFile)(nil) + +// Read implements fileReader.getFileReader. +func (f *blockMapFile) getFileReader(dev io.ReadSeeker, blkSize uint64, offset uint64) io.Reader { + panic("unimplemented") +} + +// newBlockMapFile is the blockMapFile constructor. It initializes the file to +// physical blocks map with (at most) the first 12 (direct) blocks. +func newBlockMapFile(blkSize uint64, regFile regularFile) (*blockMapFile, error) { + file := &blockMapFile{regFile: regFile} + file.regFile.impl = file + + toFill := uint64(12) + blksUsed := regFile.blksUsed(blkSize) + if blksUsed < toFill { + toFill = blksUsed + } + + blkMap := regFile.inode.diskInode.Data() + file.fileToPhysBlks = make([]uint32, toFill) + for i := uint64(0); i < toFill; i++ { + binary.Unmarshal(blkMap[i*4:(i+1)*4], binary.LittleEndian, &file.fileToPhysBlks[i]) + } + return file, nil +} diff --git a/pkg/sentry/fs/ext/dentry.go b/pkg/sentry/fs/ext/dentry.go index 054fb42b6..19c9b3b2d 100644 --- a/pkg/sentry/fs/ext/dentry.go +++ b/pkg/sentry/fs/ext/dentry.go @@ -26,6 +26,8 @@ type dentry struct { // share a single non-directory Inode (with hard links). inode is // immutable. inode *inode + // dentryEntry links Dentries into their parent directory.childList. + dentryEntry } // Compiles only if dentry implements vfs.DentryImpl. diff --git a/pkg/sentry/fs/ext/directory.go b/pkg/sentry/fs/ext/directory.go new file mode 100644 index 000000000..ab2b59e44 --- /dev/null +++ b/pkg/sentry/fs/ext/directory.go @@ -0,0 +1,36 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ext + +// directory represents a directory inode. It holds the childList in memory. +type directory struct { + inode inode + + // childList is a list containing (1) child Dentries and (2) fake Dentries + // (with inode == nil) that represent the iteration position of + // directoryFDs. childList is used to support directoryFD.IterDirents() + // efficiently. childList is immutable. + childList dentryList + + // TODO(b/134676337): Add directory navigators. +} + +// newDirectroy is the directory constructor. +func newDirectroy(inode inode) *directory { + // TODO(b/134676337): initialize childList. + file := &directory{inode: inode} + file.inode.impl = file + return file +} diff --git a/pkg/sentry/fs/ext/ext.go b/pkg/sentry/fs/ext/ext.go index 10e235fb1..2380f15da 100644 --- a/pkg/sentry/fs/ext/ext.go +++ b/pkg/sentry/fs/ext/ext.go @@ -88,7 +88,7 @@ func (fstype filesystemType) NewFilesystem(ctx context.Context, creds *auth.Cred return nil, nil, err } - rootInode, err := fs.getOrCreateInode(disklayout.RootDirInode) + rootInode, err := fs.getOrCreateInode(ctx, disklayout.RootDirInode) if err != nil { return nil, nil, err } diff --git a/pkg/sentry/fs/ext/extent_file.go b/pkg/sentry/fs/ext/extent_file.go new file mode 100644 index 000000000..86583881d --- /dev/null +++ b/pkg/sentry/fs/ext/extent_file.go @@ -0,0 +1,147 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ext + +import ( + "io" + + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" + "gvisor.dev/gvisor/pkg/syserror" +) + +// extentFile is a type of regular file which uses extents to store file data. +type extentFile struct { + regFile regularFile + + // root is the root extent node. This lives in the 60 byte diskInode.Data(). + // Immutable. + root disklayout.ExtentNode +} + +// Compiles only if extentFile implements fileReader. +var _ fileReader = (*extentFile)(nil) + +// Read implements fileReader.getFileReader. +func (f *extentFile) getFileReader(dev io.ReadSeeker, blkSize uint64, offset uint64) io.Reader { + panic("unimplemented") +} + +// newExtentFile is the extent file constructor. It reads the entire extent +// tree into memory. +// +// Preconditions: Must hold the mutex of the filesystem containing dev. +// TODO(b/134676337): Build extent tree on demand to reduce memory usage. +func newExtentFile(dev io.ReadSeeker, blkSize uint64, regFile regularFile) (*extentFile, error) { + file := &extentFile{regFile: regFile} + file.regFile.impl = file + err := file.buildExtTree(dev, blkSize) + if err != nil { + return nil, err + } + return file, nil +} + +// buildExtTree builds the extent tree by reading it from disk by doing +// running a simple DFS. It first reads the root node from the inode struct in +// memory. Then it recursively builds the rest of the tree by reading it off +// disk. +// +// Preconditions: +// - Must hold the mutex of the filesystem containing dev. +// - Inode flag InExtents must be set. +func (f *extentFile) buildExtTree(dev io.ReadSeeker, blkSize uint64) error { + rootNodeData := f.regFile.inode.diskInode.Data() + + binary.Unmarshal(rootNodeData[:disklayout.ExtentStructsSize], binary.LittleEndian, &f.root.Header) + + // Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries. + if f.root.Header.NumEntries > 4 { + // read(2) specifies that EINVAL should be returned if the file is unsuitable + // for reading. + return syserror.EINVAL + } + + f.root.Entries = make([]disklayout.ExtentEntryPair, f.root.Header.NumEntries) + for i, off := uint16(0), disklayout.ExtentStructsSize; i < f.root.Header.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize { + var curEntry disklayout.ExtentEntry + if f.root.Header.Height == 0 { + // Leaf node. + curEntry = &disklayout.Extent{} + } else { + // Internal node. + curEntry = &disklayout.ExtentIdx{} + } + binary.Unmarshal(rootNodeData[off:off+disklayout.ExtentStructsSize], binary.LittleEndian, curEntry) + f.root.Entries[i].Entry = curEntry + } + + // If this node is internal, perform DFS. + if f.root.Header.Height > 0 { + for i := uint16(0); i < f.root.Header.NumEntries; i++ { + var err error + if f.root.Entries[i].Node, err = buildExtTreeFromDisk(dev, f.root.Entries[i].Entry, blkSize); err != nil { + return err + } + } + } + + return nil +} + +// buildExtTreeFromDisk reads the extent tree nodes from disk and recursively +// builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to +// by the ExtentEntry. +// +// Preconditions: Must hold the mutex of the filesystem containing dev. +func buildExtTreeFromDisk(dev io.ReadSeeker, entry disklayout.ExtentEntry, blkSize uint64) (*disklayout.ExtentNode, error) { + var header disklayout.ExtentHeader + off := entry.PhysicalBlock() * blkSize + err := readFromDisk(dev, int64(off), &header) + if err != nil { + return nil, err + } + + entries := make([]disklayout.ExtentEntryPair, header.NumEntries) + for i, off := uint16(0), off+disklayout.ExtentStructsSize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize { + var curEntry disklayout.ExtentEntry + if header.Height == 0 { + // Leaf node. + curEntry = &disklayout.Extent{} + } else { + // Internal node. + curEntry = &disklayout.ExtentIdx{} + } + + err := readFromDisk(dev, int64(off), curEntry) + if err != nil { + return nil, err + } + entries[i].Entry = curEntry + } + + // If this node is internal, perform DFS. + if header.Height > 0 { + for i := uint16(0); i < header.NumEntries; i++ { + var err error + entries[i].Node, err = buildExtTreeFromDisk(dev, entries[i].Entry, blkSize) + if err != nil { + return nil, err + } + } + } + + return &disklayout.ExtentNode{header, entries}, nil +} diff --git a/pkg/sentry/fs/ext/extent_test.go b/pkg/sentry/fs/ext/extent_test.go index b3f342c8e..01251d0a7 100644 --- a/pkg/sentry/fs/ext/extent_test.go +++ b/pkg/sentry/fs/ext/extent_test.go @@ -47,7 +47,13 @@ import ( func TestExtentTree(t *testing.T) { blkSize := uint64(64) // No block has more than 1 header + 4 entries. mockDisk := make([]byte, blkSize*10) - mockInode := &inode{diskInode: &disklayout.InodeNew{}} + mockExtentFile := extentFile{ + regFile: regularFile{ + inode: inode{ + diskInode: &disklayout.InodeNew{}, + }, + }, + } node3 := &disklayout.ExtentNode{ Header: disklayout.ExtentHeader{ @@ -138,15 +144,15 @@ func TestExtentTree(t *testing.T) { }, } - writeTree(mockInode, mockDisk, node0, blkSize) + writeTree(&mockExtentFile.regFile.inode, mockDisk, node0, blkSize) r := bytes.NewReader(mockDisk) - if err := mockInode.buildExtTree(r, blkSize); err != nil { + if err := mockExtentFile.buildExtTree(r, blkSize); err != nil { t.Fatalf("inode.buildExtTree failed: %v", err) } opt := cmpopts.IgnoreUnexported(disklayout.ExtentIdx{}, disklayout.ExtentHeader{}) - if diff := cmp.Diff(mockInode.root, node0, opt); diff != "" { + if diff := cmp.Diff(&mockExtentFile.root, node0, opt); diff != "" { t.Errorf("extent tree mismatch (-want +got):\n%s", diff) } } diff --git a/pkg/sentry/fs/ext/filesystem.go b/pkg/sentry/fs/ext/filesystem.go index 7150e75a5..32ca11026 100644 --- a/pkg/sentry/fs/ext/filesystem.go +++ b/pkg/sentry/fs/ext/filesystem.go @@ -70,12 +70,12 @@ var _ vfs.FilesystemImpl = (*filesystem)(nil) // It creates a new one with the given inode number if one does not exist. // // Preconditions: must be holding fs.mu. -func (fs *filesystem) getOrCreateInode(inodeNum uint32) (*inode, error) { +func (fs *filesystem) getOrCreateInode(ctx context.Context, inodeNum uint32) (*inode, error) { if in, ok := fs.inodeCache[inodeNum]; ok { return in, nil } - in, err := newInode(fs.dev, fs.sb, fs.bgs, inodeNum) + in, err := newInode(ctx, fs.dev, fs.sb, fs.bgs, inodeNum) if err != nil { return nil, err } diff --git a/pkg/sentry/fs/ext/inline_file.go b/pkg/sentry/fs/ext/inline_file.go new file mode 100644 index 000000000..dd93ee2e1 --- /dev/null +++ b/pkg/sentry/fs/ext/inline_file.go @@ -0,0 +1,65 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ext + +import ( + "io" +) + +// inlineFile is a type of regular file. All the data here is stored in the +// inode.Data() array. +type inlineFile struct { + regFile regularFile +} + +// Compiles only if inlineFile implements fileReader. +var _ fileReader = (*inlineFile)(nil) + +// getFileReader implements fileReader.getFileReader. +func (f *inlineFile) getFileReader(_ io.ReadSeeker, _ uint64, offset uint64) io.Reader { + diskInode := f.regFile.inode.diskInode + return &inlineReader{offset: offset, data: diskInode.Data()[:diskInode.Size()]} +} + +// newInlineFile is the inlineFile constructor. +func newInlineFile(regFile regularFile) *inlineFile { + file := &inlineFile{regFile: regFile} + file.regFile.impl = file + return file +} + +// inlineReader implements io.Reader which can read the underlying data. +type inlineReader struct { + offset uint64 + data []byte +} + +// Compiles only if inlineReader implements io.Reader. +var _ io.Reader = (*inlineReader)(nil) + +// Read implements io.Reader.Read. +func (r *inlineReader) Read(dst []byte) (int, error) { + if len(dst) == 0 { + return 0, nil + } + + if int(r.offset) >= len(r.data) { + return 0, io.EOF + } + + n := copy(dst, r.data[r.offset:]) + r.offset += uint64(n) + return n, nil +} diff --git a/pkg/sentry/fs/ext/inode.go b/pkg/sentry/fs/ext/inode.go index df1ea0bda..7d2a445fb 100644 --- a/pkg/sentry/fs/ext/inode.go +++ b/pkg/sentry/fs/ext/inode.go @@ -18,12 +18,26 @@ import ( "io" "sync/atomic" - "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" "gvisor.dev/gvisor/pkg/syserror" ) // inode represents an ext inode. +// +// inode uses the same inheritance pattern that pkg/sentry/vfs structures use. +// This has been done to increase memory locality. +// +// Implementations: +// inode -- +// |-- pipe +// |-- dir +// |-- symlink +// |-- regular-- +// |-- extent file +// |-- block map file +// |-- inline file type inode struct { // refs is a reference count. refs is accessed using atomic memory operations. refs int64 @@ -35,9 +49,9 @@ type inode struct { // diskInode gives us access to the inode struct on disk. Immutable. diskInode disklayout.Inode - // root is the root extent node. This lives in the 60 byte diskInode.Blocks(). - // Immutable. Nil if the inode does not use extents. - root *disklayout.ExtentNode + // This is immutable. The first field of the implementations must have inode + // as the first field to ensure temporality. + impl interface{} } // incRef increments the inode ref count. @@ -74,17 +88,17 @@ func (in *inode) decRef(fs *filesystem) { // inodes based on the absolute inode number on disk. // // Preconditions: Must hold the mutex of the filesystem containing dev. -func newInode(dev io.ReadSeeker, sb disklayout.SuperBlock, bgs []disklayout.BlockGroup, inodeNum uint32) (*inode, error) { +func newInode(ctx context.Context, dev io.ReadSeeker, sb disklayout.SuperBlock, bgs []disklayout.BlockGroup, inodeNum uint32) (*inode, error) { if inodeNum == 0 { panic("inode number 0 on ext filesystems is not possible") } - in := &inode{refs: 1, inodeNum: inodeNum} inodeRecordSize := sb.InodeSize() + var diskInode disklayout.Inode if inodeRecordSize == disklayout.OldInodeSize { - in.diskInode = &disklayout.InodeOld{} + diskInode = &disklayout.InodeOld{} } else { - in.diskInode = &disklayout.InodeNew{} + diskInode = &disklayout.InodeNew{} } // Calculate where the inode is actually placed. @@ -93,16 +107,38 @@ func newInode(dev io.ReadSeeker, sb disklayout.SuperBlock, bgs []disklayout.Bloc inodeTableOff := bgs[getBGNum(inodeNum, inodesPerGrp)].InodeTable() * blkSize inodeOff := inodeTableOff + uint64(uint32(inodeRecordSize)*getBGOff(inodeNum, inodesPerGrp)) - // Read it from disk and figure out which type of inode this is. - if err := readFromDisk(dev, int64(inodeOff), in.diskInode); err != nil { + if err := readFromDisk(dev, int64(inodeOff), diskInode); err != nil { return nil, err } - if in.diskInode.Flags().Extents { - in.buildExtTree(dev, blkSize) + // Build the inode based on its type. + inode := inode{ + refs: 1, + inodeNum: inodeNum, + diskInode: diskInode, } - return in, nil + switch diskInode.Mode().FileType() { + case linux.ModeSymlink: + f, err := newSymlink(dev, blkSize, inode) + if err != nil { + return nil, err + } + return &f.inode, nil + case linux.ModeRegular: + f, err := newRegularFile(dev, blkSize, inode) + if err != nil { + return nil, err + } + return &f.inode, nil + case linux.ModeDirectory: + return &newDirectroy(inode).inode, nil + case linux.ModeNamedPipe: + return &newNamedPipe(ctx, inode).inode, nil + default: + // TODO(b/134676337): Return appropriate errors for sockets and devices. + return nil, syserror.EINVAL + } } // getBGNum returns the block group number that a given inode belongs to. @@ -115,95 +151,3 @@ func getBGNum(inodeNum uint32, inodesPerGrp uint32) uint32 { func getBGOff(inodeNum uint32, inodesPerGrp uint32) uint32 { return (inodeNum - 1) % inodesPerGrp } - -// buildExtTree builds the extent tree by reading it from disk by doing -// running a simple DFS. It first reads the root node from the inode struct in -// memory. Then it recursively builds the rest of the tree by reading it off -// disk. -// -// Preconditions: -// - Must hold the mutex of the filesystem containing dev. -// - Inode flag InExtents must be set. -func (in *inode) buildExtTree(dev io.ReadSeeker, blkSize uint64) error { - rootNodeData := in.diskInode.Data() - - var rootHeader disklayout.ExtentHeader - binary.Unmarshal(rootNodeData[:disklayout.ExtentStructsSize], binary.LittleEndian, &rootHeader) - - // Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries. - if rootHeader.NumEntries > 4 { - // read(2) specifies that EINVAL should be returned if the file is unsuitable - // for reading. - return syserror.EINVAL - } - - rootEntries := make([]disklayout.ExtentEntryPair, rootHeader.NumEntries) - for i, off := uint16(0), disklayout.ExtentStructsSize; i < rootHeader.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize { - var curEntry disklayout.ExtentEntry - if rootHeader.Height == 0 { - // Leaf node. - curEntry = &disklayout.Extent{} - } else { - // Internal node. - curEntry = &disklayout.ExtentIdx{} - } - binary.Unmarshal(rootNodeData[off:off+disklayout.ExtentStructsSize], binary.LittleEndian, curEntry) - rootEntries[i].Entry = curEntry - } - - // If this node is internal, perform DFS. - if rootHeader.Height > 0 { - for i := uint16(0); i < rootHeader.NumEntries; i++ { - var err error - if rootEntries[i].Node, err = buildExtTreeFromDisk(dev, rootEntries[i].Entry, blkSize); err != nil { - return err - } - } - } - - in.root = &disklayout.ExtentNode{rootHeader, rootEntries} - return nil -} - -// buildExtTreeFromDisk reads the extent tree nodes from disk and recursively -// builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to -// by the ExtentEntry. -// -// Preconditions: Must hold the mutex of the filesystem containing dev. -func buildExtTreeFromDisk(dev io.ReadSeeker, entry disklayout.ExtentEntry, blkSize uint64) (*disklayout.ExtentNode, error) { - var header disklayout.ExtentHeader - off := entry.PhysicalBlock() * blkSize - if err := readFromDisk(dev, int64(off), &header); err != nil { - return nil, err - } - - entries := make([]disklayout.ExtentEntryPair, header.NumEntries) - for i, off := uint16(0), off+disklayout.ExtentStructsSize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize { - var curEntry disklayout.ExtentEntry - if header.Height == 0 { - // Leaf node. - curEntry = &disklayout.Extent{} - } else { - // Internal node. - curEntry = &disklayout.ExtentIdx{} - } - - if err := readFromDisk(dev, int64(off), curEntry); err != nil { - return nil, err - } - entries[i].Entry = curEntry - } - - // If this node is internal, perform DFS. - if header.Height > 0 { - for i := uint16(0); i < header.NumEntries; i++ { - var err error - entries[i].Node, err = buildExtTreeFromDisk(dev, entries[i].Entry, blkSize) - if err != nil { - return nil, err - } - } - } - - return &disklayout.ExtentNode{header, entries}, nil -} diff --git a/pkg/sentry/fs/ext/named_pipe.go b/pkg/sentry/fs/ext/named_pipe.go new file mode 100644 index 000000000..0f3af1b53 --- /dev/null +++ b/pkg/sentry/fs/ext/named_pipe.go @@ -0,0 +1,40 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ext + +import ( + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" + "gvisor.dev/gvisor/pkg/sentry/usermem" +) + +// namedPipe represents a named pipe inode. It is currently just a wrapper +// around pkg/sentry/kernel/pipe. +type namedPipe struct { + inode inode + + p *pipe.Pipe + inodeOps fs.InodeOperations +} + +// newNamedPipe is the namedPipe constructor. +func newNamedPipe(ctx context.Context, inode inode) *namedPipe { + file := &namedPipe{inode: inode} + file.inode.impl = file + file.p = pipe.NewPipe(ctx, true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize) + file.inodeOps = pipe.NewInodeOperations(ctx, fs.FilePermsFromMode(file.inode.diskInode.Mode()), file.p) + return file +} diff --git a/pkg/sentry/fs/ext/regular_file.go b/pkg/sentry/fs/ext/regular_file.go new file mode 100644 index 000000000..9bf39acfe --- /dev/null +++ b/pkg/sentry/fs/ext/regular_file.go @@ -0,0 +1,90 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ext + +import ( + "io" +) + +// fileReader is used to abstact away the complexity of how the file data is +// stored under the hood. Provides a method to get a file reader which can be +// used to read file data without worrying about how it is organized on disk. +type fileReader interface { + + // getFileReader returns a Reader implementation which can be used to read a + // file. It abstracts away the complexity of how the file is actually + // organized on disk. The reader is initialized with the passed offset. + // + // This reader is not meant to be retained across Read operations as it needs + // to be reinitialized with the correct offset for every Read. + // + // Precondition: Must hold the mutex of the filesystem containing dev while + // using the Reader. + getFileReader(dev io.ReadSeeker, blkSize uint64, offset uint64) io.Reader +} + +// regularFile represents a regular file's inode. This too follows the +// inheritance pattern prevelant in the vfs layer described in +// pkg/sentry/vfs/README.md. +type regularFile struct { + inode inode + + // This is immutable. The first field of fileReader implementations must be + // regularFile to ensure temporality. + impl fileReader +} + +// newRegularFile is the regularFile constructor. It figures out what kind of +// file this is and initializes the fileReader. +// +// Preconditions: Must hold the mutex of the filesystem containing dev. +func newRegularFile(dev io.ReadSeeker, blkSize uint64, inode inode) (*regularFile, error) { + regFile := regularFile{ + inode: inode, + } + + inodeFlags := inode.diskInode.Flags() + + if inodeFlags.Extents { + file, err := newExtentFile(dev, blkSize, regFile) + if err != nil { + return nil, err + } + + file.regFile.inode.impl = &file.regFile + return &file.regFile, nil + } + + if inodeFlags.Inline { + if inode.diskInode.Size() > 60 { + panic("ext fs: inline file larger than 60 bytes") + } + + file := newInlineFile(regFile) + file.regFile.inode.impl = &file.regFile + return &file.regFile, nil + } + + file, err := newBlockMapFile(blkSize, regFile) + if err != nil { + return nil, err + } + file.regFile.inode.impl = &file.regFile + return &file.regFile, nil +} + +func (f *regularFile) blksUsed(blkSize uint64) uint64 { + return (f.inode.diskInode.Size() + blkSize - 1) / blkSize +} diff --git a/pkg/sentry/fs/ext/symlink.go b/pkg/sentry/fs/ext/symlink.go new file mode 100644 index 000000000..0ed67c0e4 --- /dev/null +++ b/pkg/sentry/fs/ext/symlink.go @@ -0,0 +1,59 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ext + +import ( + "io" + + "gvisor.dev/gvisor/pkg/syserror" +) + +// symlink represents a symlink inode. +type symlink struct { + inode inode + target string // immutable +} + +// newSymlink is the symlink constructor. It reads out the symlink target from +// the inode (however it might have been stored). +// +// Preconditions: Must hold the mutex of the filesystem containing dev. +func newSymlink(dev io.ReadSeeker, blkSize uint64, inode inode) (*symlink, error) { + var file *symlink + var link []byte + + // If the symlink target is lesser than 60 bytes, its stores in inode.Data(). + // Otherwise either extents or block maps will be used to store the link. + size := inode.diskInode.Size() + if size < 60 { + link = inode.diskInode.Data()[:size] + } else { + // Create a regular file out of this inode and read out the target. + regFile, err := newRegularFile(dev, blkSize, inode) + if err != nil { + return nil, err + } + + link = make([]byte, size) + reader := regFile.impl.getFileReader(dev, blkSize, 0) + if _, err := io.ReadFull(reader, link); err != nil { + return nil, syserror.EIO + } + } + + file = &symlink{inode: inode, target: string(link)} + file.inode.impl = file + return file, nil +} diff --git a/pkg/sentry/fs/ext/utils.go b/pkg/sentry/fs/ext/utils.go index 3472c5fa8..8790c7778 100644 --- a/pkg/sentry/fs/ext/utils.go +++ b/pkg/sentry/fs/ext/utils.go @@ -76,12 +76,7 @@ func blockGroupsCount(sb disklayout.SuperBlock) uint64 { blocksPerGroup := uint64(sb.BlocksPerGroup()) // Round up the result. float64 can compromise precision so do it manually. - bgCount := blocksCount / blocksPerGroup - if blocksCount%blocksPerGroup != 0 { - bgCount++ - } - - return bgCount + return (blocksCount + blocksPerGroup - 1) / blocksPerGroup } // readBlockGroups reads the block group descriptor table from block group 0 in -- cgit v1.2.3