summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/sentry/fs/ext4/disklayout/BUILD8
-rw-r--r--pkg/sentry/fs/ext4/disklayout/block_group.go22
-rw-r--r--pkg/sentry/fs/ext4/disklayout/block_group_32.go4
-rw-r--r--pkg/sentry/fs/ext4/disklayout/inode.go267
-rw-r--r--pkg/sentry/fs/ext4/disklayout/superblock.go22
5 files changed, 302 insertions, 21 deletions
diff --git a/pkg/sentry/fs/ext4/disklayout/BUILD b/pkg/sentry/fs/ext4/disklayout/BUILD
index cdac63655..e3d2f8d71 100644
--- a/pkg/sentry/fs/ext4/disklayout/BUILD
+++ b/pkg/sentry/fs/ext4/disklayout/BUILD
@@ -8,6 +8,7 @@ go_library(
"block_group.go",
"block_group_32.go",
"block_group_64.go",
+ "inode.go",
"superblock.go",
"superblock_32.go",
"superblock_64.go",
@@ -15,7 +16,12 @@ go_library(
"test_utils.go",
],
importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext4/disklayout",
- deps = ["//pkg/binary"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/binary",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/time",
+ ],
)
go_test(
diff --git a/pkg/sentry/fs/ext4/disklayout/block_group.go b/pkg/sentry/fs/ext4/disklayout/block_group.go
index 7df76a036..32ea3d97d 100644
--- a/pkg/sentry/fs/ext4/disklayout/block_group.go
+++ b/pkg/sentry/fs/ext4/disklayout/block_group.go
@@ -12,26 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package disklayout provides ext4 disk level structures which can be directly
-// filled with bytes from the underlying device. All structures on disk are in
-// little-endian order. Only jbd2 (journal) structures are in big-endian order.
-// Structs aim to emulate structures `exactly` how they are layed out on disk.
-//
-// Note: All fields in these structs are exported because binary.Read would
-// panic otherwise.
package disklayout
-// BlockGroup represents Linux struct ext4_group_desc which is internally
-// called a block group descriptor. An ext4 file system is split into a series
-// of block groups. This provides an access layer to information needed to
-// access and use a block group.
+// BlockGroup represents a Linux ext block group descriptor. An ext file system
+// is split into a series of block groups. This provides an access layer to
+// information needed to access and use a block group.
//
// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#block-group-descriptors.
type BlockGroup interface {
// InodeTable returns the absolute block number of the block containing the
// inode table. This points to an array of Inode structs. Inode tables are
// statically allocated at mkfs time. The superblock records the number of
- // inodes per group (length of this table).
+ // inodes per group (length of this table) and the size of each inode struct.
InodeTable() uint64
// BlockBitmap returns the absolute block number of the block containing the
@@ -73,15 +65,15 @@ type BlockGroup interface {
// Checksum returns this block group's checksum.
//
- // If RO_COMPAT_METADATA_CSUM feature is set:
+ // If SbMetadataCsum feature is set:
// - checksum is crc32c(FS UUID + group number + group descriptor
// structure) & 0xFFFF.
//
- // If RO_COMPAT_GDT_CSUM feature is set:
+ // If SbGdtCsum feature is set:
// - checksum is crc16(FS UUID + group number + group descriptor
// structure).
//
- // RO_COMPAT_METADATA_CSUM and RO_COMPAT_GDT_CSUM should not be both set.
+ // SbMetadataCsum and SbGdtCsum should not be both set.
// If they are, Linux warns and asks to run fsck.
Checksum() uint16
diff --git a/pkg/sentry/fs/ext4/disklayout/block_group_32.go b/pkg/sentry/fs/ext4/disklayout/block_group_32.go
index 087f1fb4a..dadecb3e3 100644
--- a/pkg/sentry/fs/ext4/disklayout/block_group_32.go
+++ b/pkg/sentry/fs/ext4/disklayout/block_group_32.go
@@ -15,8 +15,8 @@
package disklayout
// BlockGroup32Bit emulates the first half of struct ext4_group_desc in
-// fs/ext4/ext4.h. It is the block group descriptor struct for 32-bit ext4
-// filesystems. It implements BlockGroup interface.
+// fs/ext4/ext4.h. It is the block group descriptor struct for ext2, ext3 and
+// 32-bit ext4 filesystems. It implements BlockGroup interface.
//
// The suffix `Lo` here stands for lower bits because this is also used in the
// 64-bit version where these fields represent the lower half of the fields.
diff --git a/pkg/sentry/fs/ext4/disklayout/inode.go b/pkg/sentry/fs/ext4/disklayout/inode.go
new file mode 100644
index 000000000..b48001910
--- /dev/null
+++ b/pkg/sentry/fs/ext4/disklayout/inode.go
@@ -0,0 +1,267 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package disklayout
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+)
+
+// The Inode interface must be implemented by structs representing ext inodes.
+// The inode stores all the metadata pertaining to the file (except for the
+// file name which is held by the directory entry). It does NOT expose all
+// fields and should be extended if need be.
+//
+// Some file systems (e.g. FAT) use the directory entry to store all this
+// information. Ext file systems do not so that they can support hard links.
+// However, ext4 cheats a little bit and duplicates the file type in the
+// directory entry for performance gains.
+//
+// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#index-nodes.
+type Inode interface {
+ // Mode returns the linux file mode which is majorly used to extract
+ // information like:
+ // - File permissions (read/write/execute by user/group/others).
+ // - Sticky, set UID and GID bits.
+ // - File type.
+ //
+ // Masks to extract this information are provided in pkg/abi/linux/file.go.
+ Mode() linux.FileMode
+
+ // UID returns the owner UID.
+ UID() auth.KUID
+
+ // GID returns the owner GID.
+ GID() auth.KGID
+
+ // Size returns the size of the file in bytes.
+ Size() uint64
+
+ // InodeSize returns the size of this inode struct in bytes.
+ // In ext2 and ext3, the inode struct and inode disk record size was fixed at
+ // 128 bytes. Ext4 makes it possible for the inode struct to be bigger.
+ // However, accessing any field beyond the 128 bytes marker must be verified
+ // using this method.
+ InodeSize() uint16
+
+ // AccessTime returns the last access time. Shows when the file was last read.
+ //
+ // If InExtendedAttr is set, then this should NOT be used because the
+ // underlying field is used to store the extended attribute value checksum.
+ AccessTime() time.Time
+
+ // ChangeTime returns the last change time. Shows when the file meta data
+ // (like permissions) was last changed.
+ //
+ // If InExtendedAttr is set, then this should NOT be used because the
+ // underlying field is used to store the lower 32 bits of the attribute
+ // value’s reference count.
+ ChangeTime() time.Time
+
+ // ModificationTime returns the last modification time. Shows when the file
+ // content was last modified.
+ //
+ // If InExtendedAttr is set, then this should NOT be used because
+ // the underlying field contains the number of the inode that owns the
+ // extended attribute.
+ ModificationTime() time.Time
+
+ // DeletionTime returns the deletion time. Inodes are marked as deleted by
+ // writing to the underlying field. FS tools can restore files until they are
+ // actually overwritten.
+ DeletionTime() time.Time
+
+ // LinksCount returns the number of hard links to this inode.
+ //
+ // Normally there is an upper limit on the number of hard links:
+ // - ext2/ext3 = 32,000
+ // - ext4 = 65,000
+ //
+ // This implies that an ext4 directory cannot have more than 64,998
+ // subdirectories because each subdirectory will have a hard link to the
+ // directory via the `..` entry. The directory has hard link via the `.` entry
+ // of its own. And finally the inode is initiated with 1 hard link (itself).
+ //
+ // The underlying value is reset to 1 if all the following hold:
+ // - Inode is a directory.
+ // - SbDirNlink is enabled.
+ // - Number of hard links is incremented past 64,999.
+ // Hard link value of 1 for a directory would indicate that the number of hard
+ // links is unknown because a directory can have minimum 2 hard links (itself
+ // and `.` entry).
+ LinksCount() uint16
+
+ // Flags returns InodeFlags which represents the inode flags.
+ Flags() InodeFlags
+
+ // Blocks returns the underlying inode.i_block array. This field is special
+ // and is used to store various kinds of things depending on the filesystem
+ // version and inode type.
+ // - In ext2/ext3, it contains the block map.
+ // - In ext4, it contains the extent tree.
+ // - For inline files, it contains the file contents.
+ // - For symlinks, it contains the link path (if it fits here).
+ //
+ // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#the-contents-of-inode-i-block.
+ Blocks() [60]byte
+}
+
+// Inode flags. This is not comprehensive and flags which were not used in
+// the Linux kernel have been excluded.
+const (
+ // InSync indicates that all writes to the file must be synchronous.
+ InSync = 0x8
+
+ // InImmutable indicates that this file is immutable.
+ InImmutable = 0x10
+
+ // InAppend indicates that this file can only be appended to.
+ InAppend = 0x20
+
+ // InNoDump indicates that teh dump(1) utility should not dump this file.
+ InNoDump = 0x40
+
+ // InNoAccessTime indicates that the access time of this inode must not be
+ // updated.
+ InNoAccessTime = 0x80
+
+ // InIndex indicates that this directory has hashed indexes.
+ InIndex = 0x1000
+
+ // InJournalData indicates that file data must always be written through a
+ // journal device.
+ InJournalData = 0x4000
+
+ // InDirSync indicates that all the directory entiry data must be written
+ // synchronously.
+ InDirSync = 0x10000
+
+ // InTopDir indicates that this inode is at the top of the directory hierarchy.
+ InTopDir = 0x20000
+
+ // InHugeFile indicates that this is a huge file.
+ InHugeFile = 0x40000
+
+ // InExtents indicates that this inode uses extents.
+ InExtents = 0x80000
+
+ // InExtendedAttr indicates that this inode stores a large extended attribute
+ // value in its data blocks.
+ InExtendedAttr = 0x200000
+
+ // InInline indicates that this inode has inline data.
+ InInline = 0x10000000
+
+ // InReserved indicates that this inode is reserved for the ext4 library.
+ InReserved = 0x80000000
+)
+
+// InodeFlags represents all possible combinations of inode flags. It aims to
+// cover the bit masks and provide a more user-friendly interface.
+type InodeFlags struct {
+ Sync bool
+ Immutable bool
+ Append bool
+ NoDump bool
+ NoAccessTime bool
+ Index bool
+ JournalData bool
+ DirSync bool
+ TopDir bool
+ HugeFile bool
+ Extents bool
+ ExtendedAttr bool
+ Inline bool
+ Reserved bool
+}
+
+// ToInt converts inode flags back to its 32-bit rep.
+func (f InodeFlags) ToInt() uint32 {
+ var res uint32
+
+ if f.Sync {
+ res |= InSync
+ }
+ if f.Immutable {
+ res |= InImmutable
+ }
+ if f.Append {
+ res |= InAppend
+ }
+ if f.NoDump {
+ res |= InNoDump
+ }
+ if f.NoAccessTime {
+ res |= InNoAccessTime
+ }
+ if f.Index {
+ res |= InIndex
+ }
+ if f.JournalData {
+ res |= InJournalData
+ }
+ if f.DirSync {
+ res |= InDirSync
+ }
+ if f.TopDir {
+ res |= InTopDir
+ }
+ if f.HugeFile {
+ res |= InHugeFile
+ }
+ if f.Extents {
+ res |= InExtents
+ }
+ if f.ExtendedAttr {
+ res |= InExtendedAttr
+ }
+ if f.Inline {
+ res |= InInline
+ }
+ if f.Reserved {
+ res |= InReserved
+ }
+
+ return res
+}
+
+// InodeFlagsFromInt converts the integer representation of inode flags to
+// a InodeFlags struct.
+func InodeFlagsFromInt(f uint32) InodeFlags {
+ return InodeFlags{
+ Sync: f&InSync > 0,
+ Immutable: f&InImmutable > 0,
+ Append: f&InAppend > 0,
+ NoDump: f&InNoDump > 0,
+ NoAccessTime: f&InNoAccessTime > 0,
+ Index: f&InIndex > 0,
+ JournalData: f&InJournalData > 0,
+ DirSync: f&InDirSync > 0,
+ TopDir: f&InTopDir > 0,
+ HugeFile: f&InHugeFile > 0,
+ Extents: f&InExtents > 0,
+ ExtendedAttr: f&InExtendedAttr > 0,
+ Inline: f&InInline > 0,
+ Reserved: f&InReserved > 0,
+ }
+}
+
+// These masks define how users can view/modify inode flags. The rest of the
+// flags are for internal kernel usage only.
+const (
+ InUserReadFlagMask = 0x4BDFFF
+ InUserWriteFlagMask = 0x4B80FF
+)
diff --git a/pkg/sentry/fs/ext4/disklayout/superblock.go b/pkg/sentry/fs/ext4/disklayout/superblock.go
index d630ba8a6..030c73410 100644
--- a/pkg/sentry/fs/ext4/disklayout/superblock.go
+++ b/pkg/sentry/fs/ext4/disklayout/superblock.go
@@ -12,9 +12,25 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// Package disklayout provides Linux ext file system's disk level structures
+// which can be directly read into from the underlying device. All structures
+// on disk are in little-endian order. Only jbd2 (journal) structures are in
+// big-endian order. Structs aim to emulate structures `exactly` how they are
+// layed out on disk.
+//
+// This library aims to be compatible with all ext(2/3/4) systems so it
+// provides a generic interface for all major structures and various
+// implementations (for different versions). The user code is responsible for
+// using appropriate implementations based on the underlying device.
+//
+// Notes:
+// - All fields in these structs are exported because binary.Read would
+// panic otherwise.
+// - All OS dependent fields in these structures will be interpretted using
+// the Linux version of that field.
package disklayout
-// SuperBlock should be implemented by structs representing ext4 superblock.
+// SuperBlock should be implemented by structs representing the ext superblock.
// The superblock holds a lot of information about the enclosing filesystem.
// This interface aims to provide access methods to important information held
// by the superblock. It does NOT expose all fields of the superblock, only the
@@ -23,11 +39,11 @@ package disklayout
// Location and replication:
// - The superblock is located at offset 1024 in block group 0.
// - Redundant copies of the superblock and group descriptors are kept in
-// all groups if sparse_super feature flag is NOT set. If it is set, the
+// all groups if SbSparse feature flag is NOT set. If it is set, the
// replicas only exist in groups whose group number is either 0 or a
// power of 3, 5, or 7.
// - There is also a sparse superblock feature v2 in which there are just
-// two replicas saved in block groups pointed by the s_backup_bgs field.
+// two replicas saved in the block groups pointed by sb.s_backup_bgs.
//
// Replicas should eventually be updated if the superblock is updated.
//