diff options
Diffstat (limited to 'pkg/sentry/fs/ext4/disklayout')
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/BUILD | 29 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/block_group.go | 135 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/block_group_32.go | 75 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/block_group_64.go | 93 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/block_group_test.go | 26 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/superblock.go | 468 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/superblock_32.go | 76 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/superblock_64.go | 94 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/superblock_old.go | 108 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/superblock_test.go | 27 | ||||
-rw-r--r-- | pkg/sentry/fs/ext4/disklayout/test_utils.go | 30 |
11 files changed, 1161 insertions, 0 deletions
diff --git a/pkg/sentry/fs/ext4/disklayout/BUILD b/pkg/sentry/fs/ext4/disklayout/BUILD new file mode 100644 index 000000000..cdac63655 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/BUILD @@ -0,0 +1,29 @@ +package(licenses = ["notice"]) + +load("//tools/go_stateify:defs.bzl", "go_library", "go_test") + +go_library( + name = "disklayout", + srcs = [ + "block_group.go", + "block_group_32.go", + "block_group_64.go", + "superblock.go", + "superblock_32.go", + "superblock_64.go", + "superblock_old.go", + "test_utils.go", + ], + importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext4/disklayout", + deps = ["//pkg/binary"], +) + +go_test( + name = "disklayout_test", + size = "small", + srcs = [ + "block_group_test.go", + "superblock_test.go", + ], + embed = [":disklayout"], +) diff --git a/pkg/sentry/fs/ext4/disklayout/block_group.go b/pkg/sentry/fs/ext4/disklayout/block_group.go new file mode 100644 index 000000000..7df76a036 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/block_group.go @@ -0,0 +1,135 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package disklayout provides ext4 disk level structures which can be directly +// filled with bytes from the underlying device. All structures on disk are in +// little-endian order. Only jbd2 (journal) structures are in big-endian order. +// Structs aim to emulate structures `exactly` how they are layed out on disk. +// +// Note: All fields in these structs are exported because binary.Read would +// panic otherwise. +package disklayout + +// BlockGroup represents Linux struct ext4_group_desc which is internally +// called a block group descriptor. An ext4 file system is split into a series +// of block groups. This provides an access layer to information needed to +// access and use a block group. +// +// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#block-group-descriptors. +type BlockGroup interface { + // InodeTable returns the absolute block number of the block containing the + // inode table. This points to an array of Inode structs. Inode tables are + // statically allocated at mkfs time. The superblock records the number of + // inodes per group (length of this table). + InodeTable() uint64 + + // BlockBitmap returns the absolute block number of the block containing the + // block bitmap. This bitmap tracks the usage of data blocks within this block + // group and has its own checksum. + BlockBitmap() uint64 + + // InodeBitmap returns the absolute block number of the block containing the + // inode bitmap. This bitmap tracks the usage of this group's inode table + // entries and has its own checksum. + InodeBitmap() uint64 + + // ExclusionBitmap returns the absolute block number of the snapshot exclusion + // bitmap. + ExclusionBitmap() uint64 + + // FreeBlocksCount returns the number of free blocks in the group. + FreeBlocksCount() uint32 + + // FreeInodesCount returns the number of free inodes in the group. + FreeInodesCount() uint32 + + // DirectoryCount returns the number of inodes that represent directories + // under this block group. + DirectoryCount() uint32 + + // UnusedInodeCount returns the number of unused inodes beyond the last used + // inode in this group's inode table. As a result, we needn’t scan past the + // (InodesPerGroup - UnusedInodeCount())th entry in the inode table. + UnusedInodeCount() uint32 + + // BlockBitmapChecksum returns the block bitmap checksum. This is calculated + // using crc32c(FS UUID + group number + entire bitmap). + BlockBitmapChecksum() uint32 + + // InodeBitmapChecksum returns the inode bitmap checksum. This is calculated + // using crc32c(FS UUID + group number + entire bitmap). + InodeBitmapChecksum() uint32 + + // Checksum returns this block group's checksum. + // + // If RO_COMPAT_METADATA_CSUM feature is set: + // - checksum is crc32c(FS UUID + group number + group descriptor + // structure) & 0xFFFF. + // + // If RO_COMPAT_GDT_CSUM feature is set: + // - checksum is crc16(FS UUID + group number + group descriptor + // structure). + // + // RO_COMPAT_METADATA_CSUM and RO_COMPAT_GDT_CSUM should not be both set. + // If they are, Linux warns and asks to run fsck. + Checksum() uint16 + + // Flags returns BGFlags which represents the block group flags. + Flags() BGFlags +} + +// These are the different block group flags. +const ( + // BgInodeUninit indicates that inode table and bitmap are not initialized. + BgInodeUninit uint16 = 0x1 + + // BgBlockUninit indicates that block bitmap is not initialized. + BgBlockUninit uint16 = 0x2 + + // BgInodeZeroed indicates that inode table is zeroed. + BgInodeZeroed uint16 = 0x4 +) + +// BGFlags represents all the different combinations of block group flags. +type BGFlags struct { + InodeUninit bool + BlockUninit bool + InodeZeroed bool +} + +// ToInt converts a BGFlags struct back to its 16-bit representation. +func (f BGFlags) ToInt() uint16 { + var res uint16 + + if f.InodeUninit { + res |= BgInodeUninit + } + if f.BlockUninit { + res |= BgBlockUninit + } + if f.InodeZeroed { + res |= BgInodeZeroed + } + + return res +} + +// BGFlagsFromInt converts the 16-bit flag representation to a BGFlags struct. +func BGFlagsFromInt(flags uint16) BGFlags { + return BGFlags{ + InodeUninit: flags&BgInodeUninit > 0, + BlockUninit: flags&BgBlockUninit > 0, + InodeZeroed: flags&BgInodeZeroed > 0, + } +} diff --git a/pkg/sentry/fs/ext4/disklayout/block_group_32.go b/pkg/sentry/fs/ext4/disklayout/block_group_32.go new file mode 100644 index 000000000..087f1fb4a --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/block_group_32.go @@ -0,0 +1,75 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// BlockGroup32Bit emulates the first half of struct ext4_group_desc in +// fs/ext4/ext4.h. It is the block group descriptor struct for 32-bit ext4 +// filesystems. It implements BlockGroup interface. +// +// The suffix `Lo` here stands for lower bits because this is also used in the +// 64-bit version where these fields represent the lower half of the fields. +// The suffix `Raw` has been added to indicate that the field does not have a +// counterpart in the 64-bit version and to resolve name collision with the +// interface. +type BlockGroup32Bit struct { + BlockBitmapLo uint32 + InodeBitmapLo uint32 + InodeTableLo uint32 + FreeBlocksCountLo uint16 + FreeInodesCountLo uint16 + UsedDirsCountLo uint16 + FlagsRaw uint16 + ExcludeBitmapLo uint32 + BlockBitmapChecksumLo uint16 + InodeBitmapChecksumLo uint16 + ItableUnusedLo uint16 + ChecksumRaw uint16 +} + +// InodeTable implements BlockGroup.InodeTable. +func (bg *BlockGroup32Bit) InodeTable() uint64 { return uint64(bg.InodeTableLo) } + +// BlockBitmap implements BlockGroup.BlockBitmap. +func (bg *BlockGroup32Bit) BlockBitmap() uint64 { return uint64(bg.BlockBitmapLo) } + +// InodeBitmap implements BlockGroup.InodeBitmap. +func (bg *BlockGroup32Bit) InodeBitmap() uint64 { return uint64(bg.InodeBitmapLo) } + +// ExclusionBitmap implements BlockGroup.ExclusionBitmap. +func (bg *BlockGroup32Bit) ExclusionBitmap() uint64 { return uint64(bg.ExcludeBitmapLo) } + +// FreeBlocksCount implements BlockGroup.FreeBlocksCount. +func (bg *BlockGroup32Bit) FreeBlocksCount() uint32 { return uint32(bg.FreeBlocksCountLo) } + +// FreeInodesCount implements BlockGroup.FreeInodesCount. +func (bg *BlockGroup32Bit) FreeInodesCount() uint32 { return uint32(bg.FreeInodesCountLo) } + +// DirectoryCount implements BlockGroup.DirectoryCount. +func (bg *BlockGroup32Bit) DirectoryCount() uint32 { return uint32(bg.UsedDirsCountLo) } + +// UnusedInodeCount implements BlockGroup.UnusedInodeCount. +func (bg *BlockGroup32Bit) UnusedInodeCount() uint32 { return uint32(bg.ItableUnusedLo) } + +// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum. +func (bg *BlockGroup32Bit) BlockBitmapChecksum() uint32 { return uint32(bg.BlockBitmapChecksumLo) } + +// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum. +func (bg *BlockGroup32Bit) InodeBitmapChecksum() uint32 { return uint32(bg.InodeBitmapChecksumLo) } + +// Checksum implements BlockGroup.Checksum. +func (bg *BlockGroup32Bit) Checksum() uint16 { return bg.ChecksumRaw } + +// Flags implements BlockGroup.Flags. +func (bg *BlockGroup32Bit) Flags() BGFlags { return BGFlagsFromInt(bg.FlagsRaw) } diff --git a/pkg/sentry/fs/ext4/disklayout/block_group_64.go b/pkg/sentry/fs/ext4/disklayout/block_group_64.go new file mode 100644 index 000000000..27de3990d --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/block_group_64.go @@ -0,0 +1,93 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// BlockGroup64Bit emulates struct ext4_group_desc in fs/ext4/ext4.h. +// It is the block group descriptor struct for 64-bit ext4 filesystems. +// It implements BlockGroup interface. It is an extension of the 32-bit +// version of BlockGroup. +// +// The suffix `Hi` here stands for upper bits because they represent the upper +// half of the fields. +type BlockGroup64Bit struct { + // We embed the 32-bit struct here because 64-bit version is just an extension + // of the 32-bit version. + BlockGroup32Bit + + // 64-bit specific fields. + BlockBitmapHi uint32 + InodeBitmapHi uint32 + InodeTableHi uint32 + FreeBlocksCountHi uint16 + FreeInodesCountHi uint16 + UsedDirsCountHi uint16 + ItableUnusedHi uint16 + ExcludeBitmapHi uint32 + BlockBitmapChecksumHi uint16 + InodeBitmapChecksumHi uint16 + _ uint32 // Padding to 64 bytes. +} + +// Methods to override. Checksum() and Flags() are not overridden. + +// InodeTable implements BlockGroup.InodeTable. +func (bg *BlockGroup64Bit) InodeTable() uint64 { + return (uint64(bg.InodeTableHi) << 32) | uint64(bg.InodeTableLo) +} + +// BlockBitmap implements BlockGroup.BlockBitmap. +func (bg *BlockGroup64Bit) BlockBitmap() uint64 { + return (uint64(bg.BlockBitmapHi) << 32) | uint64(bg.BlockBitmapLo) +} + +// InodeBitmap implements BlockGroup.InodeBitmap. +func (bg *BlockGroup64Bit) InodeBitmap() uint64 { + return (uint64(bg.InodeBitmapHi) << 32) | uint64(bg.InodeBitmapLo) +} + +// ExclusionBitmap implements BlockGroup.ExclusionBitmap. +func (bg *BlockGroup64Bit) ExclusionBitmap() uint64 { + return (uint64(bg.ExcludeBitmapHi) << 32) | uint64(bg.ExcludeBitmapLo) +} + +// FreeBlocksCount implements BlockGroup.FreeBlocksCount. +func (bg *BlockGroup64Bit) FreeBlocksCount() uint32 { + return (uint32(bg.FreeBlocksCountHi) << 16) | uint32(bg.FreeBlocksCountLo) +} + +// FreeInodesCount implements BlockGroup.FreeInodesCount. +func (bg *BlockGroup64Bit) FreeInodesCount() uint32 { + return (uint32(bg.FreeInodesCountHi) << 16) | uint32(bg.FreeInodesCountLo) +} + +// DirectoryCount implements BlockGroup.DirectoryCount. +func (bg *BlockGroup64Bit) DirectoryCount() uint32 { + return (uint32(bg.UsedDirsCountHi) << 16) | uint32(bg.UsedDirsCountLo) +} + +// UnusedInodeCount implements BlockGroup.UnusedInodeCount. +func (bg *BlockGroup64Bit) UnusedInodeCount() uint32 { + return (uint32(bg.ItableUnusedHi) << 16) | uint32(bg.ItableUnusedLo) +} + +// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum. +func (bg *BlockGroup64Bit) BlockBitmapChecksum() uint32 { + return (uint32(bg.BlockBitmapChecksumHi) << 16) | uint32(bg.BlockBitmapChecksumLo) +} + +// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum. +func (bg *BlockGroup64Bit) InodeBitmapChecksum() uint32 { + return (uint32(bg.InodeBitmapChecksumHi) << 16) | uint32(bg.InodeBitmapChecksumLo) +} diff --git a/pkg/sentry/fs/ext4/disklayout/block_group_test.go b/pkg/sentry/fs/ext4/disklayout/block_group_test.go new file mode 100644 index 000000000..0ef4294c0 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/block_group_test.go @@ -0,0 +1,26 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +import ( + "testing" +) + +// TestBlockGroupSize tests that the block group descriptor structs are of the +// correct size. +func TestBlockGroupSize(t *testing.T) { + assertSize(t, BlockGroup32Bit{}, 32) + assertSize(t, BlockGroup64Bit{}, 64) +} diff --git a/pkg/sentry/fs/ext4/disklayout/superblock.go b/pkg/sentry/fs/ext4/disklayout/superblock.go new file mode 100644 index 000000000..d630ba8a6 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/superblock.go @@ -0,0 +1,468 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// SuperBlock should be implemented by structs representing ext4 superblock. +// The superblock holds a lot of information about the enclosing filesystem. +// This interface aims to provide access methods to important information held +// by the superblock. It does NOT expose all fields of the superblock, only the +// ones necessary. This can be expanded when need be. +// +// Location and replication: +// - The superblock is located at offset 1024 in block group 0. +// - Redundant copies of the superblock and group descriptors are kept in +// all groups if sparse_super feature flag is NOT set. If it is set, the +// replicas only exist in groups whose group number is either 0 or a +// power of 3, 5, or 7. +// - There is also a sparse superblock feature v2 in which there are just +// two replicas saved in block groups pointed by the s_backup_bgs field. +// +// Replicas should eventually be updated if the superblock is updated. +// +// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#super-block. +type SuperBlock interface { + // InodesCount returns the total number of inodes in this filesystem. + InodesCount() uint32 + + // BlocksCount returns the total number of data blocks in this filesystem. + BlocksCount() uint64 + + // FreeBlocksCount returns the number of free blocks in this filesystem. + FreeBlocksCount() uint64 + + // FreeInodesCount returns the number of free inodes in this filesystem. + FreeInodesCount() uint32 + + // MountCount returns the number of mounts since the last fsck. + MountCount() uint16 + + // MaxMountCount returns the number of mounts allowed beyond which a fsck is + // needed. + MaxMountCount() uint16 + + // FirstDataBlock returns the absolute block number of the first data block, + // which contains the super block itself. + // + // If the filesystem has 1kb data blocks then this should return 1. For all + // other configurations, this typically returns 0. + // + // The first block group descriptor is in (FirstDataBlock() + 1)th block. + FirstDataBlock() uint32 + + // BlockSize returns the size of one data block in this filesystem. + // This can be calculated by 2^(10 + sb.s_log_block_size). This ensures that + // the smallest block size is 1kb. + BlockSize() uint64 + + // BlocksPerGroup returns the number of data blocks in a block group. + BlocksPerGroup() uint32 + + // ClusterSize returns block cluster size (set during mkfs time by admin). + // This can be calculated by 2^(10 + sb.s_log_cluster_size). This ensures that + // the smallest cluster size is 1kb. + // + // sb.s_log_cluster_size must equal sb.s_log_block_size if bigalloc feature + // is NOT set and consequently BlockSize() = ClusterSize() in that case. + ClusterSize() uint64 + + // ClustersPerGroup returns: + // - number of clusters per group if bigalloc is enabled. + // - BlocksPerGroup() otherwise. + ClustersPerGroup() uint32 + + // InodeSize returns the size of the inode disk record size in bytes. Use this + // to iterate over inode arrays on disk. + // + // In ext2 and ext3: + // - Each inode had a disk record of 128 bytes. + // - The inode struct size was fixed at 128 bytes. + // + // In ext4 its possible to allocate larger on-disk inodes: + // - Inode disk record size = sb.s_inode_size (function return value). + // = 256 (default) + // - Inode struct size = 128 + inode.i_extra_isize. + // = 128 + 28 = 156 (default) + InodeSize() uint16 + + // InodesPerGroup returns the number of inodes in a block group. + InodesPerGroup() uint32 + + // BgDescSize returns the size of the block group descriptor struct. + // + // In ext2, ext3, ext4 (without 64-bit feature), the block group descriptor + // is only 32 bytes long. + // In ext4 with 64-bit feature, the block group descriptor expands to AT LEAST + // 64 bytes. It might be bigger than that. + BgDescSize() uint16 + + // CompatibleFeatures returns the CompatFeatures struct which holds all the + // compatible features this fs supports. + CompatibleFeatures() CompatFeatures + + // IncompatibleFeatures returns the CompatFeatures struct which holds all the + // incompatible features this fs supports. + IncompatibleFeatures() IncompatFeatures + + // ReadOnlyCompatibleFeatures returns the CompatFeatures struct which holds all the + // readonly compatible features this fs supports. + ReadOnlyCompatibleFeatures() RoCompatFeatures + + // Magic() returns the magic signature which must be 0xef53. + Magic() uint16 + + // Revision returns the superblock revision. Superblock struct fields from + // offset 0x54 till 0x150 should only be used if superblock has DynamicRev. + Revision() SbRevision +} + +// SbRevision is the type for superblock revisions. +type SbRevision int + +// Super block revisions. +const ( + // OldRev is the good old (original) format. + OldRev SbRevision = 0 + + // DynamicRev is v2 format w/ dynamic inode sizes. + DynamicRev SbRevision = 1 +) + +// Superblock compatible features. +// This is not exhaustive, unused features are not listed. +const ( + // SbDirPrealloc indicates directory preallocation. + SbDirPrealloc = 0x1 + + // SbHasJournal indicates the presence of a journal. jbd2 should only work + // with this being set. + SbHasJournal = 0x4 + + // SbExtAttr indicates extended attributes support. + SbExtAttr = 0x8 + + // SbResizeInode indicates that the fs has reserved GDT blocks (right after + // group descriptors) for fs expansion. + SbResizeInode = 0x10 + + // SbDirIndex indicates that the fs has directory indices. + SbDirIndex = 0x20 + + // SbSparseV2 stands for Sparse superblock version 2. + SbSparseV2 = 0x200 +) + +// CompatFeatures represents a superblock's compatible feature set. If the +// kernel does not understand any of these feature, it can still read/write +// to this fs. +type CompatFeatures struct { + DirPrealloc bool + HasJournal bool + ExtAttr bool + ResizeInode bool + DirIndex bool + SparseV2 bool +} + +// ToInt converts superblock compatible features back to its 32-bit rep. +func (f CompatFeatures) ToInt() uint32 { + var res uint32 + + if f.DirPrealloc { + res |= SbDirPrealloc + } + if f.HasJournal { + res |= SbHasJournal + } + if f.ExtAttr { + res |= SbExtAttr + } + if f.ResizeInode { + res |= SbResizeInode + } + if f.DirIndex { + res |= SbDirIndex + } + if f.SparseV2 { + res |= SbSparseV2 + } + + return res +} + +// CompatFeaturesFromInt converts the integer representation of superblock +// compatible features to CompatFeatures struct. +func CompatFeaturesFromInt(f uint32) CompatFeatures { + return CompatFeatures{ + DirPrealloc: f&SbDirPrealloc > 0, + HasJournal: f&SbHasJournal > 0, + ExtAttr: f&SbExtAttr > 0, + ResizeInode: f&SbResizeInode > 0, + DirIndex: f&SbDirIndex > 0, + SparseV2: f&SbSparseV2 > 0, + } +} + +// Superblock incompatible features. +// This is not exhaustive, unused features are not listed. +const ( + // SbDirentFileType indicates that directory entries record the file type. + // We should use struct ext4_dir_entry_2 for dirents then. + SbDirentFileType = 0x2 + + // SbRecovery indicates that the filesystem needs recovery. + SbRecovery = 0x4 + + // SbJournalDev indicates that the filesystem has a separate journal device. + SbJournalDev = 0x8 + + // SbMetaBG indicates that the filesystem is using Meta block groups. Moves + // the group descriptors from the congested first block group into the first + // group of each metablock group to increase the maximum block groups limit + // and hence support much larger filesystems. + // + // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#meta-block-groups. + SbMetaBG = 0x10 + + // SbExtents indicates that the filesystem uses extents. Must be set in ext4 + // filesystems. + SbExtents = 0x40 + + // SbIs64Bit indicates that this filesystem addresses blocks with 64-bits. + // Hence can support 2^64 data blocks. + SbIs64Bit = 0x80 + + // SbMMP indicates that this filesystem has multiple mount protection. + // + // See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#multiple-mount-protection. + SbMMP = 0x100 + + // SbFlexBg indicates that this filesystem has flexible block groups. Several + // block groups are tied into one logical block group so that all the metadata + // for the block groups (bitmaps and inode tables) are close together for + // faster loading. Consequently, large files will be continuous on disk. + // However, this does not affect the placement of redundant superblocks and + // group descriptors. + // + // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#flexible-block-groups. + SbFlexBg = 0x200 + + // SbLargeDir shows that large directory enabled. Directory htree can be 3 + // levels deep. Directory htrees are allowed to be 2 levels deep otherwise. + SbLargeDir = 0x4000 + + // SbInlineData allows inline data in inodes for really small files. + SbInlineData = 0x8000 + + // SbEncrypted indicates that this fs contains encrypted inodes. + SbEncrypted = 0x10000 +) + +// IncompatFeatures represents a superblock's incompatible feature set. If the +// kernel does not understand any of these feature, it should refuse to mount. +type IncompatFeatures struct { + DirentFileType bool + Recovery bool + JournalDev bool + MetaBG bool + Extents bool + Is64Bit bool + MMP bool + FlexBg bool + LargeDir bool + InlineData bool + Encrypted bool +} + +// ToInt converts superblock incompatible features back to its 32-bit rep. +func (f IncompatFeatures) ToInt() uint32 { + var res uint32 + + if f.DirentFileType { + res |= SbDirentFileType + } + if f.Recovery { + res |= SbRecovery + } + if f.JournalDev { + res |= SbJournalDev + } + if f.MetaBG { + res |= SbMetaBG + } + if f.Extents { + res |= SbExtents + } + if f.Is64Bit { + res |= SbIs64Bit + } + if f.MMP { + res |= SbMMP + } + if f.FlexBg { + res |= SbFlexBg + } + if f.LargeDir { + res |= SbLargeDir + } + if f.InlineData { + res |= SbInlineData + } + if f.Encrypted { + res |= SbEncrypted + } + + return res +} + +// IncompatFeaturesFromInt converts the integer representation of superblock +// incompatible features to IncompatFeatures struct. +func IncompatFeaturesFromInt(f uint32) IncompatFeatures { + return IncompatFeatures{ + DirentFileType: f&SbDirentFileType > 0, + Recovery: f&SbRecovery > 0, + JournalDev: f&SbJournalDev > 0, + MetaBG: f&SbMetaBG > 0, + Extents: f&SbExtents > 0, + Is64Bit: f&SbIs64Bit > 0, + MMP: f&SbMMP > 0, + FlexBg: f&SbFlexBg > 0, + LargeDir: f&SbLargeDir > 0, + InlineData: f&SbInlineData > 0, + Encrypted: f&SbEncrypted > 0, + } +} + +// Superblock readonly compatible features. +// This is not exhaustive, unused features are not listed. +const ( + // SbSparse indicates sparse superblocks. Only groups with number either 0 or + // a power of 3, 5, or 7 will have redundant copies of the superblock and + // block descriptors. + SbSparse = 0x1 + + // SbLargeFile indicates that this fs has been used to store a file >= 2GiB. + SbLargeFile = 0x2 + + // SbHugeFile indicates that this fs contains files whose sizes are + // represented in units of logicals blocks, not 512-byte sectors. + SbHugeFile = 0x8 + + // SbGdtCsum indicates that group descriptors have checksums. + SbGdtCsum = 0x10 + + // SbDirNlink indicates that the new subdirectory limit is 64,999. Ext3 has a + // 32,000 subdirectory limit. + SbDirNlink = 0x20 + + // SbExtraIsize indicates that large inodes exist on this filesystem. + SbExtraIsize = 0x40 + + // SbHasSnapshot indicates the existence of a snapshot. + SbHasSnapshot = 0x80 + + // SbQuota enables usage tracking for all quota types. + SbQuota = 0x100 + + // SbBigalloc maps to the bigalloc feature. When set, the minimum allocation + // unit becomes a cluster rather than a data block. Then block bitmaps track + // clusters, not data blocks. + // + // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#bigalloc. + SbBigalloc = 0x200 + + // SbMetadataCsum indicates that the fs supports metadata checksumming. + SbMetadataCsum = 0x400 + + // SbReadOnly marks this filesystem as readonly. Should refuse to mount in + // read/write mode. + SbReadOnly = 0x1000 +) + +// RoCompatFeatures represents a superblock's readonly compatible feature set. +// If the kernel does not understand any of these feature, it can still mount +// readonly. But if the user wants to mount read/write, the kernel should +// refuse to mount. +type RoCompatFeatures struct { + Sparse bool + LargeFile bool + HugeFile bool + GdtCsum bool + DirNlink bool + ExtraIsize bool + HasSnapshot bool + Quota bool + Bigalloc bool + MetadataCsum bool + ReadOnly bool +} + +// ToInt converts superblock readonly compatible features to its 32-bit rep. +func (f RoCompatFeatures) ToInt() uint32 { + var res uint32 + + if f.Sparse { + res |= SbSparse + } + if f.LargeFile { + res |= SbLargeFile + } + if f.HugeFile { + res |= SbHugeFile + } + if f.GdtCsum { + res |= SbGdtCsum + } + if f.DirNlink { + res |= SbDirNlink + } + if f.ExtraIsize { + res |= SbExtraIsize + } + if f.HasSnapshot { + res |= SbHasSnapshot + } + if f.Quota { + res |= SbQuota + } + if f.Bigalloc { + res |= SbBigalloc + } + if f.MetadataCsum { + res |= SbMetadataCsum + } + if f.ReadOnly { + res |= SbReadOnly + } + + return res +} + +// RoCompatFeaturesFromInt converts the integer representation of superblock +// readonly compatible features to RoCompatFeatures struct. +func RoCompatFeaturesFromInt(f uint32) RoCompatFeatures { + return RoCompatFeatures{ + Sparse: f&SbSparse > 0, + LargeFile: f&SbLargeFile > 0, + HugeFile: f&SbHugeFile > 0, + GdtCsum: f&SbGdtCsum > 0, + DirNlink: f&SbDirNlink > 0, + ExtraIsize: f&SbExtraIsize > 0, + HasSnapshot: f&SbHasSnapshot > 0, + Quota: f&SbQuota > 0, + Bigalloc: f&SbBigalloc > 0, + MetadataCsum: f&SbMetadataCsum > 0, + ReadOnly: f&SbReadOnly > 0, + } +} diff --git a/pkg/sentry/fs/ext4/disklayout/superblock_32.go b/pkg/sentry/fs/ext4/disklayout/superblock_32.go new file mode 100644 index 000000000..4c3233eed --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/superblock_32.go @@ -0,0 +1,76 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// SuperBlock32Bit implements SuperBlock and represents the 32-bit version of +// the ext4_super_block struct in fs/ext4/ext4.h. +// +// The suffix `Raw` has been added to indicate that the field does not have a +// counterpart in the 64-bit version and to resolve name collision with the +// interface. +type SuperBlock32Bit struct { + // We embed the old superblock struct here because the 32-bit version is just + // an extension of the old version. + SuperBlockOld + + FirstInode uint32 + InodeSizeRaw uint16 + BlockGroupNumber uint16 + FeatureCompat uint32 + FeatureIncompat uint32 + FeatureRoCompat uint32 + UUID [16]byte + VolumeName [16]byte + LastMounted [64]byte + AlgoUsageBitmap uint32 + PreallocBlocks uint8 + PreallocDirBlocks uint8 + ReservedGdtBlocks uint16 + JournalUUID [16]byte + JournalInum uint32 + JournalDev uint32 + LastOrphan uint32 + HashSeed [4]uint32 + DefaultHashVersion uint8 + JnlBackupType uint8 + BgDescSizeRaw uint16 + DefaultMountOpts uint32 + FirstMetaBg uint32 + MkfsTime uint32 + JnlBlocks [17]uint32 +} + +// Only override methods which change based on the additional fields above. +// Not overriding SuperBlock.BgDescSize because it would still return 32 here. + +// InodeSize implements SuperBlock.InodeSize. +func (sb *SuperBlock32Bit) InodeSize() uint16 { + return sb.InodeSizeRaw +} + +// CompatibleFeatures implements SuperBlock.CompatibleFeatures. +func (sb *SuperBlock32Bit) CompatibleFeatures() CompatFeatures { + return CompatFeaturesFromInt(sb.FeatureCompat) +} + +// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures. +func (sb *SuperBlock32Bit) IncompatibleFeatures() IncompatFeatures { + return IncompatFeaturesFromInt(sb.FeatureIncompat) +} + +// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures. +func (sb *SuperBlock32Bit) ReadOnlyCompatibleFeatures() RoCompatFeatures { + return RoCompatFeaturesFromInt(sb.FeatureRoCompat) +} diff --git a/pkg/sentry/fs/ext4/disklayout/superblock_64.go b/pkg/sentry/fs/ext4/disklayout/superblock_64.go new file mode 100644 index 000000000..2e945a7c7 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/superblock_64.go @@ -0,0 +1,94 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// SuperBlock64Bit implements SuperBlock and represents the 64-bit version of +// the ext4_super_block struct in fs/ext4/ext4.h. This sums up to be exactly +// 1024 bytes (smallest possible block size) and hence the superblock always +// fits in no more than one data block. +// +// The suffix `Hi` here stands for upper bits because they represent the upper +// half of the fields. +type SuperBlock64Bit struct { + // We embed the 32-bit struct here because 64-bit version is just an extension + // of the 32-bit version. + SuperBlock32Bit + + BlocksCountHi uint32 + ReservedBlocksCountHi uint32 + FreeBlocksCountHi uint32 + MinInodeSize uint16 + WantInodeSize uint16 + Flags uint32 + RaidStride uint16 + MmpInterval uint16 + MmpBlock uint64 + RaidStripeWidth uint32 + LogGroupsPerFlex uint8 + ChecksumType uint8 + _ uint16 + KbytesWritten uint64 + SnapshotInum uint32 + SnapshotID uint32 + SnapshotRsrvBlocksCount uint64 + SnapshotList uint32 + ErrorCount uint32 + FirstErrorTime uint32 + FirstErrorInode uint32 + FirstErrorBlock uint64 + FirstErrorFunction [32]byte + FirstErrorLine uint32 + LastErrorTime uint32 + LastErrorInode uint32 + LastErrorLine uint32 + LastErrorBlock uint64 + LastErrorFunction [32]byte + MountOpts [64]byte + UserQuotaInum uint32 + GroupQuotaInum uint32 + OverheadBlocks uint32 + BackupBgs [2]uint32 + EncryptAlgos [4]uint8 + EncryptPwSalt [16]uint8 + LostFoundInode uint32 + ProjectQuotaInode uint32 + ChecksumSeed uint32 + WtimeHi uint8 + MtimeHi uint8 + MkfsTimeHi uint8 + LastCheckHi uint8 + FirstErrorTimeHi uint8 + LastErrorTimeHi uint8 + _ [2]uint8 + Encoding uint16 + EncodingFlags uint16 + _ [95]uint32 + Checksum uint32 +} + +// Only override methods which change based on the 64-bit feature. + +// BlocksCount implements SuperBlock.BlocksCount. +func (sb *SuperBlock64Bit) BlocksCount() uint64 { + return (uint64(sb.BlocksCountHi) << 32) | uint64(sb.BlocksCountLo) +} + +// FreeBlocksCount implements SuperBlock.FreeBlocksCount. +func (sb *SuperBlock64Bit) FreeBlocksCount() uint64 { + return (uint64(sb.FreeBlocksCountHi) << 32) | uint64(sb.FreeBlocksCountLo) +} + +// BgDescSize implements SuperBlock.BgDescSize. +func (sb *SuperBlock64Bit) BgDescSize() uint16 { return sb.BgDescSizeRaw } diff --git a/pkg/sentry/fs/ext4/disklayout/superblock_old.go b/pkg/sentry/fs/ext4/disklayout/superblock_old.go new file mode 100644 index 000000000..1f7425ba3 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/superblock_old.go @@ -0,0 +1,108 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +// SuperBlockOld implements SuperBlock and represents the old version of the +// superblock struct in ext2 and ext3 systems. +// +// The suffix `Lo` here stands for lower bits because this is also used in the +// 64-bit version where these fields represent the lower half of the fields. +// The suffix `Raw` has been added to indicate that the field does not have a +// counterpart in the 64-bit version and to resolve name collision with the +// interface. +type SuperBlockOld struct { + InodesCountRaw uint32 + BlocksCountLo uint32 + ReservedBlocksCount uint32 + FreeBlocksCountLo uint32 + FreeInodesCountRaw uint32 + FirstDataBlockRaw uint32 + LogBlockSize uint32 + LogClusterSize uint32 + BlocksPerGroupRaw uint32 + ClustersPerGroupRaw uint32 + InodesPerGroupRaw uint32 + Mtime uint32 + Wtime uint32 + MountCountRaw uint16 + MaxMountCountRaw uint16 + MagicRaw uint16 + State uint16 + Errors uint16 + MinorRevLevel uint16 + LastCheck uint32 + CheckInterval uint32 + CreatorOS uint32 + RevLevel uint32 + DefResUID uint16 + DefResGID uint16 +} + +// InodesCount implements SuperBlock.InodesCount. +func (sb *SuperBlockOld) InodesCount() uint32 { return sb.InodesCountRaw } + +// BlocksCount implements SuperBlock.BlocksCount. +func (sb *SuperBlockOld) BlocksCount() uint64 { return uint64(sb.BlocksCountLo) } + +// FreeBlocksCount implements SuperBlock.FreeBlocksCount. +func (sb *SuperBlockOld) FreeBlocksCount() uint64 { return uint64(sb.FreeBlocksCountLo) } + +// FreeInodesCount implements SuperBlock.FreeInodesCount. +func (sb *SuperBlockOld) FreeInodesCount() uint32 { return sb.FreeInodesCountRaw } + +// MountCount implements SuperBlock.MountCount. +func (sb *SuperBlockOld) MountCount() uint16 { return sb.MountCountRaw } + +// MaxMountCount implements SuperBlock.MaxMountCount. +func (sb *SuperBlockOld) MaxMountCount() uint16 { return sb.MaxMountCountRaw } + +// FirstDataBlock implements SuperBlock.FirstDataBlock. +func (sb *SuperBlockOld) FirstDataBlock() uint32 { return sb.FirstDataBlockRaw } + +// BlockSize implements SuperBlock.BlockSize. +func (sb *SuperBlockOld) BlockSize() uint64 { return 1 << (10 + sb.LogBlockSize) } + +// BlocksPerGroup implements SuperBlock.BlocksPerGroup. +func (sb *SuperBlockOld) BlocksPerGroup() uint32 { return sb.BlocksPerGroupRaw } + +// ClusterSize implements SuperBlock.ClusterSize. +func (sb *SuperBlockOld) ClusterSize() uint64 { return 1 << (10 + sb.LogClusterSize) } + +// ClustersPerGroup implements SuperBlock.ClustersPerGroup. +func (sb *SuperBlockOld) ClustersPerGroup() uint32 { return sb.ClustersPerGroupRaw } + +// InodeSize implements SuperBlock.InodeSize. +func (sb *SuperBlockOld) InodeSize() uint16 { return 128 } + +// InodesPerGroup implements SuperBlock.InodesPerGroup. +func (sb *SuperBlockOld) InodesPerGroup() uint32 { return sb.InodesPerGroupRaw } + +// BgDescSize implements SuperBlock.BgDescSize. +func (sb *SuperBlockOld) BgDescSize() uint16 { return 32 } + +// CompatibleFeatures implements SuperBlock.CompatibleFeatures. +func (sb *SuperBlockOld) CompatibleFeatures() CompatFeatures { return CompatFeatures{} } + +// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures. +func (sb *SuperBlockOld) IncompatibleFeatures() IncompatFeatures { return IncompatFeatures{} } + +// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures. +func (sb *SuperBlockOld) ReadOnlyCompatibleFeatures() RoCompatFeatures { return RoCompatFeatures{} } + +// Magic implements SuperBlock.Magic. +func (sb *SuperBlockOld) Magic() uint16 { return sb.MagicRaw } + +// Revision implements SuperBlock.Revision. +func (sb *SuperBlockOld) Revision() SbRevision { return SbRevision(sb.RevLevel) } diff --git a/pkg/sentry/fs/ext4/disklayout/superblock_test.go b/pkg/sentry/fs/ext4/disklayout/superblock_test.go new file mode 100644 index 000000000..463b5ba21 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/superblock_test.go @@ -0,0 +1,27 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +import ( + "testing" +) + +// TestSuperBlockSize tests that the superblock structs are of the correct +// size. +func TestSuperBlockSize(t *testing.T) { + assertSize(t, SuperBlockOld{}, 84) + assertSize(t, SuperBlock32Bit{}, 336) + assertSize(t, SuperBlock64Bit{}, 1024) +} diff --git a/pkg/sentry/fs/ext4/disklayout/test_utils.go b/pkg/sentry/fs/ext4/disklayout/test_utils.go new file mode 100644 index 000000000..9c63f04c0 --- /dev/null +++ b/pkg/sentry/fs/ext4/disklayout/test_utils.go @@ -0,0 +1,30 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package disklayout + +import ( + "reflect" + "testing" + + "gvisor.dev/gvisor/pkg/binary" +) + +func assertSize(t *testing.T, v interface{}, want uintptr) { + t.Helper() + + if got := binary.Size(v); got != want { + t.Errorf("struct %s should be exactly %d bytes but is %d bytes", reflect.TypeOf(v).Name(), want, got) + } +} |