summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fs
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fs')
-rw-r--r--pkg/sentry/fs/attr.go44
-rw-r--r--pkg/sentry/fs/context.go24
-rw-r--r--pkg/sentry/fs/ext/BUILD54
-rw-r--r--pkg/sentry/fs/ext/assets/README.md36
-rw-r--r--pkg/sentry/fs/ext/assets/bigfile.txt41
-rw-r--r--pkg/sentry/fs/ext/assets/file.txt1
l---------pkg/sentry/fs/ext/assets/symlink.txt1
-rw-r--r--pkg/sentry/fs/ext/assets/tiny.ext2bin65536 -> 0 bytes
-rw-r--r--pkg/sentry/fs/ext/assets/tiny.ext3bin65536 -> 0 bytes
-rw-r--r--pkg/sentry/fs/ext/assets/tiny.ext4bin65536 -> 0 bytes
-rw-r--r--pkg/sentry/fs/ext/dentry.go56
-rw-r--r--pkg/sentry/fs/ext/disklayout/BUILD48
-rw-r--r--pkg/sentry/fs/ext/disklayout/block_group.go137
-rw-r--r--pkg/sentry/fs/ext/disklayout/block_group_32.go72
-rw-r--r--pkg/sentry/fs/ext/disklayout/block_group_64.go93
-rw-r--r--pkg/sentry/fs/ext/disklayout/block_group_test.go26
-rw-r--r--pkg/sentry/fs/ext/disklayout/dirent.go69
-rw-r--r--pkg/sentry/fs/ext/disklayout/dirent_new.go61
-rw-r--r--pkg/sentry/fs/ext/disklayout/dirent_old.go49
-rw-r--r--pkg/sentry/fs/ext/disklayout/dirent_test.go28
-rw-r--r--pkg/sentry/fs/ext/disklayout/disklayout.go50
-rw-r--r--pkg/sentry/fs/ext/disklayout/extent.go139
-rw-r--r--pkg/sentry/fs/ext/disklayout/extent_test.go27
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode.go274
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode_new.go96
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode_old.go117
-rw-r--r--pkg/sentry/fs/ext/disklayout/inode_test.go222
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock.go471
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock_32.go76
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock_64.go95
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock_old.go105
-rw-r--r--pkg/sentry/fs/ext/disklayout/superblock_test.go27
-rw-r--r--pkg/sentry/fs/ext/disklayout/test_utils.go30
-rw-r--r--pkg/sentry/fs/ext/ext.go97
-rw-r--r--pkg/sentry/fs/ext/ext_test.go407
-rw-r--r--pkg/sentry/fs/ext/extent_test.go185
-rw-r--r--pkg/sentry/fs/ext/filesystem.go137
-rw-r--r--pkg/sentry/fs/ext/inode.go209
-rw-r--r--pkg/sentry/fs/ext/utils.go109
-rw-r--r--pkg/sentry/fs/fdpipe/pipe.go2
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached.go111
-rw-r--r--pkg/sentry/fs/fsutil/inode_cached_test.go10
-rw-r--r--pkg/sentry/fs/gofer/fs.go22
-rw-r--r--pkg/sentry/fs/gofer/session.go27
-rw-r--r--pkg/sentry/fs/host/inode.go6
-rw-r--r--pkg/sentry/fs/host/socket.go8
-rw-r--r--pkg/sentry/fs/host/socket_iovec.go18
-rw-r--r--pkg/sentry/fs/host/socket_unsafe.go9
-rw-r--r--pkg/sentry/fs/mounts.go16
-rw-r--r--pkg/sentry/fs/proc/net.go4
-rw-r--r--pkg/sentry/fs/ramfs/dir.go23
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go6
-rw-r--r--pkg/sentry/fs/tty/BUILD1
-rw-r--r--pkg/sentry/fs/tty/dir.go3
-rw-r--r--pkg/sentry/fs/tty/master.go17
-rw-r--r--pkg/sentry/fs/tty/slave.go13
-rw-r--r--pkg/sentry/fs/tty/terminal.go92
57 files changed, 361 insertions, 3740 deletions
diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go
index 9fc6a5bc2..4f3d6410e 100644
--- a/pkg/sentry/fs/attr.go
+++ b/pkg/sentry/fs/attr.go
@@ -111,6 +111,50 @@ func (n InodeType) LinuxType() uint32 {
}
}
+// ToDirentType converts an InodeType to a linux dirent type field.
+func ToDirentType(nodeType InodeType) uint8 {
+ switch nodeType {
+ case RegularFile, SpecialFile:
+ return linux.DT_REG
+ case Symlink:
+ return linux.DT_LNK
+ case Directory, SpecialDirectory:
+ return linux.DT_DIR
+ case Pipe:
+ return linux.DT_FIFO
+ case CharacterDevice:
+ return linux.DT_CHR
+ case BlockDevice:
+ return linux.DT_BLK
+ case Socket:
+ return linux.DT_SOCK
+ default:
+ return linux.DT_UNKNOWN
+ }
+}
+
+// ToInodeType coverts a linux file type to InodeType.
+func ToInodeType(linuxFileType linux.FileMode) InodeType {
+ switch linuxFileType {
+ case linux.ModeRegular:
+ return RegularFile
+ case linux.ModeDirectory:
+ return Directory
+ case linux.ModeSymlink:
+ return Symlink
+ case linux.ModeNamedPipe:
+ return Pipe
+ case linux.ModeCharacterDevice:
+ return CharacterDevice
+ case linux.ModeBlockDevice:
+ return BlockDevice
+ case linux.ModeSocket:
+ return Socket
+ default:
+ panic(fmt.Sprintf("unknown file mode: %d", linuxFileType))
+ }
+}
+
// StableAttr contains Inode attributes that will be stable throughout the
// lifetime of the Inode.
//
diff --git a/pkg/sentry/fs/context.go b/pkg/sentry/fs/context.go
index 51b4c7ee1..dd427de5d 100644
--- a/pkg/sentry/fs/context.go
+++ b/pkg/sentry/fs/context.go
@@ -112,3 +112,27 @@ func DirentCacheLimiterFromContext(ctx context.Context) *DirentCacheLimiter {
}
return nil
}
+
+type rootContext struct {
+ context.Context
+ root *Dirent
+}
+
+// WithRoot returns a copy of ctx with the given root.
+func WithRoot(ctx context.Context, root *Dirent) context.Context {
+ return &rootContext{
+ Context: ctx,
+ root: root,
+ }
+}
+
+// Value implements Context.Value.
+func (rc rootContext) Value(key interface{}) interface{} {
+ switch key {
+ case CtxRoot:
+ rc.root.IncRef()
+ return rc.root
+ default:
+ return rc.Context.Value(key)
+ }
+}
diff --git a/pkg/sentry/fs/ext/BUILD b/pkg/sentry/fs/ext/BUILD
deleted file mode 100644
index 2c15875f5..000000000
--- a/pkg/sentry/fs/ext/BUILD
+++ /dev/null
@@ -1,54 +0,0 @@
-package(licenses = ["notice"])
-
-load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
-
-go_library(
- name = "ext",
- srcs = [
- "dentry.go",
- "ext.go",
- "filesystem.go",
- "inode.go",
- "utils.go",
- ],
- importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext",
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/binary",
- "//pkg/sentry/context",
- "//pkg/sentry/fs/ext/disklayout",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/vfs",
- "//pkg/syserror",
- ],
-)
-
-go_test(
- name = "ext_test",
- size = "small",
- srcs = [
- "ext_test.go",
- "extent_test.go",
- ],
- data = [
- "//pkg/sentry/fs/ext:assets/bigfile.txt",
- "//pkg/sentry/fs/ext:assets/file.txt",
- "//pkg/sentry/fs/ext:assets/tiny.ext2",
- "//pkg/sentry/fs/ext:assets/tiny.ext3",
- "//pkg/sentry/fs/ext:assets/tiny.ext4",
- ],
- embed = [":ext"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/binary",
- "//pkg/sentry/context",
- "//pkg/sentry/context/contexttest",
- "//pkg/sentry/fs/ext/disklayout",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/vfs",
- "//runsc/test/testutil",
- "@com_github_google_go-cmp//cmp:go_default_library",
- "@com_github_google_go-cmp//cmp/cmpopts:go_default_library",
- ],
-)
diff --git a/pkg/sentry/fs/ext/assets/README.md b/pkg/sentry/fs/ext/assets/README.md
deleted file mode 100644
index 6f1e81b3a..000000000
--- a/pkg/sentry/fs/ext/assets/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-### Tiny Ext(2/3/4) Images
-
-The images are of size 64Kb which supports 64 1k blocks and 16 inodes. This is
-the smallest size mkfs.ext(2/3/4) works with.
-
-These images were generated using the following commands.
-
-```bash
-fallocate -l 64K tiny.ext$VERSION
-mkfs.ext$VERSION -j tiny.ext$VERSION
-```
-
-where `VERSION` is `2`, `3` or `4`.
-
-You can mount it using:
-
-```bash
-sudo mount -o loop tiny.ext$VERSION $MOUNTPOINT
-```
-
-`file.txt`, `bigfile.txt` and `symlink.txt` were added to this image by just
-mounting it and copying (while preserving links) those files to the mountpoint
-directory using:
-
-```bash
-sudo cp -P {file.txt,symlink.txt,bigfile.txt} $MOUNTPOINT
-```
-
-The files in this directory mirror the contents and organisation of the files
-stored in the image.
-
-You can umount the filesystem using:
-
-```bash
-sudo umount $MOUNTPOINT
-```
diff --git a/pkg/sentry/fs/ext/assets/bigfile.txt b/pkg/sentry/fs/ext/assets/bigfile.txt
deleted file mode 100644
index 3857cf516..000000000
--- a/pkg/sentry/fs/ext/assets/bigfile.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus faucibus eleifend orci, ut ornare nibh faucibus eu. Cras at condimentum massa. Nullam luctus, elit non porttitor congue, sapien diam feugiat sapien, sed eleifend nulla mauris non arcu. Sed lacinia mauris magna, eu mollis libero varius sit amet. Donec mollis, quam convallis commodo posuere, dolor nisi placerat nisi, in faucibus augue mi eu lorem. In pharetra consectetur faucibus. Ut euismod ex efficitur egestas tincidunt. Maecenas condimentum ut ante in rutrum. Vivamus sed arcu tempor, faucibus turpis et, lacinia diam.
-
-Sed in lacus vel nisl interdum bibendum in sed justo. Nunc tellus risus, molestie vitae arcu sed, molestie tempus ligula. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc risus neque, volutpat et ante non, ullamcorper condimentum ante. Aliquam sed metus in urna condimentum convallis. Vivamus ut libero mauris. Proin mollis posuere consequat. Vestibulum placerat mollis est et pulvinar.
-
-Donec rutrum odio ac diam pharetra, id fermentum magna cursus. Pellentesque in dapibus elit, et condimentum orci. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse euismod dapibus est, id vestibulum mauris. Nulla facilisi. Nulla cursus gravida nisi. Phasellus vestibulum rutrum lectus, a dignissim mauris hendrerit vitae. In at elementum mauris. Integer vel efficitur velit. Nullam fringilla sapien mi, quis luctus neque efficitur ac. Aenean nec quam dapibus nunc commodo pharetra. Proin sapien mi, fermentum aliquet vulputate non, aliquet porttitor diam. Quisque lacinia, urna et finibus fermentum, nunc lacus vehicula ex, sed congue metus lectus ac quam. Aliquam erat volutpat. Suspendisse sodales, dolor ut tincidunt finibus, augue erat varius tellus, a interdum erat sem at nunc. Vestibulum cursus iaculis sapien, vitae feugiat dui auctor quis.
-
-Pellentesque nec maximus nulla, eu blandit diam. Maecenas quis arcu ornare, congue ante at, vehicula ipsum. Praesent feugiat mauris rutrum sem fermentum, nec luctus ipsum placerat. Pellentesque placerat ipsum at dignissim fringilla. Vivamus et posuere sem, eget hendrerit felis. Aenean vulputate, augue vel mollis feugiat, justo ipsum mollis dolor, eu mollis elit neque ut ipsum. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Fusce bibendum sem quam, vulputate laoreet mi dapibus imperdiet. Sed a purus non nibh pretium aliquet. Integer eget luctus augue, vitae tincidunt magna. Ut eros enim, egestas eu nulla et, lobortis egestas arcu. Cras id ipsum ac justo lacinia rutrum. Vivamus lectus leo, ultricies sed justo at, pellentesque feugiat magna. Ut sollicitudin neque elit, vel ornare mauris commodo id.
-
-Duis dapibus orci et sapien finibus finibus. Mauris eleifend, lacus at vestibulum maximus, quam ligula pharetra erat, sit amet dapibus neque elit vitae neque. In bibendum sollicitudin erat, eget ultricies tortor malesuada at. Sed sit amet orci turpis. Donec feugiat ligula nibh, molestie tincidunt lectus elementum id. Donec volutpat maximus nibh, in vulputate felis posuere eu. Cras tincidunt ullamcorper lacus. Phasellus porta lorem auctor, congue magna a, commodo elit.
-
-Etiam auctor mi quis elit sodales, eu pulvinar arcu condimentum. Aenean imperdiet risus et dapibus tincidunt. Nullam tincidunt dictum dui, sed commodo urna rutrum id. Ut mollis libero vel elit laoreet bibendum. Quisque arcu arcu, tincidunt at ultricies id, vulputate nec metus. In tristique posuere quam sit amet volutpat. Vivamus scelerisque et nunc at dapibus. Fusce finibus libero ut ligula pretium rhoncus. Mauris non elit in arcu finibus imperdiet. Pellentesque nec massa odio. Proin rutrum mauris non sagittis efficitur. Aliquam auctor quam at dignissim faucibus. Ut eget ligula in magna posuere ultricies vitae sit amet turpis. Duis maximus odio nulla. Donec gravida sem tristique tempus scelerisque.
-
-Interdum et malesuada fames ac ante ipsum primis in faucibus. Fusce pharetra magna vulputate aliquet tempus. Duis id hendrerit arcu. Quisque ut ex elit. Integer velit orci, venenatis ut sapien ac, placerat porttitor dui. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nunc hendrerit cursus diam, hendrerit finibus ipsum scelerisque ut. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos.
-
-Nulla non euismod neque. Phasellus vel sapien eu metus pulvinar rhoncus. Suspendisse eu mollis tellus, quis vestibulum tortor. Maecenas interdum dolor sed nulla fermentum maximus. Donec imperdiet ullamcorper condimentum. Nam quis nibh ante. Praesent quis tellus ut tortor pulvinar blandit sit amet ut sapien. Vestibulum est orci, pellentesque vitae tristique sit amet, tristique non felis.
-
-Vivamus sodales pellentesque varius. Sed vel tempus ligula. Nulla tristique nisl vel dui facilisis, ac sodales augue hendrerit. Proin augue nisi, vestibulum quis augue nec, sagittis tincidunt velit. Vestibulum euismod, nulla nec sodales faucibus, urna sapien vulputate magna, id varius metus sapien ut neque. Duis in mollis urna, in scelerisque enim. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Nunc condimentum dictum turpis, et egestas neque dapibus eget. Quisque fringilla, dui eu venenatis eleifend, erat nibh lacinia urna, at lacinia lacus sapien eu dui. Duis eu erat ut mi lacinia convallis a sed ex.
-
-Fusce elit metus, tincidunt nec eleifend a, hendrerit nec ligula. Duis placerat finibus sollicitudin. In euismod porta tellus, in luctus justo bibendum bibendum. Maecenas at magna eleifend lectus tincidunt suscipit ut a ligula. Nulla tempor accumsan felis, fermentum dapibus est eleifend vitae. Mauris urna sem, fringilla at ultricies non, ultrices in arcu. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nam vehicula nunc at laoreet imperdiet. Nunc tristique ut risus id aliquet. Integer eleifend massa orci.
-
-Vestibulum sed ante sollicitudin nisi fringilla bibendum nec vel quam. Sed pretium augue eu ligula congue pulvinar. Donec vitae magna tincidunt, pharetra lacus id, convallis nulla. Cras viverra nisl nisl, varius convallis leo vulputate nec. Morbi at consequat dui, sed aliquet metus. Sed suscipit fermentum mollis. Maecenas nec mi sodales, tincidunt purus in, tristique mauris. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec interdum mi in velit efficitur, quis ultrices ex imperdiet. Sed vestibulum, magna ut tristique pretium, mi ipsum placerat tellus, non tempor enim augue et ex. Pellentesque eget felis quis ante sodales viverra ac sed lacus. Donec suscipit tempus massa, eget laoreet massa molestie at.
-
-Aenean fringilla dui non aliquet consectetur. Fusce cursus quam nec orci hendrerit faucibus. Donec consequat suscipit enim, non volutpat lectus auctor interdum. Proin lorem purus, maximus vel orci vitae, suscipit egestas turpis. Donec risus urna, congue a sem eu, aliquet placerat odio. Morbi gravida tristique turpis, quis efficitur enim. Nunc interdum gravida ipsum vel facilisis. Nunc congue finibus sollicitudin. Quisque euismod aliquet lectus et tincidunt. Curabitur ultrices sem ut mi fringilla fermentum. Morbi pretium, nisi sit amet dapibus congue, dolor enim consectetur risus, a interdum ligula odio sed odio. Quisque facilisis, mi at suscipit gravida, nunc sapien cursus justo, ut luctus odio nulla quis leo. Integer condimentum lobortis mauris, non egestas tellus lobortis sit amet.
-
-In sollicitudin velit ac ante vehicula, vitae varius tortor mollis. In hac habitasse platea dictumst. Quisque et orci lorem. Integer malesuada fringilla luctus. Pellentesque malesuada, mi non lobortis porttitor, ante ligula vulputate ante, nec dictum risus eros sit amet sapien. Nulla aliquam lorem libero, ac varius nulla tristique eget. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Ut pellentesque mauris orci, vel consequat mi varius a. Ut sit amet elit vulputate, lacinia metus non, fermentum nisl. Pellentesque eu nisi sed quam egestas blandit. Duis sit amet lobortis dolor. Donec consectetur sem interdum, tristique elit sit amet, sodales lacus. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Fusce id aliquam augue. Sed pretium congue risus vitae lacinia. Vestibulum non vulputate risus, ut malesuada justo.
-
-Sed odio elit, consectetur ac mauris quis, consequat commodo libero. Fusce sodales velit vulputate pulvinar fermentum. Donec iaculis nec nisl eget faucibus. Mauris at dictum velit. Donec fermentum lectus eu viverra volutpat. Aliquam consequat facilisis lorem, cursus consequat dui bibendum ullamcorper. Pellentesque nulla magna, imperdiet at magna et, cursus egestas enim. Nullam semper molestie lectus sit amet semper. Duis eget tincidunt est. Integer id neque risus. Integer ultricies hendrerit vestibulum. Donec blandit blandit sagittis. Nunc consectetur vitae nisi consectetur volutpat.
-
-Nulla id lorem fermentum, efficitur magna a, hendrerit dui. Vivamus sagittis orci gravida, bibendum quam eget, molestie est. Phasellus nec enim tincidunt, volutpat sapien non, laoreet diam. Nulla posuere enim nec porttitor lobortis. Donec auctor odio ut orci eleifend, ut eleifend purus convallis. Interdum et malesuada fames ac ante ipsum primis in faucibus. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut hendrerit, purus eget viverra tincidunt, sem magna imperdiet libero, et aliquam turpis neque vitae elit. Maecenas semper varius iaculis. Cras non lorem quis quam bibendum eleifend in et libero. Curabitur at purus mauris. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus porta diam sed elit eleifend gravida.
-
-Nulla facilisi. Ut ultricies diam vel diam consectetur, vel porta augue molestie. Fusce interdum sapien et metus facilisis pellentesque. Nulla convallis sem at nunc vehicula facilisis. Nam ac rutrum purus. Nunc bibendum, dolor sit amet tempus ullamcorper, lorem leo tempor sem, id fringilla nunc augue scelerisque augue. Nullam sit amet rutrum nisl. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Donec sed mauris gravida eros vehicula sagittis at eget orci. Cras elementum, eros at accumsan bibendum, libero neque blandit purus, vitae vestibulum libero massa ac nibh. Integer at placerat nulla. Mauris eu eleifend orci. Aliquam consequat ligula vitae erat porta lobortis. Duis fermentum elit ac aliquet ornare.
-
-Mauris eget cursus tellus, eget sodales purus. Aliquam malesuada, augue id vulputate finibus, nisi ex bibendum nisl, sit amet laoreet quam urna a dolor. Nullam ultricies, sapien eu laoreet consequat, erat eros dignissim diam, ultrices sodales lectus mauris et leo. Morbi lacinia eu ante at tempus. Sed iaculis finibus magna malesuada efficitur. Donec faucibus erat sit amet elementum feugiat. Praesent a placerat nisi. Etiam lacinia gravida diam, et sollicitudin sapien tincidunt ut.
-
-Maecenas felis quam, tincidunt vitae venenatis scelerisque, viverra vitae odio. Phasellus enim neque, ultricies suscipit malesuada sit amet, vehicula sit amet purus. Nulla placerat sit amet dui vel tincidunt. Nam quis neque vel magna commodo egestas. Vestibulum sagittis rutrum lorem ut congue. Maecenas vel ultrices tellus. Donec efficitur, urna ac consequat iaculis, lorem felis pharetra eros, eget faucibus orci lectus sit amet arcu.
-
-Ut a tempus nisi. Nulla facilisi. Praesent vulputate maximus mi et dapibus. Sed sit amet libero ac augue hendrerit efficitur in a sapien. Mauris placerat velit sit amet tellus sollicitudin faucibus. Donec egestas a magna ac suscipit. Duis enim sapien, mollis sed egestas et, vestibulum vel leo.
-
-Proin quis dapibus dui. Donec eu tincidunt nunc. Vivamus eget purus consectetur, maximus ante vitae, tincidunt elit. Aenean mattis dolor a gravida aliquam. Praesent quis tellus id sem maximus vulputate nec sed nulla. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur metus nulla, volutpat volutpat est eu, hendrerit congue erat. Aliquam sollicitudin augue ante. Sed sollicitudin, magna eu consequat elementum, mi augue ullamcorper felis, molestie imperdiet erat metus iaculis est. Proin ac tortor nisi. Pellentesque quis nisi risus. Integer enim sapien, tincidunt quis tortor id, accumsan venenatis mi. Nulla facilisi.
-
-Cras pretium sit amet quam congue maximus. Morbi lacus libero, imperdiet commodo massa sed, scelerisque placerat libero. Cras nisl nisi, consectetur sed bibendum eu, venenatis at enim. Proin sodales justo at quam aliquam, a consectetur mi ornare. Donec porta ac est sit amet efficitur. Suspendisse vestibulum tortor id neque imperdiet, id lacinia risus vehicula. Phasellus ac eleifend purus. Mauris vel gravida ante. Aliquam vitae lobortis risus. Sed vehicula consectetur tincidunt. Nam et justo vitae purus molestie consequat. Pellentesque ipsum ex, convallis quis blandit non, gravida et urna. Donec diam ligula amet.
diff --git a/pkg/sentry/fs/ext/assets/file.txt b/pkg/sentry/fs/ext/assets/file.txt
deleted file mode 100644
index 980a0d5f1..000000000
--- a/pkg/sentry/fs/ext/assets/file.txt
+++ /dev/null
@@ -1 +0,0 @@
-Hello World!
diff --git a/pkg/sentry/fs/ext/assets/symlink.txt b/pkg/sentry/fs/ext/assets/symlink.txt
deleted file mode 120000
index 4c330738c..000000000
--- a/pkg/sentry/fs/ext/assets/symlink.txt
+++ /dev/null
@@ -1 +0,0 @@
-file.txt \ No newline at end of file
diff --git a/pkg/sentry/fs/ext/assets/tiny.ext2 b/pkg/sentry/fs/ext/assets/tiny.ext2
deleted file mode 100644
index 381ade9bf..000000000
--- a/pkg/sentry/fs/ext/assets/tiny.ext2
+++ /dev/null
Binary files differ
diff --git a/pkg/sentry/fs/ext/assets/tiny.ext3 b/pkg/sentry/fs/ext/assets/tiny.ext3
deleted file mode 100644
index 0e97a324c..000000000
--- a/pkg/sentry/fs/ext/assets/tiny.ext3
+++ /dev/null
Binary files differ
diff --git a/pkg/sentry/fs/ext/assets/tiny.ext4 b/pkg/sentry/fs/ext/assets/tiny.ext4
deleted file mode 100644
index a6859736d..000000000
--- a/pkg/sentry/fs/ext/assets/tiny.ext4
+++ /dev/null
Binary files differ
diff --git a/pkg/sentry/fs/ext/dentry.go b/pkg/sentry/fs/ext/dentry.go
deleted file mode 100644
index 054fb42b6..000000000
--- a/pkg/sentry/fs/ext/dentry.go
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-// dentry implements vfs.DentryImpl.
-type dentry struct {
- vfsd vfs.Dentry
-
- // inode is the inode represented by this dentry. Multiple Dentries may
- // share a single non-directory Inode (with hard links). inode is
- // immutable.
- inode *inode
-}
-
-// Compiles only if dentry implements vfs.DentryImpl.
-var _ vfs.DentryImpl = (*dentry)(nil)
-
-// newDentry is the dentry constructor.
-func newDentry(in *inode) *dentry {
- d := &dentry{
- inode: in,
- }
- d.vfsd.Init(d)
- return d
-}
-
-// IncRef implements vfs.DentryImpl.IncRef.
-func (d *dentry) IncRef(vfsfs *vfs.Filesystem) {
- d.inode.incRef()
-}
-
-// TryIncRef implements vfs.DentryImpl.TryIncRef.
-func (d *dentry) TryIncRef(vfsfs *vfs.Filesystem) bool {
- return d.inode.tryIncRef()
-}
-
-// DecRef implements vfs.DentryImpl.DecRef.
-func (d *dentry) DecRef(vfsfs *vfs.Filesystem) {
- d.inode.decRef(vfsfs.Impl().(*filesystem))
-}
diff --git a/pkg/sentry/fs/ext/disklayout/BUILD b/pkg/sentry/fs/ext/disklayout/BUILD
deleted file mode 100644
index dde15110d..000000000
--- a/pkg/sentry/fs/ext/disklayout/BUILD
+++ /dev/null
@@ -1,48 +0,0 @@
-package(licenses = ["notice"])
-
-load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
-
-go_library(
- name = "disklayout",
- srcs = [
- "block_group.go",
- "block_group_32.go",
- "block_group_64.go",
- "dirent.go",
- "dirent_new.go",
- "dirent_old.go",
- "disklayout.go",
- "extent.go",
- "inode.go",
- "inode_new.go",
- "inode_old.go",
- "superblock.go",
- "superblock_32.go",
- "superblock_64.go",
- "superblock_old.go",
- "test_utils.go",
- ],
- importpath = "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout",
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/binary",
- "//pkg/sentry/fs",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/time",
- ],
-)
-
-go_test(
- name = "disklayout_test",
- size = "small",
- srcs = [
- "block_group_test.go",
- "dirent_test.go",
- "extent_test.go",
- "inode_test.go",
- "superblock_test.go",
- ],
- embed = [":disklayout"],
- deps = ["//pkg/sentry/kernel/time"],
-)
diff --git a/pkg/sentry/fs/ext/disklayout/block_group.go b/pkg/sentry/fs/ext/disklayout/block_group.go
deleted file mode 100644
index ad6f4fef8..000000000
--- a/pkg/sentry/fs/ext/disklayout/block_group.go
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// BlockGroup represents a Linux ext block group descriptor. An ext file system
-// is split into a series of block groups. This provides an access layer to
-// information needed to access and use a block group.
-//
-// Location:
-// - The block group descriptor table is always placed in the blocks
-// immediately after the block containing the superblock.
-// - The 1st block group descriptor in the original table is in the
-// (sb.FirstDataBlock() + 1)th block.
-// - See SuperBlock docs to see where the block group descriptor table is
-// replicated.
-// - sb.BgDescSize() must be used as the block group descriptor entry size
-// while reading the table from disk.
-//
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#block-group-descriptors.
-type BlockGroup interface {
- // InodeTable returns the absolute block number of the block containing the
- // inode table. This points to an array of Inode structs. Inode tables are
- // statically allocated at mkfs time. The superblock records the number of
- // inodes per group (length of this table) and the size of each inode struct.
- InodeTable() uint64
-
- // BlockBitmap returns the absolute block number of the block containing the
- // block bitmap. This bitmap tracks the usage of data blocks within this block
- // group and has its own checksum.
- BlockBitmap() uint64
-
- // InodeBitmap returns the absolute block number of the block containing the
- // inode bitmap. This bitmap tracks the usage of this group's inode table
- // entries and has its own checksum.
- InodeBitmap() uint64
-
- // ExclusionBitmap returns the absolute block number of the snapshot exclusion
- // bitmap.
- ExclusionBitmap() uint64
-
- // FreeBlocksCount returns the number of free blocks in the group.
- FreeBlocksCount() uint32
-
- // FreeInodesCount returns the number of free inodes in the group.
- FreeInodesCount() uint32
-
- // DirectoryCount returns the number of inodes that represent directories
- // under this block group.
- DirectoryCount() uint32
-
- // UnusedInodeCount returns the number of unused inodes beyond the last used
- // inode in this group's inode table. As a result, we needn’t scan past the
- // (InodesPerGroup - UnusedInodeCount())th entry in the inode table.
- UnusedInodeCount() uint32
-
- // BlockBitmapChecksum returns the block bitmap checksum. This is calculated
- // using crc32c(FS UUID + group number + entire bitmap).
- BlockBitmapChecksum() uint32
-
- // InodeBitmapChecksum returns the inode bitmap checksum. This is calculated
- // using crc32c(FS UUID + group number + entire bitmap).
- InodeBitmapChecksum() uint32
-
- // Checksum returns this block group's checksum.
- //
- // If SbMetadataCsum feature is set:
- // - checksum is crc32c(FS UUID + group number + group descriptor
- // structure) & 0xFFFF.
- //
- // If SbGdtCsum feature is set:
- // - checksum is crc16(FS UUID + group number + group descriptor
- // structure).
- //
- // SbMetadataCsum and SbGdtCsum should not be both set.
- // If they are, Linux warns and asks to run fsck.
- Checksum() uint16
-
- // Flags returns BGFlags which represents the block group flags.
- Flags() BGFlags
-}
-
-// These are the different block group flags.
-const (
- // BgInodeUninit indicates that inode table and bitmap are not initialized.
- BgInodeUninit uint16 = 0x1
-
- // BgBlockUninit indicates that block bitmap is not initialized.
- BgBlockUninit uint16 = 0x2
-
- // BgInodeZeroed indicates that inode table is zeroed.
- BgInodeZeroed uint16 = 0x4
-)
-
-// BGFlags represents all the different combinations of block group flags.
-type BGFlags struct {
- InodeUninit bool
- BlockUninit bool
- InodeZeroed bool
-}
-
-// ToInt converts a BGFlags struct back to its 16-bit representation.
-func (f BGFlags) ToInt() uint16 {
- var res uint16
-
- if f.InodeUninit {
- res |= BgInodeUninit
- }
- if f.BlockUninit {
- res |= BgBlockUninit
- }
- if f.InodeZeroed {
- res |= BgInodeZeroed
- }
-
- return res
-}
-
-// BGFlagsFromInt converts the 16-bit flag representation to a BGFlags struct.
-func BGFlagsFromInt(flags uint16) BGFlags {
- return BGFlags{
- InodeUninit: flags&BgInodeUninit > 0,
- BlockUninit: flags&BgBlockUninit > 0,
- InodeZeroed: flags&BgInodeZeroed > 0,
- }
-}
diff --git a/pkg/sentry/fs/ext/disklayout/block_group_32.go b/pkg/sentry/fs/ext/disklayout/block_group_32.go
deleted file mode 100644
index 3e16c76db..000000000
--- a/pkg/sentry/fs/ext/disklayout/block_group_32.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// BlockGroup32Bit emulates the first half of struct ext4_group_desc in
-// fs/ext4/ext4.h. It is the block group descriptor struct for ext2, ext3 and
-// 32-bit ext4 filesystems. It implements BlockGroup interface.
-type BlockGroup32Bit struct {
- BlockBitmapLo uint32
- InodeBitmapLo uint32
- InodeTableLo uint32
- FreeBlocksCountLo uint16
- FreeInodesCountLo uint16
- UsedDirsCountLo uint16
- FlagsRaw uint16
- ExcludeBitmapLo uint32
- BlockBitmapChecksumLo uint16
- InodeBitmapChecksumLo uint16
- ItableUnusedLo uint16
- ChecksumRaw uint16
-}
-
-// Compiles only if BlockGroup32Bit implements BlockGroup.
-var _ BlockGroup = (*BlockGroup32Bit)(nil)
-
-// InodeTable implements BlockGroup.InodeTable.
-func (bg *BlockGroup32Bit) InodeTable() uint64 { return uint64(bg.InodeTableLo) }
-
-// BlockBitmap implements BlockGroup.BlockBitmap.
-func (bg *BlockGroup32Bit) BlockBitmap() uint64 { return uint64(bg.BlockBitmapLo) }
-
-// InodeBitmap implements BlockGroup.InodeBitmap.
-func (bg *BlockGroup32Bit) InodeBitmap() uint64 { return uint64(bg.InodeBitmapLo) }
-
-// ExclusionBitmap implements BlockGroup.ExclusionBitmap.
-func (bg *BlockGroup32Bit) ExclusionBitmap() uint64 { return uint64(bg.ExcludeBitmapLo) }
-
-// FreeBlocksCount implements BlockGroup.FreeBlocksCount.
-func (bg *BlockGroup32Bit) FreeBlocksCount() uint32 { return uint32(bg.FreeBlocksCountLo) }
-
-// FreeInodesCount implements BlockGroup.FreeInodesCount.
-func (bg *BlockGroup32Bit) FreeInodesCount() uint32 { return uint32(bg.FreeInodesCountLo) }
-
-// DirectoryCount implements BlockGroup.DirectoryCount.
-func (bg *BlockGroup32Bit) DirectoryCount() uint32 { return uint32(bg.UsedDirsCountLo) }
-
-// UnusedInodeCount implements BlockGroup.UnusedInodeCount.
-func (bg *BlockGroup32Bit) UnusedInodeCount() uint32 { return uint32(bg.ItableUnusedLo) }
-
-// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum.
-func (bg *BlockGroup32Bit) BlockBitmapChecksum() uint32 { return uint32(bg.BlockBitmapChecksumLo) }
-
-// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum.
-func (bg *BlockGroup32Bit) InodeBitmapChecksum() uint32 { return uint32(bg.InodeBitmapChecksumLo) }
-
-// Checksum implements BlockGroup.Checksum.
-func (bg *BlockGroup32Bit) Checksum() uint16 { return bg.ChecksumRaw }
-
-// Flags implements BlockGroup.Flags.
-func (bg *BlockGroup32Bit) Flags() BGFlags { return BGFlagsFromInt(bg.FlagsRaw) }
diff --git a/pkg/sentry/fs/ext/disklayout/block_group_64.go b/pkg/sentry/fs/ext/disklayout/block_group_64.go
deleted file mode 100644
index 9a809197a..000000000
--- a/pkg/sentry/fs/ext/disklayout/block_group_64.go
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// BlockGroup64Bit emulates struct ext4_group_desc in fs/ext4/ext4.h.
-// It is the block group descriptor struct for 64-bit ext4 filesystems.
-// It implements BlockGroup interface. It is an extension of the 32-bit
-// version of BlockGroup.
-type BlockGroup64Bit struct {
- // We embed the 32-bit struct here because 64-bit version is just an extension
- // of the 32-bit version.
- BlockGroup32Bit
-
- // 64-bit specific fields.
- BlockBitmapHi uint32
- InodeBitmapHi uint32
- InodeTableHi uint32
- FreeBlocksCountHi uint16
- FreeInodesCountHi uint16
- UsedDirsCountHi uint16
- ItableUnusedHi uint16
- ExcludeBitmapHi uint32
- BlockBitmapChecksumHi uint16
- InodeBitmapChecksumHi uint16
- _ uint32 // Padding to 64 bytes.
-}
-
-// Compiles only if BlockGroup64Bit implements BlockGroup.
-var _ BlockGroup = (*BlockGroup64Bit)(nil)
-
-// Methods to override. Checksum() and Flags() are not overridden.
-
-// InodeTable implements BlockGroup.InodeTable.
-func (bg *BlockGroup64Bit) InodeTable() uint64 {
- return (uint64(bg.InodeTableHi) << 32) | uint64(bg.InodeTableLo)
-}
-
-// BlockBitmap implements BlockGroup.BlockBitmap.
-func (bg *BlockGroup64Bit) BlockBitmap() uint64 {
- return (uint64(bg.BlockBitmapHi) << 32) | uint64(bg.BlockBitmapLo)
-}
-
-// InodeBitmap implements BlockGroup.InodeBitmap.
-func (bg *BlockGroup64Bit) InodeBitmap() uint64 {
- return (uint64(bg.InodeBitmapHi) << 32) | uint64(bg.InodeBitmapLo)
-}
-
-// ExclusionBitmap implements BlockGroup.ExclusionBitmap.
-func (bg *BlockGroup64Bit) ExclusionBitmap() uint64 {
- return (uint64(bg.ExcludeBitmapHi) << 32) | uint64(bg.ExcludeBitmapLo)
-}
-
-// FreeBlocksCount implements BlockGroup.FreeBlocksCount.
-func (bg *BlockGroup64Bit) FreeBlocksCount() uint32 {
- return (uint32(bg.FreeBlocksCountHi) << 16) | uint32(bg.FreeBlocksCountLo)
-}
-
-// FreeInodesCount implements BlockGroup.FreeInodesCount.
-func (bg *BlockGroup64Bit) FreeInodesCount() uint32 {
- return (uint32(bg.FreeInodesCountHi) << 16) | uint32(bg.FreeInodesCountLo)
-}
-
-// DirectoryCount implements BlockGroup.DirectoryCount.
-func (bg *BlockGroup64Bit) DirectoryCount() uint32 {
- return (uint32(bg.UsedDirsCountHi) << 16) | uint32(bg.UsedDirsCountLo)
-}
-
-// UnusedInodeCount implements BlockGroup.UnusedInodeCount.
-func (bg *BlockGroup64Bit) UnusedInodeCount() uint32 {
- return (uint32(bg.ItableUnusedHi) << 16) | uint32(bg.ItableUnusedLo)
-}
-
-// BlockBitmapChecksum implements BlockGroup.BlockBitmapChecksum.
-func (bg *BlockGroup64Bit) BlockBitmapChecksum() uint32 {
- return (uint32(bg.BlockBitmapChecksumHi) << 16) | uint32(bg.BlockBitmapChecksumLo)
-}
-
-// InodeBitmapChecksum implements BlockGroup.InodeBitmapChecksum.
-func (bg *BlockGroup64Bit) InodeBitmapChecksum() uint32 {
- return (uint32(bg.InodeBitmapChecksumHi) << 16) | uint32(bg.InodeBitmapChecksumLo)
-}
diff --git a/pkg/sentry/fs/ext/disklayout/block_group_test.go b/pkg/sentry/fs/ext/disklayout/block_group_test.go
deleted file mode 100644
index 0ef4294c0..000000000
--- a/pkg/sentry/fs/ext/disklayout/block_group_test.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "testing"
-)
-
-// TestBlockGroupSize tests that the block group descriptor structs are of the
-// correct size.
-func TestBlockGroupSize(t *testing.T) {
- assertSize(t, BlockGroup32Bit{}, 32)
- assertSize(t, BlockGroup64Bit{}, 64)
-}
diff --git a/pkg/sentry/fs/ext/disklayout/dirent.go b/pkg/sentry/fs/ext/disklayout/dirent.go
deleted file mode 100644
index 685bf57b8..000000000
--- a/pkg/sentry/fs/ext/disklayout/dirent.go
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/fs"
-)
-
-const (
- // MaxFileName is the maximum length of an ext fs file's name.
- MaxFileName = 255
-)
-
-var (
- // inodeTypeByFileType maps ext4 file types to vfs inode types.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#ftype.
- inodeTypeByFileType = map[uint8]fs.InodeType{
- 0: fs.Anonymous,
- 1: fs.RegularFile,
- 2: fs.Directory,
- 3: fs.CharacterDevice,
- 4: fs.BlockDevice,
- 5: fs.Pipe,
- 6: fs.Socket,
- 7: fs.Symlink,
- }
-)
-
-// The Dirent interface should be implemented by structs representing ext
-// directory entries. These are for the linear classical directories which
-// just store a list of dirent structs. A directory is a series of data blocks
-// where is each data block contains a linear array of dirents. The last entry
-// of the block has a record size that takes it to the end of the block. The
-// end of the directory is when you read dirInode.Size() bytes from the blocks.
-//
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#linear-classic-directories.
-type Dirent interface {
- // Inode returns the absolute inode number of the underlying inode.
- // Inode number 0 signifies an unused dirent.
- Inode() uint32
-
- // RecordSize returns the record length of this dirent on disk. The next
- // dirent in the dirent list should be read after these many bytes from
- // the current dirent. Must be a multiple of 4.
- RecordSize() uint16
-
- // FileName returns the name of the file. Can be at most 255 is length.
- FileName() string
-
- // FileType returns the inode type of the underlying inode. This is a
- // performance hack so that we do not have to read the underlying inode struct
- // to know the type of inode. This will only work when the SbDirentFileType
- // feature is set. If not, the second returned value will be false indicating
- // that user code has to use the inode mode to extract the file type.
- FileType() (fs.InodeType, bool)
-}
diff --git a/pkg/sentry/fs/ext/disklayout/dirent_new.go b/pkg/sentry/fs/ext/disklayout/dirent_new.go
deleted file mode 100644
index 29ae4a5c2..000000000
--- a/pkg/sentry/fs/ext/disklayout/dirent_new.go
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "fmt"
-
- "gvisor.dev/gvisor/pkg/sentry/fs"
-)
-
-// DirentNew represents the ext4 directory entry struct. This emulates Linux's
-// ext4_dir_entry_2 struct. The FileName can not be more than 255 bytes so we
-// only need 8 bits to store the NameLength. As a result, NameLength has been
-// shortened and the other 8 bits are used to encode the file type. Use the
-// FileTypeRaw field only if the SbDirentFileType feature is set.
-//
-// Note: This struct can be of variable size on disk. The one described below
-// is of maximum size and the FileName beyond NameLength bytes might contain
-// garbage.
-type DirentNew struct {
- InodeNumber uint32
- RecordLength uint16
- NameLength uint8
- FileTypeRaw uint8
- FileNameRaw [MaxFileName]byte
-}
-
-// Compiles only if DirentNew implements Dirent.
-var _ Dirent = (*DirentNew)(nil)
-
-// Inode implements Dirent.Inode.
-func (d *DirentNew) Inode() uint32 { return d.InodeNumber }
-
-// RecordSize implements Dirent.RecordSize.
-func (d *DirentNew) RecordSize() uint16 { return d.RecordLength }
-
-// FileName implements Dirent.FileName.
-func (d *DirentNew) FileName() string {
- return string(d.FileNameRaw[:d.NameLength])
-}
-
-// FileType implements Dirent.FileType.
-func (d *DirentNew) FileType() (fs.InodeType, bool) {
- if inodeType, ok := inodeTypeByFileType[d.FileTypeRaw]; ok {
- return inodeType, true
- }
-
- panic(fmt.Sprintf("unknown file type %v", d.FileTypeRaw))
-}
diff --git a/pkg/sentry/fs/ext/disklayout/dirent_old.go b/pkg/sentry/fs/ext/disklayout/dirent_old.go
deleted file mode 100644
index 6fff12a6e..000000000
--- a/pkg/sentry/fs/ext/disklayout/dirent_old.go
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import "gvisor.dev/gvisor/pkg/sentry/fs"
-
-// DirentOld represents the old directory entry struct which does not contain
-// the file type. This emulates Linux's ext4_dir_entry struct.
-//
-// Note: This struct can be of variable size on disk. The one described below
-// is of maximum size and the FileName beyond NameLength bytes might contain
-// garbage.
-type DirentOld struct {
- InodeNumber uint32
- RecordLength uint16
- NameLength uint16
- FileNameRaw [MaxFileName]byte
-}
-
-// Compiles only if DirentOld implements Dirent.
-var _ Dirent = (*DirentOld)(nil)
-
-// Inode implements Dirent.Inode.
-func (d *DirentOld) Inode() uint32 { return d.InodeNumber }
-
-// RecordSize implements Dirent.RecordSize.
-func (d *DirentOld) RecordSize() uint16 { return d.RecordLength }
-
-// FileName implements Dirent.FileName.
-func (d *DirentOld) FileName() string {
- return string(d.FileNameRaw[:d.NameLength])
-}
-
-// FileType implements Dirent.FileType.
-func (d *DirentOld) FileType() (fs.InodeType, bool) {
- return fs.Anonymous, false
-}
diff --git a/pkg/sentry/fs/ext/disklayout/dirent_test.go b/pkg/sentry/fs/ext/disklayout/dirent_test.go
deleted file mode 100644
index cc6dff2c9..000000000
--- a/pkg/sentry/fs/ext/disklayout/dirent_test.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "testing"
-)
-
-// TestDirentSize tests that the dirent structs are of the correct
-// size.
-func TestDirentSize(t *testing.T) {
- want := uintptr(263)
-
- assertSize(t, DirentOld{}, want)
- assertSize(t, DirentNew{}, want)
-}
diff --git a/pkg/sentry/fs/ext/disklayout/disklayout.go b/pkg/sentry/fs/ext/disklayout/disklayout.go
deleted file mode 100644
index bdf4e2132..000000000
--- a/pkg/sentry/fs/ext/disklayout/disklayout.go
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package disklayout provides Linux ext file system's disk level structures
-// which can be directly read into from the underlying device. Structs aim to
-// emulate structures `exactly` how they are layed out on disk.
-//
-// This library aims to be compatible with all ext(2/3/4) systems so it
-// provides a generic interface for all major structures and various
-// implementations (for different versions). The user code is responsible for
-// using appropriate implementations based on the underlying device.
-//
-// Interfacing all major structures here serves a few purposes:
-// - Abstracts away the complexity of the underlying structure from client
-// code. The client only has to figure out versioning on set up and then
-// can use these as black boxes and pass it higher up the stack.
-// - Having pointer receivers forces the user to use pointers to these
-// heavy structs. Hence, prevents the client code from unintentionally
-// copying these by value while passing the interface around.
-// - Version-based implementation selection is resolved on set up hence
-// avoiding per call overhead of choosing implementation.
-// - All interface methods are pretty light weight (do not take in any
-// parameters by design). Passing pointer arguments to interface methods
-// can lead to heap allocation as the compiler won't be able to perform
-// escape analysis on an unknown implementation at compile time.
-//
-// Notes:
-// - All fields in these structs are exported because binary.Read would
-// panic otherwise.
-// - All structures on disk are in little-endian order. Only jbd2 (journal)
-// structures are in big-endian order.
-// - All OS dependent fields in these structures will be interpretted using
-// the Linux version of that field.
-// - The suffix `Lo` in field names stands for lower bits of that field.
-// - The suffix `Hi` in field names stands for upper bits of that field.
-// - The suffix `Raw` has been added to indicate that the field is not split
-// into Lo and Hi fields and also to resolve name collision with the
-// respective interface.
-package disklayout
diff --git a/pkg/sentry/fs/ext/disklayout/extent.go b/pkg/sentry/fs/ext/disklayout/extent.go
deleted file mode 100644
index 567523d32..000000000
--- a/pkg/sentry/fs/ext/disklayout/extent.go
+++ /dev/null
@@ -1,139 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// Extents were introduced in ext4 and provide huge performance gains in terms
-// data locality and reduced metadata block usage. Extents are organized in
-// extent trees. The root node is contained in inode.BlocksRaw.
-//
-// Terminology:
-// - Physical Block:
-// Filesystem data block which is addressed normally wrt the entire
-// filesystem (addressed with 48 bits).
-//
-// - File Block:
-// Data block containing *only* file data and addressed wrt to the file
-// with only 32 bits. The (i)th file block contains file data from
-// byte (i * sb.BlockSize()) to ((i+1) * sb.BlockSize()).
-
-const (
- // ExtentStructsSize is the size of all the three extent on-disk structs.
- ExtentStructsSize = 12
-
- // ExtentMagic is the magic number which must be present in the header.
- ExtentMagic = 0xf30a
-)
-
-// ExtentEntryPair couples an in-memory ExtendNode with the ExtentEntry that
-// points to it. We want to cache these structs in memory to avoid repeated
-// disk reads.
-//
-// Note: This struct itself does not represent an on-disk struct.
-type ExtentEntryPair struct {
- // Entry points to the child node on disk.
- Entry ExtentEntry
- // Node points to child node in memory. Is nil if the current node is a leaf.
- Node *ExtentNode
-}
-
-// ExtentNode represents an extent tree node. For internal nodes, all Entries
-// will be ExtendIdxs. For leaf nodes, they will all be Extents.
-//
-// Note: This struct itself does not represent an on-disk struct.
-type ExtentNode struct {
- Header ExtentHeader
- Entries []ExtentEntryPair
-}
-
-// ExtentEntry reprsents an extent tree node entry. The entry can either be
-// an ExtentIdx or Extent itself. This exists to simplify navigation logic.
-type ExtentEntry interface {
- // FileBlock returns the first file block number covered by this entry.
- FileBlock() uint32
-
- // PhysicalBlock returns the child physical block that this entry points to.
- PhysicalBlock() uint64
-}
-
-// ExtentHeader emulates the ext4_extent_header struct in ext4. Each extent
-// tree node begins with this and is followed by `NumEntries` number of:
-// - Extent if `Depth` == 0
-// - ExtentIdx otherwise
-type ExtentHeader struct {
- // Magic in the extent magic number, must be 0xf30a.
- Magic uint16
-
- // NumEntries indicates the number of valid entries following the header.
- NumEntries uint16
-
- // MaxEntries that could follow the header. Used while adding entries.
- MaxEntries uint16
-
- // Height represents the distance of this node from the farthest leaf. Please
- // note that Linux incorrectly calls this `Depth` (which means the distance
- // of the node from the root).
- Height uint16
- _ uint32
-}
-
-// ExtentIdx emulates the ext4_extent_idx struct in ext4. Only present in
-// internal nodes. Sorted in ascending order based on FirstFileBlock since
-// Linux does a binary search on this. This points to a block containing the
-// child node.
-type ExtentIdx struct {
- FirstFileBlock uint32
- ChildBlockLo uint32
- ChildBlockHi uint16
- _ uint16
-}
-
-// Compiles only if ExtentIdx implements ExtentEntry.
-var _ ExtentEntry = (*ExtentIdx)(nil)
-
-// FileBlock implements ExtentEntry.FileBlock.
-func (ei *ExtentIdx) FileBlock() uint32 {
- return ei.FirstFileBlock
-}
-
-// PhysicalBlock implements ExtentEntry.PhysicalBlock. It returns the
-// physical block number of the child block.
-func (ei *ExtentIdx) PhysicalBlock() uint64 {
- return (uint64(ei.ChildBlockHi) << 32) | uint64(ei.ChildBlockLo)
-}
-
-// Extent represents the ext4_extent struct in ext4. Only present in leaf
-// nodes. Sorted in ascending order based on FirstFileBlock since Linux does a
-// binary search on this. This points to an array of data blocks containing the
-// file data. It covers `Length` data blocks starting from `StartBlock`.
-type Extent struct {
- FirstFileBlock uint32
- Length uint16
- StartBlockHi uint16
- StartBlockLo uint32
-}
-
-// Compiles only if Extent implements ExtentEntry.
-var _ ExtentEntry = (*Extent)(nil)
-
-// FileBlock implements ExtentEntry.FileBlock.
-func (e *Extent) FileBlock() uint32 {
- return e.FirstFileBlock
-}
-
-// PhysicalBlock implements ExtentEntry.PhysicalBlock. It returns the
-// physical block number of the first data block this extent covers.
-func (e *Extent) PhysicalBlock() uint64 {
- return (uint64(e.StartBlockHi) << 32) | uint64(e.StartBlockLo)
-}
diff --git a/pkg/sentry/fs/ext/disklayout/extent_test.go b/pkg/sentry/fs/ext/disklayout/extent_test.go
deleted file mode 100644
index b0fad9b71..000000000
--- a/pkg/sentry/fs/ext/disklayout/extent_test.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "testing"
-)
-
-// TestExtentSize tests that the extent structs are of the correct
-// size.
-func TestExtentSize(t *testing.T) {
- assertSize(t, ExtentHeader{}, ExtentStructsSize)
- assertSize(t, ExtentIdx{}, ExtentStructsSize)
- assertSize(t, Extent{}, ExtentStructsSize)
-}
diff --git a/pkg/sentry/fs/ext/disklayout/inode.go b/pkg/sentry/fs/ext/disklayout/inode.go
deleted file mode 100644
index 88ae913f5..000000000
--- a/pkg/sentry/fs/ext/disklayout/inode.go
+++ /dev/null
@@ -1,274 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/time"
-)
-
-// Special inodes. See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#special-inodes.
-const (
- // RootDirInode is the inode number of the root directory inode.
- RootDirInode = 2
-)
-
-// The Inode interface must be implemented by structs representing ext inodes.
-// The inode stores all the metadata pertaining to the file (except for the
-// file name which is held by the directory entry). It does NOT expose all
-// fields and should be extended if need be.
-//
-// Some file systems (e.g. FAT) use the directory entry to store all this
-// information. Ext file systems do not so that they can support hard links.
-// However, ext4 cheats a little bit and duplicates the file type in the
-// directory entry for performance gains.
-//
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#index-nodes.
-type Inode interface {
- // Mode returns the linux file mode which is majorly used to extract
- // information like:
- // - File permissions (read/write/execute by user/group/others).
- // - Sticky, set UID and GID bits.
- // - File type.
- //
- // Masks to extract this information are provided in pkg/abi/linux/file.go.
- Mode() linux.FileMode
-
- // UID returns the owner UID.
- UID() auth.KUID
-
- // GID returns the owner GID.
- GID() auth.KGID
-
- // Size returns the size of the file in bytes.
- Size() uint64
-
- // InodeSize returns the size of this inode struct in bytes.
- // In ext2 and ext3, the inode struct and inode disk record size was fixed at
- // 128 bytes. Ext4 makes it possible for the inode struct to be bigger.
- // However, accessing any field beyond the 128 bytes marker must be verified
- // using this method.
- InodeSize() uint16
-
- // AccessTime returns the last access time. Shows when the file was last read.
- //
- // If InExtendedAttr is set, then this should NOT be used because the
- // underlying field is used to store the extended attribute value checksum.
- AccessTime() time.Time
-
- // ChangeTime returns the last change time. Shows when the file meta data
- // (like permissions) was last changed.
- //
- // If InExtendedAttr is set, then this should NOT be used because the
- // underlying field is used to store the lower 32 bits of the attribute
- // value’s reference count.
- ChangeTime() time.Time
-
- // ModificationTime returns the last modification time. Shows when the file
- // content was last modified.
- //
- // If InExtendedAttr is set, then this should NOT be used because
- // the underlying field contains the number of the inode that owns the
- // extended attribute.
- ModificationTime() time.Time
-
- // DeletionTime returns the deletion time. Inodes are marked as deleted by
- // writing to the underlying field. FS tools can restore files until they are
- // actually overwritten.
- DeletionTime() time.Time
-
- // LinksCount returns the number of hard links to this inode.
- //
- // Normally there is an upper limit on the number of hard links:
- // - ext2/ext3 = 32,000
- // - ext4 = 65,000
- //
- // This implies that an ext4 directory cannot have more than 64,998
- // subdirectories because each subdirectory will have a hard link to the
- // directory via the `..` entry. The directory has hard link via the `.` entry
- // of its own. And finally the inode is initiated with 1 hard link (itself).
- //
- // The underlying value is reset to 1 if all the following hold:
- // - Inode is a directory.
- // - SbDirNlink is enabled.
- // - Number of hard links is incremented past 64,999.
- // Hard link value of 1 for a directory would indicate that the number of hard
- // links is unknown because a directory can have minimum 2 hard links (itself
- // and `.` entry).
- LinksCount() uint16
-
- // Flags returns InodeFlags which represents the inode flags.
- Flags() InodeFlags
-
- // Data returns the underlying inode.i_block array as a slice so it's
- // modifiable. This field is special and is used to store various kinds of
- // things depending on the filesystem version and inode type. The underlying
- // field name in Linux is a little misleading.
- // - In ext2/ext3, it contains the block map.
- // - In ext4, it contains the extent tree root node.
- // - For inline files, it contains the file contents.
- // - For symlinks, it contains the link path (if it fits here).
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#the-contents-of-inode-i-block.
- Data() []byte
-}
-
-// Inode flags. This is not comprehensive and flags which were not used in
-// the Linux kernel have been excluded.
-const (
- // InSync indicates that all writes to the file must be synchronous.
- InSync = 0x8
-
- // InImmutable indicates that this file is immutable.
- InImmutable = 0x10
-
- // InAppend indicates that this file can only be appended to.
- InAppend = 0x20
-
- // InNoDump indicates that teh dump(1) utility should not dump this file.
- InNoDump = 0x40
-
- // InNoAccessTime indicates that the access time of this inode must not be
- // updated.
- InNoAccessTime = 0x80
-
- // InIndex indicates that this directory has hashed indexes.
- InIndex = 0x1000
-
- // InJournalData indicates that file data must always be written through a
- // journal device.
- InJournalData = 0x4000
-
- // InDirSync indicates that all the directory entiry data must be written
- // synchronously.
- InDirSync = 0x10000
-
- // InTopDir indicates that this inode is at the top of the directory hierarchy.
- InTopDir = 0x20000
-
- // InHugeFile indicates that this is a huge file.
- InHugeFile = 0x40000
-
- // InExtents indicates that this inode uses extents.
- InExtents = 0x80000
-
- // InExtendedAttr indicates that this inode stores a large extended attribute
- // value in its data blocks.
- InExtendedAttr = 0x200000
-
- // InInline indicates that this inode has inline data.
- InInline = 0x10000000
-
- // InReserved indicates that this inode is reserved for the ext4 library.
- InReserved = 0x80000000
-)
-
-// InodeFlags represents all possible combinations of inode flags. It aims to
-// cover the bit masks and provide a more user-friendly interface.
-type InodeFlags struct {
- Sync bool
- Immutable bool
- Append bool
- NoDump bool
- NoAccessTime bool
- Index bool
- JournalData bool
- DirSync bool
- TopDir bool
- HugeFile bool
- Extents bool
- ExtendedAttr bool
- Inline bool
- Reserved bool
-}
-
-// ToInt converts inode flags back to its 32-bit rep.
-func (f InodeFlags) ToInt() uint32 {
- var res uint32
-
- if f.Sync {
- res |= InSync
- }
- if f.Immutable {
- res |= InImmutable
- }
- if f.Append {
- res |= InAppend
- }
- if f.NoDump {
- res |= InNoDump
- }
- if f.NoAccessTime {
- res |= InNoAccessTime
- }
- if f.Index {
- res |= InIndex
- }
- if f.JournalData {
- res |= InJournalData
- }
- if f.DirSync {
- res |= InDirSync
- }
- if f.TopDir {
- res |= InTopDir
- }
- if f.HugeFile {
- res |= InHugeFile
- }
- if f.Extents {
- res |= InExtents
- }
- if f.ExtendedAttr {
- res |= InExtendedAttr
- }
- if f.Inline {
- res |= InInline
- }
- if f.Reserved {
- res |= InReserved
- }
-
- return res
-}
-
-// InodeFlagsFromInt converts the integer representation of inode flags to
-// a InodeFlags struct.
-func InodeFlagsFromInt(f uint32) InodeFlags {
- return InodeFlags{
- Sync: f&InSync > 0,
- Immutable: f&InImmutable > 0,
- Append: f&InAppend > 0,
- NoDump: f&InNoDump > 0,
- NoAccessTime: f&InNoAccessTime > 0,
- Index: f&InIndex > 0,
- JournalData: f&InJournalData > 0,
- DirSync: f&InDirSync > 0,
- TopDir: f&InTopDir > 0,
- HugeFile: f&InHugeFile > 0,
- Extents: f&InExtents > 0,
- ExtendedAttr: f&InExtendedAttr > 0,
- Inline: f&InInline > 0,
- Reserved: f&InReserved > 0,
- }
-}
-
-// These masks define how users can view/modify inode flags. The rest of the
-// flags are for internal kernel usage only.
-const (
- InUserReadFlagMask = 0x4BDFFF
- InUserWriteFlagMask = 0x4B80FF
-)
diff --git a/pkg/sentry/fs/ext/disklayout/inode_new.go b/pkg/sentry/fs/ext/disklayout/inode_new.go
deleted file mode 100644
index 8f9f574ce..000000000
--- a/pkg/sentry/fs/ext/disklayout/inode_new.go
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import "gvisor.dev/gvisor/pkg/sentry/kernel/time"
-
-// InodeNew represents ext4 inode structure which can be bigger than
-// OldInodeSize. The actual size of this struct should be determined using
-// inode.ExtraInodeSize. Accessing any field here should be verified with the
-// actual size. The extra space between the end of the inode struct and end of
-// the inode record can be used to store extended attr.
-//
-// If the TimeExtra fields are in scope, the lower 2 bits of those are used
-// to extend their counter part to be 34 bits wide; the rest (upper) 30 bits
-// are used to provide nanoscond precision. Hence, these timestamps will now
-// overflow in May 2446.
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps.
-type InodeNew struct {
- InodeOld
-
- ExtraInodeSize uint16
- ChecksumHi uint16
- ChangeTimeExtra uint32
- ModificationTimeExtra uint32
- AccessTimeExtra uint32
- CreationTime uint32
- CreationTimeExtra uint32
- VersionHi uint32
- ProjectID uint32
-}
-
-// Compiles only if InodeNew implements Inode.
-var _ Inode = (*InodeNew)(nil)
-
-// fromExtraTime decodes the extra time and constructs the kernel time struct
-// with nanosecond precision.
-func fromExtraTime(lo int32, extra uint32) time.Time {
- // See description above InodeNew for format.
- seconds := (int64(extra&0x3) << 32) + int64(lo)
- nanoseconds := int64(extra >> 2)
- return time.FromUnix(seconds, nanoseconds)
-}
-
-// Only override methods which change due to ext4 specific fields.
-
-// Size implements Inode.Size.
-func (in *InodeNew) Size() uint64 {
- return (uint64(in.SizeHi) << 32) | uint64(in.SizeLo)
-}
-
-// InodeSize implements Inode.InodeSize.
-func (in *InodeNew) InodeSize() uint16 {
- return OldInodeSize + in.ExtraInodeSize
-}
-
-// ChangeTime implements Inode.ChangeTime.
-func (in *InodeNew) ChangeTime() time.Time {
- // Apply new timestamp logic if inode.ChangeTimeExtra is in scope.
- if in.ExtraInodeSize >= 8 {
- return fromExtraTime(in.ChangeTimeRaw, in.ChangeTimeExtra)
- }
-
- return in.InodeOld.ChangeTime()
-}
-
-// ModificationTime implements Inode.ModificationTime.
-func (in *InodeNew) ModificationTime() time.Time {
- // Apply new timestamp logic if inode.ModificationTimeExtra is in scope.
- if in.ExtraInodeSize >= 12 {
- return fromExtraTime(in.ModificationTimeRaw, in.ModificationTimeExtra)
- }
-
- return in.InodeOld.ModificationTime()
-}
-
-// AccessTime implements Inode.AccessTime.
-func (in *InodeNew) AccessTime() time.Time {
- // Apply new timestamp logic if inode.AccessTimeExtra is in scope.
- if in.ExtraInodeSize >= 16 {
- return fromExtraTime(in.AccessTimeRaw, in.AccessTimeExtra)
- }
-
- return in.InodeOld.AccessTime()
-}
diff --git a/pkg/sentry/fs/ext/disklayout/inode_old.go b/pkg/sentry/fs/ext/disklayout/inode_old.go
deleted file mode 100644
index db25b11b6..000000000
--- a/pkg/sentry/fs/ext/disklayout/inode_old.go
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/time"
-)
-
-const (
- // OldInodeSize is the inode size in ext2/ext3.
- OldInodeSize = 128
-)
-
-// InodeOld implements Inode interface. It emulates ext2/ext3 inode struct.
-// Inode struct size and record size are both 128 bytes for this.
-//
-// All fields representing time are in seconds since the epoch. Which means that
-// they will overflow in January 2038.
-type InodeOld struct {
- ModeRaw uint16
- UIDLo uint16
- SizeLo uint32
-
- // The time fields are signed integers because they could be negative to
- // represent time before the epoch.
- AccessTimeRaw int32
- ChangeTimeRaw int32
- ModificationTimeRaw int32
- DeletionTimeRaw int32
-
- GIDLo uint16
- LinksCountRaw uint16
- BlocksCountLo uint32
- FlagsRaw uint32
- VersionLo uint32 // This is OS dependent.
- DataRaw [60]byte
- Generation uint32
- FileACLLo uint32
- SizeHi uint32
- ObsoFaddr uint32
-
- // OS dependent fields have been inlined here.
- BlocksCountHi uint16
- FileACLHi uint16
- UIDHi uint16
- GIDHi uint16
- ChecksumLo uint16
- _ uint16
-}
-
-// Compiles only if InodeOld implements Inode.
-var _ Inode = (*InodeOld)(nil)
-
-// Mode implements Inode.Mode.
-func (in *InodeOld) Mode() linux.FileMode { return linux.FileMode(in.ModeRaw) }
-
-// UID implements Inode.UID.
-func (in *InodeOld) UID() auth.KUID {
- return auth.KUID((uint32(in.UIDHi) << 16) | uint32(in.UIDLo))
-}
-
-// GID implements Inode.GID.
-func (in *InodeOld) GID() auth.KGID {
- return auth.KGID((uint32(in.GIDHi) << 16) | uint32(in.GIDLo))
-}
-
-// Size implements Inode.Size.
-func (in *InodeOld) Size() uint64 {
- // In ext2/ext3, in.SizeHi did not exist, it was instead named in.DirACL.
- return uint64(in.SizeLo)
-}
-
-// InodeSize implements Inode.InodeSize.
-func (in *InodeOld) InodeSize() uint16 { return OldInodeSize }
-
-// AccessTime implements Inode.AccessTime.
-func (in *InodeOld) AccessTime() time.Time {
- return time.FromUnix(int64(in.AccessTimeRaw), 0)
-}
-
-// ChangeTime implements Inode.ChangeTime.
-func (in *InodeOld) ChangeTime() time.Time {
- return time.FromUnix(int64(in.ChangeTimeRaw), 0)
-}
-
-// ModificationTime implements Inode.ModificationTime.
-func (in *InodeOld) ModificationTime() time.Time {
- return time.FromUnix(int64(in.ModificationTimeRaw), 0)
-}
-
-// DeletionTime implements Inode.DeletionTime.
-func (in *InodeOld) DeletionTime() time.Time {
- return time.FromUnix(int64(in.DeletionTimeRaw), 0)
-}
-
-// LinksCount implements Inode.LinksCount.
-func (in *InodeOld) LinksCount() uint16 { return in.LinksCountRaw }
-
-// Flags implements Inode.Flags.
-func (in *InodeOld) Flags() InodeFlags { return InodeFlagsFromInt(in.FlagsRaw) }
-
-// Data implements Inode.Data.
-func (in *InodeOld) Data() []byte { return in.DataRaw[:] }
diff --git a/pkg/sentry/fs/ext/disklayout/inode_test.go b/pkg/sentry/fs/ext/disklayout/inode_test.go
deleted file mode 100644
index dd03ee50e..000000000
--- a/pkg/sentry/fs/ext/disklayout/inode_test.go
+++ /dev/null
@@ -1,222 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "fmt"
- "strconv"
- "testing"
-
- "gvisor.dev/gvisor/pkg/sentry/kernel/time"
-)
-
-// TestInodeSize tests that the inode structs are of the correct size.
-func TestInodeSize(t *testing.T) {
- assertSize(t, InodeOld{}, OldInodeSize)
-
- // This was updated from 156 bytes to 160 bytes in Oct 2015.
- assertSize(t, InodeNew{}, 160)
-}
-
-// TestTimestampSeconds tests that the seconds part of [a/c/m] timestamps in
-// ext4 inode structs are decoded correctly.
-//
-// These tests are derived from the table under https://www.kernel.org/doc/html/latest/filesystems/ext4/dynamic.html#inode-timestamps.
-func TestTimestampSeconds(t *testing.T) {
- type timestampTest struct {
- // msbSet tells if the most significant bit of InodeOld.[X]TimeRaw is set.
- // If this is set then the 32-bit time is negative.
- msbSet bool
-
- // lowerBound tells if we should take the lowest possible value of
- // InodeOld.[X]TimeRaw while satisfying test.msbSet condition. If set to
- // false it tells to take the highest possible value.
- lowerBound bool
-
- // extraBits is InodeNew.[X]TimeExtra.
- extraBits uint32
-
- // want is the kernel time struct that is expected.
- want time.Time
- }
-
- tests := []timestampTest{
- // 1901-12-13
- {
- msbSet: true,
- lowerBound: true,
- extraBits: 0,
- want: time.FromUnix(int64(-0x80000000), 0),
- },
-
- // 1969-12-31
- {
- msbSet: true,
- lowerBound: false,
- extraBits: 0,
- want: time.FromUnix(int64(-1), 0),
- },
-
- // 1970-01-01
- {
- msbSet: false,
- lowerBound: true,
- extraBits: 0,
- want: time.FromUnix(int64(0), 0),
- },
-
- // 2038-01-19
- {
- msbSet: false,
- lowerBound: false,
- extraBits: 0,
- want: time.FromUnix(int64(0x7fffffff), 0),
- },
-
- // 2038-01-19
- {
- msbSet: true,
- lowerBound: true,
- extraBits: 1,
- want: time.FromUnix(int64(0x80000000), 0),
- },
-
- // 2106-02-07
- {
- msbSet: true,
- lowerBound: false,
- extraBits: 1,
- want: time.FromUnix(int64(0xffffffff), 0),
- },
-
- // 2106-02-07
- {
- msbSet: false,
- lowerBound: true,
- extraBits: 1,
- want: time.FromUnix(int64(0x100000000), 0),
- },
-
- // 2174-02-25
- {
- msbSet: false,
- lowerBound: false,
- extraBits: 1,
- want: time.FromUnix(int64(0x17fffffff), 0),
- },
-
- // 2174-02-25
- {
- msbSet: true,
- lowerBound: true,
- extraBits: 2,
- want: time.FromUnix(int64(0x180000000), 0),
- },
-
- // 2242-03-16
- {
- msbSet: true,
- lowerBound: false,
- extraBits: 2,
- want: time.FromUnix(int64(0x1ffffffff), 0),
- },
-
- // 2242-03-16
- {
- msbSet: false,
- lowerBound: true,
- extraBits: 2,
- want: time.FromUnix(int64(0x200000000), 0),
- },
-
- // 2310-04-04
- {
- msbSet: false,
- lowerBound: false,
- extraBits: 2,
- want: time.FromUnix(int64(0x27fffffff), 0),
- },
-
- // 2310-04-04
- {
- msbSet: true,
- lowerBound: true,
- extraBits: 3,
- want: time.FromUnix(int64(0x280000000), 0),
- },
-
- // 2378-04-22
- {
- msbSet: true,
- lowerBound: false,
- extraBits: 3,
- want: time.FromUnix(int64(0x2ffffffff), 0),
- },
-
- // 2378-04-22
- {
- msbSet: false,
- lowerBound: true,
- extraBits: 3,
- want: time.FromUnix(int64(0x300000000), 0),
- },
-
- // 2446-05-10
- {
- msbSet: false,
- lowerBound: false,
- extraBits: 3,
- want: time.FromUnix(int64(0x37fffffff), 0),
- },
- }
-
- lowerMSB0 := int32(0) // binary: 00000000 00000000 00000000 00000000
- upperMSB0 := int32(0x7fffffff) // binary: 01111111 11111111 11111111 11111111
- lowerMSB1 := int32(-0x80000000) // binary: 10000000 00000000 00000000 00000000
- upperMSB1 := int32(-1) // binary: 11111111 11111111 11111111 11111111
-
- get32BitTime := func(test timestampTest) int32 {
- if test.msbSet {
- if test.lowerBound {
- return lowerMSB1
- }
-
- return upperMSB1
- }
-
- if test.lowerBound {
- return lowerMSB0
- }
-
- return upperMSB0
- }
-
- getTestName := func(test timestampTest) string {
- return fmt.Sprintf(
- "Tests time decoding with epoch bits 0b%s and 32-bit raw time: MSB set=%t, lower bound=%t",
- strconv.FormatInt(int64(test.extraBits), 2),
- test.msbSet,
- test.lowerBound,
- )
- }
-
- for _, test := range tests {
- t.Run(getTestName(test), func(t *testing.T) {
- if got := fromExtraTime(get32BitTime(test), test.extraBits); got != test.want {
- t.Errorf("Expected: %v, Got: %v", test.want, got)
- }
- })
- }
-}
diff --git a/pkg/sentry/fs/ext/disklayout/superblock.go b/pkg/sentry/fs/ext/disklayout/superblock.go
deleted file mode 100644
index 7a337a5e0..000000000
--- a/pkg/sentry/fs/ext/disklayout/superblock.go
+++ /dev/null
@@ -1,471 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-const (
- // SbOffset is the absolute offset at which the superblock is placed.
- SbOffset = 1024
-)
-
-// SuperBlock should be implemented by structs representing the ext superblock.
-// The superblock holds a lot of information about the enclosing filesystem.
-// This interface aims to provide access methods to important information held
-// by the superblock. It does NOT expose all fields of the superblock, only the
-// ones necessary. This can be expanded when need be.
-//
-// Location and replication:
-// - The superblock is located at offset 1024 in block group 0.
-// - Redundant copies of the superblock and group descriptors are kept in
-// all groups if SbSparse feature flag is NOT set. If it is set, the
-// replicas only exist in groups whose group number is either 0 or a
-// power of 3, 5, or 7.
-// - There is also a sparse superblock feature v2 in which there are just
-// two replicas saved in the block groups pointed by sb.s_backup_bgs.
-//
-// Replicas should eventually be updated if the superblock is updated.
-//
-// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#super-block.
-type SuperBlock interface {
- // InodesCount returns the total number of inodes in this filesystem.
- InodesCount() uint32
-
- // BlocksCount returns the total number of data blocks in this filesystem.
- BlocksCount() uint64
-
- // FreeBlocksCount returns the number of free blocks in this filesystem.
- FreeBlocksCount() uint64
-
- // FreeInodesCount returns the number of free inodes in this filesystem.
- FreeInodesCount() uint32
-
- // MountCount returns the number of mounts since the last fsck.
- MountCount() uint16
-
- // MaxMountCount returns the number of mounts allowed beyond which a fsck is
- // needed.
- MaxMountCount() uint16
-
- // FirstDataBlock returns the absolute block number of the first data block,
- // which contains the super block itself.
- //
- // If the filesystem has 1kb data blocks then this should return 1. For all
- // other configurations, this typically returns 0.
- FirstDataBlock() uint32
-
- // BlockSize returns the size of one data block in this filesystem.
- // This can be calculated by 2^(10 + sb.s_log_block_size). This ensures that
- // the smallest block size is 1kb.
- BlockSize() uint64
-
- // BlocksPerGroup returns the number of data blocks in a block group.
- BlocksPerGroup() uint32
-
- // ClusterSize returns block cluster size (set during mkfs time by admin).
- // This can be calculated by 2^(10 + sb.s_log_cluster_size). This ensures that
- // the smallest cluster size is 1kb.
- //
- // sb.s_log_cluster_size must equal sb.s_log_block_size if bigalloc feature
- // is NOT set and consequently BlockSize() = ClusterSize() in that case.
- ClusterSize() uint64
-
- // ClustersPerGroup returns:
- // - number of clusters per group if bigalloc is enabled.
- // - BlocksPerGroup() otherwise.
- ClustersPerGroup() uint32
-
- // InodeSize returns the size of the inode disk record size in bytes. Use this
- // to iterate over inode arrays on disk.
- //
- // In ext2 and ext3:
- // - Each inode had a disk record of 128 bytes.
- // - The inode struct size was fixed at 128 bytes.
- //
- // In ext4 its possible to allocate larger on-disk inodes:
- // - Inode disk record size = sb.s_inode_size (function return value).
- // = 256 (default)
- // - Inode struct size = 128 + inode.i_extra_isize.
- // = 128 + 32 = 160 (default)
- InodeSize() uint16
-
- // InodesPerGroup returns the number of inodes in a block group.
- InodesPerGroup() uint32
-
- // BgDescSize returns the size of the block group descriptor struct.
- //
- // In ext2, ext3, ext4 (without 64-bit feature), the block group descriptor
- // is only 32 bytes long.
- // In ext4 with 64-bit feature, the block group descriptor expands to AT LEAST
- // 64 bytes. It might be bigger than that.
- BgDescSize() uint16
-
- // CompatibleFeatures returns the CompatFeatures struct which holds all the
- // compatible features this fs supports.
- CompatibleFeatures() CompatFeatures
-
- // IncompatibleFeatures returns the CompatFeatures struct which holds all the
- // incompatible features this fs supports.
- IncompatibleFeatures() IncompatFeatures
-
- // ReadOnlyCompatibleFeatures returns the CompatFeatures struct which holds all the
- // readonly compatible features this fs supports.
- ReadOnlyCompatibleFeatures() RoCompatFeatures
-
- // Magic() returns the magic signature which must be 0xef53.
- Magic() uint16
-
- // Revision returns the superblock revision. Superblock struct fields from
- // offset 0x54 till 0x150 should only be used if superblock has DynamicRev.
- Revision() SbRevision
-}
-
-// SbRevision is the type for superblock revisions.
-type SbRevision uint32
-
-// Super block revisions.
-const (
- // OldRev is the good old (original) format.
- OldRev SbRevision = 0
-
- // DynamicRev is v2 format w/ dynamic inode sizes.
- DynamicRev SbRevision = 1
-)
-
-// Superblock compatible features.
-// This is not exhaustive, unused features are not listed.
-const (
- // SbDirPrealloc indicates directory preallocation.
- SbDirPrealloc = 0x1
-
- // SbHasJournal indicates the presence of a journal. jbd2 should only work
- // with this being set.
- SbHasJournal = 0x4
-
- // SbExtAttr indicates extended attributes support.
- SbExtAttr = 0x8
-
- // SbResizeInode indicates that the fs has reserved GDT blocks (right after
- // group descriptors) for fs expansion.
- SbResizeInode = 0x10
-
- // SbDirIndex indicates that the fs has directory indices.
- SbDirIndex = 0x20
-
- // SbSparseV2 stands for Sparse superblock version 2.
- SbSparseV2 = 0x200
-)
-
-// CompatFeatures represents a superblock's compatible feature set. If the
-// kernel does not understand any of these feature, it can still read/write
-// to this fs.
-type CompatFeatures struct {
- DirPrealloc bool
- HasJournal bool
- ExtAttr bool
- ResizeInode bool
- DirIndex bool
- SparseV2 bool
-}
-
-// ToInt converts superblock compatible features back to its 32-bit rep.
-func (f CompatFeatures) ToInt() uint32 {
- var res uint32
-
- if f.DirPrealloc {
- res |= SbDirPrealloc
- }
- if f.HasJournal {
- res |= SbHasJournal
- }
- if f.ExtAttr {
- res |= SbExtAttr
- }
- if f.ResizeInode {
- res |= SbResizeInode
- }
- if f.DirIndex {
- res |= SbDirIndex
- }
- if f.SparseV2 {
- res |= SbSparseV2
- }
-
- return res
-}
-
-// CompatFeaturesFromInt converts the integer representation of superblock
-// compatible features to CompatFeatures struct.
-func CompatFeaturesFromInt(f uint32) CompatFeatures {
- return CompatFeatures{
- DirPrealloc: f&SbDirPrealloc > 0,
- HasJournal: f&SbHasJournal > 0,
- ExtAttr: f&SbExtAttr > 0,
- ResizeInode: f&SbResizeInode > 0,
- DirIndex: f&SbDirIndex > 0,
- SparseV2: f&SbSparseV2 > 0,
- }
-}
-
-// Superblock incompatible features.
-// This is not exhaustive, unused features are not listed.
-const (
- // SbDirentFileType indicates that directory entries record the file type.
- // We should use struct ext4_dir_entry_2 for dirents then.
- SbDirentFileType = 0x2
-
- // SbRecovery indicates that the filesystem needs recovery.
- SbRecovery = 0x4
-
- // SbJournalDev indicates that the filesystem has a separate journal device.
- SbJournalDev = 0x8
-
- // SbMetaBG indicates that the filesystem is using Meta block groups. Moves
- // the group descriptors from the congested first block group into the first
- // group of each metablock group to increase the maximum block groups limit
- // and hence support much larger filesystems.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#meta-block-groups.
- SbMetaBG = 0x10
-
- // SbExtents indicates that the filesystem uses extents. Must be set in ext4
- // filesystems.
- SbExtents = 0x40
-
- // SbIs64Bit indicates that this filesystem addresses blocks with 64-bits.
- // Hence can support 2^64 data blocks.
- SbIs64Bit = 0x80
-
- // SbMMP indicates that this filesystem has multiple mount protection.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#multiple-mount-protection.
- SbMMP = 0x100
-
- // SbFlexBg indicates that this filesystem has flexible block groups. Several
- // block groups are tied into one logical block group so that all the metadata
- // for the block groups (bitmaps and inode tables) are close together for
- // faster loading. Consequently, large files will be continuous on disk.
- // However, this does not affect the placement of redundant superblocks and
- // group descriptors.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#flexible-block-groups.
- SbFlexBg = 0x200
-
- // SbLargeDir shows that large directory enabled. Directory htree can be 3
- // levels deep. Directory htrees are allowed to be 2 levels deep otherwise.
- SbLargeDir = 0x4000
-
- // SbInlineData allows inline data in inodes for really small files.
- SbInlineData = 0x8000
-
- // SbEncrypted indicates that this fs contains encrypted inodes.
- SbEncrypted = 0x10000
-)
-
-// IncompatFeatures represents a superblock's incompatible feature set. If the
-// kernel does not understand any of these feature, it should refuse to mount.
-type IncompatFeatures struct {
- DirentFileType bool
- Recovery bool
- JournalDev bool
- MetaBG bool
- Extents bool
- Is64Bit bool
- MMP bool
- FlexBg bool
- LargeDir bool
- InlineData bool
- Encrypted bool
-}
-
-// ToInt converts superblock incompatible features back to its 32-bit rep.
-func (f IncompatFeatures) ToInt() uint32 {
- var res uint32
-
- if f.DirentFileType {
- res |= SbDirentFileType
- }
- if f.Recovery {
- res |= SbRecovery
- }
- if f.JournalDev {
- res |= SbJournalDev
- }
- if f.MetaBG {
- res |= SbMetaBG
- }
- if f.Extents {
- res |= SbExtents
- }
- if f.Is64Bit {
- res |= SbIs64Bit
- }
- if f.MMP {
- res |= SbMMP
- }
- if f.FlexBg {
- res |= SbFlexBg
- }
- if f.LargeDir {
- res |= SbLargeDir
- }
- if f.InlineData {
- res |= SbInlineData
- }
- if f.Encrypted {
- res |= SbEncrypted
- }
-
- return res
-}
-
-// IncompatFeaturesFromInt converts the integer representation of superblock
-// incompatible features to IncompatFeatures struct.
-func IncompatFeaturesFromInt(f uint32) IncompatFeatures {
- return IncompatFeatures{
- DirentFileType: f&SbDirentFileType > 0,
- Recovery: f&SbRecovery > 0,
- JournalDev: f&SbJournalDev > 0,
- MetaBG: f&SbMetaBG > 0,
- Extents: f&SbExtents > 0,
- Is64Bit: f&SbIs64Bit > 0,
- MMP: f&SbMMP > 0,
- FlexBg: f&SbFlexBg > 0,
- LargeDir: f&SbLargeDir > 0,
- InlineData: f&SbInlineData > 0,
- Encrypted: f&SbEncrypted > 0,
- }
-}
-
-// Superblock readonly compatible features.
-// This is not exhaustive, unused features are not listed.
-const (
- // SbSparse indicates sparse superblocks. Only groups with number either 0 or
- // a power of 3, 5, or 7 will have redundant copies of the superblock and
- // block descriptors.
- SbSparse = 0x1
-
- // SbLargeFile indicates that this fs has been used to store a file >= 2GiB.
- SbLargeFile = 0x2
-
- // SbHugeFile indicates that this fs contains files whose sizes are
- // represented in units of logicals blocks, not 512-byte sectors.
- SbHugeFile = 0x8
-
- // SbGdtCsum indicates that group descriptors have checksums.
- SbGdtCsum = 0x10
-
- // SbDirNlink indicates that the new subdirectory limit is 64,999. Ext3 has a
- // 32,000 subdirectory limit.
- SbDirNlink = 0x20
-
- // SbExtraIsize indicates that large inodes exist on this filesystem.
- SbExtraIsize = 0x40
-
- // SbHasSnapshot indicates the existence of a snapshot.
- SbHasSnapshot = 0x80
-
- // SbQuota enables usage tracking for all quota types.
- SbQuota = 0x100
-
- // SbBigalloc maps to the bigalloc feature. When set, the minimum allocation
- // unit becomes a cluster rather than a data block. Then block bitmaps track
- // clusters, not data blocks.
- //
- // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#bigalloc.
- SbBigalloc = 0x200
-
- // SbMetadataCsum indicates that the fs supports metadata checksumming.
- SbMetadataCsum = 0x400
-
- // SbReadOnly marks this filesystem as readonly. Should refuse to mount in
- // read/write mode.
- SbReadOnly = 0x1000
-)
-
-// RoCompatFeatures represents a superblock's readonly compatible feature set.
-// If the kernel does not understand any of these feature, it can still mount
-// readonly. But if the user wants to mount read/write, the kernel should
-// refuse to mount.
-type RoCompatFeatures struct {
- Sparse bool
- LargeFile bool
- HugeFile bool
- GdtCsum bool
- DirNlink bool
- ExtraIsize bool
- HasSnapshot bool
- Quota bool
- Bigalloc bool
- MetadataCsum bool
- ReadOnly bool
-}
-
-// ToInt converts superblock readonly compatible features to its 32-bit rep.
-func (f RoCompatFeatures) ToInt() uint32 {
- var res uint32
-
- if f.Sparse {
- res |= SbSparse
- }
- if f.LargeFile {
- res |= SbLargeFile
- }
- if f.HugeFile {
- res |= SbHugeFile
- }
- if f.GdtCsum {
- res |= SbGdtCsum
- }
- if f.DirNlink {
- res |= SbDirNlink
- }
- if f.ExtraIsize {
- res |= SbExtraIsize
- }
- if f.HasSnapshot {
- res |= SbHasSnapshot
- }
- if f.Quota {
- res |= SbQuota
- }
- if f.Bigalloc {
- res |= SbBigalloc
- }
- if f.MetadataCsum {
- res |= SbMetadataCsum
- }
- if f.ReadOnly {
- res |= SbReadOnly
- }
-
- return res
-}
-
-// RoCompatFeaturesFromInt converts the integer representation of superblock
-// readonly compatible features to RoCompatFeatures struct.
-func RoCompatFeaturesFromInt(f uint32) RoCompatFeatures {
- return RoCompatFeatures{
- Sparse: f&SbSparse > 0,
- LargeFile: f&SbLargeFile > 0,
- HugeFile: f&SbHugeFile > 0,
- GdtCsum: f&SbGdtCsum > 0,
- DirNlink: f&SbDirNlink > 0,
- ExtraIsize: f&SbExtraIsize > 0,
- HasSnapshot: f&SbHasSnapshot > 0,
- Quota: f&SbQuota > 0,
- Bigalloc: f&SbBigalloc > 0,
- MetadataCsum: f&SbMetadataCsum > 0,
- ReadOnly: f&SbReadOnly > 0,
- }
-}
diff --git a/pkg/sentry/fs/ext/disklayout/superblock_32.go b/pkg/sentry/fs/ext/disklayout/superblock_32.go
deleted file mode 100644
index 53e515fd3..000000000
--- a/pkg/sentry/fs/ext/disklayout/superblock_32.go
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// SuperBlock32Bit implements SuperBlock and represents the 32-bit version of
-// the ext4_super_block struct in fs/ext4/ext4.h. Should be used only if
-// RevLevel = DynamicRev and 64-bit feature is disabled.
-type SuperBlock32Bit struct {
- // We embed the old superblock struct here because the 32-bit version is just
- // an extension of the old version.
- SuperBlockOld
-
- FirstInode uint32
- InodeSizeRaw uint16
- BlockGroupNumber uint16
- FeatureCompat uint32
- FeatureIncompat uint32
- FeatureRoCompat uint32
- UUID [16]byte
- VolumeName [16]byte
- LastMounted [64]byte
- AlgoUsageBitmap uint32
- PreallocBlocks uint8
- PreallocDirBlocks uint8
- ReservedGdtBlocks uint16
- JournalUUID [16]byte
- JournalInum uint32
- JournalDev uint32
- LastOrphan uint32
- HashSeed [4]uint32
- DefaultHashVersion uint8
- JnlBackupType uint8
- BgDescSizeRaw uint16
- DefaultMountOpts uint32
- FirstMetaBg uint32
- MkfsTime uint32
- JnlBlocks [17]uint32
-}
-
-// Compiles only if SuperBlock32Bit implements SuperBlock.
-var _ SuperBlock = (*SuperBlock32Bit)(nil)
-
-// Only override methods which change based on the additional fields above.
-// Not overriding SuperBlock.BgDescSize because it would still return 32 here.
-
-// InodeSize implements SuperBlock.InodeSize.
-func (sb *SuperBlock32Bit) InodeSize() uint16 {
- return sb.InodeSizeRaw
-}
-
-// CompatibleFeatures implements SuperBlock.CompatibleFeatures.
-func (sb *SuperBlock32Bit) CompatibleFeatures() CompatFeatures {
- return CompatFeaturesFromInt(sb.FeatureCompat)
-}
-
-// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures.
-func (sb *SuperBlock32Bit) IncompatibleFeatures() IncompatFeatures {
- return IncompatFeaturesFromInt(sb.FeatureIncompat)
-}
-
-// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures.
-func (sb *SuperBlock32Bit) ReadOnlyCompatibleFeatures() RoCompatFeatures {
- return RoCompatFeaturesFromInt(sb.FeatureRoCompat)
-}
diff --git a/pkg/sentry/fs/ext/disklayout/superblock_64.go b/pkg/sentry/fs/ext/disklayout/superblock_64.go
deleted file mode 100644
index 7c1053fb4..000000000
--- a/pkg/sentry/fs/ext/disklayout/superblock_64.go
+++ /dev/null
@@ -1,95 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// SuperBlock64Bit implements SuperBlock and represents the 64-bit version of
-// the ext4_super_block struct in fs/ext4/ext4.h. This sums up to be exactly
-// 1024 bytes (smallest possible block size) and hence the superblock always
-// fits in no more than one data block. Should only be used when the 64-bit
-// feature is set.
-type SuperBlock64Bit struct {
- // We embed the 32-bit struct here because 64-bit version is just an extension
- // of the 32-bit version.
- SuperBlock32Bit
-
- BlocksCountHi uint32
- ReservedBlocksCountHi uint32
- FreeBlocksCountHi uint32
- MinInodeSize uint16
- WantInodeSize uint16
- Flags uint32
- RaidStride uint16
- MmpInterval uint16
- MmpBlock uint64
- RaidStripeWidth uint32
- LogGroupsPerFlex uint8
- ChecksumType uint8
- _ uint16
- KbytesWritten uint64
- SnapshotInum uint32
- SnapshotID uint32
- SnapshotRsrvBlocksCount uint64
- SnapshotList uint32
- ErrorCount uint32
- FirstErrorTime uint32
- FirstErrorInode uint32
- FirstErrorBlock uint64
- FirstErrorFunction [32]byte
- FirstErrorLine uint32
- LastErrorTime uint32
- LastErrorInode uint32
- LastErrorLine uint32
- LastErrorBlock uint64
- LastErrorFunction [32]byte
- MountOpts [64]byte
- UserQuotaInum uint32
- GroupQuotaInum uint32
- OverheadBlocks uint32
- BackupBgs [2]uint32
- EncryptAlgos [4]uint8
- EncryptPwSalt [16]uint8
- LostFoundInode uint32
- ProjectQuotaInode uint32
- ChecksumSeed uint32
- WtimeHi uint8
- MtimeHi uint8
- MkfsTimeHi uint8
- LastCheckHi uint8
- FirstErrorTimeHi uint8
- LastErrorTimeHi uint8
- _ [2]uint8
- Encoding uint16
- EncodingFlags uint16
- _ [95]uint32
- Checksum uint32
-}
-
-// Compiles only if SuperBlock64Bit implements SuperBlock.
-var _ SuperBlock = (*SuperBlock64Bit)(nil)
-
-// Only override methods which change based on the 64-bit feature.
-
-// BlocksCount implements SuperBlock.BlocksCount.
-func (sb *SuperBlock64Bit) BlocksCount() uint64 {
- return (uint64(sb.BlocksCountHi) << 32) | uint64(sb.BlocksCountLo)
-}
-
-// FreeBlocksCount implements SuperBlock.FreeBlocksCount.
-func (sb *SuperBlock64Bit) FreeBlocksCount() uint64 {
- return (uint64(sb.FreeBlocksCountHi) << 32) | uint64(sb.FreeBlocksCountLo)
-}
-
-// BgDescSize implements SuperBlock.BgDescSize.
-func (sb *SuperBlock64Bit) BgDescSize() uint16 { return sb.BgDescSizeRaw }
diff --git a/pkg/sentry/fs/ext/disklayout/superblock_old.go b/pkg/sentry/fs/ext/disklayout/superblock_old.go
deleted file mode 100644
index 9221e0251..000000000
--- a/pkg/sentry/fs/ext/disklayout/superblock_old.go
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-// SuperBlockOld implements SuperBlock and represents the old version of the
-// superblock struct. Should be used only if RevLevel = OldRev.
-type SuperBlockOld struct {
- InodesCountRaw uint32
- BlocksCountLo uint32
- ReservedBlocksCount uint32
- FreeBlocksCountLo uint32
- FreeInodesCountRaw uint32
- FirstDataBlockRaw uint32
- LogBlockSize uint32
- LogClusterSize uint32
- BlocksPerGroupRaw uint32
- ClustersPerGroupRaw uint32
- InodesPerGroupRaw uint32
- Mtime uint32
- Wtime uint32
- MountCountRaw uint16
- MaxMountCountRaw uint16
- MagicRaw uint16
- State uint16
- Errors uint16
- MinorRevLevel uint16
- LastCheck uint32
- CheckInterval uint32
- CreatorOS uint32
- RevLevel uint32
- DefResUID uint16
- DefResGID uint16
-}
-
-// Compiles only if SuperBlockOld implements SuperBlock.
-var _ SuperBlock = (*SuperBlockOld)(nil)
-
-// InodesCount implements SuperBlock.InodesCount.
-func (sb *SuperBlockOld) InodesCount() uint32 { return sb.InodesCountRaw }
-
-// BlocksCount implements SuperBlock.BlocksCount.
-func (sb *SuperBlockOld) BlocksCount() uint64 { return uint64(sb.BlocksCountLo) }
-
-// FreeBlocksCount implements SuperBlock.FreeBlocksCount.
-func (sb *SuperBlockOld) FreeBlocksCount() uint64 { return uint64(sb.FreeBlocksCountLo) }
-
-// FreeInodesCount implements SuperBlock.FreeInodesCount.
-func (sb *SuperBlockOld) FreeInodesCount() uint32 { return sb.FreeInodesCountRaw }
-
-// MountCount implements SuperBlock.MountCount.
-func (sb *SuperBlockOld) MountCount() uint16 { return sb.MountCountRaw }
-
-// MaxMountCount implements SuperBlock.MaxMountCount.
-func (sb *SuperBlockOld) MaxMountCount() uint16 { return sb.MaxMountCountRaw }
-
-// FirstDataBlock implements SuperBlock.FirstDataBlock.
-func (sb *SuperBlockOld) FirstDataBlock() uint32 { return sb.FirstDataBlockRaw }
-
-// BlockSize implements SuperBlock.BlockSize.
-func (sb *SuperBlockOld) BlockSize() uint64 { return 1 << (10 + sb.LogBlockSize) }
-
-// BlocksPerGroup implements SuperBlock.BlocksPerGroup.
-func (sb *SuperBlockOld) BlocksPerGroup() uint32 { return sb.BlocksPerGroupRaw }
-
-// ClusterSize implements SuperBlock.ClusterSize.
-func (sb *SuperBlockOld) ClusterSize() uint64 { return 1 << (10 + sb.LogClusterSize) }
-
-// ClustersPerGroup implements SuperBlock.ClustersPerGroup.
-func (sb *SuperBlockOld) ClustersPerGroup() uint32 { return sb.ClustersPerGroupRaw }
-
-// InodeSize implements SuperBlock.InodeSize.
-func (sb *SuperBlockOld) InodeSize() uint16 { return OldInodeSize }
-
-// InodesPerGroup implements SuperBlock.InodesPerGroup.
-func (sb *SuperBlockOld) InodesPerGroup() uint32 { return sb.InodesPerGroupRaw }
-
-// BgDescSize implements SuperBlock.BgDescSize.
-func (sb *SuperBlockOld) BgDescSize() uint16 { return 32 }
-
-// CompatibleFeatures implements SuperBlock.CompatibleFeatures.
-func (sb *SuperBlockOld) CompatibleFeatures() CompatFeatures { return CompatFeatures{} }
-
-// IncompatibleFeatures implements SuperBlock.IncompatibleFeatures.
-func (sb *SuperBlockOld) IncompatibleFeatures() IncompatFeatures { return IncompatFeatures{} }
-
-// ReadOnlyCompatibleFeatures implements SuperBlock.ReadOnlyCompatibleFeatures.
-func (sb *SuperBlockOld) ReadOnlyCompatibleFeatures() RoCompatFeatures { return RoCompatFeatures{} }
-
-// Magic implements SuperBlock.Magic.
-func (sb *SuperBlockOld) Magic() uint16 { return sb.MagicRaw }
-
-// Revision implements SuperBlock.Revision.
-func (sb *SuperBlockOld) Revision() SbRevision { return SbRevision(sb.RevLevel) }
diff --git a/pkg/sentry/fs/ext/disklayout/superblock_test.go b/pkg/sentry/fs/ext/disklayout/superblock_test.go
deleted file mode 100644
index 463b5ba21..000000000
--- a/pkg/sentry/fs/ext/disklayout/superblock_test.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "testing"
-)
-
-// TestSuperBlockSize tests that the superblock structs are of the correct
-// size.
-func TestSuperBlockSize(t *testing.T) {
- assertSize(t, SuperBlockOld{}, 84)
- assertSize(t, SuperBlock32Bit{}, 336)
- assertSize(t, SuperBlock64Bit{}, 1024)
-}
diff --git a/pkg/sentry/fs/ext/disklayout/test_utils.go b/pkg/sentry/fs/ext/disklayout/test_utils.go
deleted file mode 100644
index 9c63f04c0..000000000
--- a/pkg/sentry/fs/ext/disklayout/test_utils.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package disklayout
-
-import (
- "reflect"
- "testing"
-
- "gvisor.dev/gvisor/pkg/binary"
-)
-
-func assertSize(t *testing.T, v interface{}, want uintptr) {
- t.Helper()
-
- if got := binary.Size(v); got != want {
- t.Errorf("struct %s should be exactly %d bytes but is %d bytes", reflect.TypeOf(v).Name(), want, got)
- }
-}
diff --git a/pkg/sentry/fs/ext/ext.go b/pkg/sentry/fs/ext/ext.go
deleted file mode 100644
index 10e235fb1..000000000
--- a/pkg/sentry/fs/ext/ext.go
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package ext implements readonly ext(2/3/4) filesystems.
-package ext
-
-import (
- "errors"
- "fmt"
- "io"
- "os"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// filesystemType implements vfs.FilesystemType.
-type filesystemType struct{}
-
-// Compiles only if filesystemType implements vfs.FilesystemType.
-var _ vfs.FilesystemType = (*filesystemType)(nil)
-
-// getDeviceFd returns the read seeker to the underlying device.
-// Currently there are two ways of mounting an ext(2/3/4) fs:
-// 1. Specify a mount with our internal special MountType in the OCI spec.
-// 2. Expose the device to the container and mount it from application layer.
-func getDeviceFd(source string, opts vfs.NewFilesystemOptions) (io.ReadSeeker, error) {
- if opts.InternalData == nil {
- // User mount call.
- // TODO(b/134676337): Open the device specified by `source` and return that.
- panic("unimplemented")
- }
-
- // NewFilesystem call originated from within the sentry.
- fd, ok := opts.InternalData.(uintptr)
- if !ok {
- return nil, errors.New("internal data for ext fs must be a uintptr containing the file descriptor to device")
- }
-
- // We do not close this file because that would close the underlying device
- // file descriptor (which is required for reading the fs from disk).
- // TODO(b/134676337): Use pkg/fd instead.
- deviceFile := os.NewFile(fd, source)
- if deviceFile == nil {
- return nil, fmt.Errorf("ext4 device file descriptor is not valid: %d", fd)
- }
-
- return deviceFile, nil
-}
-
-// NewFilesystem implements vfs.FilesystemType.NewFilesystem.
-func (fstype filesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts vfs.NewFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
- dev, err := getDeviceFd(source, opts)
- if err != nil {
- return nil, nil, err
- }
-
- fs := filesystem{dev: dev, inodeCache: make(map[uint32]*inode)}
- fs.vfsfs.Init(&fs)
- fs.sb, err = readSuperBlock(dev)
- if err != nil {
- return nil, nil, err
- }
-
- if fs.sb.Magic() != linux.EXT_SUPER_MAGIC {
- // mount(2) specifies that EINVAL should be returned if the superblock is
- // invalid.
- return nil, nil, syserror.EINVAL
- }
-
- fs.bgs, err = readBlockGroups(dev, fs.sb)
- if err != nil {
- return nil, nil, err
- }
-
- rootInode, err := fs.getOrCreateInode(disklayout.RootDirInode)
- if err != nil {
- return nil, nil, err
- }
-
- return &fs.vfsfs, &newDentry(rootInode).vfsd, nil
-}
diff --git a/pkg/sentry/fs/ext/ext_test.go b/pkg/sentry/fs/ext/ext_test.go
deleted file mode 100644
index ee7f7907c..000000000
--- a/pkg/sentry/fs/ext/ext_test.go
+++ /dev/null
@@ -1,407 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "fmt"
- "os"
- "path"
- "testing"
-
- "github.com/google/go-cmp/cmp"
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/context/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-
- "gvisor.dev/gvisor/runsc/test/testutil"
-)
-
-const (
- assetsDir = "pkg/sentry/fs/ext/assets"
-)
-
-var (
- ext2ImagePath = path.Join(assetsDir, "tiny.ext2")
- ext3ImagePath = path.Join(assetsDir, "tiny.ext3")
- ext4ImagePath = path.Join(assetsDir, "tiny.ext4")
-)
-
-func beginning(_ uint64) uint64 {
- return 0
-}
-
-func middle(i uint64) uint64 {
- return i / 2
-}
-
-func end(i uint64) uint64 {
- return i
-}
-
-// setUp opens imagePath as an ext Filesystem and returns all necessary
-// elements required to run tests. If error is non-nil, it also returns a tear
-// down function which must be called after the test is run for clean up.
-func setUp(t *testing.T, imagePath string) (context.Context, *vfs.Filesystem, *vfs.Dentry, func(), error) {
- localImagePath, err := testutil.FindFile(imagePath)
- if err != nil {
- return nil, nil, nil, nil, fmt.Errorf("failed to open local image at path %s: %v", imagePath, err)
- }
-
- f, err := os.Open(localImagePath)
- if err != nil {
- return nil, nil, nil, nil, err
- }
-
- // Mount the ext4 fs and retrieve the inode structure for the file.
- mockCtx := contexttest.Context(t)
- fs, d, err := filesystemType{}.NewFilesystem(mockCtx, nil, localImagePath, vfs.NewFilesystemOptions{InternalData: f.Fd()})
- if err != nil {
- f.Close()
- return nil, nil, nil, nil, err
- }
-
- tearDown := func() {
- if err := f.Close(); err != nil {
- t.Fatalf("tearDown failed: %v", err)
- }
- }
- return mockCtx, fs, d, tearDown, nil
-}
-
-// TestRootDir tests that the root directory inode is correctly initialized and
-// returned from setUp.
-func TestRootDir(t *testing.T) {
- type inodeProps struct {
- Mode linux.FileMode
- UID auth.KUID
- GID auth.KGID
- Size uint64
- InodeSize uint16
- Links uint16
- Flags disklayout.InodeFlags
- }
-
- type rootDirTest struct {
- name string
- image string
- wantInode inodeProps
- }
-
- tests := []rootDirTest{
- {
- name: "ext4 root dir",
- image: ext4ImagePath,
- wantInode: inodeProps{
- Mode: linux.ModeDirectory | 0755,
- Size: 0x400,
- InodeSize: 0x80,
- Links: 3,
- Flags: disklayout.InodeFlags{Extents: true},
- },
- },
- {
- name: "ext3 root dir",
- image: ext3ImagePath,
- wantInode: inodeProps{
- Mode: linux.ModeDirectory | 0755,
- Size: 0x400,
- InodeSize: 0x80,
- Links: 3,
- },
- },
- {
- name: "ext2 root dir",
- image: ext2ImagePath,
- wantInode: inodeProps{
- Mode: linux.ModeDirectory | 0755,
- Size: 0x400,
- InodeSize: 0x80,
- Links: 3,
- },
- },
- }
-
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- _, _, vfsd, tearDown, err := setUp(t, test.image)
- if err != nil {
- t.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- d, ok := vfsd.Impl().(*dentry)
- if !ok {
- t.Fatalf("ext dentry of incorrect type: %T", vfsd.Impl())
- }
-
- // Offload inode contents into local structs for comparison.
- gotInode := inodeProps{
- Mode: d.inode.diskInode.Mode(),
- UID: d.inode.diskInode.UID(),
- GID: d.inode.diskInode.GID(),
- Size: d.inode.diskInode.Size(),
- InodeSize: d.inode.diskInode.InodeSize(),
- Links: d.inode.diskInode.LinksCount(),
- Flags: d.inode.diskInode.Flags(),
- }
-
- if diff := cmp.Diff(gotInode, test.wantInode); diff != "" {
- t.Errorf("inode mismatch (-want +got):\n%s", diff)
- }
- })
- }
-}
-
-// TestFilesystemInit tests that the filesystem superblock and block group
-// descriptors are correctly read in and initialized.
-func TestFilesystemInit(t *testing.T) {
- // sb only contains the immutable properties of the superblock.
- type sb struct {
- InodesCount uint32
- BlocksCount uint64
- MaxMountCount uint16
- FirstDataBlock uint32
- BlockSize uint64
- BlocksPerGroup uint32
- ClusterSize uint64
- ClustersPerGroup uint32
- InodeSize uint16
- InodesPerGroup uint32
- BgDescSize uint16
- Magic uint16
- Revision disklayout.SbRevision
- CompatFeatures disklayout.CompatFeatures
- IncompatFeatures disklayout.IncompatFeatures
- RoCompatFeatures disklayout.RoCompatFeatures
- }
-
- // bg only contains the immutable properties of the block group descriptor.
- type bg struct {
- InodeTable uint64
- BlockBitmap uint64
- InodeBitmap uint64
- ExclusionBitmap uint64
- Flags disklayout.BGFlags
- }
-
- type fsInitTest struct {
- name string
- image string
- wantSb sb
- wantBgs []bg
- }
-
- tests := []fsInitTest{
- {
- name: "ext4 filesystem init",
- image: ext4ImagePath,
- wantSb: sb{
- InodesCount: 0x10,
- BlocksCount: 0x40,
- MaxMountCount: 0xffff,
- FirstDataBlock: 0x1,
- BlockSize: 0x400,
- BlocksPerGroup: 0x2000,
- ClusterSize: 0x400,
- ClustersPerGroup: 0x2000,
- InodeSize: 0x80,
- InodesPerGroup: 0x10,
- BgDescSize: 0x40,
- Magic: linux.EXT_SUPER_MAGIC,
- Revision: disklayout.DynamicRev,
- CompatFeatures: disklayout.CompatFeatures{
- ExtAttr: true,
- ResizeInode: true,
- DirIndex: true,
- },
- IncompatFeatures: disklayout.IncompatFeatures{
- DirentFileType: true,
- Extents: true,
- Is64Bit: true,
- FlexBg: true,
- },
- RoCompatFeatures: disklayout.RoCompatFeatures{
- Sparse: true,
- LargeFile: true,
- HugeFile: true,
- DirNlink: true,
- ExtraIsize: true,
- MetadataCsum: true,
- },
- },
- wantBgs: []bg{
- {
- InodeTable: 0x23,
- BlockBitmap: 0x3,
- InodeBitmap: 0x13,
- Flags: disklayout.BGFlags{
- InodeZeroed: true,
- },
- },
- },
- },
- {
- name: "ext3 filesystem init",
- image: ext3ImagePath,
- wantSb: sb{
- InodesCount: 0x10,
- BlocksCount: 0x40,
- MaxMountCount: 0xffff,
- FirstDataBlock: 0x1,
- BlockSize: 0x400,
- BlocksPerGroup: 0x2000,
- ClusterSize: 0x400,
- ClustersPerGroup: 0x2000,
- InodeSize: 0x80,
- InodesPerGroup: 0x10,
- BgDescSize: 0x20,
- Magic: linux.EXT_SUPER_MAGIC,
- Revision: disklayout.DynamicRev,
- CompatFeatures: disklayout.CompatFeatures{
- ExtAttr: true,
- ResizeInode: true,
- DirIndex: true,
- },
- IncompatFeatures: disklayout.IncompatFeatures{
- DirentFileType: true,
- },
- RoCompatFeatures: disklayout.RoCompatFeatures{
- Sparse: true,
- LargeFile: true,
- },
- },
- wantBgs: []bg{
- {
- InodeTable: 0x5,
- BlockBitmap: 0x3,
- InodeBitmap: 0x4,
- Flags: disklayout.BGFlags{
- InodeZeroed: true,
- },
- },
- },
- },
- {
- name: "ext2 filesystem init",
- image: ext2ImagePath,
- wantSb: sb{
- InodesCount: 0x10,
- BlocksCount: 0x40,
- MaxMountCount: 0xffff,
- FirstDataBlock: 0x1,
- BlockSize: 0x400,
- BlocksPerGroup: 0x2000,
- ClusterSize: 0x400,
- ClustersPerGroup: 0x2000,
- InodeSize: 0x80,
- InodesPerGroup: 0x10,
- BgDescSize: 0x20,
- Magic: linux.EXT_SUPER_MAGIC,
- Revision: disklayout.DynamicRev,
- CompatFeatures: disklayout.CompatFeatures{
- ExtAttr: true,
- ResizeInode: true,
- DirIndex: true,
- },
- IncompatFeatures: disklayout.IncompatFeatures{
- DirentFileType: true,
- },
- RoCompatFeatures: disklayout.RoCompatFeatures{
- Sparse: true,
- LargeFile: true,
- },
- },
- wantBgs: []bg{
- {
- InodeTable: 0x5,
- BlockBitmap: 0x3,
- InodeBitmap: 0x4,
- Flags: disklayout.BGFlags{
- InodeZeroed: true,
- },
- },
- },
- },
- }
-
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- _, vfsfs, _, tearDown, err := setUp(t, test.image)
- if err != nil {
- t.Fatalf("setUp failed: %v", err)
- }
- defer tearDown()
-
- fs, ok := vfsfs.Impl().(*filesystem)
- if !ok {
- t.Fatalf("ext filesystem of incorrect type: %T", vfsfs.Impl())
- }
-
- // Offload superblock and block group descriptors contents into
- // local structs for comparison.
- totalFreeInodes := uint32(0)
- totalFreeBlocks := uint64(0)
- gotSb := sb{
- InodesCount: fs.sb.InodesCount(),
- BlocksCount: fs.sb.BlocksCount(),
- MaxMountCount: fs.sb.MaxMountCount(),
- FirstDataBlock: fs.sb.FirstDataBlock(),
- BlockSize: fs.sb.BlockSize(),
- BlocksPerGroup: fs.sb.BlocksPerGroup(),
- ClusterSize: fs.sb.ClusterSize(),
- ClustersPerGroup: fs.sb.ClustersPerGroup(),
- InodeSize: fs.sb.InodeSize(),
- InodesPerGroup: fs.sb.InodesPerGroup(),
- BgDescSize: fs.sb.BgDescSize(),
- Magic: fs.sb.Magic(),
- Revision: fs.sb.Revision(),
- CompatFeatures: fs.sb.CompatibleFeatures(),
- IncompatFeatures: fs.sb.IncompatibleFeatures(),
- RoCompatFeatures: fs.sb.ReadOnlyCompatibleFeatures(),
- }
- gotNumBgs := len(fs.bgs)
- gotBgs := make([]bg, gotNumBgs)
- for i := 0; i < gotNumBgs; i++ {
- gotBgs[i].InodeTable = fs.bgs[i].InodeTable()
- gotBgs[i].BlockBitmap = fs.bgs[i].BlockBitmap()
- gotBgs[i].InodeBitmap = fs.bgs[i].InodeBitmap()
- gotBgs[i].ExclusionBitmap = fs.bgs[i].ExclusionBitmap()
- gotBgs[i].Flags = fs.bgs[i].Flags()
-
- totalFreeInodes += fs.bgs[i].FreeInodesCount()
- totalFreeBlocks += uint64(fs.bgs[i].FreeBlocksCount())
- }
-
- if diff := cmp.Diff(gotSb, test.wantSb); diff != "" {
- t.Errorf("superblock mismatch (-want +got):\n%s", diff)
- }
-
- if diff := cmp.Diff(gotBgs, test.wantBgs); diff != "" {
- t.Errorf("block group descriptors mismatch (-want +got):\n%s", diff)
- }
-
- if diff := cmp.Diff(totalFreeInodes, fs.sb.FreeInodesCount()); diff != "" {
- t.Errorf("total free inodes mismatch (-want +got):\n%s", diff)
- }
-
- if diff := cmp.Diff(totalFreeBlocks, fs.sb.FreeBlocksCount()); diff != "" {
- t.Errorf("total free blocks mismatch (-want +got):\n%s", diff)
- }
- })
- }
-}
diff --git a/pkg/sentry/fs/ext/extent_test.go b/pkg/sentry/fs/ext/extent_test.go
deleted file mode 100644
index b3f342c8e..000000000
--- a/pkg/sentry/fs/ext/extent_test.go
+++ /dev/null
@@ -1,185 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "bytes"
- "testing"
-
- "github.com/google/go-cmp/cmp"
- "github.com/google/go-cmp/cmp/cmpopts"
- "gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
-)
-
-// TestExtentTree tests the extent tree building logic.
-//
-// Test tree:
-// 0.{Head}[Idx][Idx]
-// / \
-// / \
-// 1.{Head}[Ext][Ext] 2.{Head}[Idx]
-// / | \
-// [Phy] [Phy, Phy] 3.{Head}[Ext]
-// |
-// [Phy, Phy, Phy]
-//
-// Legend:
-// - Head = ExtentHeader
-// - Idx = ExtentIdx
-// - Ext = Extent
-// - Phy = Physical Block
-//
-// Please note that ext4 might not construct extent trees looking like this.
-// This is purely for testing the tree traversal logic.
-func TestExtentTree(t *testing.T) {
- blkSize := uint64(64) // No block has more than 1 header + 4 entries.
- mockDisk := make([]byte, blkSize*10)
- mockInode := &inode{diskInode: &disklayout.InodeNew{}}
-
- node3 := &disklayout.ExtentNode{
- Header: disklayout.ExtentHeader{
- Magic: disklayout.ExtentMagic,
- NumEntries: 1,
- MaxEntries: 4,
- Height: 0,
- },
- Entries: []disklayout.ExtentEntryPair{
- {
- Entry: &disklayout.Extent{
- FirstFileBlock: 3,
- Length: 3,
- StartBlockLo: 6,
- },
- Node: nil,
- },
- },
- }
-
- node2 := &disklayout.ExtentNode{
- Header: disklayout.ExtentHeader{
- Magic: disklayout.ExtentMagic,
- NumEntries: 1,
- MaxEntries: 4,
- Height: 1,
- },
- Entries: []disklayout.ExtentEntryPair{
- {
- Entry: &disklayout.ExtentIdx{
- FirstFileBlock: 3,
- ChildBlockLo: 2,
- },
- Node: node3,
- },
- },
- }
-
- node1 := &disklayout.ExtentNode{
- Header: disklayout.ExtentHeader{
- Magic: disklayout.ExtentMagic,
- NumEntries: 2,
- MaxEntries: 4,
- Height: 0,
- },
- Entries: []disklayout.ExtentEntryPair{
- {
- Entry: &disklayout.Extent{
- FirstFileBlock: 0,
- Length: 1,
- StartBlockLo: 3,
- },
- Node: nil,
- },
- {
- Entry: &disklayout.Extent{
- FirstFileBlock: 1,
- Length: 2,
- StartBlockLo: 4,
- },
- Node: nil,
- },
- },
- }
-
- node0 := &disklayout.ExtentNode{
- Header: disklayout.ExtentHeader{
- Magic: disklayout.ExtentMagic,
- NumEntries: 2,
- MaxEntries: 4,
- Height: 2,
- },
- Entries: []disklayout.ExtentEntryPair{
- {
- Entry: &disklayout.ExtentIdx{
- FirstFileBlock: 0,
- ChildBlockLo: 0,
- },
- Node: node1,
- },
- {
- Entry: &disklayout.ExtentIdx{
- FirstFileBlock: 3,
- ChildBlockLo: 1,
- },
- Node: node2,
- },
- },
- }
-
- writeTree(mockInode, mockDisk, node0, blkSize)
-
- r := bytes.NewReader(mockDisk)
- if err := mockInode.buildExtTree(r, blkSize); err != nil {
- t.Fatalf("inode.buildExtTree failed: %v", err)
- }
-
- opt := cmpopts.IgnoreUnexported(disklayout.ExtentIdx{}, disklayout.ExtentHeader{})
- if diff := cmp.Diff(mockInode.root, node0, opt); diff != "" {
- t.Errorf("extent tree mismatch (-want +got):\n%s", diff)
- }
-}
-
-// writeTree writes the tree represented by `root` to the inode and disk passed.
-func writeTree(in *inode, disk []byte, root *disklayout.ExtentNode, blkSize uint64) {
- rootData := binary.Marshal(nil, binary.LittleEndian, root.Header)
- for _, ep := range root.Entries {
- rootData = binary.Marshal(rootData, binary.LittleEndian, ep.Entry)
- }
-
- copy(in.diskInode.Data(), rootData)
-
- if root.Header.Height > 0 {
- for _, ep := range root.Entries {
- writeTreeToDisk(disk, ep, blkSize)
- }
- }
-}
-
-// writeTreeToDisk is the recursive step for writeTree which writes the tree
-// on the disk only.
-func writeTreeToDisk(disk []byte, curNode disklayout.ExtentEntryPair, blkSize uint64) {
- nodeData := binary.Marshal(nil, binary.LittleEndian, curNode.Node.Header)
- for _, ep := range curNode.Node.Entries {
- nodeData = binary.Marshal(nodeData, binary.LittleEndian, ep.Entry)
- }
-
- copy(disk[curNode.Entry.PhysicalBlock()*blkSize:], nodeData)
-
- if curNode.Node.Header.Height > 0 {
- for _, ep := range curNode.Node.Entries {
- writeTreeToDisk(disk, ep, blkSize)
- }
- }
-}
diff --git a/pkg/sentry/fs/ext/filesystem.go b/pkg/sentry/fs/ext/filesystem.go
deleted file mode 100644
index 7150e75a5..000000000
--- a/pkg/sentry/fs/ext/filesystem.go
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "io"
- "sync"
-
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// filesystem implements vfs.FilesystemImpl.
-type filesystem struct {
- // TODO(b/134676337): Remove when all methods have been implemented.
- vfs.FilesystemImpl
-
- vfsfs vfs.Filesystem
-
- // mu serializes changes to the Dentry tree and the usage of the read seeker.
- mu sync.Mutex
-
- // dev is the ReadSeeker for the underlying fs device. It is protected by mu.
- //
- // The ext filesystems aim to maximize locality, i.e. place all the data
- // blocks of a file close together. On a spinning disk, locality reduces the
- // amount of movement of the head hence speeding up IO operations. On an SSD
- // there are no moving parts but locality increases the size of each transer
- // request. Hence, having mutual exclusion on the read seeker while reading a
- // file *should* help in achieving the intended performance gains.
- //
- // Note: This synchronization was not coupled with the ReadSeeker itself
- // because we want to synchronize across read/seek operations for the
- // performance gains mentioned above. Helps enforcing one-file-at-a-time IO.
- dev io.ReadSeeker
-
- // inodeCache maps absolute inode numbers to the corresponding Inode struct.
- // Inodes should be removed from this once their reference count hits 0.
- //
- // Protected by mu because every addition and removal from this corresponds to
- // a change in the dentry tree.
- inodeCache map[uint32]*inode
-
- // sb represents the filesystem superblock. Immutable after initialization.
- sb disklayout.SuperBlock
-
- // bgs represents all the block group descriptors for the filesystem.
- // Immutable after initialization.
- bgs []disklayout.BlockGroup
-}
-
-// Compiles only if filesystem implements vfs.FilesystemImpl.
-var _ vfs.FilesystemImpl = (*filesystem)(nil)
-
-// getOrCreateInode gets the inode corresponding to the inode number passed in.
-// It creates a new one with the given inode number if one does not exist.
-//
-// Preconditions: must be holding fs.mu.
-func (fs *filesystem) getOrCreateInode(inodeNum uint32) (*inode, error) {
- if in, ok := fs.inodeCache[inodeNum]; ok {
- return in, nil
- }
-
- in, err := newInode(fs.dev, fs.sb, fs.bgs, inodeNum)
- if err != nil {
- return nil, err
- }
-
- fs.inodeCache[inodeNum] = in
- return in, nil
-}
-
-// Release implements vfs.FilesystemImpl.Release.
-func (fs *filesystem) Release() {
-}
-
-// Sync implements vfs.FilesystemImpl.Sync.
-func (fs *filesystem) Sync(ctx context.Context) error {
- // This is a readonly filesystem for now.
- return nil
-}
-
-// The vfs.FilesystemImpl functions below return EROFS because their respective
-// man pages say that EROFS must be returned if the path resolves to a file on
-// a read-only filesystem.
-
-// TODO(b/134676337): Implement path traversal and return EROFS only if the
-// path resolves to a Dentry within ext fs.
-
-// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
-func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
- return syserror.EROFS
-}
-
-// MknodAt implements vfs.FilesystemImpl.MknodAt.
-func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
- return syserror.EROFS
-}
-
-// RenameAt implements vfs.FilesystemImpl.RenameAt.
-func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error {
- return syserror.EROFS
-}
-
-// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
-func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- return syserror.EROFS
-}
-
-// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
-func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
- return syserror.EROFS
-}
-
-// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
-func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
- return syserror.EROFS
-}
-
-// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
-func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- return syserror.EROFS
-}
diff --git a/pkg/sentry/fs/ext/inode.go b/pkg/sentry/fs/ext/inode.go
deleted file mode 100644
index df1ea0bda..000000000
--- a/pkg/sentry/fs/ext/inode.go
+++ /dev/null
@@ -1,209 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "io"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// inode represents an ext inode.
-type inode struct {
- // refs is a reference count. refs is accessed using atomic memory operations.
- refs int64
-
- // inodeNum is the inode number of this inode on disk. This is used to
- // identify inodes within the ext filesystem.
- inodeNum uint32
-
- // diskInode gives us access to the inode struct on disk. Immutable.
- diskInode disklayout.Inode
-
- // root is the root extent node. This lives in the 60 byte diskInode.Blocks().
- // Immutable. Nil if the inode does not use extents.
- root *disklayout.ExtentNode
-}
-
-// incRef increments the inode ref count.
-func (in *inode) incRef() {
- atomic.AddInt64(&in.refs, 1)
-}
-
-// tryIncRef tries to increment the ref count. Returns true if successful.
-func (in *inode) tryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&in.refs)
- if refs == 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&in.refs, refs, refs+1) {
- return true
- }
- }
-}
-
-// decRef decrements the inode ref count and releases the inode resources if
-// the ref count hits 0.
-//
-// Preconditions: Must have locked fs.mu.
-func (in *inode) decRef(fs *filesystem) {
- if refs := atomic.AddInt64(&in.refs, -1); refs == 0 {
- delete(fs.inodeCache, in.inodeNum)
- } else if refs < 0 {
- panic("ext.inode.decRef() called without holding a reference")
- }
-}
-
-// newInode is the inode constructor. Reads the inode off disk. Identifies
-// inodes based on the absolute inode number on disk.
-//
-// Preconditions: Must hold the mutex of the filesystem containing dev.
-func newInode(dev io.ReadSeeker, sb disklayout.SuperBlock, bgs []disklayout.BlockGroup, inodeNum uint32) (*inode, error) {
- if inodeNum == 0 {
- panic("inode number 0 on ext filesystems is not possible")
- }
-
- in := &inode{refs: 1, inodeNum: inodeNum}
- inodeRecordSize := sb.InodeSize()
- if inodeRecordSize == disklayout.OldInodeSize {
- in.diskInode = &disklayout.InodeOld{}
- } else {
- in.diskInode = &disklayout.InodeNew{}
- }
-
- // Calculate where the inode is actually placed.
- inodesPerGrp := sb.InodesPerGroup()
- blkSize := sb.BlockSize()
- inodeTableOff := bgs[getBGNum(inodeNum, inodesPerGrp)].InodeTable() * blkSize
- inodeOff := inodeTableOff + uint64(uint32(inodeRecordSize)*getBGOff(inodeNum, inodesPerGrp))
-
- // Read it from disk and figure out which type of inode this is.
- if err := readFromDisk(dev, int64(inodeOff), in.diskInode); err != nil {
- return nil, err
- }
-
- if in.diskInode.Flags().Extents {
- in.buildExtTree(dev, blkSize)
- }
-
- return in, nil
-}
-
-// getBGNum returns the block group number that a given inode belongs to.
-func getBGNum(inodeNum uint32, inodesPerGrp uint32) uint32 {
- return (inodeNum - 1) / inodesPerGrp
-}
-
-// getBGOff returns the offset at which the given inode lives in the block
-// group's inode table, i.e. the index of the inode in the inode table.
-func getBGOff(inodeNum uint32, inodesPerGrp uint32) uint32 {
- return (inodeNum - 1) % inodesPerGrp
-}
-
-// buildExtTree builds the extent tree by reading it from disk by doing
-// running a simple DFS. It first reads the root node from the inode struct in
-// memory. Then it recursively builds the rest of the tree by reading it off
-// disk.
-//
-// Preconditions:
-// - Must hold the mutex of the filesystem containing dev.
-// - Inode flag InExtents must be set.
-func (in *inode) buildExtTree(dev io.ReadSeeker, blkSize uint64) error {
- rootNodeData := in.diskInode.Data()
-
- var rootHeader disklayout.ExtentHeader
- binary.Unmarshal(rootNodeData[:disklayout.ExtentStructsSize], binary.LittleEndian, &rootHeader)
-
- // Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries.
- if rootHeader.NumEntries > 4 {
- // read(2) specifies that EINVAL should be returned if the file is unsuitable
- // for reading.
- return syserror.EINVAL
- }
-
- rootEntries := make([]disklayout.ExtentEntryPair, rootHeader.NumEntries)
- for i, off := uint16(0), disklayout.ExtentStructsSize; i < rootHeader.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize {
- var curEntry disklayout.ExtentEntry
- if rootHeader.Height == 0 {
- // Leaf node.
- curEntry = &disklayout.Extent{}
- } else {
- // Internal node.
- curEntry = &disklayout.ExtentIdx{}
- }
- binary.Unmarshal(rootNodeData[off:off+disklayout.ExtentStructsSize], binary.LittleEndian, curEntry)
- rootEntries[i].Entry = curEntry
- }
-
- // If this node is internal, perform DFS.
- if rootHeader.Height > 0 {
- for i := uint16(0); i < rootHeader.NumEntries; i++ {
- var err error
- if rootEntries[i].Node, err = buildExtTreeFromDisk(dev, rootEntries[i].Entry, blkSize); err != nil {
- return err
- }
- }
- }
-
- in.root = &disklayout.ExtentNode{rootHeader, rootEntries}
- return nil
-}
-
-// buildExtTreeFromDisk reads the extent tree nodes from disk and recursively
-// builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to
-// by the ExtentEntry.
-//
-// Preconditions: Must hold the mutex of the filesystem containing dev.
-func buildExtTreeFromDisk(dev io.ReadSeeker, entry disklayout.ExtentEntry, blkSize uint64) (*disklayout.ExtentNode, error) {
- var header disklayout.ExtentHeader
- off := entry.PhysicalBlock() * blkSize
- if err := readFromDisk(dev, int64(off), &header); err != nil {
- return nil, err
- }
-
- entries := make([]disklayout.ExtentEntryPair, header.NumEntries)
- for i, off := uint16(0), off+disklayout.ExtentStructsSize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentStructsSize {
- var curEntry disklayout.ExtentEntry
- if header.Height == 0 {
- // Leaf node.
- curEntry = &disklayout.Extent{}
- } else {
- // Internal node.
- curEntry = &disklayout.ExtentIdx{}
- }
-
- if err := readFromDisk(dev, int64(off), curEntry); err != nil {
- return nil, err
- }
- entries[i].Entry = curEntry
- }
-
- // If this node is internal, perform DFS.
- if header.Height > 0 {
- for i := uint16(0); i < header.NumEntries; i++ {
- var err error
- entries[i].Node, err = buildExtTreeFromDisk(dev, entries[i].Entry, blkSize)
- if err != nil {
- return nil, err
- }
- }
- }
-
- return &disklayout.ExtentNode{header, entries}, nil
-}
diff --git a/pkg/sentry/fs/ext/utils.go b/pkg/sentry/fs/ext/utils.go
deleted file mode 100644
index 3472c5fa8..000000000
--- a/pkg/sentry/fs/ext/utils.go
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ext
-
-import (
- "encoding/binary"
- "io"
-
- "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// readFromDisk performs a binary read from disk into the given struct from
-// the absolute offset provided.
-//
-// All disk reads should use this helper so we avoid reading from stale
-// previously used offsets. This function forces the offset parameter.
-//
-// Precondition: Must hold the mutex of the filesystem containing dev.
-func readFromDisk(dev io.ReadSeeker, abOff int64, v interface{}) error {
- if _, err := dev.Seek(abOff, io.SeekStart); err != nil {
- return syserror.EIO
- }
-
- if err := binary.Read(dev, binary.LittleEndian, v); err != nil {
- return syserror.EIO
- }
-
- return nil
-}
-
-// readSuperBlock reads the SuperBlock from block group 0 in the underlying
-// device. There are three versions of the superblock. This function identifies
-// and returns the correct version.
-//
-// Precondition: Must hold the mutex of the filesystem containing dev.
-func readSuperBlock(dev io.ReadSeeker) (disklayout.SuperBlock, error) {
- var sb disklayout.SuperBlock = &disklayout.SuperBlockOld{}
- if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil {
- return nil, err
- }
- if sb.Revision() == disklayout.OldRev {
- return sb, nil
- }
-
- sb = &disklayout.SuperBlock32Bit{}
- if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil {
- return nil, err
- }
- if !sb.IncompatibleFeatures().Is64Bit {
- return sb, nil
- }
-
- sb = &disklayout.SuperBlock64Bit{}
- if err := readFromDisk(dev, disklayout.SbOffset, sb); err != nil {
- return nil, err
- }
- return sb, nil
-}
-
-// blockGroupsCount returns the number of block groups in the ext fs.
-func blockGroupsCount(sb disklayout.SuperBlock) uint64 {
- blocksCount := sb.BlocksCount()
- blocksPerGroup := uint64(sb.BlocksPerGroup())
-
- // Round up the result. float64 can compromise precision so do it manually.
- bgCount := blocksCount / blocksPerGroup
- if blocksCount%blocksPerGroup != 0 {
- bgCount++
- }
-
- return bgCount
-}
-
-// readBlockGroups reads the block group descriptor table from block group 0 in
-// the underlying device.
-//
-// Precondition: Must hold the mutex of the filesystem containing dev.
-func readBlockGroups(dev io.ReadSeeker, sb disklayout.SuperBlock) ([]disklayout.BlockGroup, error) {
- bgCount := blockGroupsCount(sb)
- bgdSize := uint64(sb.BgDescSize())
- is64Bit := sb.IncompatibleFeatures().Is64Bit
- bgds := make([]disklayout.BlockGroup, bgCount)
-
- for i, off := uint64(0), uint64(sb.FirstDataBlock()+1)*sb.BlockSize(); i < bgCount; i, off = i+1, off+bgdSize {
- if is64Bit {
- bgds[i] = &disklayout.BlockGroup64Bit{}
- } else {
- bgds[i] = &disklayout.BlockGroup32Bit{}
- }
-
- if err := readFromDisk(dev, int64(off), bgds[i]); err != nil {
- return nil, err
- }
- }
- return bgds, nil
-}
diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go
index 5a0a67eab..669ffcb75 100644
--- a/pkg/sentry/fs/fdpipe/pipe.go
+++ b/pkg/sentry/fs/fdpipe/pipe.go
@@ -87,7 +87,7 @@ func (p *pipeOperations) init() error {
log.Warningf("pipe: cannot stat fd %d: %v", p.file.FD(), err)
return syscall.EINVAL
}
- if s.Mode&syscall.S_IFIFO != syscall.S_IFIFO {
+ if (s.Mode & syscall.S_IFMT) != syscall.S_IFIFO {
log.Warningf("pipe: cannot load fd %d as pipe, file type: %o", p.file.FD(), s.Mode)
return syscall.EINVAL
}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index ed62049a9..20cb9a367 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -66,10 +66,8 @@ type CachingInodeOperations struct {
// mfp is used to allocate memory that caches backingFile's contents.
mfp pgalloc.MemoryFileProvider
- // forcePageCache indicates the sentry page cache should be used regardless
- // of whether the platform supports host mapped I/O or not. This must not be
- // modified after inode creation.
- forcePageCache bool
+ // opts contains options. opts is immutable.
+ opts CachingInodeOperationsOptions
attrMu sync.Mutex `state:"nosave"`
@@ -116,6 +114,20 @@ type CachingInodeOperations struct {
refs frameRefSet
}
+// CachingInodeOperationsOptions configures a CachingInodeOperations.
+//
+// +stateify savable
+type CachingInodeOperationsOptions struct {
+ // If ForcePageCache is true, use the sentry page cache even if a host file
+ // descriptor is available.
+ ForcePageCache bool
+
+ // If LimitHostFDTranslation is true, apply maxFillRange() constraints to
+ // host file descriptor mappings returned by
+ // CachingInodeOperations.Translate().
+ LimitHostFDTranslation bool
+}
+
// CachedFileObject is a file that may require caching.
type CachedFileObject interface {
// ReadToBlocksAt reads up to dsts.NumBytes() bytes from the file to dsts,
@@ -159,7 +171,7 @@ type CachedFileObject interface {
// NewCachingInodeOperations returns a new CachingInodeOperations backed by
// a CachedFileObject and its initial unstable attributes.
-func NewCachingInodeOperations(ctx context.Context, backingFile CachedFileObject, uattr fs.UnstableAttr, forcePageCache bool) *CachingInodeOperations {
+func NewCachingInodeOperations(ctx context.Context, backingFile CachedFileObject, uattr fs.UnstableAttr, opts CachingInodeOperationsOptions) *CachingInodeOperations {
mfp := pgalloc.MemoryFileProviderFromContext(ctx)
if mfp == nil {
panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider))
@@ -167,7 +179,7 @@ func NewCachingInodeOperations(ctx context.Context, backingFile CachedFileObject
return &CachingInodeOperations{
backingFile: backingFile,
mfp: mfp,
- forcePageCache: forcePageCache,
+ opts: opts,
attr: uattr,
hostFileMapper: NewHostFileMapper(),
}
@@ -568,21 +580,30 @@ type inodeReadWriter struct {
// ReadToBlocks implements safemem.Reader.ReadToBlocks.
func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+ mem := rw.c.mfp.MemoryFile()
+ fillCache := !rw.c.useHostPageCache() && mem.ShouldCacheEvictable()
+
// Hot path. Avoid defers.
- rw.c.dataMu.RLock()
+ var unlock func()
+ if fillCache {
+ rw.c.dataMu.Lock()
+ unlock = rw.c.dataMu.Unlock
+ } else {
+ rw.c.dataMu.RLock()
+ unlock = rw.c.dataMu.RUnlock
+ }
// Compute the range to read.
if rw.offset >= rw.c.attr.Size {
- rw.c.dataMu.RUnlock()
+ unlock()
return 0, io.EOF
}
end := fs.ReadEndOffset(rw.offset, int64(dsts.NumBytes()), rw.c.attr.Size)
if end == rw.offset { // dsts.NumBytes() == 0?
- rw.c.dataMu.RUnlock()
+ unlock()
return 0, nil
}
- mem := rw.c.mfp.MemoryFile()
var done uint64
seg, gap := rw.c.cache.Find(uint64(rw.offset))
for rw.offset < end {
@@ -592,7 +613,7 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
// Get internal mappings from the cache.
ims, err := mem.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
if err != nil {
- rw.c.dataMu.RUnlock()
+ unlock()
return done, err
}
@@ -602,7 +623,7 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
rw.offset += int64(n)
dsts = dsts.DropFirst64(n)
if err != nil {
- rw.c.dataMu.RUnlock()
+ unlock()
return done, err
}
@@ -610,27 +631,48 @@ func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
seg, gap = seg.NextNonEmpty()
case gap.Ok():
- // Read directly from the backing file.
- gapmr := gap.Range().Intersect(mr)
- dst := dsts.TakeFirst64(gapmr.Length())
- n, err := rw.c.backingFile.ReadToBlocksAt(rw.ctx, dst, gapmr.Start)
- done += n
- rw.offset += int64(n)
- dsts = dsts.DropFirst64(n)
- // Partial reads are fine. But we must stop reading.
- if n != dst.NumBytes() || err != nil {
- rw.c.dataMu.RUnlock()
- return done, err
+ gapMR := gap.Range().Intersect(mr)
+ if fillCache {
+ // Read into the cache, then re-enter the loop to read from the
+ // cache.
+ reqMR := memmap.MappableRange{
+ Start: uint64(usermem.Addr(gapMR.Start).RoundDown()),
+ End: fs.OffsetPageEnd(int64(gapMR.End)),
+ }
+ optMR := gap.Range()
+ err := rw.c.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mem, usage.PageCache, rw.c.backingFile.ReadToBlocksAt)
+ mem.MarkEvictable(rw.c, pgalloc.EvictableRange{optMR.Start, optMR.End})
+ seg, gap = rw.c.cache.Find(uint64(rw.offset))
+ if !seg.Ok() {
+ unlock()
+ return done, err
+ }
+ // err might have occurred in part of gap.Range() outside
+ // gapMR. Forget about it for now; if the error matters and
+ // persists, we'll run into it again in a later iteration of
+ // this loop.
+ } else {
+ // Read directly from the backing file.
+ dst := dsts.TakeFirst64(gapMR.Length())
+ n, err := rw.c.backingFile.ReadToBlocksAt(rw.ctx, dst, gapMR.Start)
+ done += n
+ rw.offset += int64(n)
+ dsts = dsts.DropFirst64(n)
+ // Partial reads are fine. But we must stop reading.
+ if n != dst.NumBytes() || err != nil {
+ unlock()
+ return done, err
+ }
+
+ // Continue.
+ seg, gap = gap.NextSegment(), FileRangeGapIterator{}
}
- // Continue.
- seg, gap = gap.NextSegment(), FileRangeGapIterator{}
-
default:
break
}
}
- rw.c.dataMu.RUnlock()
+ unlock()
return done, nil
}
@@ -700,7 +742,10 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
seg, gap = seg.NextNonEmpty()
case gap.Ok() && gap.Start() < mr.End:
- // Write directly to the backing file.
+ // Write directly to the backing file. At present, we never fill
+ // the cache when writing, since doing so can convert small writes
+ // into inefficient read-modify-write cycles, and we have no
+ // mechanism for detecting or avoiding this.
gapmr := gap.Range().Intersect(mr)
src := srcs.TakeFirst64(gapmr.Length())
n, err := rw.c.backingFile.WriteFromBlocksAt(rw.ctx, src, gapmr.Start)
@@ -730,7 +775,7 @@ func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error
// and memory mappings, and false if c.cache may contain data cached from
// c.backingFile.
func (c *CachingInodeOperations) useHostPageCache() bool {
- return !c.forcePageCache && c.backingFile.FD() >= 0
+ return !c.opts.ForcePageCache && c.backingFile.FD() >= 0
}
// AddMapping implements memmap.Mappable.AddMapping.
@@ -802,11 +847,15 @@ func (c *CachingInodeOperations) CopyMapping(ctx context.Context, ms memmap.Mapp
func (c *CachingInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
// Hot path. Avoid defer.
if c.useHostPageCache() {
+ mr := optional
+ if c.opts.LimitHostFDTranslation {
+ mr = maxFillRange(required, optional)
+ }
return []memmap.Translation{
{
- Source: optional,
+ Source: mr,
File: c,
- Offset: optional.Start,
+ Offset: mr.Start,
Perms: usermem.AnyAccess,
},
}, nil
diff --git a/pkg/sentry/fs/fsutil/inode_cached_test.go b/pkg/sentry/fs/fsutil/inode_cached_test.go
index dc19255ed..eb5730c35 100644
--- a/pkg/sentry/fs/fsutil/inode_cached_test.go
+++ b/pkg/sentry/fs/fsutil/inode_cached_test.go
@@ -61,7 +61,7 @@ func TestSetPermissions(t *testing.T) {
uattr := fs.WithCurrentTime(ctx, fs.UnstableAttr{
Perms: fs.FilePermsFromMode(0444),
})
- iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, false /*forcePageCache*/)
+ iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, CachingInodeOperationsOptions{})
defer iops.Release()
perms := fs.FilePermsFromMode(0777)
@@ -150,7 +150,7 @@ func TestSetTimestamps(t *testing.T) {
ModificationTime: epoch,
StatusChangeTime: epoch,
}
- iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, false /*forcePageCache*/)
+ iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, CachingInodeOperationsOptions{})
defer iops.Release()
if err := iops.SetTimestamps(ctx, nil, test.ts); err != nil {
@@ -188,7 +188,7 @@ func TestTruncate(t *testing.T) {
uattr := fs.UnstableAttr{
Size: 0,
}
- iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, false /*forcePageCache*/)
+ iops := NewCachingInodeOperations(ctx, noopBackingFile{}, uattr, CachingInodeOperationsOptions{})
defer iops.Release()
if err := iops.Truncate(ctx, nil, uattr.Size); err != nil {
@@ -280,7 +280,7 @@ func TestRead(t *testing.T) {
uattr := fs.UnstableAttr{
Size: int64(len(buf)),
}
- iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, false /*forcePageCache*/)
+ iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, CachingInodeOperationsOptions{})
defer iops.Release()
// Expect the cache to be initially empty.
@@ -336,7 +336,7 @@ func TestWrite(t *testing.T) {
uattr := fs.UnstableAttr{
Size: int64(len(buf)),
}
- iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, false /*forcePageCache*/)
+ iops := NewCachingInodeOperations(ctx, newSliceBackingFile(buf), uattr, CachingInodeOperationsOptions{})
defer iops.Release()
// Expect the cache to be initially empty.
diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go
index 69999dc28..8f8ab5d29 100644
--- a/pkg/sentry/fs/gofer/fs.go
+++ b/pkg/sentry/fs/gofer/fs.go
@@ -54,6 +54,10 @@ const (
// sandbox using files backed by the gofer. If set to false, unix sockets
// cannot be bound to gofer files without an overlay on top.
privateUnixSocketKey = "privateunixsocket"
+
+ // If present, sets CachingInodeOperationsOptions.LimitHostFDTranslation to
+ // true.
+ limitHostFDTranslationKey = "limit_host_fd_translation"
)
// defaultAname is the default attach name.
@@ -134,12 +138,13 @@ func (f *filesystem) Mount(ctx context.Context, device string, flags fs.MountSou
// opts are parsed 9p mount options.
type opts struct {
- fd int
- aname string
- policy cachePolicy
- msize uint32
- version string
- privateunixsocket bool
+ fd int
+ aname string
+ policy cachePolicy
+ msize uint32
+ version string
+ privateunixsocket bool
+ limitHostFDTranslation bool
}
// options parses mount(2) data into structured options.
@@ -237,6 +242,11 @@ func options(data string) (opts, error) {
delete(options, privateUnixSocketKey)
}
+ if _, ok := options[limitHostFDTranslationKey]; ok {
+ o.limitHostFDTranslation = true
+ delete(options, limitHostFDTranslationKey)
+ }
+
// Fail to attach if the caller wanted us to do something that we
// don't support.
if len(options) > 0 {
diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go
index 69d08a627..50da865c1 100644
--- a/pkg/sentry/fs/gofer/session.go
+++ b/pkg/sentry/fs/gofer/session.go
@@ -117,6 +117,11 @@ type session struct {
// Flags provided to the mount.
superBlockFlags fs.MountSourceFlags `state:"wait"`
+ // limitHostFDTranslation is the value used for
+ // CachingInodeOperationsOptions.LimitHostFDTranslation for all
+ // CachingInodeOperations created by the session.
+ limitHostFDTranslation bool
+
// connID is a unique identifier for the session connection.
connID string `state:"wait"`
@@ -218,8 +223,11 @@ func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p
uattr := unstable(ctx, valid, attr, s.mounter, s.client)
return sattr, &inodeOperations{
- fileState: fileState,
- cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, s.superBlockFlags.ForcePageCache),
+ fileState: fileState,
+ cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{
+ ForcePageCache: s.superBlockFlags.ForcePageCache,
+ LimitHostFDTranslation: s.limitHostFDTranslation,
+ }),
}
}
@@ -242,13 +250,14 @@ func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockF
// Construct the session.
s := session{
- connID: dev,
- msize: o.msize,
- version: o.version,
- cachePolicy: o.policy,
- aname: o.aname,
- superBlockFlags: superBlockFlags,
- mounter: mounter,
+ connID: dev,
+ msize: o.msize,
+ version: o.version,
+ cachePolicy: o.policy,
+ aname: o.aname,
+ superBlockFlags: superBlockFlags,
+ limitHostFDTranslation: o.limitHostFDTranslation,
+ mounter: mounter,
}
s.EnableLeakCheck("gofer.session")
diff --git a/pkg/sentry/fs/host/inode.go b/pkg/sentry/fs/host/inode.go
index 679d8321a..894ab01f0 100644
--- a/pkg/sentry/fs/host/inode.go
+++ b/pkg/sentry/fs/host/inode.go
@@ -200,8 +200,10 @@ func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool,
// Build the fs.InodeOperations.
uattr := unstableAttr(msrc.MountSourceOperations.(*superOperations), &s)
iops := &inodeOperations{
- fileState: fileState,
- cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, msrc.Flags.ForcePageCache),
+ fileState: fileState,
+ cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{
+ ForcePageCache: msrc.Flags.ForcePageCache,
+ }),
}
// Return the fs.Inode.
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index 44c4ee5f2..2392787cb 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -65,7 +65,7 @@ type ConnectedEndpoint struct {
// GetSockOpt and message splitting/rejection in SendMsg, but do not
// prevent lots of small messages from filling the real send buffer
// size on the host.
- sndbuf int `state:"nosave"`
+ sndbuf int64 `state:"nosave"`
// mu protects the fields below.
mu sync.RWMutex `state:"nosave"`
@@ -107,7 +107,7 @@ func (c *ConnectedEndpoint) init() *syserr.Error {
}
c.stype = linux.SockType(stype)
- c.sndbuf = sndbuf
+ c.sndbuf = int64(sndbuf)
return nil
}
@@ -202,7 +202,7 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error)
}
// Send implements transport.ConnectedEndpoint.Send.
-func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (uintptr, bool, *syserr.Error) {
+func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) {
c.mu.RLock()
defer c.mu.RUnlock()
@@ -279,7 +279,7 @@ func (c *ConnectedEndpoint) EventUpdate() {
}
// Recv implements transport.Receiver.Recv.
-func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights uintptr, peek bool) (uintptr, uintptr, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
c.mu.RLock()
defer c.mu.RUnlock()
diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go
index 05d7c79ad..af6955675 100644
--- a/pkg/sentry/fs/host/socket_iovec.go
+++ b/pkg/sentry/fs/host/socket_iovec.go
@@ -55,19 +55,19 @@ func copyFromMulti(dst []byte, src [][]byte) {
//
// If intermediate != nil, iovecs references intermediate rather than bufs and
// the caller must copy to/from bufs as necessary.
-func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovecs []syscall.Iovec, intermediate []byte, err error) {
+func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovecs []syscall.Iovec, intermediate []byte, err error) {
var iovsRequired int
for _, b := range bufs {
- length += uintptr(len(b))
+ length += int64(len(b))
if len(b) > 0 {
iovsRequired++
}
}
stopLen := length
- if length > uintptr(maxlen) {
+ if length > maxlen {
if truncate {
- stopLen = uintptr(maxlen)
+ stopLen = maxlen
err = syserror.EAGAIN
} else {
return 0, nil, nil, syserror.EMSGSIZE
@@ -85,7 +85,7 @@ func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovec
}}, b, err
}
- var total uintptr
+ var total int64
iovecs = make([]syscall.Iovec, 0, iovsRequired)
for i := range bufs {
l := len(bufs[i])
@@ -93,9 +93,9 @@ func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovec
continue
}
- stop := l
- if total+uintptr(stop) > stopLen {
- stop = int(stopLen - total)
+ stop := int64(l)
+ if total+stop > stopLen {
+ stop = stopLen - total
}
iovecs = append(iovecs, syscall.Iovec{
@@ -103,7 +103,7 @@ func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovec
Len: uint64(stop),
})
- total += uintptr(stop)
+ total += stop
if total >= stopLen {
break
}
diff --git a/pkg/sentry/fs/host/socket_unsafe.go b/pkg/sentry/fs/host/socket_unsafe.go
index e57be0506..f3bbed7ea 100644
--- a/pkg/sentry/fs/host/socket_unsafe.go
+++ b/pkg/sentry/fs/host/socket_unsafe.go
@@ -23,7 +23,7 @@ import (
//
// If the total length of bufs is > maxlen, fdReadVec will do a partial read
// and err will indicate why the message was truncated.
-func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int) (readLen uintptr, msgLen uintptr, controlLen uint64, controlTrunc bool, err error) {
+func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int64) (readLen int64, msgLen int64, controlLen uint64, controlTrunc bool, err error) {
flags := uintptr(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC)
if peek {
flags |= syscall.MSG_PEEK
@@ -48,11 +48,12 @@ func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int) (re
msg.Iovlen = uint64(len(iovecs))
}
- n, _, e := syscall.RawSyscall(syscall.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), flags)
+ rawN, _, e := syscall.RawSyscall(syscall.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), flags)
if e != 0 {
// N.B. prioritize the syscall error over the buildIovec error.
return 0, 0, 0, false, e
}
+ n := int64(rawN)
// Copy data back to bufs.
if intermediate != nil {
@@ -72,7 +73,7 @@ func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int) (re
//
// If the total length of bufs is > maxlen && truncate, fdWriteVec will do a
// partial write and err will indicate why the message was truncated.
-func fdWriteVec(fd int, bufs [][]byte, maxlen int, truncate bool) (uintptr, uintptr, error) {
+func fdWriteVec(fd int, bufs [][]byte, maxlen int64, truncate bool) (int64, int64, error) {
length, iovecs, intermediate, err := buildIovec(bufs, maxlen, truncate)
if err != nil && len(iovecs) == 0 {
// No partial write to do, return error immediately.
@@ -96,5 +97,5 @@ func fdWriteVec(fd int, bufs [][]byte, maxlen int, truncate bool) (uintptr, uint
return 0, length, e
}
- return n, length, err
+ return int64(n), length, err
}
diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go
index 693ffc760..ac0398bd9 100644
--- a/pkg/sentry/fs/mounts.go
+++ b/pkg/sentry/fs/mounts.go
@@ -171,8 +171,6 @@ type MountNamespace struct {
// NewMountNamespace returns a new MountNamespace, with the provided node at the
// root, and the given cache size. A root must always be provided.
func NewMountNamespace(ctx context.Context, root *Inode) (*MountNamespace, error) {
- creds := auth.CredentialsFromContext(ctx)
-
// Set the root dirent and id on the root mount. The reference returned from
// NewDirent will be donated to the MountNamespace constructed below.
d := NewDirent(ctx, root, "/")
@@ -181,6 +179,7 @@ func NewMountNamespace(ctx context.Context, root *Inode) (*MountNamespace, error
d: newRootMount(1, d),
}
+ creds := auth.CredentialsFromContext(ctx)
mns := MountNamespace{
userns: creds.UserNamespace,
root: d,
@@ -219,6 +218,13 @@ func (mns *MountNamespace) flushMountSourceRefsLocked() {
}
}
+ if mns.root == nil {
+ // No root? This MountSource must have already been destroyed.
+ // This can happen when a Save is triggered while a process is
+ // exiting. There is nothing to flush.
+ return
+ }
+
// Flush root's MountSource references.
mns.root.Inode.MountSource.FlushDirentRefs()
}
@@ -249,6 +255,10 @@ func (mns *MountNamespace) destroy() {
// Drop reference on the root.
mns.root.DecRef()
+ // Ensure that root cannot be accessed via this MountNamespace any
+ // more.
+ mns.root = nil
+
// Wait for asynchronous work (queued by dropping Dirent references
// above) to complete before destroying this MountNamespace.
AsyncBarrier()
@@ -678,7 +688,7 @@ func (mns *MountNamespace) ResolveExecutablePath(ctx context.Context, wd, name s
return "", syserror.ENOENT
}
-// GetPath returns the PATH as a slice of strings given the environemnt
+// GetPath returns the PATH as a slice of strings given the environment
// variables.
func GetPath(env []string) []string {
const prefix = "PATH="
diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go
index 6b839685b..9adb23608 100644
--- a/pkg/sentry/fs/proc/net.go
+++ b/pkg/sentry/fs/proc/net.go
@@ -348,7 +348,7 @@ func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
// Field: local_adddress.
var localAddr linux.SockAddrInet
if local, _, err := sops.GetSockName(t); err == nil {
- localAddr = local.(linux.SockAddrInet)
+ localAddr = *local.(*linux.SockAddrInet)
}
binary.LittleEndian.PutUint16(portBuf, localAddr.Port)
fmt.Fprintf(&buf, "%08X:%04X ",
@@ -358,7 +358,7 @@ func (n *netTCP) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
// Field: rem_address.
var remoteAddr linux.SockAddrInet
if remote, _, err := sops.GetPeerName(t); err == nil {
- remoteAddr = remote.(linux.SockAddrInet)
+ remoteAddr = *remote.(*linux.SockAddrInet)
}
binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port)
fmt.Fprintf(&buf, "%08X:%04X ",
diff --git a/pkg/sentry/fs/ramfs/dir.go b/pkg/sentry/fs/ramfs/dir.go
index f3e984c24..78e082b8e 100644
--- a/pkg/sentry/fs/ramfs/dir.go
+++ b/pkg/sentry/fs/ramfs/dir.go
@@ -53,7 +53,6 @@ type Dir struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeIsDirAllocate `state:"nosave"`
fsutil.InodeIsDirTruncate `state:"nosave"`
- fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
@@ -84,7 +83,8 @@ type Dir struct {
var _ fs.InodeOperations = (*Dir)(nil)
-// NewDir returns a new Dir with the given contents and attributes.
+// NewDir returns a new Dir with the given contents and attributes. A reference
+// on each fs.Inode in the `contents` map will be donated to this Dir.
func NewDir(ctx context.Context, contents map[string]*fs.Inode, owner fs.FileOwner, perms fs.FilePermissions) *Dir {
d := &Dir{
InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, owner, perms, linux.RAMFS_MAGIC),
@@ -138,7 +138,7 @@ func (d *Dir) addChildLocked(ctx context.Context, name string, inode *fs.Inode)
d.NotifyModificationAndStatusChange(ctx)
}
-// AddChild adds a child to this dir.
+// AddChild adds a child to this dir, inheriting its reference.
func (d *Dir) AddChild(ctx context.Context, name string, inode *fs.Inode) {
d.mu.Lock()
defer d.mu.Unlock()
@@ -172,7 +172,9 @@ func (d *Dir) Children() ([]string, map[string]fs.DentAttr) {
return namesCopy, entriesCopy
}
-// removeChildLocked attempts to remove an entry from this directory.
+// removeChildLocked attempts to remove an entry from this directory. It
+// returns the removed fs.Inode along with its reference, which callers are
+// responsible for decrementing.
func (d *Dir) removeChildLocked(ctx context.Context, name string) (*fs.Inode, error) {
inode, ok := d.children[name]
if !ok {
@@ -253,7 +255,8 @@ func (d *Dir) RemoveDirectory(ctx context.Context, _ *fs.Inode, name string) err
return nil
}
-// Lookup loads an inode at p into a Dirent.
+// Lookup loads an inode at p into a Dirent. It returns the fs.Dirent along
+// with a reference.
func (d *Dir) Lookup(ctx context.Context, _ *fs.Inode, p string) (*fs.Dirent, error) {
if len(p) > linux.NAME_MAX {
return nil, syserror.ENAMETOOLONG
@@ -408,6 +411,16 @@ func (*Dir) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, ol
return Rename(ctx, oldParent.InodeOperations, oldName, newParent.InodeOperations, newName, replacement)
}
+// Release implements fs.InodeOperation.Release.
+func (d *Dir) Release(_ context.Context) {
+ // Drop references on all children.
+ d.mu.Lock()
+ for _, i := range d.children {
+ i.DecRef()
+ }
+ d.mu.Unlock()
+}
+
// dirFileOperations implements fs.FileOperations for a ramfs directory.
//
// +stateify savable
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index 0f4497cd6..159fb7c08 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -56,7 +56,6 @@ func rename(ctx context.Context, oldParent *fs.Inode, oldName string, newParent
type Dir struct {
fsutil.InodeGenericChecker `state:"nosave"`
fsutil.InodeIsDirTruncate `state:"nosave"`
- fsutil.InodeNoopRelease `state:"nosave"`
fsutil.InodeNoopWriteOut `state:"nosave"`
fsutil.InodeNotMappable `state:"nosave"`
fsutil.InodeNotSocket `state:"nosave"`
@@ -252,6 +251,11 @@ func (d *Dir) Allocate(ctx context.Context, node *fs.Inode, offset, length int64
return d.ramfsDir.Allocate(ctx, node, offset, length)
}
+// Release implements fs.InodeOperations.Release.
+func (d *Dir) Release(ctx context.Context) {
+ d.ramfsDir.Release(ctx)
+}
+
// Symlink is a symlink.
//
// +stateify savable
diff --git a/pkg/sentry/fs/tty/BUILD b/pkg/sentry/fs/tty/BUILD
index 5e9327aec..291164986 100644
--- a/pkg/sentry/fs/tty/BUILD
+++ b/pkg/sentry/fs/tty/BUILD
@@ -23,6 +23,7 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/safemem",
"//pkg/sentry/socket/unix/transport",
diff --git a/pkg/sentry/fs/tty/dir.go b/pkg/sentry/fs/tty/dir.go
index 1d128532b..2f639c823 100644
--- a/pkg/sentry/fs/tty/dir.go
+++ b/pkg/sentry/fs/tty/dir.go
@@ -129,6 +129,9 @@ func newDir(ctx context.Context, m *fs.MountSource) *fs.Inode {
// Release implements fs.InodeOperations.Release.
func (d *dirInodeOperations) Release(ctx context.Context) {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+
d.master.DecRef()
if len(d.slaves) != 0 {
panic(fmt.Sprintf("devpts directory still contains active terminals: %+v", d))
diff --git a/pkg/sentry/fs/tty/master.go b/pkg/sentry/fs/tty/master.go
index 92ec1ca18..19b7557d5 100644
--- a/pkg/sentry/fs/tty/master.go
+++ b/pkg/sentry/fs/tty/master.go
@@ -172,6 +172,19 @@ func (mf *masterFileOperations) Ioctl(ctx context.Context, _ *fs.File, io userme
return 0, mf.t.ld.windowSize(ctx, io, args)
case linux.TIOCSWINSZ:
return 0, mf.t.ld.setWindowSize(ctx, io, args)
+ case linux.TIOCSCTTY:
+ // Make the given terminal the controlling terminal of the
+ // calling process.
+ return 0, mf.t.setControllingTTY(ctx, io, args, true /* isMaster */)
+ case linux.TIOCNOTTY:
+ // Release this process's controlling terminal.
+ return 0, mf.t.releaseControllingTTY(ctx, io, args, true /* isMaster */)
+ case linux.TIOCGPGRP:
+ // Get the foreground process group.
+ return mf.t.foregroundProcessGroup(ctx, io, args, true /* isMaster */)
+ case linux.TIOCSPGRP:
+ // Set the foreground process group.
+ return mf.t.setForegroundProcessGroup(ctx, io, args, true /* isMaster */)
default:
maybeEmitUnimplementedEvent(ctx, cmd)
return 0, syserror.ENOTTY
@@ -185,8 +198,6 @@ func maybeEmitUnimplementedEvent(ctx context.Context, cmd uint32) {
linux.TCSETS,
linux.TCSETSW,
linux.TCSETSF,
- linux.TIOCGPGRP,
- linux.TIOCSPGRP,
linux.TIOCGWINSZ,
linux.TIOCSWINSZ,
linux.TIOCSETD,
@@ -200,8 +211,6 @@ func maybeEmitUnimplementedEvent(ctx context.Context, cmd uint32) {
linux.TIOCEXCL,
linux.TIOCNXCL,
linux.TIOCGEXCL,
- linux.TIOCNOTTY,
- linux.TIOCSCTTY,
linux.TIOCGSID,
linux.TIOCGETD,
linux.TIOCVHANGUP,
diff --git a/pkg/sentry/fs/tty/slave.go b/pkg/sentry/fs/tty/slave.go
index e30266404..944c4ada1 100644
--- a/pkg/sentry/fs/tty/slave.go
+++ b/pkg/sentry/fs/tty/slave.go
@@ -152,9 +152,16 @@ func (sf *slaveFileOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem
case linux.TIOCSCTTY:
// Make the given terminal the controlling terminal of the
// calling process.
- // TODO(b/129283598): Implement once we have support for job
- // control.
- return 0, nil
+ return 0, sf.si.t.setControllingTTY(ctx, io, args, false /* isMaster */)
+ case linux.TIOCNOTTY:
+ // Release this process's controlling terminal.
+ return 0, sf.si.t.releaseControllingTTY(ctx, io, args, false /* isMaster */)
+ case linux.TIOCGPGRP:
+ // Get the foreground process group.
+ return sf.si.t.foregroundProcessGroup(ctx, io, args, false /* isMaster */)
+ case linux.TIOCSPGRP:
+ // Set the foreground process group.
+ return sf.si.t.setForegroundProcessGroup(ctx, io, args, false /* isMaster */)
default:
maybeEmitUnimplementedEvent(ctx, cmd)
return 0, syserror.ENOTTY
diff --git a/pkg/sentry/fs/tty/terminal.go b/pkg/sentry/fs/tty/terminal.go
index b7cecb2ed..ff8138820 100644
--- a/pkg/sentry/fs/tty/terminal.go
+++ b/pkg/sentry/fs/tty/terminal.go
@@ -17,7 +17,10 @@ package tty
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/usermem"
)
// Terminal is a pseudoterminal.
@@ -26,23 +29,100 @@ import (
type Terminal struct {
refs.AtomicRefCount
- // n is the terminal index.
+ // n is the terminal index. It is immutable.
n uint32
- // d is the containing directory.
+ // d is the containing directory. It is immutable.
d *dirInodeOperations
- // ld is the line discipline of the terminal.
+ // ld is the line discipline of the terminal. It is immutable.
ld *lineDiscipline
+
+ // masterKTTY contains the controlling process of the master end of
+ // this terminal. This field is immutable.
+ masterKTTY *kernel.TTY
+
+ // slaveKTTY contains the controlling process of the slave end of this
+ // terminal. This field is immutable.
+ slaveKTTY *kernel.TTY
}
func newTerminal(ctx context.Context, d *dirInodeOperations, n uint32) *Terminal {
termios := linux.DefaultSlaveTermios
t := Terminal{
- d: d,
- n: n,
- ld: newLineDiscipline(termios),
+ d: d,
+ n: n,
+ ld: newLineDiscipline(termios),
+ masterKTTY: &kernel.TTY{},
+ slaveKTTY: &kernel.TTY{},
}
t.EnableLeakCheck("tty.Terminal")
return &t
}
+
+// setControllingTTY makes tm the controlling terminal of the calling thread
+// group.
+func (tm *Terminal) setControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error {
+ task := kernel.TaskFromContext(ctx)
+ if task == nil {
+ panic("setControllingTTY must be called from a task context")
+ }
+
+ return task.ThreadGroup().SetControllingTTY(tm.tty(isMaster), args[2].Int())
+}
+
+// releaseControllingTTY removes tm as the controlling terminal of the calling
+// thread group.
+func (tm *Terminal) releaseControllingTTY(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) error {
+ task := kernel.TaskFromContext(ctx)
+ if task == nil {
+ panic("releaseControllingTTY must be called from a task context")
+ }
+
+ return task.ThreadGroup().ReleaseControllingTTY(tm.tty(isMaster))
+}
+
+// foregroundProcessGroup gets the process group ID of tm's foreground process.
+func (tm *Terminal) foregroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
+ task := kernel.TaskFromContext(ctx)
+ if task == nil {
+ panic("foregroundProcessGroup must be called from a task context")
+ }
+
+ ret, err := task.ThreadGroup().ForegroundProcessGroup(tm.tty(isMaster))
+ if err != nil {
+ return 0, err
+ }
+
+ // Write it out to *arg.
+ _, err = usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(ret), usermem.IOOpts{
+ AddressSpaceActive: true,
+ })
+ return 0, err
+}
+
+// foregroundProcessGroup sets tm's foreground process.
+func (tm *Terminal) setForegroundProcessGroup(ctx context.Context, io usermem.IO, args arch.SyscallArguments, isMaster bool) (uintptr, error) {
+ task := kernel.TaskFromContext(ctx)
+ if task == nil {
+ panic("setForegroundProcessGroup must be called from a task context")
+ }
+
+ // Read in the process group ID.
+ var pgid int32
+ if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &pgid, usermem.IOOpts{
+ AddressSpaceActive: true,
+ }); err != nil {
+ return 0, err
+ }
+
+ ret, err := task.ThreadGroup().SetForegroundProcessGroup(tm.tty(isMaster), kernel.ProcessGroupID(pgid))
+ return uintptr(ret), err
+}
+
+func (tm *Terminal) tty(isMaster bool) *kernel.TTY {
+ if isMaster {
+ return tm.masterKTTY
+ }
+ return tm.slaveKTTY
+}